{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.897269499204114, "eval_steps": 500, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016325864250438759, "grad_norm": 2.6033101081848145, "learning_rate": 2e-08, "loss": 3.4004, "step": 1 }, { "epoch": 0.00032651728500877517, "grad_norm": 3.7327322959899902, "learning_rate": 4e-08, "loss": 3.5503, "step": 2 }, { "epoch": 0.0004897759275131628, "grad_norm": 2.869396924972534, "learning_rate": 6.000000000000001e-08, "loss": 3.5052, "step": 3 }, { "epoch": 0.0006530345700175503, "grad_norm": 3.605945587158203, "learning_rate": 8e-08, "loss": 3.5679, "step": 4 }, { "epoch": 0.0008162932125219379, "grad_norm": 3.122082471847534, "learning_rate": 1.0000000000000001e-07, "loss": 3.5535, "step": 5 }, { "epoch": 0.0009795518550263255, "grad_norm": 3.1517856121063232, "learning_rate": 1.2000000000000002e-07, "loss": 3.531, "step": 6 }, { "epoch": 0.001142810497530713, "grad_norm": 2.8109405040740967, "learning_rate": 1.4e-07, "loss": 3.5358, "step": 7 }, { "epoch": 0.0013060691400351007, "grad_norm": 2.6619415283203125, "learning_rate": 1.6e-07, "loss": 3.4478, "step": 8 }, { "epoch": 0.0014693277825394882, "grad_norm": 3.745173454284668, "learning_rate": 1.8e-07, "loss": 3.5365, "step": 9 }, { "epoch": 0.0016325864250438759, "grad_norm": 2.8928394317626953, "learning_rate": 2.0000000000000002e-07, "loss": 3.6134, "step": 10 }, { "epoch": 0.0017958450675482633, "grad_norm": 2.726499557495117, "learning_rate": 2.2e-07, "loss": 3.4948, "step": 11 }, { "epoch": 0.001959103710052651, "grad_norm": 2.678392171859741, "learning_rate": 2.4000000000000003e-07, "loss": 3.4073, "step": 12 }, { "epoch": 0.0021223623525570383, "grad_norm": 2.7368736267089844, "learning_rate": 2.6e-07, "loss": 3.3911, "step": 13 }, { "epoch": 0.002285620995061426, "grad_norm": 2.772325038909912, "learning_rate": 2.8e-07, "loss": 3.6133, "step": 14 }, { "epoch": 0.0024488796375658137, "grad_norm": 3.1457278728485107, "learning_rate": 3.0000000000000004e-07, "loss": 3.4348, "step": 15 }, { "epoch": 0.0026121382800702014, "grad_norm": 3.166409492492676, "learning_rate": 3.2e-07, "loss": 3.5246, "step": 16 }, { "epoch": 0.0027753969225745886, "grad_norm": 3.016062021255493, "learning_rate": 3.4000000000000003e-07, "loss": 3.5491, "step": 17 }, { "epoch": 0.0029386555650789763, "grad_norm": 2.6820476055145264, "learning_rate": 3.6e-07, "loss": 3.437, "step": 18 }, { "epoch": 0.003101914207583364, "grad_norm": 2.798854351043701, "learning_rate": 3.8e-07, "loss": 3.2704, "step": 19 }, { "epoch": 0.0032651728500877517, "grad_norm": 2.497466564178467, "learning_rate": 4.0000000000000003e-07, "loss": 3.3624, "step": 20 }, { "epoch": 0.003428431492592139, "grad_norm": 2.721534252166748, "learning_rate": 4.2000000000000006e-07, "loss": 3.3006, "step": 21 }, { "epoch": 0.0035916901350965267, "grad_norm": 2.7939298152923584, "learning_rate": 4.4e-07, "loss": 3.4231, "step": 22 }, { "epoch": 0.0037549487776009144, "grad_norm": 3.022852897644043, "learning_rate": 4.6000000000000004e-07, "loss": 3.5319, "step": 23 }, { "epoch": 0.003918207420105302, "grad_norm": 2.7380170822143555, "learning_rate": 4.800000000000001e-07, "loss": 3.3093, "step": 24 }, { "epoch": 0.00408146606260969, "grad_norm": 2.8686535358428955, "learning_rate": 5.000000000000001e-07, "loss": 3.6839, "step": 25 }, { "epoch": 0.004244724705114077, "grad_norm": 2.96225643157959, "learning_rate": 5.2e-07, "loss": 3.4209, "step": 26 }, { "epoch": 0.004407983347618464, "grad_norm": 3.343003034591675, "learning_rate": 5.4e-07, "loss": 3.6372, "step": 27 }, { "epoch": 0.004571241990122852, "grad_norm": 3.239604949951172, "learning_rate": 5.6e-07, "loss": 3.574, "step": 28 }, { "epoch": 0.00473450063262724, "grad_norm": 3.0173134803771973, "learning_rate": 5.800000000000001e-07, "loss": 3.4681, "step": 29 }, { "epoch": 0.004897759275131627, "grad_norm": 2.6635303497314453, "learning_rate": 6.000000000000001e-07, "loss": 3.3091, "step": 30 }, { "epoch": 0.005061017917636015, "grad_norm": 3.0312557220458984, "learning_rate": 6.200000000000001e-07, "loss": 3.4112, "step": 31 }, { "epoch": 0.005224276560140403, "grad_norm": 2.696537494659424, "learning_rate": 6.4e-07, "loss": 3.5429, "step": 32 }, { "epoch": 0.00538753520264479, "grad_norm": 3.657029628753662, "learning_rate": 6.6e-07, "loss": 3.4713, "step": 33 }, { "epoch": 0.005550793845149177, "grad_norm": 3.3929457664489746, "learning_rate": 6.800000000000001e-07, "loss": 3.4757, "step": 34 }, { "epoch": 0.005714052487653565, "grad_norm": 2.9206650257110596, "learning_rate": 7.000000000000001e-07, "loss": 3.4167, "step": 35 }, { "epoch": 0.005877311130157953, "grad_norm": 3.096414089202881, "learning_rate": 7.2e-07, "loss": 3.5128, "step": 36 }, { "epoch": 0.00604056977266234, "grad_norm": 3.2433276176452637, "learning_rate": 7.4e-07, "loss": 3.5109, "step": 37 }, { "epoch": 0.006203828415166728, "grad_norm": 3.1589174270629883, "learning_rate": 7.6e-07, "loss": 3.3431, "step": 38 }, { "epoch": 0.006367087057671116, "grad_norm": 2.8945250511169434, "learning_rate": 7.8e-07, "loss": 3.4019, "step": 39 }, { "epoch": 0.006530345700175503, "grad_norm": 3.9912614822387695, "learning_rate": 8.000000000000001e-07, "loss": 3.5928, "step": 40 }, { "epoch": 0.00669360434267989, "grad_norm": 2.637004852294922, "learning_rate": 8.200000000000001e-07, "loss": 3.2395, "step": 41 }, { "epoch": 0.006856862985184278, "grad_norm": 2.9251229763031006, "learning_rate": 8.400000000000001e-07, "loss": 3.4691, "step": 42 }, { "epoch": 0.007020121627688666, "grad_norm": 2.8292365074157715, "learning_rate": 8.6e-07, "loss": 3.4685, "step": 43 }, { "epoch": 0.007183380270193053, "grad_norm": 2.774548053741455, "learning_rate": 8.8e-07, "loss": 3.4606, "step": 44 }, { "epoch": 0.007346638912697441, "grad_norm": 3.066887617111206, "learning_rate": 9.000000000000001e-07, "loss": 3.5057, "step": 45 }, { "epoch": 0.007509897555201829, "grad_norm": 2.674781084060669, "learning_rate": 9.200000000000001e-07, "loss": 3.408, "step": 46 }, { "epoch": 0.007673156197706216, "grad_norm": 2.6978938579559326, "learning_rate": 9.400000000000001e-07, "loss": 3.488, "step": 47 }, { "epoch": 0.007836414840210604, "grad_norm": 3.810103416442871, "learning_rate": 9.600000000000001e-07, "loss": 3.4489, "step": 48 }, { "epoch": 0.007999673482714992, "grad_norm": 3.0675759315490723, "learning_rate": 9.800000000000001e-07, "loss": 3.3242, "step": 49 }, { "epoch": 0.00816293212521938, "grad_norm": 2.642082929611206, "learning_rate": 1.0000000000000002e-06, "loss": 3.2969, "step": 50 }, { "epoch": 0.008326190767723767, "grad_norm": 2.9033143520355225, "learning_rate": 1.02e-06, "loss": 3.3804, "step": 51 }, { "epoch": 0.008489449410228153, "grad_norm": 3.209754705429077, "learning_rate": 1.04e-06, "loss": 3.3548, "step": 52 }, { "epoch": 0.00865270805273254, "grad_norm": 2.7264466285705566, "learning_rate": 1.06e-06, "loss": 3.1932, "step": 53 }, { "epoch": 0.008815966695236929, "grad_norm": 2.7314743995666504, "learning_rate": 1.08e-06, "loss": 3.3246, "step": 54 }, { "epoch": 0.008979225337741316, "grad_norm": 2.9919331073760986, "learning_rate": 1.1e-06, "loss": 3.3179, "step": 55 }, { "epoch": 0.009142483980245704, "grad_norm": 2.994581937789917, "learning_rate": 1.12e-06, "loss": 3.4928, "step": 56 }, { "epoch": 0.009305742622750092, "grad_norm": 2.964712619781494, "learning_rate": 1.14e-06, "loss": 3.4166, "step": 57 }, { "epoch": 0.00946900126525448, "grad_norm": 2.652458906173706, "learning_rate": 1.1600000000000001e-06, "loss": 3.3717, "step": 58 }, { "epoch": 0.009632259907758867, "grad_norm": 3.0676703453063965, "learning_rate": 1.1800000000000001e-06, "loss": 3.3525, "step": 59 }, { "epoch": 0.009795518550263255, "grad_norm": 2.994270086288452, "learning_rate": 1.2000000000000002e-06, "loss": 3.378, "step": 60 }, { "epoch": 0.009958777192767642, "grad_norm": 3.4734878540039062, "learning_rate": 1.2200000000000002e-06, "loss": 3.3916, "step": 61 }, { "epoch": 0.01012203583527203, "grad_norm": 2.6581945419311523, "learning_rate": 1.2400000000000002e-06, "loss": 3.2896, "step": 62 }, { "epoch": 0.010285294477776418, "grad_norm": 2.923051118850708, "learning_rate": 1.26e-06, "loss": 3.2224, "step": 63 }, { "epoch": 0.010448553120280805, "grad_norm": 2.9782087802886963, "learning_rate": 1.28e-06, "loss": 3.4146, "step": 64 }, { "epoch": 0.010611811762785193, "grad_norm": 3.0207362174987793, "learning_rate": 1.3e-06, "loss": 3.5217, "step": 65 }, { "epoch": 0.01077507040528958, "grad_norm": 2.6329057216644287, "learning_rate": 1.32e-06, "loss": 3.1558, "step": 66 }, { "epoch": 0.010938329047793967, "grad_norm": 2.6853442192077637, "learning_rate": 1.34e-06, "loss": 3.4115, "step": 67 }, { "epoch": 0.011101587690298355, "grad_norm": 3.012260913848877, "learning_rate": 1.3600000000000001e-06, "loss": 3.3941, "step": 68 }, { "epoch": 0.011264846332802742, "grad_norm": 2.6659438610076904, "learning_rate": 1.3800000000000001e-06, "loss": 3.3189, "step": 69 }, { "epoch": 0.01142810497530713, "grad_norm": 3.170567035675049, "learning_rate": 1.4000000000000001e-06, "loss": 3.3776, "step": 70 }, { "epoch": 0.011591363617811518, "grad_norm": 2.726120710372925, "learning_rate": 1.42e-06, "loss": 3.23, "step": 71 }, { "epoch": 0.011754622260315905, "grad_norm": 2.7397310733795166, "learning_rate": 1.44e-06, "loss": 3.3241, "step": 72 }, { "epoch": 0.011917880902820293, "grad_norm": 2.8247501850128174, "learning_rate": 1.46e-06, "loss": 3.2031, "step": 73 }, { "epoch": 0.01208113954532468, "grad_norm": 2.9634339809417725, "learning_rate": 1.48e-06, "loss": 3.1842, "step": 74 }, { "epoch": 0.012244398187829068, "grad_norm": 2.6376233100891113, "learning_rate": 1.5e-06, "loss": 3.2034, "step": 75 }, { "epoch": 0.012407656830333456, "grad_norm": 2.740602970123291, "learning_rate": 1.52e-06, "loss": 3.1517, "step": 76 }, { "epoch": 0.012570915472837844, "grad_norm": 2.8798398971557617, "learning_rate": 1.54e-06, "loss": 3.2775, "step": 77 }, { "epoch": 0.012734174115342231, "grad_norm": 2.9144585132598877, "learning_rate": 1.56e-06, "loss": 3.2373, "step": 78 }, { "epoch": 0.01289743275784662, "grad_norm": 2.5058372020721436, "learning_rate": 1.5800000000000001e-06, "loss": 3.2275, "step": 79 }, { "epoch": 0.013060691400351007, "grad_norm": 2.738837242126465, "learning_rate": 1.6000000000000001e-06, "loss": 3.0292, "step": 80 }, { "epoch": 0.013223950042855395, "grad_norm": 3.099492073059082, "learning_rate": 1.6200000000000002e-06, "loss": 3.145, "step": 81 }, { "epoch": 0.01338720868535978, "grad_norm": 2.5461785793304443, "learning_rate": 1.6400000000000002e-06, "loss": 3.0796, "step": 82 }, { "epoch": 0.013550467327864168, "grad_norm": 3.377657175064087, "learning_rate": 1.6600000000000002e-06, "loss": 3.0228, "step": 83 }, { "epoch": 0.013713725970368556, "grad_norm": 2.7379884719848633, "learning_rate": 1.6800000000000002e-06, "loss": 2.9684, "step": 84 }, { "epoch": 0.013876984612872944, "grad_norm": 2.889983654022217, "learning_rate": 1.7000000000000002e-06, "loss": 3.0686, "step": 85 }, { "epoch": 0.014040243255377331, "grad_norm": 3.0050642490386963, "learning_rate": 1.72e-06, "loss": 3.1926, "step": 86 }, { "epoch": 0.014203501897881719, "grad_norm": 2.707963705062866, "learning_rate": 1.74e-06, "loss": 3.0134, "step": 87 }, { "epoch": 0.014366760540386107, "grad_norm": 2.730029344558716, "learning_rate": 1.76e-06, "loss": 3.0117, "step": 88 }, { "epoch": 0.014530019182890494, "grad_norm": 2.466172695159912, "learning_rate": 1.7800000000000001e-06, "loss": 2.8155, "step": 89 }, { "epoch": 0.014693277825394882, "grad_norm": 2.728130340576172, "learning_rate": 1.8000000000000001e-06, "loss": 2.9858, "step": 90 }, { "epoch": 0.01485653646789927, "grad_norm": 2.8771209716796875, "learning_rate": 1.8200000000000002e-06, "loss": 3.1122, "step": 91 }, { "epoch": 0.015019795110403657, "grad_norm": 2.803978204727173, "learning_rate": 1.8400000000000002e-06, "loss": 2.9593, "step": 92 }, { "epoch": 0.015183053752908045, "grad_norm": 2.448578357696533, "learning_rate": 1.8600000000000002e-06, "loss": 2.5863, "step": 93 }, { "epoch": 0.015346312395412433, "grad_norm": 2.9117980003356934, "learning_rate": 1.8800000000000002e-06, "loss": 2.6925, "step": 94 }, { "epoch": 0.01550957103791682, "grad_norm": 2.8578615188598633, "learning_rate": 1.9000000000000002e-06, "loss": 2.8129, "step": 95 }, { "epoch": 0.015672829680421208, "grad_norm": 3.0731899738311768, "learning_rate": 1.9200000000000003e-06, "loss": 2.9359, "step": 96 }, { "epoch": 0.015836088322925594, "grad_norm": 3.112687826156616, "learning_rate": 1.94e-06, "loss": 2.7836, "step": 97 }, { "epoch": 0.015999346965429984, "grad_norm": 3.0433127880096436, "learning_rate": 1.9600000000000003e-06, "loss": 2.6969, "step": 98 }, { "epoch": 0.01616260560793437, "grad_norm": 3.2575583457946777, "learning_rate": 1.98e-06, "loss": 2.7697, "step": 99 }, { "epoch": 0.01632586425043876, "grad_norm": 2.598994493484497, "learning_rate": 2.0000000000000003e-06, "loss": 2.751, "step": 100 }, { "epoch": 0.016489122892943145, "grad_norm": 3.5013861656188965, "learning_rate": 2.02e-06, "loss": 2.6693, "step": 101 }, { "epoch": 0.016652381535447534, "grad_norm": 3.2837140560150146, "learning_rate": 2.04e-06, "loss": 2.7556, "step": 102 }, { "epoch": 0.01681564017795192, "grad_norm": 3.8987998962402344, "learning_rate": 2.06e-06, "loss": 2.6718, "step": 103 }, { "epoch": 0.016978898820456306, "grad_norm": 3.455641508102417, "learning_rate": 2.08e-06, "loss": 2.7046, "step": 104 }, { "epoch": 0.017142157462960696, "grad_norm": 3.2689836025238037, "learning_rate": 2.1000000000000002e-06, "loss": 2.5543, "step": 105 }, { "epoch": 0.01730541610546508, "grad_norm": 3.1042590141296387, "learning_rate": 2.12e-06, "loss": 2.4787, "step": 106 }, { "epoch": 0.01746867474796947, "grad_norm": 3.2438113689422607, "learning_rate": 2.1400000000000003e-06, "loss": 2.5919, "step": 107 }, { "epoch": 0.017631933390473857, "grad_norm": 3.271658420562744, "learning_rate": 2.16e-06, "loss": 2.3686, "step": 108 }, { "epoch": 0.017795192032978246, "grad_norm": 3.2078323364257812, "learning_rate": 2.1800000000000003e-06, "loss": 2.4998, "step": 109 }, { "epoch": 0.017958450675482632, "grad_norm": 3.267106771469116, "learning_rate": 2.2e-06, "loss": 2.5409, "step": 110 }, { "epoch": 0.018121709317987022, "grad_norm": 2.9392611980438232, "learning_rate": 2.2200000000000003e-06, "loss": 2.3312, "step": 111 }, { "epoch": 0.018284967960491408, "grad_norm": 3.393911361694336, "learning_rate": 2.24e-06, "loss": 2.4156, "step": 112 }, { "epoch": 0.018448226602995797, "grad_norm": 3.66111421585083, "learning_rate": 2.2600000000000004e-06, "loss": 2.532, "step": 113 }, { "epoch": 0.018611485245500183, "grad_norm": 3.183798313140869, "learning_rate": 2.28e-06, "loss": 2.3355, "step": 114 }, { "epoch": 0.018774743888004573, "grad_norm": 3.639376163482666, "learning_rate": 2.3000000000000004e-06, "loss": 2.4429, "step": 115 }, { "epoch": 0.01893800253050896, "grad_norm": 2.9287357330322266, "learning_rate": 2.3200000000000002e-06, "loss": 2.15, "step": 116 }, { "epoch": 0.019101261173013348, "grad_norm": 3.2446250915527344, "learning_rate": 2.3400000000000005e-06, "loss": 2.2276, "step": 117 }, { "epoch": 0.019264519815517734, "grad_norm": 2.768106698989868, "learning_rate": 2.3600000000000003e-06, "loss": 2.2567, "step": 118 }, { "epoch": 0.01942777845802212, "grad_norm": 2.6894707679748535, "learning_rate": 2.38e-06, "loss": 2.1898, "step": 119 }, { "epoch": 0.01959103710052651, "grad_norm": 2.929439067840576, "learning_rate": 2.4000000000000003e-06, "loss": 2.1807, "step": 120 }, { "epoch": 0.019754295743030895, "grad_norm": 3.2130661010742188, "learning_rate": 2.42e-06, "loss": 2.1286, "step": 121 }, { "epoch": 0.019917554385535285, "grad_norm": 3.124307870864868, "learning_rate": 2.4400000000000004e-06, "loss": 2.2528, "step": 122 }, { "epoch": 0.02008081302803967, "grad_norm": 2.8239824771881104, "learning_rate": 2.46e-06, "loss": 2.1212, "step": 123 }, { "epoch": 0.02024407167054406, "grad_norm": 2.901832342147827, "learning_rate": 2.4800000000000004e-06, "loss": 1.8622, "step": 124 }, { "epoch": 0.020407330313048446, "grad_norm": 2.785125732421875, "learning_rate": 2.5e-06, "loss": 2.1786, "step": 125 }, { "epoch": 0.020570588955552836, "grad_norm": 2.7350783348083496, "learning_rate": 2.52e-06, "loss": 1.9966, "step": 126 }, { "epoch": 0.02073384759805722, "grad_norm": 2.692490339279175, "learning_rate": 2.5400000000000002e-06, "loss": 1.9132, "step": 127 }, { "epoch": 0.02089710624056161, "grad_norm": 3.1648802757263184, "learning_rate": 2.56e-06, "loss": 1.8623, "step": 128 }, { "epoch": 0.021060364883065997, "grad_norm": 2.889390230178833, "learning_rate": 2.5800000000000003e-06, "loss": 1.7629, "step": 129 }, { "epoch": 0.021223623525570386, "grad_norm": 3.0257678031921387, "learning_rate": 2.6e-06, "loss": 1.9024, "step": 130 }, { "epoch": 0.021386882168074772, "grad_norm": 2.9072747230529785, "learning_rate": 2.6200000000000003e-06, "loss": 1.8467, "step": 131 }, { "epoch": 0.02155014081057916, "grad_norm": 3.107095241546631, "learning_rate": 2.64e-06, "loss": 1.8584, "step": 132 }, { "epoch": 0.021713399453083548, "grad_norm": 2.426776170730591, "learning_rate": 2.6600000000000004e-06, "loss": 1.8054, "step": 133 }, { "epoch": 0.021876658095587934, "grad_norm": 2.595407724380493, "learning_rate": 2.68e-06, "loss": 1.6519, "step": 134 }, { "epoch": 0.022039916738092323, "grad_norm": 2.245654821395874, "learning_rate": 2.7000000000000004e-06, "loss": 1.721, "step": 135 }, { "epoch": 0.02220317538059671, "grad_norm": 1.9480783939361572, "learning_rate": 2.7200000000000002e-06, "loss": 1.7439, "step": 136 }, { "epoch": 0.0223664340231011, "grad_norm": 2.581364870071411, "learning_rate": 2.7400000000000004e-06, "loss": 1.8561, "step": 137 }, { "epoch": 0.022529692665605484, "grad_norm": 1.8394737243652344, "learning_rate": 2.7600000000000003e-06, "loss": 1.7832, "step": 138 }, { "epoch": 0.022692951308109874, "grad_norm": 1.96915864944458, "learning_rate": 2.7800000000000005e-06, "loss": 1.7759, "step": 139 }, { "epoch": 0.02285620995061426, "grad_norm": 2.141920804977417, "learning_rate": 2.8000000000000003e-06, "loss": 1.7082, "step": 140 }, { "epoch": 0.02301946859311865, "grad_norm": 2.139808416366577, "learning_rate": 2.82e-06, "loss": 1.7296, "step": 141 }, { "epoch": 0.023182727235623035, "grad_norm": 2.474648952484131, "learning_rate": 2.84e-06, "loss": 1.7241, "step": 142 }, { "epoch": 0.023345985878127425, "grad_norm": 2.2544307708740234, "learning_rate": 2.86e-06, "loss": 1.6342, "step": 143 }, { "epoch": 0.02350924452063181, "grad_norm": 1.8940359354019165, "learning_rate": 2.88e-06, "loss": 1.8047, "step": 144 }, { "epoch": 0.0236725031631362, "grad_norm": 2.0767056941986084, "learning_rate": 2.9e-06, "loss": 1.6129, "step": 145 }, { "epoch": 0.023835761805640586, "grad_norm": 1.9623079299926758, "learning_rate": 2.92e-06, "loss": 1.7396, "step": 146 }, { "epoch": 0.023999020448144975, "grad_norm": 2.2459523677825928, "learning_rate": 2.9400000000000002e-06, "loss": 1.7015, "step": 147 }, { "epoch": 0.02416227909064936, "grad_norm": 1.9305064678192139, "learning_rate": 2.96e-06, "loss": 1.7263, "step": 148 }, { "epoch": 0.024325537733153747, "grad_norm": 2.420577049255371, "learning_rate": 2.9800000000000003e-06, "loss": 1.6789, "step": 149 }, { "epoch": 0.024488796375658137, "grad_norm": 2.3346197605133057, "learning_rate": 3e-06, "loss": 1.8634, "step": 150 }, { "epoch": 0.024652055018162523, "grad_norm": 2.034031391143799, "learning_rate": 3.0200000000000003e-06, "loss": 1.5641, "step": 151 }, { "epoch": 0.024815313660666912, "grad_norm": 2.277332067489624, "learning_rate": 3.04e-06, "loss": 1.7236, "step": 152 }, { "epoch": 0.024978572303171298, "grad_norm": 2.122135639190674, "learning_rate": 3.0600000000000003e-06, "loss": 1.5725, "step": 153 }, { "epoch": 0.025141830945675687, "grad_norm": 2.085005044937134, "learning_rate": 3.08e-06, "loss": 1.571, "step": 154 }, { "epoch": 0.025305089588180073, "grad_norm": 2.0196194648742676, "learning_rate": 3.1000000000000004e-06, "loss": 1.6876, "step": 155 }, { "epoch": 0.025468348230684463, "grad_norm": 2.0159735679626465, "learning_rate": 3.12e-06, "loss": 1.5243, "step": 156 }, { "epoch": 0.02563160687318885, "grad_norm": 1.865037441253662, "learning_rate": 3.1400000000000004e-06, "loss": 1.5005, "step": 157 }, { "epoch": 0.02579486551569324, "grad_norm": 1.8383852243423462, "learning_rate": 3.1600000000000002e-06, "loss": 1.6258, "step": 158 }, { "epoch": 0.025958124158197624, "grad_norm": 2.28385329246521, "learning_rate": 3.1800000000000005e-06, "loss": 1.4342, "step": 159 }, { "epoch": 0.026121382800702014, "grad_norm": 1.7752000093460083, "learning_rate": 3.2000000000000003e-06, "loss": 1.5506, "step": 160 }, { "epoch": 0.0262846414432064, "grad_norm": 2.923414945602417, "learning_rate": 3.2200000000000005e-06, "loss": 1.5635, "step": 161 }, { "epoch": 0.02644790008571079, "grad_norm": 1.5688098669052124, "learning_rate": 3.2400000000000003e-06, "loss": 1.305, "step": 162 }, { "epoch": 0.026611158728215175, "grad_norm": 2.4980533123016357, "learning_rate": 3.2600000000000006e-06, "loss": 1.5358, "step": 163 }, { "epoch": 0.02677441737071956, "grad_norm": 1.7541378736495972, "learning_rate": 3.2800000000000004e-06, "loss": 1.4512, "step": 164 }, { "epoch": 0.02693767601322395, "grad_norm": 2.055202007293701, "learning_rate": 3.3000000000000006e-06, "loss": 1.668, "step": 165 }, { "epoch": 0.027100934655728336, "grad_norm": 1.4846760034561157, "learning_rate": 3.3200000000000004e-06, "loss": 1.3364, "step": 166 }, { "epoch": 0.027264193298232726, "grad_norm": 1.6579443216323853, "learning_rate": 3.3400000000000006e-06, "loss": 1.432, "step": 167 }, { "epoch": 0.027427451940737112, "grad_norm": 1.7275137901306152, "learning_rate": 3.3600000000000004e-06, "loss": 1.5508, "step": 168 }, { "epoch": 0.0275907105832415, "grad_norm": 1.9515480995178223, "learning_rate": 3.3800000000000007e-06, "loss": 1.4596, "step": 169 }, { "epoch": 0.027753969225745887, "grad_norm": 1.8022053241729736, "learning_rate": 3.4000000000000005e-06, "loss": 1.5817, "step": 170 }, { "epoch": 0.027917227868250277, "grad_norm": 1.7202377319335938, "learning_rate": 3.4200000000000007e-06, "loss": 1.4101, "step": 171 }, { "epoch": 0.028080486510754663, "grad_norm": 2.0519022941589355, "learning_rate": 3.44e-06, "loss": 1.4796, "step": 172 }, { "epoch": 0.028243745153259052, "grad_norm": 1.7433277368545532, "learning_rate": 3.46e-06, "loss": 1.6402, "step": 173 }, { "epoch": 0.028407003795763438, "grad_norm": 1.6006267070770264, "learning_rate": 3.48e-06, "loss": 1.5036, "step": 174 }, { "epoch": 0.028570262438267827, "grad_norm": 1.6294777393341064, "learning_rate": 3.5e-06, "loss": 1.4822, "step": 175 }, { "epoch": 0.028733521080772213, "grad_norm": 1.6254267692565918, "learning_rate": 3.52e-06, "loss": 1.4198, "step": 176 }, { "epoch": 0.0288967797232766, "grad_norm": 1.8645423650741577, "learning_rate": 3.54e-06, "loss": 1.4994, "step": 177 }, { "epoch": 0.02906003836578099, "grad_norm": 1.8686392307281494, "learning_rate": 3.5600000000000002e-06, "loss": 1.3703, "step": 178 }, { "epoch": 0.029223297008285375, "grad_norm": 1.5615224838256836, "learning_rate": 3.58e-06, "loss": 1.4052, "step": 179 }, { "epoch": 0.029386555650789764, "grad_norm": 1.4917488098144531, "learning_rate": 3.6000000000000003e-06, "loss": 1.4012, "step": 180 }, { "epoch": 0.02954981429329415, "grad_norm": 1.5183836221694946, "learning_rate": 3.62e-06, "loss": 1.3347, "step": 181 }, { "epoch": 0.02971307293579854, "grad_norm": 1.6634671688079834, "learning_rate": 3.6400000000000003e-06, "loss": 1.409, "step": 182 }, { "epoch": 0.029876331578302925, "grad_norm": 1.7330362796783447, "learning_rate": 3.66e-06, "loss": 1.3344, "step": 183 }, { "epoch": 0.030039590220807315, "grad_norm": 1.5642423629760742, "learning_rate": 3.6800000000000003e-06, "loss": 1.4214, "step": 184 }, { "epoch": 0.0302028488633117, "grad_norm": 1.682076334953308, "learning_rate": 3.7e-06, "loss": 1.5688, "step": 185 }, { "epoch": 0.03036610750581609, "grad_norm": 1.6651852130889893, "learning_rate": 3.7200000000000004e-06, "loss": 1.4421, "step": 186 }, { "epoch": 0.030529366148320476, "grad_norm": 1.5420279502868652, "learning_rate": 3.74e-06, "loss": 1.3029, "step": 187 }, { "epoch": 0.030692624790824866, "grad_norm": 1.7709190845489502, "learning_rate": 3.7600000000000004e-06, "loss": 1.5403, "step": 188 }, { "epoch": 0.03085588343332925, "grad_norm": 1.604353427886963, "learning_rate": 3.7800000000000002e-06, "loss": 1.4906, "step": 189 }, { "epoch": 0.03101914207583364, "grad_norm": 2.0194711685180664, "learning_rate": 3.8000000000000005e-06, "loss": 1.2699, "step": 190 }, { "epoch": 0.031182400718338027, "grad_norm": 1.5488393306732178, "learning_rate": 3.820000000000001e-06, "loss": 1.61, "step": 191 }, { "epoch": 0.031345659360842416, "grad_norm": 1.4095816612243652, "learning_rate": 3.8400000000000005e-06, "loss": 1.3254, "step": 192 }, { "epoch": 0.0315089180033468, "grad_norm": 1.522632360458374, "learning_rate": 3.86e-06, "loss": 1.4244, "step": 193 }, { "epoch": 0.03167217664585119, "grad_norm": 1.754141926765442, "learning_rate": 3.88e-06, "loss": 1.5017, "step": 194 }, { "epoch": 0.03183543528835558, "grad_norm": 1.8344064950942993, "learning_rate": 3.900000000000001e-06, "loss": 1.3473, "step": 195 }, { "epoch": 0.03199869393085997, "grad_norm": 1.5926331281661987, "learning_rate": 3.920000000000001e-06, "loss": 1.5758, "step": 196 }, { "epoch": 0.03216195257336435, "grad_norm": 1.4991172552108765, "learning_rate": 3.94e-06, "loss": 1.3974, "step": 197 }, { "epoch": 0.03232521121586874, "grad_norm": 1.8705028295516968, "learning_rate": 3.96e-06, "loss": 1.5125, "step": 198 }, { "epoch": 0.03248846985837313, "grad_norm": 1.6602659225463867, "learning_rate": 3.980000000000001e-06, "loss": 1.5436, "step": 199 }, { "epoch": 0.03265172850087752, "grad_norm": 1.819608449935913, "learning_rate": 4.000000000000001e-06, "loss": 1.5024, "step": 200 }, { "epoch": 0.0328149871433819, "grad_norm": 1.6650482416152954, "learning_rate": 4.0200000000000005e-06, "loss": 1.5034, "step": 201 }, { "epoch": 0.03297824578588629, "grad_norm": 1.758331537246704, "learning_rate": 4.04e-06, "loss": 1.4673, "step": 202 }, { "epoch": 0.03314150442839068, "grad_norm": 1.3998029232025146, "learning_rate": 4.060000000000001e-06, "loss": 1.2635, "step": 203 }, { "epoch": 0.03330476307089507, "grad_norm": 1.6199387311935425, "learning_rate": 4.08e-06, "loss": 1.3724, "step": 204 }, { "epoch": 0.03346802171339945, "grad_norm": 1.8421322107315063, "learning_rate": 4.1e-06, "loss": 1.4737, "step": 205 }, { "epoch": 0.03363128035590384, "grad_norm": 1.8446450233459473, "learning_rate": 4.12e-06, "loss": 1.1772, "step": 206 }, { "epoch": 0.03379453899840823, "grad_norm": 1.6325796842575073, "learning_rate": 4.14e-06, "loss": 1.5462, "step": 207 }, { "epoch": 0.03395779764091261, "grad_norm": 1.5166337490081787, "learning_rate": 4.16e-06, "loss": 1.2812, "step": 208 }, { "epoch": 0.034121056283417, "grad_norm": 2.056048631668091, "learning_rate": 4.18e-06, "loss": 1.4828, "step": 209 }, { "epoch": 0.03428431492592139, "grad_norm": 1.6736007928848267, "learning_rate": 4.2000000000000004e-06, "loss": 1.5851, "step": 210 }, { "epoch": 0.03444757356842578, "grad_norm": 1.7450826168060303, "learning_rate": 4.22e-06, "loss": 1.3061, "step": 211 }, { "epoch": 0.03461083221093016, "grad_norm": 2.008894443511963, "learning_rate": 4.24e-06, "loss": 1.4816, "step": 212 }, { "epoch": 0.03477409085343455, "grad_norm": 1.7426434755325317, "learning_rate": 4.26e-06, "loss": 1.3139, "step": 213 }, { "epoch": 0.03493734949593894, "grad_norm": 1.716793417930603, "learning_rate": 4.2800000000000005e-06, "loss": 1.4927, "step": 214 }, { "epoch": 0.03510060813844333, "grad_norm": 1.6640710830688477, "learning_rate": 4.3e-06, "loss": 1.4486, "step": 215 }, { "epoch": 0.035263866780947714, "grad_norm": 1.6972944736480713, "learning_rate": 4.32e-06, "loss": 1.4054, "step": 216 }, { "epoch": 0.035427125423452104, "grad_norm": 1.5209475755691528, "learning_rate": 4.34e-06, "loss": 1.1952, "step": 217 }, { "epoch": 0.03559038406595649, "grad_norm": 1.6674305200576782, "learning_rate": 4.360000000000001e-06, "loss": 1.3865, "step": 218 }, { "epoch": 0.03575364270846088, "grad_norm": 1.6292712688446045, "learning_rate": 4.38e-06, "loss": 1.4336, "step": 219 }, { "epoch": 0.035916901350965265, "grad_norm": 1.8487358093261719, "learning_rate": 4.4e-06, "loss": 1.2439, "step": 220 }, { "epoch": 0.036080159993469654, "grad_norm": 1.9384684562683105, "learning_rate": 4.42e-06, "loss": 1.2103, "step": 221 }, { "epoch": 0.036243418635974044, "grad_norm": 1.9539082050323486, "learning_rate": 4.440000000000001e-06, "loss": 1.3662, "step": 222 }, { "epoch": 0.036406677278478426, "grad_norm": 1.5603522062301636, "learning_rate": 4.4600000000000005e-06, "loss": 1.447, "step": 223 }, { "epoch": 0.036569935920982816, "grad_norm": 1.6361925601959229, "learning_rate": 4.48e-06, "loss": 1.5757, "step": 224 }, { "epoch": 0.036733194563487205, "grad_norm": 1.5589345693588257, "learning_rate": 4.5e-06, "loss": 1.3436, "step": 225 }, { "epoch": 0.036896453205991595, "grad_norm": 1.6450508832931519, "learning_rate": 4.520000000000001e-06, "loss": 1.5028, "step": 226 }, { "epoch": 0.03705971184849598, "grad_norm": 1.9003053903579712, "learning_rate": 4.540000000000001e-06, "loss": 1.5323, "step": 227 }, { "epoch": 0.037222970491000366, "grad_norm": 1.8154126405715942, "learning_rate": 4.56e-06, "loss": 1.3956, "step": 228 }, { "epoch": 0.037386229133504756, "grad_norm": 1.8339314460754395, "learning_rate": 4.58e-06, "loss": 1.4306, "step": 229 }, { "epoch": 0.037549487776009145, "grad_norm": 1.834531307220459, "learning_rate": 4.600000000000001e-06, "loss": 1.3822, "step": 230 }, { "epoch": 0.03771274641851353, "grad_norm": 1.6129051446914673, "learning_rate": 4.620000000000001e-06, "loss": 1.4301, "step": 231 }, { "epoch": 0.03787600506101792, "grad_norm": 1.5726324319839478, "learning_rate": 4.6400000000000005e-06, "loss": 1.4197, "step": 232 }, { "epoch": 0.03803926370352231, "grad_norm": 1.6806925535202026, "learning_rate": 4.66e-06, "loss": 1.4155, "step": 233 }, { "epoch": 0.038202522346026696, "grad_norm": 1.9844831228256226, "learning_rate": 4.680000000000001e-06, "loss": 1.5435, "step": 234 }, { "epoch": 0.03836578098853108, "grad_norm": 1.6292520761489868, "learning_rate": 4.7e-06, "loss": 1.3312, "step": 235 }, { "epoch": 0.03852903963103547, "grad_norm": 1.629751443862915, "learning_rate": 4.7200000000000005e-06, "loss": 1.3402, "step": 236 }, { "epoch": 0.03869229827353986, "grad_norm": 1.5564073324203491, "learning_rate": 4.74e-06, "loss": 1.4724, "step": 237 }, { "epoch": 0.03885555691604424, "grad_norm": 1.6269237995147705, "learning_rate": 4.76e-06, "loss": 1.2937, "step": 238 }, { "epoch": 0.03901881555854863, "grad_norm": 1.9189403057098389, "learning_rate": 4.78e-06, "loss": 1.3602, "step": 239 }, { "epoch": 0.03918207420105302, "grad_norm": 1.6265121698379517, "learning_rate": 4.800000000000001e-06, "loss": 1.394, "step": 240 }, { "epoch": 0.03934533284355741, "grad_norm": 1.965876579284668, "learning_rate": 4.8200000000000004e-06, "loss": 1.4015, "step": 241 }, { "epoch": 0.03950859148606179, "grad_norm": 1.7224138975143433, "learning_rate": 4.84e-06, "loss": 1.4144, "step": 242 }, { "epoch": 0.03967185012856618, "grad_norm": 1.6164450645446777, "learning_rate": 4.86e-06, "loss": 1.5031, "step": 243 }, { "epoch": 0.03983510877107057, "grad_norm": 1.755733847618103, "learning_rate": 4.880000000000001e-06, "loss": 1.3916, "step": 244 }, { "epoch": 0.03999836741357496, "grad_norm": 1.854887843132019, "learning_rate": 4.9000000000000005e-06, "loss": 1.5837, "step": 245 }, { "epoch": 0.04016162605607934, "grad_norm": 1.747153878211975, "learning_rate": 4.92e-06, "loss": 1.3597, "step": 246 }, { "epoch": 0.04032488469858373, "grad_norm": 2.0980474948883057, "learning_rate": 4.94e-06, "loss": 1.1637, "step": 247 }, { "epoch": 0.04048814334108812, "grad_norm": 1.9024879932403564, "learning_rate": 4.960000000000001e-06, "loss": 1.3828, "step": 248 }, { "epoch": 0.04065140198359251, "grad_norm": 1.8530932664871216, "learning_rate": 4.980000000000001e-06, "loss": 1.3278, "step": 249 }, { "epoch": 0.04081466062609689, "grad_norm": 1.8176486492156982, "learning_rate": 5e-06, "loss": 1.4399, "step": 250 }, { "epoch": 0.04097791926860128, "grad_norm": 2.028615713119507, "learning_rate": 5.02e-06, "loss": 1.3479, "step": 251 }, { "epoch": 0.04114117791110567, "grad_norm": 1.8009893894195557, "learning_rate": 5.04e-06, "loss": 1.3192, "step": 252 }, { "epoch": 0.041304436553610054, "grad_norm": 1.5895270109176636, "learning_rate": 5.060000000000001e-06, "loss": 1.3178, "step": 253 }, { "epoch": 0.04146769519611444, "grad_norm": 1.7072293758392334, "learning_rate": 5.0800000000000005e-06, "loss": 1.2674, "step": 254 }, { "epoch": 0.04163095383861883, "grad_norm": 1.9572274684906006, "learning_rate": 5.1e-06, "loss": 1.3481, "step": 255 }, { "epoch": 0.04179421248112322, "grad_norm": 2.0055418014526367, "learning_rate": 5.12e-06, "loss": 1.4567, "step": 256 }, { "epoch": 0.041957471123627604, "grad_norm": 1.632832646369934, "learning_rate": 5.140000000000001e-06, "loss": 1.3515, "step": 257 }, { "epoch": 0.042120729766131994, "grad_norm": 1.7226401567459106, "learning_rate": 5.1600000000000006e-06, "loss": 1.2223, "step": 258 }, { "epoch": 0.04228398840863638, "grad_norm": 1.745452642440796, "learning_rate": 5.18e-06, "loss": 1.2768, "step": 259 }, { "epoch": 0.04244724705114077, "grad_norm": 1.5732847452163696, "learning_rate": 5.2e-06, "loss": 1.2221, "step": 260 }, { "epoch": 0.042610505693645155, "grad_norm": 2.1821823120117188, "learning_rate": 5.220000000000001e-06, "loss": 1.2095, "step": 261 }, { "epoch": 0.042773764336149545, "grad_norm": 1.6556347608566284, "learning_rate": 5.240000000000001e-06, "loss": 1.3607, "step": 262 }, { "epoch": 0.042937022978653934, "grad_norm": 2.3543052673339844, "learning_rate": 5.2600000000000005e-06, "loss": 1.3836, "step": 263 }, { "epoch": 0.04310028162115832, "grad_norm": 1.6909420490264893, "learning_rate": 5.28e-06, "loss": 1.244, "step": 264 }, { "epoch": 0.043263540263662706, "grad_norm": 1.8927441835403442, "learning_rate": 5.300000000000001e-06, "loss": 1.4352, "step": 265 }, { "epoch": 0.043426798906167095, "grad_norm": 1.794523000717163, "learning_rate": 5.320000000000001e-06, "loss": 1.3977, "step": 266 }, { "epoch": 0.043590057548671485, "grad_norm": 1.9960391521453857, "learning_rate": 5.3400000000000005e-06, "loss": 1.1623, "step": 267 }, { "epoch": 0.04375331619117587, "grad_norm": 1.8895787000656128, "learning_rate": 5.36e-06, "loss": 1.2235, "step": 268 }, { "epoch": 0.04391657483368026, "grad_norm": 1.9792596101760864, "learning_rate": 5.380000000000001e-06, "loss": 1.4607, "step": 269 }, { "epoch": 0.044079833476184646, "grad_norm": 1.84207022190094, "learning_rate": 5.400000000000001e-06, "loss": 1.3426, "step": 270 }, { "epoch": 0.044243092118689036, "grad_norm": 2.020681619644165, "learning_rate": 5.420000000000001e-06, "loss": 1.4241, "step": 271 }, { "epoch": 0.04440635076119342, "grad_norm": 1.750666856765747, "learning_rate": 5.4400000000000004e-06, "loss": 1.1745, "step": 272 }, { "epoch": 0.04456960940369781, "grad_norm": 1.7200289964675903, "learning_rate": 5.460000000000001e-06, "loss": 1.2417, "step": 273 }, { "epoch": 0.0447328680462022, "grad_norm": 1.8889890909194946, "learning_rate": 5.480000000000001e-06, "loss": 1.5173, "step": 274 }, { "epoch": 0.044896126688706586, "grad_norm": 1.6877647638320923, "learning_rate": 5.500000000000001e-06, "loss": 1.2195, "step": 275 }, { "epoch": 0.04505938533121097, "grad_norm": 1.7130943536758423, "learning_rate": 5.5200000000000005e-06, "loss": 1.306, "step": 276 }, { "epoch": 0.04522264397371536, "grad_norm": 2.269832134246826, "learning_rate": 5.540000000000001e-06, "loss": 1.3022, "step": 277 }, { "epoch": 0.04538590261621975, "grad_norm": 2.1610982418060303, "learning_rate": 5.560000000000001e-06, "loss": 1.3729, "step": 278 }, { "epoch": 0.04554916125872414, "grad_norm": 1.6647307872772217, "learning_rate": 5.580000000000001e-06, "loss": 1.2433, "step": 279 }, { "epoch": 0.04571241990122852, "grad_norm": 1.9671560525894165, "learning_rate": 5.600000000000001e-06, "loss": 1.1031, "step": 280 }, { "epoch": 0.04587567854373291, "grad_norm": 1.8172413110733032, "learning_rate": 5.620000000000001e-06, "loss": 1.0862, "step": 281 }, { "epoch": 0.0460389371862373, "grad_norm": 2.103264331817627, "learning_rate": 5.64e-06, "loss": 1.2269, "step": 282 }, { "epoch": 0.04620219582874168, "grad_norm": 1.8390167951583862, "learning_rate": 5.66e-06, "loss": 1.3608, "step": 283 }, { "epoch": 0.04636545447124607, "grad_norm": 1.8240208625793457, "learning_rate": 5.68e-06, "loss": 1.3546, "step": 284 }, { "epoch": 0.04652871311375046, "grad_norm": 2.0313069820404053, "learning_rate": 5.7e-06, "loss": 1.2245, "step": 285 }, { "epoch": 0.04669197175625485, "grad_norm": 2.0402538776397705, "learning_rate": 5.72e-06, "loss": 1.2089, "step": 286 }, { "epoch": 0.04685523039875923, "grad_norm": 2.128643274307251, "learning_rate": 5.74e-06, "loss": 1.4644, "step": 287 }, { "epoch": 0.04701848904126362, "grad_norm": 2.0624396800994873, "learning_rate": 5.76e-06, "loss": 1.2678, "step": 288 }, { "epoch": 0.04718174768376801, "grad_norm": 1.8395850658416748, "learning_rate": 5.78e-06, "loss": 1.2465, "step": 289 }, { "epoch": 0.0473450063262724, "grad_norm": 2.0606637001037598, "learning_rate": 5.8e-06, "loss": 1.377, "step": 290 }, { "epoch": 0.04750826496877678, "grad_norm": 1.9317939281463623, "learning_rate": 5.82e-06, "loss": 1.2789, "step": 291 }, { "epoch": 0.04767152361128117, "grad_norm": 1.8050525188446045, "learning_rate": 5.84e-06, "loss": 1.3675, "step": 292 }, { "epoch": 0.04783478225378556, "grad_norm": 1.8397572040557861, "learning_rate": 5.86e-06, "loss": 1.2639, "step": 293 }, { "epoch": 0.04799804089628995, "grad_norm": 1.796512484550476, "learning_rate": 5.8800000000000005e-06, "loss": 1.2712, "step": 294 }, { "epoch": 0.04816129953879433, "grad_norm": 1.7678298950195312, "learning_rate": 5.9e-06, "loss": 1.4187, "step": 295 }, { "epoch": 0.04832455818129872, "grad_norm": 2.002376079559326, "learning_rate": 5.92e-06, "loss": 1.3233, "step": 296 }, { "epoch": 0.04848781682380311, "grad_norm": 2.010481595993042, "learning_rate": 5.94e-06, "loss": 1.3472, "step": 297 }, { "epoch": 0.048651075466307495, "grad_norm": 1.9292447566986084, "learning_rate": 5.9600000000000005e-06, "loss": 1.1185, "step": 298 }, { "epoch": 0.048814334108811884, "grad_norm": 1.8887194395065308, "learning_rate": 5.98e-06, "loss": 1.2652, "step": 299 }, { "epoch": 0.04897759275131627, "grad_norm": 2.141409397125244, "learning_rate": 6e-06, "loss": 1.271, "step": 300 }, { "epoch": 0.04914085139382066, "grad_norm": 1.881855845451355, "learning_rate": 6.02e-06, "loss": 1.2796, "step": 301 }, { "epoch": 0.049304110036325045, "grad_norm": 2.150317430496216, "learning_rate": 6.040000000000001e-06, "loss": 1.0951, "step": 302 }, { "epoch": 0.049467368678829435, "grad_norm": 1.9875880479812622, "learning_rate": 6.0600000000000004e-06, "loss": 1.3246, "step": 303 }, { "epoch": 0.049630627321333824, "grad_norm": 2.255772590637207, "learning_rate": 6.08e-06, "loss": 1.4989, "step": 304 }, { "epoch": 0.049793885963838214, "grad_norm": 2.045013904571533, "learning_rate": 6.1e-06, "loss": 1.2866, "step": 305 }, { "epoch": 0.049957144606342596, "grad_norm": 2.1231534481048584, "learning_rate": 6.120000000000001e-06, "loss": 1.5194, "step": 306 }, { "epoch": 0.050120403248846986, "grad_norm": 2.1739463806152344, "learning_rate": 6.1400000000000005e-06, "loss": 1.2079, "step": 307 }, { "epoch": 0.050283661891351375, "grad_norm": 2.1337294578552246, "learning_rate": 6.16e-06, "loss": 1.3547, "step": 308 }, { "epoch": 0.050446920533855764, "grad_norm": 1.978772759437561, "learning_rate": 6.18e-06, "loss": 1.2434, "step": 309 }, { "epoch": 0.05061017917636015, "grad_norm": 2.1697778701782227, "learning_rate": 6.200000000000001e-06, "loss": 1.5144, "step": 310 }, { "epoch": 0.050773437818864536, "grad_norm": 2.114093780517578, "learning_rate": 6.220000000000001e-06, "loss": 1.482, "step": 311 }, { "epoch": 0.050936696461368926, "grad_norm": 1.9082748889923096, "learning_rate": 6.24e-06, "loss": 1.3339, "step": 312 }, { "epoch": 0.05109995510387331, "grad_norm": 1.949682593345642, "learning_rate": 6.26e-06, "loss": 1.145, "step": 313 }, { "epoch": 0.0512632137463777, "grad_norm": 2.2155940532684326, "learning_rate": 6.280000000000001e-06, "loss": 1.2347, "step": 314 }, { "epoch": 0.05142647238888209, "grad_norm": 2.166496992111206, "learning_rate": 6.300000000000001e-06, "loss": 1.503, "step": 315 }, { "epoch": 0.05158973103138648, "grad_norm": 1.980858564376831, "learning_rate": 6.3200000000000005e-06, "loss": 1.1723, "step": 316 }, { "epoch": 0.05175298967389086, "grad_norm": 2.2432358264923096, "learning_rate": 6.34e-06, "loss": 1.2782, "step": 317 }, { "epoch": 0.05191624831639525, "grad_norm": 2.169797658920288, "learning_rate": 6.360000000000001e-06, "loss": 1.3054, "step": 318 }, { "epoch": 0.05207950695889964, "grad_norm": 1.794983983039856, "learning_rate": 6.380000000000001e-06, "loss": 1.3602, "step": 319 }, { "epoch": 0.05224276560140403, "grad_norm": 1.9446194171905518, "learning_rate": 6.4000000000000006e-06, "loss": 1.2564, "step": 320 }, { "epoch": 0.05240602424390841, "grad_norm": 1.8768842220306396, "learning_rate": 6.42e-06, "loss": 1.2697, "step": 321 }, { "epoch": 0.0525692828864128, "grad_norm": 2.0316975116729736, "learning_rate": 6.440000000000001e-06, "loss": 1.3533, "step": 322 }, { "epoch": 0.05273254152891719, "grad_norm": 2.2916362285614014, "learning_rate": 6.460000000000001e-06, "loss": 1.0379, "step": 323 }, { "epoch": 0.05289580017142158, "grad_norm": 2.2609684467315674, "learning_rate": 6.480000000000001e-06, "loss": 1.3794, "step": 324 }, { "epoch": 0.05305905881392596, "grad_norm": 1.8895182609558105, "learning_rate": 6.5000000000000004e-06, "loss": 1.2537, "step": 325 }, { "epoch": 0.05322231745643035, "grad_norm": 2.064990520477295, "learning_rate": 6.520000000000001e-06, "loss": 1.4369, "step": 326 }, { "epoch": 0.05338557609893474, "grad_norm": 2.1281538009643555, "learning_rate": 6.540000000000001e-06, "loss": 1.4265, "step": 327 }, { "epoch": 0.05354883474143912, "grad_norm": 1.9302514791488647, "learning_rate": 6.560000000000001e-06, "loss": 1.1931, "step": 328 }, { "epoch": 0.05371209338394351, "grad_norm": 2.119605541229248, "learning_rate": 6.5800000000000005e-06, "loss": 1.1733, "step": 329 }, { "epoch": 0.0538753520264479, "grad_norm": 1.921176791191101, "learning_rate": 6.600000000000001e-06, "loss": 1.2275, "step": 330 }, { "epoch": 0.05403861066895229, "grad_norm": 1.9931433200836182, "learning_rate": 6.620000000000001e-06, "loss": 1.3588, "step": 331 }, { "epoch": 0.05420186931145667, "grad_norm": 2.1673290729522705, "learning_rate": 6.640000000000001e-06, "loss": 1.1609, "step": 332 }, { "epoch": 0.05436512795396106, "grad_norm": 1.8807066679000854, "learning_rate": 6.660000000000001e-06, "loss": 1.4022, "step": 333 }, { "epoch": 0.05452838659646545, "grad_norm": 2.072364568710327, "learning_rate": 6.680000000000001e-06, "loss": 1.2992, "step": 334 }, { "epoch": 0.05469164523896984, "grad_norm": 2.0293211936950684, "learning_rate": 6.700000000000001e-06, "loss": 1.3371, "step": 335 }, { "epoch": 0.054854903881474223, "grad_norm": 1.8143714666366577, "learning_rate": 6.720000000000001e-06, "loss": 1.2919, "step": 336 }, { "epoch": 0.05501816252397861, "grad_norm": 1.8407166004180908, "learning_rate": 6.740000000000001e-06, "loss": 1.2765, "step": 337 }, { "epoch": 0.055181421166483, "grad_norm": 2.0948259830474854, "learning_rate": 6.760000000000001e-06, "loss": 1.2772, "step": 338 }, { "epoch": 0.055344679808987385, "grad_norm": 1.8377389907836914, "learning_rate": 6.780000000000001e-06, "loss": 1.1669, "step": 339 }, { "epoch": 0.055507938451491774, "grad_norm": 1.992203950881958, "learning_rate": 6.800000000000001e-06, "loss": 1.2527, "step": 340 }, { "epoch": 0.055671197093996164, "grad_norm": 1.9831184148788452, "learning_rate": 6.820000000000001e-06, "loss": 1.0334, "step": 341 }, { "epoch": 0.05583445573650055, "grad_norm": 1.8389374017715454, "learning_rate": 6.8400000000000014e-06, "loss": 1.2587, "step": 342 }, { "epoch": 0.055997714379004936, "grad_norm": 2.089749574661255, "learning_rate": 6.860000000000001e-06, "loss": 1.4855, "step": 343 }, { "epoch": 0.056160973021509325, "grad_norm": 1.974974274635315, "learning_rate": 6.88e-06, "loss": 1.3715, "step": 344 }, { "epoch": 0.056324231664013714, "grad_norm": 1.825980544090271, "learning_rate": 6.9e-06, "loss": 1.2328, "step": 345 }, { "epoch": 0.056487490306518104, "grad_norm": 1.8808982372283936, "learning_rate": 6.92e-06, "loss": 1.3186, "step": 346 }, { "epoch": 0.056650748949022486, "grad_norm": 2.0029497146606445, "learning_rate": 6.9400000000000005e-06, "loss": 1.3116, "step": 347 }, { "epoch": 0.056814007591526876, "grad_norm": 2.1018147468566895, "learning_rate": 6.96e-06, "loss": 1.1871, "step": 348 }, { "epoch": 0.056977266234031265, "grad_norm": 1.9819332361221313, "learning_rate": 6.98e-06, "loss": 1.0122, "step": 349 }, { "epoch": 0.057140524876535655, "grad_norm": 2.356565475463867, "learning_rate": 7e-06, "loss": 1.2615, "step": 350 }, { "epoch": 0.05730378351904004, "grad_norm": 1.7581766843795776, "learning_rate": 7.0200000000000006e-06, "loss": 1.1528, "step": 351 }, { "epoch": 0.05746704216154443, "grad_norm": 1.815470576286316, "learning_rate": 7.04e-06, "loss": 1.1911, "step": 352 }, { "epoch": 0.057630300804048816, "grad_norm": 2.0752034187316895, "learning_rate": 7.06e-06, "loss": 1.1539, "step": 353 }, { "epoch": 0.0577935594465532, "grad_norm": 1.9206302165985107, "learning_rate": 7.08e-06, "loss": 1.2542, "step": 354 }, { "epoch": 0.05795681808905759, "grad_norm": 1.9105370044708252, "learning_rate": 7.100000000000001e-06, "loss": 1.1524, "step": 355 }, { "epoch": 0.05812007673156198, "grad_norm": 2.051164388656616, "learning_rate": 7.1200000000000004e-06, "loss": 1.4537, "step": 356 }, { "epoch": 0.05828333537406637, "grad_norm": 1.989436388015747, "learning_rate": 7.14e-06, "loss": 1.114, "step": 357 }, { "epoch": 0.05844659401657075, "grad_norm": 1.846563696861267, "learning_rate": 7.16e-06, "loss": 1.1085, "step": 358 }, { "epoch": 0.05860985265907514, "grad_norm": 2.179766893386841, "learning_rate": 7.180000000000001e-06, "loss": 1.1198, "step": 359 }, { "epoch": 0.05877311130157953, "grad_norm": 1.6388657093048096, "learning_rate": 7.2000000000000005e-06, "loss": 1.0753, "step": 360 }, { "epoch": 0.05893636994408392, "grad_norm": 1.9925389289855957, "learning_rate": 7.22e-06, "loss": 1.3266, "step": 361 }, { "epoch": 0.0590996285865883, "grad_norm": 1.8099873065948486, "learning_rate": 7.24e-06, "loss": 1.2354, "step": 362 }, { "epoch": 0.05926288722909269, "grad_norm": 2.0619328022003174, "learning_rate": 7.260000000000001e-06, "loss": 1.3734, "step": 363 }, { "epoch": 0.05942614587159708, "grad_norm": 1.8196241855621338, "learning_rate": 7.280000000000001e-06, "loss": 1.1274, "step": 364 }, { "epoch": 0.05958940451410147, "grad_norm": 1.8758442401885986, "learning_rate": 7.3e-06, "loss": 1.1189, "step": 365 }, { "epoch": 0.05975266315660585, "grad_norm": 1.8072453737258911, "learning_rate": 7.32e-06, "loss": 1.2812, "step": 366 }, { "epoch": 0.05991592179911024, "grad_norm": 1.901843786239624, "learning_rate": 7.340000000000001e-06, "loss": 1.2839, "step": 367 }, { "epoch": 0.06007918044161463, "grad_norm": 2.108508825302124, "learning_rate": 7.360000000000001e-06, "loss": 1.2649, "step": 368 }, { "epoch": 0.06024243908411901, "grad_norm": 2.0627455711364746, "learning_rate": 7.3800000000000005e-06, "loss": 1.3192, "step": 369 }, { "epoch": 0.0604056977266234, "grad_norm": 2.1426141262054443, "learning_rate": 7.4e-06, "loss": 1.3641, "step": 370 }, { "epoch": 0.06056895636912779, "grad_norm": 1.8952516317367554, "learning_rate": 7.420000000000001e-06, "loss": 1.1985, "step": 371 }, { "epoch": 0.06073221501163218, "grad_norm": 1.6583818197250366, "learning_rate": 7.440000000000001e-06, "loss": 1.0724, "step": 372 }, { "epoch": 0.06089547365413656, "grad_norm": 2.5599281787872314, "learning_rate": 7.4600000000000006e-06, "loss": 1.2023, "step": 373 }, { "epoch": 0.06105873229664095, "grad_norm": 2.0398690700531006, "learning_rate": 7.48e-06, "loss": 1.3233, "step": 374 }, { "epoch": 0.06122199093914534, "grad_norm": 2.041804313659668, "learning_rate": 7.500000000000001e-06, "loss": 1.2496, "step": 375 }, { "epoch": 0.06138524958164973, "grad_norm": 1.9788709878921509, "learning_rate": 7.520000000000001e-06, "loss": 1.4004, "step": 376 }, { "epoch": 0.061548508224154114, "grad_norm": 2.121373414993286, "learning_rate": 7.540000000000001e-06, "loss": 1.2866, "step": 377 }, { "epoch": 0.0617117668666585, "grad_norm": 2.0985896587371826, "learning_rate": 7.5600000000000005e-06, "loss": 1.2359, "step": 378 }, { "epoch": 0.06187502550916289, "grad_norm": 2.0373647212982178, "learning_rate": 7.58e-06, "loss": 1.1656, "step": 379 }, { "epoch": 0.06203828415166728, "grad_norm": 1.9046670198440552, "learning_rate": 7.600000000000001e-06, "loss": 1.246, "step": 380 }, { "epoch": 0.062201542794171664, "grad_norm": 1.9420348405838013, "learning_rate": 7.620000000000001e-06, "loss": 1.1311, "step": 381 }, { "epoch": 0.062364801436676054, "grad_norm": 2.326917886734009, "learning_rate": 7.640000000000001e-06, "loss": 1.2461, "step": 382 }, { "epoch": 0.06252806007918044, "grad_norm": 1.8557238578796387, "learning_rate": 7.660000000000001e-06, "loss": 1.0143, "step": 383 }, { "epoch": 0.06269131872168483, "grad_norm": 1.9980357885360718, "learning_rate": 7.680000000000001e-06, "loss": 1.1776, "step": 384 }, { "epoch": 0.06285457736418922, "grad_norm": 1.8375717401504517, "learning_rate": 7.7e-06, "loss": 1.2098, "step": 385 }, { "epoch": 0.0630178360066936, "grad_norm": 2.0467495918273926, "learning_rate": 7.72e-06, "loss": 1.3015, "step": 386 }, { "epoch": 0.063181094649198, "grad_norm": 1.9435759782791138, "learning_rate": 7.74e-06, "loss": 1.2486, "step": 387 }, { "epoch": 0.06334435329170238, "grad_norm": 1.9634602069854736, "learning_rate": 7.76e-06, "loss": 1.1066, "step": 388 }, { "epoch": 0.06350761193420677, "grad_norm": 1.8496242761611938, "learning_rate": 7.78e-06, "loss": 1.2167, "step": 389 }, { "epoch": 0.06367087057671116, "grad_norm": 2.064605951309204, "learning_rate": 7.800000000000002e-06, "loss": 1.1604, "step": 390 }, { "epoch": 0.06383412921921554, "grad_norm": 1.8987208604812622, "learning_rate": 7.820000000000001e-06, "loss": 1.3364, "step": 391 }, { "epoch": 0.06399738786171993, "grad_norm": 2.062685012817383, "learning_rate": 7.840000000000001e-06, "loss": 1.1012, "step": 392 }, { "epoch": 0.06416064650422432, "grad_norm": 2.044191598892212, "learning_rate": 7.860000000000001e-06, "loss": 1.2192, "step": 393 }, { "epoch": 0.0643239051467287, "grad_norm": 1.8554444313049316, "learning_rate": 7.88e-06, "loss": 1.1752, "step": 394 }, { "epoch": 0.0644871637892331, "grad_norm": 2.195918321609497, "learning_rate": 7.9e-06, "loss": 1.4694, "step": 395 }, { "epoch": 0.06465042243173748, "grad_norm": 2.0799505710601807, "learning_rate": 7.92e-06, "loss": 1.3756, "step": 396 }, { "epoch": 0.06481368107424187, "grad_norm": 1.980363368988037, "learning_rate": 7.94e-06, "loss": 1.4176, "step": 397 }, { "epoch": 0.06497693971674626, "grad_norm": 2.225811719894409, "learning_rate": 7.960000000000002e-06, "loss": 1.1459, "step": 398 }, { "epoch": 0.06514019835925064, "grad_norm": 2.4319510459899902, "learning_rate": 7.980000000000002e-06, "loss": 1.0911, "step": 399 }, { "epoch": 0.06530345700175504, "grad_norm": 2.096545457839966, "learning_rate": 8.000000000000001e-06, "loss": 1.3741, "step": 400 }, { "epoch": 0.06546671564425942, "grad_norm": 2.0194480419158936, "learning_rate": 8.020000000000001e-06, "loss": 1.1103, "step": 401 }, { "epoch": 0.0656299742867638, "grad_norm": 2.086824417114258, "learning_rate": 8.040000000000001e-06, "loss": 1.1551, "step": 402 }, { "epoch": 0.0657932329292682, "grad_norm": 2.0286951065063477, "learning_rate": 8.06e-06, "loss": 1.2531, "step": 403 }, { "epoch": 0.06595649157177258, "grad_norm": 1.861185073852539, "learning_rate": 8.08e-06, "loss": 1.0743, "step": 404 }, { "epoch": 0.06611975021427696, "grad_norm": 1.8385728597640991, "learning_rate": 8.1e-06, "loss": 1.2443, "step": 405 }, { "epoch": 0.06628300885678136, "grad_norm": 2.05220365524292, "learning_rate": 8.120000000000002e-06, "loss": 1.1598, "step": 406 }, { "epoch": 0.06644626749928574, "grad_norm": 2.015984535217285, "learning_rate": 8.14e-06, "loss": 1.1157, "step": 407 }, { "epoch": 0.06660952614179014, "grad_norm": 1.9400795698165894, "learning_rate": 8.16e-06, "loss": 1.2048, "step": 408 }, { "epoch": 0.06677278478429452, "grad_norm": 1.810413122177124, "learning_rate": 8.18e-06, "loss": 1.1264, "step": 409 }, { "epoch": 0.0669360434267989, "grad_norm": 1.7337501049041748, "learning_rate": 8.2e-06, "loss": 1.065, "step": 410 }, { "epoch": 0.0670993020693033, "grad_norm": 1.6440634727478027, "learning_rate": 8.220000000000001e-06, "loss": 1.1687, "step": 411 }, { "epoch": 0.06726256071180768, "grad_norm": 1.9802428483963013, "learning_rate": 8.24e-06, "loss": 1.1269, "step": 412 }, { "epoch": 0.06742581935431206, "grad_norm": 1.740654468536377, "learning_rate": 8.26e-06, "loss": 0.9955, "step": 413 }, { "epoch": 0.06758907799681646, "grad_norm": 1.9390037059783936, "learning_rate": 8.28e-06, "loss": 1.2966, "step": 414 }, { "epoch": 0.06775233663932084, "grad_norm": 1.8586387634277344, "learning_rate": 8.3e-06, "loss": 1.0892, "step": 415 }, { "epoch": 0.06791559528182523, "grad_norm": 1.924579381942749, "learning_rate": 8.32e-06, "loss": 1.1705, "step": 416 }, { "epoch": 0.06807885392432962, "grad_norm": 2.036813974380493, "learning_rate": 8.34e-06, "loss": 1.2578, "step": 417 }, { "epoch": 0.068242112566834, "grad_norm": 2.1908116340637207, "learning_rate": 8.36e-06, "loss": 1.0804, "step": 418 }, { "epoch": 0.0684053712093384, "grad_norm": 2.3113887310028076, "learning_rate": 8.380000000000001e-06, "loss": 1.1817, "step": 419 }, { "epoch": 0.06856862985184278, "grad_norm": 2.203634738922119, "learning_rate": 8.400000000000001e-06, "loss": 1.2, "step": 420 }, { "epoch": 0.06873188849434717, "grad_norm": 2.1560781002044678, "learning_rate": 8.42e-06, "loss": 1.1064, "step": 421 }, { "epoch": 0.06889514713685156, "grad_norm": 1.9895814657211304, "learning_rate": 8.44e-06, "loss": 1.2488, "step": 422 }, { "epoch": 0.06905840577935594, "grad_norm": 2.2973825931549072, "learning_rate": 8.46e-06, "loss": 1.2532, "step": 423 }, { "epoch": 0.06922166442186033, "grad_norm": 1.9881922006607056, "learning_rate": 8.48e-06, "loss": 1.1107, "step": 424 }, { "epoch": 0.06938492306436472, "grad_norm": 1.9441295862197876, "learning_rate": 8.5e-06, "loss": 1.2486, "step": 425 }, { "epoch": 0.0695481817068691, "grad_norm": 1.9968016147613525, "learning_rate": 8.52e-06, "loss": 1.2483, "step": 426 }, { "epoch": 0.0697114403493735, "grad_norm": 1.8959996700286865, "learning_rate": 8.540000000000001e-06, "loss": 1.0312, "step": 427 }, { "epoch": 0.06987469899187788, "grad_norm": 1.7172850370407104, "learning_rate": 8.560000000000001e-06, "loss": 0.9756, "step": 428 }, { "epoch": 0.07003795763438227, "grad_norm": 3.1167006492614746, "learning_rate": 8.580000000000001e-06, "loss": 1.1734, "step": 429 }, { "epoch": 0.07020121627688666, "grad_norm": 2.0712010860443115, "learning_rate": 8.6e-06, "loss": 1.2294, "step": 430 }, { "epoch": 0.07036447491939105, "grad_norm": 1.8392446041107178, "learning_rate": 8.62e-06, "loss": 1.2863, "step": 431 }, { "epoch": 0.07052773356189543, "grad_norm": 1.9474895000457764, "learning_rate": 8.64e-06, "loss": 1.247, "step": 432 }, { "epoch": 0.07069099220439982, "grad_norm": 2.0882370471954346, "learning_rate": 8.66e-06, "loss": 1.2153, "step": 433 }, { "epoch": 0.07085425084690421, "grad_norm": 1.9889891147613525, "learning_rate": 8.68e-06, "loss": 1.3358, "step": 434 }, { "epoch": 0.07101750948940859, "grad_norm": 2.1056065559387207, "learning_rate": 8.700000000000001e-06, "loss": 1.3148, "step": 435 }, { "epoch": 0.07118076813191299, "grad_norm": 1.9358744621276855, "learning_rate": 8.720000000000001e-06, "loss": 1.1772, "step": 436 }, { "epoch": 0.07134402677441737, "grad_norm": 1.9889074563980103, "learning_rate": 8.740000000000001e-06, "loss": 1.1282, "step": 437 }, { "epoch": 0.07150728541692176, "grad_norm": 1.7672903537750244, "learning_rate": 8.76e-06, "loss": 0.9575, "step": 438 }, { "epoch": 0.07167054405942615, "grad_norm": 2.0771145820617676, "learning_rate": 8.78e-06, "loss": 1.2015, "step": 439 }, { "epoch": 0.07183380270193053, "grad_norm": 1.8405672311782837, "learning_rate": 8.8e-06, "loss": 1.0683, "step": 440 }, { "epoch": 0.07199706134443493, "grad_norm": 1.8821085691452026, "learning_rate": 8.82e-06, "loss": 1.1916, "step": 441 }, { "epoch": 0.07216031998693931, "grad_norm": 2.5297975540161133, "learning_rate": 8.84e-06, "loss": 1.2339, "step": 442 }, { "epoch": 0.07232357862944369, "grad_norm": 2.138904333114624, "learning_rate": 8.860000000000002e-06, "loss": 1.1932, "step": 443 }, { "epoch": 0.07248683727194809, "grad_norm": 2.216179132461548, "learning_rate": 8.880000000000001e-06, "loss": 1.0888, "step": 444 }, { "epoch": 0.07265009591445247, "grad_norm": 1.9238756895065308, "learning_rate": 8.900000000000001e-06, "loss": 1.0363, "step": 445 }, { "epoch": 0.07281335455695685, "grad_norm": 2.006751298904419, "learning_rate": 8.920000000000001e-06, "loss": 1.1825, "step": 446 }, { "epoch": 0.07297661319946125, "grad_norm": 2.1362032890319824, "learning_rate": 8.94e-06, "loss": 1.285, "step": 447 }, { "epoch": 0.07313987184196563, "grad_norm": 2.2246041297912598, "learning_rate": 8.96e-06, "loss": 1.1811, "step": 448 }, { "epoch": 0.07330313048447003, "grad_norm": 2.350745677947998, "learning_rate": 8.98e-06, "loss": 1.1997, "step": 449 }, { "epoch": 0.07346638912697441, "grad_norm": 1.8694416284561157, "learning_rate": 9e-06, "loss": 1.1203, "step": 450 }, { "epoch": 0.07362964776947879, "grad_norm": 2.395637035369873, "learning_rate": 9.020000000000002e-06, "loss": 1.2534, "step": 451 }, { "epoch": 0.07379290641198319, "grad_norm": 2.384709119796753, "learning_rate": 9.040000000000002e-06, "loss": 1.2784, "step": 452 }, { "epoch": 0.07395616505448757, "grad_norm": 1.963156819343567, "learning_rate": 9.060000000000001e-06, "loss": 1.0283, "step": 453 }, { "epoch": 0.07411942369699195, "grad_norm": 2.0077297687530518, "learning_rate": 9.080000000000001e-06, "loss": 1.0107, "step": 454 }, { "epoch": 0.07428268233949635, "grad_norm": 2.4147799015045166, "learning_rate": 9.100000000000001e-06, "loss": 1.1989, "step": 455 }, { "epoch": 0.07444594098200073, "grad_norm": 2.231031894683838, "learning_rate": 9.12e-06, "loss": 1.1359, "step": 456 }, { "epoch": 0.07460919962450512, "grad_norm": 2.117201089859009, "learning_rate": 9.14e-06, "loss": 1.1769, "step": 457 }, { "epoch": 0.07477245826700951, "grad_norm": 2.1280832290649414, "learning_rate": 9.16e-06, "loss": 1.2116, "step": 458 }, { "epoch": 0.0749357169095139, "grad_norm": 2.199897289276123, "learning_rate": 9.180000000000002e-06, "loss": 1.1493, "step": 459 }, { "epoch": 0.07509897555201829, "grad_norm": 2.391735315322876, "learning_rate": 9.200000000000002e-06, "loss": 1.2247, "step": 460 }, { "epoch": 0.07526223419452267, "grad_norm": 2.252683401107788, "learning_rate": 9.220000000000002e-06, "loss": 1.1125, "step": 461 }, { "epoch": 0.07542549283702706, "grad_norm": 1.8622978925704956, "learning_rate": 9.240000000000001e-06, "loss": 1.2454, "step": 462 }, { "epoch": 0.07558875147953145, "grad_norm": 2.054431915283203, "learning_rate": 9.260000000000001e-06, "loss": 1.1081, "step": 463 }, { "epoch": 0.07575201012203583, "grad_norm": 1.9579373598098755, "learning_rate": 9.280000000000001e-06, "loss": 1.1217, "step": 464 }, { "epoch": 0.07591526876454022, "grad_norm": 1.9501029253005981, "learning_rate": 9.3e-06, "loss": 1.0647, "step": 465 }, { "epoch": 0.07607852740704461, "grad_norm": 1.9215545654296875, "learning_rate": 9.32e-06, "loss": 1.072, "step": 466 }, { "epoch": 0.076241786049549, "grad_norm": 2.225527763366699, "learning_rate": 9.340000000000002e-06, "loss": 1.2011, "step": 467 }, { "epoch": 0.07640504469205339, "grad_norm": 2.4547829627990723, "learning_rate": 9.360000000000002e-06, "loss": 1.2743, "step": 468 }, { "epoch": 0.07656830333455777, "grad_norm": 2.123399496078491, "learning_rate": 9.38e-06, "loss": 1.1489, "step": 469 }, { "epoch": 0.07673156197706216, "grad_norm": 2.327552556991577, "learning_rate": 9.4e-06, "loss": 1.1451, "step": 470 }, { "epoch": 0.07689482061956655, "grad_norm": 2.096482038497925, "learning_rate": 9.42e-06, "loss": 1.1311, "step": 471 }, { "epoch": 0.07705807926207094, "grad_norm": 2.1612589359283447, "learning_rate": 9.440000000000001e-06, "loss": 1.1717, "step": 472 }, { "epoch": 0.07722133790457532, "grad_norm": 2.1531004905700684, "learning_rate": 9.460000000000001e-06, "loss": 1.3949, "step": 473 }, { "epoch": 0.07738459654707971, "grad_norm": 2.128967761993408, "learning_rate": 9.48e-06, "loss": 1.4293, "step": 474 }, { "epoch": 0.0775478551895841, "grad_norm": 2.3594679832458496, "learning_rate": 9.5e-06, "loss": 1.1379, "step": 475 }, { "epoch": 0.07771111383208848, "grad_norm": 2.103909969329834, "learning_rate": 9.52e-06, "loss": 1.109, "step": 476 }, { "epoch": 0.07787437247459288, "grad_norm": 2.041992425918579, "learning_rate": 9.54e-06, "loss": 1.1604, "step": 477 }, { "epoch": 0.07803763111709726, "grad_norm": 2.0942649841308594, "learning_rate": 9.56e-06, "loss": 1.2305, "step": 478 }, { "epoch": 0.07820088975960166, "grad_norm": 2.137141704559326, "learning_rate": 9.58e-06, "loss": 1.1487, "step": 479 }, { "epoch": 0.07836414840210604, "grad_norm": 2.5260872840881348, "learning_rate": 9.600000000000001e-06, "loss": 1.0727, "step": 480 }, { "epoch": 0.07852740704461042, "grad_norm": 1.8872711658477783, "learning_rate": 9.620000000000001e-06, "loss": 1.2242, "step": 481 }, { "epoch": 0.07869066568711482, "grad_norm": 2.1657471656799316, "learning_rate": 9.640000000000001e-06, "loss": 1.1084, "step": 482 }, { "epoch": 0.0788539243296192, "grad_norm": 2.072225332260132, "learning_rate": 9.66e-06, "loss": 1.1961, "step": 483 }, { "epoch": 0.07901718297212358, "grad_norm": 2.4796104431152344, "learning_rate": 9.68e-06, "loss": 1.2055, "step": 484 }, { "epoch": 0.07918044161462798, "grad_norm": 2.453747510910034, "learning_rate": 9.7e-06, "loss": 1.1408, "step": 485 }, { "epoch": 0.07934370025713236, "grad_norm": 2.501016139984131, "learning_rate": 9.72e-06, "loss": 1.116, "step": 486 }, { "epoch": 0.07950695889963674, "grad_norm": 2.1103854179382324, "learning_rate": 9.74e-06, "loss": 1.1007, "step": 487 }, { "epoch": 0.07967021754214114, "grad_norm": 1.7755740880966187, "learning_rate": 9.760000000000001e-06, "loss": 1.0018, "step": 488 }, { "epoch": 0.07983347618464552, "grad_norm": 2.236894369125366, "learning_rate": 9.780000000000001e-06, "loss": 1.2326, "step": 489 }, { "epoch": 0.07999673482714992, "grad_norm": 1.8976962566375732, "learning_rate": 9.800000000000001e-06, "loss": 1.1171, "step": 490 }, { "epoch": 0.0801599934696543, "grad_norm": 1.9749926328659058, "learning_rate": 9.820000000000001e-06, "loss": 1.0314, "step": 491 }, { "epoch": 0.08032325211215868, "grad_norm": 2.452061176300049, "learning_rate": 9.84e-06, "loss": 1.3033, "step": 492 }, { "epoch": 0.08048651075466308, "grad_norm": 1.8078688383102417, "learning_rate": 9.86e-06, "loss": 1.0124, "step": 493 }, { "epoch": 0.08064976939716746, "grad_norm": 2.379065990447998, "learning_rate": 9.88e-06, "loss": 0.9952, "step": 494 }, { "epoch": 0.08081302803967184, "grad_norm": 2.2535061836242676, "learning_rate": 9.9e-06, "loss": 0.9827, "step": 495 }, { "epoch": 0.08097628668217624, "grad_norm": 1.9587634801864624, "learning_rate": 9.920000000000002e-06, "loss": 1.0987, "step": 496 }, { "epoch": 0.08113954532468062, "grad_norm": 2.154000997543335, "learning_rate": 9.940000000000001e-06, "loss": 1.1826, "step": 497 }, { "epoch": 0.08130280396718502, "grad_norm": 2.02744197845459, "learning_rate": 9.960000000000001e-06, "loss": 1.26, "step": 498 }, { "epoch": 0.0814660626096894, "grad_norm": 2.0693888664245605, "learning_rate": 9.980000000000001e-06, "loss": 1.143, "step": 499 }, { "epoch": 0.08162932125219378, "grad_norm": 2.0301690101623535, "learning_rate": 1e-05, "loss": 1.2021, "step": 500 }, { "epoch": 0.08179257989469818, "grad_norm": 2.30107045173645, "learning_rate": 1.002e-05, "loss": 1.2953, "step": 501 }, { "epoch": 0.08195583853720256, "grad_norm": 1.9231711626052856, "learning_rate": 1.004e-05, "loss": 1.1801, "step": 502 }, { "epoch": 0.08211909717970695, "grad_norm": 2.354249954223633, "learning_rate": 1.006e-05, "loss": 1.117, "step": 503 }, { "epoch": 0.08228235582221134, "grad_norm": 2.048701524734497, "learning_rate": 1.008e-05, "loss": 1.2776, "step": 504 }, { "epoch": 0.08244561446471572, "grad_norm": 1.9473559856414795, "learning_rate": 1.0100000000000002e-05, "loss": 1.0401, "step": 505 }, { "epoch": 0.08260887310722011, "grad_norm": 1.8621548414230347, "learning_rate": 1.0120000000000001e-05, "loss": 0.9931, "step": 506 }, { "epoch": 0.0827721317497245, "grad_norm": 2.0183658599853516, "learning_rate": 1.0140000000000001e-05, "loss": 1.0373, "step": 507 }, { "epoch": 0.08293539039222889, "grad_norm": 2.3650753498077393, "learning_rate": 1.0160000000000001e-05, "loss": 1.2169, "step": 508 }, { "epoch": 0.08309864903473328, "grad_norm": 2.2234432697296143, "learning_rate": 1.018e-05, "loss": 1.107, "step": 509 }, { "epoch": 0.08326190767723766, "grad_norm": 2.1484463214874268, "learning_rate": 1.02e-05, "loss": 1.0828, "step": 510 }, { "epoch": 0.08342516631974205, "grad_norm": 2.2211270332336426, "learning_rate": 1.022e-05, "loss": 1.1425, "step": 511 }, { "epoch": 0.08358842496224644, "grad_norm": 2.0632476806640625, "learning_rate": 1.024e-05, "loss": 1.0173, "step": 512 }, { "epoch": 0.08375168360475083, "grad_norm": 2.106527328491211, "learning_rate": 1.0260000000000002e-05, "loss": 1.1155, "step": 513 }, { "epoch": 0.08391494224725521, "grad_norm": 2.3124008178710938, "learning_rate": 1.0280000000000002e-05, "loss": 1.343, "step": 514 }, { "epoch": 0.0840782008897596, "grad_norm": 2.581451416015625, "learning_rate": 1.0300000000000001e-05, "loss": 1.3468, "step": 515 }, { "epoch": 0.08424145953226399, "grad_norm": 2.043722629547119, "learning_rate": 1.0320000000000001e-05, "loss": 1.0537, "step": 516 }, { "epoch": 0.08440471817476837, "grad_norm": 2.065143585205078, "learning_rate": 1.0340000000000001e-05, "loss": 1.251, "step": 517 }, { "epoch": 0.08456797681727277, "grad_norm": 2.0450820922851562, "learning_rate": 1.036e-05, "loss": 1.2028, "step": 518 }, { "epoch": 0.08473123545977715, "grad_norm": 2.0890326499938965, "learning_rate": 1.038e-05, "loss": 1.1759, "step": 519 }, { "epoch": 0.08489449410228155, "grad_norm": 2.080613613128662, "learning_rate": 1.04e-05, "loss": 1.0664, "step": 520 }, { "epoch": 0.08505775274478593, "grad_norm": 2.1385669708251953, "learning_rate": 1.0420000000000002e-05, "loss": 1.164, "step": 521 }, { "epoch": 0.08522101138729031, "grad_norm": 2.360839605331421, "learning_rate": 1.0440000000000002e-05, "loss": 1.444, "step": 522 }, { "epoch": 0.0853842700297947, "grad_norm": 2.0543017387390137, "learning_rate": 1.0460000000000001e-05, "loss": 1.2037, "step": 523 }, { "epoch": 0.08554752867229909, "grad_norm": 2.4931371212005615, "learning_rate": 1.0480000000000001e-05, "loss": 1.1363, "step": 524 }, { "epoch": 0.08571078731480347, "grad_norm": 2.6861774921417236, "learning_rate": 1.0500000000000001e-05, "loss": 1.3656, "step": 525 }, { "epoch": 0.08587404595730787, "grad_norm": 1.9597691297531128, "learning_rate": 1.0520000000000001e-05, "loss": 1.0924, "step": 526 }, { "epoch": 0.08603730459981225, "grad_norm": 2.2866878509521484, "learning_rate": 1.054e-05, "loss": 1.071, "step": 527 }, { "epoch": 0.08620056324231665, "grad_norm": 2.089154005050659, "learning_rate": 1.056e-05, "loss": 0.9921, "step": 528 }, { "epoch": 0.08636382188482103, "grad_norm": 2.1897799968719482, "learning_rate": 1.0580000000000002e-05, "loss": 1.1252, "step": 529 }, { "epoch": 0.08652708052732541, "grad_norm": 2.064509630203247, "learning_rate": 1.0600000000000002e-05, "loss": 1.1517, "step": 530 }, { "epoch": 0.08669033916982981, "grad_norm": 2.5933384895324707, "learning_rate": 1.0620000000000002e-05, "loss": 1.2152, "step": 531 }, { "epoch": 0.08685359781233419, "grad_norm": 2.1191277503967285, "learning_rate": 1.0640000000000001e-05, "loss": 1.179, "step": 532 }, { "epoch": 0.08701685645483857, "grad_norm": 2.110588312149048, "learning_rate": 1.0660000000000001e-05, "loss": 1.1441, "step": 533 }, { "epoch": 0.08718011509734297, "grad_norm": 2.329209566116333, "learning_rate": 1.0680000000000001e-05, "loss": 0.9713, "step": 534 }, { "epoch": 0.08734337373984735, "grad_norm": 2.2991254329681396, "learning_rate": 1.0700000000000001e-05, "loss": 1.2401, "step": 535 }, { "epoch": 0.08750663238235173, "grad_norm": 2.3157129287719727, "learning_rate": 1.072e-05, "loss": 1.2062, "step": 536 }, { "epoch": 0.08766989102485613, "grad_norm": 2.2499959468841553, "learning_rate": 1.0740000000000002e-05, "loss": 1.1536, "step": 537 }, { "epoch": 0.08783314966736051, "grad_norm": 2.0809717178344727, "learning_rate": 1.0760000000000002e-05, "loss": 1.1889, "step": 538 }, { "epoch": 0.08799640830986491, "grad_norm": 1.8503974676132202, "learning_rate": 1.0780000000000002e-05, "loss": 1.1144, "step": 539 }, { "epoch": 0.08815966695236929, "grad_norm": 2.1475627422332764, "learning_rate": 1.0800000000000002e-05, "loss": 1.351, "step": 540 }, { "epoch": 0.08832292559487367, "grad_norm": 2.1597707271575928, "learning_rate": 1.0820000000000001e-05, "loss": 1.2216, "step": 541 }, { "epoch": 0.08848618423737807, "grad_norm": 2.3894410133361816, "learning_rate": 1.0840000000000001e-05, "loss": 1.0663, "step": 542 }, { "epoch": 0.08864944287988245, "grad_norm": 2.2700750827789307, "learning_rate": 1.0860000000000001e-05, "loss": 1.1551, "step": 543 }, { "epoch": 0.08881270152238684, "grad_norm": 2.050352096557617, "learning_rate": 1.0880000000000001e-05, "loss": 1.2078, "step": 544 }, { "epoch": 0.08897596016489123, "grad_norm": 2.765122652053833, "learning_rate": 1.0900000000000002e-05, "loss": 1.2681, "step": 545 }, { "epoch": 0.08913921880739561, "grad_norm": 1.9609044790267944, "learning_rate": 1.0920000000000002e-05, "loss": 1.0512, "step": 546 }, { "epoch": 0.0893024774499, "grad_norm": 2.1948235034942627, "learning_rate": 1.0940000000000002e-05, "loss": 1.0793, "step": 547 }, { "epoch": 0.0894657360924044, "grad_norm": 2.0248279571533203, "learning_rate": 1.0960000000000002e-05, "loss": 1.2348, "step": 548 }, { "epoch": 0.08962899473490878, "grad_norm": 1.8916188478469849, "learning_rate": 1.0980000000000002e-05, "loss": 1.0623, "step": 549 }, { "epoch": 0.08979225337741317, "grad_norm": 2.194124221801758, "learning_rate": 1.1000000000000001e-05, "loss": 1.25, "step": 550 }, { "epoch": 0.08995551201991756, "grad_norm": 2.102618455886841, "learning_rate": 1.1020000000000001e-05, "loss": 1.2631, "step": 551 }, { "epoch": 0.09011877066242194, "grad_norm": 2.1111202239990234, "learning_rate": 1.1040000000000001e-05, "loss": 1.0794, "step": 552 }, { "epoch": 0.09028202930492633, "grad_norm": 2.1188979148864746, "learning_rate": 1.1060000000000003e-05, "loss": 1.2089, "step": 553 }, { "epoch": 0.09044528794743072, "grad_norm": 2.3771398067474365, "learning_rate": 1.1080000000000002e-05, "loss": 1.0782, "step": 554 }, { "epoch": 0.0906085465899351, "grad_norm": 2.024806261062622, "learning_rate": 1.1100000000000002e-05, "loss": 1.1943, "step": 555 }, { "epoch": 0.0907718052324395, "grad_norm": 2.2835841178894043, "learning_rate": 1.1120000000000002e-05, "loss": 1.1081, "step": 556 }, { "epoch": 0.09093506387494388, "grad_norm": 2.400186061859131, "learning_rate": 1.1140000000000002e-05, "loss": 1.1067, "step": 557 }, { "epoch": 0.09109832251744827, "grad_norm": 2.1547060012817383, "learning_rate": 1.1160000000000002e-05, "loss": 1.2332, "step": 558 }, { "epoch": 0.09126158115995266, "grad_norm": 2.4429080486297607, "learning_rate": 1.1180000000000001e-05, "loss": 1.1745, "step": 559 }, { "epoch": 0.09142483980245704, "grad_norm": 2.3900227546691895, "learning_rate": 1.1200000000000001e-05, "loss": 1.1192, "step": 560 }, { "epoch": 0.09158809844496144, "grad_norm": 2.234807014465332, "learning_rate": 1.1220000000000003e-05, "loss": 0.9934, "step": 561 }, { "epoch": 0.09175135708746582, "grad_norm": 2.44914174079895, "learning_rate": 1.1240000000000002e-05, "loss": 1.2582, "step": 562 }, { "epoch": 0.0919146157299702, "grad_norm": 2.4764840602874756, "learning_rate": 1.126e-05, "loss": 1.3293, "step": 563 }, { "epoch": 0.0920778743724746, "grad_norm": 1.8345723152160645, "learning_rate": 1.128e-05, "loss": 0.9171, "step": 564 }, { "epoch": 0.09224113301497898, "grad_norm": 2.7708799839019775, "learning_rate": 1.13e-05, "loss": 1.0781, "step": 565 }, { "epoch": 0.09240439165748336, "grad_norm": 2.0084972381591797, "learning_rate": 1.132e-05, "loss": 1.0984, "step": 566 }, { "epoch": 0.09256765029998776, "grad_norm": 2.392289638519287, "learning_rate": 1.134e-05, "loss": 1.1296, "step": 567 }, { "epoch": 0.09273090894249214, "grad_norm": 2.2483296394348145, "learning_rate": 1.136e-05, "loss": 1.1202, "step": 568 }, { "epoch": 0.09289416758499654, "grad_norm": 2.3153254985809326, "learning_rate": 1.138e-05, "loss": 1.2516, "step": 569 }, { "epoch": 0.09305742622750092, "grad_norm": 2.4120562076568604, "learning_rate": 1.14e-05, "loss": 1.2738, "step": 570 }, { "epoch": 0.0932206848700053, "grad_norm": 2.2585086822509766, "learning_rate": 1.142e-05, "loss": 1.0587, "step": 571 }, { "epoch": 0.0933839435125097, "grad_norm": 2.134504795074463, "learning_rate": 1.144e-05, "loss": 1.1773, "step": 572 }, { "epoch": 0.09354720215501408, "grad_norm": 2.3113505840301514, "learning_rate": 1.146e-05, "loss": 1.12, "step": 573 }, { "epoch": 0.09371046079751846, "grad_norm": 2.0091545581817627, "learning_rate": 1.148e-05, "loss": 1.1135, "step": 574 }, { "epoch": 0.09387371944002286, "grad_norm": 2.099668264389038, "learning_rate": 1.15e-05, "loss": 0.9895, "step": 575 }, { "epoch": 0.09403697808252724, "grad_norm": 2.4951369762420654, "learning_rate": 1.152e-05, "loss": 0.9999, "step": 576 }, { "epoch": 0.09420023672503162, "grad_norm": 2.508176326751709, "learning_rate": 1.154e-05, "loss": 1.205, "step": 577 }, { "epoch": 0.09436349536753602, "grad_norm": 2.27274489402771, "learning_rate": 1.156e-05, "loss": 1.2918, "step": 578 }, { "epoch": 0.0945267540100404, "grad_norm": 2.387101650238037, "learning_rate": 1.1580000000000001e-05, "loss": 1.1922, "step": 579 }, { "epoch": 0.0946900126525448, "grad_norm": 2.4729442596435547, "learning_rate": 1.16e-05, "loss": 1.1973, "step": 580 }, { "epoch": 0.09485327129504918, "grad_norm": 2.2617437839508057, "learning_rate": 1.162e-05, "loss": 1.0657, "step": 581 }, { "epoch": 0.09501652993755356, "grad_norm": 2.2428550720214844, "learning_rate": 1.164e-05, "loss": 1.1562, "step": 582 }, { "epoch": 0.09517978858005796, "grad_norm": 2.196427345275879, "learning_rate": 1.166e-05, "loss": 1.0205, "step": 583 }, { "epoch": 0.09534304722256234, "grad_norm": 2.013038158416748, "learning_rate": 1.168e-05, "loss": 0.9623, "step": 584 }, { "epoch": 0.09550630586506673, "grad_norm": 2.2273364067077637, "learning_rate": 1.17e-05, "loss": 1.2066, "step": 585 }, { "epoch": 0.09566956450757112, "grad_norm": 2.2216742038726807, "learning_rate": 1.172e-05, "loss": 1.0945, "step": 586 }, { "epoch": 0.0958328231500755, "grad_norm": 2.389660596847534, "learning_rate": 1.1740000000000001e-05, "loss": 1.3045, "step": 587 }, { "epoch": 0.0959960817925799, "grad_norm": 2.3388900756835938, "learning_rate": 1.1760000000000001e-05, "loss": 0.9157, "step": 588 }, { "epoch": 0.09615934043508428, "grad_norm": 2.515687942504883, "learning_rate": 1.178e-05, "loss": 1.1609, "step": 589 }, { "epoch": 0.09632259907758867, "grad_norm": 2.1678519248962402, "learning_rate": 1.18e-05, "loss": 1.1717, "step": 590 }, { "epoch": 0.09648585772009306, "grad_norm": 1.849572777748108, "learning_rate": 1.182e-05, "loss": 0.8716, "step": 591 }, { "epoch": 0.09664911636259745, "grad_norm": 2.2102913856506348, "learning_rate": 1.184e-05, "loss": 1.1424, "step": 592 }, { "epoch": 0.09681237500510183, "grad_norm": 2.0526554584503174, "learning_rate": 1.186e-05, "loss": 1.0293, "step": 593 }, { "epoch": 0.09697563364760622, "grad_norm": 2.3345088958740234, "learning_rate": 1.188e-05, "loss": 1.0374, "step": 594 }, { "epoch": 0.0971388922901106, "grad_norm": 2.162921667098999, "learning_rate": 1.1900000000000001e-05, "loss": 1.0631, "step": 595 }, { "epoch": 0.09730215093261499, "grad_norm": 2.481452465057373, "learning_rate": 1.1920000000000001e-05, "loss": 1.0674, "step": 596 }, { "epoch": 0.09746540957511939, "grad_norm": 2.1651577949523926, "learning_rate": 1.1940000000000001e-05, "loss": 1.1815, "step": 597 }, { "epoch": 0.09762866821762377, "grad_norm": 2.4811413288116455, "learning_rate": 1.196e-05, "loss": 0.947, "step": 598 }, { "epoch": 0.09779192686012816, "grad_norm": 2.122241973876953, "learning_rate": 1.198e-05, "loss": 1.222, "step": 599 }, { "epoch": 0.09795518550263255, "grad_norm": 2.29854416847229, "learning_rate": 1.2e-05, "loss": 1.0891, "step": 600 }, { "epoch": 0.09811844414513693, "grad_norm": 2.8610055446624756, "learning_rate": 1.202e-05, "loss": 1.296, "step": 601 }, { "epoch": 0.09828170278764133, "grad_norm": 2.278808832168579, "learning_rate": 1.204e-05, "loss": 1.001, "step": 602 }, { "epoch": 0.09844496143014571, "grad_norm": 2.1872823238372803, "learning_rate": 1.2060000000000001e-05, "loss": 1.0714, "step": 603 }, { "epoch": 0.09860822007265009, "grad_norm": 2.403470516204834, "learning_rate": 1.2080000000000001e-05, "loss": 1.1625, "step": 604 }, { "epoch": 0.09877147871515449, "grad_norm": 2.1634583473205566, "learning_rate": 1.2100000000000001e-05, "loss": 1.1078, "step": 605 }, { "epoch": 0.09893473735765887, "grad_norm": 2.5648088455200195, "learning_rate": 1.2120000000000001e-05, "loss": 1.2635, "step": 606 }, { "epoch": 0.09909799600016325, "grad_norm": 2.313460350036621, "learning_rate": 1.214e-05, "loss": 1.1816, "step": 607 }, { "epoch": 0.09926125464266765, "grad_norm": 2.6342110633850098, "learning_rate": 1.216e-05, "loss": 1.1403, "step": 608 }, { "epoch": 0.09942451328517203, "grad_norm": 2.3983922004699707, "learning_rate": 1.218e-05, "loss": 1.115, "step": 609 }, { "epoch": 0.09958777192767643, "grad_norm": 2.0575366020202637, "learning_rate": 1.22e-05, "loss": 1.0788, "step": 610 }, { "epoch": 0.09975103057018081, "grad_norm": 2.4335927963256836, "learning_rate": 1.2220000000000002e-05, "loss": 1.1847, "step": 611 }, { "epoch": 0.09991428921268519, "grad_norm": 2.651470184326172, "learning_rate": 1.2240000000000001e-05, "loss": 1.3627, "step": 612 }, { "epoch": 0.10007754785518959, "grad_norm": 2.40061092376709, "learning_rate": 1.2260000000000001e-05, "loss": 1.1795, "step": 613 }, { "epoch": 0.10024080649769397, "grad_norm": 2.3222713470458984, "learning_rate": 1.2280000000000001e-05, "loss": 1.2542, "step": 614 }, { "epoch": 0.10040406514019835, "grad_norm": 1.927268624305725, "learning_rate": 1.23e-05, "loss": 0.8903, "step": 615 }, { "epoch": 0.10056732378270275, "grad_norm": 2.3598694801330566, "learning_rate": 1.232e-05, "loss": 1.2318, "step": 616 }, { "epoch": 0.10073058242520713, "grad_norm": 2.3972742557525635, "learning_rate": 1.234e-05, "loss": 1.1292, "step": 617 }, { "epoch": 0.10089384106771153, "grad_norm": 2.6992738246917725, "learning_rate": 1.236e-05, "loss": 1.2477, "step": 618 }, { "epoch": 0.10105709971021591, "grad_norm": 2.1505918502807617, "learning_rate": 1.2380000000000002e-05, "loss": 1.09, "step": 619 }, { "epoch": 0.1012203583527203, "grad_norm": 2.234858989715576, "learning_rate": 1.2400000000000002e-05, "loss": 1.0779, "step": 620 }, { "epoch": 0.10138361699522469, "grad_norm": 1.892857313156128, "learning_rate": 1.2420000000000001e-05, "loss": 0.8626, "step": 621 }, { "epoch": 0.10154687563772907, "grad_norm": 1.979612112045288, "learning_rate": 1.2440000000000001e-05, "loss": 1.1399, "step": 622 }, { "epoch": 0.10171013428023346, "grad_norm": 2.1129825115203857, "learning_rate": 1.2460000000000001e-05, "loss": 1.1086, "step": 623 }, { "epoch": 0.10187339292273785, "grad_norm": 2.2777364253997803, "learning_rate": 1.248e-05, "loss": 1.1868, "step": 624 }, { "epoch": 0.10203665156524223, "grad_norm": 2.2597436904907227, "learning_rate": 1.25e-05, "loss": 1.1333, "step": 625 }, { "epoch": 0.10219991020774662, "grad_norm": 2.003836154937744, "learning_rate": 1.252e-05, "loss": 1.1433, "step": 626 }, { "epoch": 0.10236316885025101, "grad_norm": 2.2667949199676514, "learning_rate": 1.254e-05, "loss": 1.0817, "step": 627 }, { "epoch": 0.1025264274927554, "grad_norm": 2.122574806213379, "learning_rate": 1.2560000000000002e-05, "loss": 1.2371, "step": 628 }, { "epoch": 0.10268968613525979, "grad_norm": 2.221588611602783, "learning_rate": 1.2580000000000002e-05, "loss": 1.0462, "step": 629 }, { "epoch": 0.10285294477776417, "grad_norm": 2.48842453956604, "learning_rate": 1.2600000000000001e-05, "loss": 1.2567, "step": 630 }, { "epoch": 0.10301620342026856, "grad_norm": 2.400120973587036, "learning_rate": 1.2620000000000001e-05, "loss": 1.2489, "step": 631 }, { "epoch": 0.10317946206277295, "grad_norm": 2.3211960792541504, "learning_rate": 1.2640000000000001e-05, "loss": 1.0691, "step": 632 }, { "epoch": 0.10334272070527734, "grad_norm": 2.183497190475464, "learning_rate": 1.266e-05, "loss": 1.0862, "step": 633 }, { "epoch": 0.10350597934778172, "grad_norm": 2.494892120361328, "learning_rate": 1.268e-05, "loss": 1.1687, "step": 634 }, { "epoch": 0.10366923799028611, "grad_norm": 2.3134379386901855, "learning_rate": 1.27e-05, "loss": 1.022, "step": 635 }, { "epoch": 0.1038324966327905, "grad_norm": 1.8653385639190674, "learning_rate": 1.2720000000000002e-05, "loss": 1.0266, "step": 636 }, { "epoch": 0.10399575527529488, "grad_norm": 2.489906072616577, "learning_rate": 1.2740000000000002e-05, "loss": 1.1891, "step": 637 }, { "epoch": 0.10415901391779928, "grad_norm": 2.2346251010894775, "learning_rate": 1.2760000000000001e-05, "loss": 1.0892, "step": 638 }, { "epoch": 0.10432227256030366, "grad_norm": 2.2134947776794434, "learning_rate": 1.2780000000000001e-05, "loss": 0.9985, "step": 639 }, { "epoch": 0.10448553120280805, "grad_norm": 2.2416558265686035, "learning_rate": 1.2800000000000001e-05, "loss": 1.1691, "step": 640 }, { "epoch": 0.10464878984531244, "grad_norm": 2.217766523361206, "learning_rate": 1.2820000000000001e-05, "loss": 0.9682, "step": 641 }, { "epoch": 0.10481204848781682, "grad_norm": 2.610581874847412, "learning_rate": 1.284e-05, "loss": 1.1064, "step": 642 }, { "epoch": 0.10497530713032122, "grad_norm": 2.012836456298828, "learning_rate": 1.286e-05, "loss": 0.9583, "step": 643 }, { "epoch": 0.1051385657728256, "grad_norm": 2.2592179775238037, "learning_rate": 1.2880000000000002e-05, "loss": 1.1472, "step": 644 }, { "epoch": 0.10530182441532998, "grad_norm": 2.37949275970459, "learning_rate": 1.2900000000000002e-05, "loss": 0.995, "step": 645 }, { "epoch": 0.10546508305783438, "grad_norm": 2.1269478797912598, "learning_rate": 1.2920000000000002e-05, "loss": 1.1412, "step": 646 }, { "epoch": 0.10562834170033876, "grad_norm": 2.4893782138824463, "learning_rate": 1.2940000000000001e-05, "loss": 1.1055, "step": 647 }, { "epoch": 0.10579160034284316, "grad_norm": 2.216463804244995, "learning_rate": 1.2960000000000001e-05, "loss": 1.1752, "step": 648 }, { "epoch": 0.10595485898534754, "grad_norm": 2.459069013595581, "learning_rate": 1.2980000000000001e-05, "loss": 1.2992, "step": 649 }, { "epoch": 0.10611811762785192, "grad_norm": 2.3783762454986572, "learning_rate": 1.3000000000000001e-05, "loss": 1.1758, "step": 650 }, { "epoch": 0.10628137627035632, "grad_norm": 2.1655356884002686, "learning_rate": 1.302e-05, "loss": 1.2611, "step": 651 }, { "epoch": 0.1064446349128607, "grad_norm": 2.161815643310547, "learning_rate": 1.3040000000000002e-05, "loss": 1.0408, "step": 652 }, { "epoch": 0.10660789355536508, "grad_norm": 1.9729121923446655, "learning_rate": 1.3060000000000002e-05, "loss": 1.0232, "step": 653 }, { "epoch": 0.10677115219786948, "grad_norm": 2.0821754932403564, "learning_rate": 1.3080000000000002e-05, "loss": 0.9521, "step": 654 }, { "epoch": 0.10693441084037386, "grad_norm": 2.408712863922119, "learning_rate": 1.3100000000000002e-05, "loss": 1.0755, "step": 655 }, { "epoch": 0.10709766948287824, "grad_norm": 2.3315343856811523, "learning_rate": 1.3120000000000001e-05, "loss": 0.9234, "step": 656 }, { "epoch": 0.10726092812538264, "grad_norm": 1.9373000860214233, "learning_rate": 1.3140000000000001e-05, "loss": 0.9108, "step": 657 }, { "epoch": 0.10742418676788702, "grad_norm": 2.2427690029144287, "learning_rate": 1.3160000000000001e-05, "loss": 1.0481, "step": 658 }, { "epoch": 0.10758744541039142, "grad_norm": 2.1169705390930176, "learning_rate": 1.3180000000000001e-05, "loss": 1.1872, "step": 659 }, { "epoch": 0.1077507040528958, "grad_norm": 2.207862138748169, "learning_rate": 1.3200000000000002e-05, "loss": 1.0943, "step": 660 }, { "epoch": 0.10791396269540018, "grad_norm": 2.1384873390197754, "learning_rate": 1.3220000000000002e-05, "loss": 1.0693, "step": 661 }, { "epoch": 0.10807722133790458, "grad_norm": 2.3833110332489014, "learning_rate": 1.3240000000000002e-05, "loss": 1.2993, "step": 662 }, { "epoch": 0.10824047998040896, "grad_norm": 1.946217656135559, "learning_rate": 1.3260000000000002e-05, "loss": 0.9552, "step": 663 }, { "epoch": 0.10840373862291335, "grad_norm": 2.142319917678833, "learning_rate": 1.3280000000000002e-05, "loss": 1.2534, "step": 664 }, { "epoch": 0.10856699726541774, "grad_norm": 2.3199260234832764, "learning_rate": 1.3300000000000001e-05, "loss": 1.0762, "step": 665 }, { "epoch": 0.10873025590792212, "grad_norm": 2.9386916160583496, "learning_rate": 1.3320000000000001e-05, "loss": 1.2278, "step": 666 }, { "epoch": 0.1088935145504265, "grad_norm": 2.275683879852295, "learning_rate": 1.3340000000000001e-05, "loss": 1.2545, "step": 667 }, { "epoch": 0.1090567731929309, "grad_norm": 2.272437572479248, "learning_rate": 1.3360000000000003e-05, "loss": 1.1936, "step": 668 }, { "epoch": 0.10922003183543529, "grad_norm": 2.2124342918395996, "learning_rate": 1.3380000000000002e-05, "loss": 1.2094, "step": 669 }, { "epoch": 0.10938329047793968, "grad_norm": 2.012925386428833, "learning_rate": 1.3400000000000002e-05, "loss": 0.8534, "step": 670 }, { "epoch": 0.10954654912044406, "grad_norm": 2.936959743499756, "learning_rate": 1.3420000000000002e-05, "loss": 1.3426, "step": 671 }, { "epoch": 0.10970980776294845, "grad_norm": 2.002520799636841, "learning_rate": 1.3440000000000002e-05, "loss": 1.1952, "step": 672 }, { "epoch": 0.10987306640545284, "grad_norm": 2.1583547592163086, "learning_rate": 1.3460000000000002e-05, "loss": 0.9572, "step": 673 }, { "epoch": 0.11003632504795723, "grad_norm": 2.11974835395813, "learning_rate": 1.3480000000000001e-05, "loss": 0.9344, "step": 674 }, { "epoch": 0.11019958369046161, "grad_norm": 2.084968328475952, "learning_rate": 1.3500000000000001e-05, "loss": 0.9384, "step": 675 }, { "epoch": 0.110362842332966, "grad_norm": 2.271674871444702, "learning_rate": 1.3520000000000003e-05, "loss": 1.1174, "step": 676 }, { "epoch": 0.11052610097547039, "grad_norm": 2.2235991954803467, "learning_rate": 1.3540000000000003e-05, "loss": 1.137, "step": 677 }, { "epoch": 0.11068935961797477, "grad_norm": 2.312089204788208, "learning_rate": 1.3560000000000002e-05, "loss": 0.9163, "step": 678 }, { "epoch": 0.11085261826047917, "grad_norm": 2.362788677215576, "learning_rate": 1.3580000000000002e-05, "loss": 1.2848, "step": 679 }, { "epoch": 0.11101587690298355, "grad_norm": 2.237948417663574, "learning_rate": 1.3600000000000002e-05, "loss": 1.2563, "step": 680 }, { "epoch": 0.11117913554548794, "grad_norm": 2.2222514152526855, "learning_rate": 1.3620000000000002e-05, "loss": 0.8566, "step": 681 }, { "epoch": 0.11134239418799233, "grad_norm": 2.18951416015625, "learning_rate": 1.3640000000000002e-05, "loss": 0.9657, "step": 682 }, { "epoch": 0.11150565283049671, "grad_norm": 2.488557815551758, "learning_rate": 1.3660000000000001e-05, "loss": 1.1271, "step": 683 }, { "epoch": 0.1116689114730011, "grad_norm": 2.2374722957611084, "learning_rate": 1.3680000000000003e-05, "loss": 1.0334, "step": 684 }, { "epoch": 0.11183217011550549, "grad_norm": 2.1181163787841797, "learning_rate": 1.3700000000000003e-05, "loss": 1.0255, "step": 685 }, { "epoch": 0.11199542875800987, "grad_norm": 2.5455894470214844, "learning_rate": 1.3720000000000002e-05, "loss": 1.2454, "step": 686 }, { "epoch": 0.11215868740051427, "grad_norm": 2.715745687484741, "learning_rate": 1.3740000000000002e-05, "loss": 1.2491, "step": 687 }, { "epoch": 0.11232194604301865, "grad_norm": 2.4145407676696777, "learning_rate": 1.376e-05, "loss": 1.1853, "step": 688 }, { "epoch": 0.11248520468552305, "grad_norm": 2.3001222610473633, "learning_rate": 1.378e-05, "loss": 1.1955, "step": 689 }, { "epoch": 0.11264846332802743, "grad_norm": 2.644507884979248, "learning_rate": 1.38e-05, "loss": 1.2369, "step": 690 }, { "epoch": 0.11281172197053181, "grad_norm": 2.3529052734375, "learning_rate": 1.382e-05, "loss": 1.0045, "step": 691 }, { "epoch": 0.11297498061303621, "grad_norm": 2.4610183238983154, "learning_rate": 1.384e-05, "loss": 1.0641, "step": 692 }, { "epoch": 0.11313823925554059, "grad_norm": 2.415421962738037, "learning_rate": 1.386e-05, "loss": 1.0964, "step": 693 }, { "epoch": 0.11330149789804497, "grad_norm": 2.3912975788116455, "learning_rate": 1.3880000000000001e-05, "loss": 1.149, "step": 694 }, { "epoch": 0.11346475654054937, "grad_norm": 2.6268279552459717, "learning_rate": 1.39e-05, "loss": 1.3216, "step": 695 }, { "epoch": 0.11362801518305375, "grad_norm": 2.5815670490264893, "learning_rate": 1.392e-05, "loss": 1.0412, "step": 696 }, { "epoch": 0.11379127382555813, "grad_norm": 2.3840436935424805, "learning_rate": 1.394e-05, "loss": 1.1186, "step": 697 }, { "epoch": 0.11395453246806253, "grad_norm": 2.1603963375091553, "learning_rate": 1.396e-05, "loss": 1.2135, "step": 698 }, { "epoch": 0.11411779111056691, "grad_norm": 2.9880950450897217, "learning_rate": 1.398e-05, "loss": 1.1652, "step": 699 }, { "epoch": 0.11428104975307131, "grad_norm": 2.3261570930480957, "learning_rate": 1.4e-05, "loss": 1.2449, "step": 700 }, { "epoch": 0.11444430839557569, "grad_norm": 2.495210647583008, "learning_rate": 1.402e-05, "loss": 1.274, "step": 701 }, { "epoch": 0.11460756703808007, "grad_norm": 2.5343966484069824, "learning_rate": 1.4040000000000001e-05, "loss": 1.1562, "step": 702 }, { "epoch": 0.11477082568058447, "grad_norm": 2.2462477684020996, "learning_rate": 1.4060000000000001e-05, "loss": 1.0743, "step": 703 }, { "epoch": 0.11493408432308885, "grad_norm": 2.763458251953125, "learning_rate": 1.408e-05, "loss": 1.3091, "step": 704 }, { "epoch": 0.11509734296559324, "grad_norm": 2.1928839683532715, "learning_rate": 1.41e-05, "loss": 1.1241, "step": 705 }, { "epoch": 0.11526060160809763, "grad_norm": 2.0539863109588623, "learning_rate": 1.412e-05, "loss": 1.1229, "step": 706 }, { "epoch": 0.11542386025060201, "grad_norm": 2.0049281120300293, "learning_rate": 1.414e-05, "loss": 1.1066, "step": 707 }, { "epoch": 0.1155871188931064, "grad_norm": 2.1301469802856445, "learning_rate": 1.416e-05, "loss": 1.0123, "step": 708 }, { "epoch": 0.1157503775356108, "grad_norm": 2.1698412895202637, "learning_rate": 1.418e-05, "loss": 1.0132, "step": 709 }, { "epoch": 0.11591363617811518, "grad_norm": 2.023695945739746, "learning_rate": 1.4200000000000001e-05, "loss": 1.1145, "step": 710 }, { "epoch": 0.11607689482061957, "grad_norm": 2.1574056148529053, "learning_rate": 1.4220000000000001e-05, "loss": 1.0613, "step": 711 }, { "epoch": 0.11624015346312395, "grad_norm": 2.7349820137023926, "learning_rate": 1.4240000000000001e-05, "loss": 1.0168, "step": 712 }, { "epoch": 0.11640341210562834, "grad_norm": 2.127218723297119, "learning_rate": 1.426e-05, "loss": 0.9757, "step": 713 }, { "epoch": 0.11656667074813273, "grad_norm": 2.580047607421875, "learning_rate": 1.428e-05, "loss": 1.1039, "step": 714 }, { "epoch": 0.11672992939063712, "grad_norm": 2.18753981590271, "learning_rate": 1.43e-05, "loss": 1.0925, "step": 715 }, { "epoch": 0.1168931880331415, "grad_norm": 2.3090806007385254, "learning_rate": 1.432e-05, "loss": 1.2568, "step": 716 }, { "epoch": 0.1170564466756459, "grad_norm": 2.2329230308532715, "learning_rate": 1.434e-05, "loss": 1.1274, "step": 717 }, { "epoch": 0.11721970531815028, "grad_norm": 2.684112787246704, "learning_rate": 1.4360000000000001e-05, "loss": 1.2778, "step": 718 }, { "epoch": 0.11738296396065467, "grad_norm": 2.398986339569092, "learning_rate": 1.4380000000000001e-05, "loss": 1.0811, "step": 719 }, { "epoch": 0.11754622260315906, "grad_norm": 2.491607427597046, "learning_rate": 1.4400000000000001e-05, "loss": 1.0466, "step": 720 }, { "epoch": 0.11770948124566344, "grad_norm": 2.4555017948150635, "learning_rate": 1.4420000000000001e-05, "loss": 1.1665, "step": 721 }, { "epoch": 0.11787273988816784, "grad_norm": 2.703523874282837, "learning_rate": 1.444e-05, "loss": 1.2154, "step": 722 }, { "epoch": 0.11803599853067222, "grad_norm": 2.1012768745422363, "learning_rate": 1.446e-05, "loss": 0.9451, "step": 723 }, { "epoch": 0.1181992571731766, "grad_norm": 2.0012354850769043, "learning_rate": 1.448e-05, "loss": 0.9802, "step": 724 }, { "epoch": 0.118362515815681, "grad_norm": 2.3113489151000977, "learning_rate": 1.45e-05, "loss": 1.0418, "step": 725 }, { "epoch": 0.11852577445818538, "grad_norm": 2.1188430786132812, "learning_rate": 1.4520000000000002e-05, "loss": 1.1208, "step": 726 }, { "epoch": 0.11868903310068976, "grad_norm": 2.2806811332702637, "learning_rate": 1.4540000000000001e-05, "loss": 1.0697, "step": 727 }, { "epoch": 0.11885229174319416, "grad_norm": 2.280212163925171, "learning_rate": 1.4560000000000001e-05, "loss": 0.9671, "step": 728 }, { "epoch": 0.11901555038569854, "grad_norm": 2.061343193054199, "learning_rate": 1.4580000000000001e-05, "loss": 0.9964, "step": 729 }, { "epoch": 0.11917880902820294, "grad_norm": 2.514763116836548, "learning_rate": 1.46e-05, "loss": 1.1097, "step": 730 }, { "epoch": 0.11934206767070732, "grad_norm": 2.141361713409424, "learning_rate": 1.462e-05, "loss": 1.0284, "step": 731 }, { "epoch": 0.1195053263132117, "grad_norm": 2.612725019454956, "learning_rate": 1.464e-05, "loss": 1.3805, "step": 732 }, { "epoch": 0.1196685849557161, "grad_norm": 2.377556562423706, "learning_rate": 1.466e-05, "loss": 1.1615, "step": 733 }, { "epoch": 0.11983184359822048, "grad_norm": 2.8224315643310547, "learning_rate": 1.4680000000000002e-05, "loss": 1.1403, "step": 734 }, { "epoch": 0.11999510224072486, "grad_norm": 2.278228998184204, "learning_rate": 1.4700000000000002e-05, "loss": 1.0359, "step": 735 }, { "epoch": 0.12015836088322926, "grad_norm": 2.27772855758667, "learning_rate": 1.4720000000000001e-05, "loss": 1.073, "step": 736 }, { "epoch": 0.12032161952573364, "grad_norm": 2.2087297439575195, "learning_rate": 1.4740000000000001e-05, "loss": 1.0801, "step": 737 }, { "epoch": 0.12048487816823802, "grad_norm": 2.184826135635376, "learning_rate": 1.4760000000000001e-05, "loss": 0.9646, "step": 738 }, { "epoch": 0.12064813681074242, "grad_norm": 2.453735828399658, "learning_rate": 1.478e-05, "loss": 1.2635, "step": 739 }, { "epoch": 0.1208113954532468, "grad_norm": 2.414106845855713, "learning_rate": 1.48e-05, "loss": 0.9237, "step": 740 }, { "epoch": 0.1209746540957512, "grad_norm": 2.2158467769622803, "learning_rate": 1.482e-05, "loss": 1.0015, "step": 741 }, { "epoch": 0.12113791273825558, "grad_norm": 2.1643996238708496, "learning_rate": 1.4840000000000002e-05, "loss": 1.1763, "step": 742 }, { "epoch": 0.12130117138075996, "grad_norm": 2.547316551208496, "learning_rate": 1.4860000000000002e-05, "loss": 1.0759, "step": 743 }, { "epoch": 0.12146443002326436, "grad_norm": 2.59954571723938, "learning_rate": 1.4880000000000002e-05, "loss": 1.1794, "step": 744 }, { "epoch": 0.12162768866576874, "grad_norm": 2.4989686012268066, "learning_rate": 1.4900000000000001e-05, "loss": 1.0237, "step": 745 }, { "epoch": 0.12179094730827313, "grad_norm": 2.4219133853912354, "learning_rate": 1.4920000000000001e-05, "loss": 1.1134, "step": 746 }, { "epoch": 0.12195420595077752, "grad_norm": 2.3253793716430664, "learning_rate": 1.4940000000000001e-05, "loss": 1.0946, "step": 747 }, { "epoch": 0.1221174645932819, "grad_norm": 2.725053071975708, "learning_rate": 1.496e-05, "loss": 1.0228, "step": 748 }, { "epoch": 0.1222807232357863, "grad_norm": 2.5383968353271484, "learning_rate": 1.498e-05, "loss": 1.2493, "step": 749 }, { "epoch": 0.12244398187829068, "grad_norm": 2.634434938430786, "learning_rate": 1.5000000000000002e-05, "loss": 1.0577, "step": 750 }, { "epoch": 0.12260724052079507, "grad_norm": 2.4397354125976562, "learning_rate": 1.5020000000000002e-05, "loss": 1.1039, "step": 751 }, { "epoch": 0.12277049916329946, "grad_norm": 2.715039014816284, "learning_rate": 1.5040000000000002e-05, "loss": 1.2254, "step": 752 }, { "epoch": 0.12293375780580384, "grad_norm": 2.3144099712371826, "learning_rate": 1.5060000000000001e-05, "loss": 0.9922, "step": 753 }, { "epoch": 0.12309701644830823, "grad_norm": 2.6371517181396484, "learning_rate": 1.5080000000000001e-05, "loss": 1.249, "step": 754 }, { "epoch": 0.12326027509081262, "grad_norm": 2.331406593322754, "learning_rate": 1.5100000000000001e-05, "loss": 1.0866, "step": 755 }, { "epoch": 0.123423533733317, "grad_norm": 2.396252155303955, "learning_rate": 1.5120000000000001e-05, "loss": 1.0895, "step": 756 }, { "epoch": 0.12358679237582139, "grad_norm": 2.273235559463501, "learning_rate": 1.514e-05, "loss": 1.157, "step": 757 }, { "epoch": 0.12375005101832579, "grad_norm": 2.4729843139648438, "learning_rate": 1.516e-05, "loss": 1.0587, "step": 758 }, { "epoch": 0.12391330966083017, "grad_norm": 2.291576385498047, "learning_rate": 1.5180000000000002e-05, "loss": 1.0536, "step": 759 }, { "epoch": 0.12407656830333456, "grad_norm": 2.4626877307891846, "learning_rate": 1.5200000000000002e-05, "loss": 0.9912, "step": 760 }, { "epoch": 0.12423982694583895, "grad_norm": 2.3226864337921143, "learning_rate": 1.5220000000000002e-05, "loss": 1.0823, "step": 761 }, { "epoch": 0.12440308558834333, "grad_norm": 2.7476718425750732, "learning_rate": 1.5240000000000001e-05, "loss": 1.0776, "step": 762 }, { "epoch": 0.12456634423084773, "grad_norm": 2.017200469970703, "learning_rate": 1.5260000000000003e-05, "loss": 1.028, "step": 763 }, { "epoch": 0.12472960287335211, "grad_norm": 2.230628728866577, "learning_rate": 1.5280000000000003e-05, "loss": 0.9139, "step": 764 }, { "epoch": 0.12489286151585649, "grad_norm": 2.4791698455810547, "learning_rate": 1.5300000000000003e-05, "loss": 1.266, "step": 765 }, { "epoch": 0.12505612015836087, "grad_norm": 2.521533489227295, "learning_rate": 1.5320000000000002e-05, "loss": 0.9969, "step": 766 }, { "epoch": 0.12521937880086528, "grad_norm": 2.473158121109009, "learning_rate": 1.5340000000000002e-05, "loss": 0.9861, "step": 767 }, { "epoch": 0.12538263744336967, "grad_norm": 2.7101058959960938, "learning_rate": 1.5360000000000002e-05, "loss": 0.8905, "step": 768 }, { "epoch": 0.12554589608587405, "grad_norm": 2.70375657081604, "learning_rate": 1.5380000000000002e-05, "loss": 1.2277, "step": 769 }, { "epoch": 0.12570915472837843, "grad_norm": 2.231943368911743, "learning_rate": 1.54e-05, "loss": 1.0401, "step": 770 }, { "epoch": 0.1258724133708828, "grad_norm": 2.251126766204834, "learning_rate": 1.542e-05, "loss": 1.1845, "step": 771 }, { "epoch": 0.1260356720133872, "grad_norm": 2.613600492477417, "learning_rate": 1.544e-05, "loss": 1.075, "step": 772 }, { "epoch": 0.1261989306558916, "grad_norm": 2.5316648483276367, "learning_rate": 1.546e-05, "loss": 1.0518, "step": 773 }, { "epoch": 0.126362189298396, "grad_norm": 2.1738829612731934, "learning_rate": 1.548e-05, "loss": 0.9975, "step": 774 }, { "epoch": 0.12652544794090037, "grad_norm": 2.2844667434692383, "learning_rate": 1.55e-05, "loss": 1.146, "step": 775 }, { "epoch": 0.12668870658340475, "grad_norm": 2.266869068145752, "learning_rate": 1.552e-05, "loss": 1.1181, "step": 776 }, { "epoch": 0.12685196522590914, "grad_norm": 2.207763671875, "learning_rate": 1.554e-05, "loss": 0.92, "step": 777 }, { "epoch": 0.12701522386841355, "grad_norm": 2.888216733932495, "learning_rate": 1.556e-05, "loss": 1.1068, "step": 778 }, { "epoch": 0.12717848251091793, "grad_norm": 2.5158331394195557, "learning_rate": 1.5580000000000003e-05, "loss": 1.0591, "step": 779 }, { "epoch": 0.1273417411534223, "grad_norm": 2.4173104763031006, "learning_rate": 1.5600000000000003e-05, "loss": 1.0881, "step": 780 }, { "epoch": 0.1275049997959267, "grad_norm": 2.8312129974365234, "learning_rate": 1.5620000000000003e-05, "loss": 1.2619, "step": 781 }, { "epoch": 0.12766825843843108, "grad_norm": 2.542279005050659, "learning_rate": 1.5640000000000003e-05, "loss": 1.0846, "step": 782 }, { "epoch": 0.12783151708093546, "grad_norm": 2.1690378189086914, "learning_rate": 1.5660000000000003e-05, "loss": 0.9546, "step": 783 }, { "epoch": 0.12799477572343987, "grad_norm": 2.281083345413208, "learning_rate": 1.5680000000000002e-05, "loss": 1.3403, "step": 784 }, { "epoch": 0.12815803436594425, "grad_norm": 2.5888724327087402, "learning_rate": 1.5700000000000002e-05, "loss": 1.1039, "step": 785 }, { "epoch": 0.12832129300844863, "grad_norm": 2.0737390518188477, "learning_rate": 1.5720000000000002e-05, "loss": 0.9884, "step": 786 }, { "epoch": 0.12848455165095302, "grad_norm": 2.239034652709961, "learning_rate": 1.5740000000000002e-05, "loss": 1.2374, "step": 787 }, { "epoch": 0.1286478102934574, "grad_norm": 2.0441274642944336, "learning_rate": 1.576e-05, "loss": 0.9067, "step": 788 }, { "epoch": 0.1288110689359618, "grad_norm": 2.762007474899292, "learning_rate": 1.578e-05, "loss": 1.1817, "step": 789 }, { "epoch": 0.1289743275784662, "grad_norm": 2.167156934738159, "learning_rate": 1.58e-05, "loss": 1.0362, "step": 790 }, { "epoch": 0.12913758622097057, "grad_norm": 2.466776132583618, "learning_rate": 1.582e-05, "loss": 1.0475, "step": 791 }, { "epoch": 0.12930084486347496, "grad_norm": 2.870243787765503, "learning_rate": 1.584e-05, "loss": 1.075, "step": 792 }, { "epoch": 0.12946410350597934, "grad_norm": 2.1869876384735107, "learning_rate": 1.586e-05, "loss": 0.9839, "step": 793 }, { "epoch": 0.12962736214848375, "grad_norm": 2.2803492546081543, "learning_rate": 1.588e-05, "loss": 1.0328, "step": 794 }, { "epoch": 0.12979062079098813, "grad_norm": 2.5293941497802734, "learning_rate": 1.5900000000000004e-05, "loss": 1.0025, "step": 795 }, { "epoch": 0.12995387943349251, "grad_norm": 2.30033802986145, "learning_rate": 1.5920000000000003e-05, "loss": 0.9179, "step": 796 }, { "epoch": 0.1301171380759969, "grad_norm": 2.378999710083008, "learning_rate": 1.5940000000000003e-05, "loss": 1.1092, "step": 797 }, { "epoch": 0.13028039671850128, "grad_norm": 2.3963797092437744, "learning_rate": 1.5960000000000003e-05, "loss": 0.9248, "step": 798 }, { "epoch": 0.13044365536100566, "grad_norm": 2.2756881713867188, "learning_rate": 1.5980000000000003e-05, "loss": 0.9011, "step": 799 }, { "epoch": 0.13060691400351007, "grad_norm": 2.4396896362304688, "learning_rate": 1.6000000000000003e-05, "loss": 1.1119, "step": 800 }, { "epoch": 0.13077017264601445, "grad_norm": 2.329301357269287, "learning_rate": 1.6020000000000002e-05, "loss": 1.1733, "step": 801 }, { "epoch": 0.13093343128851884, "grad_norm": 2.3051140308380127, "learning_rate": 1.6040000000000002e-05, "loss": 1.0397, "step": 802 }, { "epoch": 0.13109668993102322, "grad_norm": 2.466191530227661, "learning_rate": 1.6060000000000002e-05, "loss": 1.1942, "step": 803 }, { "epoch": 0.1312599485735276, "grad_norm": 2.410820722579956, "learning_rate": 1.6080000000000002e-05, "loss": 1.1701, "step": 804 }, { "epoch": 0.131423207216032, "grad_norm": 2.2420690059661865, "learning_rate": 1.6100000000000002e-05, "loss": 0.9753, "step": 805 }, { "epoch": 0.1315864658585364, "grad_norm": 2.461653232574463, "learning_rate": 1.612e-05, "loss": 1.0665, "step": 806 }, { "epoch": 0.13174972450104078, "grad_norm": 2.347559690475464, "learning_rate": 1.614e-05, "loss": 1.0816, "step": 807 }, { "epoch": 0.13191298314354516, "grad_norm": 2.683488368988037, "learning_rate": 1.616e-05, "loss": 1.0225, "step": 808 }, { "epoch": 0.13207624178604954, "grad_norm": 2.085374355316162, "learning_rate": 1.618e-05, "loss": 1.0274, "step": 809 }, { "epoch": 0.13223950042855392, "grad_norm": 2.2985503673553467, "learning_rate": 1.62e-05, "loss": 1.1442, "step": 810 }, { "epoch": 0.13240275907105833, "grad_norm": 2.378345489501953, "learning_rate": 1.6220000000000004e-05, "loss": 0.9335, "step": 811 }, { "epoch": 0.13256601771356272, "grad_norm": 2.120229959487915, "learning_rate": 1.6240000000000004e-05, "loss": 1.0921, "step": 812 }, { "epoch": 0.1327292763560671, "grad_norm": 2.2785110473632812, "learning_rate": 1.626e-05, "loss": 1.0372, "step": 813 }, { "epoch": 0.13289253499857148, "grad_norm": 2.3823118209838867, "learning_rate": 1.628e-05, "loss": 1.1213, "step": 814 }, { "epoch": 0.13305579364107586, "grad_norm": 2.504801034927368, "learning_rate": 1.63e-05, "loss": 1.2661, "step": 815 }, { "epoch": 0.13321905228358027, "grad_norm": 2.092095136642456, "learning_rate": 1.632e-05, "loss": 1.0209, "step": 816 }, { "epoch": 0.13338231092608466, "grad_norm": 2.267958402633667, "learning_rate": 1.634e-05, "loss": 1.1929, "step": 817 }, { "epoch": 0.13354556956858904, "grad_norm": 2.91835618019104, "learning_rate": 1.636e-05, "loss": 1.178, "step": 818 }, { "epoch": 0.13370882821109342, "grad_norm": 2.530365228652954, "learning_rate": 1.638e-05, "loss": 1.0486, "step": 819 }, { "epoch": 0.1338720868535978, "grad_norm": 2.5693790912628174, "learning_rate": 1.64e-05, "loss": 1.0119, "step": 820 }, { "epoch": 0.1340353454961022, "grad_norm": 3.007214069366455, "learning_rate": 1.6420000000000002e-05, "loss": 1.3315, "step": 821 }, { "epoch": 0.1341986041386066, "grad_norm": 2.337501049041748, "learning_rate": 1.6440000000000002e-05, "loss": 1.0489, "step": 822 }, { "epoch": 0.13436186278111098, "grad_norm": 2.2695515155792236, "learning_rate": 1.646e-05, "loss": 1.0759, "step": 823 }, { "epoch": 0.13452512142361536, "grad_norm": 2.47705340385437, "learning_rate": 1.648e-05, "loss": 1.1809, "step": 824 }, { "epoch": 0.13468838006611975, "grad_norm": 2.219982147216797, "learning_rate": 1.65e-05, "loss": 0.9075, "step": 825 }, { "epoch": 0.13485163870862413, "grad_norm": 2.298489570617676, "learning_rate": 1.652e-05, "loss": 0.9577, "step": 826 }, { "epoch": 0.13501489735112854, "grad_norm": 2.265995502471924, "learning_rate": 1.654e-05, "loss": 1.0926, "step": 827 }, { "epoch": 0.13517815599363292, "grad_norm": 2.2554972171783447, "learning_rate": 1.656e-05, "loss": 1.0165, "step": 828 }, { "epoch": 0.1353414146361373, "grad_norm": 2.1899595260620117, "learning_rate": 1.658e-05, "loss": 0.8842, "step": 829 }, { "epoch": 0.13550467327864169, "grad_norm": 2.708967924118042, "learning_rate": 1.66e-05, "loss": 1.0506, "step": 830 }, { "epoch": 0.13566793192114607, "grad_norm": 2.488602638244629, "learning_rate": 1.662e-05, "loss": 1.1737, "step": 831 }, { "epoch": 0.13583119056365045, "grad_norm": 2.2469325065612793, "learning_rate": 1.664e-05, "loss": 1.1111, "step": 832 }, { "epoch": 0.13599444920615486, "grad_norm": 2.581990957260132, "learning_rate": 1.666e-05, "loss": 1.0497, "step": 833 }, { "epoch": 0.13615770784865924, "grad_norm": 2.4781339168548584, "learning_rate": 1.668e-05, "loss": 0.9631, "step": 834 }, { "epoch": 0.13632096649116363, "grad_norm": 2.491020679473877, "learning_rate": 1.67e-05, "loss": 1.2637, "step": 835 }, { "epoch": 0.136484225133668, "grad_norm": 2.620746612548828, "learning_rate": 1.672e-05, "loss": 1.2318, "step": 836 }, { "epoch": 0.1366474837761724, "grad_norm": 2.739013671875, "learning_rate": 1.6740000000000002e-05, "loss": 0.9492, "step": 837 }, { "epoch": 0.1368107424186768, "grad_norm": 2.1043741703033447, "learning_rate": 1.6760000000000002e-05, "loss": 0.949, "step": 838 }, { "epoch": 0.13697400106118118, "grad_norm": 2.4798238277435303, "learning_rate": 1.6780000000000002e-05, "loss": 1.2807, "step": 839 }, { "epoch": 0.13713725970368557, "grad_norm": 2.0695459842681885, "learning_rate": 1.6800000000000002e-05, "loss": 1.07, "step": 840 }, { "epoch": 0.13730051834618995, "grad_norm": 2.578718423843384, "learning_rate": 1.682e-05, "loss": 1.122, "step": 841 }, { "epoch": 0.13746377698869433, "grad_norm": 2.4468400478363037, "learning_rate": 1.684e-05, "loss": 1.0477, "step": 842 }, { "epoch": 0.1376270356311987, "grad_norm": 2.2157211303710938, "learning_rate": 1.686e-05, "loss": 1.0999, "step": 843 }, { "epoch": 0.13779029427370312, "grad_norm": 2.626016616821289, "learning_rate": 1.688e-05, "loss": 0.919, "step": 844 }, { "epoch": 0.1379535529162075, "grad_norm": 2.4097883701324463, "learning_rate": 1.69e-05, "loss": 1.018, "step": 845 }, { "epoch": 0.1381168115587119, "grad_norm": 2.5626206398010254, "learning_rate": 1.692e-05, "loss": 1.2025, "step": 846 }, { "epoch": 0.13828007020121627, "grad_norm": 2.166566848754883, "learning_rate": 1.694e-05, "loss": 1.2181, "step": 847 }, { "epoch": 0.13844332884372065, "grad_norm": 2.3883957862854004, "learning_rate": 1.696e-05, "loss": 1.179, "step": 848 }, { "epoch": 0.13860658748622506, "grad_norm": 2.9392340183258057, "learning_rate": 1.698e-05, "loss": 1.194, "step": 849 }, { "epoch": 0.13876984612872945, "grad_norm": 2.3590869903564453, "learning_rate": 1.7e-05, "loss": 0.9364, "step": 850 }, { "epoch": 0.13893310477123383, "grad_norm": 2.6429126262664795, "learning_rate": 1.702e-05, "loss": 1.2343, "step": 851 }, { "epoch": 0.1390963634137382, "grad_norm": 2.3160974979400635, "learning_rate": 1.704e-05, "loss": 1.1265, "step": 852 }, { "epoch": 0.1392596220562426, "grad_norm": 2.275766611099243, "learning_rate": 1.7060000000000003e-05, "loss": 0.9642, "step": 853 }, { "epoch": 0.139422880698747, "grad_norm": 2.634352684020996, "learning_rate": 1.7080000000000002e-05, "loss": 1.126, "step": 854 }, { "epoch": 0.1395861393412514, "grad_norm": 2.3045482635498047, "learning_rate": 1.7100000000000002e-05, "loss": 1.1128, "step": 855 }, { "epoch": 0.13974939798375577, "grad_norm": 2.288778066635132, "learning_rate": 1.7120000000000002e-05, "loss": 1.2359, "step": 856 }, { "epoch": 0.13991265662626015, "grad_norm": 2.6486730575561523, "learning_rate": 1.7140000000000002e-05, "loss": 1.0841, "step": 857 }, { "epoch": 0.14007591526876453, "grad_norm": 2.1670572757720947, "learning_rate": 1.7160000000000002e-05, "loss": 1.0854, "step": 858 }, { "epoch": 0.14023917391126892, "grad_norm": 2.4031360149383545, "learning_rate": 1.718e-05, "loss": 1.2407, "step": 859 }, { "epoch": 0.14040243255377333, "grad_norm": 2.2284305095672607, "learning_rate": 1.72e-05, "loss": 1.0318, "step": 860 }, { "epoch": 0.1405656911962777, "grad_norm": 2.4811019897460938, "learning_rate": 1.722e-05, "loss": 1.321, "step": 861 }, { "epoch": 0.1407289498387821, "grad_norm": 2.680697441101074, "learning_rate": 1.724e-05, "loss": 1.1752, "step": 862 }, { "epoch": 0.14089220848128647, "grad_norm": 2.218731641769409, "learning_rate": 1.726e-05, "loss": 1.1682, "step": 863 }, { "epoch": 0.14105546712379086, "grad_norm": 3.526254177093506, "learning_rate": 1.728e-05, "loss": 1.2073, "step": 864 }, { "epoch": 0.14121872576629527, "grad_norm": 2.1793603897094727, "learning_rate": 1.73e-05, "loss": 1.2821, "step": 865 }, { "epoch": 0.14138198440879965, "grad_norm": 2.213533639907837, "learning_rate": 1.732e-05, "loss": 1.1173, "step": 866 }, { "epoch": 0.14154524305130403, "grad_norm": 2.397644519805908, "learning_rate": 1.734e-05, "loss": 1.2369, "step": 867 }, { "epoch": 0.14170850169380841, "grad_norm": 2.247380018234253, "learning_rate": 1.736e-05, "loss": 1.0636, "step": 868 }, { "epoch": 0.1418717603363128, "grad_norm": 2.516901969909668, "learning_rate": 1.7380000000000003e-05, "loss": 1.2021, "step": 869 }, { "epoch": 0.14203501897881718, "grad_norm": 2.1053361892700195, "learning_rate": 1.7400000000000003e-05, "loss": 1.0103, "step": 870 }, { "epoch": 0.1421982776213216, "grad_norm": 2.3672521114349365, "learning_rate": 1.7420000000000003e-05, "loss": 0.8694, "step": 871 }, { "epoch": 0.14236153626382597, "grad_norm": 2.465688943862915, "learning_rate": 1.7440000000000002e-05, "loss": 0.9321, "step": 872 }, { "epoch": 0.14252479490633035, "grad_norm": 1.9444098472595215, "learning_rate": 1.7460000000000002e-05, "loss": 0.9203, "step": 873 }, { "epoch": 0.14268805354883474, "grad_norm": 2.4069600105285645, "learning_rate": 1.7480000000000002e-05, "loss": 1.1466, "step": 874 }, { "epoch": 0.14285131219133912, "grad_norm": 2.3116390705108643, "learning_rate": 1.7500000000000002e-05, "loss": 1.062, "step": 875 }, { "epoch": 0.14301457083384353, "grad_norm": 2.219588041305542, "learning_rate": 1.752e-05, "loss": 1.0319, "step": 876 }, { "epoch": 0.1431778294763479, "grad_norm": 2.5299127101898193, "learning_rate": 1.754e-05, "loss": 1.0672, "step": 877 }, { "epoch": 0.1433410881188523, "grad_norm": 2.377128839492798, "learning_rate": 1.756e-05, "loss": 1.0066, "step": 878 }, { "epoch": 0.14350434676135668, "grad_norm": 2.3860764503479004, "learning_rate": 1.758e-05, "loss": 1.1342, "step": 879 }, { "epoch": 0.14366760540386106, "grad_norm": 2.5924859046936035, "learning_rate": 1.76e-05, "loss": 0.971, "step": 880 }, { "epoch": 0.14383086404636544, "grad_norm": 2.513972759246826, "learning_rate": 1.762e-05, "loss": 0.9887, "step": 881 }, { "epoch": 0.14399412268886985, "grad_norm": 2.78017520904541, "learning_rate": 1.764e-05, "loss": 1.3437, "step": 882 }, { "epoch": 0.14415738133137423, "grad_norm": 2.2695229053497314, "learning_rate": 1.766e-05, "loss": 1.0414, "step": 883 }, { "epoch": 0.14432063997387862, "grad_norm": 2.2176451683044434, "learning_rate": 1.768e-05, "loss": 1.0601, "step": 884 }, { "epoch": 0.144483898616383, "grad_norm": 2.479351043701172, "learning_rate": 1.77e-05, "loss": 1.2492, "step": 885 }, { "epoch": 0.14464715725888738, "grad_norm": 2.792762279510498, "learning_rate": 1.7720000000000003e-05, "loss": 1.1413, "step": 886 }, { "epoch": 0.1448104159013918, "grad_norm": 2.196213722229004, "learning_rate": 1.7740000000000003e-05, "loss": 0.9508, "step": 887 }, { "epoch": 0.14497367454389617, "grad_norm": 2.2285099029541016, "learning_rate": 1.7760000000000003e-05, "loss": 1.0122, "step": 888 }, { "epoch": 0.14513693318640056, "grad_norm": 2.2343876361846924, "learning_rate": 1.7780000000000003e-05, "loss": 1.1884, "step": 889 }, { "epoch": 0.14530019182890494, "grad_norm": 2.203963279724121, "learning_rate": 1.7800000000000002e-05, "loss": 0.8829, "step": 890 }, { "epoch": 0.14546345047140932, "grad_norm": 2.469761610031128, "learning_rate": 1.7820000000000002e-05, "loss": 1.1417, "step": 891 }, { "epoch": 0.1456267091139137, "grad_norm": 2.4011363983154297, "learning_rate": 1.7840000000000002e-05, "loss": 1.007, "step": 892 }, { "epoch": 0.14578996775641812, "grad_norm": 2.156428575515747, "learning_rate": 1.7860000000000002e-05, "loss": 1.035, "step": 893 }, { "epoch": 0.1459532263989225, "grad_norm": 2.422863483428955, "learning_rate": 1.788e-05, "loss": 1.1256, "step": 894 }, { "epoch": 0.14611648504142688, "grad_norm": 2.4491419792175293, "learning_rate": 1.79e-05, "loss": 1.0105, "step": 895 }, { "epoch": 0.14627974368393126, "grad_norm": 2.2466745376586914, "learning_rate": 1.792e-05, "loss": 1.0177, "step": 896 }, { "epoch": 0.14644300232643565, "grad_norm": 2.443605661392212, "learning_rate": 1.794e-05, "loss": 1.0399, "step": 897 }, { "epoch": 0.14660626096894006, "grad_norm": 2.762124538421631, "learning_rate": 1.796e-05, "loss": 1.1509, "step": 898 }, { "epoch": 0.14676951961144444, "grad_norm": 2.450104236602783, "learning_rate": 1.798e-05, "loss": 1.2523, "step": 899 }, { "epoch": 0.14693277825394882, "grad_norm": 2.320499897003174, "learning_rate": 1.8e-05, "loss": 1.0628, "step": 900 }, { "epoch": 0.1470960368964532, "grad_norm": 2.3713152408599854, "learning_rate": 1.802e-05, "loss": 1.073, "step": 901 }, { "epoch": 0.14725929553895759, "grad_norm": 2.3368642330169678, "learning_rate": 1.8040000000000003e-05, "loss": 1.0805, "step": 902 }, { "epoch": 0.14742255418146197, "grad_norm": 2.1584417819976807, "learning_rate": 1.8060000000000003e-05, "loss": 1.015, "step": 903 }, { "epoch": 0.14758581282396638, "grad_norm": 2.605186939239502, "learning_rate": 1.8080000000000003e-05, "loss": 1.0982, "step": 904 }, { "epoch": 0.14774907146647076, "grad_norm": 2.4269142150878906, "learning_rate": 1.8100000000000003e-05, "loss": 0.9702, "step": 905 }, { "epoch": 0.14791233010897514, "grad_norm": 2.6633119583129883, "learning_rate": 1.8120000000000003e-05, "loss": 1.282, "step": 906 }, { "epoch": 0.14807558875147953, "grad_norm": 2.067863702774048, "learning_rate": 1.8140000000000003e-05, "loss": 0.9699, "step": 907 }, { "epoch": 0.1482388473939839, "grad_norm": 2.3342347145080566, "learning_rate": 1.8160000000000002e-05, "loss": 1.105, "step": 908 }, { "epoch": 0.14840210603648832, "grad_norm": 2.120023250579834, "learning_rate": 1.8180000000000002e-05, "loss": 0.9601, "step": 909 }, { "epoch": 0.1485653646789927, "grad_norm": 2.340888500213623, "learning_rate": 1.8200000000000002e-05, "loss": 1.2264, "step": 910 }, { "epoch": 0.14872862332149708, "grad_norm": 2.3110930919647217, "learning_rate": 1.8220000000000002e-05, "loss": 1.1465, "step": 911 }, { "epoch": 0.14889188196400147, "grad_norm": 2.3530757427215576, "learning_rate": 1.824e-05, "loss": 1.0401, "step": 912 }, { "epoch": 0.14905514060650585, "grad_norm": 2.5621514320373535, "learning_rate": 1.826e-05, "loss": 1.1464, "step": 913 }, { "epoch": 0.14921839924901023, "grad_norm": 2.6963536739349365, "learning_rate": 1.828e-05, "loss": 1.6817, "step": 914 }, { "epoch": 0.14938165789151464, "grad_norm": 2.5850436687469482, "learning_rate": 1.83e-05, "loss": 1.2472, "step": 915 }, { "epoch": 0.14954491653401902, "grad_norm": 2.27846622467041, "learning_rate": 1.832e-05, "loss": 1.088, "step": 916 }, { "epoch": 0.1497081751765234, "grad_norm": 2.473759412765503, "learning_rate": 1.834e-05, "loss": 1.0865, "step": 917 }, { "epoch": 0.1498714338190278, "grad_norm": 2.447110176086426, "learning_rate": 1.8360000000000004e-05, "loss": 1.1901, "step": 918 }, { "epoch": 0.15003469246153217, "grad_norm": 2.13765549659729, "learning_rate": 1.8380000000000004e-05, "loss": 1.1359, "step": 919 }, { "epoch": 0.15019795110403658, "grad_norm": 2.4469408988952637, "learning_rate": 1.8400000000000003e-05, "loss": 0.9667, "step": 920 }, { "epoch": 0.15036120974654096, "grad_norm": 2.3996708393096924, "learning_rate": 1.8420000000000003e-05, "loss": 1.0945, "step": 921 }, { "epoch": 0.15052446838904535, "grad_norm": 2.118812084197998, "learning_rate": 1.8440000000000003e-05, "loss": 1.1095, "step": 922 }, { "epoch": 0.15068772703154973, "grad_norm": 2.3676974773406982, "learning_rate": 1.8460000000000003e-05, "loss": 1.0102, "step": 923 }, { "epoch": 0.1508509856740541, "grad_norm": 2.2139267921447754, "learning_rate": 1.8480000000000003e-05, "loss": 0.9487, "step": 924 }, { "epoch": 0.15101424431655852, "grad_norm": 2.4325449466705322, "learning_rate": 1.8500000000000002e-05, "loss": 1.0409, "step": 925 }, { "epoch": 0.1511775029590629, "grad_norm": 2.704031229019165, "learning_rate": 1.8520000000000002e-05, "loss": 1.0144, "step": 926 }, { "epoch": 0.1513407616015673, "grad_norm": 2.346242904663086, "learning_rate": 1.8540000000000002e-05, "loss": 1.0091, "step": 927 }, { "epoch": 0.15150402024407167, "grad_norm": 2.290635824203491, "learning_rate": 1.8560000000000002e-05, "loss": 1.1726, "step": 928 }, { "epoch": 0.15166727888657605, "grad_norm": 2.4403936862945557, "learning_rate": 1.858e-05, "loss": 1.1241, "step": 929 }, { "epoch": 0.15183053752908043, "grad_norm": 2.4562816619873047, "learning_rate": 1.86e-05, "loss": 1.1553, "step": 930 }, { "epoch": 0.15199379617158484, "grad_norm": 2.367035388946533, "learning_rate": 1.862e-05, "loss": 0.9737, "step": 931 }, { "epoch": 0.15215705481408923, "grad_norm": 2.527221202850342, "learning_rate": 1.864e-05, "loss": 1.2389, "step": 932 }, { "epoch": 0.1523203134565936, "grad_norm": 2.767686128616333, "learning_rate": 1.866e-05, "loss": 1.0117, "step": 933 }, { "epoch": 0.152483572099098, "grad_norm": 3.014974355697632, "learning_rate": 1.8680000000000004e-05, "loss": 1.0793, "step": 934 }, { "epoch": 0.15264683074160237, "grad_norm": 2.3546905517578125, "learning_rate": 1.8700000000000004e-05, "loss": 1.009, "step": 935 }, { "epoch": 0.15281008938410678, "grad_norm": 2.686230421066284, "learning_rate": 1.8720000000000004e-05, "loss": 1.8782, "step": 936 }, { "epoch": 0.15297334802661117, "grad_norm": 2.307716131210327, "learning_rate": 1.8740000000000004e-05, "loss": 1.0153, "step": 937 }, { "epoch": 0.15313660666911555, "grad_norm": 2.2142722606658936, "learning_rate": 1.876e-05, "loss": 0.9678, "step": 938 }, { "epoch": 0.15329986531161993, "grad_norm": 2.4044694900512695, "learning_rate": 1.878e-05, "loss": 0.8921, "step": 939 }, { "epoch": 0.15346312395412431, "grad_norm": 2.0629494190216064, "learning_rate": 1.88e-05, "loss": 0.9116, "step": 940 }, { "epoch": 0.1536263825966287, "grad_norm": 2.3467674255371094, "learning_rate": 1.882e-05, "loss": 0.9945, "step": 941 }, { "epoch": 0.1537896412391331, "grad_norm": 2.241781234741211, "learning_rate": 1.884e-05, "loss": 1.0151, "step": 942 }, { "epoch": 0.1539528998816375, "grad_norm": 2.0616438388824463, "learning_rate": 1.886e-05, "loss": 0.9926, "step": 943 }, { "epoch": 0.15411615852414187, "grad_norm": 2.4432897567749023, "learning_rate": 1.8880000000000002e-05, "loss": 0.9447, "step": 944 }, { "epoch": 0.15427941716664625, "grad_norm": 2.038989782333374, "learning_rate": 1.8900000000000002e-05, "loss": 1.0205, "step": 945 }, { "epoch": 0.15444267580915064, "grad_norm": 2.1921145915985107, "learning_rate": 1.8920000000000002e-05, "loss": 0.9221, "step": 946 }, { "epoch": 0.15460593445165505, "grad_norm": 2.7732040882110596, "learning_rate": 1.894e-05, "loss": 1.0628, "step": 947 }, { "epoch": 0.15476919309415943, "grad_norm": 2.082597255706787, "learning_rate": 1.896e-05, "loss": 0.8795, "step": 948 }, { "epoch": 0.1549324517366638, "grad_norm": 2.2344720363616943, "learning_rate": 1.898e-05, "loss": 0.9981, "step": 949 }, { "epoch": 0.1550957103791682, "grad_norm": 2.1080291271209717, "learning_rate": 1.9e-05, "loss": 0.8675, "step": 950 }, { "epoch": 0.15525896902167258, "grad_norm": 2.2417209148406982, "learning_rate": 1.902e-05, "loss": 0.8967, "step": 951 }, { "epoch": 0.15542222766417696, "grad_norm": 2.2811005115509033, "learning_rate": 1.904e-05, "loss": 1.0032, "step": 952 }, { "epoch": 0.15558548630668137, "grad_norm": 2.200798273086548, "learning_rate": 1.906e-05, "loss": 1.0215, "step": 953 }, { "epoch": 0.15574874494918575, "grad_norm": 2.081390142440796, "learning_rate": 1.908e-05, "loss": 0.9008, "step": 954 }, { "epoch": 0.15591200359169013, "grad_norm": 2.841245412826538, "learning_rate": 1.91e-05, "loss": 1.1497, "step": 955 }, { "epoch": 0.15607526223419452, "grad_norm": 2.388960123062134, "learning_rate": 1.912e-05, "loss": 1.278, "step": 956 }, { "epoch": 0.1562385208766989, "grad_norm": 2.2032625675201416, "learning_rate": 1.914e-05, "loss": 1.0945, "step": 957 }, { "epoch": 0.1564017795192033, "grad_norm": 2.2127585411071777, "learning_rate": 1.916e-05, "loss": 0.888, "step": 958 }, { "epoch": 0.1565650381617077, "grad_norm": 2.2445883750915527, "learning_rate": 1.918e-05, "loss": 1.0393, "step": 959 }, { "epoch": 0.15672829680421207, "grad_norm": 2.3601393699645996, "learning_rate": 1.9200000000000003e-05, "loss": 1.0214, "step": 960 }, { "epoch": 0.15689155544671646, "grad_norm": 2.6366095542907715, "learning_rate": 1.9220000000000002e-05, "loss": 1.1965, "step": 961 }, { "epoch": 0.15705481408922084, "grad_norm": 2.335256338119507, "learning_rate": 1.9240000000000002e-05, "loss": 1.1656, "step": 962 }, { "epoch": 0.15721807273172522, "grad_norm": 2.935983896255493, "learning_rate": 1.9260000000000002e-05, "loss": 1.1075, "step": 963 }, { "epoch": 0.15738133137422963, "grad_norm": 2.0931265354156494, "learning_rate": 1.9280000000000002e-05, "loss": 0.9205, "step": 964 }, { "epoch": 0.15754459001673402, "grad_norm": 2.435394287109375, "learning_rate": 1.93e-05, "loss": 1.1145, "step": 965 }, { "epoch": 0.1577078486592384, "grad_norm": 2.206242322921753, "learning_rate": 1.932e-05, "loss": 1.1818, "step": 966 }, { "epoch": 0.15787110730174278, "grad_norm": 2.281053066253662, "learning_rate": 1.934e-05, "loss": 0.9596, "step": 967 }, { "epoch": 0.15803436594424716, "grad_norm": 2.3303661346435547, "learning_rate": 1.936e-05, "loss": 1.0887, "step": 968 }, { "epoch": 0.15819762458675157, "grad_norm": 1.9661091566085815, "learning_rate": 1.938e-05, "loss": 0.9627, "step": 969 }, { "epoch": 0.15836088322925596, "grad_norm": 2.201622724533081, "learning_rate": 1.94e-05, "loss": 0.9263, "step": 970 }, { "epoch": 0.15852414187176034, "grad_norm": 2.303248882293701, "learning_rate": 1.942e-05, "loss": 1.0863, "step": 971 }, { "epoch": 0.15868740051426472, "grad_norm": 2.137838840484619, "learning_rate": 1.944e-05, "loss": 0.9642, "step": 972 }, { "epoch": 0.1588506591567691, "grad_norm": 2.151686906814575, "learning_rate": 1.946e-05, "loss": 1.0841, "step": 973 }, { "epoch": 0.15901391779927349, "grad_norm": 2.3956427574157715, "learning_rate": 1.948e-05, "loss": 0.9505, "step": 974 }, { "epoch": 0.1591771764417779, "grad_norm": 2.575671911239624, "learning_rate": 1.95e-05, "loss": 1.1096, "step": 975 }, { "epoch": 0.15934043508428228, "grad_norm": 2.334066867828369, "learning_rate": 1.9520000000000003e-05, "loss": 1.006, "step": 976 }, { "epoch": 0.15950369372678666, "grad_norm": 2.7651143074035645, "learning_rate": 1.9540000000000003e-05, "loss": 0.8839, "step": 977 }, { "epoch": 0.15966695236929104, "grad_norm": 2.4744086265563965, "learning_rate": 1.9560000000000002e-05, "loss": 0.9218, "step": 978 }, { "epoch": 0.15983021101179543, "grad_norm": 2.304168224334717, "learning_rate": 1.9580000000000002e-05, "loss": 0.979, "step": 979 }, { "epoch": 0.15999346965429984, "grad_norm": 3.1773602962493896, "learning_rate": 1.9600000000000002e-05, "loss": 0.8332, "step": 980 }, { "epoch": 0.16015672829680422, "grad_norm": 2.3833625316619873, "learning_rate": 1.9620000000000002e-05, "loss": 1.0549, "step": 981 }, { "epoch": 0.1603199869393086, "grad_norm": 2.2018184661865234, "learning_rate": 1.9640000000000002e-05, "loss": 1.0805, "step": 982 }, { "epoch": 0.16048324558181298, "grad_norm": 2.5941967964172363, "learning_rate": 1.966e-05, "loss": 1.0817, "step": 983 }, { "epoch": 0.16064650422431737, "grad_norm": 2.1696386337280273, "learning_rate": 1.968e-05, "loss": 1.0285, "step": 984 }, { "epoch": 0.16080976286682178, "grad_norm": 2.133671283721924, "learning_rate": 1.97e-05, "loss": 0.9338, "step": 985 }, { "epoch": 0.16097302150932616, "grad_norm": 2.1205811500549316, "learning_rate": 1.972e-05, "loss": 0.984, "step": 986 }, { "epoch": 0.16113628015183054, "grad_norm": 2.0616884231567383, "learning_rate": 1.974e-05, "loss": 0.8192, "step": 987 }, { "epoch": 0.16129953879433492, "grad_norm": 2.1773252487182617, "learning_rate": 1.976e-05, "loss": 1.0812, "step": 988 }, { "epoch": 0.1614627974368393, "grad_norm": 2.3835434913635254, "learning_rate": 1.978e-05, "loss": 1.0698, "step": 989 }, { "epoch": 0.1616260560793437, "grad_norm": 2.598984479904175, "learning_rate": 1.98e-05, "loss": 1.2068, "step": 990 }, { "epoch": 0.1617893147218481, "grad_norm": 2.163893699645996, "learning_rate": 1.982e-05, "loss": 1.1473, "step": 991 }, { "epoch": 0.16195257336435248, "grad_norm": 2.4261348247528076, "learning_rate": 1.9840000000000003e-05, "loss": 1.0327, "step": 992 }, { "epoch": 0.16211583200685686, "grad_norm": 2.2704286575317383, "learning_rate": 1.9860000000000003e-05, "loss": 1.2738, "step": 993 }, { "epoch": 0.16227909064936125, "grad_norm": 2.5879311561584473, "learning_rate": 1.9880000000000003e-05, "loss": 1.1713, "step": 994 }, { "epoch": 0.16244234929186563, "grad_norm": 2.484565496444702, "learning_rate": 1.9900000000000003e-05, "loss": 1.112, "step": 995 }, { "epoch": 0.16260560793437004, "grad_norm": 2.3270070552825928, "learning_rate": 1.9920000000000002e-05, "loss": 0.9787, "step": 996 }, { "epoch": 0.16276886657687442, "grad_norm": 2.4373741149902344, "learning_rate": 1.9940000000000002e-05, "loss": 0.8845, "step": 997 }, { "epoch": 0.1629321252193788, "grad_norm": 2.1516246795654297, "learning_rate": 1.9960000000000002e-05, "loss": 1.0106, "step": 998 }, { "epoch": 0.1630953838618832, "grad_norm": 2.3449010848999023, "learning_rate": 1.9980000000000002e-05, "loss": 1.0922, "step": 999 }, { "epoch": 0.16325864250438757, "grad_norm": 2.336559295654297, "learning_rate": 2e-05, "loss": 1.1378, "step": 1000 }, { "epoch": 0.16342190114689195, "grad_norm": 2.3514554500579834, "learning_rate": 1.9999999994965004e-05, "loss": 1.0644, "step": 1001 }, { "epoch": 0.16358515978939636, "grad_norm": 2.2715625762939453, "learning_rate": 1.9999999979860004e-05, "loss": 1.0649, "step": 1002 }, { "epoch": 0.16374841843190074, "grad_norm": 2.4589476585388184, "learning_rate": 1.9999999954685013e-05, "loss": 1.0996, "step": 1003 }, { "epoch": 0.16391167707440513, "grad_norm": 2.614118814468384, "learning_rate": 1.9999999919440023e-05, "loss": 1.2055, "step": 1004 }, { "epoch": 0.1640749357169095, "grad_norm": 2.2623252868652344, "learning_rate": 1.9999999874125034e-05, "loss": 0.9441, "step": 1005 }, { "epoch": 0.1642381943594139, "grad_norm": 2.55422306060791, "learning_rate": 1.999999981874005e-05, "loss": 1.0869, "step": 1006 }, { "epoch": 0.1644014530019183, "grad_norm": 2.1740806102752686, "learning_rate": 1.9999999753285067e-05, "loss": 0.9363, "step": 1007 }, { "epoch": 0.16456471164442268, "grad_norm": 2.140511989593506, "learning_rate": 1.9999999677760086e-05, "loss": 0.9024, "step": 1008 }, { "epoch": 0.16472797028692707, "grad_norm": 2.785182237625122, "learning_rate": 1.999999959216511e-05, "loss": 1.0512, "step": 1009 }, { "epoch": 0.16489122892943145, "grad_norm": 2.6590869426727295, "learning_rate": 1.9999999496500138e-05, "loss": 1.129, "step": 1010 }, { "epoch": 0.16505448757193583, "grad_norm": 3.057839870452881, "learning_rate": 1.9999999390765168e-05, "loss": 1.2267, "step": 1011 }, { "epoch": 0.16521774621444021, "grad_norm": 2.4520065784454346, "learning_rate": 1.99999992749602e-05, "loss": 0.9163, "step": 1012 }, { "epoch": 0.16538100485694462, "grad_norm": 2.593477487564087, "learning_rate": 1.9999999149085238e-05, "loss": 1.0129, "step": 1013 }, { "epoch": 0.165544263499449, "grad_norm": 2.6609976291656494, "learning_rate": 1.999999901314028e-05, "loss": 1.1978, "step": 1014 }, { "epoch": 0.1657075221419534, "grad_norm": 2.2153432369232178, "learning_rate": 1.999999886712532e-05, "loss": 1.022, "step": 1015 }, { "epoch": 0.16587078078445777, "grad_norm": 2.2598860263824463, "learning_rate": 1.999999871104037e-05, "loss": 1.1382, "step": 1016 }, { "epoch": 0.16603403942696215, "grad_norm": 2.4949610233306885, "learning_rate": 1.9999998544885422e-05, "loss": 1.0505, "step": 1017 }, { "epoch": 0.16619729806946656, "grad_norm": 2.293769121170044, "learning_rate": 1.9999998368660475e-05, "loss": 1.1003, "step": 1018 }, { "epoch": 0.16636055671197095, "grad_norm": 2.0951051712036133, "learning_rate": 1.9999998182365536e-05, "loss": 1.0586, "step": 1019 }, { "epoch": 0.16652381535447533, "grad_norm": 2.4665348529815674, "learning_rate": 1.9999997986000598e-05, "loss": 1.0945, "step": 1020 }, { "epoch": 0.1666870739969797, "grad_norm": 2.3877999782562256, "learning_rate": 1.999999777956567e-05, "loss": 1.0852, "step": 1021 }, { "epoch": 0.1668503326394841, "grad_norm": 2.138324499130249, "learning_rate": 1.9999997563060744e-05, "loss": 1.1416, "step": 1022 }, { "epoch": 0.16701359128198848, "grad_norm": 2.38706111907959, "learning_rate": 1.999999733648582e-05, "loss": 0.978, "step": 1023 }, { "epoch": 0.1671768499244929, "grad_norm": 2.095360040664673, "learning_rate": 1.9999997099840905e-05, "loss": 1.0607, "step": 1024 }, { "epoch": 0.16734010856699727, "grad_norm": 2.2453858852386475, "learning_rate": 1.9999996853125995e-05, "loss": 1.0933, "step": 1025 }, { "epoch": 0.16750336720950165, "grad_norm": 2.563276767730713, "learning_rate": 1.9999996596341092e-05, "loss": 1.159, "step": 1026 }, { "epoch": 0.16766662585200603, "grad_norm": 2.481497049331665, "learning_rate": 1.9999996329486195e-05, "loss": 1.5889, "step": 1027 }, { "epoch": 0.16782988449451042, "grad_norm": 2.391479253768921, "learning_rate": 1.9999996052561302e-05, "loss": 1.2435, "step": 1028 }, { "epoch": 0.16799314313701483, "grad_norm": 2.746399164199829, "learning_rate": 1.9999995765566414e-05, "loss": 1.3302, "step": 1029 }, { "epoch": 0.1681564017795192, "grad_norm": 2.409726142883301, "learning_rate": 1.9999995468501538e-05, "loss": 1.1857, "step": 1030 }, { "epoch": 0.1683196604220236, "grad_norm": 2.2544591426849365, "learning_rate": 1.999999516136667e-05, "loss": 1.0864, "step": 1031 }, { "epoch": 0.16848291906452798, "grad_norm": 2.6994612216949463, "learning_rate": 1.9999994844161802e-05, "loss": 1.1999, "step": 1032 }, { "epoch": 0.16864617770703236, "grad_norm": 1.8875113725662231, "learning_rate": 1.9999994516886947e-05, "loss": 0.922, "step": 1033 }, { "epoch": 0.16880943634953674, "grad_norm": 2.148346185684204, "learning_rate": 1.99999941795421e-05, "loss": 1.0054, "step": 1034 }, { "epoch": 0.16897269499204115, "grad_norm": 1.928247094154358, "learning_rate": 1.999999383212726e-05, "loss": 0.9847, "step": 1035 }, { "epoch": 0.16913595363454553, "grad_norm": 2.3509058952331543, "learning_rate": 1.999999347464243e-05, "loss": 1.1257, "step": 1036 }, { "epoch": 0.16929921227704992, "grad_norm": 2.087721824645996, "learning_rate": 1.999999310708761e-05, "loss": 0.9637, "step": 1037 }, { "epoch": 0.1694624709195543, "grad_norm": 2.2591004371643066, "learning_rate": 1.99999927294628e-05, "loss": 0.9033, "step": 1038 }, { "epoch": 0.16962572956205868, "grad_norm": 2.5685207843780518, "learning_rate": 1.9999992341767995e-05, "loss": 1.0999, "step": 1039 }, { "epoch": 0.1697889882045631, "grad_norm": 2.15116286277771, "learning_rate": 1.9999991944003204e-05, "loss": 1.1206, "step": 1040 }, { "epoch": 0.16995224684706747, "grad_norm": 2.097074508666992, "learning_rate": 1.9999991536168424e-05, "loss": 0.9521, "step": 1041 }, { "epoch": 0.17011550548957186, "grad_norm": 2.128910541534424, "learning_rate": 1.9999991118263655e-05, "loss": 0.845, "step": 1042 }, { "epoch": 0.17027876413207624, "grad_norm": 2.199960708618164, "learning_rate": 1.9999990690288898e-05, "loss": 1.0856, "step": 1043 }, { "epoch": 0.17044202277458062, "grad_norm": 2.304919719696045, "learning_rate": 1.9999990252244153e-05, "loss": 1.0568, "step": 1044 }, { "epoch": 0.17060528141708503, "grad_norm": 2.1934592723846436, "learning_rate": 1.999998980412942e-05, "loss": 1.1594, "step": 1045 }, { "epoch": 0.1707685400595894, "grad_norm": 2.3597006797790527, "learning_rate": 1.99999893459447e-05, "loss": 1.0172, "step": 1046 }, { "epoch": 0.1709317987020938, "grad_norm": 2.3874928951263428, "learning_rate": 1.9999988877689992e-05, "loss": 1.0739, "step": 1047 }, { "epoch": 0.17109505734459818, "grad_norm": 2.2946035861968994, "learning_rate": 1.99999883993653e-05, "loss": 0.8877, "step": 1048 }, { "epoch": 0.17125831598710256, "grad_norm": 2.2626149654388428, "learning_rate": 1.999998791097062e-05, "loss": 1.1662, "step": 1049 }, { "epoch": 0.17142157462960694, "grad_norm": 2.1669869422912598, "learning_rate": 1.9999987412505956e-05, "loss": 0.9913, "step": 1050 }, { "epoch": 0.17158483327211135, "grad_norm": 2.5278143882751465, "learning_rate": 1.999998690397131e-05, "loss": 1.1662, "step": 1051 }, { "epoch": 0.17174809191461574, "grad_norm": 2.166165828704834, "learning_rate": 1.9999986385366675e-05, "loss": 1.0432, "step": 1052 }, { "epoch": 0.17191135055712012, "grad_norm": 2.134746551513672, "learning_rate": 1.9999985856692058e-05, "loss": 1.0275, "step": 1053 }, { "epoch": 0.1720746091996245, "grad_norm": 2.0434696674346924, "learning_rate": 1.9999985317947458e-05, "loss": 0.9469, "step": 1054 }, { "epoch": 0.17223786784212888, "grad_norm": 2.1447222232818604, "learning_rate": 1.999998476913288e-05, "loss": 1.0476, "step": 1055 }, { "epoch": 0.1724011264846333, "grad_norm": 2.585195541381836, "learning_rate": 1.9999984210248314e-05, "loss": 1.1478, "step": 1056 }, { "epoch": 0.17256438512713768, "grad_norm": 2.3194594383239746, "learning_rate": 1.999998364129377e-05, "loss": 0.9964, "step": 1057 }, { "epoch": 0.17272764376964206, "grad_norm": 2.095606565475464, "learning_rate": 1.9999983062269243e-05, "loss": 1.0483, "step": 1058 }, { "epoch": 0.17289090241214644, "grad_norm": 2.25608229637146, "learning_rate": 1.9999982473174738e-05, "loss": 1.1089, "step": 1059 }, { "epoch": 0.17305416105465082, "grad_norm": 2.320568561553955, "learning_rate": 1.999998187401025e-05, "loss": 1.0553, "step": 1060 }, { "epoch": 0.1732174196971552, "grad_norm": 2.3252997398376465, "learning_rate": 1.9999981264775784e-05, "loss": 1.1983, "step": 1061 }, { "epoch": 0.17338067833965962, "grad_norm": 3.0871660709381104, "learning_rate": 1.999998064547134e-05, "loss": 1.2339, "step": 1062 }, { "epoch": 0.173543936982164, "grad_norm": 2.380835771560669, "learning_rate": 1.999998001609692e-05, "loss": 1.0403, "step": 1063 }, { "epoch": 0.17370719562466838, "grad_norm": 2.651228666305542, "learning_rate": 1.999997937665252e-05, "loss": 0.916, "step": 1064 }, { "epoch": 0.17387045426717276, "grad_norm": 2.321899652481079, "learning_rate": 1.9999978727138146e-05, "loss": 1.1296, "step": 1065 }, { "epoch": 0.17403371290967715, "grad_norm": 2.0351624488830566, "learning_rate": 1.9999978067553796e-05, "loss": 1.026, "step": 1066 }, { "epoch": 0.17419697155218156, "grad_norm": 2.510901689529419, "learning_rate": 1.999997739789947e-05, "loss": 0.9268, "step": 1067 }, { "epoch": 0.17436023019468594, "grad_norm": 2.541374683380127, "learning_rate": 1.9999976718175166e-05, "loss": 1.0569, "step": 1068 }, { "epoch": 0.17452348883719032, "grad_norm": 2.503784418106079, "learning_rate": 1.999997602838089e-05, "loss": 1.1805, "step": 1069 }, { "epoch": 0.1746867474796947, "grad_norm": 2.244020938873291, "learning_rate": 1.9999975328516643e-05, "loss": 1.0966, "step": 1070 }, { "epoch": 0.1748500061221991, "grad_norm": 2.1564693450927734, "learning_rate": 1.999997461858242e-05, "loss": 1.1774, "step": 1071 }, { "epoch": 0.17501326476470347, "grad_norm": 1.9784401655197144, "learning_rate": 1.999997389857823e-05, "loss": 0.9598, "step": 1072 }, { "epoch": 0.17517652340720788, "grad_norm": 2.2448673248291016, "learning_rate": 1.9999973168504067e-05, "loss": 0.9912, "step": 1073 }, { "epoch": 0.17533978204971226, "grad_norm": 2.1335999965667725, "learning_rate": 1.9999972428359932e-05, "loss": 0.9893, "step": 1074 }, { "epoch": 0.17550304069221664, "grad_norm": 2.3255131244659424, "learning_rate": 1.999997167814583e-05, "loss": 1.0537, "step": 1075 }, { "epoch": 0.17566629933472103, "grad_norm": 2.0237951278686523, "learning_rate": 1.9999970917861757e-05, "loss": 1.0538, "step": 1076 }, { "epoch": 0.1758295579772254, "grad_norm": 2.1195428371429443, "learning_rate": 1.9999970147507714e-05, "loss": 1.0867, "step": 1077 }, { "epoch": 0.17599281661972982, "grad_norm": 1.9172940254211426, "learning_rate": 1.999996936708371e-05, "loss": 0.8206, "step": 1078 }, { "epoch": 0.1761560752622342, "grad_norm": 2.0909342765808105, "learning_rate": 1.999996857658973e-05, "loss": 1.0318, "step": 1079 }, { "epoch": 0.17631933390473858, "grad_norm": 2.5228865146636963, "learning_rate": 1.9999967776025794e-05, "loss": 1.0434, "step": 1080 }, { "epoch": 0.17648259254724297, "grad_norm": 2.6171646118164062, "learning_rate": 1.999996696539189e-05, "loss": 1.0133, "step": 1081 }, { "epoch": 0.17664585118974735, "grad_norm": 2.546786069869995, "learning_rate": 1.9999966144688022e-05, "loss": 0.998, "step": 1082 }, { "epoch": 0.17680910983225173, "grad_norm": 2.314805030822754, "learning_rate": 1.9999965313914187e-05, "loss": 1.2567, "step": 1083 }, { "epoch": 0.17697236847475614, "grad_norm": 2.327486515045166, "learning_rate": 1.999996447307039e-05, "loss": 0.9973, "step": 1084 }, { "epoch": 0.17713562711726052, "grad_norm": 2.1672661304473877, "learning_rate": 1.9999963622156637e-05, "loss": 0.8303, "step": 1085 }, { "epoch": 0.1772988857597649, "grad_norm": 2.19378662109375, "learning_rate": 1.9999962761172918e-05, "loss": 0.9721, "step": 1086 }, { "epoch": 0.1774621444022693, "grad_norm": 2.725313901901245, "learning_rate": 1.9999961890119245e-05, "loss": 1.3225, "step": 1087 }, { "epoch": 0.17762540304477367, "grad_norm": 2.0173120498657227, "learning_rate": 1.9999961008995607e-05, "loss": 1.0302, "step": 1088 }, { "epoch": 0.17778866168727808, "grad_norm": 2.1488540172576904, "learning_rate": 1.9999960117802014e-05, "loss": 1.0383, "step": 1089 }, { "epoch": 0.17795192032978246, "grad_norm": 2.299060583114624, "learning_rate": 1.9999959216538463e-05, "loss": 1.0007, "step": 1090 }, { "epoch": 0.17811517897228685, "grad_norm": 2.6042659282684326, "learning_rate": 1.9999958305204955e-05, "loss": 1.2325, "step": 1091 }, { "epoch": 0.17827843761479123, "grad_norm": 1.9889030456542969, "learning_rate": 1.9999957383801495e-05, "loss": 0.9731, "step": 1092 }, { "epoch": 0.1784416962572956, "grad_norm": 2.363703966140747, "learning_rate": 1.9999956452328077e-05, "loss": 1.1493, "step": 1093 }, { "epoch": 0.1786049548998, "grad_norm": 2.4196512699127197, "learning_rate": 1.9999955510784705e-05, "loss": 1.054, "step": 1094 }, { "epoch": 0.1787682135423044, "grad_norm": 2.3570573329925537, "learning_rate": 1.999995455917138e-05, "loss": 1.1088, "step": 1095 }, { "epoch": 0.1789314721848088, "grad_norm": 2.4296398162841797, "learning_rate": 1.9999953597488106e-05, "loss": 0.9603, "step": 1096 }, { "epoch": 0.17909473082731317, "grad_norm": 2.5928080081939697, "learning_rate": 1.9999952625734884e-05, "loss": 1.1198, "step": 1097 }, { "epoch": 0.17925798946981755, "grad_norm": 2.5594751834869385, "learning_rate": 1.9999951643911706e-05, "loss": 0.9519, "step": 1098 }, { "epoch": 0.17942124811232193, "grad_norm": 2.5569941997528076, "learning_rate": 1.9999950652018585e-05, "loss": 1.1821, "step": 1099 }, { "epoch": 0.17958450675482635, "grad_norm": 1.89167058467865, "learning_rate": 1.9999949650055512e-05, "loss": 0.9435, "step": 1100 }, { "epoch": 0.17974776539733073, "grad_norm": 1.994001865386963, "learning_rate": 1.9999948638022495e-05, "loss": 1.0539, "step": 1101 }, { "epoch": 0.1799110240398351, "grad_norm": 2.433737277984619, "learning_rate": 1.999994761591953e-05, "loss": 1.1752, "step": 1102 }, { "epoch": 0.1800742826823395, "grad_norm": 1.8177391290664673, "learning_rate": 1.999994658374662e-05, "loss": 0.8957, "step": 1103 }, { "epoch": 0.18023754132484388, "grad_norm": 2.210867404937744, "learning_rate": 1.999994554150377e-05, "loss": 0.945, "step": 1104 }, { "epoch": 0.18040079996734826, "grad_norm": 2.2383158206939697, "learning_rate": 1.9999944489190975e-05, "loss": 1.2078, "step": 1105 }, { "epoch": 0.18056405860985267, "grad_norm": 2.2448253631591797, "learning_rate": 1.999994342680824e-05, "loss": 1.1679, "step": 1106 }, { "epoch": 0.18072731725235705, "grad_norm": 2.525371789932251, "learning_rate": 1.9999942354355566e-05, "loss": 1.2366, "step": 1107 }, { "epoch": 0.18089057589486143, "grad_norm": 2.216412305831909, "learning_rate": 1.999994127183295e-05, "loss": 1.0239, "step": 1108 }, { "epoch": 0.18105383453736582, "grad_norm": 2.325221061706543, "learning_rate": 1.9999940179240395e-05, "loss": 1.1005, "step": 1109 }, { "epoch": 0.1812170931798702, "grad_norm": 2.2306694984436035, "learning_rate": 1.9999939076577906e-05, "loss": 1.1815, "step": 1110 }, { "epoch": 0.1813803518223746, "grad_norm": 2.216809034347534, "learning_rate": 1.9999937963845478e-05, "loss": 1.0072, "step": 1111 }, { "epoch": 0.181543610464879, "grad_norm": 2.0775082111358643, "learning_rate": 1.9999936841043116e-05, "loss": 1.0162, "step": 1112 }, { "epoch": 0.18170686910738337, "grad_norm": 2.219434976577759, "learning_rate": 1.999993570817082e-05, "loss": 1.0333, "step": 1113 }, { "epoch": 0.18187012774988776, "grad_norm": 2.2550816535949707, "learning_rate": 1.9999934565228594e-05, "loss": 1.0297, "step": 1114 }, { "epoch": 0.18203338639239214, "grad_norm": 1.8956637382507324, "learning_rate": 1.9999933412216436e-05, "loss": 0.9578, "step": 1115 }, { "epoch": 0.18219664503489655, "grad_norm": 2.551481246948242, "learning_rate": 1.9999932249134347e-05, "loss": 1.0417, "step": 1116 }, { "epoch": 0.18235990367740093, "grad_norm": 2.552155017852783, "learning_rate": 1.9999931075982328e-05, "loss": 1.1275, "step": 1117 }, { "epoch": 0.1825231623199053, "grad_norm": 2.355808734893799, "learning_rate": 1.999992989276038e-05, "loss": 1.0488, "step": 1118 }, { "epoch": 0.1826864209624097, "grad_norm": 2.86993670463562, "learning_rate": 1.9999928699468506e-05, "loss": 0.8156, "step": 1119 }, { "epoch": 0.18284967960491408, "grad_norm": 2.4833908081054688, "learning_rate": 1.9999927496106708e-05, "loss": 1.2515, "step": 1120 }, { "epoch": 0.18301293824741846, "grad_norm": 2.038306713104248, "learning_rate": 1.9999926282674985e-05, "loss": 1.0707, "step": 1121 }, { "epoch": 0.18317619688992287, "grad_norm": 2.3936352729797363, "learning_rate": 1.999992505917334e-05, "loss": 0.9615, "step": 1122 }, { "epoch": 0.18333945553242725, "grad_norm": 2.4925572872161865, "learning_rate": 1.9999923825601768e-05, "loss": 1.2249, "step": 1123 }, { "epoch": 0.18350271417493164, "grad_norm": 2.205446243286133, "learning_rate": 1.999992258196028e-05, "loss": 0.9487, "step": 1124 }, { "epoch": 0.18366597281743602, "grad_norm": 2.225482225418091, "learning_rate": 1.999992132824887e-05, "loss": 1.0573, "step": 1125 }, { "epoch": 0.1838292314599404, "grad_norm": 2.1631839275360107, "learning_rate": 1.9999920064467545e-05, "loss": 0.979, "step": 1126 }, { "epoch": 0.1839924901024448, "grad_norm": 2.3457841873168945, "learning_rate": 1.9999918790616305e-05, "loss": 1.1933, "step": 1127 }, { "epoch": 0.1841557487449492, "grad_norm": 2.2748007774353027, "learning_rate": 1.9999917506695144e-05, "loss": 1.0622, "step": 1128 }, { "epoch": 0.18431900738745358, "grad_norm": 2.6205477714538574, "learning_rate": 1.999991621270407e-05, "loss": 1.0085, "step": 1129 }, { "epoch": 0.18448226602995796, "grad_norm": 2.382124423980713, "learning_rate": 1.9999914908643084e-05, "loss": 1.0002, "step": 1130 }, { "epoch": 0.18464552467246234, "grad_norm": 2.582602024078369, "learning_rate": 1.999991359451219e-05, "loss": 1.1519, "step": 1131 }, { "epoch": 0.18480878331496672, "grad_norm": 2.577791690826416, "learning_rate": 1.9999912270311376e-05, "loss": 0.8848, "step": 1132 }, { "epoch": 0.18497204195747113, "grad_norm": 2.2415804862976074, "learning_rate": 1.9999910936040662e-05, "loss": 1.1085, "step": 1133 }, { "epoch": 0.18513530059997552, "grad_norm": 2.088890314102173, "learning_rate": 1.9999909591700035e-05, "loss": 0.929, "step": 1134 }, { "epoch": 0.1852985592424799, "grad_norm": 2.7266616821289062, "learning_rate": 1.9999908237289504e-05, "loss": 1.105, "step": 1135 }, { "epoch": 0.18546181788498428, "grad_norm": 2.5893471240997314, "learning_rate": 1.999990687280907e-05, "loss": 1.1033, "step": 1136 }, { "epoch": 0.18562507652748866, "grad_norm": 2.6022417545318604, "learning_rate": 1.999990549825873e-05, "loss": 1.253, "step": 1137 }, { "epoch": 0.18578833516999307, "grad_norm": 2.2605323791503906, "learning_rate": 1.9999904113638488e-05, "loss": 1.1433, "step": 1138 }, { "epoch": 0.18595159381249746, "grad_norm": 2.047659397125244, "learning_rate": 1.9999902718948345e-05, "loss": 0.9878, "step": 1139 }, { "epoch": 0.18611485245500184, "grad_norm": 2.3359527587890625, "learning_rate": 1.9999901314188302e-05, "loss": 1.1012, "step": 1140 }, { "epoch": 0.18627811109750622, "grad_norm": 2.625244617462158, "learning_rate": 1.9999899899358362e-05, "loss": 1.0277, "step": 1141 }, { "epoch": 0.1864413697400106, "grad_norm": 2.3946313858032227, "learning_rate": 1.9999898474458525e-05, "loss": 0.9741, "step": 1142 }, { "epoch": 0.186604628382515, "grad_norm": 2.119610071182251, "learning_rate": 1.9999897039488794e-05, "loss": 1.0047, "step": 1143 }, { "epoch": 0.1867678870250194, "grad_norm": 2.176919937133789, "learning_rate": 1.999989559444917e-05, "loss": 0.9946, "step": 1144 }, { "epoch": 0.18693114566752378, "grad_norm": 2.5582399368286133, "learning_rate": 1.9999894139339652e-05, "loss": 0.9903, "step": 1145 }, { "epoch": 0.18709440431002816, "grad_norm": 2.4175796508789062, "learning_rate": 1.9999892674160244e-05, "loss": 1.0786, "step": 1146 }, { "epoch": 0.18725766295253254, "grad_norm": 2.4266650676727295, "learning_rate": 1.999989119891095e-05, "loss": 1.0346, "step": 1147 }, { "epoch": 0.18742092159503693, "grad_norm": 1.9651210308074951, "learning_rate": 1.999988971359176e-05, "loss": 0.9538, "step": 1148 }, { "epoch": 0.18758418023754134, "grad_norm": 2.1556639671325684, "learning_rate": 1.999988821820269e-05, "loss": 1.0038, "step": 1149 }, { "epoch": 0.18774743888004572, "grad_norm": 1.9531604051589966, "learning_rate": 1.9999886712743734e-05, "loss": 1.036, "step": 1150 }, { "epoch": 0.1879106975225501, "grad_norm": 2.4789934158325195, "learning_rate": 1.9999885197214895e-05, "loss": 1.0816, "step": 1151 }, { "epoch": 0.18807395616505448, "grad_norm": 1.9194669723510742, "learning_rate": 1.9999883671616173e-05, "loss": 1.0355, "step": 1152 }, { "epoch": 0.18823721480755887, "grad_norm": 2.1420159339904785, "learning_rate": 1.9999882135947574e-05, "loss": 1.0958, "step": 1153 }, { "epoch": 0.18840047345006325, "grad_norm": 2.0625131130218506, "learning_rate": 1.999988059020909e-05, "loss": 1.0212, "step": 1154 }, { "epoch": 0.18856373209256766, "grad_norm": 2.0413875579833984, "learning_rate": 1.9999879034400733e-05, "loss": 0.9744, "step": 1155 }, { "epoch": 0.18872699073507204, "grad_norm": 2.2596724033355713, "learning_rate": 1.99998774685225e-05, "loss": 1.1592, "step": 1156 }, { "epoch": 0.18889024937757642, "grad_norm": 1.8991316556930542, "learning_rate": 1.9999875892574395e-05, "loss": 0.9992, "step": 1157 }, { "epoch": 0.1890535080200808, "grad_norm": 2.1433775424957275, "learning_rate": 1.9999874306556416e-05, "loss": 1.0734, "step": 1158 }, { "epoch": 0.1892167666625852, "grad_norm": 2.1161365509033203, "learning_rate": 1.9999872710468568e-05, "loss": 1.0906, "step": 1159 }, { "epoch": 0.1893800253050896, "grad_norm": 2.5800371170043945, "learning_rate": 1.9999871104310846e-05, "loss": 1.026, "step": 1160 }, { "epoch": 0.18954328394759398, "grad_norm": 2.267658233642578, "learning_rate": 1.9999869488083257e-05, "loss": 1.0681, "step": 1161 }, { "epoch": 0.18970654259009836, "grad_norm": 2.2277655601501465, "learning_rate": 1.9999867861785806e-05, "loss": 0.9169, "step": 1162 }, { "epoch": 0.18986980123260275, "grad_norm": 2.348982334136963, "learning_rate": 1.9999866225418488e-05, "loss": 1.137, "step": 1163 }, { "epoch": 0.19003305987510713, "grad_norm": 2.3634753227233887, "learning_rate": 1.999986457898131e-05, "loss": 1.1237, "step": 1164 }, { "epoch": 0.1901963185176115, "grad_norm": 2.1389710903167725, "learning_rate": 1.999986292247427e-05, "loss": 1.0627, "step": 1165 }, { "epoch": 0.19035957716011592, "grad_norm": 2.117138624191284, "learning_rate": 1.999986125589737e-05, "loss": 0.856, "step": 1166 }, { "epoch": 0.1905228358026203, "grad_norm": 2.3714962005615234, "learning_rate": 1.9999859579250612e-05, "loss": 1.0273, "step": 1167 }, { "epoch": 0.1906860944451247, "grad_norm": 2.1897380352020264, "learning_rate": 1.9999857892534e-05, "loss": 0.8903, "step": 1168 }, { "epoch": 0.19084935308762907, "grad_norm": 2.0271244049072266, "learning_rate": 1.999985619574753e-05, "loss": 0.8694, "step": 1169 }, { "epoch": 0.19101261173013345, "grad_norm": 2.8580963611602783, "learning_rate": 1.9999854488891214e-05, "loss": 1.2105, "step": 1170 }, { "epoch": 0.19117587037263786, "grad_norm": 2.389904022216797, "learning_rate": 1.9999852771965042e-05, "loss": 0.9998, "step": 1171 }, { "epoch": 0.19133912901514225, "grad_norm": 2.6290574073791504, "learning_rate": 1.999985104496902e-05, "loss": 1.0353, "step": 1172 }, { "epoch": 0.19150238765764663, "grad_norm": 2.6093568801879883, "learning_rate": 1.9999849307903153e-05, "loss": 1.049, "step": 1173 }, { "epoch": 0.191665646300151, "grad_norm": 2.4639508724212646, "learning_rate": 1.999984756076744e-05, "loss": 1.0843, "step": 1174 }, { "epoch": 0.1918289049426554, "grad_norm": 2.90348219871521, "learning_rate": 1.9999845803561882e-05, "loss": 1.0587, "step": 1175 }, { "epoch": 0.1919921635851598, "grad_norm": 2.272756576538086, "learning_rate": 1.9999844036286483e-05, "loss": 0.9462, "step": 1176 }, { "epoch": 0.19215542222766419, "grad_norm": 2.4548888206481934, "learning_rate": 1.9999842258941244e-05, "loss": 1.1625, "step": 1177 }, { "epoch": 0.19231868087016857, "grad_norm": 2.604268789291382, "learning_rate": 1.9999840471526166e-05, "loss": 1.1899, "step": 1178 }, { "epoch": 0.19248193951267295, "grad_norm": 2.066979169845581, "learning_rate": 1.9999838674041252e-05, "loss": 0.9286, "step": 1179 }, { "epoch": 0.19264519815517733, "grad_norm": 2.3797128200531006, "learning_rate": 1.9999836866486505e-05, "loss": 1.0364, "step": 1180 }, { "epoch": 0.19280845679768172, "grad_norm": 2.6473474502563477, "learning_rate": 1.999983504886192e-05, "loss": 1.2209, "step": 1181 }, { "epoch": 0.19297171544018613, "grad_norm": 2.0787739753723145, "learning_rate": 1.9999833221167507e-05, "loss": 1.0413, "step": 1182 }, { "epoch": 0.1931349740826905, "grad_norm": 2.279916763305664, "learning_rate": 1.9999831383403263e-05, "loss": 0.9349, "step": 1183 }, { "epoch": 0.1932982327251949, "grad_norm": 2.120527744293213, "learning_rate": 1.9999829535569196e-05, "loss": 0.8887, "step": 1184 }, { "epoch": 0.19346149136769927, "grad_norm": 2.002307891845703, "learning_rate": 1.99998276776653e-05, "loss": 0.9832, "step": 1185 }, { "epoch": 0.19362475001020366, "grad_norm": 2.755699396133423, "learning_rate": 1.9999825809691577e-05, "loss": 1.1735, "step": 1186 }, { "epoch": 0.19378800865270807, "grad_norm": 2.5381875038146973, "learning_rate": 1.9999823931648036e-05, "loss": 1.0875, "step": 1187 }, { "epoch": 0.19395126729521245, "grad_norm": 2.0140693187713623, "learning_rate": 1.9999822043534673e-05, "loss": 1.0085, "step": 1188 }, { "epoch": 0.19411452593771683, "grad_norm": 2.2564597129821777, "learning_rate": 1.9999820145351493e-05, "loss": 1.0603, "step": 1189 }, { "epoch": 0.1942777845802212, "grad_norm": 2.2346925735473633, "learning_rate": 1.9999818237098495e-05, "loss": 0.9727, "step": 1190 }, { "epoch": 0.1944410432227256, "grad_norm": 2.393723726272583, "learning_rate": 1.9999816318775688e-05, "loss": 0.8842, "step": 1191 }, { "epoch": 0.19460430186522998, "grad_norm": 2.2890357971191406, "learning_rate": 1.999981439038306e-05, "loss": 0.8331, "step": 1192 }, { "epoch": 0.1947675605077344, "grad_norm": 2.5719926357269287, "learning_rate": 1.999981245192063e-05, "loss": 1.0345, "step": 1193 }, { "epoch": 0.19493081915023877, "grad_norm": 2.381460428237915, "learning_rate": 1.9999810503388386e-05, "loss": 1.1593, "step": 1194 }, { "epoch": 0.19509407779274315, "grad_norm": 2.2159101963043213, "learning_rate": 1.9999808544786336e-05, "loss": 0.9811, "step": 1195 }, { "epoch": 0.19525733643524754, "grad_norm": 2.201510429382324, "learning_rate": 1.9999806576114485e-05, "loss": 0.8886, "step": 1196 }, { "epoch": 0.19542059507775192, "grad_norm": 2.8090901374816895, "learning_rate": 1.999980459737283e-05, "loss": 1.2377, "step": 1197 }, { "epoch": 0.19558385372025633, "grad_norm": 2.294390916824341, "learning_rate": 1.999980260856137e-05, "loss": 1.0078, "step": 1198 }, { "epoch": 0.1957471123627607, "grad_norm": 2.2880094051361084, "learning_rate": 1.9999800609680117e-05, "loss": 1.1318, "step": 1199 }, { "epoch": 0.1959103710052651, "grad_norm": 2.2412564754486084, "learning_rate": 1.9999798600729067e-05, "loss": 0.9749, "step": 1200 }, { "epoch": 0.19607362964776948, "grad_norm": 1.8755762577056885, "learning_rate": 1.9999796581708222e-05, "loss": 0.8569, "step": 1201 }, { "epoch": 0.19623688829027386, "grad_norm": 2.215679407119751, "learning_rate": 1.999979455261758e-05, "loss": 0.9661, "step": 1202 }, { "epoch": 0.19640014693277824, "grad_norm": 2.204582691192627, "learning_rate": 1.9999792513457152e-05, "loss": 0.9762, "step": 1203 }, { "epoch": 0.19656340557528265, "grad_norm": 2.370715856552124, "learning_rate": 1.9999790464226934e-05, "loss": 1.2432, "step": 1204 }, { "epoch": 0.19672666421778703, "grad_norm": 2.324059247970581, "learning_rate": 1.999978840492693e-05, "loss": 1.1338, "step": 1205 }, { "epoch": 0.19688992286029142, "grad_norm": 2.1559267044067383, "learning_rate": 1.9999786335557143e-05, "loss": 0.981, "step": 1206 }, { "epoch": 0.1970531815027958, "grad_norm": 2.0476436614990234, "learning_rate": 1.9999784256117573e-05, "loss": 0.9939, "step": 1207 }, { "epoch": 0.19721644014530018, "grad_norm": 2.2658190727233887, "learning_rate": 1.9999782166608225e-05, "loss": 1.117, "step": 1208 }, { "epoch": 0.1973796987878046, "grad_norm": 2.3181371688842773, "learning_rate": 1.9999780067029095e-05, "loss": 1.1727, "step": 1209 }, { "epoch": 0.19754295743030897, "grad_norm": 2.826958179473877, "learning_rate": 1.999977795738019e-05, "loss": 1.5862, "step": 1210 }, { "epoch": 0.19770621607281336, "grad_norm": 2.242987632751465, "learning_rate": 1.9999775837661513e-05, "loss": 0.9381, "step": 1211 }, { "epoch": 0.19786947471531774, "grad_norm": 2.1063759326934814, "learning_rate": 1.999977370787306e-05, "loss": 0.9262, "step": 1212 }, { "epoch": 0.19803273335782212, "grad_norm": 2.459761619567871, "learning_rate": 1.9999771568014845e-05, "loss": 1.1155, "step": 1213 }, { "epoch": 0.1981959920003265, "grad_norm": 2.1590492725372314, "learning_rate": 1.999976941808686e-05, "loss": 0.9855, "step": 1214 }, { "epoch": 0.19835925064283091, "grad_norm": 2.3454430103302, "learning_rate": 1.9999767258089107e-05, "loss": 1.1352, "step": 1215 }, { "epoch": 0.1985225092853353, "grad_norm": 2.3492486476898193, "learning_rate": 1.9999765088021596e-05, "loss": 1.0754, "step": 1216 }, { "epoch": 0.19868576792783968, "grad_norm": 2.1677584648132324, "learning_rate": 1.999976290788432e-05, "loss": 0.9619, "step": 1217 }, { "epoch": 0.19884902657034406, "grad_norm": 2.3685142993927, "learning_rate": 1.9999760717677286e-05, "loss": 1.029, "step": 1218 }, { "epoch": 0.19901228521284844, "grad_norm": 2.2057154178619385, "learning_rate": 1.9999758517400494e-05, "loss": 1.1021, "step": 1219 }, { "epoch": 0.19917554385535285, "grad_norm": 2.1651723384857178, "learning_rate": 1.9999756307053947e-05, "loss": 0.9394, "step": 1220 }, { "epoch": 0.19933880249785724, "grad_norm": 1.9442890882492065, "learning_rate": 1.9999754086637652e-05, "loss": 0.9528, "step": 1221 }, { "epoch": 0.19950206114036162, "grad_norm": 2.0366950035095215, "learning_rate": 1.9999751856151606e-05, "loss": 0.958, "step": 1222 }, { "epoch": 0.199665319782866, "grad_norm": 2.293421506881714, "learning_rate": 1.9999749615595813e-05, "loss": 1.1066, "step": 1223 }, { "epoch": 0.19982857842537038, "grad_norm": 1.8304027318954468, "learning_rate": 1.9999747364970274e-05, "loss": 0.9793, "step": 1224 }, { "epoch": 0.19999183706787477, "grad_norm": 1.9746372699737549, "learning_rate": 1.9999745104274995e-05, "loss": 0.8131, "step": 1225 }, { "epoch": 0.20015509571037918, "grad_norm": 2.0511436462402344, "learning_rate": 1.999974283350997e-05, "loss": 1.0009, "step": 1226 }, { "epoch": 0.20031835435288356, "grad_norm": 2.3559556007385254, "learning_rate": 1.9999740552675212e-05, "loss": 1.2299, "step": 1227 }, { "epoch": 0.20048161299538794, "grad_norm": 2.171827554702759, "learning_rate": 1.9999738261770713e-05, "loss": 0.901, "step": 1228 }, { "epoch": 0.20064487163789232, "grad_norm": 2.365586996078491, "learning_rate": 1.9999735960796482e-05, "loss": 1.1892, "step": 1229 }, { "epoch": 0.2008081302803967, "grad_norm": 2.331192970275879, "learning_rate": 1.999973364975252e-05, "loss": 0.7867, "step": 1230 }, { "epoch": 0.20097138892290112, "grad_norm": 2.357205390930176, "learning_rate": 1.9999731328638828e-05, "loss": 0.8799, "step": 1231 }, { "epoch": 0.2011346475654055, "grad_norm": 2.172893762588501, "learning_rate": 1.999972899745541e-05, "loss": 0.9546, "step": 1232 }, { "epoch": 0.20129790620790988, "grad_norm": 2.3461246490478516, "learning_rate": 1.999972665620227e-05, "loss": 1.1344, "step": 1233 }, { "epoch": 0.20146116485041426, "grad_norm": 2.1069836616516113, "learning_rate": 1.9999724304879406e-05, "loss": 1.0727, "step": 1234 }, { "epoch": 0.20162442349291865, "grad_norm": 2.5568220615386963, "learning_rate": 1.999972194348682e-05, "loss": 1.1865, "step": 1235 }, { "epoch": 0.20178768213542306, "grad_norm": 2.0884177684783936, "learning_rate": 1.999971957202452e-05, "loss": 0.8975, "step": 1236 }, { "epoch": 0.20195094077792744, "grad_norm": 2.4405572414398193, "learning_rate": 1.9999717190492503e-05, "loss": 1.0843, "step": 1237 }, { "epoch": 0.20211419942043182, "grad_norm": 2.220410108566284, "learning_rate": 1.9999714798890775e-05, "loss": 1.0091, "step": 1238 }, { "epoch": 0.2022774580629362, "grad_norm": 2.249708890914917, "learning_rate": 1.9999712397219337e-05, "loss": 1.0198, "step": 1239 }, { "epoch": 0.2024407167054406, "grad_norm": 2.0833353996276855, "learning_rate": 1.9999709985478188e-05, "loss": 0.848, "step": 1240 }, { "epoch": 0.20260397534794497, "grad_norm": 2.45491099357605, "learning_rate": 1.9999707563667338e-05, "loss": 1.2241, "step": 1241 }, { "epoch": 0.20276723399044938, "grad_norm": 2.3407816886901855, "learning_rate": 1.999970513178678e-05, "loss": 0.9794, "step": 1242 }, { "epoch": 0.20293049263295376, "grad_norm": 2.091902256011963, "learning_rate": 1.9999702689836527e-05, "loss": 0.9687, "step": 1243 }, { "epoch": 0.20309375127545815, "grad_norm": 2.0730838775634766, "learning_rate": 1.999970023781657e-05, "loss": 1.1505, "step": 1244 }, { "epoch": 0.20325700991796253, "grad_norm": 2.111569881439209, "learning_rate": 1.999969777572692e-05, "loss": 0.8972, "step": 1245 }, { "epoch": 0.2034202685604669, "grad_norm": 2.1591970920562744, "learning_rate": 1.999969530356758e-05, "loss": 0.9328, "step": 1246 }, { "epoch": 0.20358352720297132, "grad_norm": 2.1966850757598877, "learning_rate": 1.9999692821338547e-05, "loss": 1.0219, "step": 1247 }, { "epoch": 0.2037467858454757, "grad_norm": 2.2881972789764404, "learning_rate": 1.999969032903983e-05, "loss": 1.0175, "step": 1248 }, { "epoch": 0.20391004448798009, "grad_norm": 2.8964879512786865, "learning_rate": 1.9999687826671422e-05, "loss": 1.1091, "step": 1249 }, { "epoch": 0.20407330313048447, "grad_norm": 1.9811725616455078, "learning_rate": 1.9999685314233333e-05, "loss": 0.7789, "step": 1250 }, { "epoch": 0.20423656177298885, "grad_norm": 2.0875306129455566, "learning_rate": 1.9999682791725563e-05, "loss": 0.8624, "step": 1251 }, { "epoch": 0.20439982041549323, "grad_norm": 2.2419612407684326, "learning_rate": 1.9999680259148117e-05, "loss": 0.9159, "step": 1252 }, { "epoch": 0.20456307905799764, "grad_norm": 2.273787260055542, "learning_rate": 1.9999677716500994e-05, "loss": 0.8924, "step": 1253 }, { "epoch": 0.20472633770050203, "grad_norm": 2.7177908420562744, "learning_rate": 1.9999675163784197e-05, "loss": 1.197, "step": 1254 }, { "epoch": 0.2048895963430064, "grad_norm": 2.159367561340332, "learning_rate": 1.9999672600997734e-05, "loss": 0.9107, "step": 1255 }, { "epoch": 0.2050528549855108, "grad_norm": 2.605884313583374, "learning_rate": 1.9999670028141598e-05, "loss": 1.1049, "step": 1256 }, { "epoch": 0.20521611362801517, "grad_norm": 2.570202589035034, "learning_rate": 1.99996674452158e-05, "loss": 0.9643, "step": 1257 }, { "epoch": 0.20537937227051958, "grad_norm": 2.489055633544922, "learning_rate": 1.999966485222034e-05, "loss": 0.9722, "step": 1258 }, { "epoch": 0.20554263091302397, "grad_norm": 2.2112061977386475, "learning_rate": 1.999966224915522e-05, "loss": 0.8344, "step": 1259 }, { "epoch": 0.20570588955552835, "grad_norm": 2.590627908706665, "learning_rate": 1.999965963602044e-05, "loss": 1.3387, "step": 1260 }, { "epoch": 0.20586914819803273, "grad_norm": 2.5064964294433594, "learning_rate": 1.9999657012816008e-05, "loss": 1.3058, "step": 1261 }, { "epoch": 0.2060324068405371, "grad_norm": 2.611572265625, "learning_rate": 1.9999654379541923e-05, "loss": 1.0867, "step": 1262 }, { "epoch": 0.2061956654830415, "grad_norm": 2.591822862625122, "learning_rate": 1.999965173619819e-05, "loss": 1.2636, "step": 1263 }, { "epoch": 0.2063589241255459, "grad_norm": 2.198455810546875, "learning_rate": 1.9999649082784807e-05, "loss": 0.9545, "step": 1264 }, { "epoch": 0.2065221827680503, "grad_norm": 2.234243392944336, "learning_rate": 1.9999646419301783e-05, "loss": 0.9726, "step": 1265 }, { "epoch": 0.20668544141055467, "grad_norm": 2.020203113555908, "learning_rate": 1.9999643745749116e-05, "loss": 0.887, "step": 1266 }, { "epoch": 0.20684870005305905, "grad_norm": 2.0185112953186035, "learning_rate": 1.9999641062126814e-05, "loss": 0.8834, "step": 1267 }, { "epoch": 0.20701195869556344, "grad_norm": 2.165705680847168, "learning_rate": 1.999963836843487e-05, "loss": 1.0574, "step": 1268 }, { "epoch": 0.20717521733806785, "grad_norm": 2.4223058223724365, "learning_rate": 1.99996356646733e-05, "loss": 0.8913, "step": 1269 }, { "epoch": 0.20733847598057223, "grad_norm": 2.149723529815674, "learning_rate": 1.9999632950842095e-05, "loss": 1.2076, "step": 1270 }, { "epoch": 0.2075017346230766, "grad_norm": 2.0023581981658936, "learning_rate": 1.9999630226941265e-05, "loss": 0.9207, "step": 1271 }, { "epoch": 0.207664993265581, "grad_norm": 2.56469988822937, "learning_rate": 1.9999627492970805e-05, "loss": 1.251, "step": 1272 }, { "epoch": 0.20782825190808538, "grad_norm": 2.017416477203369, "learning_rate": 1.999962474893073e-05, "loss": 0.9956, "step": 1273 }, { "epoch": 0.20799151055058976, "grad_norm": 2.3790576457977295, "learning_rate": 1.999962199482103e-05, "loss": 1.0752, "step": 1274 }, { "epoch": 0.20815476919309417, "grad_norm": 2.342595100402832, "learning_rate": 1.9999619230641714e-05, "loss": 0.9823, "step": 1275 }, { "epoch": 0.20831802783559855, "grad_norm": 2.540520429611206, "learning_rate": 1.9999616456392785e-05, "loss": 1.1521, "step": 1276 }, { "epoch": 0.20848128647810293, "grad_norm": 2.3206164836883545, "learning_rate": 1.9999613672074246e-05, "loss": 1.0796, "step": 1277 }, { "epoch": 0.20864454512060732, "grad_norm": 2.3632194995880127, "learning_rate": 1.99996108776861e-05, "loss": 1.0677, "step": 1278 }, { "epoch": 0.2088078037631117, "grad_norm": 2.3757784366607666, "learning_rate": 1.9999608073228342e-05, "loss": 0.9669, "step": 1279 }, { "epoch": 0.2089710624056161, "grad_norm": 2.3979928493499756, "learning_rate": 1.9999605258700983e-05, "loss": 0.9178, "step": 1280 }, { "epoch": 0.2091343210481205, "grad_norm": 2.3388993740081787, "learning_rate": 1.999960243410403e-05, "loss": 0.928, "step": 1281 }, { "epoch": 0.20929757969062487, "grad_norm": 2.285773277282715, "learning_rate": 1.9999599599437476e-05, "loss": 0.9614, "step": 1282 }, { "epoch": 0.20946083833312926, "grad_norm": 2.317728281021118, "learning_rate": 1.9999596754701328e-05, "loss": 0.9957, "step": 1283 }, { "epoch": 0.20962409697563364, "grad_norm": 2.3520326614379883, "learning_rate": 1.999959389989559e-05, "loss": 1.0384, "step": 1284 }, { "epoch": 0.20978735561813802, "grad_norm": 2.4376580715179443, "learning_rate": 1.999959103502026e-05, "loss": 1.0004, "step": 1285 }, { "epoch": 0.20995061426064243, "grad_norm": 2.5730464458465576, "learning_rate": 1.999958816007535e-05, "loss": 1.1472, "step": 1286 }, { "epoch": 0.21011387290314681, "grad_norm": 2.2769081592559814, "learning_rate": 1.9999585275060854e-05, "loss": 1.0164, "step": 1287 }, { "epoch": 0.2102771315456512, "grad_norm": 2.3943207263946533, "learning_rate": 1.999958237997678e-05, "loss": 0.9327, "step": 1288 }, { "epoch": 0.21044039018815558, "grad_norm": 2.2491486072540283, "learning_rate": 1.9999579474823126e-05, "loss": 0.9422, "step": 1289 }, { "epoch": 0.21060364883065996, "grad_norm": 2.295269727706909, "learning_rate": 1.99995765595999e-05, "loss": 1.0003, "step": 1290 }, { "epoch": 0.21076690747316437, "grad_norm": 2.0796635150909424, "learning_rate": 1.9999573634307104e-05, "loss": 0.9415, "step": 1291 }, { "epoch": 0.21093016611566875, "grad_norm": 2.276812791824341, "learning_rate": 1.9999570698944743e-05, "loss": 1.0389, "step": 1292 }, { "epoch": 0.21109342475817314, "grad_norm": 2.033170700073242, "learning_rate": 1.999956775351281e-05, "loss": 0.9762, "step": 1293 }, { "epoch": 0.21125668340067752, "grad_norm": 2.484546184539795, "learning_rate": 1.999956479801132e-05, "loss": 0.9551, "step": 1294 }, { "epoch": 0.2114199420431819, "grad_norm": 2.6277737617492676, "learning_rate": 1.9999561832440268e-05, "loss": 1.2138, "step": 1295 }, { "epoch": 0.2115832006856863, "grad_norm": 2.1640849113464355, "learning_rate": 1.999955885679966e-05, "loss": 1.1261, "step": 1296 }, { "epoch": 0.2117464593281907, "grad_norm": 2.3965718746185303, "learning_rate": 1.99995558710895e-05, "loss": 0.9749, "step": 1297 }, { "epoch": 0.21190971797069508, "grad_norm": 2.2983665466308594, "learning_rate": 1.999955287530979e-05, "loss": 0.9456, "step": 1298 }, { "epoch": 0.21207297661319946, "grad_norm": 2.103120803833008, "learning_rate": 1.9999549869460534e-05, "loss": 0.94, "step": 1299 }, { "epoch": 0.21223623525570384, "grad_norm": 1.9734042882919312, "learning_rate": 1.9999546853541728e-05, "loss": 0.8945, "step": 1300 }, { "epoch": 0.21239949389820822, "grad_norm": 2.0586676597595215, "learning_rate": 1.999954382755339e-05, "loss": 0.9791, "step": 1301 }, { "epoch": 0.21256275254071264, "grad_norm": 2.274178981781006, "learning_rate": 1.9999540791495507e-05, "loss": 0.9691, "step": 1302 }, { "epoch": 0.21272601118321702, "grad_norm": 2.022470712661743, "learning_rate": 1.999953774536809e-05, "loss": 0.9499, "step": 1303 }, { "epoch": 0.2128892698257214, "grad_norm": 2.472344160079956, "learning_rate": 1.9999534689171146e-05, "loss": 0.9589, "step": 1304 }, { "epoch": 0.21305252846822578, "grad_norm": 1.9726943969726562, "learning_rate": 1.999953162290467e-05, "loss": 0.8441, "step": 1305 }, { "epoch": 0.21321578711073016, "grad_norm": 2.1743080615997314, "learning_rate": 1.9999528546568667e-05, "loss": 0.9969, "step": 1306 }, { "epoch": 0.21337904575323458, "grad_norm": 2.4251761436462402, "learning_rate": 1.9999525460163143e-05, "loss": 1.1353, "step": 1307 }, { "epoch": 0.21354230439573896, "grad_norm": 2.0615875720977783, "learning_rate": 1.99995223636881e-05, "loss": 0.7659, "step": 1308 }, { "epoch": 0.21370556303824334, "grad_norm": 2.5262575149536133, "learning_rate": 1.999951925714354e-05, "loss": 1.2011, "step": 1309 }, { "epoch": 0.21386882168074772, "grad_norm": 2.1753737926483154, "learning_rate": 1.9999516140529465e-05, "loss": 1.0763, "step": 1310 }, { "epoch": 0.2140320803232521, "grad_norm": 2.435194492340088, "learning_rate": 1.999951301384588e-05, "loss": 1.0107, "step": 1311 }, { "epoch": 0.2141953389657565, "grad_norm": 2.2366535663604736, "learning_rate": 1.999950987709279e-05, "loss": 0.9, "step": 1312 }, { "epoch": 0.2143585976082609, "grad_norm": 2.2835752964019775, "learning_rate": 1.9999506730270198e-05, "loss": 0.9283, "step": 1313 }, { "epoch": 0.21452185625076528, "grad_norm": 2.4992947578430176, "learning_rate": 1.9999503573378102e-05, "loss": 1.0813, "step": 1314 }, { "epoch": 0.21468511489326966, "grad_norm": 2.288408041000366, "learning_rate": 1.999950040641651e-05, "loss": 0.9945, "step": 1315 }, { "epoch": 0.21484837353577405, "grad_norm": 2.0417964458465576, "learning_rate": 1.9999497229385422e-05, "loss": 0.936, "step": 1316 }, { "epoch": 0.21501163217827843, "grad_norm": 2.712717294692993, "learning_rate": 1.9999494042284844e-05, "loss": 1.0761, "step": 1317 }, { "epoch": 0.21517489082078284, "grad_norm": 2.1315293312072754, "learning_rate": 1.999949084511478e-05, "loss": 0.9006, "step": 1318 }, { "epoch": 0.21533814946328722, "grad_norm": 2.0772135257720947, "learning_rate": 1.999948763787523e-05, "loss": 1.0365, "step": 1319 }, { "epoch": 0.2155014081057916, "grad_norm": 2.3228888511657715, "learning_rate": 1.9999484420566197e-05, "loss": 1.1043, "step": 1320 }, { "epoch": 0.21566466674829599, "grad_norm": 2.0455996990203857, "learning_rate": 1.9999481193187685e-05, "loss": 0.8884, "step": 1321 }, { "epoch": 0.21582792539080037, "grad_norm": 2.403216600418091, "learning_rate": 1.9999477955739703e-05, "loss": 1.0579, "step": 1322 }, { "epoch": 0.21599118403330475, "grad_norm": 2.4208836555480957, "learning_rate": 1.9999474708222248e-05, "loss": 0.8545, "step": 1323 }, { "epoch": 0.21615444267580916, "grad_norm": 2.4777541160583496, "learning_rate": 1.9999471450635322e-05, "loss": 0.9584, "step": 1324 }, { "epoch": 0.21631770131831354, "grad_norm": 2.428277015686035, "learning_rate": 1.999946818297893e-05, "loss": 1.0093, "step": 1325 }, { "epoch": 0.21648095996081793, "grad_norm": 2.368472099304199, "learning_rate": 1.999946490525308e-05, "loss": 0.9628, "step": 1326 }, { "epoch": 0.2166442186033223, "grad_norm": 2.0551724433898926, "learning_rate": 1.9999461617457773e-05, "loss": 1.0424, "step": 1327 }, { "epoch": 0.2168074772458267, "grad_norm": 2.1272008419036865, "learning_rate": 1.9999458319593008e-05, "loss": 1.0637, "step": 1328 }, { "epoch": 0.2169707358883311, "grad_norm": 2.2524030208587646, "learning_rate": 1.999945501165879e-05, "loss": 0.9783, "step": 1329 }, { "epoch": 0.21713399453083548, "grad_norm": 2.1017332077026367, "learning_rate": 1.9999451693655125e-05, "loss": 1.0354, "step": 1330 }, { "epoch": 0.21729725317333987, "grad_norm": 2.338069200515747, "learning_rate": 1.9999448365582014e-05, "loss": 1.1087, "step": 1331 }, { "epoch": 0.21746051181584425, "grad_norm": 2.274113178253174, "learning_rate": 1.999944502743946e-05, "loss": 1.0582, "step": 1332 }, { "epoch": 0.21762377045834863, "grad_norm": 2.543405532836914, "learning_rate": 1.9999441679227466e-05, "loss": 1.0986, "step": 1333 }, { "epoch": 0.217787029100853, "grad_norm": 2.4448163509368896, "learning_rate": 1.999943832094604e-05, "loss": 2.1344, "step": 1334 }, { "epoch": 0.21795028774335742, "grad_norm": 2.404064416885376, "learning_rate": 1.9999434952595184e-05, "loss": 1.198, "step": 1335 }, { "epoch": 0.2181135463858618, "grad_norm": 2.368515968322754, "learning_rate": 1.9999431574174895e-05, "loss": 1.0837, "step": 1336 }, { "epoch": 0.2182768050283662, "grad_norm": 2.336869955062866, "learning_rate": 1.9999428185685187e-05, "loss": 1.0213, "step": 1337 }, { "epoch": 0.21844006367087057, "grad_norm": 2.384500026702881, "learning_rate": 1.999942478712605e-05, "loss": 0.9344, "step": 1338 }, { "epoch": 0.21860332231337495, "grad_norm": 2.080235004425049, "learning_rate": 1.9999421378497504e-05, "loss": 1.0958, "step": 1339 }, { "epoch": 0.21876658095587936, "grad_norm": 2.1801416873931885, "learning_rate": 1.9999417959799538e-05, "loss": 0.965, "step": 1340 }, { "epoch": 0.21892983959838375, "grad_norm": 2.2784318923950195, "learning_rate": 1.999941453103216e-05, "loss": 1.0562, "step": 1341 }, { "epoch": 0.21909309824088813, "grad_norm": 2.4351587295532227, "learning_rate": 1.9999411092195373e-05, "loss": 1.1061, "step": 1342 }, { "epoch": 0.2192563568833925, "grad_norm": 2.589292049407959, "learning_rate": 1.9999407643289187e-05, "loss": 1.0785, "step": 1343 }, { "epoch": 0.2194196155258969, "grad_norm": 2.379124641418457, "learning_rate": 1.9999404184313595e-05, "loss": 1.174, "step": 1344 }, { "epoch": 0.21958287416840128, "grad_norm": 2.216259002685547, "learning_rate": 1.9999400715268607e-05, "loss": 1.1748, "step": 1345 }, { "epoch": 0.2197461328109057, "grad_norm": 2.670081615447998, "learning_rate": 1.9999397236154228e-05, "loss": 0.9275, "step": 1346 }, { "epoch": 0.21990939145341007, "grad_norm": 2.440054416656494, "learning_rate": 1.9999393746970456e-05, "loss": 0.855, "step": 1347 }, { "epoch": 0.22007265009591445, "grad_norm": 2.3037898540496826, "learning_rate": 1.9999390247717296e-05, "loss": 0.8509, "step": 1348 }, { "epoch": 0.22023590873841883, "grad_norm": 2.0270252227783203, "learning_rate": 1.9999386738394757e-05, "loss": 1.0842, "step": 1349 }, { "epoch": 0.22039916738092322, "grad_norm": 2.1370058059692383, "learning_rate": 1.9999383219002836e-05, "loss": 0.9708, "step": 1350 }, { "epoch": 0.22056242602342763, "grad_norm": 2.2172136306762695, "learning_rate": 1.9999379689541536e-05, "loss": 0.9063, "step": 1351 }, { "epoch": 0.220725684665932, "grad_norm": 2.106698751449585, "learning_rate": 1.9999376150010868e-05, "loss": 0.9857, "step": 1352 }, { "epoch": 0.2208889433084364, "grad_norm": 2.2829997539520264, "learning_rate": 1.9999372600410828e-05, "loss": 0.9354, "step": 1353 }, { "epoch": 0.22105220195094077, "grad_norm": 2.5648410320281982, "learning_rate": 1.9999369040741423e-05, "loss": 0.9826, "step": 1354 }, { "epoch": 0.22121546059344516, "grad_norm": 2.2933948040008545, "learning_rate": 1.9999365471002656e-05, "loss": 1.0284, "step": 1355 }, { "epoch": 0.22137871923594954, "grad_norm": 2.216158866882324, "learning_rate": 1.999936189119453e-05, "loss": 0.8352, "step": 1356 }, { "epoch": 0.22154197787845395, "grad_norm": 2.386352777481079, "learning_rate": 1.999935830131705e-05, "loss": 1.1436, "step": 1357 }, { "epoch": 0.22170523652095833, "grad_norm": 1.8827351331710815, "learning_rate": 1.9999354701370217e-05, "loss": 0.7611, "step": 1358 }, { "epoch": 0.22186849516346271, "grad_norm": 2.6127867698669434, "learning_rate": 1.9999351091354038e-05, "loss": 0.8851, "step": 1359 }, { "epoch": 0.2220317538059671, "grad_norm": 2.170525550842285, "learning_rate": 1.9999347471268517e-05, "loss": 0.9324, "step": 1360 }, { "epoch": 0.22219501244847148, "grad_norm": 2.5319437980651855, "learning_rate": 1.9999343841113652e-05, "loss": 1.3992, "step": 1361 }, { "epoch": 0.2223582710909759, "grad_norm": 2.2870419025421143, "learning_rate": 1.9999340200889455e-05, "loss": 1.0953, "step": 1362 }, { "epoch": 0.22252152973348027, "grad_norm": 2.201634645462036, "learning_rate": 1.999933655059592e-05, "loss": 1.0753, "step": 1363 }, { "epoch": 0.22268478837598465, "grad_norm": 2.5825555324554443, "learning_rate": 1.999933289023306e-05, "loss": 1.1056, "step": 1364 }, { "epoch": 0.22284804701848904, "grad_norm": 2.396245002746582, "learning_rate": 1.999932921980087e-05, "loss": 1.0403, "step": 1365 }, { "epoch": 0.22301130566099342, "grad_norm": 1.9141837358474731, "learning_rate": 1.999932553929936e-05, "loss": 0.8429, "step": 1366 }, { "epoch": 0.22317456430349783, "grad_norm": 1.8894944190979004, "learning_rate": 1.9999321848728535e-05, "loss": 0.9629, "step": 1367 }, { "epoch": 0.2233378229460022, "grad_norm": 2.036377191543579, "learning_rate": 1.9999318148088392e-05, "loss": 0.988, "step": 1368 }, { "epoch": 0.2235010815885066, "grad_norm": 2.181813955307007, "learning_rate": 1.9999314437378942e-05, "loss": 0.9079, "step": 1369 }, { "epoch": 0.22366434023101098, "grad_norm": 2.5406486988067627, "learning_rate": 1.999931071660018e-05, "loss": 1.0315, "step": 1370 }, { "epoch": 0.22382759887351536, "grad_norm": 2.390594482421875, "learning_rate": 1.999930698575212e-05, "loss": 1.1485, "step": 1371 }, { "epoch": 0.22399085751601974, "grad_norm": 2.5430421829223633, "learning_rate": 1.9999303244834756e-05, "loss": 1.1223, "step": 1372 }, { "epoch": 0.22415411615852415, "grad_norm": 2.1572649478912354, "learning_rate": 1.99992994938481e-05, "loss": 0.9383, "step": 1373 }, { "epoch": 0.22431737480102854, "grad_norm": 2.532707691192627, "learning_rate": 1.9999295732792146e-05, "loss": 1.0254, "step": 1374 }, { "epoch": 0.22448063344353292, "grad_norm": 2.120185613632202, "learning_rate": 1.999929196166691e-05, "loss": 1.1167, "step": 1375 }, { "epoch": 0.2246438920860373, "grad_norm": 2.017282724380493, "learning_rate": 1.9999288180472388e-05, "loss": 1.0245, "step": 1376 }, { "epoch": 0.22480715072854168, "grad_norm": 2.0543930530548096, "learning_rate": 1.9999284389208586e-05, "loss": 1.0614, "step": 1377 }, { "epoch": 0.2249704093710461, "grad_norm": 2.2948455810546875, "learning_rate": 1.9999280587875504e-05, "loss": 1.1396, "step": 1378 }, { "epoch": 0.22513366801355048, "grad_norm": 2.0332608222961426, "learning_rate": 1.9999276776473152e-05, "loss": 0.9794, "step": 1379 }, { "epoch": 0.22529692665605486, "grad_norm": 2.266589403152466, "learning_rate": 1.999927295500153e-05, "loss": 1.0102, "step": 1380 }, { "epoch": 0.22546018529855924, "grad_norm": 2.2956466674804688, "learning_rate": 1.9999269123460644e-05, "loss": 0.7885, "step": 1381 }, { "epoch": 0.22562344394106362, "grad_norm": 2.3948919773101807, "learning_rate": 1.9999265281850495e-05, "loss": 1.0625, "step": 1382 }, { "epoch": 0.225786702583568, "grad_norm": 2.183424949645996, "learning_rate": 1.999926143017109e-05, "loss": 1.0145, "step": 1383 }, { "epoch": 0.22594996122607242, "grad_norm": 2.180690288543701, "learning_rate": 1.999925756842243e-05, "loss": 0.9044, "step": 1384 }, { "epoch": 0.2261132198685768, "grad_norm": 2.635434865951538, "learning_rate": 1.9999253696604522e-05, "loss": 1.016, "step": 1385 }, { "epoch": 0.22627647851108118, "grad_norm": 2.303046226501465, "learning_rate": 1.9999249814717364e-05, "loss": 1.0416, "step": 1386 }, { "epoch": 0.22643973715358556, "grad_norm": 2.290841817855835, "learning_rate": 1.999924592276097e-05, "loss": 1.145, "step": 1387 }, { "epoch": 0.22660299579608995, "grad_norm": 2.2113678455352783, "learning_rate": 1.9999242020735338e-05, "loss": 0.9243, "step": 1388 }, { "epoch": 0.22676625443859436, "grad_norm": 2.449328899383545, "learning_rate": 1.9999238108640467e-05, "loss": 1.0628, "step": 1389 }, { "epoch": 0.22692951308109874, "grad_norm": 2.423537254333496, "learning_rate": 1.9999234186476366e-05, "loss": 1.0556, "step": 1390 }, { "epoch": 0.22709277172360312, "grad_norm": 2.016934633255005, "learning_rate": 1.999923025424304e-05, "loss": 0.9526, "step": 1391 }, { "epoch": 0.2272560303661075, "grad_norm": 2.370967388153076, "learning_rate": 1.9999226311940494e-05, "loss": 1.6729, "step": 1392 }, { "epoch": 0.22741928900861189, "grad_norm": 2.3866734504699707, "learning_rate": 1.9999222359568732e-05, "loss": 1.0401, "step": 1393 }, { "epoch": 0.22758254765111627, "grad_norm": 2.1386513710021973, "learning_rate": 1.999921839712775e-05, "loss": 0.9644, "step": 1394 }, { "epoch": 0.22774580629362068, "grad_norm": 2.1274666786193848, "learning_rate": 1.999921442461756e-05, "loss": 0.977, "step": 1395 }, { "epoch": 0.22790906493612506, "grad_norm": 2.0370278358459473, "learning_rate": 1.9999210442038164e-05, "loss": 0.8958, "step": 1396 }, { "epoch": 0.22807232357862944, "grad_norm": 2.118417501449585, "learning_rate": 1.9999206449389566e-05, "loss": 0.8765, "step": 1397 }, { "epoch": 0.22823558222113383, "grad_norm": 2.3906350135803223, "learning_rate": 1.9999202446671768e-05, "loss": 0.9643, "step": 1398 }, { "epoch": 0.2283988408636382, "grad_norm": 2.0891644954681396, "learning_rate": 1.9999198433884778e-05, "loss": 0.8123, "step": 1399 }, { "epoch": 0.22856209950614262, "grad_norm": 2.760629892349243, "learning_rate": 1.9999194411028596e-05, "loss": 1.0803, "step": 1400 }, { "epoch": 0.228725358148647, "grad_norm": 1.9869005680084229, "learning_rate": 1.9999190378103228e-05, "loss": 1.0466, "step": 1401 }, { "epoch": 0.22888861679115138, "grad_norm": 2.286304473876953, "learning_rate": 1.999918633510868e-05, "loss": 0.8207, "step": 1402 }, { "epoch": 0.22905187543365577, "grad_norm": 2.0858097076416016, "learning_rate": 1.999918228204495e-05, "loss": 0.8137, "step": 1403 }, { "epoch": 0.22921513407616015, "grad_norm": 2.5562191009521484, "learning_rate": 1.999917821891205e-05, "loss": 0.9105, "step": 1404 }, { "epoch": 0.22937839271866453, "grad_norm": 2.386957883834839, "learning_rate": 1.9999174145709978e-05, "loss": 1.002, "step": 1405 }, { "epoch": 0.22954165136116894, "grad_norm": 2.51273775100708, "learning_rate": 1.999917006243874e-05, "loss": 1.0689, "step": 1406 }, { "epoch": 0.22970491000367332, "grad_norm": 1.989636778831482, "learning_rate": 1.9999165969098344e-05, "loss": 0.8451, "step": 1407 }, { "epoch": 0.2298681686461777, "grad_norm": 2.369889736175537, "learning_rate": 1.9999161865688787e-05, "loss": 1.1861, "step": 1408 }, { "epoch": 0.2300314272886821, "grad_norm": 2.5874602794647217, "learning_rate": 1.9999157752210078e-05, "loss": 0.9587, "step": 1409 }, { "epoch": 0.23019468593118647, "grad_norm": 2.3225300312042236, "learning_rate": 1.9999153628662217e-05, "loss": 1.0108, "step": 1410 }, { "epoch": 0.23035794457369088, "grad_norm": 2.2423036098480225, "learning_rate": 1.9999149495045215e-05, "loss": 0.9892, "step": 1411 }, { "epoch": 0.23052120321619526, "grad_norm": 2.851832628250122, "learning_rate": 1.999914535135907e-05, "loss": 0.9404, "step": 1412 }, { "epoch": 0.23068446185869965, "grad_norm": 2.4121601581573486, "learning_rate": 1.999914119760379e-05, "loss": 0.973, "step": 1413 }, { "epoch": 0.23084772050120403, "grad_norm": 2.440477132797241, "learning_rate": 1.9999137033779377e-05, "loss": 1.0385, "step": 1414 }, { "epoch": 0.2310109791437084, "grad_norm": 2.185458183288574, "learning_rate": 1.9999132859885836e-05, "loss": 0.9599, "step": 1415 }, { "epoch": 0.2311742377862128, "grad_norm": 2.3907816410064697, "learning_rate": 1.9999128675923167e-05, "loss": 0.8458, "step": 1416 }, { "epoch": 0.2313374964287172, "grad_norm": 2.205493450164795, "learning_rate": 1.999912448189138e-05, "loss": 1.3449, "step": 1417 }, { "epoch": 0.2315007550712216, "grad_norm": 2.369933843612671, "learning_rate": 1.9999120277790477e-05, "loss": 1.0692, "step": 1418 }, { "epoch": 0.23166401371372597, "grad_norm": 2.2691874504089355, "learning_rate": 1.9999116063620465e-05, "loss": 1.3118, "step": 1419 }, { "epoch": 0.23182727235623035, "grad_norm": 2.6719963550567627, "learning_rate": 1.9999111839381346e-05, "loss": 0.9667, "step": 1420 }, { "epoch": 0.23199053099873473, "grad_norm": 2.344728946685791, "learning_rate": 1.9999107605073123e-05, "loss": 0.9749, "step": 1421 }, { "epoch": 0.23215378964123914, "grad_norm": 2.1785640716552734, "learning_rate": 1.9999103360695802e-05, "loss": 1.0146, "step": 1422 }, { "epoch": 0.23231704828374353, "grad_norm": 2.3754987716674805, "learning_rate": 1.9999099106249384e-05, "loss": 1.1816, "step": 1423 }, { "epoch": 0.2324803069262479, "grad_norm": 2.326988458633423, "learning_rate": 1.999909484173388e-05, "loss": 1.1123, "step": 1424 }, { "epoch": 0.2326435655687523, "grad_norm": 2.1828572750091553, "learning_rate": 1.9999090567149283e-05, "loss": 0.9252, "step": 1425 }, { "epoch": 0.23280682421125667, "grad_norm": 2.2992196083068848, "learning_rate": 1.999908628249561e-05, "loss": 1.1553, "step": 1426 }, { "epoch": 0.23297008285376108, "grad_norm": 2.4465243816375732, "learning_rate": 1.999908198777286e-05, "loss": 1.0558, "step": 1427 }, { "epoch": 0.23313334149626547, "grad_norm": 2.6177003383636475, "learning_rate": 1.9999077682981033e-05, "loss": 0.9485, "step": 1428 }, { "epoch": 0.23329660013876985, "grad_norm": 2.356412649154663, "learning_rate": 1.9999073368120142e-05, "loss": 1.0065, "step": 1429 }, { "epoch": 0.23345985878127423, "grad_norm": 2.119990110397339, "learning_rate": 1.9999069043190184e-05, "loss": 0.9982, "step": 1430 }, { "epoch": 0.23362311742377861, "grad_norm": 2.2722253799438477, "learning_rate": 1.9999064708191167e-05, "loss": 1.077, "step": 1431 }, { "epoch": 0.233786376066283, "grad_norm": 2.3732821941375732, "learning_rate": 1.9999060363123096e-05, "loss": 0.9463, "step": 1432 }, { "epoch": 0.2339496347087874, "grad_norm": 2.402902603149414, "learning_rate": 1.999905600798597e-05, "loss": 0.9248, "step": 1433 }, { "epoch": 0.2341128933512918, "grad_norm": 2.02632212638855, "learning_rate": 1.99990516427798e-05, "loss": 0.9383, "step": 1434 }, { "epoch": 0.23427615199379617, "grad_norm": 2.4528942108154297, "learning_rate": 1.9999047267504587e-05, "loss": 0.974, "step": 1435 }, { "epoch": 0.23443941063630055, "grad_norm": 2.2176969051361084, "learning_rate": 1.9999042882160336e-05, "loss": 1.0099, "step": 1436 }, { "epoch": 0.23460266927880494, "grad_norm": 2.0852878093719482, "learning_rate": 1.9999038486747053e-05, "loss": 0.8545, "step": 1437 }, { "epoch": 0.23476592792130935, "grad_norm": 2.4913487434387207, "learning_rate": 1.999903408126474e-05, "loss": 1.051, "step": 1438 }, { "epoch": 0.23492918656381373, "grad_norm": 2.172224283218384, "learning_rate": 1.99990296657134e-05, "loss": 0.8908, "step": 1439 }, { "epoch": 0.2350924452063181, "grad_norm": 2.3027777671813965, "learning_rate": 1.9999025240093045e-05, "loss": 0.7996, "step": 1440 }, { "epoch": 0.2352557038488225, "grad_norm": 2.466632604598999, "learning_rate": 1.999902080440367e-05, "loss": 1.0025, "step": 1441 }, { "epoch": 0.23541896249132688, "grad_norm": 2.1049580574035645, "learning_rate": 1.9999016358645283e-05, "loss": 0.7542, "step": 1442 }, { "epoch": 0.23558222113383126, "grad_norm": 2.0810844898223877, "learning_rate": 1.999901190281789e-05, "loss": 0.9617, "step": 1443 }, { "epoch": 0.23574547977633567, "grad_norm": 2.6055381298065186, "learning_rate": 1.9999007436921497e-05, "loss": 1.1758, "step": 1444 }, { "epoch": 0.23590873841884005, "grad_norm": 2.4072861671447754, "learning_rate": 1.9999002960956105e-05, "loss": 0.9462, "step": 1445 }, { "epoch": 0.23607199706134444, "grad_norm": 2.1017234325408936, "learning_rate": 1.999899847492172e-05, "loss": 0.7997, "step": 1446 }, { "epoch": 0.23623525570384882, "grad_norm": 2.5558464527130127, "learning_rate": 1.9998993978818345e-05, "loss": 1.0884, "step": 1447 }, { "epoch": 0.2363985143463532, "grad_norm": 2.5460708141326904, "learning_rate": 1.999898947264599e-05, "loss": 1.1385, "step": 1448 }, { "epoch": 0.2365617729888576, "grad_norm": 2.0711004734039307, "learning_rate": 1.999898495640465e-05, "loss": 0.8552, "step": 1449 }, { "epoch": 0.236725031631362, "grad_norm": 2.5267064571380615, "learning_rate": 1.9998980430094333e-05, "loss": 1.0751, "step": 1450 }, { "epoch": 0.23688829027386638, "grad_norm": 1.9658653736114502, "learning_rate": 1.999897589371505e-05, "loss": 0.9529, "step": 1451 }, { "epoch": 0.23705154891637076, "grad_norm": 2.2422935962677, "learning_rate": 1.99989713472668e-05, "loss": 0.955, "step": 1452 }, { "epoch": 0.23721480755887514, "grad_norm": 2.404816150665283, "learning_rate": 1.9998966790749586e-05, "loss": 1.1408, "step": 1453 }, { "epoch": 0.23737806620137952, "grad_norm": 2.3228487968444824, "learning_rate": 1.999896222416342e-05, "loss": 0.8905, "step": 1454 }, { "epoch": 0.23754132484388393, "grad_norm": 2.3918845653533936, "learning_rate": 1.99989576475083e-05, "loss": 1.0516, "step": 1455 }, { "epoch": 0.23770458348638832, "grad_norm": 2.2459566593170166, "learning_rate": 1.999895306078423e-05, "loss": 0.9331, "step": 1456 }, { "epoch": 0.2378678421288927, "grad_norm": 2.4983103275299072, "learning_rate": 1.999894846399122e-05, "loss": 1.1322, "step": 1457 }, { "epoch": 0.23803110077139708, "grad_norm": 2.425989866256714, "learning_rate": 1.999894385712927e-05, "loss": 0.8786, "step": 1458 }, { "epoch": 0.23819435941390146, "grad_norm": 2.1459083557128906, "learning_rate": 1.9998939240198384e-05, "loss": 1.5598, "step": 1459 }, { "epoch": 0.23835761805640587, "grad_norm": 2.112872362136841, "learning_rate": 1.999893461319857e-05, "loss": 1.0029, "step": 1460 }, { "epoch": 0.23852087669891026, "grad_norm": 2.5439507961273193, "learning_rate": 1.9998929976129834e-05, "loss": 1.072, "step": 1461 }, { "epoch": 0.23868413534141464, "grad_norm": 2.1317644119262695, "learning_rate": 1.9998925328992175e-05, "loss": 0.9668, "step": 1462 }, { "epoch": 0.23884739398391902, "grad_norm": 2.0046586990356445, "learning_rate": 1.9998920671785602e-05, "loss": 0.8435, "step": 1463 }, { "epoch": 0.2390106526264234, "grad_norm": 2.114110231399536, "learning_rate": 1.999891600451012e-05, "loss": 0.9386, "step": 1464 }, { "epoch": 0.23917391126892779, "grad_norm": 1.9382222890853882, "learning_rate": 1.999891132716573e-05, "loss": 0.903, "step": 1465 }, { "epoch": 0.2393371699114322, "grad_norm": 2.161062240600586, "learning_rate": 1.999890663975244e-05, "loss": 0.9577, "step": 1466 }, { "epoch": 0.23950042855393658, "grad_norm": 2.14208722114563, "learning_rate": 1.9998901942270254e-05, "loss": 1.0309, "step": 1467 }, { "epoch": 0.23966368719644096, "grad_norm": 2.3023059368133545, "learning_rate": 1.9998897234719177e-05, "loss": 0.9797, "step": 1468 }, { "epoch": 0.23982694583894534, "grad_norm": 2.7637124061584473, "learning_rate": 1.9998892517099212e-05, "loss": 1.211, "step": 1469 }, { "epoch": 0.23999020448144973, "grad_norm": 2.2507212162017822, "learning_rate": 1.9998887789410363e-05, "loss": 0.9452, "step": 1470 }, { "epoch": 0.24015346312395414, "grad_norm": 2.3210856914520264, "learning_rate": 1.999888305165264e-05, "loss": 1.0974, "step": 1471 }, { "epoch": 0.24031672176645852, "grad_norm": 2.1683733463287354, "learning_rate": 1.9998878303826045e-05, "loss": 0.9439, "step": 1472 }, { "epoch": 0.2404799804089629, "grad_norm": 2.733599901199341, "learning_rate": 1.999887354593058e-05, "loss": 1.0075, "step": 1473 }, { "epoch": 0.24064323905146728, "grad_norm": 2.223419189453125, "learning_rate": 1.999886877796625e-05, "loss": 0.9918, "step": 1474 }, { "epoch": 0.24080649769397167, "grad_norm": 2.3882501125335693, "learning_rate": 1.9998863999933065e-05, "loss": 1.0608, "step": 1475 }, { "epoch": 0.24096975633647605, "grad_norm": 2.3184542655944824, "learning_rate": 1.9998859211831024e-05, "loss": 0.8315, "step": 1476 }, { "epoch": 0.24113301497898046, "grad_norm": 3.0580360889434814, "learning_rate": 1.9998854413660137e-05, "loss": 1.3356, "step": 1477 }, { "epoch": 0.24129627362148484, "grad_norm": 2.5476059913635254, "learning_rate": 1.9998849605420404e-05, "loss": 0.8755, "step": 1478 }, { "epoch": 0.24145953226398922, "grad_norm": 2.3434176445007324, "learning_rate": 1.9998844787111834e-05, "loss": 0.8769, "step": 1479 }, { "epoch": 0.2416227909064936, "grad_norm": 2.677049160003662, "learning_rate": 1.999883995873443e-05, "loss": 1.0425, "step": 1480 }, { "epoch": 0.241786049548998, "grad_norm": 2.4634435176849365, "learning_rate": 1.9998835120288197e-05, "loss": 1.0394, "step": 1481 }, { "epoch": 0.2419493081915024, "grad_norm": 2.212402820587158, "learning_rate": 1.999883027177314e-05, "loss": 0.7754, "step": 1482 }, { "epoch": 0.24211256683400678, "grad_norm": 2.1681182384490967, "learning_rate": 1.9998825413189262e-05, "loss": 1.0417, "step": 1483 }, { "epoch": 0.24227582547651116, "grad_norm": 2.4645488262176514, "learning_rate": 1.999882054453657e-05, "loss": 1.0478, "step": 1484 }, { "epoch": 0.24243908411901555, "grad_norm": 2.2799720764160156, "learning_rate": 1.9998815665815066e-05, "loss": 1.0213, "step": 1485 }, { "epoch": 0.24260234276151993, "grad_norm": 2.281770944595337, "learning_rate": 1.9998810777024762e-05, "loss": 0.9329, "step": 1486 }, { "epoch": 0.24276560140402434, "grad_norm": 2.2412912845611572, "learning_rate": 1.9998805878165656e-05, "loss": 0.9762, "step": 1487 }, { "epoch": 0.24292886004652872, "grad_norm": 2.2016985416412354, "learning_rate": 1.9998800969237754e-05, "loss": 0.8546, "step": 1488 }, { "epoch": 0.2430921186890331, "grad_norm": 2.065603733062744, "learning_rate": 1.9998796050241066e-05, "loss": 1.0957, "step": 1489 }, { "epoch": 0.2432553773315375, "grad_norm": 2.2019879817962646, "learning_rate": 1.999879112117559e-05, "loss": 0.9278, "step": 1490 }, { "epoch": 0.24341863597404187, "grad_norm": 2.5850536823272705, "learning_rate": 1.9998786182041333e-05, "loss": 1.2565, "step": 1491 }, { "epoch": 0.24358189461654625, "grad_norm": 2.2490878105163574, "learning_rate": 1.9998781232838302e-05, "loss": 0.8471, "step": 1492 }, { "epoch": 0.24374515325905066, "grad_norm": 2.3886966705322266, "learning_rate": 1.99987762735665e-05, "loss": 0.9601, "step": 1493 }, { "epoch": 0.24390841190155504, "grad_norm": 2.222541093826294, "learning_rate": 1.9998771304225933e-05, "loss": 1.0927, "step": 1494 }, { "epoch": 0.24407167054405943, "grad_norm": 2.399160623550415, "learning_rate": 1.9998766324816606e-05, "loss": 0.9948, "step": 1495 }, { "epoch": 0.2442349291865638, "grad_norm": 2.2189831733703613, "learning_rate": 1.9998761335338527e-05, "loss": 1.0703, "step": 1496 }, { "epoch": 0.2443981878290682, "grad_norm": 2.16328763961792, "learning_rate": 1.9998756335791696e-05, "loss": 0.9389, "step": 1497 }, { "epoch": 0.2445614464715726, "grad_norm": 2.092993974685669, "learning_rate": 1.999875132617612e-05, "loss": 0.8331, "step": 1498 }, { "epoch": 0.24472470511407698, "grad_norm": 2.309347629547119, "learning_rate": 1.9998746306491802e-05, "loss": 0.9673, "step": 1499 }, { "epoch": 0.24488796375658137, "grad_norm": 2.4202919006347656, "learning_rate": 1.9998741276738753e-05, "loss": 0.9527, "step": 1500 }, { "epoch": 0.24505122239908575, "grad_norm": 2.4826650619506836, "learning_rate": 1.9998736236916973e-05, "loss": 1.1269, "step": 1501 }, { "epoch": 0.24521448104159013, "grad_norm": 2.2730469703674316, "learning_rate": 1.9998731187026464e-05, "loss": 1.1311, "step": 1502 }, { "epoch": 0.24537773968409451, "grad_norm": 2.3420541286468506, "learning_rate": 1.999872612706724e-05, "loss": 1.0369, "step": 1503 }, { "epoch": 0.24554099832659892, "grad_norm": 2.153437852859497, "learning_rate": 1.99987210570393e-05, "loss": 0.9874, "step": 1504 }, { "epoch": 0.2457042569691033, "grad_norm": 2.032949209213257, "learning_rate": 1.999871597694265e-05, "loss": 1.045, "step": 1505 }, { "epoch": 0.2458675156116077, "grad_norm": 2.3785126209259033, "learning_rate": 1.9998710886777298e-05, "loss": 0.921, "step": 1506 }, { "epoch": 0.24603077425411207, "grad_norm": 2.154026985168457, "learning_rate": 1.9998705786543247e-05, "loss": 0.9143, "step": 1507 }, { "epoch": 0.24619403289661645, "grad_norm": 2.1284878253936768, "learning_rate": 1.99987006762405e-05, "loss": 1.0371, "step": 1508 }, { "epoch": 0.24635729153912087, "grad_norm": 2.11383056640625, "learning_rate": 1.9998695555869063e-05, "loss": 1.047, "step": 1509 }, { "epoch": 0.24652055018162525, "grad_norm": 2.1327617168426514, "learning_rate": 1.9998690425428943e-05, "loss": 0.9771, "step": 1510 }, { "epoch": 0.24668380882412963, "grad_norm": 2.110987424850464, "learning_rate": 1.9998685284920146e-05, "loss": 0.8581, "step": 1511 }, { "epoch": 0.246847067466634, "grad_norm": 2.160046100616455, "learning_rate": 1.9998680134342675e-05, "loss": 0.861, "step": 1512 }, { "epoch": 0.2470103261091384, "grad_norm": 2.345703125, "learning_rate": 1.9998674973696534e-05, "loss": 1.0274, "step": 1513 }, { "epoch": 0.24717358475164278, "grad_norm": 1.9081501960754395, "learning_rate": 1.999866980298173e-05, "loss": 0.8382, "step": 1514 }, { "epoch": 0.2473368433941472, "grad_norm": 2.9701004028320312, "learning_rate": 1.999866462219827e-05, "loss": 0.9373, "step": 1515 }, { "epoch": 0.24750010203665157, "grad_norm": 2.34753155708313, "learning_rate": 1.9998659431346158e-05, "loss": 0.891, "step": 1516 }, { "epoch": 0.24766336067915595, "grad_norm": 2.2245185375213623, "learning_rate": 1.9998654230425396e-05, "loss": 1.0074, "step": 1517 }, { "epoch": 0.24782661932166034, "grad_norm": 2.178072929382324, "learning_rate": 1.9998649019435994e-05, "loss": 1.0016, "step": 1518 }, { "epoch": 0.24798987796416472, "grad_norm": 2.284916400909424, "learning_rate": 1.999864379837795e-05, "loss": 0.7719, "step": 1519 }, { "epoch": 0.24815313660666913, "grad_norm": 2.2304069995880127, "learning_rate": 1.9998638567251283e-05, "loss": 0.8286, "step": 1520 }, { "epoch": 0.2483163952491735, "grad_norm": 2.528050661087036, "learning_rate": 1.9998633326055984e-05, "loss": 0.9881, "step": 1521 }, { "epoch": 0.2484796538916779, "grad_norm": 2.3047289848327637, "learning_rate": 1.9998628074792066e-05, "loss": 1.0666, "step": 1522 }, { "epoch": 0.24864291253418228, "grad_norm": 1.966029405593872, "learning_rate": 1.999862281345953e-05, "loss": 0.9146, "step": 1523 }, { "epoch": 0.24880617117668666, "grad_norm": 2.1465721130371094, "learning_rate": 1.9998617542058384e-05, "loss": 0.8277, "step": 1524 }, { "epoch": 0.24896942981919104, "grad_norm": 2.6145849227905273, "learning_rate": 1.9998612260588634e-05, "loss": 1.1829, "step": 1525 }, { "epoch": 0.24913268846169545, "grad_norm": 2.1193745136260986, "learning_rate": 1.9998606969050285e-05, "loss": 0.9246, "step": 1526 }, { "epoch": 0.24929594710419983, "grad_norm": 2.849768877029419, "learning_rate": 1.999860166744334e-05, "loss": 0.9778, "step": 1527 }, { "epoch": 0.24945920574670422, "grad_norm": 2.1113669872283936, "learning_rate": 1.9998596355767805e-05, "loss": 0.8719, "step": 1528 }, { "epoch": 0.2496224643892086, "grad_norm": 2.4507896900177, "learning_rate": 1.9998591034023688e-05, "loss": 1.0836, "step": 1529 }, { "epoch": 0.24978572303171298, "grad_norm": 2.362757444381714, "learning_rate": 1.9998585702210992e-05, "loss": 1.0842, "step": 1530 }, { "epoch": 0.2499489816742174, "grad_norm": 2.245173692703247, "learning_rate": 1.999858036032972e-05, "loss": 0.878, "step": 1531 }, { "epoch": 0.25011224031672175, "grad_norm": 2.061924457550049, "learning_rate": 1.9998575008379887e-05, "loss": 0.9249, "step": 1532 }, { "epoch": 0.25027549895922613, "grad_norm": 2.1389143466949463, "learning_rate": 1.9998569646361484e-05, "loss": 0.9873, "step": 1533 }, { "epoch": 0.25043875760173057, "grad_norm": 2.0788917541503906, "learning_rate": 1.9998564274274527e-05, "loss": 0.9244, "step": 1534 }, { "epoch": 0.25060201624423495, "grad_norm": 2.3605082035064697, "learning_rate": 1.9998558892119017e-05, "loss": 1.0441, "step": 1535 }, { "epoch": 0.25076527488673933, "grad_norm": 2.5390236377716064, "learning_rate": 1.9998553499894963e-05, "loss": 1.1871, "step": 1536 }, { "epoch": 0.2509285335292437, "grad_norm": 2.2017407417297363, "learning_rate": 1.999854809760237e-05, "loss": 0.8647, "step": 1537 }, { "epoch": 0.2510917921717481, "grad_norm": 2.0021510124206543, "learning_rate": 1.999854268524124e-05, "loss": 0.8848, "step": 1538 }, { "epoch": 0.2512550508142525, "grad_norm": 2.070573091506958, "learning_rate": 1.999853726281158e-05, "loss": 0.9853, "step": 1539 }, { "epoch": 0.25141830945675686, "grad_norm": 2.2029378414154053, "learning_rate": 1.9998531830313394e-05, "loss": 0.9324, "step": 1540 }, { "epoch": 0.25158156809926124, "grad_norm": 2.452939748764038, "learning_rate": 1.9998526387746692e-05, "loss": 0.9051, "step": 1541 }, { "epoch": 0.2517448267417656, "grad_norm": 1.935509443283081, "learning_rate": 1.999852093511147e-05, "loss": 0.8392, "step": 1542 }, { "epoch": 0.25190808538427, "grad_norm": 2.1782240867614746, "learning_rate": 1.9998515472407747e-05, "loss": 0.8205, "step": 1543 }, { "epoch": 0.2520713440267744, "grad_norm": 2.0976052284240723, "learning_rate": 1.999850999963552e-05, "loss": 0.9226, "step": 1544 }, { "epoch": 0.25223460266927883, "grad_norm": 2.405000925064087, "learning_rate": 1.9998504516794797e-05, "loss": 1.0881, "step": 1545 }, { "epoch": 0.2523978613117832, "grad_norm": 2.09303879737854, "learning_rate": 1.999849902388558e-05, "loss": 0.8997, "step": 1546 }, { "epoch": 0.2525611199542876, "grad_norm": 2.3805785179138184, "learning_rate": 1.999849352090788e-05, "loss": 0.8635, "step": 1547 }, { "epoch": 0.252724378596792, "grad_norm": 2.4358837604522705, "learning_rate": 1.9998488007861695e-05, "loss": 1.0361, "step": 1548 }, { "epoch": 0.25288763723929636, "grad_norm": 2.5397839546203613, "learning_rate": 1.999848248474704e-05, "loss": 1.0601, "step": 1549 }, { "epoch": 0.25305089588180074, "grad_norm": 2.1588454246520996, "learning_rate": 1.9998476951563914e-05, "loss": 0.8145, "step": 1550 }, { "epoch": 0.2532141545243051, "grad_norm": 2.297045946121216, "learning_rate": 1.9998471408312326e-05, "loss": 1.0361, "step": 1551 }, { "epoch": 0.2533774131668095, "grad_norm": 2.5115957260131836, "learning_rate": 1.9998465854992278e-05, "loss": 1.0172, "step": 1552 }, { "epoch": 0.2535406718093139, "grad_norm": 2.2926783561706543, "learning_rate": 1.9998460291603776e-05, "loss": 1.0654, "step": 1553 }, { "epoch": 0.25370393045181827, "grad_norm": 2.562986135482788, "learning_rate": 1.999845471814683e-05, "loss": 1.841, "step": 1554 }, { "epoch": 0.25386718909432265, "grad_norm": 2.257481336593628, "learning_rate": 1.9998449134621442e-05, "loss": 0.9736, "step": 1555 }, { "epoch": 0.2540304477368271, "grad_norm": 2.180178642272949, "learning_rate": 1.999844354102762e-05, "loss": 0.9232, "step": 1556 }, { "epoch": 0.2541937063793315, "grad_norm": 2.0806853771209717, "learning_rate": 1.9998437937365365e-05, "loss": 0.9799, "step": 1557 }, { "epoch": 0.25435696502183586, "grad_norm": 2.0185790061950684, "learning_rate": 1.9998432323634683e-05, "loss": 0.8551, "step": 1558 }, { "epoch": 0.25452022366434024, "grad_norm": 1.8499635457992554, "learning_rate": 1.9998426699835588e-05, "loss": 0.7678, "step": 1559 }, { "epoch": 0.2546834823068446, "grad_norm": 2.428950309753418, "learning_rate": 1.999842106596808e-05, "loss": 1.0661, "step": 1560 }, { "epoch": 0.254846740949349, "grad_norm": 3.280778169631958, "learning_rate": 1.9998415422032163e-05, "loss": 1.0602, "step": 1561 }, { "epoch": 0.2550099995918534, "grad_norm": 1.896915316581726, "learning_rate": 1.9998409768027846e-05, "loss": 0.961, "step": 1562 }, { "epoch": 0.25517325823435777, "grad_norm": 2.0367724895477295, "learning_rate": 1.9998404103955126e-05, "loss": 0.9002, "step": 1563 }, { "epoch": 0.25533651687686215, "grad_norm": 2.1089634895324707, "learning_rate": 1.9998398429814024e-05, "loss": 0.8505, "step": 1564 }, { "epoch": 0.25549977551936653, "grad_norm": 2.0980021953582764, "learning_rate": 1.9998392745604533e-05, "loss": 0.8176, "step": 1565 }, { "epoch": 0.2556630341618709, "grad_norm": 2.2106893062591553, "learning_rate": 1.9998387051326665e-05, "loss": 0.8366, "step": 1566 }, { "epoch": 0.25582629280437535, "grad_norm": 2.1966288089752197, "learning_rate": 1.9998381346980423e-05, "loss": 1.1024, "step": 1567 }, { "epoch": 0.25598955144687974, "grad_norm": 2.3940885066986084, "learning_rate": 1.9998375632565814e-05, "loss": 1.1565, "step": 1568 }, { "epoch": 0.2561528100893841, "grad_norm": 1.9346933364868164, "learning_rate": 1.9998369908082844e-05, "loss": 1.0515, "step": 1569 }, { "epoch": 0.2563160687318885, "grad_norm": 2.501025438308716, "learning_rate": 1.9998364173531514e-05, "loss": 1.052, "step": 1570 }, { "epoch": 0.2564793273743929, "grad_norm": 2.4516477584838867, "learning_rate": 1.999835842891184e-05, "loss": 1.1361, "step": 1571 }, { "epoch": 0.25664258601689727, "grad_norm": 2.2060141563415527, "learning_rate": 1.9998352674223816e-05, "loss": 1.0832, "step": 1572 }, { "epoch": 0.25680584465940165, "grad_norm": 2.3892993927001953, "learning_rate": 1.999834690946746e-05, "loss": 1.0129, "step": 1573 }, { "epoch": 0.25696910330190603, "grad_norm": 2.552574634552002, "learning_rate": 1.999834113464277e-05, "loss": 0.959, "step": 1574 }, { "epoch": 0.2571323619444104, "grad_norm": 2.1598026752471924, "learning_rate": 1.999833534974975e-05, "loss": 1.1556, "step": 1575 }, { "epoch": 0.2572956205869148, "grad_norm": 2.3585972785949707, "learning_rate": 1.9998329554788407e-05, "loss": 1.1857, "step": 1576 }, { "epoch": 0.2574588792294192, "grad_norm": 2.315092086791992, "learning_rate": 1.9998323749758756e-05, "loss": 1.0557, "step": 1577 }, { "epoch": 0.2576221378719236, "grad_norm": 2.0442087650299072, "learning_rate": 1.999831793466079e-05, "loss": 0.9384, "step": 1578 }, { "epoch": 0.257785396514428, "grad_norm": 2.3240437507629395, "learning_rate": 1.9998312109494523e-05, "loss": 1.1384, "step": 1579 }, { "epoch": 0.2579486551569324, "grad_norm": 2.2552802562713623, "learning_rate": 1.9998306274259955e-05, "loss": 1.049, "step": 1580 }, { "epoch": 0.25811191379943677, "grad_norm": 2.3245856761932373, "learning_rate": 1.9998300428957096e-05, "loss": 0.8941, "step": 1581 }, { "epoch": 0.25827517244194115, "grad_norm": 1.9388028383255005, "learning_rate": 1.9998294573585953e-05, "loss": 0.9672, "step": 1582 }, { "epoch": 0.25843843108444553, "grad_norm": 2.3898706436157227, "learning_rate": 1.999828870814653e-05, "loss": 1.0688, "step": 1583 }, { "epoch": 0.2586016897269499, "grad_norm": 2.032569169998169, "learning_rate": 1.9998282832638834e-05, "loss": 0.943, "step": 1584 }, { "epoch": 0.2587649483694543, "grad_norm": 2.325777530670166, "learning_rate": 1.999827694706287e-05, "loss": 0.9167, "step": 1585 }, { "epoch": 0.2589282070119587, "grad_norm": 2.3224668502807617, "learning_rate": 1.999827105141864e-05, "loss": 0.9083, "step": 1586 }, { "epoch": 0.25909146565446306, "grad_norm": 2.0769941806793213, "learning_rate": 1.9998265145706156e-05, "loss": 0.9112, "step": 1587 }, { "epoch": 0.2592547242969675, "grad_norm": 2.6352624893188477, "learning_rate": 1.9998259229925422e-05, "loss": 1.0263, "step": 1588 }, { "epoch": 0.2594179829394719, "grad_norm": 2.4069159030914307, "learning_rate": 1.9998253304076442e-05, "loss": 1.1865, "step": 1589 }, { "epoch": 0.25958124158197626, "grad_norm": 1.9527581930160522, "learning_rate": 1.9998247368159225e-05, "loss": 0.9937, "step": 1590 }, { "epoch": 0.25974450022448065, "grad_norm": 2.4401156902313232, "learning_rate": 1.9998241422173774e-05, "loss": 0.972, "step": 1591 }, { "epoch": 0.25990775886698503, "grad_norm": 2.6888279914855957, "learning_rate": 1.99982354661201e-05, "loss": 0.8185, "step": 1592 }, { "epoch": 0.2600710175094894, "grad_norm": 2.214204788208008, "learning_rate": 1.9998229499998203e-05, "loss": 0.9354, "step": 1593 }, { "epoch": 0.2602342761519938, "grad_norm": 2.2476868629455566, "learning_rate": 1.9998223523808092e-05, "loss": 0.8918, "step": 1594 }, { "epoch": 0.2603975347944982, "grad_norm": 2.2604010105133057, "learning_rate": 1.9998217537549772e-05, "loss": 0.9494, "step": 1595 }, { "epoch": 0.26056079343700256, "grad_norm": 2.822908878326416, "learning_rate": 1.9998211541223253e-05, "loss": 1.1784, "step": 1596 }, { "epoch": 0.26072405207950694, "grad_norm": 2.354078531265259, "learning_rate": 1.999820553482853e-05, "loss": 1.0766, "step": 1597 }, { "epoch": 0.2608873107220113, "grad_norm": 2.2730464935302734, "learning_rate": 1.9998199518365622e-05, "loss": 0.9455, "step": 1598 }, { "epoch": 0.26105056936451576, "grad_norm": 2.1004371643066406, "learning_rate": 1.999819349183453e-05, "loss": 0.8128, "step": 1599 }, { "epoch": 0.26121382800702014, "grad_norm": 2.533642053604126, "learning_rate": 1.999818745523526e-05, "loss": 0.9828, "step": 1600 }, { "epoch": 0.2613770866495245, "grad_norm": 2.904257297515869, "learning_rate": 1.9998181408567815e-05, "loss": 0.9588, "step": 1601 }, { "epoch": 0.2615403452920289, "grad_norm": 2.8340792655944824, "learning_rate": 1.9998175351832207e-05, "loss": 1.0453, "step": 1602 }, { "epoch": 0.2617036039345333, "grad_norm": 2.5507748126983643, "learning_rate": 1.9998169285028436e-05, "loss": 1.0701, "step": 1603 }, { "epoch": 0.2618668625770377, "grad_norm": 2.3377580642700195, "learning_rate": 1.9998163208156517e-05, "loss": 0.9628, "step": 1604 }, { "epoch": 0.26203012121954206, "grad_norm": 2.3866794109344482, "learning_rate": 1.9998157121216442e-05, "loss": 1.0395, "step": 1605 }, { "epoch": 0.26219337986204644, "grad_norm": 2.1547176837921143, "learning_rate": 1.9998151024208232e-05, "loss": 0.9158, "step": 1606 }, { "epoch": 0.2623566385045508, "grad_norm": 2.375020980834961, "learning_rate": 1.9998144917131884e-05, "loss": 0.9389, "step": 1607 }, { "epoch": 0.2625198971470552, "grad_norm": 2.4806644916534424, "learning_rate": 1.9998138799987407e-05, "loss": 1.0151, "step": 1608 }, { "epoch": 0.2626831557895596, "grad_norm": 2.5218520164489746, "learning_rate": 1.999813267277481e-05, "loss": 0.9753, "step": 1609 }, { "epoch": 0.262846414432064, "grad_norm": 2.500267744064331, "learning_rate": 1.999812653549409e-05, "loss": 0.9941, "step": 1610 }, { "epoch": 0.2630096730745684, "grad_norm": 2.330549478530884, "learning_rate": 1.9998120388145264e-05, "loss": 0.9842, "step": 1611 }, { "epoch": 0.2631729317170728, "grad_norm": 2.485522747039795, "learning_rate": 1.999811423072833e-05, "loss": 0.973, "step": 1612 }, { "epoch": 0.26333619035957717, "grad_norm": 2.0989956855773926, "learning_rate": 1.9998108063243298e-05, "loss": 0.8529, "step": 1613 }, { "epoch": 0.26349944900208155, "grad_norm": 2.3262698650360107, "learning_rate": 1.9998101885690176e-05, "loss": 0.9587, "step": 1614 }, { "epoch": 0.26366270764458594, "grad_norm": 2.1522011756896973, "learning_rate": 1.999809569806897e-05, "loss": 1.0415, "step": 1615 }, { "epoch": 0.2638259662870903, "grad_norm": 2.226195812225342, "learning_rate": 1.999808950037968e-05, "loss": 0.9189, "step": 1616 }, { "epoch": 0.2639892249295947, "grad_norm": 2.1276137828826904, "learning_rate": 1.9998083292622315e-05, "loss": 0.873, "step": 1617 }, { "epoch": 0.2641524835720991, "grad_norm": 1.9301470518112183, "learning_rate": 1.999807707479689e-05, "loss": 0.9848, "step": 1618 }, { "epoch": 0.26431574221460347, "grad_norm": 2.3828012943267822, "learning_rate": 1.9998070846903397e-05, "loss": 0.9448, "step": 1619 }, { "epoch": 0.26447900085710785, "grad_norm": 2.3664534091949463, "learning_rate": 1.999806460894185e-05, "loss": 1.0607, "step": 1620 }, { "epoch": 0.2646422594996123, "grad_norm": 2.5018577575683594, "learning_rate": 1.999805836091226e-05, "loss": 1.04, "step": 1621 }, { "epoch": 0.26480551814211667, "grad_norm": 1.9868546724319458, "learning_rate": 1.9998052102814624e-05, "loss": 1.0129, "step": 1622 }, { "epoch": 0.26496877678462105, "grad_norm": 2.041656970977783, "learning_rate": 1.9998045834648953e-05, "loss": 0.9558, "step": 1623 }, { "epoch": 0.26513203542712543, "grad_norm": 2.130643367767334, "learning_rate": 1.9998039556415253e-05, "loss": 0.9915, "step": 1624 }, { "epoch": 0.2652952940696298, "grad_norm": 2.3569979667663574, "learning_rate": 1.999803326811353e-05, "loss": 1.0052, "step": 1625 }, { "epoch": 0.2654585527121342, "grad_norm": 2.2775113582611084, "learning_rate": 1.9998026969743788e-05, "loss": 1.0191, "step": 1626 }, { "epoch": 0.2656218113546386, "grad_norm": 2.1333043575286865, "learning_rate": 1.9998020661306037e-05, "loss": 0.9171, "step": 1627 }, { "epoch": 0.26578506999714296, "grad_norm": 2.5165746212005615, "learning_rate": 1.999801434280028e-05, "loss": 1.1021, "step": 1628 }, { "epoch": 0.26594832863964735, "grad_norm": 2.580007791519165, "learning_rate": 1.9998008014226527e-05, "loss": 1.1493, "step": 1629 }, { "epoch": 0.26611158728215173, "grad_norm": 2.4853732585906982, "learning_rate": 1.999800167558478e-05, "loss": 1.1057, "step": 1630 }, { "epoch": 0.2662748459246561, "grad_norm": 2.509490966796875, "learning_rate": 1.9997995326875053e-05, "loss": 0.9258, "step": 1631 }, { "epoch": 0.26643810456716055, "grad_norm": 2.2175002098083496, "learning_rate": 1.9997988968097345e-05, "loss": 0.7514, "step": 1632 }, { "epoch": 0.26660136320966493, "grad_norm": 2.173711061477661, "learning_rate": 1.999798259925166e-05, "loss": 0.9267, "step": 1633 }, { "epoch": 0.2667646218521693, "grad_norm": 2.260765552520752, "learning_rate": 1.9997976220338015e-05, "loss": 1.0368, "step": 1634 }, { "epoch": 0.2669278804946737, "grad_norm": 2.242966413497925, "learning_rate": 1.999796983135641e-05, "loss": 1.0133, "step": 1635 }, { "epoch": 0.2670911391371781, "grad_norm": 2.297445297241211, "learning_rate": 1.9997963432306852e-05, "loss": 0.8132, "step": 1636 }, { "epoch": 0.26725439777968246, "grad_norm": 2.109172821044922, "learning_rate": 1.9997957023189346e-05, "loss": 0.7029, "step": 1637 }, { "epoch": 0.26741765642218684, "grad_norm": 2.336223602294922, "learning_rate": 1.99979506040039e-05, "loss": 0.9521, "step": 1638 }, { "epoch": 0.2675809150646912, "grad_norm": 2.297769784927368, "learning_rate": 1.999794417475052e-05, "loss": 0.9215, "step": 1639 }, { "epoch": 0.2677441737071956, "grad_norm": 2.0260095596313477, "learning_rate": 1.999793773542922e-05, "loss": 0.8628, "step": 1640 }, { "epoch": 0.2679074323497, "grad_norm": 2.1957921981811523, "learning_rate": 1.9997931286039992e-05, "loss": 0.8311, "step": 1641 }, { "epoch": 0.2680706909922044, "grad_norm": 2.393292188644409, "learning_rate": 1.9997924826582847e-05, "loss": 0.913, "step": 1642 }, { "epoch": 0.2682339496347088, "grad_norm": 2.118877410888672, "learning_rate": 1.99979183570578e-05, "loss": 0.8079, "step": 1643 }, { "epoch": 0.2683972082772132, "grad_norm": 2.3472061157226562, "learning_rate": 1.999791187746485e-05, "loss": 1.0146, "step": 1644 }, { "epoch": 0.2685604669197176, "grad_norm": 2.4542300701141357, "learning_rate": 1.9997905387804007e-05, "loss": 1.0577, "step": 1645 }, { "epoch": 0.26872372556222196, "grad_norm": 2.2689297199249268, "learning_rate": 1.9997898888075273e-05, "loss": 0.8602, "step": 1646 }, { "epoch": 0.26888698420472634, "grad_norm": 2.413187026977539, "learning_rate": 1.999789237827866e-05, "loss": 1.0737, "step": 1647 }, { "epoch": 0.2690502428472307, "grad_norm": 2.8695242404937744, "learning_rate": 1.999788585841417e-05, "loss": 1.1135, "step": 1648 }, { "epoch": 0.2692135014897351, "grad_norm": 2.277801275253296, "learning_rate": 1.9997879328481816e-05, "loss": 0.8762, "step": 1649 }, { "epoch": 0.2693767601322395, "grad_norm": 2.2017099857330322, "learning_rate": 1.9997872788481595e-05, "loss": 0.998, "step": 1650 }, { "epoch": 0.2695400187747439, "grad_norm": 2.3423523902893066, "learning_rate": 1.999786623841352e-05, "loss": 0.9666, "step": 1651 }, { "epoch": 0.26970327741724825, "grad_norm": 2.263124704360962, "learning_rate": 1.9997859678277596e-05, "loss": 0.9831, "step": 1652 }, { "epoch": 0.26986653605975264, "grad_norm": 2.322350025177002, "learning_rate": 1.9997853108073833e-05, "loss": 1.1455, "step": 1653 }, { "epoch": 0.2700297947022571, "grad_norm": 2.317052125930786, "learning_rate": 1.999784652780223e-05, "loss": 1.7202, "step": 1654 }, { "epoch": 0.27019305334476146, "grad_norm": 2.520709753036499, "learning_rate": 1.99978399374628e-05, "loss": 1.4526, "step": 1655 }, { "epoch": 0.27035631198726584, "grad_norm": 2.4490184783935547, "learning_rate": 1.9997833337055552e-05, "loss": 1.0673, "step": 1656 }, { "epoch": 0.2705195706297702, "grad_norm": 2.217167377471924, "learning_rate": 1.9997826726580483e-05, "loss": 0.9699, "step": 1657 }, { "epoch": 0.2706828292722746, "grad_norm": 2.0539422035217285, "learning_rate": 1.999782010603761e-05, "loss": 1.0228, "step": 1658 }, { "epoch": 0.270846087914779, "grad_norm": 2.2571682929992676, "learning_rate": 1.999781347542693e-05, "loss": 1.0471, "step": 1659 }, { "epoch": 0.27100934655728337, "grad_norm": 2.2672970294952393, "learning_rate": 1.9997806834748455e-05, "loss": 1.1108, "step": 1660 }, { "epoch": 0.27117260519978775, "grad_norm": 1.935877799987793, "learning_rate": 1.9997800184002194e-05, "loss": 0.9211, "step": 1661 }, { "epoch": 0.27133586384229214, "grad_norm": 1.9514377117156982, "learning_rate": 1.999779352318815e-05, "loss": 0.9121, "step": 1662 }, { "epoch": 0.2714991224847965, "grad_norm": 1.8865456581115723, "learning_rate": 1.999778685230633e-05, "loss": 0.798, "step": 1663 }, { "epoch": 0.2716623811273009, "grad_norm": 2.085359573364258, "learning_rate": 1.999778017135674e-05, "loss": 1.0143, "step": 1664 }, { "epoch": 0.27182563976980534, "grad_norm": 1.925869107246399, "learning_rate": 1.999777348033939e-05, "loss": 0.8887, "step": 1665 }, { "epoch": 0.2719888984123097, "grad_norm": 2.187286853790283, "learning_rate": 1.9997766779254283e-05, "loss": 0.8771, "step": 1666 }, { "epoch": 0.2721521570548141, "grad_norm": 2.073378801345825, "learning_rate": 1.999776006810143e-05, "loss": 0.8269, "step": 1667 }, { "epoch": 0.2723154156973185, "grad_norm": 2.08562970161438, "learning_rate": 1.9997753346880834e-05, "loss": 0.887, "step": 1668 }, { "epoch": 0.27247867433982287, "grad_norm": 2.305457830429077, "learning_rate": 1.9997746615592503e-05, "loss": 0.8997, "step": 1669 }, { "epoch": 0.27264193298232725, "grad_norm": 2.546196699142456, "learning_rate": 1.9997739874236444e-05, "loss": 1.1771, "step": 1670 }, { "epoch": 0.27280519162483163, "grad_norm": 1.9601502418518066, "learning_rate": 1.9997733122812663e-05, "loss": 0.8145, "step": 1671 }, { "epoch": 0.272968450267336, "grad_norm": 2.5266425609588623, "learning_rate": 1.999772636132117e-05, "loss": 0.9758, "step": 1672 }, { "epoch": 0.2731317089098404, "grad_norm": 2.358248233795166, "learning_rate": 1.9997719589761965e-05, "loss": 0.8518, "step": 1673 }, { "epoch": 0.2732949675523448, "grad_norm": 2.7241437435150146, "learning_rate": 1.999771280813506e-05, "loss": 1.0817, "step": 1674 }, { "epoch": 0.27345822619484916, "grad_norm": 2.140350103378296, "learning_rate": 1.9997706016440462e-05, "loss": 0.773, "step": 1675 }, { "epoch": 0.2736214848373536, "grad_norm": 2.663552761077881, "learning_rate": 1.9997699214678177e-05, "loss": 0.9427, "step": 1676 }, { "epoch": 0.273784743479858, "grad_norm": 1.9863176345825195, "learning_rate": 1.9997692402848214e-05, "loss": 0.6763, "step": 1677 }, { "epoch": 0.27394800212236237, "grad_norm": 2.4186410903930664, "learning_rate": 1.999768558095057e-05, "loss": 0.9637, "step": 1678 }, { "epoch": 0.27411126076486675, "grad_norm": 3.0716097354888916, "learning_rate": 1.9997678748985265e-05, "loss": 0.9685, "step": 1679 }, { "epoch": 0.27427451940737113, "grad_norm": 2.457216501235962, "learning_rate": 1.99976719069523e-05, "loss": 1.0601, "step": 1680 }, { "epoch": 0.2744377780498755, "grad_norm": 2.243696928024292, "learning_rate": 1.999766505485168e-05, "loss": 0.8549, "step": 1681 }, { "epoch": 0.2746010366923799, "grad_norm": 2.188337802886963, "learning_rate": 1.9997658192683412e-05, "loss": 0.8794, "step": 1682 }, { "epoch": 0.2747642953348843, "grad_norm": 2.316406726837158, "learning_rate": 1.999765132044751e-05, "loss": 1.0367, "step": 1683 }, { "epoch": 0.27492755397738866, "grad_norm": 2.30904221534729, "learning_rate": 1.9997644438143974e-05, "loss": 0.8854, "step": 1684 }, { "epoch": 0.27509081261989304, "grad_norm": 2.2979986667633057, "learning_rate": 1.9997637545772812e-05, "loss": 0.9133, "step": 1685 }, { "epoch": 0.2752540712623974, "grad_norm": 2.2740354537963867, "learning_rate": 1.999763064333403e-05, "loss": 0.9431, "step": 1686 }, { "epoch": 0.27541732990490186, "grad_norm": 2.338010549545288, "learning_rate": 1.999762373082764e-05, "loss": 1.025, "step": 1687 }, { "epoch": 0.27558058854740625, "grad_norm": 2.351081371307373, "learning_rate": 1.9997616808253645e-05, "loss": 0.8859, "step": 1688 }, { "epoch": 0.27574384718991063, "grad_norm": 2.4026598930358887, "learning_rate": 1.9997609875612053e-05, "loss": 1.0489, "step": 1689 }, { "epoch": 0.275907105832415, "grad_norm": 2.311793327331543, "learning_rate": 1.9997602932902866e-05, "loss": 0.9617, "step": 1690 }, { "epoch": 0.2760703644749194, "grad_norm": 2.0073466300964355, "learning_rate": 1.99975959801261e-05, "loss": 0.8674, "step": 1691 }, { "epoch": 0.2762336231174238, "grad_norm": 2.258066415786743, "learning_rate": 1.9997589017281755e-05, "loss": 0.8881, "step": 1692 }, { "epoch": 0.27639688175992816, "grad_norm": 2.2021420001983643, "learning_rate": 1.9997582044369843e-05, "loss": 0.9043, "step": 1693 }, { "epoch": 0.27656014040243254, "grad_norm": 2.2010984420776367, "learning_rate": 1.9997575061390368e-05, "loss": 0.9344, "step": 1694 }, { "epoch": 0.2767233990449369, "grad_norm": 2.5051543712615967, "learning_rate": 1.9997568068343333e-05, "loss": 1.1483, "step": 1695 }, { "epoch": 0.2768866576874413, "grad_norm": 2.1381585597991943, "learning_rate": 1.9997561065228753e-05, "loss": 0.987, "step": 1696 }, { "epoch": 0.2770499163299457, "grad_norm": 1.9149789810180664, "learning_rate": 1.999755405204663e-05, "loss": 0.7444, "step": 1697 }, { "epoch": 0.2772131749724501, "grad_norm": 2.3960580825805664, "learning_rate": 1.999754702879698e-05, "loss": 1.1122, "step": 1698 }, { "epoch": 0.2773764336149545, "grad_norm": 2.421442985534668, "learning_rate": 1.9997539995479794e-05, "loss": 1.1685, "step": 1699 }, { "epoch": 0.2775396922574589, "grad_norm": 2.129492998123169, "learning_rate": 1.9997532952095093e-05, "loss": 0.9949, "step": 1700 }, { "epoch": 0.2777029508999633, "grad_norm": 2.243894338607788, "learning_rate": 1.9997525898642876e-05, "loss": 0.9654, "step": 1701 }, { "epoch": 0.27786620954246766, "grad_norm": 2.1951138973236084, "learning_rate": 1.9997518835123155e-05, "loss": 0.9112, "step": 1702 }, { "epoch": 0.27802946818497204, "grad_norm": 2.2304961681365967, "learning_rate": 1.9997511761535935e-05, "loss": 1.0407, "step": 1703 }, { "epoch": 0.2781927268274764, "grad_norm": 2.2933595180511475, "learning_rate": 1.9997504677881224e-05, "loss": 0.9932, "step": 1704 }, { "epoch": 0.2783559854699808, "grad_norm": 2.379369020462036, "learning_rate": 1.9997497584159028e-05, "loss": 0.9062, "step": 1705 }, { "epoch": 0.2785192441124852, "grad_norm": 2.7372329235076904, "learning_rate": 1.999749048036935e-05, "loss": 1.1248, "step": 1706 }, { "epoch": 0.27868250275498957, "grad_norm": 2.449551820755005, "learning_rate": 1.9997483366512206e-05, "loss": 1.1531, "step": 1707 }, { "epoch": 0.278845761397494, "grad_norm": 2.3896937370300293, "learning_rate": 1.99974762425876e-05, "loss": 1.0661, "step": 1708 }, { "epoch": 0.2790090200399984, "grad_norm": 2.32053279876709, "learning_rate": 1.9997469108595538e-05, "loss": 0.9925, "step": 1709 }, { "epoch": 0.2791722786825028, "grad_norm": 2.5755343437194824, "learning_rate": 1.9997461964536024e-05, "loss": 0.8253, "step": 1710 }, { "epoch": 0.27933553732500715, "grad_norm": 2.212538003921509, "learning_rate": 1.9997454810409073e-05, "loss": 0.9059, "step": 1711 }, { "epoch": 0.27949879596751154, "grad_norm": 2.488921880722046, "learning_rate": 1.9997447646214684e-05, "loss": 0.9493, "step": 1712 }, { "epoch": 0.2796620546100159, "grad_norm": 2.3235771656036377, "learning_rate": 1.9997440471952866e-05, "loss": 1.0083, "step": 1713 }, { "epoch": 0.2798253132525203, "grad_norm": 2.0563318729400635, "learning_rate": 1.9997433287623633e-05, "loss": 0.8402, "step": 1714 }, { "epoch": 0.2799885718950247, "grad_norm": 2.5520212650299072, "learning_rate": 1.9997426093226984e-05, "loss": 0.9519, "step": 1715 }, { "epoch": 0.28015183053752907, "grad_norm": 2.3819894790649414, "learning_rate": 1.9997418888762932e-05, "loss": 0.8547, "step": 1716 }, { "epoch": 0.28031508918003345, "grad_norm": 2.2384724617004395, "learning_rate": 1.999741167423148e-05, "loss": 1.064, "step": 1717 }, { "epoch": 0.28047834782253783, "grad_norm": 2.20548939704895, "learning_rate": 1.9997404449632638e-05, "loss": 1.083, "step": 1718 }, { "epoch": 0.28064160646504227, "grad_norm": 2.0349936485290527, "learning_rate": 1.9997397214966413e-05, "loss": 0.9854, "step": 1719 }, { "epoch": 0.28080486510754665, "grad_norm": 2.217069149017334, "learning_rate": 1.999738997023281e-05, "loss": 0.9177, "step": 1720 }, { "epoch": 0.28096812375005104, "grad_norm": 2.2290215492248535, "learning_rate": 1.999738271543184e-05, "loss": 0.91, "step": 1721 }, { "epoch": 0.2811313823925554, "grad_norm": 2.0737690925598145, "learning_rate": 1.9997375450563504e-05, "loss": 1.0694, "step": 1722 }, { "epoch": 0.2812946410350598, "grad_norm": 2.2942206859588623, "learning_rate": 1.9997368175627818e-05, "loss": 1.0193, "step": 1723 }, { "epoch": 0.2814578996775642, "grad_norm": 2.15690016746521, "learning_rate": 1.9997360890624783e-05, "loss": 0.802, "step": 1724 }, { "epoch": 0.28162115832006857, "grad_norm": 2.163816213607788, "learning_rate": 1.999735359555441e-05, "loss": 0.9, "step": 1725 }, { "epoch": 0.28178441696257295, "grad_norm": 2.829559087753296, "learning_rate": 1.9997346290416703e-05, "loss": 0.9583, "step": 1726 }, { "epoch": 0.28194767560507733, "grad_norm": 2.512258291244507, "learning_rate": 1.9997338975211668e-05, "loss": 1.1696, "step": 1727 }, { "epoch": 0.2821109342475817, "grad_norm": 2.7595059871673584, "learning_rate": 1.999733164993932e-05, "loss": 1.088, "step": 1728 }, { "epoch": 0.2822741928900861, "grad_norm": 2.56469464302063, "learning_rate": 1.999732431459966e-05, "loss": 0.8273, "step": 1729 }, { "epoch": 0.28243745153259053, "grad_norm": 1.90068781375885, "learning_rate": 1.9997316969192696e-05, "loss": 0.7508, "step": 1730 }, { "epoch": 0.2826007101750949, "grad_norm": 2.1129913330078125, "learning_rate": 1.999730961371844e-05, "loss": 0.8875, "step": 1731 }, { "epoch": 0.2827639688175993, "grad_norm": 2.1892049312591553, "learning_rate": 1.9997302248176894e-05, "loss": 0.9783, "step": 1732 }, { "epoch": 0.2829272274601037, "grad_norm": 2.0149967670440674, "learning_rate": 1.9997294872568066e-05, "loss": 0.7448, "step": 1733 }, { "epoch": 0.28309048610260806, "grad_norm": 2.2894842624664307, "learning_rate": 1.9997287486891967e-05, "loss": 0.8915, "step": 1734 }, { "epoch": 0.28325374474511245, "grad_norm": 3.0624523162841797, "learning_rate": 1.99972800911486e-05, "loss": 1.0856, "step": 1735 }, { "epoch": 0.28341700338761683, "grad_norm": 2.7290103435516357, "learning_rate": 1.9997272685337975e-05, "loss": 0.9923, "step": 1736 }, { "epoch": 0.2835802620301212, "grad_norm": 2.2716572284698486, "learning_rate": 1.99972652694601e-05, "loss": 1.1229, "step": 1737 }, { "epoch": 0.2837435206726256, "grad_norm": 2.3769171237945557, "learning_rate": 1.999725784351498e-05, "loss": 1.1987, "step": 1738 }, { "epoch": 0.28390677931513, "grad_norm": 2.3682901859283447, "learning_rate": 1.9997250407502627e-05, "loss": 0.923, "step": 1739 }, { "epoch": 0.28407003795763436, "grad_norm": 2.070437431335449, "learning_rate": 1.9997242961423043e-05, "loss": 0.9941, "step": 1740 }, { "epoch": 0.2842332966001388, "grad_norm": 2.248260259628296, "learning_rate": 1.9997235505276235e-05, "loss": 1.0237, "step": 1741 }, { "epoch": 0.2843965552426432, "grad_norm": 2.353444814682007, "learning_rate": 1.999722803906222e-05, "loss": 0.869, "step": 1742 }, { "epoch": 0.28455981388514756, "grad_norm": 2.087865114212036, "learning_rate": 1.9997220562780996e-05, "loss": 0.9296, "step": 1743 }, { "epoch": 0.28472307252765194, "grad_norm": 2.1050267219543457, "learning_rate": 1.9997213076432575e-05, "loss": 0.9705, "step": 1744 }, { "epoch": 0.2848863311701563, "grad_norm": 2.3999109268188477, "learning_rate": 1.9997205580016957e-05, "loss": 0.9756, "step": 1745 }, { "epoch": 0.2850495898126607, "grad_norm": 2.196774959564209, "learning_rate": 1.9997198073534163e-05, "loss": 0.9513, "step": 1746 }, { "epoch": 0.2852128484551651, "grad_norm": 2.0695548057556152, "learning_rate": 1.999719055698419e-05, "loss": 0.8229, "step": 1747 }, { "epoch": 0.2853761070976695, "grad_norm": 3.023033618927002, "learning_rate": 1.999718303036705e-05, "loss": 1.0365, "step": 1748 }, { "epoch": 0.28553936574017386, "grad_norm": 2.175701141357422, "learning_rate": 1.9997175493682745e-05, "loss": 0.8192, "step": 1749 }, { "epoch": 0.28570262438267824, "grad_norm": 2.2661218643188477, "learning_rate": 1.9997167946931293e-05, "loss": 0.9124, "step": 1750 }, { "epoch": 0.2858658830251826, "grad_norm": 2.457913875579834, "learning_rate": 1.9997160390112692e-05, "loss": 0.7647, "step": 1751 }, { "epoch": 0.28602914166768706, "grad_norm": 2.244631290435791, "learning_rate": 1.9997152823226952e-05, "loss": 0.9874, "step": 1752 }, { "epoch": 0.28619240031019144, "grad_norm": 2.134220600128174, "learning_rate": 1.999714524627409e-05, "loss": 0.9269, "step": 1753 }, { "epoch": 0.2863556589526958, "grad_norm": 2.4289536476135254, "learning_rate": 1.9997137659254094e-05, "loss": 1.0393, "step": 1754 }, { "epoch": 0.2865189175952002, "grad_norm": 2.1581428050994873, "learning_rate": 1.9997130062166988e-05, "loss": 1.0649, "step": 1755 }, { "epoch": 0.2866821762377046, "grad_norm": 2.2612667083740234, "learning_rate": 1.9997122455012776e-05, "loss": 0.8685, "step": 1756 }, { "epoch": 0.28684543488020897, "grad_norm": 2.0228095054626465, "learning_rate": 1.9997114837791462e-05, "loss": 0.8604, "step": 1757 }, { "epoch": 0.28700869352271335, "grad_norm": 2.2434167861938477, "learning_rate": 1.999710721050306e-05, "loss": 0.8253, "step": 1758 }, { "epoch": 0.28717195216521774, "grad_norm": 2.064114570617676, "learning_rate": 1.999709957314757e-05, "loss": 0.8035, "step": 1759 }, { "epoch": 0.2873352108077221, "grad_norm": 2.1393179893493652, "learning_rate": 1.9997091925725006e-05, "loss": 0.8987, "step": 1760 }, { "epoch": 0.2874984694502265, "grad_norm": 2.1834259033203125, "learning_rate": 1.999708426823537e-05, "loss": 0.9278, "step": 1761 }, { "epoch": 0.2876617280927309, "grad_norm": 2.135474920272827, "learning_rate": 1.9997076600678676e-05, "loss": 1.1741, "step": 1762 }, { "epoch": 0.2878249867352353, "grad_norm": 2.4918086528778076, "learning_rate": 1.9997068923054925e-05, "loss": 1.0133, "step": 1763 }, { "epoch": 0.2879882453777397, "grad_norm": 2.0163965225219727, "learning_rate": 1.999706123536413e-05, "loss": 0.8634, "step": 1764 }, { "epoch": 0.2881515040202441, "grad_norm": 2.2787725925445557, "learning_rate": 1.9997053537606296e-05, "loss": 0.8585, "step": 1765 }, { "epoch": 0.28831476266274847, "grad_norm": 2.2824740409851074, "learning_rate": 1.9997045829781432e-05, "loss": 0.8824, "step": 1766 }, { "epoch": 0.28847802130525285, "grad_norm": 2.351191997528076, "learning_rate": 1.9997038111889545e-05, "loss": 1.0162, "step": 1767 }, { "epoch": 0.28864127994775723, "grad_norm": 2.037731885910034, "learning_rate": 1.9997030383930647e-05, "loss": 0.8748, "step": 1768 }, { "epoch": 0.2888045385902616, "grad_norm": 1.997275948524475, "learning_rate": 1.999702264590474e-05, "loss": 0.8632, "step": 1769 }, { "epoch": 0.288967797232766, "grad_norm": 2.363363265991211, "learning_rate": 1.9997014897811834e-05, "loss": 0.9279, "step": 1770 }, { "epoch": 0.2891310558752704, "grad_norm": 1.9954055547714233, "learning_rate": 1.9997007139651936e-05, "loss": 0.9655, "step": 1771 }, { "epoch": 0.28929431451777476, "grad_norm": 2.1882033348083496, "learning_rate": 1.999699937142505e-05, "loss": 1.1249, "step": 1772 }, { "epoch": 0.28945757316027915, "grad_norm": 2.3289670944213867, "learning_rate": 1.9996991593131197e-05, "loss": 0.9733, "step": 1773 }, { "epoch": 0.2896208318027836, "grad_norm": 2.334594964981079, "learning_rate": 1.9996983804770372e-05, "loss": 1.0032, "step": 1774 }, { "epoch": 0.28978409044528797, "grad_norm": 2.3415305614471436, "learning_rate": 1.999697600634259e-05, "loss": 0.9444, "step": 1775 }, { "epoch": 0.28994734908779235, "grad_norm": 2.154383897781372, "learning_rate": 1.999696819784785e-05, "loss": 1.099, "step": 1776 }, { "epoch": 0.29011060773029673, "grad_norm": 2.133610725402832, "learning_rate": 1.9996960379286164e-05, "loss": 1.0719, "step": 1777 }, { "epoch": 0.2902738663728011, "grad_norm": 2.2411067485809326, "learning_rate": 1.999695255065755e-05, "loss": 1.0788, "step": 1778 }, { "epoch": 0.2904371250153055, "grad_norm": 1.896086573600769, "learning_rate": 1.9996944711962002e-05, "loss": 0.8539, "step": 1779 }, { "epoch": 0.2906003836578099, "grad_norm": 2.087146282196045, "learning_rate": 1.9996936863199537e-05, "loss": 0.9858, "step": 1780 }, { "epoch": 0.29076364230031426, "grad_norm": 2.506073236465454, "learning_rate": 1.9996929004370152e-05, "loss": 1.1566, "step": 1781 }, { "epoch": 0.29092690094281864, "grad_norm": 2.2587239742279053, "learning_rate": 1.999692113547387e-05, "loss": 0.9451, "step": 1782 }, { "epoch": 0.291090159585323, "grad_norm": 2.5805678367614746, "learning_rate": 1.9996913256510688e-05, "loss": 1.0205, "step": 1783 }, { "epoch": 0.2912534182278274, "grad_norm": 2.261308431625366, "learning_rate": 1.9996905367480618e-05, "loss": 1.1322, "step": 1784 }, { "epoch": 0.29141667687033185, "grad_norm": 1.8181143999099731, "learning_rate": 1.9996897468383663e-05, "loss": 0.724, "step": 1785 }, { "epoch": 0.29157993551283623, "grad_norm": 1.9690752029418945, "learning_rate": 1.9996889559219837e-05, "loss": 0.9462, "step": 1786 }, { "epoch": 0.2917431941553406, "grad_norm": 2.173159599304199, "learning_rate": 1.999688163998915e-05, "loss": 0.909, "step": 1787 }, { "epoch": 0.291906452797845, "grad_norm": 2.365762233734131, "learning_rate": 1.99968737106916e-05, "loss": 1.0998, "step": 1788 }, { "epoch": 0.2920697114403494, "grad_norm": 2.2918171882629395, "learning_rate": 1.9996865771327205e-05, "loss": 0.8516, "step": 1789 }, { "epoch": 0.29223297008285376, "grad_norm": 2.4013500213623047, "learning_rate": 1.9996857821895968e-05, "loss": 1.1067, "step": 1790 }, { "epoch": 0.29239622872535814, "grad_norm": 1.999075174331665, "learning_rate": 1.9996849862397897e-05, "loss": 0.9636, "step": 1791 }, { "epoch": 0.2925594873678625, "grad_norm": 2.4877376556396484, "learning_rate": 1.9996841892833e-05, "loss": 1.0049, "step": 1792 }, { "epoch": 0.2927227460103669, "grad_norm": 2.0414798259735107, "learning_rate": 1.999683391320129e-05, "loss": 0.8821, "step": 1793 }, { "epoch": 0.2928860046528713, "grad_norm": 2.263793706893921, "learning_rate": 1.9996825923502766e-05, "loss": 1.0381, "step": 1794 }, { "epoch": 0.2930492632953757, "grad_norm": 2.078835964202881, "learning_rate": 1.9996817923737443e-05, "loss": 0.9892, "step": 1795 }, { "epoch": 0.2932125219378801, "grad_norm": 2.838857889175415, "learning_rate": 1.9996809913905327e-05, "loss": 0.7975, "step": 1796 }, { "epoch": 0.2933757805803845, "grad_norm": 2.388136148452759, "learning_rate": 1.9996801894006427e-05, "loss": 1.1007, "step": 1797 }, { "epoch": 0.2935390392228889, "grad_norm": 2.319694757461548, "learning_rate": 1.9996793864040748e-05, "loss": 0.9014, "step": 1798 }, { "epoch": 0.29370229786539326, "grad_norm": 2.1210925579071045, "learning_rate": 1.9996785824008302e-05, "loss": 1.0515, "step": 1799 }, { "epoch": 0.29386555650789764, "grad_norm": 2.234692096710205, "learning_rate": 1.9996777773909093e-05, "loss": 0.9736, "step": 1800 }, { "epoch": 0.294028815150402, "grad_norm": 2.2412960529327393, "learning_rate": 1.999676971374313e-05, "loss": 0.954, "step": 1801 }, { "epoch": 0.2941920737929064, "grad_norm": 1.9720630645751953, "learning_rate": 1.9996761643510427e-05, "loss": 0.9463, "step": 1802 }, { "epoch": 0.2943553324354108, "grad_norm": 2.066502094268799, "learning_rate": 1.9996753563210987e-05, "loss": 0.7793, "step": 1803 }, { "epoch": 0.29451859107791517, "grad_norm": 2.0353128910064697, "learning_rate": 1.9996745472844817e-05, "loss": 0.8101, "step": 1804 }, { "epoch": 0.29468184972041955, "grad_norm": 2.017674684524536, "learning_rate": 1.999673737241193e-05, "loss": 0.8605, "step": 1805 }, { "epoch": 0.29484510836292394, "grad_norm": 2.4582204818725586, "learning_rate": 1.9996729261912325e-05, "loss": 0.9876, "step": 1806 }, { "epoch": 0.2950083670054284, "grad_norm": 2.1909172534942627, "learning_rate": 1.9996721141346023e-05, "loss": 0.7002, "step": 1807 }, { "epoch": 0.29517162564793276, "grad_norm": 2.3859176635742188, "learning_rate": 1.9996713010713022e-05, "loss": 0.9628, "step": 1808 }, { "epoch": 0.29533488429043714, "grad_norm": 2.2689006328582764, "learning_rate": 1.9996704870013336e-05, "loss": 1.0471, "step": 1809 }, { "epoch": 0.2954981429329415, "grad_norm": 2.393009662628174, "learning_rate": 1.999669671924697e-05, "loss": 1.0094, "step": 1810 }, { "epoch": 0.2956614015754459, "grad_norm": 2.284860134124756, "learning_rate": 1.999668855841393e-05, "loss": 0.8872, "step": 1811 }, { "epoch": 0.2958246602179503, "grad_norm": 2.42022705078125, "learning_rate": 1.999668038751423e-05, "loss": 0.924, "step": 1812 }, { "epoch": 0.29598791886045467, "grad_norm": 2.097926139831543, "learning_rate": 1.9996672206547874e-05, "loss": 0.7862, "step": 1813 }, { "epoch": 0.29615117750295905, "grad_norm": 2.687265634536743, "learning_rate": 1.999666401551487e-05, "loss": 1.2101, "step": 1814 }, { "epoch": 0.29631443614546343, "grad_norm": 2.167206048965454, "learning_rate": 1.9996655814415235e-05, "loss": 0.868, "step": 1815 }, { "epoch": 0.2964776947879678, "grad_norm": 2.363760232925415, "learning_rate": 1.9996647603248967e-05, "loss": 1.1274, "step": 1816 }, { "epoch": 0.2966409534304722, "grad_norm": 2.518871784210205, "learning_rate": 1.9996639382016075e-05, "loss": 1.0346, "step": 1817 }, { "epoch": 0.29680421207297664, "grad_norm": 2.5150697231292725, "learning_rate": 1.9996631150716573e-05, "loss": 1.1477, "step": 1818 }, { "epoch": 0.296967470715481, "grad_norm": 2.2987048625946045, "learning_rate": 1.9996622909350463e-05, "loss": 0.8676, "step": 1819 }, { "epoch": 0.2971307293579854, "grad_norm": 1.8667775392532349, "learning_rate": 1.999661465791776e-05, "loss": 0.7664, "step": 1820 }, { "epoch": 0.2972939880004898, "grad_norm": 2.6247787475585938, "learning_rate": 1.9996606396418464e-05, "loss": 1.0458, "step": 1821 }, { "epoch": 0.29745724664299417, "grad_norm": 2.195392608642578, "learning_rate": 1.999659812485259e-05, "loss": 1.1182, "step": 1822 }, { "epoch": 0.29762050528549855, "grad_norm": 1.9957014322280884, "learning_rate": 1.9996589843220148e-05, "loss": 0.8267, "step": 1823 }, { "epoch": 0.29778376392800293, "grad_norm": 1.8753207921981812, "learning_rate": 1.999658155152114e-05, "loss": 0.7389, "step": 1824 }, { "epoch": 0.2979470225705073, "grad_norm": 2.1934688091278076, "learning_rate": 1.9996573249755573e-05, "loss": 0.9375, "step": 1825 }, { "epoch": 0.2981102812130117, "grad_norm": 2.035888910293579, "learning_rate": 1.9996564937923464e-05, "loss": 0.9029, "step": 1826 }, { "epoch": 0.2982735398555161, "grad_norm": 2.503812551498413, "learning_rate": 1.9996556616024817e-05, "loss": 1.0332, "step": 1827 }, { "epoch": 0.29843679849802046, "grad_norm": 2.4947330951690674, "learning_rate": 1.999654828405964e-05, "loss": 1.1278, "step": 1828 }, { "epoch": 0.2986000571405249, "grad_norm": 2.050753355026245, "learning_rate": 1.999653994202794e-05, "loss": 0.8583, "step": 1829 }, { "epoch": 0.2987633157830293, "grad_norm": 2.221581220626831, "learning_rate": 1.9996531589929725e-05, "loss": 1.1037, "step": 1830 }, { "epoch": 0.29892657442553366, "grad_norm": 2.2006335258483887, "learning_rate": 1.999652322776501e-05, "loss": 1.0575, "step": 1831 }, { "epoch": 0.29908983306803805, "grad_norm": 2.361781597137451, "learning_rate": 1.9996514855533796e-05, "loss": 1.0388, "step": 1832 }, { "epoch": 0.29925309171054243, "grad_norm": 2.1568288803100586, "learning_rate": 1.9996506473236095e-05, "loss": 0.908, "step": 1833 }, { "epoch": 0.2994163503530468, "grad_norm": 2.2944083213806152, "learning_rate": 1.9996498080871913e-05, "loss": 0.9799, "step": 1834 }, { "epoch": 0.2995796089955512, "grad_norm": 2.647690534591675, "learning_rate": 1.999648967844126e-05, "loss": 0.9652, "step": 1835 }, { "epoch": 0.2997428676380556, "grad_norm": 2.3497045040130615, "learning_rate": 1.9996481265944146e-05, "loss": 0.967, "step": 1836 }, { "epoch": 0.29990612628055996, "grad_norm": 2.1509971618652344, "learning_rate": 1.999647284338058e-05, "loss": 0.8766, "step": 1837 }, { "epoch": 0.30006938492306434, "grad_norm": 2.196582555770874, "learning_rate": 1.9996464410750565e-05, "loss": 0.9295, "step": 1838 }, { "epoch": 0.3002326435655688, "grad_norm": 2.223280429840088, "learning_rate": 1.9996455968054115e-05, "loss": 0.9258, "step": 1839 }, { "epoch": 0.30039590220807316, "grad_norm": 2.3706820011138916, "learning_rate": 1.9996447515291236e-05, "loss": 0.8871, "step": 1840 }, { "epoch": 0.30055916085057754, "grad_norm": 2.203500270843506, "learning_rate": 1.9996439052461935e-05, "loss": 0.88, "step": 1841 }, { "epoch": 0.3007224194930819, "grad_norm": 2.3259363174438477, "learning_rate": 1.9996430579566227e-05, "loss": 1.166, "step": 1842 }, { "epoch": 0.3008856781355863, "grad_norm": 2.0105044841766357, "learning_rate": 1.9996422096604112e-05, "loss": 0.9735, "step": 1843 }, { "epoch": 0.3010489367780907, "grad_norm": 2.0992789268493652, "learning_rate": 1.9996413603575603e-05, "loss": 0.9938, "step": 1844 }, { "epoch": 0.3012121954205951, "grad_norm": 2.4526548385620117, "learning_rate": 1.999640510048071e-05, "loss": 0.9913, "step": 1845 }, { "epoch": 0.30137545406309946, "grad_norm": 2.1673238277435303, "learning_rate": 1.9996396587319438e-05, "loss": 0.9126, "step": 1846 }, { "epoch": 0.30153871270560384, "grad_norm": 2.379727840423584, "learning_rate": 1.99963880640918e-05, "loss": 1.467, "step": 1847 }, { "epoch": 0.3017019713481082, "grad_norm": 2.4432058334350586, "learning_rate": 1.99963795307978e-05, "loss": 1.0507, "step": 1848 }, { "epoch": 0.3018652299906126, "grad_norm": 2.0599846839904785, "learning_rate": 1.999637098743745e-05, "loss": 0.8815, "step": 1849 }, { "epoch": 0.30202848863311704, "grad_norm": 2.3709397315979004, "learning_rate": 1.9996362434010754e-05, "loss": 1.0773, "step": 1850 }, { "epoch": 0.3021917472756214, "grad_norm": 2.1630303859710693, "learning_rate": 1.9996353870517727e-05, "loss": 1.0406, "step": 1851 }, { "epoch": 0.3023550059181258, "grad_norm": 5.419304847717285, "learning_rate": 1.999634529695837e-05, "loss": 0.7661, "step": 1852 }, { "epoch": 0.3025182645606302, "grad_norm": 2.2432124614715576, "learning_rate": 1.99963367133327e-05, "loss": 0.9495, "step": 1853 }, { "epoch": 0.3026815232031346, "grad_norm": 2.6006953716278076, "learning_rate": 1.999632811964072e-05, "loss": 0.8108, "step": 1854 }, { "epoch": 0.30284478184563896, "grad_norm": 2.358459234237671, "learning_rate": 1.9996319515882437e-05, "loss": 0.9541, "step": 1855 }, { "epoch": 0.30300804048814334, "grad_norm": 2.0351223945617676, "learning_rate": 1.999631090205787e-05, "loss": 0.816, "step": 1856 }, { "epoch": 0.3031712991306477, "grad_norm": 1.9229481220245361, "learning_rate": 1.9996302278167015e-05, "loss": 0.928, "step": 1857 }, { "epoch": 0.3033345577731521, "grad_norm": 2.1130125522613525, "learning_rate": 1.9996293644209886e-05, "loss": 0.9349, "step": 1858 }, { "epoch": 0.3034978164156565, "grad_norm": 2.1527414321899414, "learning_rate": 1.9996285000186496e-05, "loss": 0.9109, "step": 1859 }, { "epoch": 0.30366107505816087, "grad_norm": 2.6694931983947754, "learning_rate": 1.9996276346096847e-05, "loss": 0.9562, "step": 1860 }, { "epoch": 0.3038243337006653, "grad_norm": 1.9501043558120728, "learning_rate": 1.9996267681940954e-05, "loss": 0.8062, "step": 1861 }, { "epoch": 0.3039875923431697, "grad_norm": 2.1534764766693115, "learning_rate": 1.9996259007718816e-05, "loss": 0.8883, "step": 1862 }, { "epoch": 0.30415085098567407, "grad_norm": 2.2884817123413086, "learning_rate": 1.9996250323430454e-05, "loss": 0.7966, "step": 1863 }, { "epoch": 0.30431410962817845, "grad_norm": 2.2576916217803955, "learning_rate": 1.9996241629075865e-05, "loss": 0.8951, "step": 1864 }, { "epoch": 0.30447736827068284, "grad_norm": 2.4795234203338623, "learning_rate": 1.9996232924655068e-05, "loss": 1.0171, "step": 1865 }, { "epoch": 0.3046406269131872, "grad_norm": 2.3633334636688232, "learning_rate": 1.9996224210168064e-05, "loss": 0.9066, "step": 1866 }, { "epoch": 0.3048038855556916, "grad_norm": 2.052485942840576, "learning_rate": 1.9996215485614866e-05, "loss": 0.8591, "step": 1867 }, { "epoch": 0.304967144198196, "grad_norm": 2.3114430904388428, "learning_rate": 1.999620675099548e-05, "loss": 0.8355, "step": 1868 }, { "epoch": 0.30513040284070037, "grad_norm": 2.459855318069458, "learning_rate": 1.999619800630992e-05, "loss": 0.9194, "step": 1869 }, { "epoch": 0.30529366148320475, "grad_norm": 2.4072425365448, "learning_rate": 1.999618925155819e-05, "loss": 0.8884, "step": 1870 }, { "epoch": 0.30545692012570913, "grad_norm": 2.284618854522705, "learning_rate": 1.99961804867403e-05, "loss": 1.0507, "step": 1871 }, { "epoch": 0.30562017876821357, "grad_norm": 2.274831771850586, "learning_rate": 1.9996171711856258e-05, "loss": 0.8397, "step": 1872 }, { "epoch": 0.30578343741071795, "grad_norm": 2.380213737487793, "learning_rate": 1.9996162926906073e-05, "loss": 1.0664, "step": 1873 }, { "epoch": 0.30594669605322233, "grad_norm": 2.435422658920288, "learning_rate": 1.9996154131889756e-05, "loss": 0.9538, "step": 1874 }, { "epoch": 0.3061099546957267, "grad_norm": 2.2473526000976562, "learning_rate": 1.9996145326807313e-05, "loss": 0.8381, "step": 1875 }, { "epoch": 0.3062732133382311, "grad_norm": 2.5036516189575195, "learning_rate": 1.9996136511658758e-05, "loss": 0.9812, "step": 1876 }, { "epoch": 0.3064364719807355, "grad_norm": 2.4131202697753906, "learning_rate": 1.999612768644409e-05, "loss": 1.0851, "step": 1877 }, { "epoch": 0.30659973062323986, "grad_norm": 2.074706792831421, "learning_rate": 1.999611885116333e-05, "loss": 0.7702, "step": 1878 }, { "epoch": 0.30676298926574425, "grad_norm": 2.6941747665405273, "learning_rate": 1.9996110005816478e-05, "loss": 1.0339, "step": 1879 }, { "epoch": 0.30692624790824863, "grad_norm": 2.2137928009033203, "learning_rate": 1.9996101150403543e-05, "loss": 0.7859, "step": 1880 }, { "epoch": 0.307089506550753, "grad_norm": 2.206000328063965, "learning_rate": 1.999609228492454e-05, "loss": 0.8538, "step": 1881 }, { "epoch": 0.3072527651932574, "grad_norm": 2.4029436111450195, "learning_rate": 1.9996083409379477e-05, "loss": 1.0059, "step": 1882 }, { "epoch": 0.30741602383576183, "grad_norm": 2.1993868350982666, "learning_rate": 1.9996074523768358e-05, "loss": 0.7822, "step": 1883 }, { "epoch": 0.3075792824782662, "grad_norm": 2.259855270385742, "learning_rate": 1.9996065628091194e-05, "loss": 1.041, "step": 1884 }, { "epoch": 0.3077425411207706, "grad_norm": 2.6312127113342285, "learning_rate": 1.9996056722348e-05, "loss": 1.0809, "step": 1885 }, { "epoch": 0.307905799763275, "grad_norm": 2.402372121810913, "learning_rate": 1.9996047806538774e-05, "loss": 0.7589, "step": 1886 }, { "epoch": 0.30806905840577936, "grad_norm": 2.1683380603790283, "learning_rate": 1.999603888066353e-05, "loss": 0.8481, "step": 1887 }, { "epoch": 0.30823231704828374, "grad_norm": 2.2182719707489014, "learning_rate": 1.9996029944722283e-05, "loss": 0.8417, "step": 1888 }, { "epoch": 0.3083955756907881, "grad_norm": 2.53362774848938, "learning_rate": 1.999602099871503e-05, "loss": 1.0031, "step": 1889 }, { "epoch": 0.3085588343332925, "grad_norm": 2.3196370601654053, "learning_rate": 1.999601204264179e-05, "loss": 1.0138, "step": 1890 }, { "epoch": 0.3087220929757969, "grad_norm": 2.1313841342926025, "learning_rate": 1.9996003076502567e-05, "loss": 0.8826, "step": 1891 }, { "epoch": 0.3088853516183013, "grad_norm": 2.307976722717285, "learning_rate": 1.9995994100297374e-05, "loss": 1.1606, "step": 1892 }, { "epoch": 0.30904861026080566, "grad_norm": 2.456875801086426, "learning_rate": 1.9995985114026215e-05, "loss": 0.9748, "step": 1893 }, { "epoch": 0.3092118689033101, "grad_norm": 2.2529234886169434, "learning_rate": 1.9995976117689103e-05, "loss": 0.9549, "step": 1894 }, { "epoch": 0.3093751275458145, "grad_norm": 2.2278850078582764, "learning_rate": 1.9995967111286044e-05, "loss": 1.0525, "step": 1895 }, { "epoch": 0.30953838618831886, "grad_norm": 2.0293984413146973, "learning_rate": 1.9995958094817053e-05, "loss": 0.9524, "step": 1896 }, { "epoch": 0.30970164483082324, "grad_norm": 2.322810173034668, "learning_rate": 1.9995949068282128e-05, "loss": 0.7795, "step": 1897 }, { "epoch": 0.3098649034733276, "grad_norm": 2.898709774017334, "learning_rate": 1.999594003168129e-05, "loss": 0.9007, "step": 1898 }, { "epoch": 0.310028162115832, "grad_norm": 2.153977394104004, "learning_rate": 1.999593098501454e-05, "loss": 0.8632, "step": 1899 }, { "epoch": 0.3101914207583364, "grad_norm": 1.8902404308319092, "learning_rate": 1.9995921928281893e-05, "loss": 0.7912, "step": 1900 }, { "epoch": 0.31035467940084077, "grad_norm": 2.064795970916748, "learning_rate": 1.9995912861483355e-05, "loss": 0.8242, "step": 1901 }, { "epoch": 0.31051793804334515, "grad_norm": 2.057786703109741, "learning_rate": 1.9995903784618936e-05, "loss": 0.9381, "step": 1902 }, { "epoch": 0.31068119668584954, "grad_norm": 2.115062713623047, "learning_rate": 1.999589469768864e-05, "loss": 0.849, "step": 1903 }, { "epoch": 0.3108444553283539, "grad_norm": 2.1812825202941895, "learning_rate": 1.9995885600692485e-05, "loss": 0.8691, "step": 1904 }, { "epoch": 0.31100771397085836, "grad_norm": 2.2959213256835938, "learning_rate": 1.9995876493630473e-05, "loss": 1.0265, "step": 1905 }, { "epoch": 0.31117097261336274, "grad_norm": 2.5628418922424316, "learning_rate": 1.9995867376502624e-05, "loss": 0.8736, "step": 1906 }, { "epoch": 0.3113342312558671, "grad_norm": 1.9588905572891235, "learning_rate": 1.999585824930893e-05, "loss": 0.8359, "step": 1907 }, { "epoch": 0.3114974898983715, "grad_norm": 2.0684986114501953, "learning_rate": 1.999584911204941e-05, "loss": 0.7631, "step": 1908 }, { "epoch": 0.3116607485408759, "grad_norm": 2.327775001525879, "learning_rate": 1.9995839964724078e-05, "loss": 0.9819, "step": 1909 }, { "epoch": 0.31182400718338027, "grad_norm": 2.471426486968994, "learning_rate": 1.9995830807332934e-05, "loss": 1.0497, "step": 1910 }, { "epoch": 0.31198726582588465, "grad_norm": 2.795081615447998, "learning_rate": 1.9995821639875993e-05, "loss": 0.9949, "step": 1911 }, { "epoch": 0.31215052446838903, "grad_norm": 2.845214366912842, "learning_rate": 1.999581246235326e-05, "loss": 0.9592, "step": 1912 }, { "epoch": 0.3123137831108934, "grad_norm": 2.3473212718963623, "learning_rate": 1.999580327476475e-05, "loss": 0.9192, "step": 1913 }, { "epoch": 0.3124770417533978, "grad_norm": 2.1011221408843994, "learning_rate": 1.9995794077110464e-05, "loss": 0.9326, "step": 1914 }, { "epoch": 0.3126403003959022, "grad_norm": 2.3332345485687256, "learning_rate": 1.9995784869390418e-05, "loss": 0.9522, "step": 1915 }, { "epoch": 0.3128035590384066, "grad_norm": 2.337526798248291, "learning_rate": 1.9995775651604622e-05, "loss": 0.9752, "step": 1916 }, { "epoch": 0.312966817680911, "grad_norm": 2.1353020668029785, "learning_rate": 1.9995766423753077e-05, "loss": 0.9199, "step": 1917 }, { "epoch": 0.3131300763234154, "grad_norm": 2.2876875400543213, "learning_rate": 1.99957571858358e-05, "loss": 1.0367, "step": 1918 }, { "epoch": 0.31329333496591977, "grad_norm": 2.1771531105041504, "learning_rate": 1.9995747937852803e-05, "loss": 0.7713, "step": 1919 }, { "epoch": 0.31345659360842415, "grad_norm": 2.130089044570923, "learning_rate": 1.9995738679804086e-05, "loss": 0.908, "step": 1920 }, { "epoch": 0.31361985225092853, "grad_norm": 2.383592367172241, "learning_rate": 1.9995729411689663e-05, "loss": 0.6984, "step": 1921 }, { "epoch": 0.3137831108934329, "grad_norm": 2.2531981468200684, "learning_rate": 1.9995720133509544e-05, "loss": 0.8238, "step": 1922 }, { "epoch": 0.3139463695359373, "grad_norm": 2.9069082736968994, "learning_rate": 1.9995710845263736e-05, "loss": 0.939, "step": 1923 }, { "epoch": 0.3141096281784417, "grad_norm": 2.290740966796875, "learning_rate": 1.9995701546952252e-05, "loss": 1.0394, "step": 1924 }, { "epoch": 0.31427288682094606, "grad_norm": 1.8700591325759888, "learning_rate": 1.9995692238575097e-05, "loss": 0.7602, "step": 1925 }, { "epoch": 0.31443614546345044, "grad_norm": 2.2481889724731445, "learning_rate": 1.9995682920132283e-05, "loss": 0.9549, "step": 1926 }, { "epoch": 0.3145994041059549, "grad_norm": 2.194477081298828, "learning_rate": 1.999567359162382e-05, "loss": 1.1404, "step": 1927 }, { "epoch": 0.31476266274845927, "grad_norm": 2.3044304847717285, "learning_rate": 1.9995664253049715e-05, "loss": 0.9811, "step": 1928 }, { "epoch": 0.31492592139096365, "grad_norm": 2.171403169631958, "learning_rate": 1.9995654904409983e-05, "loss": 0.8437, "step": 1929 }, { "epoch": 0.31508918003346803, "grad_norm": 1.9033031463623047, "learning_rate": 1.9995645545704624e-05, "loss": 0.7277, "step": 1930 }, { "epoch": 0.3152524386759724, "grad_norm": 2.0085577964782715, "learning_rate": 1.9995636176933653e-05, "loss": 0.8095, "step": 1931 }, { "epoch": 0.3154156973184768, "grad_norm": 2.5117721557617188, "learning_rate": 1.9995626798097082e-05, "loss": 1.0718, "step": 1932 }, { "epoch": 0.3155789559609812, "grad_norm": 2.1465444564819336, "learning_rate": 1.9995617409194917e-05, "loss": 0.9325, "step": 1933 }, { "epoch": 0.31574221460348556, "grad_norm": 2.561150550842285, "learning_rate": 1.9995608010227165e-05, "loss": 1.0459, "step": 1934 }, { "epoch": 0.31590547324598994, "grad_norm": 2.4200587272644043, "learning_rate": 1.9995598601193842e-05, "loss": 1.1183, "step": 1935 }, { "epoch": 0.3160687318884943, "grad_norm": 2.4876551628112793, "learning_rate": 1.9995589182094952e-05, "loss": 1.0658, "step": 1936 }, { "epoch": 0.3162319905309987, "grad_norm": 1.9665910005569458, "learning_rate": 1.999557975293051e-05, "loss": 0.7901, "step": 1937 }, { "epoch": 0.31639524917350315, "grad_norm": 2.49641489982605, "learning_rate": 1.9995570313700516e-05, "loss": 0.9481, "step": 1938 }, { "epoch": 0.31655850781600753, "grad_norm": 3.1538190841674805, "learning_rate": 1.9995560864404986e-05, "loss": 0.8603, "step": 1939 }, { "epoch": 0.3167217664585119, "grad_norm": 2.141737937927246, "learning_rate": 1.999555140504393e-05, "loss": 0.9025, "step": 1940 }, { "epoch": 0.3168850251010163, "grad_norm": 2.2524075508117676, "learning_rate": 1.999554193561736e-05, "loss": 0.8228, "step": 1941 }, { "epoch": 0.3170482837435207, "grad_norm": 2.5607335567474365, "learning_rate": 1.9995532456125276e-05, "loss": 0.9923, "step": 1942 }, { "epoch": 0.31721154238602506, "grad_norm": 2.2738871574401855, "learning_rate": 1.99955229665677e-05, "loss": 1.104, "step": 1943 }, { "epoch": 0.31737480102852944, "grad_norm": 2.4409875869750977, "learning_rate": 1.999551346694463e-05, "loss": 1.0368, "step": 1944 }, { "epoch": 0.3175380596710338, "grad_norm": 2.068826675415039, "learning_rate": 1.999550395725608e-05, "loss": 0.936, "step": 1945 }, { "epoch": 0.3177013183135382, "grad_norm": 2.0867598056793213, "learning_rate": 1.9995494437502064e-05, "loss": 0.8273, "step": 1946 }, { "epoch": 0.3178645769560426, "grad_norm": 1.9942004680633545, "learning_rate": 1.9995484907682585e-05, "loss": 0.9291, "step": 1947 }, { "epoch": 0.31802783559854697, "grad_norm": 1.9516021013259888, "learning_rate": 1.9995475367797657e-05, "loss": 0.7602, "step": 1948 }, { "epoch": 0.3181910942410514, "grad_norm": 2.075179100036621, "learning_rate": 1.9995465817847285e-05, "loss": 0.9861, "step": 1949 }, { "epoch": 0.3183543528835558, "grad_norm": 2.7094974517822266, "learning_rate": 1.9995456257831484e-05, "loss": 0.9338, "step": 1950 }, { "epoch": 0.3185176115260602, "grad_norm": 2.3906681537628174, "learning_rate": 1.9995446687750262e-05, "loss": 0.9018, "step": 1951 }, { "epoch": 0.31868087016856456, "grad_norm": 2.440882921218872, "learning_rate": 1.999543710760363e-05, "loss": 0.821, "step": 1952 }, { "epoch": 0.31884412881106894, "grad_norm": 2.168727397918701, "learning_rate": 1.999542751739159e-05, "loss": 0.8328, "step": 1953 }, { "epoch": 0.3190073874535733, "grad_norm": 2.1331562995910645, "learning_rate": 1.9995417917114158e-05, "loss": 0.9479, "step": 1954 }, { "epoch": 0.3191706460960777, "grad_norm": 2.4438023567199707, "learning_rate": 1.9995408306771346e-05, "loss": 1.1519, "step": 1955 }, { "epoch": 0.3193339047385821, "grad_norm": 2.0842325687408447, "learning_rate": 1.999539868636316e-05, "loss": 0.9023, "step": 1956 }, { "epoch": 0.31949716338108647, "grad_norm": 2.1083788871765137, "learning_rate": 1.9995389055889607e-05, "loss": 0.8745, "step": 1957 }, { "epoch": 0.31966042202359085, "grad_norm": 2.7526612281799316, "learning_rate": 1.99953794153507e-05, "loss": 0.9408, "step": 1958 }, { "epoch": 0.3198236806660953, "grad_norm": 2.8871612548828125, "learning_rate": 1.999536976474645e-05, "loss": 0.7876, "step": 1959 }, { "epoch": 0.31998693930859967, "grad_norm": 2.1247785091400146, "learning_rate": 1.999536010407687e-05, "loss": 0.8282, "step": 1960 }, { "epoch": 0.32015019795110405, "grad_norm": 2.213501453399658, "learning_rate": 1.999535043334196e-05, "loss": 0.8825, "step": 1961 }, { "epoch": 0.32031345659360844, "grad_norm": 2.230036735534668, "learning_rate": 1.9995340752541734e-05, "loss": 0.9411, "step": 1962 }, { "epoch": 0.3204767152361128, "grad_norm": 2.5333335399627686, "learning_rate": 1.9995331061676202e-05, "loss": 0.9424, "step": 1963 }, { "epoch": 0.3206399738786172, "grad_norm": 2.2996723651885986, "learning_rate": 1.9995321360745378e-05, "loss": 0.946, "step": 1964 }, { "epoch": 0.3208032325211216, "grad_norm": 2.258808135986328, "learning_rate": 1.9995311649749265e-05, "loss": 0.9377, "step": 1965 }, { "epoch": 0.32096649116362597, "grad_norm": 2.205732583999634, "learning_rate": 1.9995301928687876e-05, "loss": 0.8508, "step": 1966 }, { "epoch": 0.32112974980613035, "grad_norm": 2.093491315841675, "learning_rate": 1.9995292197561222e-05, "loss": 0.8182, "step": 1967 }, { "epoch": 0.32129300844863473, "grad_norm": 2.314694881439209, "learning_rate": 1.9995282456369313e-05, "loss": 0.9964, "step": 1968 }, { "epoch": 0.3214562670911391, "grad_norm": 2.4808578491210938, "learning_rate": 1.9995272705112155e-05, "loss": 1.0413, "step": 1969 }, { "epoch": 0.32161952573364355, "grad_norm": 2.299713611602783, "learning_rate": 1.999526294378976e-05, "loss": 0.9024, "step": 1970 }, { "epoch": 0.32178278437614793, "grad_norm": 2.3408026695251465, "learning_rate": 1.999525317240214e-05, "loss": 0.9971, "step": 1971 }, { "epoch": 0.3219460430186523, "grad_norm": 2.387197971343994, "learning_rate": 1.99952433909493e-05, "loss": 0.9663, "step": 1972 }, { "epoch": 0.3221093016611567, "grad_norm": 2.513010025024414, "learning_rate": 1.9995233599431252e-05, "loss": 0.7465, "step": 1973 }, { "epoch": 0.3222725603036611, "grad_norm": 1.784935474395752, "learning_rate": 1.9995223797848008e-05, "loss": 0.7262, "step": 1974 }, { "epoch": 0.32243581894616546, "grad_norm": 1.8477271795272827, "learning_rate": 1.9995213986199576e-05, "loss": 0.8789, "step": 1975 }, { "epoch": 0.32259907758866985, "grad_norm": 2.1493494510650635, "learning_rate": 1.9995204164485967e-05, "loss": 1.0142, "step": 1976 }, { "epoch": 0.32276233623117423, "grad_norm": 2.319883108139038, "learning_rate": 1.9995194332707188e-05, "loss": 0.9799, "step": 1977 }, { "epoch": 0.3229255948736786, "grad_norm": 2.0385282039642334, "learning_rate": 1.9995184490863254e-05, "loss": 0.9161, "step": 1978 }, { "epoch": 0.323088853516183, "grad_norm": 2.0282909870147705, "learning_rate": 1.9995174638954167e-05, "loss": 0.9606, "step": 1979 }, { "epoch": 0.3232521121586874, "grad_norm": 2.1738038063049316, "learning_rate": 1.999516477697995e-05, "loss": 1.1171, "step": 1980 }, { "epoch": 0.3234153708011918, "grad_norm": 2.3258984088897705, "learning_rate": 1.99951549049406e-05, "loss": 1.048, "step": 1981 }, { "epoch": 0.3235786294436962, "grad_norm": 2.3648929595947266, "learning_rate": 1.999514502283613e-05, "loss": 1.0, "step": 1982 }, { "epoch": 0.3237418880862006, "grad_norm": 2.0263330936431885, "learning_rate": 1.9995135130666555e-05, "loss": 0.9626, "step": 1983 }, { "epoch": 0.32390514672870496, "grad_norm": 2.278067111968994, "learning_rate": 1.9995125228431877e-05, "loss": 1.0609, "step": 1984 }, { "epoch": 0.32406840537120934, "grad_norm": 2.182239055633545, "learning_rate": 1.9995115316132115e-05, "loss": 0.8626, "step": 1985 }, { "epoch": 0.3242316640137137, "grad_norm": 2.162590980529785, "learning_rate": 1.9995105393767272e-05, "loss": 1.0223, "step": 1986 }, { "epoch": 0.3243949226562181, "grad_norm": 2.054163694381714, "learning_rate": 1.999509546133736e-05, "loss": 0.9514, "step": 1987 }, { "epoch": 0.3245581812987225, "grad_norm": 1.880192518234253, "learning_rate": 1.9995085518842388e-05, "loss": 0.792, "step": 1988 }, { "epoch": 0.3247214399412269, "grad_norm": 2.197519302368164, "learning_rate": 1.9995075566282375e-05, "loss": 0.9808, "step": 1989 }, { "epoch": 0.32488469858373126, "grad_norm": 2.058189868927002, "learning_rate": 1.9995065603657317e-05, "loss": 0.9672, "step": 1990 }, { "epoch": 0.32504795722623564, "grad_norm": 2.078843832015991, "learning_rate": 1.9995055630967234e-05, "loss": 0.9064, "step": 1991 }, { "epoch": 0.3252112158687401, "grad_norm": 2.2384493350982666, "learning_rate": 1.999504564821213e-05, "loss": 0.8692, "step": 1992 }, { "epoch": 0.32537447451124446, "grad_norm": 2.507277250289917, "learning_rate": 1.9995035655392017e-05, "loss": 0.851, "step": 1993 }, { "epoch": 0.32553773315374884, "grad_norm": 2.3797905445098877, "learning_rate": 1.999502565250691e-05, "loss": 1.7626, "step": 1994 }, { "epoch": 0.3257009917962532, "grad_norm": 2.4022936820983887, "learning_rate": 1.999501563955681e-05, "loss": 0.9421, "step": 1995 }, { "epoch": 0.3258642504387576, "grad_norm": 2.4655656814575195, "learning_rate": 1.9995005616541734e-05, "loss": 0.9923, "step": 1996 }, { "epoch": 0.326027509081262, "grad_norm": 2.080310106277466, "learning_rate": 1.9994995583461692e-05, "loss": 0.9348, "step": 1997 }, { "epoch": 0.3261907677237664, "grad_norm": 2.1733334064483643, "learning_rate": 1.999498554031669e-05, "loss": 0.861, "step": 1998 }, { "epoch": 0.32635402636627076, "grad_norm": 2.284602403640747, "learning_rate": 1.999497548710674e-05, "loss": 0.7705, "step": 1999 }, { "epoch": 0.32651728500877514, "grad_norm": 2.149527072906494, "learning_rate": 1.9994965423831853e-05, "loss": 0.7754, "step": 2000 }, { "epoch": 0.3266805436512795, "grad_norm": 2.6528069972991943, "learning_rate": 1.9994955350492036e-05, "loss": 1.1085, "step": 2001 }, { "epoch": 0.3268438022937839, "grad_norm": 2.4617483615875244, "learning_rate": 1.9994945267087303e-05, "loss": 1.0819, "step": 2002 }, { "epoch": 0.32700706093628834, "grad_norm": 2.2044270038604736, "learning_rate": 1.9994935173617668e-05, "loss": 0.8818, "step": 2003 }, { "epoch": 0.3271703195787927, "grad_norm": 2.2330777645111084, "learning_rate": 1.999492507008313e-05, "loss": 1.0008, "step": 2004 }, { "epoch": 0.3273335782212971, "grad_norm": 1.9187195301055908, "learning_rate": 1.9994914956483708e-05, "loss": 0.8032, "step": 2005 }, { "epoch": 0.3274968368638015, "grad_norm": 2.287264823913574, "learning_rate": 1.9994904832819407e-05, "loss": 1.0513, "step": 2006 }, { "epoch": 0.32766009550630587, "grad_norm": 2.0997748374938965, "learning_rate": 1.9994894699090238e-05, "loss": 0.9403, "step": 2007 }, { "epoch": 0.32782335414881025, "grad_norm": 2.169922351837158, "learning_rate": 1.9994884555296216e-05, "loss": 1.0135, "step": 2008 }, { "epoch": 0.32798661279131464, "grad_norm": 1.8399162292480469, "learning_rate": 1.9994874401437346e-05, "loss": 0.9531, "step": 2009 }, { "epoch": 0.328149871433819, "grad_norm": 2.3304731845855713, "learning_rate": 1.9994864237513645e-05, "loss": 0.9157, "step": 2010 }, { "epoch": 0.3283131300763234, "grad_norm": 1.9055558443069458, "learning_rate": 1.9994854063525114e-05, "loss": 0.7696, "step": 2011 }, { "epoch": 0.3284763887188278, "grad_norm": 3.275575876235962, "learning_rate": 1.999484387947177e-05, "loss": 1.0835, "step": 2012 }, { "epoch": 0.32863964736133217, "grad_norm": 2.178969621658325, "learning_rate": 1.9994833685353616e-05, "loss": 0.8852, "step": 2013 }, { "epoch": 0.3288029060038366, "grad_norm": 2.186030149459839, "learning_rate": 1.9994823481170672e-05, "loss": 0.9727, "step": 2014 }, { "epoch": 0.328966164646341, "grad_norm": 2.189082384109497, "learning_rate": 1.999481326692294e-05, "loss": 0.9327, "step": 2015 }, { "epoch": 0.32912942328884537, "grad_norm": 2.274480104446411, "learning_rate": 1.9994803042610435e-05, "loss": 1.1283, "step": 2016 }, { "epoch": 0.32929268193134975, "grad_norm": 2.0124118328094482, "learning_rate": 1.999479280823317e-05, "loss": 0.8464, "step": 2017 }, { "epoch": 0.32945594057385413, "grad_norm": 2.2133171558380127, "learning_rate": 1.9994782563791145e-05, "loss": 0.9932, "step": 2018 }, { "epoch": 0.3296191992163585, "grad_norm": 2.128962516784668, "learning_rate": 1.999477230928438e-05, "loss": 1.0498, "step": 2019 }, { "epoch": 0.3297824578588629, "grad_norm": 1.8741042613983154, "learning_rate": 1.999476204471288e-05, "loss": 0.9217, "step": 2020 }, { "epoch": 0.3299457165013673, "grad_norm": 2.2913742065429688, "learning_rate": 1.999475177007666e-05, "loss": 1.0893, "step": 2021 }, { "epoch": 0.33010897514387166, "grad_norm": 2.260071039199829, "learning_rate": 1.9994741485375722e-05, "loss": 0.975, "step": 2022 }, { "epoch": 0.33027223378637605, "grad_norm": 1.8261796236038208, "learning_rate": 1.999473119061009e-05, "loss": 0.8696, "step": 2023 }, { "epoch": 0.33043549242888043, "grad_norm": 1.9671235084533691, "learning_rate": 1.9994720885779763e-05, "loss": 0.7925, "step": 2024 }, { "epoch": 0.33059875107138487, "grad_norm": 2.142930269241333, "learning_rate": 1.999471057088475e-05, "loss": 0.9468, "step": 2025 }, { "epoch": 0.33076200971388925, "grad_norm": 2.091610908508301, "learning_rate": 1.9994700245925073e-05, "loss": 0.8645, "step": 2026 }, { "epoch": 0.33092526835639363, "grad_norm": 2.206188917160034, "learning_rate": 1.9994689910900733e-05, "loss": 1.022, "step": 2027 }, { "epoch": 0.331088526998898, "grad_norm": 2.12250018119812, "learning_rate": 1.999467956581174e-05, "loss": 0.8605, "step": 2028 }, { "epoch": 0.3312517856414024, "grad_norm": 2.1260790824890137, "learning_rate": 1.9994669210658112e-05, "loss": 0.9439, "step": 2029 }, { "epoch": 0.3314150442839068, "grad_norm": 2.162956476211548, "learning_rate": 1.9994658845439853e-05, "loss": 0.8384, "step": 2030 }, { "epoch": 0.33157830292641116, "grad_norm": 2.5060272216796875, "learning_rate": 1.9994648470156975e-05, "loss": 0.8788, "step": 2031 }, { "epoch": 0.33174156156891554, "grad_norm": 2.014723300933838, "learning_rate": 1.9994638084809486e-05, "loss": 0.9125, "step": 2032 }, { "epoch": 0.3319048202114199, "grad_norm": 1.939516544342041, "learning_rate": 1.9994627689397402e-05, "loss": 0.7789, "step": 2033 }, { "epoch": 0.3320680788539243, "grad_norm": 2.0263335704803467, "learning_rate": 1.999461728392073e-05, "loss": 0.9457, "step": 2034 }, { "epoch": 0.3322313374964287, "grad_norm": 2.236926555633545, "learning_rate": 1.999460686837948e-05, "loss": 0.9398, "step": 2035 }, { "epoch": 0.33239459613893313, "grad_norm": 2.2217295169830322, "learning_rate": 1.9994596442773665e-05, "loss": 0.9888, "step": 2036 }, { "epoch": 0.3325578547814375, "grad_norm": 2.509582757949829, "learning_rate": 1.9994586007103295e-05, "loss": 0.9791, "step": 2037 }, { "epoch": 0.3327211134239419, "grad_norm": 2.2319276332855225, "learning_rate": 1.9994575561368377e-05, "loss": 0.9423, "step": 2038 }, { "epoch": 0.3328843720664463, "grad_norm": 2.3501384258270264, "learning_rate": 1.9994565105568926e-05, "loss": 0.9936, "step": 2039 }, { "epoch": 0.33304763070895066, "grad_norm": 2.050896406173706, "learning_rate": 1.999455463970495e-05, "loss": 0.8577, "step": 2040 }, { "epoch": 0.33321088935145504, "grad_norm": 2.292370319366455, "learning_rate": 1.999454416377646e-05, "loss": 0.9187, "step": 2041 }, { "epoch": 0.3333741479939594, "grad_norm": 2.061749219894409, "learning_rate": 1.999453367778347e-05, "loss": 0.8661, "step": 2042 }, { "epoch": 0.3335374066364638, "grad_norm": 2.3626863956451416, "learning_rate": 1.9994523181725978e-05, "loss": 0.899, "step": 2043 }, { "epoch": 0.3337006652789682, "grad_norm": 2.219214916229248, "learning_rate": 1.999451267560401e-05, "loss": 0.8948, "step": 2044 }, { "epoch": 0.33386392392147257, "grad_norm": 2.380796194076538, "learning_rate": 1.9994502159417576e-05, "loss": 0.9366, "step": 2045 }, { "epoch": 0.33402718256397695, "grad_norm": 2.1140198707580566, "learning_rate": 1.9994491633166674e-05, "loss": 0.9223, "step": 2046 }, { "epoch": 0.3341904412064814, "grad_norm": 2.3979454040527344, "learning_rate": 1.999448109685132e-05, "loss": 1.9587, "step": 2047 }, { "epoch": 0.3343536998489858, "grad_norm": 2.460454225540161, "learning_rate": 1.999447055047153e-05, "loss": 0.9642, "step": 2048 }, { "epoch": 0.33451695849149016, "grad_norm": 2.3328163623809814, "learning_rate": 1.999445999402731e-05, "loss": 0.8157, "step": 2049 }, { "epoch": 0.33468021713399454, "grad_norm": 2.3049585819244385, "learning_rate": 1.999444942751867e-05, "loss": 1.0342, "step": 2050 }, { "epoch": 0.3348434757764989, "grad_norm": 2.8461382389068604, "learning_rate": 1.9994438850945626e-05, "loss": 0.8226, "step": 2051 }, { "epoch": 0.3350067344190033, "grad_norm": 2.18105149269104, "learning_rate": 1.9994428264308177e-05, "loss": 0.9701, "step": 2052 }, { "epoch": 0.3351699930615077, "grad_norm": 2.1959421634674072, "learning_rate": 1.999441766760635e-05, "loss": 0.9388, "step": 2053 }, { "epoch": 0.33533325170401207, "grad_norm": 2.320424795150757, "learning_rate": 1.9994407060840142e-05, "loss": 0.9234, "step": 2054 }, { "epoch": 0.33549651034651645, "grad_norm": 2.2582578659057617, "learning_rate": 1.999439644400957e-05, "loss": 0.95, "step": 2055 }, { "epoch": 0.33565976898902083, "grad_norm": 2.422736883163452, "learning_rate": 1.9994385817114644e-05, "loss": 0.8733, "step": 2056 }, { "epoch": 0.3358230276315252, "grad_norm": 1.9901894330978394, "learning_rate": 1.9994375180155374e-05, "loss": 0.8736, "step": 2057 }, { "epoch": 0.33598628627402966, "grad_norm": 2.180048942565918, "learning_rate": 1.999436453313177e-05, "loss": 1.003, "step": 2058 }, { "epoch": 0.33614954491653404, "grad_norm": 1.9810211658477783, "learning_rate": 1.9994353876043844e-05, "loss": 0.7661, "step": 2059 }, { "epoch": 0.3363128035590384, "grad_norm": 2.0579397678375244, "learning_rate": 1.9994343208891607e-05, "loss": 0.9963, "step": 2060 }, { "epoch": 0.3364760622015428, "grad_norm": 2.465041160583496, "learning_rate": 1.999433253167507e-05, "loss": 1.032, "step": 2061 }, { "epoch": 0.3366393208440472, "grad_norm": 2.716782808303833, "learning_rate": 1.999432184439424e-05, "loss": 0.9079, "step": 2062 }, { "epoch": 0.33680257948655157, "grad_norm": 2.4450056552886963, "learning_rate": 1.9994311147049134e-05, "loss": 1.0619, "step": 2063 }, { "epoch": 0.33696583812905595, "grad_norm": 2.1064541339874268, "learning_rate": 1.9994300439639755e-05, "loss": 0.9634, "step": 2064 }, { "epoch": 0.33712909677156033, "grad_norm": 2.6513583660125732, "learning_rate": 1.999428972216612e-05, "loss": 0.8964, "step": 2065 }, { "epoch": 0.3372923554140647, "grad_norm": 2.3569247722625732, "learning_rate": 1.9994278994628238e-05, "loss": 0.8866, "step": 2066 }, { "epoch": 0.3374556140565691, "grad_norm": 2.380462646484375, "learning_rate": 1.999426825702612e-05, "loss": 0.9127, "step": 2067 }, { "epoch": 0.3376188726990735, "grad_norm": 2.1882009506225586, "learning_rate": 1.9994257509359775e-05, "loss": 0.8994, "step": 2068 }, { "epoch": 0.3377821313415779, "grad_norm": 2.030829429626465, "learning_rate": 1.999424675162922e-05, "loss": 0.7505, "step": 2069 }, { "epoch": 0.3379453899840823, "grad_norm": 2.229931116104126, "learning_rate": 1.9994235983834455e-05, "loss": 1.0204, "step": 2070 }, { "epoch": 0.3381086486265867, "grad_norm": 2.4455432891845703, "learning_rate": 1.9994225205975503e-05, "loss": 1.12, "step": 2071 }, { "epoch": 0.33827190726909107, "grad_norm": 2.4948627948760986, "learning_rate": 1.9994214418052363e-05, "loss": 1.0179, "step": 2072 }, { "epoch": 0.33843516591159545, "grad_norm": 2.1760003566741943, "learning_rate": 1.9994203620065055e-05, "loss": 0.8521, "step": 2073 }, { "epoch": 0.33859842455409983, "grad_norm": 2.46781325340271, "learning_rate": 1.9994192812013587e-05, "loss": 1.0272, "step": 2074 }, { "epoch": 0.3387616831966042, "grad_norm": 2.51511812210083, "learning_rate": 1.9994181993897968e-05, "loss": 0.8909, "step": 2075 }, { "epoch": 0.3389249418391086, "grad_norm": 2.062363386154175, "learning_rate": 1.999417116571821e-05, "loss": 0.8785, "step": 2076 }, { "epoch": 0.339088200481613, "grad_norm": 2.8004567623138428, "learning_rate": 1.9994160327474325e-05, "loss": 0.7429, "step": 2077 }, { "epoch": 0.33925145912411736, "grad_norm": 2.4795336723327637, "learning_rate": 1.9994149479166324e-05, "loss": 1.2717, "step": 2078 }, { "epoch": 0.33941471776662174, "grad_norm": 2.1139636039733887, "learning_rate": 1.9994138620794218e-05, "loss": 0.9475, "step": 2079 }, { "epoch": 0.3395779764091262, "grad_norm": 2.1901333332061768, "learning_rate": 1.9994127752358014e-05, "loss": 0.8075, "step": 2080 }, { "epoch": 0.33974123505163056, "grad_norm": 2.7274088859558105, "learning_rate": 1.9994116873857728e-05, "loss": 0.9243, "step": 2081 }, { "epoch": 0.33990449369413495, "grad_norm": 2.0786941051483154, "learning_rate": 1.9994105985293372e-05, "loss": 0.9116, "step": 2082 }, { "epoch": 0.34006775233663933, "grad_norm": 2.1323153972625732, "learning_rate": 1.9994095086664947e-05, "loss": 0.8995, "step": 2083 }, { "epoch": 0.3402310109791437, "grad_norm": 2.4291841983795166, "learning_rate": 1.9994084177972476e-05, "loss": 0.8695, "step": 2084 }, { "epoch": 0.3403942696216481, "grad_norm": 2.288094997406006, "learning_rate": 1.9994073259215963e-05, "loss": 0.8031, "step": 2085 }, { "epoch": 0.3405575282641525, "grad_norm": 2.4803199768066406, "learning_rate": 1.999406233039542e-05, "loss": 0.886, "step": 2086 }, { "epoch": 0.34072078690665686, "grad_norm": 2.1942403316497803, "learning_rate": 1.999405139151086e-05, "loss": 0.9558, "step": 2087 }, { "epoch": 0.34088404554916124, "grad_norm": 2.8580310344696045, "learning_rate": 1.9994040442562292e-05, "loss": 1.115, "step": 2088 }, { "epoch": 0.3410473041916656, "grad_norm": 2.238694667816162, "learning_rate": 1.9994029483549732e-05, "loss": 0.8577, "step": 2089 }, { "epoch": 0.34121056283417006, "grad_norm": 2.4264864921569824, "learning_rate": 1.9994018514473183e-05, "loss": 0.835, "step": 2090 }, { "epoch": 0.34137382147667444, "grad_norm": 2.27400279045105, "learning_rate": 1.999400753533266e-05, "loss": 0.8389, "step": 2091 }, { "epoch": 0.3415370801191788, "grad_norm": 2.367957353591919, "learning_rate": 1.9993996546128173e-05, "loss": 1.0911, "step": 2092 }, { "epoch": 0.3417003387616832, "grad_norm": 2.168483257293701, "learning_rate": 1.999398554685974e-05, "loss": 0.941, "step": 2093 }, { "epoch": 0.3418635974041876, "grad_norm": 2.2000865936279297, "learning_rate": 1.999397453752736e-05, "loss": 0.9258, "step": 2094 }, { "epoch": 0.342026856046692, "grad_norm": 2.1025407314300537, "learning_rate": 1.9993963518131054e-05, "loss": 0.8735, "step": 2095 }, { "epoch": 0.34219011468919636, "grad_norm": 2.651447296142578, "learning_rate": 1.9993952488670828e-05, "loss": 1.0375, "step": 2096 }, { "epoch": 0.34235337333170074, "grad_norm": 2.0821502208709717, "learning_rate": 1.9993941449146695e-05, "loss": 0.7812, "step": 2097 }, { "epoch": 0.3425166319742051, "grad_norm": 2.144423484802246, "learning_rate": 1.9993930399558662e-05, "loss": 1.1149, "step": 2098 }, { "epoch": 0.3426798906167095, "grad_norm": 2.2029471397399902, "learning_rate": 1.999391933990675e-05, "loss": 1.0421, "step": 2099 }, { "epoch": 0.3428431492592139, "grad_norm": 2.1124396324157715, "learning_rate": 1.999390827019096e-05, "loss": 0.9474, "step": 2100 }, { "epoch": 0.3430064079017183, "grad_norm": 1.9667856693267822, "learning_rate": 1.9993897190411306e-05, "loss": 0.9063, "step": 2101 }, { "epoch": 0.3431696665442227, "grad_norm": 2.061030387878418, "learning_rate": 1.9993886100567802e-05, "loss": 0.8996, "step": 2102 }, { "epoch": 0.3433329251867271, "grad_norm": 2.0891366004943848, "learning_rate": 1.9993875000660455e-05, "loss": 1.025, "step": 2103 }, { "epoch": 0.34349618382923147, "grad_norm": 2.3173506259918213, "learning_rate": 1.9993863890689283e-05, "loss": 0.9889, "step": 2104 }, { "epoch": 0.34365944247173585, "grad_norm": 2.34025239944458, "learning_rate": 1.9993852770654293e-05, "loss": 1.0616, "step": 2105 }, { "epoch": 0.34382270111424024, "grad_norm": 2.251068592071533, "learning_rate": 1.999384164055549e-05, "loss": 0.8137, "step": 2106 }, { "epoch": 0.3439859597567446, "grad_norm": 2.0504937171936035, "learning_rate": 1.9993830500392896e-05, "loss": 0.8871, "step": 2107 }, { "epoch": 0.344149218399249, "grad_norm": 2.12589955329895, "learning_rate": 1.9993819350166514e-05, "loss": 1.076, "step": 2108 }, { "epoch": 0.3443124770417534, "grad_norm": 2.095710515975952, "learning_rate": 1.999380818987636e-05, "loss": 0.8054, "step": 2109 }, { "epoch": 0.34447573568425777, "grad_norm": 2.30148983001709, "learning_rate": 1.9993797019522446e-05, "loss": 0.8246, "step": 2110 }, { "epoch": 0.34463899432676215, "grad_norm": 2.128077507019043, "learning_rate": 1.999378583910478e-05, "loss": 1.0352, "step": 2111 }, { "epoch": 0.3448022529692666, "grad_norm": 2.619269609451294, "learning_rate": 1.999377464862337e-05, "loss": 0.8868, "step": 2112 }, { "epoch": 0.34496551161177097, "grad_norm": 2.2907278537750244, "learning_rate": 1.9993763448078237e-05, "loss": 0.9743, "step": 2113 }, { "epoch": 0.34512877025427535, "grad_norm": 2.165529251098633, "learning_rate": 1.9993752237469387e-05, "loss": 0.8966, "step": 2114 }, { "epoch": 0.34529202889677973, "grad_norm": 1.9542646408081055, "learning_rate": 1.999374101679683e-05, "loss": 0.7128, "step": 2115 }, { "epoch": 0.3454552875392841, "grad_norm": 2.1606602668762207, "learning_rate": 1.9993729786060576e-05, "loss": 0.9415, "step": 2116 }, { "epoch": 0.3456185461817885, "grad_norm": 2.3581011295318604, "learning_rate": 1.999371854526064e-05, "loss": 0.9914, "step": 2117 }, { "epoch": 0.3457818048242929, "grad_norm": 2.1375253200531006, "learning_rate": 1.9993707294397035e-05, "loss": 0.8501, "step": 2118 }, { "epoch": 0.34594506346679726, "grad_norm": 2.8618838787078857, "learning_rate": 1.9993696033469762e-05, "loss": 0.7966, "step": 2119 }, { "epoch": 0.34610832210930165, "grad_norm": 2.259526014328003, "learning_rate": 1.9993684762478846e-05, "loss": 0.7646, "step": 2120 }, { "epoch": 0.34627158075180603, "grad_norm": 2.0475118160247803, "learning_rate": 1.9993673481424292e-05, "loss": 0.8235, "step": 2121 }, { "epoch": 0.3464348393943104, "grad_norm": 2.335235595703125, "learning_rate": 1.999366219030611e-05, "loss": 1.0237, "step": 2122 }, { "epoch": 0.34659809803681485, "grad_norm": 2.608257293701172, "learning_rate": 1.9993650889124313e-05, "loss": 1.7445, "step": 2123 }, { "epoch": 0.34676135667931923, "grad_norm": 2.4408581256866455, "learning_rate": 1.9993639577878912e-05, "loss": 0.8553, "step": 2124 }, { "epoch": 0.3469246153218236, "grad_norm": 2.3189711570739746, "learning_rate": 1.999362825656992e-05, "loss": 0.9223, "step": 2125 }, { "epoch": 0.347087873964328, "grad_norm": 2.0570454597473145, "learning_rate": 1.9993616925197346e-05, "loss": 0.775, "step": 2126 }, { "epoch": 0.3472511326068324, "grad_norm": 2.3881213665008545, "learning_rate": 1.99936055837612e-05, "loss": 0.975, "step": 2127 }, { "epoch": 0.34741439124933676, "grad_norm": 2.2285678386688232, "learning_rate": 1.99935942322615e-05, "loss": 0.7383, "step": 2128 }, { "epoch": 0.34757764989184115, "grad_norm": 1.9476563930511475, "learning_rate": 1.999358287069825e-05, "loss": 1.0025, "step": 2129 }, { "epoch": 0.3477409085343455, "grad_norm": 2.308231830596924, "learning_rate": 1.9993571499071465e-05, "loss": 0.9545, "step": 2130 }, { "epoch": 0.3479041671768499, "grad_norm": 2.1318538188934326, "learning_rate": 1.9993560117381157e-05, "loss": 0.836, "step": 2131 }, { "epoch": 0.3480674258193543, "grad_norm": 2.246293067932129, "learning_rate": 1.9993548725627338e-05, "loss": 0.8676, "step": 2132 }, { "epoch": 0.3482306844618587, "grad_norm": 2.2373719215393066, "learning_rate": 1.9993537323810015e-05, "loss": 0.9473, "step": 2133 }, { "epoch": 0.3483939431043631, "grad_norm": 2.375591993331909, "learning_rate": 1.9993525911929206e-05, "loss": 1.1422, "step": 2134 }, { "epoch": 0.3485572017468675, "grad_norm": 2.101518392562866, "learning_rate": 1.9993514489984917e-05, "loss": 0.8816, "step": 2135 }, { "epoch": 0.3487204603893719, "grad_norm": 2.192919969558716, "learning_rate": 1.999350305797716e-05, "loss": 1.017, "step": 2136 }, { "epoch": 0.34888371903187626, "grad_norm": 2.3834173679351807, "learning_rate": 1.9993491615905953e-05, "loss": 1.0242, "step": 2137 }, { "epoch": 0.34904697767438064, "grad_norm": 2.317965030670166, "learning_rate": 1.99934801637713e-05, "loss": 0.8636, "step": 2138 }, { "epoch": 0.349210236316885, "grad_norm": 2.3477907180786133, "learning_rate": 1.9993468701573213e-05, "loss": 1.6591, "step": 2139 }, { "epoch": 0.3493734949593894, "grad_norm": 2.342668294906616, "learning_rate": 1.9993457229311707e-05, "loss": 0.8015, "step": 2140 }, { "epoch": 0.3495367536018938, "grad_norm": 1.9518961906433105, "learning_rate": 1.9993445746986793e-05, "loss": 0.8932, "step": 2141 }, { "epoch": 0.3497000122443982, "grad_norm": 2.0530073642730713, "learning_rate": 1.999343425459848e-05, "loss": 0.9125, "step": 2142 }, { "epoch": 0.34986327088690256, "grad_norm": 2.084711790084839, "learning_rate": 1.9993422752146786e-05, "loss": 0.8997, "step": 2143 }, { "epoch": 0.35002652952940694, "grad_norm": 2.10321044921875, "learning_rate": 1.9993411239631713e-05, "loss": 0.8179, "step": 2144 }, { "epoch": 0.3501897881719114, "grad_norm": 2.346308708190918, "learning_rate": 1.999339971705328e-05, "loss": 0.9511, "step": 2145 }, { "epoch": 0.35035304681441576, "grad_norm": 2.47147274017334, "learning_rate": 1.9993388184411495e-05, "loss": 0.9644, "step": 2146 }, { "epoch": 0.35051630545692014, "grad_norm": 2.2305777072906494, "learning_rate": 1.999337664170637e-05, "loss": 1.0437, "step": 2147 }, { "epoch": 0.3506795640994245, "grad_norm": 2.229667901992798, "learning_rate": 1.999336508893792e-05, "loss": 0.9786, "step": 2148 }, { "epoch": 0.3508428227419289, "grad_norm": 2.1389808654785156, "learning_rate": 1.9993353526106152e-05, "loss": 0.8375, "step": 2149 }, { "epoch": 0.3510060813844333, "grad_norm": 2.15337872505188, "learning_rate": 1.999334195321108e-05, "loss": 0.9677, "step": 2150 }, { "epoch": 0.35116934002693767, "grad_norm": 2.6053457260131836, "learning_rate": 1.9993330370252713e-05, "loss": 0.8884, "step": 2151 }, { "epoch": 0.35133259866944205, "grad_norm": 2.1354944705963135, "learning_rate": 1.999331877723107e-05, "loss": 0.9774, "step": 2152 }, { "epoch": 0.35149585731194644, "grad_norm": 2.0861690044403076, "learning_rate": 1.9993307174146155e-05, "loss": 0.8699, "step": 2153 }, { "epoch": 0.3516591159544508, "grad_norm": 2.134557008743286, "learning_rate": 1.999329556099798e-05, "loss": 0.976, "step": 2154 }, { "epoch": 0.3518223745969552, "grad_norm": 1.8021939992904663, "learning_rate": 1.9993283937786562e-05, "loss": 0.7702, "step": 2155 }, { "epoch": 0.35198563323945964, "grad_norm": 2.1763951778411865, "learning_rate": 1.9993272304511913e-05, "loss": 1.1184, "step": 2156 }, { "epoch": 0.352148891881964, "grad_norm": 2.4434990882873535, "learning_rate": 1.9993260661174038e-05, "loss": 0.8916, "step": 2157 }, { "epoch": 0.3523121505244684, "grad_norm": 2.2396368980407715, "learning_rate": 1.999324900777295e-05, "loss": 0.9654, "step": 2158 }, { "epoch": 0.3524754091669728, "grad_norm": 1.6811450719833374, "learning_rate": 1.9993237344308666e-05, "loss": 0.7207, "step": 2159 }, { "epoch": 0.35263866780947717, "grad_norm": 2.126481533050537, "learning_rate": 1.9993225670781192e-05, "loss": 0.8318, "step": 2160 }, { "epoch": 0.35280192645198155, "grad_norm": 2.5312836170196533, "learning_rate": 1.9993213987190543e-05, "loss": 0.8627, "step": 2161 }, { "epoch": 0.35296518509448593, "grad_norm": 2.12343430519104, "learning_rate": 1.9993202293536733e-05, "loss": 1.0181, "step": 2162 }, { "epoch": 0.3531284437369903, "grad_norm": 2.1349616050720215, "learning_rate": 1.9993190589819768e-05, "loss": 0.8591, "step": 2163 }, { "epoch": 0.3532917023794947, "grad_norm": 2.408550977706909, "learning_rate": 1.9993178876039662e-05, "loss": 1.1706, "step": 2164 }, { "epoch": 0.3534549610219991, "grad_norm": 2.338921308517456, "learning_rate": 1.9993167152196432e-05, "loss": 0.9855, "step": 2165 }, { "epoch": 0.35361821966450346, "grad_norm": 2.0626108646392822, "learning_rate": 1.999315541829008e-05, "loss": 0.7688, "step": 2166 }, { "epoch": 0.3537814783070079, "grad_norm": 2.1387109756469727, "learning_rate": 1.9993143674320626e-05, "loss": 0.8958, "step": 2167 }, { "epoch": 0.3539447369495123, "grad_norm": 1.9962046146392822, "learning_rate": 1.9993131920288078e-05, "loss": 0.7148, "step": 2168 }, { "epoch": 0.35410799559201667, "grad_norm": 2.27956485748291, "learning_rate": 1.999312015619245e-05, "loss": 0.8794, "step": 2169 }, { "epoch": 0.35427125423452105, "grad_norm": 2.0613083839416504, "learning_rate": 1.9993108382033752e-05, "loss": 0.816, "step": 2170 }, { "epoch": 0.35443451287702543, "grad_norm": 2.4596760272979736, "learning_rate": 1.9993096597811997e-05, "loss": 0.9815, "step": 2171 }, { "epoch": 0.3545977715195298, "grad_norm": 2.0031962394714355, "learning_rate": 1.9993084803527197e-05, "loss": 0.9398, "step": 2172 }, { "epoch": 0.3547610301620342, "grad_norm": 2.244058609008789, "learning_rate": 1.999307299917936e-05, "loss": 0.9973, "step": 2173 }, { "epoch": 0.3549242888045386, "grad_norm": 2.236382007598877, "learning_rate": 1.9993061184768504e-05, "loss": 0.9558, "step": 2174 }, { "epoch": 0.35508754744704296, "grad_norm": 2.6043248176574707, "learning_rate": 1.9993049360294636e-05, "loss": 0.8723, "step": 2175 }, { "epoch": 0.35525080608954734, "grad_norm": 2.1027164459228516, "learning_rate": 1.9993037525757773e-05, "loss": 0.7825, "step": 2176 }, { "epoch": 0.3554140647320517, "grad_norm": 2.1460907459259033, "learning_rate": 1.999302568115792e-05, "loss": 0.8968, "step": 2177 }, { "epoch": 0.35557732337455616, "grad_norm": 2.357816219329834, "learning_rate": 1.9993013826495097e-05, "loss": 0.8547, "step": 2178 }, { "epoch": 0.35574058201706055, "grad_norm": 2.111359119415283, "learning_rate": 1.999300196176931e-05, "loss": 0.908, "step": 2179 }, { "epoch": 0.35590384065956493, "grad_norm": 2.334989309310913, "learning_rate": 1.999299008698057e-05, "loss": 0.987, "step": 2180 }, { "epoch": 0.3560670993020693, "grad_norm": 2.6345107555389404, "learning_rate": 1.9992978202128895e-05, "loss": 0.948, "step": 2181 }, { "epoch": 0.3562303579445737, "grad_norm": 2.2766902446746826, "learning_rate": 1.9992966307214293e-05, "loss": 1.12, "step": 2182 }, { "epoch": 0.3563936165870781, "grad_norm": 1.8305878639221191, "learning_rate": 1.9992954402236774e-05, "loss": 0.7865, "step": 2183 }, { "epoch": 0.35655687522958246, "grad_norm": 2.214966058731079, "learning_rate": 1.9992942487196358e-05, "loss": 0.7793, "step": 2184 }, { "epoch": 0.35672013387208684, "grad_norm": 2.3188652992248535, "learning_rate": 1.9992930562093048e-05, "loss": 0.8254, "step": 2185 }, { "epoch": 0.3568833925145912, "grad_norm": 2.268418788909912, "learning_rate": 1.9992918626926857e-05, "loss": 0.9278, "step": 2186 }, { "epoch": 0.3570466511570956, "grad_norm": 2.226747751235962, "learning_rate": 1.9992906681697804e-05, "loss": 0.837, "step": 2187 }, { "epoch": 0.3572099097996, "grad_norm": 1.9379140138626099, "learning_rate": 1.9992894726405894e-05, "loss": 0.7272, "step": 2188 }, { "epoch": 0.3573731684421044, "grad_norm": 2.3467555046081543, "learning_rate": 1.9992882761051145e-05, "loss": 0.9209, "step": 2189 }, { "epoch": 0.3575364270846088, "grad_norm": 2.590972423553467, "learning_rate": 1.9992870785633563e-05, "loss": 0.929, "step": 2190 }, { "epoch": 0.3576996857271132, "grad_norm": 2.357642889022827, "learning_rate": 1.999285880015316e-05, "loss": 1.6963, "step": 2191 }, { "epoch": 0.3578629443696176, "grad_norm": 2.321352243423462, "learning_rate": 1.9992846804609955e-05, "loss": 0.8093, "step": 2192 }, { "epoch": 0.35802620301212196, "grad_norm": 2.1331088542938232, "learning_rate": 1.9992834799003956e-05, "loss": 0.8995, "step": 2193 }, { "epoch": 0.35818946165462634, "grad_norm": 2.1914002895355225, "learning_rate": 1.999282278333517e-05, "loss": 0.8782, "step": 2194 }, { "epoch": 0.3583527202971307, "grad_norm": 1.9910295009613037, "learning_rate": 1.999281075760362e-05, "loss": 0.8439, "step": 2195 }, { "epoch": 0.3585159789396351, "grad_norm": 2.1690845489501953, "learning_rate": 1.999279872180931e-05, "loss": 0.8659, "step": 2196 }, { "epoch": 0.3586792375821395, "grad_norm": 2.2820091247558594, "learning_rate": 1.9992786675952256e-05, "loss": 0.993, "step": 2197 }, { "epoch": 0.35884249622464387, "grad_norm": 1.8423489332199097, "learning_rate": 1.9992774620032466e-05, "loss": 0.6868, "step": 2198 }, { "epoch": 0.35900575486714825, "grad_norm": 2.052805185317993, "learning_rate": 1.9992762554049955e-05, "loss": 0.9652, "step": 2199 }, { "epoch": 0.3591690135096527, "grad_norm": 2.368788480758667, "learning_rate": 1.999275047800474e-05, "loss": 1.0008, "step": 2200 }, { "epoch": 0.3593322721521571, "grad_norm": 1.9906386137008667, "learning_rate": 1.9992738391896818e-05, "loss": 0.9164, "step": 2201 }, { "epoch": 0.35949553079466146, "grad_norm": 2.1123273372650146, "learning_rate": 1.999272629572622e-05, "loss": 0.8087, "step": 2202 }, { "epoch": 0.35965878943716584, "grad_norm": 2.163785219192505, "learning_rate": 1.9992714189492943e-05, "loss": 0.7636, "step": 2203 }, { "epoch": 0.3598220480796702, "grad_norm": 2.182392120361328, "learning_rate": 1.999270207319701e-05, "loss": 1.0744, "step": 2204 }, { "epoch": 0.3599853067221746, "grad_norm": 2.6292316913604736, "learning_rate": 1.999268994683843e-05, "loss": 1.0012, "step": 2205 }, { "epoch": 0.360148565364679, "grad_norm": 2.496081829071045, "learning_rate": 1.9992677810417207e-05, "loss": 1.0008, "step": 2206 }, { "epoch": 0.36031182400718337, "grad_norm": 2.3611650466918945, "learning_rate": 1.9992665663933362e-05, "loss": 1.0318, "step": 2207 }, { "epoch": 0.36047508264968775, "grad_norm": 2.1103577613830566, "learning_rate": 1.999265350738691e-05, "loss": 0.7471, "step": 2208 }, { "epoch": 0.36063834129219213, "grad_norm": 2.255551338195801, "learning_rate": 1.9992641340777856e-05, "loss": 0.945, "step": 2209 }, { "epoch": 0.3608015999346965, "grad_norm": 2.042912721633911, "learning_rate": 1.999262916410621e-05, "loss": 0.8392, "step": 2210 }, { "epoch": 0.36096485857720095, "grad_norm": 2.2842702865600586, "learning_rate": 1.9992616977371998e-05, "loss": 0.8474, "step": 2211 }, { "epoch": 0.36112811721970534, "grad_norm": 2.192051410675049, "learning_rate": 1.9992604780575216e-05, "loss": 1.0306, "step": 2212 }, { "epoch": 0.3612913758622097, "grad_norm": 2.4476466178894043, "learning_rate": 1.999259257371589e-05, "loss": 0.9405, "step": 2213 }, { "epoch": 0.3614546345047141, "grad_norm": 2.293199062347412, "learning_rate": 1.999258035679402e-05, "loss": 0.8404, "step": 2214 }, { "epoch": 0.3616178931472185, "grad_norm": 2.5179603099823, "learning_rate": 1.9992568129809627e-05, "loss": 1.0047, "step": 2215 }, { "epoch": 0.36178115178972287, "grad_norm": 2.0449423789978027, "learning_rate": 1.999255589276272e-05, "loss": 0.788, "step": 2216 }, { "epoch": 0.36194441043222725, "grad_norm": 2.334240198135376, "learning_rate": 1.9992543645653313e-05, "loss": 0.8862, "step": 2217 }, { "epoch": 0.36210766907473163, "grad_norm": 2.43619704246521, "learning_rate": 1.9992531388481416e-05, "loss": 0.9423, "step": 2218 }, { "epoch": 0.362270927717236, "grad_norm": 2.5858755111694336, "learning_rate": 1.9992519121247045e-05, "loss": 1.0318, "step": 2219 }, { "epoch": 0.3624341863597404, "grad_norm": 2.763028621673584, "learning_rate": 1.9992506843950207e-05, "loss": 1.0611, "step": 2220 }, { "epoch": 0.36259744500224483, "grad_norm": 2.262590169906616, "learning_rate": 1.999249455659092e-05, "loss": 0.9048, "step": 2221 }, { "epoch": 0.3627607036447492, "grad_norm": 1.978287935256958, "learning_rate": 1.9992482259169188e-05, "loss": 0.9646, "step": 2222 }, { "epoch": 0.3629239622872536, "grad_norm": 2.0319831371307373, "learning_rate": 1.9992469951685034e-05, "loss": 0.8185, "step": 2223 }, { "epoch": 0.363087220929758, "grad_norm": 2.1010732650756836, "learning_rate": 1.9992457634138464e-05, "loss": 0.8115, "step": 2224 }, { "epoch": 0.36325047957226236, "grad_norm": 1.9776264429092407, "learning_rate": 1.9992445306529492e-05, "loss": 0.932, "step": 2225 }, { "epoch": 0.36341373821476675, "grad_norm": 2.70733642578125, "learning_rate": 1.9992432968858128e-05, "loss": 1.1287, "step": 2226 }, { "epoch": 0.36357699685727113, "grad_norm": 2.383721351623535, "learning_rate": 1.999242062112439e-05, "loss": 0.9074, "step": 2227 }, { "epoch": 0.3637402554997755, "grad_norm": 2.4274322986602783, "learning_rate": 1.9992408263328286e-05, "loss": 0.9591, "step": 2228 }, { "epoch": 0.3639035141422799, "grad_norm": 2.2306063175201416, "learning_rate": 1.999239589546983e-05, "loss": 0.865, "step": 2229 }, { "epoch": 0.3640667727847843, "grad_norm": 2.1650025844573975, "learning_rate": 1.9992383517549032e-05, "loss": 1.007, "step": 2230 }, { "epoch": 0.36423003142728866, "grad_norm": 2.162447214126587, "learning_rate": 1.999237112956591e-05, "loss": 0.9327, "step": 2231 }, { "epoch": 0.3643932900697931, "grad_norm": 2.4145126342773438, "learning_rate": 1.999235873152047e-05, "loss": 0.9317, "step": 2232 }, { "epoch": 0.3645565487122975, "grad_norm": 2.1048343181610107, "learning_rate": 1.9992346323412728e-05, "loss": 0.9958, "step": 2233 }, { "epoch": 0.36471980735480186, "grad_norm": 1.872110366821289, "learning_rate": 1.9992333905242697e-05, "loss": 1.0119, "step": 2234 }, { "epoch": 0.36488306599730624, "grad_norm": 1.938316822052002, "learning_rate": 1.9992321477010388e-05, "loss": 0.8157, "step": 2235 }, { "epoch": 0.3650463246398106, "grad_norm": 2.1497247219085693, "learning_rate": 1.9992309038715812e-05, "loss": 0.999, "step": 2236 }, { "epoch": 0.365209583282315, "grad_norm": 1.9800329208374023, "learning_rate": 1.9992296590358987e-05, "loss": 0.9124, "step": 2237 }, { "epoch": 0.3653728419248194, "grad_norm": 2.2014238834381104, "learning_rate": 1.9992284131939918e-05, "loss": 0.8728, "step": 2238 }, { "epoch": 0.3655361005673238, "grad_norm": 2.6112349033355713, "learning_rate": 1.9992271663458623e-05, "loss": 0.7642, "step": 2239 }, { "epoch": 0.36569935920982816, "grad_norm": 1.9591474533081055, "learning_rate": 1.9992259184915115e-05, "loss": 0.8695, "step": 2240 }, { "epoch": 0.36586261785233254, "grad_norm": 2.0417397022247314, "learning_rate": 1.99922466963094e-05, "loss": 0.7663, "step": 2241 }, { "epoch": 0.3660258764948369, "grad_norm": 2.4605541229248047, "learning_rate": 1.99922341976415e-05, "loss": 1.0054, "step": 2242 }, { "epoch": 0.36618913513734136, "grad_norm": 2.291613817214966, "learning_rate": 1.999222168891142e-05, "loss": 0.8368, "step": 2243 }, { "epoch": 0.36635239377984574, "grad_norm": 2.2952418327331543, "learning_rate": 1.9992209170119177e-05, "loss": 1.0905, "step": 2244 }, { "epoch": 0.3665156524223501, "grad_norm": 2.679644823074341, "learning_rate": 1.9992196641264782e-05, "loss": 1.0988, "step": 2245 }, { "epoch": 0.3666789110648545, "grad_norm": 2.282463788986206, "learning_rate": 1.999218410234825e-05, "loss": 0.9432, "step": 2246 }, { "epoch": 0.3668421697073589, "grad_norm": 2.69142484664917, "learning_rate": 1.9992171553369584e-05, "loss": 0.9991, "step": 2247 }, { "epoch": 0.36700542834986327, "grad_norm": 2.083199977874756, "learning_rate": 1.9992158994328812e-05, "loss": 0.7862, "step": 2248 }, { "epoch": 0.36716868699236765, "grad_norm": 2.3319509029388428, "learning_rate": 1.9992146425225932e-05, "loss": 0.9877, "step": 2249 }, { "epoch": 0.36733194563487204, "grad_norm": 1.9969801902770996, "learning_rate": 1.999213384606097e-05, "loss": 0.8515, "step": 2250 }, { "epoch": 0.3674952042773764, "grad_norm": 2.3730595111846924, "learning_rate": 1.9992121256833927e-05, "loss": 0.9218, "step": 2251 }, { "epoch": 0.3676584629198808, "grad_norm": 2.5518863201141357, "learning_rate": 1.9992108657544823e-05, "loss": 1.0718, "step": 2252 }, { "epoch": 0.3678217215623852, "grad_norm": 1.8573054075241089, "learning_rate": 1.9992096048193664e-05, "loss": 0.5913, "step": 2253 }, { "epoch": 0.3679849802048896, "grad_norm": 2.7444937229156494, "learning_rate": 1.999208342878047e-05, "loss": 0.9895, "step": 2254 }, { "epoch": 0.368148238847394, "grad_norm": 2.201964855194092, "learning_rate": 1.999207079930525e-05, "loss": 0.8339, "step": 2255 }, { "epoch": 0.3683114974898984, "grad_norm": 2.202101230621338, "learning_rate": 1.9992058159768018e-05, "loss": 0.8526, "step": 2256 }, { "epoch": 0.36847475613240277, "grad_norm": 2.18135929107666, "learning_rate": 1.999204551016879e-05, "loss": 0.9135, "step": 2257 }, { "epoch": 0.36863801477490715, "grad_norm": 2.245607852935791, "learning_rate": 1.9992032850507566e-05, "loss": 0.8863, "step": 2258 }, { "epoch": 0.36880127341741153, "grad_norm": 2.2053489685058594, "learning_rate": 1.9992020180784372e-05, "loss": 1.71, "step": 2259 }, { "epoch": 0.3689645320599159, "grad_norm": 2.4611237049102783, "learning_rate": 1.9992007500999216e-05, "loss": 0.7997, "step": 2260 }, { "epoch": 0.3691277907024203, "grad_norm": 2.557905673980713, "learning_rate": 1.9991994811152115e-05, "loss": 0.8783, "step": 2261 }, { "epoch": 0.3692910493449247, "grad_norm": 2.4489026069641113, "learning_rate": 1.9991982111243075e-05, "loss": 1.0168, "step": 2262 }, { "epoch": 0.36945430798742906, "grad_norm": 1.969509243965149, "learning_rate": 1.9991969401272107e-05, "loss": 0.9258, "step": 2263 }, { "epoch": 0.36961756662993345, "grad_norm": 2.2505760192871094, "learning_rate": 1.9991956681239232e-05, "loss": 0.7833, "step": 2264 }, { "epoch": 0.3697808252724379, "grad_norm": 2.386566400527954, "learning_rate": 1.9991943951144462e-05, "loss": 0.9975, "step": 2265 }, { "epoch": 0.36994408391494227, "grad_norm": 2.1028547286987305, "learning_rate": 1.9991931210987805e-05, "loss": 0.9321, "step": 2266 }, { "epoch": 0.37010734255744665, "grad_norm": 2.2028493881225586, "learning_rate": 1.9991918460769274e-05, "loss": 0.9036, "step": 2267 }, { "epoch": 0.37027060119995103, "grad_norm": 2.378711700439453, "learning_rate": 1.9991905700488886e-05, "loss": 1.0446, "step": 2268 }, { "epoch": 0.3704338598424554, "grad_norm": 2.0718767642974854, "learning_rate": 1.999189293014665e-05, "loss": 0.8442, "step": 2269 }, { "epoch": 0.3705971184849598, "grad_norm": 1.9486868381500244, "learning_rate": 1.9991880149742583e-05, "loss": 0.868, "step": 2270 }, { "epoch": 0.3707603771274642, "grad_norm": 2.341618061065674, "learning_rate": 1.9991867359276696e-05, "loss": 0.6921, "step": 2271 }, { "epoch": 0.37092363576996856, "grad_norm": 1.854557991027832, "learning_rate": 1.9991854558749e-05, "loss": 0.7725, "step": 2272 }, { "epoch": 0.37108689441247295, "grad_norm": 2.4644439220428467, "learning_rate": 1.9991841748159506e-05, "loss": 0.9248, "step": 2273 }, { "epoch": 0.3712501530549773, "grad_norm": 2.175028085708618, "learning_rate": 1.9991828927508234e-05, "loss": 0.8805, "step": 2274 }, { "epoch": 0.3714134116974817, "grad_norm": 2.0989503860473633, "learning_rate": 1.9991816096795193e-05, "loss": 0.914, "step": 2275 }, { "epoch": 0.37157667033998615, "grad_norm": 2.0568270683288574, "learning_rate": 1.9991803256020393e-05, "loss": 0.7703, "step": 2276 }, { "epoch": 0.37173992898249053, "grad_norm": 1.9770033359527588, "learning_rate": 1.999179040518385e-05, "loss": 0.7251, "step": 2277 }, { "epoch": 0.3719031876249949, "grad_norm": 2.2503979206085205, "learning_rate": 1.9991777544285575e-05, "loss": 0.9457, "step": 2278 }, { "epoch": 0.3720664462674993, "grad_norm": 2.0288405418395996, "learning_rate": 1.9991764673325587e-05, "loss": 0.8405, "step": 2279 }, { "epoch": 0.3722297049100037, "grad_norm": 2.3824329376220703, "learning_rate": 1.9991751792303892e-05, "loss": 0.9277, "step": 2280 }, { "epoch": 0.37239296355250806, "grad_norm": 2.0862839221954346, "learning_rate": 1.999173890122051e-05, "loss": 0.9338, "step": 2281 }, { "epoch": 0.37255622219501244, "grad_norm": 2.0769782066345215, "learning_rate": 1.9991726000075445e-05, "loss": 0.9175, "step": 2282 }, { "epoch": 0.3727194808375168, "grad_norm": 2.2393691539764404, "learning_rate": 1.9991713088868714e-05, "loss": 0.8211, "step": 2283 }, { "epoch": 0.3728827394800212, "grad_norm": 2.3423573970794678, "learning_rate": 1.9991700167600333e-05, "loss": 1.115, "step": 2284 }, { "epoch": 0.3730459981225256, "grad_norm": 2.3422374725341797, "learning_rate": 1.999168723627031e-05, "loss": 1.0062, "step": 2285 }, { "epoch": 0.37320925676503, "grad_norm": 2.2843518257141113, "learning_rate": 1.9991674294878663e-05, "loss": 0.9245, "step": 2286 }, { "epoch": 0.3733725154075344, "grad_norm": 2.6438379287719727, "learning_rate": 1.9991661343425402e-05, "loss": 0.8941, "step": 2287 }, { "epoch": 0.3735357740500388, "grad_norm": 2.2897679805755615, "learning_rate": 1.999164838191054e-05, "loss": 0.9905, "step": 2288 }, { "epoch": 0.3736990326925432, "grad_norm": 2.055382251739502, "learning_rate": 1.9991635410334092e-05, "loss": 1.0715, "step": 2289 }, { "epoch": 0.37386229133504756, "grad_norm": 2.235107660293579, "learning_rate": 1.999162242869607e-05, "loss": 0.8594, "step": 2290 }, { "epoch": 0.37402554997755194, "grad_norm": 2.1087429523468018, "learning_rate": 1.9991609436996486e-05, "loss": 0.8611, "step": 2291 }, { "epoch": 0.3741888086200563, "grad_norm": 2.0639665126800537, "learning_rate": 1.9991596435235353e-05, "loss": 0.8815, "step": 2292 }, { "epoch": 0.3743520672625607, "grad_norm": 2.148899793624878, "learning_rate": 1.999158342341269e-05, "loss": 1.0174, "step": 2293 }, { "epoch": 0.3745153259050651, "grad_norm": 2.390429973602295, "learning_rate": 1.99915704015285e-05, "loss": 1.1164, "step": 2294 }, { "epoch": 0.37467858454756947, "grad_norm": 2.3499748706817627, "learning_rate": 1.9991557369582802e-05, "loss": 0.9772, "step": 2295 }, { "epoch": 0.37484184319007385, "grad_norm": 1.977137565612793, "learning_rate": 1.999154432757561e-05, "loss": 0.9271, "step": 2296 }, { "epoch": 0.37500510183257824, "grad_norm": 2.1660349369049072, "learning_rate": 1.9991531275506934e-05, "loss": 0.8688, "step": 2297 }, { "epoch": 0.3751683604750827, "grad_norm": 1.947456955909729, "learning_rate": 1.9991518213376787e-05, "loss": 0.8144, "step": 2298 }, { "epoch": 0.37533161911758706, "grad_norm": 2.258554697036743, "learning_rate": 1.9991505141185187e-05, "loss": 0.7922, "step": 2299 }, { "epoch": 0.37549487776009144, "grad_norm": 2.0119235515594482, "learning_rate": 1.9991492058932143e-05, "loss": 1.0209, "step": 2300 }, { "epoch": 0.3756581364025958, "grad_norm": 1.927079200744629, "learning_rate": 1.999147896661767e-05, "loss": 0.8222, "step": 2301 }, { "epoch": 0.3758213950451002, "grad_norm": 1.9848893880844116, "learning_rate": 1.9991465864241778e-05, "loss": 0.8582, "step": 2302 }, { "epoch": 0.3759846536876046, "grad_norm": 2.156874895095825, "learning_rate": 1.9991452751804484e-05, "loss": 0.8524, "step": 2303 }, { "epoch": 0.37614791233010897, "grad_norm": 2.1641054153442383, "learning_rate": 1.99914396293058e-05, "loss": 1.0119, "step": 2304 }, { "epoch": 0.37631117097261335, "grad_norm": 2.165212631225586, "learning_rate": 1.999142649674574e-05, "loss": 0.8027, "step": 2305 }, { "epoch": 0.37647442961511773, "grad_norm": 2.553990364074707, "learning_rate": 1.999141335412431e-05, "loss": 1.1193, "step": 2306 }, { "epoch": 0.3766376882576221, "grad_norm": 2.1941773891448975, "learning_rate": 1.9991400201441538e-05, "loss": 0.96, "step": 2307 }, { "epoch": 0.3768009469001265, "grad_norm": 2.500070095062256, "learning_rate": 1.9991387038697423e-05, "loss": 0.9214, "step": 2308 }, { "epoch": 0.37696420554263094, "grad_norm": 2.172396659851074, "learning_rate": 1.9991373865891986e-05, "loss": 0.8634, "step": 2309 }, { "epoch": 0.3771274641851353, "grad_norm": 2.038590431213379, "learning_rate": 1.9991360683025238e-05, "loss": 0.7695, "step": 2310 }, { "epoch": 0.3772907228276397, "grad_norm": 1.849043607711792, "learning_rate": 1.999134749009719e-05, "loss": 0.7995, "step": 2311 }, { "epoch": 0.3774539814701441, "grad_norm": 2.282470464706421, "learning_rate": 1.999133428710786e-05, "loss": 0.9761, "step": 2312 }, { "epoch": 0.37761724011264847, "grad_norm": 2.2381653785705566, "learning_rate": 1.9991321074057263e-05, "loss": 0.9223, "step": 2313 }, { "epoch": 0.37778049875515285, "grad_norm": 2.0038673877716064, "learning_rate": 1.99913078509454e-05, "loss": 0.9387, "step": 2314 }, { "epoch": 0.37794375739765723, "grad_norm": 2.218248128890991, "learning_rate": 1.99912946177723e-05, "loss": 0.9657, "step": 2315 }, { "epoch": 0.3781070160401616, "grad_norm": 2.7542827129364014, "learning_rate": 1.9991281374537967e-05, "loss": 0.8709, "step": 2316 }, { "epoch": 0.378270274682666, "grad_norm": 2.073265790939331, "learning_rate": 1.9991268121242414e-05, "loss": 0.8432, "step": 2317 }, { "epoch": 0.3784335333251704, "grad_norm": 2.5492687225341797, "learning_rate": 1.999125485788566e-05, "loss": 0.8065, "step": 2318 }, { "epoch": 0.37859679196767476, "grad_norm": 2.3170039653778076, "learning_rate": 1.999124158446771e-05, "loss": 0.8277, "step": 2319 }, { "epoch": 0.3787600506101792, "grad_norm": 2.0927159786224365, "learning_rate": 1.9991228300988586e-05, "loss": 0.7591, "step": 2320 }, { "epoch": 0.3789233092526836, "grad_norm": 2.2113091945648193, "learning_rate": 1.9991215007448293e-05, "loss": 0.932, "step": 2321 }, { "epoch": 0.37908656789518796, "grad_norm": 1.8843798637390137, "learning_rate": 1.9991201703846858e-05, "loss": 0.7216, "step": 2322 }, { "epoch": 0.37924982653769235, "grad_norm": 2.3609485626220703, "learning_rate": 1.999118839018428e-05, "loss": 0.9213, "step": 2323 }, { "epoch": 0.37941308518019673, "grad_norm": 2.308814525604248, "learning_rate": 1.9991175066460578e-05, "loss": 0.8812, "step": 2324 }, { "epoch": 0.3795763438227011, "grad_norm": 2.204939842224121, "learning_rate": 1.9991161732675767e-05, "loss": 0.9319, "step": 2325 }, { "epoch": 0.3797396024652055, "grad_norm": 2.607072591781616, "learning_rate": 1.9991148388829855e-05, "loss": 0.9323, "step": 2326 }, { "epoch": 0.3799028611077099, "grad_norm": 2.0990898609161377, "learning_rate": 1.9991135034922865e-05, "loss": 0.9018, "step": 2327 }, { "epoch": 0.38006611975021426, "grad_norm": 2.0443265438079834, "learning_rate": 1.99911216709548e-05, "loss": 0.9103, "step": 2328 }, { "epoch": 0.38022937839271864, "grad_norm": 2.032975196838379, "learning_rate": 1.999110829692568e-05, "loss": 0.9044, "step": 2329 }, { "epoch": 0.380392637035223, "grad_norm": 2.20564603805542, "learning_rate": 1.9991094912835514e-05, "loss": 0.7813, "step": 2330 }, { "epoch": 0.38055589567772746, "grad_norm": 2.3233721256256104, "learning_rate": 1.9991081518684322e-05, "loss": 1.0568, "step": 2331 }, { "epoch": 0.38071915432023185, "grad_norm": 2.0378851890563965, "learning_rate": 1.9991068114472113e-05, "loss": 0.949, "step": 2332 }, { "epoch": 0.3808824129627362, "grad_norm": 2.1214215755462646, "learning_rate": 1.9991054700198898e-05, "loss": 0.8867, "step": 2333 }, { "epoch": 0.3810456716052406, "grad_norm": 2.3817129135131836, "learning_rate": 1.9991041275864697e-05, "loss": 1.0808, "step": 2334 }, { "epoch": 0.381208930247745, "grad_norm": 2.0851662158966064, "learning_rate": 1.999102784146952e-05, "loss": 0.8811, "step": 2335 }, { "epoch": 0.3813721888902494, "grad_norm": 2.3774023056030273, "learning_rate": 1.999101439701338e-05, "loss": 1.2085, "step": 2336 }, { "epoch": 0.38153544753275376, "grad_norm": 2.5050346851348877, "learning_rate": 1.999100094249629e-05, "loss": 0.9142, "step": 2337 }, { "epoch": 0.38169870617525814, "grad_norm": 2.126155376434326, "learning_rate": 1.9990987477918266e-05, "loss": 1.03, "step": 2338 }, { "epoch": 0.3818619648177625, "grad_norm": 2.551145315170288, "learning_rate": 1.9990974003279316e-05, "loss": 1.1277, "step": 2339 }, { "epoch": 0.3820252234602669, "grad_norm": 2.352571487426758, "learning_rate": 1.9990960518579462e-05, "loss": 0.8142, "step": 2340 }, { "epoch": 0.38218848210277134, "grad_norm": 2.0983633995056152, "learning_rate": 1.9990947023818713e-05, "loss": 0.95, "step": 2341 }, { "epoch": 0.3823517407452757, "grad_norm": 2.0947790145874023, "learning_rate": 1.9990933518997086e-05, "loss": 0.9835, "step": 2342 }, { "epoch": 0.3825149993877801, "grad_norm": 2.527278184890747, "learning_rate": 1.9990920004114588e-05, "loss": 1.2071, "step": 2343 }, { "epoch": 0.3826782580302845, "grad_norm": 2.229099988937378, "learning_rate": 1.9990906479171236e-05, "loss": 0.8382, "step": 2344 }, { "epoch": 0.3828415166727889, "grad_norm": 1.9527391195297241, "learning_rate": 1.9990892944167044e-05, "loss": 0.9016, "step": 2345 }, { "epoch": 0.38300477531529326, "grad_norm": 1.9701675176620483, "learning_rate": 1.9990879399102024e-05, "loss": 0.7919, "step": 2346 }, { "epoch": 0.38316803395779764, "grad_norm": 2.1456830501556396, "learning_rate": 1.9990865843976195e-05, "loss": 0.7835, "step": 2347 }, { "epoch": 0.383331292600302, "grad_norm": 2.3690779209136963, "learning_rate": 1.9990852278789562e-05, "loss": 0.8976, "step": 2348 }, { "epoch": 0.3834945512428064, "grad_norm": 2.124682903289795, "learning_rate": 1.9990838703542146e-05, "loss": 0.8693, "step": 2349 }, { "epoch": 0.3836578098853108, "grad_norm": 1.9659172296524048, "learning_rate": 1.9990825118233958e-05, "loss": 0.7612, "step": 2350 }, { "epoch": 0.38382106852781517, "grad_norm": 2.310131072998047, "learning_rate": 1.9990811522865014e-05, "loss": 1.0597, "step": 2351 }, { "epoch": 0.3839843271703196, "grad_norm": 2.1698062419891357, "learning_rate": 1.9990797917435324e-05, "loss": 0.8984, "step": 2352 }, { "epoch": 0.384147585812824, "grad_norm": 2.0753705501556396, "learning_rate": 1.9990784301944902e-05, "loss": 0.739, "step": 2353 }, { "epoch": 0.38431084445532837, "grad_norm": 2.3093955516815186, "learning_rate": 1.9990770676393762e-05, "loss": 0.9565, "step": 2354 }, { "epoch": 0.38447410309783275, "grad_norm": 2.806009292602539, "learning_rate": 1.999075704078192e-05, "loss": 1.1218, "step": 2355 }, { "epoch": 0.38463736174033714, "grad_norm": 2.46855092048645, "learning_rate": 1.999074339510939e-05, "loss": 1.0959, "step": 2356 }, { "epoch": 0.3848006203828415, "grad_norm": 2.4953761100769043, "learning_rate": 1.999072973937618e-05, "loss": 1.0218, "step": 2357 }, { "epoch": 0.3849638790253459, "grad_norm": 2.846637725830078, "learning_rate": 1.999071607358231e-05, "loss": 1.0803, "step": 2358 }, { "epoch": 0.3851271376678503, "grad_norm": 2.1669204235076904, "learning_rate": 1.9990702397727794e-05, "loss": 0.9435, "step": 2359 }, { "epoch": 0.38529039631035467, "grad_norm": 2.246234893798828, "learning_rate": 1.999068871181264e-05, "loss": 1.0379, "step": 2360 }, { "epoch": 0.38545365495285905, "grad_norm": 1.9008138179779053, "learning_rate": 1.9990675015836863e-05, "loss": 0.873, "step": 2361 }, { "epoch": 0.38561691359536343, "grad_norm": 2.467764377593994, "learning_rate": 1.9990661309800483e-05, "loss": 1.1829, "step": 2362 }, { "epoch": 0.38578017223786787, "grad_norm": 2.077467441558838, "learning_rate": 1.999064759370351e-05, "loss": 0.9456, "step": 2363 }, { "epoch": 0.38594343088037225, "grad_norm": 2.39619779586792, "learning_rate": 1.9990633867545956e-05, "loss": 1.031, "step": 2364 }, { "epoch": 0.38610668952287663, "grad_norm": 2.1364123821258545, "learning_rate": 1.9990620131327836e-05, "loss": 1.1695, "step": 2365 }, { "epoch": 0.386269948165381, "grad_norm": 2.1466245651245117, "learning_rate": 1.9990606385049165e-05, "loss": 0.9586, "step": 2366 }, { "epoch": 0.3864332068078854, "grad_norm": 2.5207529067993164, "learning_rate": 1.9990592628709957e-05, "loss": 1.0535, "step": 2367 }, { "epoch": 0.3865964654503898, "grad_norm": 2.0793449878692627, "learning_rate": 1.999057886231022e-05, "loss": 0.8829, "step": 2368 }, { "epoch": 0.38675972409289416, "grad_norm": 2.1145431995391846, "learning_rate": 1.9990565085849976e-05, "loss": 0.8804, "step": 2369 }, { "epoch": 0.38692298273539855, "grad_norm": 2.1253602504730225, "learning_rate": 1.999055129932924e-05, "loss": 1.0116, "step": 2370 }, { "epoch": 0.38708624137790293, "grad_norm": 2.194624662399292, "learning_rate": 1.9990537502748016e-05, "loss": 0.9817, "step": 2371 }, { "epoch": 0.3872495000204073, "grad_norm": 2.5927858352661133, "learning_rate": 1.9990523696106327e-05, "loss": 0.8736, "step": 2372 }, { "epoch": 0.3874127586629117, "grad_norm": 2.4821815490722656, "learning_rate": 1.999050987940418e-05, "loss": 1.0939, "step": 2373 }, { "epoch": 0.38757601730541613, "grad_norm": 2.0023574829101562, "learning_rate": 1.9990496052641594e-05, "loss": 0.7837, "step": 2374 }, { "epoch": 0.3877392759479205, "grad_norm": 2.0740838050842285, "learning_rate": 1.999048221581858e-05, "loss": 0.8384, "step": 2375 }, { "epoch": 0.3879025345904249, "grad_norm": 2.616694450378418, "learning_rate": 1.9990468368935155e-05, "loss": 0.9406, "step": 2376 }, { "epoch": 0.3880657932329293, "grad_norm": 2.4535982608795166, "learning_rate": 1.9990454511991327e-05, "loss": 1.0165, "step": 2377 }, { "epoch": 0.38822905187543366, "grad_norm": 2.466538906097412, "learning_rate": 1.9990440644987116e-05, "loss": 1.1825, "step": 2378 }, { "epoch": 0.38839231051793804, "grad_norm": 2.030176877975464, "learning_rate": 1.9990426767922535e-05, "loss": 0.9254, "step": 2379 }, { "epoch": 0.3885555691604424, "grad_norm": 1.867173194885254, "learning_rate": 1.9990412880797595e-05, "loss": 0.8518, "step": 2380 }, { "epoch": 0.3887188278029468, "grad_norm": 2.000919818878174, "learning_rate": 1.9990398983612316e-05, "loss": 0.7092, "step": 2381 }, { "epoch": 0.3888820864454512, "grad_norm": 1.9182844161987305, "learning_rate": 1.99903850763667e-05, "loss": 0.898, "step": 2382 }, { "epoch": 0.3890453450879556, "grad_norm": 2.3210976123809814, "learning_rate": 1.9990371159060778e-05, "loss": 1.1047, "step": 2383 }, { "epoch": 0.38920860373045996, "grad_norm": 2.0933878421783447, "learning_rate": 1.999035723169455e-05, "loss": 0.9968, "step": 2384 }, { "epoch": 0.3893718623729644, "grad_norm": 2.1206212043762207, "learning_rate": 1.9990343294268036e-05, "loss": 1.0121, "step": 2385 }, { "epoch": 0.3895351210154688, "grad_norm": 1.999044418334961, "learning_rate": 1.999032934678125e-05, "loss": 0.8421, "step": 2386 }, { "epoch": 0.38969837965797316, "grad_norm": 1.8623462915420532, "learning_rate": 1.9990315389234203e-05, "loss": 0.7388, "step": 2387 }, { "epoch": 0.38986163830047754, "grad_norm": 2.1583359241485596, "learning_rate": 1.999030142162691e-05, "loss": 0.8295, "step": 2388 }, { "epoch": 0.3900248969429819, "grad_norm": 2.46870756149292, "learning_rate": 1.999028744395939e-05, "loss": 0.9973, "step": 2389 }, { "epoch": 0.3901881555854863, "grad_norm": 2.453171730041504, "learning_rate": 1.999027345623165e-05, "loss": 0.8719, "step": 2390 }, { "epoch": 0.3903514142279907, "grad_norm": 2.5562548637390137, "learning_rate": 1.999025945844371e-05, "loss": 1.0458, "step": 2391 }, { "epoch": 0.39051467287049507, "grad_norm": 2.0096516609191895, "learning_rate": 1.999024545059558e-05, "loss": 0.9288, "step": 2392 }, { "epoch": 0.39067793151299945, "grad_norm": 2.213114023208618, "learning_rate": 1.9990231432687274e-05, "loss": 0.8927, "step": 2393 }, { "epoch": 0.39084119015550384, "grad_norm": 2.1281092166900635, "learning_rate": 1.9990217404718807e-05, "loss": 0.7749, "step": 2394 }, { "epoch": 0.3910044487980082, "grad_norm": 2.512016773223877, "learning_rate": 1.9990203366690197e-05, "loss": 0.853, "step": 2395 }, { "epoch": 0.39116770744051266, "grad_norm": 2.199647903442383, "learning_rate": 1.9990189318601452e-05, "loss": 0.9074, "step": 2396 }, { "epoch": 0.39133096608301704, "grad_norm": 2.2656641006469727, "learning_rate": 1.999017526045259e-05, "loss": 0.9226, "step": 2397 }, { "epoch": 0.3914942247255214, "grad_norm": 2.167797565460205, "learning_rate": 1.9990161192243628e-05, "loss": 0.8112, "step": 2398 }, { "epoch": 0.3916574833680258, "grad_norm": 1.9817966222763062, "learning_rate": 1.999014711397457e-05, "loss": 0.7674, "step": 2399 }, { "epoch": 0.3918207420105302, "grad_norm": 2.379504919052124, "learning_rate": 1.999013302564544e-05, "loss": 0.7849, "step": 2400 }, { "epoch": 0.39198400065303457, "grad_norm": 2.1934640407562256, "learning_rate": 1.9990118927256247e-05, "loss": 1.0028, "step": 2401 }, { "epoch": 0.39214725929553895, "grad_norm": 2.2847135066986084, "learning_rate": 1.999010481880701e-05, "loss": 0.8633, "step": 2402 }, { "epoch": 0.39231051793804333, "grad_norm": 2.3834688663482666, "learning_rate": 1.9990090700297736e-05, "loss": 0.8161, "step": 2403 }, { "epoch": 0.3924737765805477, "grad_norm": 2.2691235542297363, "learning_rate": 1.9990076571728447e-05, "loss": 0.8593, "step": 2404 }, { "epoch": 0.3926370352230521, "grad_norm": 2.3049893379211426, "learning_rate": 1.999006243309915e-05, "loss": 0.9053, "step": 2405 }, { "epoch": 0.3928002938655565, "grad_norm": 2.031383514404297, "learning_rate": 1.9990048284409867e-05, "loss": 0.7923, "step": 2406 }, { "epoch": 0.3929635525080609, "grad_norm": 2.201955795288086, "learning_rate": 1.9990034125660607e-05, "loss": 1.0715, "step": 2407 }, { "epoch": 0.3931268111505653, "grad_norm": 2.3718245029449463, "learning_rate": 1.9990019956851384e-05, "loss": 0.8632, "step": 2408 }, { "epoch": 0.3932900697930697, "grad_norm": 2.0922348499298096, "learning_rate": 1.9990005777982212e-05, "loss": 0.8749, "step": 2409 }, { "epoch": 0.39345332843557407, "grad_norm": 2.5148468017578125, "learning_rate": 1.9989991589053107e-05, "loss": 0.9764, "step": 2410 }, { "epoch": 0.39361658707807845, "grad_norm": 2.161440849304199, "learning_rate": 1.9989977390064087e-05, "loss": 0.9593, "step": 2411 }, { "epoch": 0.39377984572058283, "grad_norm": 2.1965153217315674, "learning_rate": 1.9989963181015158e-05, "loss": 0.8848, "step": 2412 }, { "epoch": 0.3939431043630872, "grad_norm": 2.3817453384399414, "learning_rate": 1.998994896190634e-05, "loss": 0.8978, "step": 2413 }, { "epoch": 0.3941063630055916, "grad_norm": 2.1136372089385986, "learning_rate": 1.9989934732737648e-05, "loss": 1.0197, "step": 2414 }, { "epoch": 0.394269621648096, "grad_norm": 2.265977144241333, "learning_rate": 1.998992049350909e-05, "loss": 0.9031, "step": 2415 }, { "epoch": 0.39443288029060036, "grad_norm": 2.0319786071777344, "learning_rate": 1.998990624422069e-05, "loss": 0.9202, "step": 2416 }, { "epoch": 0.39459613893310475, "grad_norm": 2.1427950859069824, "learning_rate": 1.9989891984872457e-05, "loss": 0.9407, "step": 2417 }, { "epoch": 0.3947593975756092, "grad_norm": 2.5593388080596924, "learning_rate": 1.9989877715464404e-05, "loss": 0.8438, "step": 2418 }, { "epoch": 0.39492265621811357, "grad_norm": 2.397465705871582, "learning_rate": 1.9989863435996544e-05, "loss": 1.123, "step": 2419 }, { "epoch": 0.39508591486061795, "grad_norm": 1.7739402055740356, "learning_rate": 1.9989849146468897e-05, "loss": 0.7916, "step": 2420 }, { "epoch": 0.39524917350312233, "grad_norm": 2.119030714035034, "learning_rate": 1.9989834846881473e-05, "loss": 0.9219, "step": 2421 }, { "epoch": 0.3954124321456267, "grad_norm": 2.2377028465270996, "learning_rate": 1.9989820537234287e-05, "loss": 1.0082, "step": 2422 }, { "epoch": 0.3955756907881311, "grad_norm": 2.1268718242645264, "learning_rate": 1.9989806217527357e-05, "loss": 0.9853, "step": 2423 }, { "epoch": 0.3957389494306355, "grad_norm": 2.1222565174102783, "learning_rate": 1.9989791887760695e-05, "loss": 0.8893, "step": 2424 }, { "epoch": 0.39590220807313986, "grad_norm": 2.537492275238037, "learning_rate": 1.9989777547934314e-05, "loss": 1.0533, "step": 2425 }, { "epoch": 0.39606546671564424, "grad_norm": 2.0177674293518066, "learning_rate": 1.998976319804823e-05, "loss": 0.8432, "step": 2426 }, { "epoch": 0.3962287253581486, "grad_norm": 2.4538278579711914, "learning_rate": 1.9989748838102456e-05, "loss": 0.9414, "step": 2427 }, { "epoch": 0.396391984000653, "grad_norm": 2.0986878871917725, "learning_rate": 1.998973446809701e-05, "loss": 0.8182, "step": 2428 }, { "epoch": 0.39655524264315745, "grad_norm": 1.7855708599090576, "learning_rate": 1.99897200880319e-05, "loss": 0.7151, "step": 2429 }, { "epoch": 0.39671850128566183, "grad_norm": 2.5517826080322266, "learning_rate": 1.998970569790715e-05, "loss": 1.1297, "step": 2430 }, { "epoch": 0.3968817599281662, "grad_norm": 2.115194320678711, "learning_rate": 1.9989691297722765e-05, "loss": 0.7574, "step": 2431 }, { "epoch": 0.3970450185706706, "grad_norm": 2.2917094230651855, "learning_rate": 1.9989676887478764e-05, "loss": 0.9255, "step": 2432 }, { "epoch": 0.397208277213175, "grad_norm": 2.540828227996826, "learning_rate": 1.9989662467175163e-05, "loss": 1.6806, "step": 2433 }, { "epoch": 0.39737153585567936, "grad_norm": 1.9915798902511597, "learning_rate": 1.998964803681197e-05, "loss": 0.9324, "step": 2434 }, { "epoch": 0.39753479449818374, "grad_norm": 2.2942895889282227, "learning_rate": 1.998963359638921e-05, "loss": 0.8661, "step": 2435 }, { "epoch": 0.3976980531406881, "grad_norm": 2.766526460647583, "learning_rate": 1.9989619145906888e-05, "loss": 0.9932, "step": 2436 }, { "epoch": 0.3978613117831925, "grad_norm": 2.6054022312164307, "learning_rate": 1.9989604685365024e-05, "loss": 0.9714, "step": 2437 }, { "epoch": 0.3980245704256969, "grad_norm": 2.4717018604278564, "learning_rate": 1.9989590214763627e-05, "loss": 0.7983, "step": 2438 }, { "epoch": 0.39818782906820127, "grad_norm": 2.5681324005126953, "learning_rate": 1.998957573410272e-05, "loss": 0.9195, "step": 2439 }, { "epoch": 0.3983510877107057, "grad_norm": 2.060110569000244, "learning_rate": 1.9989561243382313e-05, "loss": 0.7392, "step": 2440 }, { "epoch": 0.3985143463532101, "grad_norm": 2.5571093559265137, "learning_rate": 1.9989546742602416e-05, "loss": 0.8896, "step": 2441 }, { "epoch": 0.3986776049957145, "grad_norm": 2.0924389362335205, "learning_rate": 1.998953223176305e-05, "loss": 0.8161, "step": 2442 }, { "epoch": 0.39884086363821886, "grad_norm": 2.2974934577941895, "learning_rate": 1.9989517710864228e-05, "loss": 0.993, "step": 2443 }, { "epoch": 0.39900412228072324, "grad_norm": 2.378819704055786, "learning_rate": 1.9989503179905963e-05, "loss": 1.0175, "step": 2444 }, { "epoch": 0.3991673809232276, "grad_norm": 2.131253957748413, "learning_rate": 1.9989488638888274e-05, "loss": 0.9907, "step": 2445 }, { "epoch": 0.399330639565732, "grad_norm": 2.3086137771606445, "learning_rate": 1.998947408781117e-05, "loss": 0.9663, "step": 2446 }, { "epoch": 0.3994938982082364, "grad_norm": 2.284175157546997, "learning_rate": 1.998945952667467e-05, "loss": 1.0497, "step": 2447 }, { "epoch": 0.39965715685074077, "grad_norm": 2.550283432006836, "learning_rate": 1.9989444955478788e-05, "loss": 1.0249, "step": 2448 }, { "epoch": 0.39982041549324515, "grad_norm": 2.1344642639160156, "learning_rate": 1.9989430374223534e-05, "loss": 0.9956, "step": 2449 }, { "epoch": 0.39998367413574953, "grad_norm": 2.0825536251068115, "learning_rate": 1.998941578290893e-05, "loss": 0.9194, "step": 2450 }, { "epoch": 0.40014693277825397, "grad_norm": 2.0687222480773926, "learning_rate": 1.9989401181534985e-05, "loss": 0.9302, "step": 2451 }, { "epoch": 0.40031019142075835, "grad_norm": 2.411212205886841, "learning_rate": 1.9989386570101716e-05, "loss": 0.9873, "step": 2452 }, { "epoch": 0.40047345006326274, "grad_norm": 2.179976463317871, "learning_rate": 1.9989371948609134e-05, "loss": 0.8882, "step": 2453 }, { "epoch": 0.4006367087057671, "grad_norm": 2.2875638008117676, "learning_rate": 1.998935731705726e-05, "loss": 1.0496, "step": 2454 }, { "epoch": 0.4007999673482715, "grad_norm": 3.0647501945495605, "learning_rate": 1.998934267544611e-05, "loss": 1.1592, "step": 2455 }, { "epoch": 0.4009632259907759, "grad_norm": 2.404136896133423, "learning_rate": 1.9989328023775688e-05, "loss": 0.9799, "step": 2456 }, { "epoch": 0.40112648463328027, "grad_norm": 2.19797945022583, "learning_rate": 1.998931336204602e-05, "loss": 0.9022, "step": 2457 }, { "epoch": 0.40128974327578465, "grad_norm": 2.4626898765563965, "learning_rate": 1.998929869025711e-05, "loss": 0.8586, "step": 2458 }, { "epoch": 0.40145300191828903, "grad_norm": 1.9525703191757202, "learning_rate": 1.9989284008408985e-05, "loss": 0.8928, "step": 2459 }, { "epoch": 0.4016162605607934, "grad_norm": 2.2514874935150146, "learning_rate": 1.998926931650165e-05, "loss": 1.0309, "step": 2460 }, { "epoch": 0.4017795192032978, "grad_norm": 2.2697155475616455, "learning_rate": 1.9989254614535125e-05, "loss": 0.739, "step": 2461 }, { "epoch": 0.40194277784580223, "grad_norm": 2.230809211730957, "learning_rate": 1.9989239902509423e-05, "loss": 0.9559, "step": 2462 }, { "epoch": 0.4021060364883066, "grad_norm": 2.2936198711395264, "learning_rate": 1.998922518042456e-05, "loss": 0.887, "step": 2463 }, { "epoch": 0.402269295130811, "grad_norm": 2.4923603534698486, "learning_rate": 1.9989210448280548e-05, "loss": 1.0061, "step": 2464 }, { "epoch": 0.4024325537733154, "grad_norm": 2.1371591091156006, "learning_rate": 1.9989195706077406e-05, "loss": 0.9595, "step": 2465 }, { "epoch": 0.40259581241581976, "grad_norm": 2.299093246459961, "learning_rate": 1.9989180953815145e-05, "loss": 0.9594, "step": 2466 }, { "epoch": 0.40275907105832415, "grad_norm": 2.4092671871185303, "learning_rate": 1.9989166191493782e-05, "loss": 0.9845, "step": 2467 }, { "epoch": 0.40292232970082853, "grad_norm": 2.5959017276763916, "learning_rate": 1.998915141911333e-05, "loss": 1.1591, "step": 2468 }, { "epoch": 0.4030855883433329, "grad_norm": 2.149758815765381, "learning_rate": 1.9989136636673805e-05, "loss": 0.8009, "step": 2469 }, { "epoch": 0.4032488469858373, "grad_norm": 2.285952091217041, "learning_rate": 1.9989121844175226e-05, "loss": 0.9094, "step": 2470 }, { "epoch": 0.4034121056283417, "grad_norm": 2.2832398414611816, "learning_rate": 1.9989107041617602e-05, "loss": 1.0566, "step": 2471 }, { "epoch": 0.4035753642708461, "grad_norm": 2.092925548553467, "learning_rate": 1.9989092229000947e-05, "loss": 0.9838, "step": 2472 }, { "epoch": 0.4037386229133505, "grad_norm": 2.4731366634368896, "learning_rate": 1.9989077406325285e-05, "loss": 0.8645, "step": 2473 }, { "epoch": 0.4039018815558549, "grad_norm": 2.2036325931549072, "learning_rate": 1.9989062573590618e-05, "loss": 0.9265, "step": 2474 }, { "epoch": 0.40406514019835926, "grad_norm": 2.272914409637451, "learning_rate": 1.998904773079697e-05, "loss": 0.9708, "step": 2475 }, { "epoch": 0.40422839884086365, "grad_norm": 2.1340277194976807, "learning_rate": 1.9989032877944353e-05, "loss": 0.7244, "step": 2476 }, { "epoch": 0.404391657483368, "grad_norm": 2.39723539352417, "learning_rate": 1.9989018015032785e-05, "loss": 1.0894, "step": 2477 }, { "epoch": 0.4045549161258724, "grad_norm": 2.237788438796997, "learning_rate": 1.9989003142062278e-05, "loss": 0.8802, "step": 2478 }, { "epoch": 0.4047181747683768, "grad_norm": 2.2704155445098877, "learning_rate": 1.9988988259032845e-05, "loss": 1.047, "step": 2479 }, { "epoch": 0.4048814334108812, "grad_norm": 2.0703465938568115, "learning_rate": 1.9988973365944505e-05, "loss": 0.8315, "step": 2480 }, { "epoch": 0.40504469205338556, "grad_norm": 2.293002128601074, "learning_rate": 1.998895846279727e-05, "loss": 0.9043, "step": 2481 }, { "epoch": 0.40520795069588994, "grad_norm": 1.9347337484359741, "learning_rate": 1.998894354959116e-05, "loss": 0.7863, "step": 2482 }, { "epoch": 0.4053712093383944, "grad_norm": 2.192974090576172, "learning_rate": 1.9988928626326184e-05, "loss": 0.9902, "step": 2483 }, { "epoch": 0.40553446798089876, "grad_norm": 2.1292686462402344, "learning_rate": 1.9988913693002357e-05, "loss": 0.802, "step": 2484 }, { "epoch": 0.40569772662340314, "grad_norm": 1.9144996404647827, "learning_rate": 1.9988898749619702e-05, "loss": 0.6783, "step": 2485 }, { "epoch": 0.4058609852659075, "grad_norm": 2.1686155796051025, "learning_rate": 1.9988883796178225e-05, "loss": 0.8397, "step": 2486 }, { "epoch": 0.4060242439084119, "grad_norm": 2.360686779022217, "learning_rate": 1.9988868832677945e-05, "loss": 0.9389, "step": 2487 }, { "epoch": 0.4061875025509163, "grad_norm": 2.213864803314209, "learning_rate": 1.998885385911888e-05, "loss": 0.7705, "step": 2488 }, { "epoch": 0.4063507611934207, "grad_norm": 2.1179182529449463, "learning_rate": 1.9988838875501038e-05, "loss": 0.7862, "step": 2489 }, { "epoch": 0.40651401983592506, "grad_norm": 2.2212228775024414, "learning_rate": 1.9988823881824436e-05, "loss": 0.8399, "step": 2490 }, { "epoch": 0.40667727847842944, "grad_norm": 2.483272075653076, "learning_rate": 1.9988808878089098e-05, "loss": 0.9944, "step": 2491 }, { "epoch": 0.4068405371209338, "grad_norm": 2.3988304138183594, "learning_rate": 1.998879386429503e-05, "loss": 0.8675, "step": 2492 }, { "epoch": 0.4070037957634382, "grad_norm": 2.72599196434021, "learning_rate": 1.9988778840442245e-05, "loss": 0.9306, "step": 2493 }, { "epoch": 0.40716705440594264, "grad_norm": 2.291930675506592, "learning_rate": 1.9988763806530765e-05, "loss": 0.8025, "step": 2494 }, { "epoch": 0.407330313048447, "grad_norm": 2.6197917461395264, "learning_rate": 1.9988748762560603e-05, "loss": 0.6936, "step": 2495 }, { "epoch": 0.4074935716909514, "grad_norm": 2.8044846057891846, "learning_rate": 1.9988733708531772e-05, "loss": 0.988, "step": 2496 }, { "epoch": 0.4076568303334558, "grad_norm": 2.4618797302246094, "learning_rate": 1.998871864444429e-05, "loss": 0.961, "step": 2497 }, { "epoch": 0.40782008897596017, "grad_norm": 2.451298713684082, "learning_rate": 1.998870357029817e-05, "loss": 0.9464, "step": 2498 }, { "epoch": 0.40798334761846455, "grad_norm": 2.3801121711730957, "learning_rate": 1.998868848609343e-05, "loss": 0.7763, "step": 2499 }, { "epoch": 0.40814660626096894, "grad_norm": 2.047248601913452, "learning_rate": 1.9988673391830082e-05, "loss": 0.9, "step": 2500 }, { "epoch": 0.4083098649034733, "grad_norm": 2.207097053527832, "learning_rate": 1.998865828750814e-05, "loss": 0.9595, "step": 2501 }, { "epoch": 0.4084731235459777, "grad_norm": 2.621706962585449, "learning_rate": 1.9988643173127627e-05, "loss": 0.8284, "step": 2502 }, { "epoch": 0.4086363821884821, "grad_norm": 2.0710906982421875, "learning_rate": 1.998862804868855e-05, "loss": 0.7883, "step": 2503 }, { "epoch": 0.40879964083098647, "grad_norm": 2.157914638519287, "learning_rate": 1.9988612914190927e-05, "loss": 0.8823, "step": 2504 }, { "epoch": 0.4089628994734909, "grad_norm": 2.0399465560913086, "learning_rate": 1.9988597769634775e-05, "loss": 0.8627, "step": 2505 }, { "epoch": 0.4091261581159953, "grad_norm": 2.9331929683685303, "learning_rate": 1.9988582615020107e-05, "loss": 0.992, "step": 2506 }, { "epoch": 0.40928941675849967, "grad_norm": 2.2221031188964844, "learning_rate": 1.9988567450346937e-05, "loss": 0.7773, "step": 2507 }, { "epoch": 0.40945267540100405, "grad_norm": 2.1749541759490967, "learning_rate": 1.9988552275615287e-05, "loss": 0.9387, "step": 2508 }, { "epoch": 0.40961593404350843, "grad_norm": 1.8630714416503906, "learning_rate": 1.9988537090825166e-05, "loss": 0.7531, "step": 2509 }, { "epoch": 0.4097791926860128, "grad_norm": 2.0631561279296875, "learning_rate": 1.9988521895976585e-05, "loss": 0.8155, "step": 2510 }, { "epoch": 0.4099424513285172, "grad_norm": 2.1360857486724854, "learning_rate": 1.998850669106957e-05, "loss": 0.8572, "step": 2511 }, { "epoch": 0.4101057099710216, "grad_norm": 2.1138761043548584, "learning_rate": 1.9988491476104134e-05, "loss": 0.8673, "step": 2512 }, { "epoch": 0.41026896861352596, "grad_norm": 2.1313304901123047, "learning_rate": 1.9988476251080286e-05, "loss": 0.878, "step": 2513 }, { "epoch": 0.41043222725603035, "grad_norm": 2.2187817096710205, "learning_rate": 1.9988461015998044e-05, "loss": 0.7952, "step": 2514 }, { "epoch": 0.41059548589853473, "grad_norm": 2.1907341480255127, "learning_rate": 1.9988445770857424e-05, "loss": 0.9067, "step": 2515 }, { "epoch": 0.41075874454103917, "grad_norm": 2.4918317794799805, "learning_rate": 1.9988430515658444e-05, "loss": 1.1319, "step": 2516 }, { "epoch": 0.41092200318354355, "grad_norm": 2.2417404651641846, "learning_rate": 1.9988415250401118e-05, "loss": 0.8734, "step": 2517 }, { "epoch": 0.41108526182604793, "grad_norm": 2.722362756729126, "learning_rate": 1.998839997508546e-05, "loss": 0.9068, "step": 2518 }, { "epoch": 0.4112485204685523, "grad_norm": 2.4187746047973633, "learning_rate": 1.9988384689711488e-05, "loss": 0.9884, "step": 2519 }, { "epoch": 0.4114117791110567, "grad_norm": 2.4642841815948486, "learning_rate": 1.998836939427921e-05, "loss": 0.9182, "step": 2520 }, { "epoch": 0.4115750377535611, "grad_norm": 2.4722025394439697, "learning_rate": 1.9988354088788647e-05, "loss": 0.9368, "step": 2521 }, { "epoch": 0.41173829639606546, "grad_norm": 1.9845893383026123, "learning_rate": 1.998833877323982e-05, "loss": 0.8145, "step": 2522 }, { "epoch": 0.41190155503856984, "grad_norm": 2.298062324523926, "learning_rate": 1.9988323447632734e-05, "loss": 0.8222, "step": 2523 }, { "epoch": 0.4120648136810742, "grad_norm": 2.304030656814575, "learning_rate": 1.998830811196741e-05, "loss": 0.8895, "step": 2524 }, { "epoch": 0.4122280723235786, "grad_norm": 2.721147060394287, "learning_rate": 1.998829276624386e-05, "loss": 0.9336, "step": 2525 }, { "epoch": 0.412391330966083, "grad_norm": 2.0208778381347656, "learning_rate": 1.9988277410462107e-05, "loss": 0.7763, "step": 2526 }, { "epoch": 0.41255458960858743, "grad_norm": 2.641338348388672, "learning_rate": 1.998826204462216e-05, "loss": 1.0205, "step": 2527 }, { "epoch": 0.4127178482510918, "grad_norm": 2.2045462131500244, "learning_rate": 1.9988246668724033e-05, "loss": 0.9223, "step": 2528 }, { "epoch": 0.4128811068935962, "grad_norm": 2.7185170650482178, "learning_rate": 1.9988231282767744e-05, "loss": 1.2058, "step": 2529 }, { "epoch": 0.4130443655361006, "grad_norm": 2.5857558250427246, "learning_rate": 1.9988215886753308e-05, "loss": 0.922, "step": 2530 }, { "epoch": 0.41320762417860496, "grad_norm": 2.5655033588409424, "learning_rate": 1.9988200480680745e-05, "loss": 1.0105, "step": 2531 }, { "epoch": 0.41337088282110934, "grad_norm": 2.238011121749878, "learning_rate": 1.9988185064550065e-05, "loss": 0.8987, "step": 2532 }, { "epoch": 0.4135341414636137, "grad_norm": 2.143281936645508, "learning_rate": 1.9988169638361286e-05, "loss": 1.0788, "step": 2533 }, { "epoch": 0.4136974001061181, "grad_norm": 2.717515230178833, "learning_rate": 1.998815420211442e-05, "loss": 1.0217, "step": 2534 }, { "epoch": 0.4138606587486225, "grad_norm": 1.9467624425888062, "learning_rate": 1.998813875580949e-05, "loss": 0.7849, "step": 2535 }, { "epoch": 0.41402391739112687, "grad_norm": 2.3189358711242676, "learning_rate": 1.9988123299446505e-05, "loss": 1.112, "step": 2536 }, { "epoch": 0.41418717603363125, "grad_norm": 2.7086145877838135, "learning_rate": 1.998810783302548e-05, "loss": 1.0364, "step": 2537 }, { "epoch": 0.4143504346761357, "grad_norm": 1.9426896572113037, "learning_rate": 1.9988092356546434e-05, "loss": 0.8665, "step": 2538 }, { "epoch": 0.4145136933186401, "grad_norm": 1.9196878671646118, "learning_rate": 1.9988076870009384e-05, "loss": 0.8402, "step": 2539 }, { "epoch": 0.41467695196114446, "grad_norm": 1.8388781547546387, "learning_rate": 1.9988061373414342e-05, "loss": 0.8519, "step": 2540 }, { "epoch": 0.41484021060364884, "grad_norm": 2.220960855484009, "learning_rate": 1.9988045866761326e-05, "loss": 0.9278, "step": 2541 }, { "epoch": 0.4150034692461532, "grad_norm": 1.971710443496704, "learning_rate": 1.9988030350050346e-05, "loss": 0.9422, "step": 2542 }, { "epoch": 0.4151667278886576, "grad_norm": 1.9760900735855103, "learning_rate": 1.9988014823281426e-05, "loss": 0.8995, "step": 2543 }, { "epoch": 0.415329986531162, "grad_norm": 2.2629480361938477, "learning_rate": 1.998799928645458e-05, "loss": 1.0904, "step": 2544 }, { "epoch": 0.41549324517366637, "grad_norm": 2.1864173412323, "learning_rate": 1.9987983739569815e-05, "loss": 1.022, "step": 2545 }, { "epoch": 0.41565650381617075, "grad_norm": 1.9978785514831543, "learning_rate": 1.998796818262716e-05, "loss": 0.8582, "step": 2546 }, { "epoch": 0.41581976245867514, "grad_norm": 2.5000813007354736, "learning_rate": 1.9987952615626617e-05, "loss": 0.713, "step": 2547 }, { "epoch": 0.4159830211011795, "grad_norm": 2.153843879699707, "learning_rate": 1.9987937038568212e-05, "loss": 0.9524, "step": 2548 }, { "epoch": 0.41614627974368396, "grad_norm": 2.0682623386383057, "learning_rate": 1.998792145145196e-05, "loss": 0.9338, "step": 2549 }, { "epoch": 0.41630953838618834, "grad_norm": 1.8734424114227295, "learning_rate": 1.9987905854277867e-05, "loss": 0.9532, "step": 2550 }, { "epoch": 0.4164727970286927, "grad_norm": 2.2436208724975586, "learning_rate": 1.998789024704596e-05, "loss": 0.8538, "step": 2551 }, { "epoch": 0.4166360556711971, "grad_norm": 2.1438546180725098, "learning_rate": 1.9987874629756248e-05, "loss": 0.7532, "step": 2552 }, { "epoch": 0.4167993143137015, "grad_norm": 2.1417322158813477, "learning_rate": 1.998785900240875e-05, "loss": 0.8365, "step": 2553 }, { "epoch": 0.41696257295620587, "grad_norm": 2.048462152481079, "learning_rate": 1.998784336500348e-05, "loss": 0.803, "step": 2554 }, { "epoch": 0.41712583159871025, "grad_norm": 1.9908592700958252, "learning_rate": 1.9987827717540457e-05, "loss": 0.7958, "step": 2555 }, { "epoch": 0.41728909024121463, "grad_norm": 2.3123931884765625, "learning_rate": 1.9987812060019692e-05, "loss": 0.9047, "step": 2556 }, { "epoch": 0.417452348883719, "grad_norm": 2.427549362182617, "learning_rate": 1.99877963924412e-05, "loss": 0.9235, "step": 2557 }, { "epoch": 0.4176156075262234, "grad_norm": 2.414076805114746, "learning_rate": 1.9987780714805006e-05, "loss": 0.9491, "step": 2558 }, { "epoch": 0.4177788661687278, "grad_norm": 2.3998637199401855, "learning_rate": 1.9987765027111115e-05, "loss": 0.965, "step": 2559 }, { "epoch": 0.4179421248112322, "grad_norm": 2.331723690032959, "learning_rate": 1.998774932935955e-05, "loss": 0.826, "step": 2560 }, { "epoch": 0.4181053834537366, "grad_norm": 2.5903913974761963, "learning_rate": 1.998773362155032e-05, "loss": 1.0947, "step": 2561 }, { "epoch": 0.418268642096241, "grad_norm": 2.973273754119873, "learning_rate": 1.9987717903683447e-05, "loss": 0.9077, "step": 2562 }, { "epoch": 0.41843190073874537, "grad_norm": 2.08506441116333, "learning_rate": 1.9987702175758944e-05, "loss": 0.7905, "step": 2563 }, { "epoch": 0.41859515938124975, "grad_norm": 2.776777982711792, "learning_rate": 1.998768643777683e-05, "loss": 0.9312, "step": 2564 }, { "epoch": 0.41875841802375413, "grad_norm": 2.6250064373016357, "learning_rate": 1.9987670689737116e-05, "loss": 1.1168, "step": 2565 }, { "epoch": 0.4189216766662585, "grad_norm": 2.3053131103515625, "learning_rate": 1.998765493163982e-05, "loss": 0.9886, "step": 2566 }, { "epoch": 0.4190849353087629, "grad_norm": 2.3273322582244873, "learning_rate": 1.998763916348496e-05, "loss": 1.13, "step": 2567 }, { "epoch": 0.4192481939512673, "grad_norm": 2.0786330699920654, "learning_rate": 1.998762338527255e-05, "loss": 0.8755, "step": 2568 }, { "epoch": 0.41941145259377166, "grad_norm": 2.1966702938079834, "learning_rate": 1.9987607597002605e-05, "loss": 0.9536, "step": 2569 }, { "epoch": 0.41957471123627604, "grad_norm": 2.1364517211914062, "learning_rate": 1.9987591798675142e-05, "loss": 0.959, "step": 2570 }, { "epoch": 0.4197379698787805, "grad_norm": 1.9524366855621338, "learning_rate": 1.9987575990290176e-05, "loss": 0.8255, "step": 2571 }, { "epoch": 0.41990122852128486, "grad_norm": 2.1553242206573486, "learning_rate": 1.9987560171847727e-05, "loss": 1.0061, "step": 2572 }, { "epoch": 0.42006448716378925, "grad_norm": 1.990578532218933, "learning_rate": 1.9987544343347802e-05, "loss": 0.95, "step": 2573 }, { "epoch": 0.42022774580629363, "grad_norm": 2.5374364852905273, "learning_rate": 1.9987528504790425e-05, "loss": 0.9161, "step": 2574 }, { "epoch": 0.420391004448798, "grad_norm": 1.9781410694122314, "learning_rate": 1.9987512656175612e-05, "loss": 0.8864, "step": 2575 }, { "epoch": 0.4205542630913024, "grad_norm": 3.0853006839752197, "learning_rate": 1.998749679750337e-05, "loss": 0.9969, "step": 2576 }, { "epoch": 0.4207175217338068, "grad_norm": 2.0785045623779297, "learning_rate": 1.998748092877373e-05, "loss": 0.9968, "step": 2577 }, { "epoch": 0.42088078037631116, "grad_norm": 2.253516912460327, "learning_rate": 1.9987465049986693e-05, "loss": 0.8925, "step": 2578 }, { "epoch": 0.42104403901881554, "grad_norm": 2.089512348175049, "learning_rate": 1.9987449161142284e-05, "loss": 0.934, "step": 2579 }, { "epoch": 0.4212072976613199, "grad_norm": 1.8636101484298706, "learning_rate": 1.9987433262240518e-05, "loss": 0.8302, "step": 2580 }, { "epoch": 0.4213705563038243, "grad_norm": 2.141235113143921, "learning_rate": 1.9987417353281407e-05, "loss": 0.8097, "step": 2581 }, { "epoch": 0.42153381494632874, "grad_norm": 2.5590522289276123, "learning_rate": 1.9987401434264966e-05, "loss": 0.8327, "step": 2582 }, { "epoch": 0.4216970735888331, "grad_norm": 2.420496940612793, "learning_rate": 1.998738550519122e-05, "loss": 0.9455, "step": 2583 }, { "epoch": 0.4218603322313375, "grad_norm": 1.946527361869812, "learning_rate": 1.998736956606018e-05, "loss": 0.7956, "step": 2584 }, { "epoch": 0.4220235908738419, "grad_norm": 2.544832706451416, "learning_rate": 1.9987353616871855e-05, "loss": 1.0669, "step": 2585 }, { "epoch": 0.4221868495163463, "grad_norm": 2.0066325664520264, "learning_rate": 1.9987337657626272e-05, "loss": 0.7262, "step": 2586 }, { "epoch": 0.42235010815885066, "grad_norm": 2.5045721530914307, "learning_rate": 1.9987321688323444e-05, "loss": 0.7806, "step": 2587 }, { "epoch": 0.42251336680135504, "grad_norm": 2.1405093669891357, "learning_rate": 1.998730570896338e-05, "loss": 0.9533, "step": 2588 }, { "epoch": 0.4226766254438594, "grad_norm": 2.3566653728485107, "learning_rate": 1.9987289719546107e-05, "loss": 1.0133, "step": 2589 }, { "epoch": 0.4228398840863638, "grad_norm": 2.389056444168091, "learning_rate": 1.9987273720071633e-05, "loss": 0.9189, "step": 2590 }, { "epoch": 0.4230031427288682, "grad_norm": 1.881344199180603, "learning_rate": 1.998725771053998e-05, "loss": 0.9044, "step": 2591 }, { "epoch": 0.4231664013713726, "grad_norm": 2.144057512283325, "learning_rate": 1.998724169095116e-05, "loss": 0.8832, "step": 2592 }, { "epoch": 0.423329660013877, "grad_norm": 2.0240700244903564, "learning_rate": 1.998722566130519e-05, "loss": 0.8022, "step": 2593 }, { "epoch": 0.4234929186563814, "grad_norm": 2.117976427078247, "learning_rate": 1.9987209621602086e-05, "loss": 0.8283, "step": 2594 }, { "epoch": 0.42365617729888577, "grad_norm": 2.322115898132324, "learning_rate": 1.9987193571841865e-05, "loss": 0.7451, "step": 2595 }, { "epoch": 0.42381943594139015, "grad_norm": 2.426928758621216, "learning_rate": 1.9987177512024543e-05, "loss": 1.057, "step": 2596 }, { "epoch": 0.42398269458389454, "grad_norm": 2.165200710296631, "learning_rate": 1.9987161442150135e-05, "loss": 0.9862, "step": 2597 }, { "epoch": 0.4241459532263989, "grad_norm": 2.2707483768463135, "learning_rate": 1.9987145362218658e-05, "loss": 0.9719, "step": 2598 }, { "epoch": 0.4243092118689033, "grad_norm": 2.195021390914917, "learning_rate": 1.9987129272230128e-05, "loss": 0.8666, "step": 2599 }, { "epoch": 0.4244724705114077, "grad_norm": 2.2502899169921875, "learning_rate": 1.9987113172184562e-05, "loss": 1.0496, "step": 2600 }, { "epoch": 0.42463572915391207, "grad_norm": 2.3947930335998535, "learning_rate": 1.9987097062081978e-05, "loss": 0.8629, "step": 2601 }, { "epoch": 0.42479898779641645, "grad_norm": 2.5473814010620117, "learning_rate": 1.9987080941922385e-05, "loss": 1.2059, "step": 2602 }, { "epoch": 0.4249622464389209, "grad_norm": 2.2321674823760986, "learning_rate": 1.9987064811705807e-05, "loss": 0.9677, "step": 2603 }, { "epoch": 0.42512550508142527, "grad_norm": 2.011504650115967, "learning_rate": 1.9987048671432258e-05, "loss": 0.7454, "step": 2604 }, { "epoch": 0.42528876372392965, "grad_norm": 2.228098154067993, "learning_rate": 1.998703252110175e-05, "loss": 0.8066, "step": 2605 }, { "epoch": 0.42545202236643404, "grad_norm": 2.144942283630371, "learning_rate": 1.9987016360714307e-05, "loss": 1.0303, "step": 2606 }, { "epoch": 0.4256152810089384, "grad_norm": 2.026698112487793, "learning_rate": 1.9987000190269943e-05, "loss": 0.814, "step": 2607 }, { "epoch": 0.4257785396514428, "grad_norm": 2.0498287677764893, "learning_rate": 1.9986984009768665e-05, "loss": 0.9857, "step": 2608 }, { "epoch": 0.4259417982939472, "grad_norm": 2.043485403060913, "learning_rate": 1.9986967819210507e-05, "loss": 0.8504, "step": 2609 }, { "epoch": 0.42610505693645156, "grad_norm": 2.2902987003326416, "learning_rate": 1.9986951618595466e-05, "loss": 0.9418, "step": 2610 }, { "epoch": 0.42626831557895595, "grad_norm": 2.1433684825897217, "learning_rate": 1.9986935407923572e-05, "loss": 0.9862, "step": 2611 }, { "epoch": 0.42643157422146033, "grad_norm": 2.1448352336883545, "learning_rate": 1.9986919187194832e-05, "loss": 0.8599, "step": 2612 }, { "epoch": 0.4265948328639647, "grad_norm": 2.2717056274414062, "learning_rate": 1.9986902956409274e-05, "loss": 0.9652, "step": 2613 }, { "epoch": 0.42675809150646915, "grad_norm": 2.016901731491089, "learning_rate": 1.9986886715566903e-05, "loss": 0.9389, "step": 2614 }, { "epoch": 0.42692135014897353, "grad_norm": 1.9714692831039429, "learning_rate": 1.998687046466774e-05, "loss": 0.7918, "step": 2615 }, { "epoch": 0.4270846087914779, "grad_norm": 2.046323299407959, "learning_rate": 1.99868542037118e-05, "loss": 0.9182, "step": 2616 }, { "epoch": 0.4272478674339823, "grad_norm": 2.1829674243927, "learning_rate": 1.9986837932699103e-05, "loss": 0.8179, "step": 2617 }, { "epoch": 0.4274111260764867, "grad_norm": 2.3346526622772217, "learning_rate": 1.9986821651629664e-05, "loss": 0.8893, "step": 2618 }, { "epoch": 0.42757438471899106, "grad_norm": 2.1561694145202637, "learning_rate": 1.9986805360503494e-05, "loss": 0.8789, "step": 2619 }, { "epoch": 0.42773764336149545, "grad_norm": 2.158025026321411, "learning_rate": 1.9986789059320614e-05, "loss": 0.7085, "step": 2620 }, { "epoch": 0.42790090200399983, "grad_norm": 2.2466347217559814, "learning_rate": 1.9986772748081044e-05, "loss": 0.9338, "step": 2621 }, { "epoch": 0.4280641606465042, "grad_norm": 2.3954708576202393, "learning_rate": 1.9986756426784794e-05, "loss": 0.9137, "step": 2622 }, { "epoch": 0.4282274192890086, "grad_norm": 2.3417820930480957, "learning_rate": 1.9986740095431884e-05, "loss": 0.9494, "step": 2623 }, { "epoch": 0.428390677931513, "grad_norm": 2.4926111698150635, "learning_rate": 1.998672375402233e-05, "loss": 0.9806, "step": 2624 }, { "epoch": 0.4285539365740174, "grad_norm": 2.5028328895568848, "learning_rate": 1.9986707402556144e-05, "loss": 0.884, "step": 2625 }, { "epoch": 0.4287171952165218, "grad_norm": 2.1877872943878174, "learning_rate": 1.998669104103335e-05, "loss": 0.8646, "step": 2626 }, { "epoch": 0.4288804538590262, "grad_norm": 2.3030076026916504, "learning_rate": 1.998667466945396e-05, "loss": 0.9533, "step": 2627 }, { "epoch": 0.42904371250153056, "grad_norm": 2.288083553314209, "learning_rate": 1.998665828781799e-05, "loss": 0.8689, "step": 2628 }, { "epoch": 0.42920697114403494, "grad_norm": 2.1332218647003174, "learning_rate": 1.9986641896125457e-05, "loss": 0.9012, "step": 2629 }, { "epoch": 0.4293702297865393, "grad_norm": 2.2287707328796387, "learning_rate": 1.998662549437638e-05, "loss": 0.9217, "step": 2630 }, { "epoch": 0.4295334884290437, "grad_norm": 2.3400299549102783, "learning_rate": 1.9986609082570775e-05, "loss": 0.8374, "step": 2631 }, { "epoch": 0.4296967470715481, "grad_norm": 2.132723569869995, "learning_rate": 1.9986592660708654e-05, "loss": 0.8746, "step": 2632 }, { "epoch": 0.4298600057140525, "grad_norm": 2.0846195220947266, "learning_rate": 1.998657622879004e-05, "loss": 0.8457, "step": 2633 }, { "epoch": 0.43002326435655686, "grad_norm": 2.1875085830688477, "learning_rate": 1.9986559786814946e-05, "loss": 0.9212, "step": 2634 }, { "epoch": 0.43018652299906124, "grad_norm": 2.014497995376587, "learning_rate": 1.9986543334783386e-05, "loss": 0.8776, "step": 2635 }, { "epoch": 0.4303497816415657, "grad_norm": 2.0356993675231934, "learning_rate": 1.9986526872695383e-05, "loss": 0.8039, "step": 2636 }, { "epoch": 0.43051304028407006, "grad_norm": 2.043959856033325, "learning_rate": 1.9986510400550947e-05, "loss": 0.8053, "step": 2637 }, { "epoch": 0.43067629892657444, "grad_norm": 2.468763589859009, "learning_rate": 1.99864939183501e-05, "loss": 0.8736, "step": 2638 }, { "epoch": 0.4308395575690788, "grad_norm": 2.263637065887451, "learning_rate": 1.9986477426092856e-05, "loss": 0.9566, "step": 2639 }, { "epoch": 0.4310028162115832, "grad_norm": 2.235112428665161, "learning_rate": 1.998646092377923e-05, "loss": 0.9102, "step": 2640 }, { "epoch": 0.4311660748540876, "grad_norm": 2.0359420776367188, "learning_rate": 1.9986444411409245e-05, "loss": 0.8232, "step": 2641 }, { "epoch": 0.43132933349659197, "grad_norm": 2.1837315559387207, "learning_rate": 1.9986427888982907e-05, "loss": 0.9655, "step": 2642 }, { "epoch": 0.43149259213909635, "grad_norm": 2.1636385917663574, "learning_rate": 1.9986411356500242e-05, "loss": 0.8835, "step": 2643 }, { "epoch": 0.43165585078160074, "grad_norm": 2.4290692806243896, "learning_rate": 1.9986394813961267e-05, "loss": 0.9449, "step": 2644 }, { "epoch": 0.4318191094241051, "grad_norm": 2.2007832527160645, "learning_rate": 1.9986378261365987e-05, "loss": 0.974, "step": 2645 }, { "epoch": 0.4319823680666095, "grad_norm": 2.0438380241394043, "learning_rate": 1.998636169871443e-05, "loss": 0.9276, "step": 2646 }, { "epoch": 0.43214562670911394, "grad_norm": 1.9163894653320312, "learning_rate": 1.9986345126006612e-05, "loss": 0.7641, "step": 2647 }, { "epoch": 0.4323088853516183, "grad_norm": 2.1693336963653564, "learning_rate": 1.9986328543242546e-05, "loss": 0.9465, "step": 2648 }, { "epoch": 0.4324721439941227, "grad_norm": 2.2977375984191895, "learning_rate": 1.9986311950422252e-05, "loss": 0.8577, "step": 2649 }, { "epoch": 0.4326354026366271, "grad_norm": 2.289811372756958, "learning_rate": 1.9986295347545738e-05, "loss": 0.914, "step": 2650 }, { "epoch": 0.43279866127913147, "grad_norm": 2.16536808013916, "learning_rate": 1.9986278734613032e-05, "loss": 0.9527, "step": 2651 }, { "epoch": 0.43296191992163585, "grad_norm": 2.135718822479248, "learning_rate": 1.9986262111624145e-05, "loss": 0.7753, "step": 2652 }, { "epoch": 0.43312517856414023, "grad_norm": 2.2220683097839355, "learning_rate": 1.9986245478579095e-05, "loss": 0.7502, "step": 2653 }, { "epoch": 0.4332884372066446, "grad_norm": 1.9034044742584229, "learning_rate": 1.99862288354779e-05, "loss": 0.8147, "step": 2654 }, { "epoch": 0.433451695849149, "grad_norm": 2.4922430515289307, "learning_rate": 1.9986212182320574e-05, "loss": 1.0012, "step": 2655 }, { "epoch": 0.4336149544916534, "grad_norm": 2.050568103790283, "learning_rate": 1.9986195519107135e-05, "loss": 0.8234, "step": 2656 }, { "epoch": 0.43377821313415776, "grad_norm": 2.2101564407348633, "learning_rate": 1.99861788458376e-05, "loss": 0.785, "step": 2657 }, { "epoch": 0.4339414717766622, "grad_norm": 2.09731388092041, "learning_rate": 1.9986162162511983e-05, "loss": 0.8506, "step": 2658 }, { "epoch": 0.4341047304191666, "grad_norm": 2.0556869506835938, "learning_rate": 1.9986145469130304e-05, "loss": 0.9208, "step": 2659 }, { "epoch": 0.43426798906167097, "grad_norm": 2.35178279876709, "learning_rate": 1.998612876569258e-05, "loss": 1.2053, "step": 2660 }, { "epoch": 0.43443124770417535, "grad_norm": 2.285785675048828, "learning_rate": 1.998611205219883e-05, "loss": 0.9811, "step": 2661 }, { "epoch": 0.43459450634667973, "grad_norm": 1.9938793182373047, "learning_rate": 1.9986095328649063e-05, "loss": 0.8602, "step": 2662 }, { "epoch": 0.4347577649891841, "grad_norm": 1.9170814752578735, "learning_rate": 1.9986078595043303e-05, "loss": 0.7936, "step": 2663 }, { "epoch": 0.4349210236316885, "grad_norm": 2.26836895942688, "learning_rate": 1.9986061851381565e-05, "loss": 0.9123, "step": 2664 }, { "epoch": 0.4350842822741929, "grad_norm": 1.9311662912368774, "learning_rate": 1.9986045097663866e-05, "loss": 0.7011, "step": 2665 }, { "epoch": 0.43524754091669726, "grad_norm": 2.714801788330078, "learning_rate": 1.9986028333890218e-05, "loss": 1.1053, "step": 2666 }, { "epoch": 0.43541079955920164, "grad_norm": 2.334782361984253, "learning_rate": 1.9986011560060646e-05, "loss": 1.0644, "step": 2667 }, { "epoch": 0.435574058201706, "grad_norm": 1.690503478050232, "learning_rate": 1.998599477617516e-05, "loss": 0.6588, "step": 2668 }, { "epoch": 0.43573731684421046, "grad_norm": 1.747901439666748, "learning_rate": 1.9985977982233783e-05, "loss": 0.7068, "step": 2669 }, { "epoch": 0.43590057548671485, "grad_norm": 2.215257167816162, "learning_rate": 1.9985961178236526e-05, "loss": 0.9213, "step": 2670 }, { "epoch": 0.43606383412921923, "grad_norm": 2.1856789588928223, "learning_rate": 1.9985944364183413e-05, "loss": 1.0029, "step": 2671 }, { "epoch": 0.4362270927717236, "grad_norm": 2.3084564208984375, "learning_rate": 1.9985927540074453e-05, "loss": 0.9062, "step": 2672 }, { "epoch": 0.436390351414228, "grad_norm": 2.2608482837677, "learning_rate": 1.9985910705909667e-05, "loss": 1.1294, "step": 2673 }, { "epoch": 0.4365536100567324, "grad_norm": 2.2989938259124756, "learning_rate": 1.9985893861689076e-05, "loss": 0.9288, "step": 2674 }, { "epoch": 0.43671686869923676, "grad_norm": 2.4355976581573486, "learning_rate": 1.9985877007412685e-05, "loss": 1.0084, "step": 2675 }, { "epoch": 0.43688012734174114, "grad_norm": 2.417752742767334, "learning_rate": 1.9985860143080526e-05, "loss": 0.8687, "step": 2676 }, { "epoch": 0.4370433859842455, "grad_norm": 2.6050524711608887, "learning_rate": 1.9985843268692605e-05, "loss": 0.9473, "step": 2677 }, { "epoch": 0.4372066446267499, "grad_norm": 2.0831985473632812, "learning_rate": 1.998582638424894e-05, "loss": 0.8053, "step": 2678 }, { "epoch": 0.4373699032692543, "grad_norm": 2.1258373260498047, "learning_rate": 1.9985809489749553e-05, "loss": 0.7933, "step": 2679 }, { "epoch": 0.43753316191175873, "grad_norm": 2.739917039871216, "learning_rate": 1.998579258519446e-05, "loss": 0.9547, "step": 2680 }, { "epoch": 0.4376964205542631, "grad_norm": 1.8580518960952759, "learning_rate": 1.998577567058367e-05, "loss": 0.8272, "step": 2681 }, { "epoch": 0.4378596791967675, "grad_norm": 2.168564558029175, "learning_rate": 1.9985758745917213e-05, "loss": 0.9615, "step": 2682 }, { "epoch": 0.4380229378392719, "grad_norm": 2.185171127319336, "learning_rate": 1.9985741811195098e-05, "loss": 0.9181, "step": 2683 }, { "epoch": 0.43818619648177626, "grad_norm": 2.091188907623291, "learning_rate": 1.9985724866417343e-05, "loss": 0.9006, "step": 2684 }, { "epoch": 0.43834945512428064, "grad_norm": 2.0668110847473145, "learning_rate": 1.9985707911583966e-05, "loss": 0.8571, "step": 2685 }, { "epoch": 0.438512713766785, "grad_norm": 2.063580274581909, "learning_rate": 1.9985690946694983e-05, "loss": 0.8711, "step": 2686 }, { "epoch": 0.4386759724092894, "grad_norm": 1.9888142347335815, "learning_rate": 1.9985673971750414e-05, "loss": 0.8756, "step": 2687 }, { "epoch": 0.4388392310517938, "grad_norm": 1.8143372535705566, "learning_rate": 1.9985656986750273e-05, "loss": 0.8307, "step": 2688 }, { "epoch": 0.43900248969429817, "grad_norm": 1.866960048675537, "learning_rate": 1.9985639991694578e-05, "loss": 0.8031, "step": 2689 }, { "epoch": 0.43916574833680255, "grad_norm": 2.203295946121216, "learning_rate": 1.9985622986583347e-05, "loss": 0.8205, "step": 2690 }, { "epoch": 0.439329006979307, "grad_norm": 1.8331339359283447, "learning_rate": 1.9985605971416596e-05, "loss": 0.7562, "step": 2691 }, { "epoch": 0.4394922656218114, "grad_norm": 2.258321523666382, "learning_rate": 1.9985588946194343e-05, "loss": 0.792, "step": 2692 }, { "epoch": 0.43965552426431576, "grad_norm": 2.39616060256958, "learning_rate": 1.9985571910916604e-05, "loss": 0.9636, "step": 2693 }, { "epoch": 0.43981878290682014, "grad_norm": 2.1235127449035645, "learning_rate": 1.9985554865583394e-05, "loss": 0.8517, "step": 2694 }, { "epoch": 0.4399820415493245, "grad_norm": 2.282531976699829, "learning_rate": 1.9985537810194734e-05, "loss": 0.8666, "step": 2695 }, { "epoch": 0.4401453001918289, "grad_norm": 2.088718891143799, "learning_rate": 1.998552074475064e-05, "loss": 0.8936, "step": 2696 }, { "epoch": 0.4403085588343333, "grad_norm": 2.4440555572509766, "learning_rate": 1.9985503669251135e-05, "loss": 0.8472, "step": 2697 }, { "epoch": 0.44047181747683767, "grad_norm": 1.8653243780136108, "learning_rate": 1.9985486583696227e-05, "loss": 0.8235, "step": 2698 }, { "epoch": 0.44063507611934205, "grad_norm": 2.849872589111328, "learning_rate": 1.9985469488085933e-05, "loss": 0.8711, "step": 2699 }, { "epoch": 0.44079833476184643, "grad_norm": 1.9888776540756226, "learning_rate": 1.9985452382420277e-05, "loss": 0.813, "step": 2700 }, { "epoch": 0.4409615934043508, "grad_norm": 2.2006723880767822, "learning_rate": 1.998543526669927e-05, "loss": 0.9467, "step": 2701 }, { "epoch": 0.44112485204685525, "grad_norm": 2.177020311355591, "learning_rate": 1.9985418140922938e-05, "loss": 0.9523, "step": 2702 }, { "epoch": 0.44128811068935964, "grad_norm": 2.3006911277770996, "learning_rate": 1.9985401005091288e-05, "loss": 0.9483, "step": 2703 }, { "epoch": 0.441451369331864, "grad_norm": 2.1592843532562256, "learning_rate": 1.9985383859204343e-05, "loss": 0.8812, "step": 2704 }, { "epoch": 0.4416146279743684, "grad_norm": 1.8533493280410767, "learning_rate": 1.998536670326212e-05, "loss": 0.7322, "step": 2705 }, { "epoch": 0.4417778866168728, "grad_norm": 2.0111234188079834, "learning_rate": 1.9985349537264637e-05, "loss": 0.8164, "step": 2706 }, { "epoch": 0.44194114525937717, "grad_norm": 2.3743317127227783, "learning_rate": 1.9985332361211905e-05, "loss": 0.7042, "step": 2707 }, { "epoch": 0.44210440390188155, "grad_norm": 2.3276257514953613, "learning_rate": 1.9985315175103947e-05, "loss": 1.1234, "step": 2708 }, { "epoch": 0.44226766254438593, "grad_norm": 2.0492115020751953, "learning_rate": 1.998529797894078e-05, "loss": 0.8982, "step": 2709 }, { "epoch": 0.4424309211868903, "grad_norm": 1.9858986139297485, "learning_rate": 1.9985280772722423e-05, "loss": 0.821, "step": 2710 }, { "epoch": 0.4425941798293947, "grad_norm": 2.381416082382202, "learning_rate": 1.9985263556448888e-05, "loss": 0.9602, "step": 2711 }, { "epoch": 0.4427574384718991, "grad_norm": 1.8716630935668945, "learning_rate": 1.9985246330120197e-05, "loss": 0.8154, "step": 2712 }, { "epoch": 0.4429206971144035, "grad_norm": 2.085667133331299, "learning_rate": 1.9985229093736365e-05, "loss": 0.6856, "step": 2713 }, { "epoch": 0.4430839557569079, "grad_norm": 2.112861156463623, "learning_rate": 1.998521184729741e-05, "loss": 0.8776, "step": 2714 }, { "epoch": 0.4432472143994123, "grad_norm": 1.8321067094802856, "learning_rate": 1.9985194590803346e-05, "loss": 0.7676, "step": 2715 }, { "epoch": 0.44341047304191666, "grad_norm": 1.997267484664917, "learning_rate": 1.99851773242542e-05, "loss": 0.8008, "step": 2716 }, { "epoch": 0.44357373168442105, "grad_norm": 2.4477429389953613, "learning_rate": 1.9985160047649978e-05, "loss": 0.9343, "step": 2717 }, { "epoch": 0.44373699032692543, "grad_norm": 2.2380995750427246, "learning_rate": 1.9985142760990705e-05, "loss": 0.8682, "step": 2718 }, { "epoch": 0.4439002489694298, "grad_norm": 2.1809186935424805, "learning_rate": 1.9985125464276395e-05, "loss": 1.0538, "step": 2719 }, { "epoch": 0.4440635076119342, "grad_norm": 2.454454183578491, "learning_rate": 1.998510815750707e-05, "loss": 1.0195, "step": 2720 }, { "epoch": 0.4442267662544386, "grad_norm": 2.165220022201538, "learning_rate": 1.9985090840682737e-05, "loss": 0.9512, "step": 2721 }, { "epoch": 0.44439002489694296, "grad_norm": 1.8629913330078125, "learning_rate": 1.9985073513803425e-05, "loss": 0.8271, "step": 2722 }, { "epoch": 0.4445532835394474, "grad_norm": 2.238548994064331, "learning_rate": 1.9985056176869145e-05, "loss": 0.8329, "step": 2723 }, { "epoch": 0.4447165421819518, "grad_norm": 2.0306456089019775, "learning_rate": 1.9985038829879917e-05, "loss": 0.8197, "step": 2724 }, { "epoch": 0.44487980082445616, "grad_norm": 2.0444462299346924, "learning_rate": 1.9985021472835756e-05, "loss": 0.946, "step": 2725 }, { "epoch": 0.44504305946696054, "grad_norm": 2.3781251907348633, "learning_rate": 1.998500410573668e-05, "loss": 1.0021, "step": 2726 }, { "epoch": 0.4452063181094649, "grad_norm": 1.8087161779403687, "learning_rate": 1.9984986728582712e-05, "loss": 0.8959, "step": 2727 }, { "epoch": 0.4453695767519693, "grad_norm": 2.05653715133667, "learning_rate": 1.9984969341373862e-05, "loss": 0.8544, "step": 2728 }, { "epoch": 0.4455328353944737, "grad_norm": 2.2994933128356934, "learning_rate": 1.9984951944110152e-05, "loss": 1.015, "step": 2729 }, { "epoch": 0.4456960940369781, "grad_norm": 2.725553512573242, "learning_rate": 1.9984934536791594e-05, "loss": 1.0603, "step": 2730 }, { "epoch": 0.44585935267948246, "grad_norm": 2.4205844402313232, "learning_rate": 1.9984917119418214e-05, "loss": 1.0091, "step": 2731 }, { "epoch": 0.44602261132198684, "grad_norm": 2.026588201522827, "learning_rate": 1.9984899691990024e-05, "loss": 0.8715, "step": 2732 }, { "epoch": 0.4461858699644912, "grad_norm": 2.3007028102874756, "learning_rate": 1.998488225450704e-05, "loss": 0.9269, "step": 2733 }, { "epoch": 0.44634912860699566, "grad_norm": 2.229365587234497, "learning_rate": 1.9984864806969286e-05, "loss": 0.8723, "step": 2734 }, { "epoch": 0.44651238724950004, "grad_norm": 2.1601788997650146, "learning_rate": 1.9984847349376775e-05, "loss": 0.757, "step": 2735 }, { "epoch": 0.4466756458920044, "grad_norm": 2.043607473373413, "learning_rate": 1.9984829881729526e-05, "loss": 0.8626, "step": 2736 }, { "epoch": 0.4468389045345088, "grad_norm": 2.118283271789551, "learning_rate": 1.9984812404027555e-05, "loss": 1.0084, "step": 2737 }, { "epoch": 0.4470021631770132, "grad_norm": 2.2881579399108887, "learning_rate": 1.9984794916270876e-05, "loss": 1.3142, "step": 2738 }, { "epoch": 0.4471654218195176, "grad_norm": 2.0737898349761963, "learning_rate": 1.9984777418459517e-05, "loss": 0.8781, "step": 2739 }, { "epoch": 0.44732868046202195, "grad_norm": 2.217668056488037, "learning_rate": 1.998475991059349e-05, "loss": 0.9425, "step": 2740 }, { "epoch": 0.44749193910452634, "grad_norm": 2.0479846000671387, "learning_rate": 1.998474239267281e-05, "loss": 0.8539, "step": 2741 }, { "epoch": 0.4476551977470307, "grad_norm": 2.188425302505493, "learning_rate": 1.9984724864697495e-05, "loss": 0.8078, "step": 2742 }, { "epoch": 0.4478184563895351, "grad_norm": 2.477513551712036, "learning_rate": 1.998470732666757e-05, "loss": 0.9089, "step": 2743 }, { "epoch": 0.4479817150320395, "grad_norm": 1.9713108539581299, "learning_rate": 1.9984689778583044e-05, "loss": 0.9462, "step": 2744 }, { "epoch": 0.4481449736745439, "grad_norm": 2.284900188446045, "learning_rate": 1.998467222044394e-05, "loss": 1.1004, "step": 2745 }, { "epoch": 0.4483082323170483, "grad_norm": 2.3754286766052246, "learning_rate": 1.998465465225027e-05, "loss": 0.8506, "step": 2746 }, { "epoch": 0.4484714909595527, "grad_norm": 2.0146396160125732, "learning_rate": 1.9984637074002056e-05, "loss": 0.9111, "step": 2747 }, { "epoch": 0.44863474960205707, "grad_norm": 2.218055486679077, "learning_rate": 1.998461948569932e-05, "loss": 1.0541, "step": 2748 }, { "epoch": 0.44879800824456145, "grad_norm": 2.104807138442993, "learning_rate": 1.998460188734207e-05, "loss": 0.9474, "step": 2749 }, { "epoch": 0.44896126688706584, "grad_norm": 2.4090664386749268, "learning_rate": 1.9984584278930333e-05, "loss": 0.7195, "step": 2750 }, { "epoch": 0.4491245255295702, "grad_norm": 2.2388219833374023, "learning_rate": 1.998456666046412e-05, "loss": 0.9465, "step": 2751 }, { "epoch": 0.4492877841720746, "grad_norm": 2.3202152252197266, "learning_rate": 1.998454903194345e-05, "loss": 0.9494, "step": 2752 }, { "epoch": 0.449451042814579, "grad_norm": 2.259066343307495, "learning_rate": 1.998453139336834e-05, "loss": 0.8967, "step": 2753 }, { "epoch": 0.44961430145708337, "grad_norm": 2.4667165279388428, "learning_rate": 1.9984513744738815e-05, "loss": 0.9511, "step": 2754 }, { "epoch": 0.44977756009958775, "grad_norm": 1.9335856437683105, "learning_rate": 1.9984496086054882e-05, "loss": 0.9403, "step": 2755 }, { "epoch": 0.4499408187420922, "grad_norm": 2.232543706893921, "learning_rate": 1.9984478417316566e-05, "loss": 0.8169, "step": 2756 }, { "epoch": 0.45010407738459657, "grad_norm": 2.1563398838043213, "learning_rate": 1.9984460738523882e-05, "loss": 0.9334, "step": 2757 }, { "epoch": 0.45026733602710095, "grad_norm": 1.9198511838912964, "learning_rate": 1.998444304967685e-05, "loss": 0.7089, "step": 2758 }, { "epoch": 0.45043059466960533, "grad_norm": 2.1701595783233643, "learning_rate": 1.998442535077549e-05, "loss": 0.9623, "step": 2759 }, { "epoch": 0.4505938533121097, "grad_norm": 2.248934745788574, "learning_rate": 1.9984407641819812e-05, "loss": 0.8981, "step": 2760 }, { "epoch": 0.4507571119546141, "grad_norm": 2.0339176654815674, "learning_rate": 1.998438992280984e-05, "loss": 0.7647, "step": 2761 }, { "epoch": 0.4509203705971185, "grad_norm": 1.8736517429351807, "learning_rate": 1.9984372193745588e-05, "loss": 0.8505, "step": 2762 }, { "epoch": 0.45108362923962286, "grad_norm": 1.9474809169769287, "learning_rate": 1.9984354454627074e-05, "loss": 0.6766, "step": 2763 }, { "epoch": 0.45124688788212725, "grad_norm": 2.469625473022461, "learning_rate": 1.998433670545432e-05, "loss": 0.9633, "step": 2764 }, { "epoch": 0.45141014652463163, "grad_norm": 2.1931521892547607, "learning_rate": 1.9984318946227343e-05, "loss": 0.9492, "step": 2765 }, { "epoch": 0.451573405167136, "grad_norm": 2.0469679832458496, "learning_rate": 1.998430117694616e-05, "loss": 0.9221, "step": 2766 }, { "epoch": 0.45173666380964045, "grad_norm": 2.043379306793213, "learning_rate": 1.9984283397610785e-05, "loss": 0.8435, "step": 2767 }, { "epoch": 0.45189992245214483, "grad_norm": 2.3748512268066406, "learning_rate": 1.9984265608221242e-05, "loss": 0.9452, "step": 2768 }, { "epoch": 0.4520631810946492, "grad_norm": 2.4516921043395996, "learning_rate": 1.9984247808777547e-05, "loss": 0.9168, "step": 2769 }, { "epoch": 0.4522264397371536, "grad_norm": 2.0556933879852295, "learning_rate": 1.9984229999279713e-05, "loss": 0.8783, "step": 2770 }, { "epoch": 0.452389698379658, "grad_norm": 1.9374761581420898, "learning_rate": 1.9984212179727768e-05, "loss": 0.7759, "step": 2771 }, { "epoch": 0.45255295702216236, "grad_norm": 1.919141411781311, "learning_rate": 1.998419435012172e-05, "loss": 0.7105, "step": 2772 }, { "epoch": 0.45271621566466674, "grad_norm": 1.9514325857162476, "learning_rate": 1.9984176510461592e-05, "loss": 0.8254, "step": 2773 }, { "epoch": 0.4528794743071711, "grad_norm": 2.317502021789551, "learning_rate": 1.9984158660747396e-05, "loss": 0.8213, "step": 2774 }, { "epoch": 0.4530427329496755, "grad_norm": 2.1851048469543457, "learning_rate": 1.9984140800979163e-05, "loss": 0.7771, "step": 2775 }, { "epoch": 0.4532059915921799, "grad_norm": 2.4070708751678467, "learning_rate": 1.9984122931156896e-05, "loss": 0.8929, "step": 2776 }, { "epoch": 0.4533692502346843, "grad_norm": 2.4133450984954834, "learning_rate": 1.9984105051280626e-05, "loss": 1.0678, "step": 2777 }, { "epoch": 0.4535325088771887, "grad_norm": 2.160731554031372, "learning_rate": 1.998408716135036e-05, "loss": 0.881, "step": 2778 }, { "epoch": 0.4536957675196931, "grad_norm": 1.9060003757476807, "learning_rate": 1.998406926136612e-05, "loss": 0.7609, "step": 2779 }, { "epoch": 0.4538590261621975, "grad_norm": 2.153287887573242, "learning_rate": 1.998405135132793e-05, "loss": 0.941, "step": 2780 }, { "epoch": 0.45402228480470186, "grad_norm": 2.3656256198883057, "learning_rate": 1.99840334312358e-05, "loss": 0.942, "step": 2781 }, { "epoch": 0.45418554344720624, "grad_norm": 2.4292593002319336, "learning_rate": 1.998401550108975e-05, "loss": 0.9825, "step": 2782 }, { "epoch": 0.4543488020897106, "grad_norm": 2.3295626640319824, "learning_rate": 1.9983997560889804e-05, "loss": 0.8922, "step": 2783 }, { "epoch": 0.454512060732215, "grad_norm": 2.081510543823242, "learning_rate": 1.9983979610635972e-05, "loss": 0.7443, "step": 2784 }, { "epoch": 0.4546753193747194, "grad_norm": 2.332350015640259, "learning_rate": 1.9983961650328272e-05, "loss": 1.0261, "step": 2785 }, { "epoch": 0.45483857801722377, "grad_norm": 2.1659679412841797, "learning_rate": 1.9983943679966728e-05, "loss": 0.8339, "step": 2786 }, { "epoch": 0.45500183665972815, "grad_norm": 2.744924306869507, "learning_rate": 1.9983925699551357e-05, "loss": 0.8831, "step": 2787 }, { "epoch": 0.45516509530223254, "grad_norm": 2.4122302532196045, "learning_rate": 1.9983907709082172e-05, "loss": 1.0166, "step": 2788 }, { "epoch": 0.455328353944737, "grad_norm": 2.241115093231201, "learning_rate": 1.9983889708559198e-05, "loss": 0.9517, "step": 2789 }, { "epoch": 0.45549161258724136, "grad_norm": 2.609023094177246, "learning_rate": 1.9983871697982448e-05, "loss": 1.0865, "step": 2790 }, { "epoch": 0.45565487122974574, "grad_norm": 2.022104501724243, "learning_rate": 1.9983853677351945e-05, "loss": 0.8671, "step": 2791 }, { "epoch": 0.4558181298722501, "grad_norm": 1.9546022415161133, "learning_rate": 1.99838356466677e-05, "loss": 0.7546, "step": 2792 }, { "epoch": 0.4559813885147545, "grad_norm": 2.274498224258423, "learning_rate": 1.9983817605929735e-05, "loss": 0.9117, "step": 2793 }, { "epoch": 0.4561446471572589, "grad_norm": 2.3548507690429688, "learning_rate": 1.998379955513807e-05, "loss": 1.0239, "step": 2794 }, { "epoch": 0.45630790579976327, "grad_norm": 2.3394954204559326, "learning_rate": 1.998378149429272e-05, "loss": 0.9683, "step": 2795 }, { "epoch": 0.45647116444226765, "grad_norm": 1.926032304763794, "learning_rate": 1.9983763423393703e-05, "loss": 0.8501, "step": 2796 }, { "epoch": 0.45663442308477203, "grad_norm": 2.6548948287963867, "learning_rate": 1.9983745342441044e-05, "loss": 0.936, "step": 2797 }, { "epoch": 0.4567976817272764, "grad_norm": 2.3010077476501465, "learning_rate": 1.9983727251434755e-05, "loss": 0.9463, "step": 2798 }, { "epoch": 0.4569609403697808, "grad_norm": 1.773181676864624, "learning_rate": 1.998370915037485e-05, "loss": 0.641, "step": 2799 }, { "epoch": 0.45712419901228524, "grad_norm": 2.1500132083892822, "learning_rate": 1.9983691039261358e-05, "loss": 0.9939, "step": 2800 }, { "epoch": 0.4572874576547896, "grad_norm": 2.171962022781372, "learning_rate": 1.9983672918094292e-05, "loss": 0.9821, "step": 2801 }, { "epoch": 0.457450716297294, "grad_norm": 1.9071234464645386, "learning_rate": 1.9983654786873666e-05, "loss": 0.7982, "step": 2802 }, { "epoch": 0.4576139749397984, "grad_norm": 1.9087942838668823, "learning_rate": 1.9983636645599508e-05, "loss": 0.8352, "step": 2803 }, { "epoch": 0.45777723358230277, "grad_norm": 2.2338805198669434, "learning_rate": 1.9983618494271825e-05, "loss": 0.9627, "step": 2804 }, { "epoch": 0.45794049222480715, "grad_norm": 2.2184669971466064, "learning_rate": 1.9983600332890643e-05, "loss": 0.8054, "step": 2805 }, { "epoch": 0.45810375086731153, "grad_norm": 2.211547613143921, "learning_rate": 1.9983582161455976e-05, "loss": 0.8377, "step": 2806 }, { "epoch": 0.4582670095098159, "grad_norm": 2.0472335815429688, "learning_rate": 1.9983563979967848e-05, "loss": 0.846, "step": 2807 }, { "epoch": 0.4584302681523203, "grad_norm": 2.140235424041748, "learning_rate": 1.9983545788426273e-05, "loss": 0.8343, "step": 2808 }, { "epoch": 0.4585935267948247, "grad_norm": 2.291480779647827, "learning_rate": 1.998352758683127e-05, "loss": 1.032, "step": 2809 }, { "epoch": 0.45875678543732906, "grad_norm": 2.3100991249084473, "learning_rate": 1.9983509375182853e-05, "loss": 0.9329, "step": 2810 }, { "epoch": 0.4589200440798335, "grad_norm": 2.1981215476989746, "learning_rate": 1.998349115348105e-05, "loss": 0.8952, "step": 2811 }, { "epoch": 0.4590833027223379, "grad_norm": 2.182628870010376, "learning_rate": 1.9983472921725874e-05, "loss": 0.8301, "step": 2812 }, { "epoch": 0.45924656136484227, "grad_norm": 2.0193445682525635, "learning_rate": 1.998345467991734e-05, "loss": 0.7344, "step": 2813 }, { "epoch": 0.45940982000734665, "grad_norm": 2.03368878364563, "learning_rate": 1.998343642805547e-05, "loss": 0.8465, "step": 2814 }, { "epoch": 0.45957307864985103, "grad_norm": 2.191793203353882, "learning_rate": 1.9983418166140286e-05, "loss": 0.9405, "step": 2815 }, { "epoch": 0.4597363372923554, "grad_norm": 2.1480743885040283, "learning_rate": 1.99833998941718e-05, "loss": 0.7638, "step": 2816 }, { "epoch": 0.4598995959348598, "grad_norm": 2.185225248336792, "learning_rate": 1.9983381612150034e-05, "loss": 0.9954, "step": 2817 }, { "epoch": 0.4600628545773642, "grad_norm": 2.2997236251831055, "learning_rate": 1.9983363320075006e-05, "loss": 0.9414, "step": 2818 }, { "epoch": 0.46022611321986856, "grad_norm": 2.383753776550293, "learning_rate": 1.998334501794673e-05, "loss": 1.1423, "step": 2819 }, { "epoch": 0.46038937186237294, "grad_norm": 2.103109121322632, "learning_rate": 1.998332670576523e-05, "loss": 0.8618, "step": 2820 }, { "epoch": 0.4605526305048773, "grad_norm": 2.23368763923645, "learning_rate": 1.9983308383530522e-05, "loss": 0.9832, "step": 2821 }, { "epoch": 0.46071588914738176, "grad_norm": 2.262098550796509, "learning_rate": 1.998329005124263e-05, "loss": 0.8908, "step": 2822 }, { "epoch": 0.46087914778988615, "grad_norm": 2.0950767993927, "learning_rate": 1.998327170890156e-05, "loss": 0.8278, "step": 2823 }, { "epoch": 0.46104240643239053, "grad_norm": 2.093890428543091, "learning_rate": 1.9983253356507345e-05, "loss": 1.0121, "step": 2824 }, { "epoch": 0.4612056650748949, "grad_norm": 2.174423933029175, "learning_rate": 1.998323499405999e-05, "loss": 0.8794, "step": 2825 }, { "epoch": 0.4613689237173993, "grad_norm": 2.1634421348571777, "learning_rate": 1.9983216621559525e-05, "loss": 0.9161, "step": 2826 }, { "epoch": 0.4615321823599037, "grad_norm": 2.0016865730285645, "learning_rate": 1.9983198239005962e-05, "loss": 0.885, "step": 2827 }, { "epoch": 0.46169544100240806, "grad_norm": 2.013849973678589, "learning_rate": 1.998317984639932e-05, "loss": 0.8185, "step": 2828 }, { "epoch": 0.46185869964491244, "grad_norm": 2.138503313064575, "learning_rate": 1.998316144373962e-05, "loss": 0.9567, "step": 2829 }, { "epoch": 0.4620219582874168, "grad_norm": 2.2302157878875732, "learning_rate": 1.998314303102688e-05, "loss": 0.9868, "step": 2830 }, { "epoch": 0.4621852169299212, "grad_norm": 1.9154495000839233, "learning_rate": 1.9983124608261116e-05, "loss": 0.7744, "step": 2831 }, { "epoch": 0.4623484755724256, "grad_norm": 2.2065558433532715, "learning_rate": 1.9983106175442348e-05, "loss": 0.8315, "step": 2832 }, { "epoch": 0.46251173421493, "grad_norm": 1.9942960739135742, "learning_rate": 1.9983087732570596e-05, "loss": 0.7742, "step": 2833 }, { "epoch": 0.4626749928574344, "grad_norm": 2.16050386428833, "learning_rate": 1.9983069279645875e-05, "loss": 0.8622, "step": 2834 }, { "epoch": 0.4628382514999388, "grad_norm": 2.252201795578003, "learning_rate": 1.9983050816668207e-05, "loss": 0.8288, "step": 2835 }, { "epoch": 0.4630015101424432, "grad_norm": 2.4610588550567627, "learning_rate": 1.998303234363761e-05, "loss": 0.923, "step": 2836 }, { "epoch": 0.46316476878494756, "grad_norm": 2.255638360977173, "learning_rate": 1.99830138605541e-05, "loss": 0.8623, "step": 2837 }, { "epoch": 0.46332802742745194, "grad_norm": 3.4806549549102783, "learning_rate": 1.99829953674177e-05, "loss": 0.8391, "step": 2838 }, { "epoch": 0.4634912860699563, "grad_norm": 2.0836477279663086, "learning_rate": 1.9982976864228427e-05, "loss": 0.9397, "step": 2839 }, { "epoch": 0.4636545447124607, "grad_norm": 1.8030729293823242, "learning_rate": 1.9982958350986296e-05, "loss": 0.6601, "step": 2840 }, { "epoch": 0.4638178033549651, "grad_norm": 1.9555153846740723, "learning_rate": 1.998293982769133e-05, "loss": 0.847, "step": 2841 }, { "epoch": 0.46398106199746947, "grad_norm": 2.0813868045806885, "learning_rate": 1.9982921294343548e-05, "loss": 0.7273, "step": 2842 }, { "epoch": 0.4641443206399739, "grad_norm": 2.059178590774536, "learning_rate": 1.998290275094297e-05, "loss": 0.8192, "step": 2843 }, { "epoch": 0.4643075792824783, "grad_norm": 2.340812921524048, "learning_rate": 1.9982884197489602e-05, "loss": 0.8316, "step": 2844 }, { "epoch": 0.46447083792498267, "grad_norm": 2.144193410873413, "learning_rate": 1.998286563398348e-05, "loss": 0.8057, "step": 2845 }, { "epoch": 0.46463409656748705, "grad_norm": 2.0668280124664307, "learning_rate": 1.998284706042461e-05, "loss": 0.7549, "step": 2846 }, { "epoch": 0.46479735520999144, "grad_norm": 2.1740007400512695, "learning_rate": 1.9982828476813018e-05, "loss": 0.8372, "step": 2847 }, { "epoch": 0.4649606138524958, "grad_norm": 2.311825752258301, "learning_rate": 1.998280988314872e-05, "loss": 0.803, "step": 2848 }, { "epoch": 0.4651238724950002, "grad_norm": 2.0905911922454834, "learning_rate": 1.9982791279431738e-05, "loss": 0.6485, "step": 2849 }, { "epoch": 0.4652871311375046, "grad_norm": 2.2241992950439453, "learning_rate": 1.9982772665662083e-05, "loss": 0.9435, "step": 2850 }, { "epoch": 0.46545038978000897, "grad_norm": 2.195361375808716, "learning_rate": 1.9982754041839784e-05, "loss": 0.9347, "step": 2851 }, { "epoch": 0.46561364842251335, "grad_norm": 2.078212261199951, "learning_rate": 1.9982735407964847e-05, "loss": 0.7921, "step": 2852 }, { "epoch": 0.46577690706501773, "grad_norm": 2.2851967811584473, "learning_rate": 1.9982716764037303e-05, "loss": 0.8441, "step": 2853 }, { "epoch": 0.46594016570752217, "grad_norm": 2.23728346824646, "learning_rate": 1.9982698110057165e-05, "loss": 0.7958, "step": 2854 }, { "epoch": 0.46610342435002655, "grad_norm": 2.095329999923706, "learning_rate": 1.9982679446024457e-05, "loss": 0.9383, "step": 2855 }, { "epoch": 0.46626668299253093, "grad_norm": 2.822284460067749, "learning_rate": 1.9982660771939185e-05, "loss": 0.8709, "step": 2856 }, { "epoch": 0.4664299416350353, "grad_norm": 2.1636219024658203, "learning_rate": 1.998264208780138e-05, "loss": 0.7915, "step": 2857 }, { "epoch": 0.4665932002775397, "grad_norm": 2.226421356201172, "learning_rate": 1.998262339361106e-05, "loss": 0.8553, "step": 2858 }, { "epoch": 0.4667564589200441, "grad_norm": 2.3619461059570312, "learning_rate": 1.998260468936824e-05, "loss": 0.9113, "step": 2859 }, { "epoch": 0.46691971756254846, "grad_norm": 2.351224660873413, "learning_rate": 1.9982585975072938e-05, "loss": 0.9972, "step": 2860 }, { "epoch": 0.46708297620505285, "grad_norm": 2.365201473236084, "learning_rate": 1.9982567250725175e-05, "loss": 0.8192, "step": 2861 }, { "epoch": 0.46724623484755723, "grad_norm": 2.134860038757324, "learning_rate": 1.9982548516324967e-05, "loss": 0.829, "step": 2862 }, { "epoch": 0.4674094934900616, "grad_norm": 1.9725315570831299, "learning_rate": 1.998252977187234e-05, "loss": 0.8187, "step": 2863 }, { "epoch": 0.467572752132566, "grad_norm": 2.3101861476898193, "learning_rate": 1.9982511017367307e-05, "loss": 0.8469, "step": 2864 }, { "epoch": 0.46773601077507043, "grad_norm": 1.8851842880249023, "learning_rate": 1.998249225280988e-05, "loss": 0.8447, "step": 2865 }, { "epoch": 0.4678992694175748, "grad_norm": 2.2839772701263428, "learning_rate": 1.9982473478200094e-05, "loss": 0.9947, "step": 2866 }, { "epoch": 0.4680625280600792, "grad_norm": 2.428194999694824, "learning_rate": 1.998245469353796e-05, "loss": 1.0399, "step": 2867 }, { "epoch": 0.4682257867025836, "grad_norm": 2.3076398372650146, "learning_rate": 1.9982435898823495e-05, "loss": 1.0321, "step": 2868 }, { "epoch": 0.46838904534508796, "grad_norm": 2.163058042526245, "learning_rate": 1.998241709405672e-05, "loss": 0.7943, "step": 2869 }, { "epoch": 0.46855230398759234, "grad_norm": 1.9033005237579346, "learning_rate": 1.9982398279237657e-05, "loss": 0.7803, "step": 2870 }, { "epoch": 0.4687155626300967, "grad_norm": 1.9590667486190796, "learning_rate": 1.9982379454366314e-05, "loss": 0.8188, "step": 2871 }, { "epoch": 0.4688788212726011, "grad_norm": 1.980513334274292, "learning_rate": 1.9982360619442724e-05, "loss": 0.7395, "step": 2872 }, { "epoch": 0.4690420799151055, "grad_norm": 2.6382150650024414, "learning_rate": 1.9982341774466895e-05, "loss": 0.9217, "step": 2873 }, { "epoch": 0.4692053385576099, "grad_norm": 2.3668220043182373, "learning_rate": 1.9982322919438855e-05, "loss": 0.9604, "step": 2874 }, { "epoch": 0.46936859720011426, "grad_norm": 1.9891583919525146, "learning_rate": 1.9982304054358615e-05, "loss": 0.8778, "step": 2875 }, { "epoch": 0.4695318558426187, "grad_norm": 2.083937406539917, "learning_rate": 1.9982285179226197e-05, "loss": 0.8809, "step": 2876 }, { "epoch": 0.4696951144851231, "grad_norm": 2.156355619430542, "learning_rate": 1.9982266294041623e-05, "loss": 0.8518, "step": 2877 }, { "epoch": 0.46985837312762746, "grad_norm": 2.194596529006958, "learning_rate": 1.998224739880491e-05, "loss": 0.9476, "step": 2878 }, { "epoch": 0.47002163177013184, "grad_norm": 2.057173490524292, "learning_rate": 1.9982228493516076e-05, "loss": 0.7898, "step": 2879 }, { "epoch": 0.4701848904126362, "grad_norm": 2.5394163131713867, "learning_rate": 1.9982209578175136e-05, "loss": 0.9684, "step": 2880 }, { "epoch": 0.4703481490551406, "grad_norm": 2.59202241897583, "learning_rate": 1.9982190652782122e-05, "loss": 0.9448, "step": 2881 }, { "epoch": 0.470511407697645, "grad_norm": 2.0157599449157715, "learning_rate": 1.998217171733704e-05, "loss": 0.9343, "step": 2882 }, { "epoch": 0.4706746663401494, "grad_norm": 2.2865381240844727, "learning_rate": 1.998215277183991e-05, "loss": 0.8782, "step": 2883 }, { "epoch": 0.47083792498265375, "grad_norm": 2.233272075653076, "learning_rate": 1.9982133816290757e-05, "loss": 0.9271, "step": 2884 }, { "epoch": 0.47100118362515814, "grad_norm": 2.287358045578003, "learning_rate": 1.9982114850689603e-05, "loss": 0.9606, "step": 2885 }, { "epoch": 0.4711644422676625, "grad_norm": 2.0011157989501953, "learning_rate": 1.9982095875036455e-05, "loss": 0.8636, "step": 2886 }, { "epoch": 0.47132770091016696, "grad_norm": 2.073028564453125, "learning_rate": 1.9982076889331344e-05, "loss": 0.797, "step": 2887 }, { "epoch": 0.47149095955267134, "grad_norm": 2.008582353591919, "learning_rate": 1.9982057893574286e-05, "loss": 0.6691, "step": 2888 }, { "epoch": 0.4716542181951757, "grad_norm": 1.8523125648498535, "learning_rate": 1.998203888776529e-05, "loss": 0.8356, "step": 2889 }, { "epoch": 0.4718174768376801, "grad_norm": 2.081228494644165, "learning_rate": 1.998201987190439e-05, "loss": 0.8704, "step": 2890 }, { "epoch": 0.4719807354801845, "grad_norm": 1.900962471961975, "learning_rate": 1.9982000845991598e-05, "loss": 0.8424, "step": 2891 }, { "epoch": 0.47214399412268887, "grad_norm": 2.2780520915985107, "learning_rate": 1.9981981810026932e-05, "loss": 0.9071, "step": 2892 }, { "epoch": 0.47230725276519325, "grad_norm": 2.255894899368286, "learning_rate": 1.9981962764010415e-05, "loss": 0.9602, "step": 2893 }, { "epoch": 0.47247051140769764, "grad_norm": 2.24461030960083, "learning_rate": 1.9981943707942064e-05, "loss": 0.8588, "step": 2894 }, { "epoch": 0.472633770050202, "grad_norm": 2.2727653980255127, "learning_rate": 1.9981924641821897e-05, "loss": 0.9272, "step": 2895 }, { "epoch": 0.4727970286927064, "grad_norm": 2.308840036392212, "learning_rate": 1.9981905565649937e-05, "loss": 0.6948, "step": 2896 }, { "epoch": 0.4729602873352108, "grad_norm": 2.0174977779388428, "learning_rate": 1.9981886479426195e-05, "loss": 0.7442, "step": 2897 }, { "epoch": 0.4731235459777152, "grad_norm": 1.8840582370758057, "learning_rate": 1.99818673831507e-05, "loss": 0.6555, "step": 2898 }, { "epoch": 0.4732868046202196, "grad_norm": 1.9221245050430298, "learning_rate": 1.9981848276823466e-05, "loss": 0.6932, "step": 2899 }, { "epoch": 0.473450063262724, "grad_norm": 2.556025743484497, "learning_rate": 1.9981829160444515e-05, "loss": 0.9103, "step": 2900 }, { "epoch": 0.47361332190522837, "grad_norm": 2.0670883655548096, "learning_rate": 1.9981810034013865e-05, "loss": 0.8191, "step": 2901 }, { "epoch": 0.47377658054773275, "grad_norm": 2.203158140182495, "learning_rate": 1.9981790897531535e-05, "loss": 0.8158, "step": 2902 }, { "epoch": 0.47393983919023713, "grad_norm": 2.3072898387908936, "learning_rate": 1.998177175099754e-05, "loss": 0.7948, "step": 2903 }, { "epoch": 0.4741030978327415, "grad_norm": 2.3490943908691406, "learning_rate": 1.9981752594411908e-05, "loss": 0.9464, "step": 2904 }, { "epoch": 0.4742663564752459, "grad_norm": 2.2218451499938965, "learning_rate": 1.9981733427774653e-05, "loss": 0.9224, "step": 2905 }, { "epoch": 0.4744296151177503, "grad_norm": 2.1985065937042236, "learning_rate": 1.9981714251085794e-05, "loss": 0.8522, "step": 2906 }, { "epoch": 0.47459287376025466, "grad_norm": 2.1712403297424316, "learning_rate": 1.998169506434535e-05, "loss": 0.9383, "step": 2907 }, { "epoch": 0.47475613240275905, "grad_norm": 2.305015802383423, "learning_rate": 1.9981675867553344e-05, "loss": 1.0712, "step": 2908 }, { "epoch": 0.4749193910452635, "grad_norm": 1.900051236152649, "learning_rate": 1.9981656660709794e-05, "loss": 0.7351, "step": 2909 }, { "epoch": 0.47508264968776787, "grad_norm": 2.1865296363830566, "learning_rate": 1.9981637443814717e-05, "loss": 0.9549, "step": 2910 }, { "epoch": 0.47524590833027225, "grad_norm": 2.153003215789795, "learning_rate": 1.9981618216868134e-05, "loss": 0.9081, "step": 2911 }, { "epoch": 0.47540916697277663, "grad_norm": 2.1814370155334473, "learning_rate": 1.998159897987006e-05, "loss": 0.7573, "step": 2912 }, { "epoch": 0.475572425615281, "grad_norm": 2.627352237701416, "learning_rate": 1.9981579732820523e-05, "loss": 0.9614, "step": 2913 }, { "epoch": 0.4757356842577854, "grad_norm": 2.1022489070892334, "learning_rate": 1.998156047571954e-05, "loss": 0.971, "step": 2914 }, { "epoch": 0.4758989429002898, "grad_norm": 2.0379631519317627, "learning_rate": 1.9981541208567122e-05, "loss": 0.8237, "step": 2915 }, { "epoch": 0.47606220154279416, "grad_norm": 2.286691188812256, "learning_rate": 1.9981521931363303e-05, "loss": 0.938, "step": 2916 }, { "epoch": 0.47622546018529854, "grad_norm": 2.454576253890991, "learning_rate": 1.998150264410809e-05, "loss": 0.9036, "step": 2917 }, { "epoch": 0.4763887188278029, "grad_norm": 1.854154109954834, "learning_rate": 1.99814833468015e-05, "loss": 0.8774, "step": 2918 }, { "epoch": 0.4765519774703073, "grad_norm": 2.294463872909546, "learning_rate": 1.9981464039443566e-05, "loss": 0.9566, "step": 2919 }, { "epoch": 0.47671523611281175, "grad_norm": 1.9930500984191895, "learning_rate": 1.99814447220343e-05, "loss": 0.8527, "step": 2920 }, { "epoch": 0.47687849475531613, "grad_norm": 2.007268190383911, "learning_rate": 1.998142539457372e-05, "loss": 0.7803, "step": 2921 }, { "epoch": 0.4770417533978205, "grad_norm": 2.0584607124328613, "learning_rate": 1.9981406057061846e-05, "loss": 0.7947, "step": 2922 }, { "epoch": 0.4772050120403249, "grad_norm": 2.1352384090423584, "learning_rate": 1.9981386709498703e-05, "loss": 0.7785, "step": 2923 }, { "epoch": 0.4773682706828293, "grad_norm": 2.671921730041504, "learning_rate": 1.9981367351884305e-05, "loss": 1.0969, "step": 2924 }, { "epoch": 0.47753152932533366, "grad_norm": 2.308342933654785, "learning_rate": 1.998134798421867e-05, "loss": 0.8548, "step": 2925 }, { "epoch": 0.47769478796783804, "grad_norm": 2.1172280311584473, "learning_rate": 1.9981328606501826e-05, "loss": 0.8842, "step": 2926 }, { "epoch": 0.4778580466103424, "grad_norm": 2.494612455368042, "learning_rate": 1.998130921873378e-05, "loss": 1.7319, "step": 2927 }, { "epoch": 0.4780213052528468, "grad_norm": 2.1061408519744873, "learning_rate": 1.9981289820914562e-05, "loss": 0.9551, "step": 2928 }, { "epoch": 0.4781845638953512, "grad_norm": 2.1102888584136963, "learning_rate": 1.9981270413044184e-05, "loss": 0.9396, "step": 2929 }, { "epoch": 0.47834782253785557, "grad_norm": 2.4565937519073486, "learning_rate": 1.9981250995122673e-05, "loss": 1.0906, "step": 2930 }, { "epoch": 0.47851108118036, "grad_norm": 2.8187267780303955, "learning_rate": 1.9981231567150042e-05, "loss": 1.0055, "step": 2931 }, { "epoch": 0.4786743398228644, "grad_norm": 2.0861103534698486, "learning_rate": 1.998121212912632e-05, "loss": 0.8419, "step": 2932 }, { "epoch": 0.4788375984653688, "grad_norm": 2.2977378368377686, "learning_rate": 1.9981192681051514e-05, "loss": 0.9361, "step": 2933 }, { "epoch": 0.47900085710787316, "grad_norm": 1.8260252475738525, "learning_rate": 1.998117322292565e-05, "loss": 0.8027, "step": 2934 }, { "epoch": 0.47916411575037754, "grad_norm": 1.976986050605774, "learning_rate": 1.9981153754748747e-05, "loss": 0.8388, "step": 2935 }, { "epoch": 0.4793273743928819, "grad_norm": 2.6454837322235107, "learning_rate": 1.9981134276520828e-05, "loss": 0.8392, "step": 2936 }, { "epoch": 0.4794906330353863, "grad_norm": 2.001678705215454, "learning_rate": 1.9981114788241907e-05, "loss": 0.6858, "step": 2937 }, { "epoch": 0.4796538916778907, "grad_norm": 2.0924317836761475, "learning_rate": 1.9981095289912006e-05, "loss": 0.7612, "step": 2938 }, { "epoch": 0.47981715032039507, "grad_norm": 2.139970302581787, "learning_rate": 1.9981075781531147e-05, "loss": 0.7729, "step": 2939 }, { "epoch": 0.47998040896289945, "grad_norm": 2.0188868045806885, "learning_rate": 1.9981056263099347e-05, "loss": 0.9477, "step": 2940 }, { "epoch": 0.48014366760540383, "grad_norm": 1.75154709815979, "learning_rate": 1.998103673461662e-05, "loss": 0.7585, "step": 2941 }, { "epoch": 0.4803069262479083, "grad_norm": 2.139803409576416, "learning_rate": 1.9981017196082998e-05, "loss": 0.8247, "step": 2942 }, { "epoch": 0.48047018489041265, "grad_norm": 2.2295327186584473, "learning_rate": 1.9980997647498493e-05, "loss": 0.9058, "step": 2943 }, { "epoch": 0.48063344353291704, "grad_norm": 2.1121714115142822, "learning_rate": 1.9980978088863125e-05, "loss": 0.9122, "step": 2944 }, { "epoch": 0.4807967021754214, "grad_norm": 2.3188819885253906, "learning_rate": 1.9980958520176915e-05, "loss": 0.8868, "step": 2945 }, { "epoch": 0.4809599608179258, "grad_norm": 2.8637523651123047, "learning_rate": 1.9980938941439883e-05, "loss": 1.1229, "step": 2946 }, { "epoch": 0.4811232194604302, "grad_norm": 2.2811903953552246, "learning_rate": 1.998091935265205e-05, "loss": 0.7047, "step": 2947 }, { "epoch": 0.48128647810293457, "grad_norm": 2.3774428367614746, "learning_rate": 1.9980899753813432e-05, "loss": 0.8565, "step": 2948 }, { "epoch": 0.48144973674543895, "grad_norm": 2.6348989009857178, "learning_rate": 1.998088014492405e-05, "loss": 0.9203, "step": 2949 }, { "epoch": 0.48161299538794333, "grad_norm": 2.54010009765625, "learning_rate": 1.9980860525983924e-05, "loss": 0.894, "step": 2950 }, { "epoch": 0.4817762540304477, "grad_norm": 2.545994997024536, "learning_rate": 1.9980840896993074e-05, "loss": 0.8777, "step": 2951 }, { "epoch": 0.4819395126729521, "grad_norm": 2.4247658252716064, "learning_rate": 1.9980821257951522e-05, "loss": 0.9417, "step": 2952 }, { "epoch": 0.48210277131545654, "grad_norm": 1.8573678731918335, "learning_rate": 1.9980801608859283e-05, "loss": 0.7365, "step": 2953 }, { "epoch": 0.4822660299579609, "grad_norm": 2.3054144382476807, "learning_rate": 1.998078194971638e-05, "loss": 0.9102, "step": 2954 }, { "epoch": 0.4824292886004653, "grad_norm": 2.2980403900146484, "learning_rate": 1.9980762280522834e-05, "loss": 0.9507, "step": 2955 }, { "epoch": 0.4825925472429697, "grad_norm": 2.1343464851379395, "learning_rate": 1.9980742601278662e-05, "loss": 0.948, "step": 2956 }, { "epoch": 0.48275580588547407, "grad_norm": 2.2745609283447266, "learning_rate": 1.9980722911983884e-05, "loss": 0.8342, "step": 2957 }, { "epoch": 0.48291906452797845, "grad_norm": 2.047905206680298, "learning_rate": 1.9980703212638522e-05, "loss": 0.9463, "step": 2958 }, { "epoch": 0.48308232317048283, "grad_norm": 2.983752489089966, "learning_rate": 1.9980683503242596e-05, "loss": 0.9554, "step": 2959 }, { "epoch": 0.4832455818129872, "grad_norm": 2.0807745456695557, "learning_rate": 1.998066378379612e-05, "loss": 0.8924, "step": 2960 }, { "epoch": 0.4834088404554916, "grad_norm": 2.2232308387756348, "learning_rate": 1.9980644054299122e-05, "loss": 0.9268, "step": 2961 }, { "epoch": 0.483572099097996, "grad_norm": 2.1287057399749756, "learning_rate": 1.9980624314751614e-05, "loss": 1.0554, "step": 2962 }, { "epoch": 0.48373535774050036, "grad_norm": 2.1028623580932617, "learning_rate": 1.9980604565153624e-05, "loss": 0.9326, "step": 2963 }, { "epoch": 0.4838986163830048, "grad_norm": 2.0352272987365723, "learning_rate": 1.9980584805505167e-05, "loss": 0.8342, "step": 2964 }, { "epoch": 0.4840618750255092, "grad_norm": 2.1748850345611572, "learning_rate": 1.9980565035806262e-05, "loss": 0.8828, "step": 2965 }, { "epoch": 0.48422513366801356, "grad_norm": 2.2086987495422363, "learning_rate": 1.998054525605693e-05, "loss": 0.7939, "step": 2966 }, { "epoch": 0.48438839231051795, "grad_norm": 2.4605724811553955, "learning_rate": 1.9980525466257195e-05, "loss": 1.0773, "step": 2967 }, { "epoch": 0.48455165095302233, "grad_norm": 2.135646104812622, "learning_rate": 1.9980505666407074e-05, "loss": 0.8832, "step": 2968 }, { "epoch": 0.4847149095955267, "grad_norm": 2.2326395511627197, "learning_rate": 1.9980485856506582e-05, "loss": 0.9103, "step": 2969 }, { "epoch": 0.4848781682380311, "grad_norm": 2.2525382041931152, "learning_rate": 1.9980466036555746e-05, "loss": 0.8218, "step": 2970 }, { "epoch": 0.4850414268805355, "grad_norm": 2.2327094078063965, "learning_rate": 1.9980446206554583e-05, "loss": 0.9171, "step": 2971 }, { "epoch": 0.48520468552303986, "grad_norm": 2.320629358291626, "learning_rate": 1.9980426366503117e-05, "loss": 0.9102, "step": 2972 }, { "epoch": 0.48536794416554424, "grad_norm": 2.501129388809204, "learning_rate": 1.9980406516401357e-05, "loss": 0.8034, "step": 2973 }, { "epoch": 0.4855312028080487, "grad_norm": 2.139223575592041, "learning_rate": 1.9980386656249334e-05, "loss": 0.9316, "step": 2974 }, { "epoch": 0.48569446145055306, "grad_norm": 2.0296218395233154, "learning_rate": 1.9980366786047066e-05, "loss": 0.8225, "step": 2975 }, { "epoch": 0.48585772009305744, "grad_norm": 2.0393569469451904, "learning_rate": 1.998034690579457e-05, "loss": 0.901, "step": 2976 }, { "epoch": 0.4860209787355618, "grad_norm": 2.026050090789795, "learning_rate": 1.9980327015491866e-05, "loss": 0.84, "step": 2977 }, { "epoch": 0.4861842373780662, "grad_norm": 2.0071427822113037, "learning_rate": 1.9980307115138976e-05, "loss": 0.8944, "step": 2978 }, { "epoch": 0.4863474960205706, "grad_norm": 2.3075180053710938, "learning_rate": 1.998028720473592e-05, "loss": 0.9718, "step": 2979 }, { "epoch": 0.486510754663075, "grad_norm": 2.4666945934295654, "learning_rate": 1.9980267284282718e-05, "loss": 1.4253, "step": 2980 }, { "epoch": 0.48667401330557936, "grad_norm": 1.9533698558807373, "learning_rate": 1.9980247353779388e-05, "loss": 0.9028, "step": 2981 }, { "epoch": 0.48683727194808374, "grad_norm": 1.9554541110992432, "learning_rate": 1.998022741322595e-05, "loss": 0.8571, "step": 2982 }, { "epoch": 0.4870005305905881, "grad_norm": 2.191021203994751, "learning_rate": 1.998020746262243e-05, "loss": 0.7544, "step": 2983 }, { "epoch": 0.4871637892330925, "grad_norm": 2.1417734622955322, "learning_rate": 1.9980187501968838e-05, "loss": 0.9186, "step": 2984 }, { "epoch": 0.48732704787559694, "grad_norm": 2.29416561126709, "learning_rate": 1.9980167531265206e-05, "loss": 1.0595, "step": 2985 }, { "epoch": 0.4874903065181013, "grad_norm": 2.046635627746582, "learning_rate": 1.9980147550511545e-05, "loss": 0.9007, "step": 2986 }, { "epoch": 0.4876535651606057, "grad_norm": 1.726946234703064, "learning_rate": 1.998012755970788e-05, "loss": 0.716, "step": 2987 }, { "epoch": 0.4878168238031101, "grad_norm": 2.0166027545928955, "learning_rate": 1.9980107558854227e-05, "loss": 0.868, "step": 2988 }, { "epoch": 0.48798008244561447, "grad_norm": 2.3249034881591797, "learning_rate": 1.9980087547950607e-05, "loss": 1.0554, "step": 2989 }, { "epoch": 0.48814334108811885, "grad_norm": 2.352238655090332, "learning_rate": 1.9980067526997044e-05, "loss": 0.8417, "step": 2990 }, { "epoch": 0.48830659973062324, "grad_norm": 2.213148832321167, "learning_rate": 1.9980047495993556e-05, "loss": 1.0963, "step": 2991 }, { "epoch": 0.4884698583731276, "grad_norm": 2.137869119644165, "learning_rate": 1.998002745494016e-05, "loss": 0.8551, "step": 2992 }, { "epoch": 0.488633117015632, "grad_norm": 2.222379207611084, "learning_rate": 1.9980007403836884e-05, "loss": 1.0197, "step": 2993 }, { "epoch": 0.4887963756581364, "grad_norm": 1.9355217218399048, "learning_rate": 1.9979987342683736e-05, "loss": 0.6892, "step": 2994 }, { "epoch": 0.48895963430064077, "grad_norm": 2.2415010929107666, "learning_rate": 1.997996727148075e-05, "loss": 0.8633, "step": 2995 }, { "epoch": 0.4891228929431452, "grad_norm": 1.8818628787994385, "learning_rate": 1.9979947190227937e-05, "loss": 0.7942, "step": 2996 }, { "epoch": 0.4892861515856496, "grad_norm": 2.001256227493286, "learning_rate": 1.997992709892532e-05, "loss": 0.8923, "step": 2997 }, { "epoch": 0.48944941022815397, "grad_norm": 2.2003233432769775, "learning_rate": 1.997990699757292e-05, "loss": 0.9447, "step": 2998 }, { "epoch": 0.48961266887065835, "grad_norm": 2.0368096828460693, "learning_rate": 1.9979886886170755e-05, "loss": 0.8759, "step": 2999 }, { "epoch": 0.48977592751316273, "grad_norm": 2.2755496501922607, "learning_rate": 1.9979866764718846e-05, "loss": 0.9196, "step": 3000 }, { "epoch": 0.4899391861556671, "grad_norm": 1.7515732049942017, "learning_rate": 1.9979846633217214e-05, "loss": 0.6944, "step": 3001 }, { "epoch": 0.4901024447981715, "grad_norm": 2.0700576305389404, "learning_rate": 1.997982649166588e-05, "loss": 0.9317, "step": 3002 }, { "epoch": 0.4902657034406759, "grad_norm": 3.290846347808838, "learning_rate": 1.997980634006486e-05, "loss": 0.8254, "step": 3003 }, { "epoch": 0.49042896208318026, "grad_norm": 1.9789406061172485, "learning_rate": 1.9979786178414185e-05, "loss": 0.8504, "step": 3004 }, { "epoch": 0.49059222072568465, "grad_norm": 2.6427135467529297, "learning_rate": 1.9979766006713863e-05, "loss": 0.9686, "step": 3005 }, { "epoch": 0.49075547936818903, "grad_norm": 2.3769781589508057, "learning_rate": 1.9979745824963918e-05, "loss": 1.0175, "step": 3006 }, { "epoch": 0.49091873801069347, "grad_norm": 2.253631114959717, "learning_rate": 1.9979725633164375e-05, "loss": 0.9372, "step": 3007 }, { "epoch": 0.49108199665319785, "grad_norm": 1.912671685218811, "learning_rate": 1.997970543131525e-05, "loss": 0.7671, "step": 3008 }, { "epoch": 0.49124525529570223, "grad_norm": 1.8150739669799805, "learning_rate": 1.9979685219416563e-05, "loss": 0.727, "step": 3009 }, { "epoch": 0.4914085139382066, "grad_norm": 1.9659093618392944, "learning_rate": 1.997966499746834e-05, "loss": 0.7915, "step": 3010 }, { "epoch": 0.491571772580711, "grad_norm": 1.967844009399414, "learning_rate": 1.997964476547059e-05, "loss": 0.9199, "step": 3011 }, { "epoch": 0.4917350312232154, "grad_norm": 2.3013789653778076, "learning_rate": 1.9979624523423342e-05, "loss": 0.6664, "step": 3012 }, { "epoch": 0.49189828986571976, "grad_norm": 2.030122756958008, "learning_rate": 1.9979604271326617e-05, "loss": 0.6904, "step": 3013 }, { "epoch": 0.49206154850822414, "grad_norm": 2.4916930198669434, "learning_rate": 1.9979584009180435e-05, "loss": 0.8858, "step": 3014 }, { "epoch": 0.4922248071507285, "grad_norm": 2.3177926540374756, "learning_rate": 1.997956373698481e-05, "loss": 0.8444, "step": 3015 }, { "epoch": 0.4923880657932329, "grad_norm": 2.183171272277832, "learning_rate": 1.997954345473977e-05, "loss": 0.7691, "step": 3016 }, { "epoch": 0.4925513244357373, "grad_norm": 2.253082275390625, "learning_rate": 1.997952316244533e-05, "loss": 0.9144, "step": 3017 }, { "epoch": 0.49271458307824173, "grad_norm": 2.270782232284546, "learning_rate": 1.9979502860101517e-05, "loss": 0.6775, "step": 3018 }, { "epoch": 0.4928778417207461, "grad_norm": 2.40990948677063, "learning_rate": 1.9979482547708344e-05, "loss": 0.8789, "step": 3019 }, { "epoch": 0.4930411003632505, "grad_norm": 2.1739113330841064, "learning_rate": 1.9979462225265834e-05, "loss": 0.9664, "step": 3020 }, { "epoch": 0.4932043590057549, "grad_norm": 2.346156120300293, "learning_rate": 1.997944189277401e-05, "loss": 0.8365, "step": 3021 }, { "epoch": 0.49336761764825926, "grad_norm": 2.339733600616455, "learning_rate": 1.9979421550232892e-05, "loss": 0.9033, "step": 3022 }, { "epoch": 0.49353087629076364, "grad_norm": 1.6829936504364014, "learning_rate": 1.9979401197642497e-05, "loss": 0.6937, "step": 3023 }, { "epoch": 0.493694134933268, "grad_norm": 2.2345287799835205, "learning_rate": 1.9979380835002846e-05, "loss": 0.8525, "step": 3024 }, { "epoch": 0.4938573935757724, "grad_norm": 2.111694097518921, "learning_rate": 1.9979360462313965e-05, "loss": 0.9757, "step": 3025 }, { "epoch": 0.4940206522182768, "grad_norm": 2.213365077972412, "learning_rate": 1.9979340079575865e-05, "loss": 0.9656, "step": 3026 }, { "epoch": 0.4941839108607812, "grad_norm": 2.278170108795166, "learning_rate": 1.997931968678858e-05, "loss": 0.8585, "step": 3027 }, { "epoch": 0.49434716950328556, "grad_norm": 2.8380050659179688, "learning_rate": 1.9979299283952116e-05, "loss": 0.8634, "step": 3028 }, { "epoch": 0.49451042814579, "grad_norm": 2.406644582748413, "learning_rate": 1.9979278871066504e-05, "loss": 0.7971, "step": 3029 }, { "epoch": 0.4946736867882944, "grad_norm": 2.6808650493621826, "learning_rate": 1.997925844813176e-05, "loss": 0.9972, "step": 3030 }, { "epoch": 0.49483694543079876, "grad_norm": 1.8772902488708496, "learning_rate": 1.9979238015147904e-05, "loss": 0.8138, "step": 3031 }, { "epoch": 0.49500020407330314, "grad_norm": 1.8968791961669922, "learning_rate": 1.997921757211496e-05, "loss": 0.846, "step": 3032 }, { "epoch": 0.4951634627158075, "grad_norm": 2.2460570335388184, "learning_rate": 1.9979197119032946e-05, "loss": 0.9541, "step": 3033 }, { "epoch": 0.4953267213583119, "grad_norm": 2.087461233139038, "learning_rate": 1.997917665590188e-05, "loss": 0.8338, "step": 3034 }, { "epoch": 0.4954899800008163, "grad_norm": 2.6389570236206055, "learning_rate": 1.997915618272179e-05, "loss": 0.9143, "step": 3035 }, { "epoch": 0.49565323864332067, "grad_norm": 2.1088502407073975, "learning_rate": 1.9979135699492692e-05, "loss": 0.8716, "step": 3036 }, { "epoch": 0.49581649728582505, "grad_norm": 2.5845401287078857, "learning_rate": 1.9979115206214607e-05, "loss": 0.8624, "step": 3037 }, { "epoch": 0.49597975592832944, "grad_norm": 2.876234292984009, "learning_rate": 1.9979094702887554e-05, "loss": 0.9556, "step": 3038 }, { "epoch": 0.4961430145708338, "grad_norm": 2.5351595878601074, "learning_rate": 1.9979074189511556e-05, "loss": 0.9999, "step": 3039 }, { "epoch": 0.49630627321333826, "grad_norm": 2.285290002822876, "learning_rate": 1.9979053666086633e-05, "loss": 0.8573, "step": 3040 }, { "epoch": 0.49646953185584264, "grad_norm": 3.3460261821746826, "learning_rate": 1.9979033132612806e-05, "loss": 0.9797, "step": 3041 }, { "epoch": 0.496632790498347, "grad_norm": 1.9459445476531982, "learning_rate": 1.9979012589090092e-05, "loss": 0.8252, "step": 3042 }, { "epoch": 0.4967960491408514, "grad_norm": 2.526336669921875, "learning_rate": 1.997899203551852e-05, "loss": 1.1115, "step": 3043 }, { "epoch": 0.4969593077833558, "grad_norm": 2.222221851348877, "learning_rate": 1.9978971471898105e-05, "loss": 0.8229, "step": 3044 }, { "epoch": 0.49712256642586017, "grad_norm": 2.2877819538116455, "learning_rate": 1.9978950898228865e-05, "loss": 1.0483, "step": 3045 }, { "epoch": 0.49728582506836455, "grad_norm": 2.0507566928863525, "learning_rate": 1.9978930314510826e-05, "loss": 0.957, "step": 3046 }, { "epoch": 0.49744908371086893, "grad_norm": 1.8991285562515259, "learning_rate": 1.9978909720744005e-05, "loss": 0.8751, "step": 3047 }, { "epoch": 0.4976123423533733, "grad_norm": 2.0472488403320312, "learning_rate": 1.997888911692843e-05, "loss": 0.8776, "step": 3048 }, { "epoch": 0.4977756009958777, "grad_norm": 2.287355422973633, "learning_rate": 1.997886850306411e-05, "loss": 0.9827, "step": 3049 }, { "epoch": 0.4979388596383821, "grad_norm": 2.0967390537261963, "learning_rate": 1.9978847879151076e-05, "loss": 0.9019, "step": 3050 }, { "epoch": 0.4981021182808865, "grad_norm": 2.326050043106079, "learning_rate": 1.997882724518934e-05, "loss": 0.9828, "step": 3051 }, { "epoch": 0.4982653769233909, "grad_norm": 1.8875048160552979, "learning_rate": 1.9978806601178933e-05, "loss": 0.76, "step": 3052 }, { "epoch": 0.4984286355658953, "grad_norm": 2.5950992107391357, "learning_rate": 1.9978785947119866e-05, "loss": 0.8534, "step": 3053 }, { "epoch": 0.49859189420839967, "grad_norm": 1.9403152465820312, "learning_rate": 1.997876528301217e-05, "loss": 0.8539, "step": 3054 }, { "epoch": 0.49875515285090405, "grad_norm": 2.7445127964019775, "learning_rate": 1.9978744608855857e-05, "loss": 0.8984, "step": 3055 }, { "epoch": 0.49891841149340843, "grad_norm": 1.7534751892089844, "learning_rate": 1.997872392465095e-05, "loss": 0.7085, "step": 3056 }, { "epoch": 0.4990816701359128, "grad_norm": 2.113125801086426, "learning_rate": 1.997870323039747e-05, "loss": 0.8371, "step": 3057 }, { "epoch": 0.4992449287784172, "grad_norm": 2.7556309700012207, "learning_rate": 1.997868252609544e-05, "loss": 0.8921, "step": 3058 }, { "epoch": 0.4994081874209216, "grad_norm": 2.338942289352417, "learning_rate": 1.9978661811744876e-05, "loss": 0.8578, "step": 3059 }, { "epoch": 0.49957144606342596, "grad_norm": 2.310269832611084, "learning_rate": 1.9978641087345808e-05, "loss": 0.8858, "step": 3060 }, { "epoch": 0.49973470470593034, "grad_norm": 2.177549362182617, "learning_rate": 1.9978620352898246e-05, "loss": 0.746, "step": 3061 }, { "epoch": 0.4998979633484348, "grad_norm": 2.3217713832855225, "learning_rate": 1.9978599608402217e-05, "loss": 0.9706, "step": 3062 }, { "epoch": 0.5000612219909392, "grad_norm": 2.488421678543091, "learning_rate": 1.9978578853857744e-05, "loss": 0.9503, "step": 3063 }, { "epoch": 0.5002244806334435, "grad_norm": 2.4593520164489746, "learning_rate": 1.9978558089264842e-05, "loss": 0.7776, "step": 3064 }, { "epoch": 0.5003877392759479, "grad_norm": 2.1755521297454834, "learning_rate": 1.9978537314623537e-05, "loss": 0.8517, "step": 3065 }, { "epoch": 0.5005509979184523, "grad_norm": 2.511699914932251, "learning_rate": 1.9978516529933847e-05, "loss": 0.7657, "step": 3066 }, { "epoch": 0.5007142565609567, "grad_norm": 2.1570489406585693, "learning_rate": 1.997849573519579e-05, "loss": 0.899, "step": 3067 }, { "epoch": 0.5008775152034611, "grad_norm": 2.316589832305908, "learning_rate": 1.9978474930409396e-05, "loss": 1.0725, "step": 3068 }, { "epoch": 0.5010407738459655, "grad_norm": 2.532916784286499, "learning_rate": 1.9978454115574677e-05, "loss": 0.7456, "step": 3069 }, { "epoch": 0.5012040324884699, "grad_norm": 2.3918509483337402, "learning_rate": 1.9978433290691654e-05, "loss": 1.0135, "step": 3070 }, { "epoch": 0.5013672911309742, "grad_norm": 1.9708644151687622, "learning_rate": 1.9978412455760356e-05, "loss": 0.8468, "step": 3071 }, { "epoch": 0.5015305497734787, "grad_norm": 2.123650312423706, "learning_rate": 1.9978391610780798e-05, "loss": 0.8918, "step": 3072 }, { "epoch": 0.501693808415983, "grad_norm": 2.218646287918091, "learning_rate": 1.9978370755753004e-05, "loss": 0.7343, "step": 3073 }, { "epoch": 0.5018570670584874, "grad_norm": 2.8331501483917236, "learning_rate": 1.9978349890676993e-05, "loss": 0.8768, "step": 3074 }, { "epoch": 0.5020203257009918, "grad_norm": 1.8815797567367554, "learning_rate": 1.9978329015552783e-05, "loss": 0.794, "step": 3075 }, { "epoch": 0.5021835843434962, "grad_norm": 1.9058696031570435, "learning_rate": 1.99783081303804e-05, "loss": 0.746, "step": 3076 }, { "epoch": 0.5023468429860005, "grad_norm": 1.8353303670883179, "learning_rate": 1.9978287235159866e-05, "loss": 0.789, "step": 3077 }, { "epoch": 0.502510101628505, "grad_norm": 1.9611802101135254, "learning_rate": 1.9978266329891196e-05, "loss": 0.9944, "step": 3078 }, { "epoch": 0.5026733602710094, "grad_norm": 2.1930031776428223, "learning_rate": 1.997824541457442e-05, "loss": 0.9648, "step": 3079 }, { "epoch": 0.5028366189135137, "grad_norm": 3.010266065597534, "learning_rate": 1.9978224489209547e-05, "loss": 0.7474, "step": 3080 }, { "epoch": 0.5029998775560182, "grad_norm": 2.297976493835449, "learning_rate": 1.9978203553796605e-05, "loss": 0.82, "step": 3081 }, { "epoch": 0.5031631361985225, "grad_norm": 2.1804168224334717, "learning_rate": 1.9978182608335616e-05, "loss": 0.8221, "step": 3082 }, { "epoch": 0.5033263948410269, "grad_norm": 2.184976100921631, "learning_rate": 1.99781616528266e-05, "loss": 0.9469, "step": 3083 }, { "epoch": 0.5034896534835313, "grad_norm": 1.9245314598083496, "learning_rate": 1.997814068726958e-05, "loss": 0.8229, "step": 3084 }, { "epoch": 0.5036529121260357, "grad_norm": 2.4057416915893555, "learning_rate": 1.9978119711664573e-05, "loss": 0.8896, "step": 3085 }, { "epoch": 0.50381617076854, "grad_norm": 2.02895450592041, "learning_rate": 1.9978098726011603e-05, "loss": 0.814, "step": 3086 }, { "epoch": 0.5039794294110445, "grad_norm": 2.1146857738494873, "learning_rate": 1.9978077730310687e-05, "loss": 0.9083, "step": 3087 }, { "epoch": 0.5041426880535488, "grad_norm": 2.010622978210449, "learning_rate": 1.9978056724561853e-05, "loss": 0.7959, "step": 3088 }, { "epoch": 0.5043059466960532, "grad_norm": 1.9162832498550415, "learning_rate": 1.997803570876512e-05, "loss": 0.7084, "step": 3089 }, { "epoch": 0.5044692053385577, "grad_norm": 2.6650588512420654, "learning_rate": 1.9978014682920503e-05, "loss": 0.8471, "step": 3090 }, { "epoch": 0.504632463981062, "grad_norm": 2.136096477508545, "learning_rate": 1.997799364702803e-05, "loss": 0.7691, "step": 3091 }, { "epoch": 0.5047957226235664, "grad_norm": 1.9939074516296387, "learning_rate": 1.997797260108772e-05, "loss": 0.9131, "step": 3092 }, { "epoch": 0.5049589812660707, "grad_norm": 2.473628282546997, "learning_rate": 1.9977951545099593e-05, "loss": 0.9144, "step": 3093 }, { "epoch": 0.5051222399085752, "grad_norm": 2.4819302558898926, "learning_rate": 1.997793047906367e-05, "loss": 0.8941, "step": 3094 }, { "epoch": 0.5052854985510795, "grad_norm": 2.5031745433807373, "learning_rate": 1.997790940297998e-05, "loss": 0.8393, "step": 3095 }, { "epoch": 0.505448757193584, "grad_norm": 2.0246689319610596, "learning_rate": 1.997788831684853e-05, "loss": 0.6635, "step": 3096 }, { "epoch": 0.5056120158360883, "grad_norm": 2.317363977432251, "learning_rate": 1.9977867220669356e-05, "loss": 1.0788, "step": 3097 }, { "epoch": 0.5057752744785927, "grad_norm": 2.2687721252441406, "learning_rate": 1.997784611444247e-05, "loss": 0.953, "step": 3098 }, { "epoch": 0.505938533121097, "grad_norm": 2.5856900215148926, "learning_rate": 1.9977824998167894e-05, "loss": 0.8202, "step": 3099 }, { "epoch": 0.5061017917636015, "grad_norm": 2.1392757892608643, "learning_rate": 1.997780387184565e-05, "loss": 0.8015, "step": 3100 }, { "epoch": 0.5062650504061059, "grad_norm": 2.0395615100860596, "learning_rate": 1.9977782735475765e-05, "loss": 0.834, "step": 3101 }, { "epoch": 0.5064283090486102, "grad_norm": 2.0939781665802, "learning_rate": 1.9977761589058252e-05, "loss": 0.9268, "step": 3102 }, { "epoch": 0.5065915676911147, "grad_norm": 1.819284439086914, "learning_rate": 1.997774043259314e-05, "loss": 0.6608, "step": 3103 }, { "epoch": 0.506754826333619, "grad_norm": 2.2785489559173584, "learning_rate": 1.997771926608044e-05, "loss": 0.8759, "step": 3104 }, { "epoch": 0.5069180849761235, "grad_norm": 1.9993271827697754, "learning_rate": 1.9977698089520183e-05, "loss": 0.7594, "step": 3105 }, { "epoch": 0.5070813436186278, "grad_norm": 2.1211814880371094, "learning_rate": 1.9977676902912383e-05, "loss": 0.8613, "step": 3106 }, { "epoch": 0.5072446022611322, "grad_norm": 2.458709716796875, "learning_rate": 1.9977655706257068e-05, "loss": 0.8629, "step": 3107 }, { "epoch": 0.5074078609036365, "grad_norm": 1.8477967977523804, "learning_rate": 1.9977634499554255e-05, "loss": 0.7501, "step": 3108 }, { "epoch": 0.507571119546141, "grad_norm": 2.2111892700195312, "learning_rate": 1.9977613282803968e-05, "loss": 0.8437, "step": 3109 }, { "epoch": 0.5077343781886453, "grad_norm": 1.9481911659240723, "learning_rate": 1.9977592056006226e-05, "loss": 0.7037, "step": 3110 }, { "epoch": 0.5078976368311497, "grad_norm": 2.0714974403381348, "learning_rate": 1.997757081916105e-05, "loss": 0.8396, "step": 3111 }, { "epoch": 0.5080608954736542, "grad_norm": 2.2502570152282715, "learning_rate": 1.997754957226847e-05, "loss": 0.9439, "step": 3112 }, { "epoch": 0.5082241541161585, "grad_norm": 2.034342050552368, "learning_rate": 1.9977528315328492e-05, "loss": 0.6994, "step": 3113 }, { "epoch": 0.508387412758663, "grad_norm": 2.3898301124572754, "learning_rate": 1.997750704834115e-05, "loss": 1.0438, "step": 3114 }, { "epoch": 0.5085506714011673, "grad_norm": 2.1666438579559326, "learning_rate": 1.997748577130646e-05, "loss": 0.9007, "step": 3115 }, { "epoch": 0.5087139300436717, "grad_norm": 3.30202579498291, "learning_rate": 1.997746448422444e-05, "loss": 0.9208, "step": 3116 }, { "epoch": 0.508877188686176, "grad_norm": 2.3611483573913574, "learning_rate": 1.997744318709512e-05, "loss": 0.7812, "step": 3117 }, { "epoch": 0.5090404473286805, "grad_norm": 1.8441590070724487, "learning_rate": 1.997742187991852e-05, "loss": 0.8313, "step": 3118 }, { "epoch": 0.5092037059711848, "grad_norm": 2.158367156982422, "learning_rate": 1.9977400562694656e-05, "loss": 0.9435, "step": 3119 }, { "epoch": 0.5093669646136892, "grad_norm": 2.4378623962402344, "learning_rate": 1.9977379235423553e-05, "loss": 0.9167, "step": 3120 }, { "epoch": 0.5095302232561936, "grad_norm": 2.6020264625549316, "learning_rate": 1.997735789810523e-05, "loss": 1.0001, "step": 3121 }, { "epoch": 0.509693481898698, "grad_norm": 2.036524534225464, "learning_rate": 1.9977336550739716e-05, "loss": 0.911, "step": 3122 }, { "epoch": 0.5098567405412024, "grad_norm": 2.365675210952759, "learning_rate": 1.9977315193327017e-05, "loss": 0.9513, "step": 3123 }, { "epoch": 0.5100199991837068, "grad_norm": 2.202857732772827, "learning_rate": 1.9977293825867173e-05, "loss": 0.8564, "step": 3124 }, { "epoch": 0.5101832578262112, "grad_norm": 2.1707661151885986, "learning_rate": 1.9977272448360193e-05, "loss": 0.9094, "step": 3125 }, { "epoch": 0.5103465164687155, "grad_norm": 1.899152159690857, "learning_rate": 1.9977251060806102e-05, "loss": 0.801, "step": 3126 }, { "epoch": 0.51050977511122, "grad_norm": 1.845895767211914, "learning_rate": 1.9977229663204922e-05, "loss": 0.7105, "step": 3127 }, { "epoch": 0.5106730337537243, "grad_norm": 2.0829052925109863, "learning_rate": 1.9977208255556675e-05, "loss": 0.7011, "step": 3128 }, { "epoch": 0.5108362923962287, "grad_norm": 2.175828218460083, "learning_rate": 1.997718683786138e-05, "loss": 0.9134, "step": 3129 }, { "epoch": 0.5109995510387331, "grad_norm": 2.1157751083374023, "learning_rate": 1.9977165410119065e-05, "loss": 0.7965, "step": 3130 }, { "epoch": 0.5111628096812375, "grad_norm": 2.2744505405426025, "learning_rate": 1.9977143972329744e-05, "loss": 0.9872, "step": 3131 }, { "epoch": 0.5113260683237418, "grad_norm": 2.2209384441375732, "learning_rate": 1.9977122524493442e-05, "loss": 0.999, "step": 3132 }, { "epoch": 0.5114893269662463, "grad_norm": 2.137951612472534, "learning_rate": 1.997710106661018e-05, "loss": 1.0127, "step": 3133 }, { "epoch": 0.5116525856087507, "grad_norm": 2.148066997528076, "learning_rate": 1.9977079598679978e-05, "loss": 0.9146, "step": 3134 }, { "epoch": 0.511815844251255, "grad_norm": 2.263364791870117, "learning_rate": 1.9977058120702863e-05, "loss": 0.9476, "step": 3135 }, { "epoch": 0.5119791028937595, "grad_norm": 2.104234218597412, "learning_rate": 1.9977036632678853e-05, "loss": 0.8515, "step": 3136 }, { "epoch": 0.5121423615362638, "grad_norm": 2.1755144596099854, "learning_rate": 1.997701513460797e-05, "loss": 0.8653, "step": 3137 }, { "epoch": 0.5123056201787682, "grad_norm": 2.43241286277771, "learning_rate": 1.997699362649023e-05, "loss": 0.8202, "step": 3138 }, { "epoch": 0.5124688788212726, "grad_norm": 2.2741568088531494, "learning_rate": 1.9976972108325667e-05, "loss": 0.8328, "step": 3139 }, { "epoch": 0.512632137463777, "grad_norm": 2.2319533824920654, "learning_rate": 1.997695058011429e-05, "loss": 0.8935, "step": 3140 }, { "epoch": 0.5127953961062813, "grad_norm": 2.158381700515747, "learning_rate": 1.997692904185613e-05, "loss": 0.6791, "step": 3141 }, { "epoch": 0.5129586547487858, "grad_norm": 2.067470073699951, "learning_rate": 1.99769074935512e-05, "loss": 0.9704, "step": 3142 }, { "epoch": 0.5131219133912901, "grad_norm": 2.209883689880371, "learning_rate": 1.9976885935199533e-05, "loss": 0.8688, "step": 3143 }, { "epoch": 0.5132851720337945, "grad_norm": 2.204659938812256, "learning_rate": 1.9976864366801146e-05, "loss": 0.9793, "step": 3144 }, { "epoch": 0.513448430676299, "grad_norm": 2.3818485736846924, "learning_rate": 1.9976842788356054e-05, "loss": 0.9124, "step": 3145 }, { "epoch": 0.5136116893188033, "grad_norm": 2.0255539417266846, "learning_rate": 1.9976821199864287e-05, "loss": 0.9115, "step": 3146 }, { "epoch": 0.5137749479613077, "grad_norm": 2.194688081741333, "learning_rate": 1.9976799601325863e-05, "loss": 0.9187, "step": 3147 }, { "epoch": 0.5139382066038121, "grad_norm": 1.9303861856460571, "learning_rate": 1.9976777992740804e-05, "loss": 0.8442, "step": 3148 }, { "epoch": 0.5141014652463165, "grad_norm": 2.0928473472595215, "learning_rate": 1.997675637410913e-05, "loss": 0.7622, "step": 3149 }, { "epoch": 0.5142647238888208, "grad_norm": 2.3140709400177, "learning_rate": 1.997673474543087e-05, "loss": 1.0003, "step": 3150 }, { "epoch": 0.5144279825313253, "grad_norm": 2.0424208641052246, "learning_rate": 1.9976713106706036e-05, "loss": 0.8173, "step": 3151 }, { "epoch": 0.5145912411738296, "grad_norm": 2.2570807933807373, "learning_rate": 1.9976691457934655e-05, "loss": 0.9522, "step": 3152 }, { "epoch": 0.514754499816334, "grad_norm": 2.0205392837524414, "learning_rate": 1.997666979911675e-05, "loss": 0.7731, "step": 3153 }, { "epoch": 0.5149177584588384, "grad_norm": 2.14528489112854, "learning_rate": 1.997664813025234e-05, "loss": 0.8298, "step": 3154 }, { "epoch": 0.5150810171013428, "grad_norm": 2.021923065185547, "learning_rate": 1.997662645134145e-05, "loss": 1.0278, "step": 3155 }, { "epoch": 0.5152442757438472, "grad_norm": 1.8985143899917603, "learning_rate": 1.99766047623841e-05, "loss": 0.7724, "step": 3156 }, { "epoch": 0.5154075343863516, "grad_norm": 2.1301212310791016, "learning_rate": 1.997658306338031e-05, "loss": 0.817, "step": 3157 }, { "epoch": 0.515570793028856, "grad_norm": 2.3517141342163086, "learning_rate": 1.9976561354330105e-05, "loss": 0.9374, "step": 3158 }, { "epoch": 0.5157340516713603, "grad_norm": 2.257906436920166, "learning_rate": 1.99765396352335e-05, "loss": 0.8537, "step": 3159 }, { "epoch": 0.5158973103138648, "grad_norm": 2.4338629245758057, "learning_rate": 1.9976517906090528e-05, "loss": 1.1772, "step": 3160 }, { "epoch": 0.5160605689563691, "grad_norm": 2.1121225357055664, "learning_rate": 1.9976496166901205e-05, "loss": 0.9339, "step": 3161 }, { "epoch": 0.5162238275988735, "grad_norm": 2.0665366649627686, "learning_rate": 1.997647441766555e-05, "loss": 0.8609, "step": 3162 }, { "epoch": 0.5163870862413779, "grad_norm": 2.125631809234619, "learning_rate": 1.9976452658383588e-05, "loss": 1.0179, "step": 3163 }, { "epoch": 0.5165503448838823, "grad_norm": 2.4693562984466553, "learning_rate": 1.9976430889055342e-05, "loss": 0.9653, "step": 3164 }, { "epoch": 0.5167136035263867, "grad_norm": 1.9813436269760132, "learning_rate": 1.9976409109680835e-05, "loss": 0.7908, "step": 3165 }, { "epoch": 0.5168768621688911, "grad_norm": 2.1706442832946777, "learning_rate": 1.9976387320260083e-05, "loss": 0.9407, "step": 3166 }, { "epoch": 0.5170401208113955, "grad_norm": 1.9704300165176392, "learning_rate": 1.9976365520793114e-05, "loss": 0.7262, "step": 3167 }, { "epoch": 0.5172033794538998, "grad_norm": 2.4319238662719727, "learning_rate": 1.9976343711279947e-05, "loss": 1.1614, "step": 3168 }, { "epoch": 0.5173666380964043, "grad_norm": 2.3980228900909424, "learning_rate": 1.9976321891720604e-05, "loss": 0.9184, "step": 3169 }, { "epoch": 0.5175298967389086, "grad_norm": 2.2042312622070312, "learning_rate": 1.9976300062115112e-05, "loss": 0.907, "step": 3170 }, { "epoch": 0.517693155381413, "grad_norm": 2.05627703666687, "learning_rate": 1.997627822246348e-05, "loss": 0.7615, "step": 3171 }, { "epoch": 0.5178564140239174, "grad_norm": 2.013457775115967, "learning_rate": 1.9976256372765746e-05, "loss": 0.9695, "step": 3172 }, { "epoch": 0.5180196726664218, "grad_norm": 2.065033435821533, "learning_rate": 1.997623451302192e-05, "loss": 0.9746, "step": 3173 }, { "epoch": 0.5181829313089261, "grad_norm": 2.101802349090576, "learning_rate": 1.997621264323203e-05, "loss": 0.8493, "step": 3174 }, { "epoch": 0.5183461899514306, "grad_norm": 1.8131414651870728, "learning_rate": 1.9976190763396094e-05, "loss": 0.7428, "step": 3175 }, { "epoch": 0.518509448593935, "grad_norm": 2.0616135597229004, "learning_rate": 1.997616887351414e-05, "loss": 1.0356, "step": 3176 }, { "epoch": 0.5186727072364393, "grad_norm": 2.1442439556121826, "learning_rate": 1.9976146973586184e-05, "loss": 0.9392, "step": 3177 }, { "epoch": 0.5188359658789438, "grad_norm": 2.1017026901245117, "learning_rate": 1.9976125063612254e-05, "loss": 1.2462, "step": 3178 }, { "epoch": 0.5189992245214481, "grad_norm": 2.22216534614563, "learning_rate": 1.9976103143592368e-05, "loss": 0.8796, "step": 3179 }, { "epoch": 0.5191624831639525, "grad_norm": 2.0383503437042236, "learning_rate": 1.9976081213526545e-05, "loss": 0.8975, "step": 3180 }, { "epoch": 0.5193257418064569, "grad_norm": 2.0457003116607666, "learning_rate": 1.9976059273414813e-05, "loss": 0.6546, "step": 3181 }, { "epoch": 0.5194890004489613, "grad_norm": 2.2953717708587646, "learning_rate": 1.9976037323257193e-05, "loss": 0.9085, "step": 3182 }, { "epoch": 0.5196522590914656, "grad_norm": 1.8370320796966553, "learning_rate": 1.9976015363053708e-05, "loss": 0.8271, "step": 3183 }, { "epoch": 0.5198155177339701, "grad_norm": 1.757144570350647, "learning_rate": 1.9975993392804374e-05, "loss": 0.7603, "step": 3184 }, { "epoch": 0.5199787763764744, "grad_norm": 1.935219645500183, "learning_rate": 1.997597141250922e-05, "loss": 0.7571, "step": 3185 }, { "epoch": 0.5201420350189788, "grad_norm": 2.089667797088623, "learning_rate": 1.9975949422168265e-05, "loss": 0.9095, "step": 3186 }, { "epoch": 0.5203052936614833, "grad_norm": 1.9769450426101685, "learning_rate": 1.997592742178153e-05, "loss": 0.6965, "step": 3187 }, { "epoch": 0.5204685523039876, "grad_norm": 1.7493739128112793, "learning_rate": 1.997590541134904e-05, "loss": 0.6207, "step": 3188 }, { "epoch": 0.520631810946492, "grad_norm": 2.201674699783325, "learning_rate": 1.9975883390870817e-05, "loss": 0.8436, "step": 3189 }, { "epoch": 0.5207950695889964, "grad_norm": 1.9206316471099854, "learning_rate": 1.9975861360346877e-05, "loss": 0.8058, "step": 3190 }, { "epoch": 0.5209583282315008, "grad_norm": 2.2072269916534424, "learning_rate": 1.997583931977725e-05, "loss": 0.9863, "step": 3191 }, { "epoch": 0.5211215868740051, "grad_norm": 2.0386557579040527, "learning_rate": 1.9975817269161957e-05, "loss": 0.8646, "step": 3192 }, { "epoch": 0.5212848455165096, "grad_norm": 2.0219480991363525, "learning_rate": 1.9975795208501018e-05, "loss": 0.8849, "step": 3193 }, { "epoch": 0.5214481041590139, "grad_norm": 2.244020462036133, "learning_rate": 1.9975773137794458e-05, "loss": 0.9284, "step": 3194 }, { "epoch": 0.5216113628015183, "grad_norm": 2.0932788848876953, "learning_rate": 1.9975751057042294e-05, "loss": 0.8693, "step": 3195 }, { "epoch": 0.5217746214440226, "grad_norm": 2.072313070297241, "learning_rate": 1.9975728966244553e-05, "loss": 0.711, "step": 3196 }, { "epoch": 0.5219378800865271, "grad_norm": 1.811410903930664, "learning_rate": 1.9975706865401255e-05, "loss": 0.6794, "step": 3197 }, { "epoch": 0.5221011387290315, "grad_norm": 2.2907443046569824, "learning_rate": 1.9975684754512425e-05, "loss": 0.8816, "step": 3198 }, { "epoch": 0.5222643973715358, "grad_norm": 2.330420732498169, "learning_rate": 1.9975662633578078e-05, "loss": 0.7539, "step": 3199 }, { "epoch": 0.5224276560140403, "grad_norm": 1.6327462196350098, "learning_rate": 1.9975640502598243e-05, "loss": 0.5512, "step": 3200 }, { "epoch": 0.5225909146565446, "grad_norm": 2.2267472743988037, "learning_rate": 1.9975618361572942e-05, "loss": 0.7534, "step": 3201 }, { "epoch": 0.522754173299049, "grad_norm": 2.464442729949951, "learning_rate": 1.9975596210502197e-05, "loss": 1.0419, "step": 3202 }, { "epoch": 0.5229174319415534, "grad_norm": 2.3450839519500732, "learning_rate": 1.9975574049386027e-05, "loss": 0.7692, "step": 3203 }, { "epoch": 0.5230806905840578, "grad_norm": 2.1445021629333496, "learning_rate": 1.997555187822446e-05, "loss": 1.0271, "step": 3204 }, { "epoch": 0.5232439492265621, "grad_norm": 1.9949091672897339, "learning_rate": 1.997552969701751e-05, "loss": 0.8389, "step": 3205 }, { "epoch": 0.5234072078690666, "grad_norm": 2.0966928005218506, "learning_rate": 1.9975507505765207e-05, "loss": 0.7554, "step": 3206 }, { "epoch": 0.5235704665115709, "grad_norm": 1.6886711120605469, "learning_rate": 1.997548530446757e-05, "loss": 0.6343, "step": 3207 }, { "epoch": 0.5237337251540753, "grad_norm": 2.2185089588165283, "learning_rate": 1.9975463093124623e-05, "loss": 1.0363, "step": 3208 }, { "epoch": 0.5238969837965798, "grad_norm": 2.123464584350586, "learning_rate": 1.9975440871736387e-05, "loss": 0.8059, "step": 3209 }, { "epoch": 0.5240602424390841, "grad_norm": 1.9603670835494995, "learning_rate": 1.9975418640302885e-05, "loss": 0.6738, "step": 3210 }, { "epoch": 0.5242235010815885, "grad_norm": 1.8736332654953003, "learning_rate": 1.997539639882414e-05, "loss": 0.7986, "step": 3211 }, { "epoch": 0.5243867597240929, "grad_norm": 2.2341456413269043, "learning_rate": 1.9975374147300172e-05, "loss": 0.8755, "step": 3212 }, { "epoch": 0.5245500183665973, "grad_norm": 1.7159340381622314, "learning_rate": 1.9975351885731004e-05, "loss": 0.7989, "step": 3213 }, { "epoch": 0.5247132770091016, "grad_norm": 1.766485571861267, "learning_rate": 1.997532961411666e-05, "loss": 0.6922, "step": 3214 }, { "epoch": 0.5248765356516061, "grad_norm": 2.2090628147125244, "learning_rate": 1.997530733245716e-05, "loss": 0.9142, "step": 3215 }, { "epoch": 0.5250397942941104, "grad_norm": 2.0952234268188477, "learning_rate": 1.997528504075253e-05, "loss": 0.9603, "step": 3216 }, { "epoch": 0.5252030529366148, "grad_norm": 2.554661989212036, "learning_rate": 1.9975262739002793e-05, "loss": 0.8493, "step": 3217 }, { "epoch": 0.5253663115791192, "grad_norm": 1.922166347503662, "learning_rate": 1.9975240427207966e-05, "loss": 0.767, "step": 3218 }, { "epoch": 0.5255295702216236, "grad_norm": 2.4131007194519043, "learning_rate": 1.9975218105368074e-05, "loss": 0.8432, "step": 3219 }, { "epoch": 0.525692828864128, "grad_norm": 2.114917278289795, "learning_rate": 1.997519577348314e-05, "loss": 0.8398, "step": 3220 }, { "epoch": 0.5258560875066324, "grad_norm": 2.2072269916534424, "learning_rate": 1.9975173431553188e-05, "loss": 0.8589, "step": 3221 }, { "epoch": 0.5260193461491368, "grad_norm": 2.3732430934906006, "learning_rate": 1.9975151079578238e-05, "loss": 0.8723, "step": 3222 }, { "epoch": 0.5261826047916411, "grad_norm": 1.9268263578414917, "learning_rate": 1.9975128717558318e-05, "loss": 0.7422, "step": 3223 }, { "epoch": 0.5263458634341456, "grad_norm": 2.1012864112854004, "learning_rate": 1.997510634549344e-05, "loss": 0.9143, "step": 3224 }, { "epoch": 0.5265091220766499, "grad_norm": 2.1604623794555664, "learning_rate": 1.9975083963383634e-05, "loss": 0.8148, "step": 3225 }, { "epoch": 0.5266723807191543, "grad_norm": 2.0763907432556152, "learning_rate": 1.997506157122892e-05, "loss": 0.8783, "step": 3226 }, { "epoch": 0.5268356393616587, "grad_norm": 2.274029016494751, "learning_rate": 1.9975039169029325e-05, "loss": 1.0623, "step": 3227 }, { "epoch": 0.5269988980041631, "grad_norm": 2.1632800102233887, "learning_rate": 1.9975016756784868e-05, "loss": 0.9064, "step": 3228 }, { "epoch": 0.5271621566466674, "grad_norm": 1.9675068855285645, "learning_rate": 1.997499433449557e-05, "loss": 0.8688, "step": 3229 }, { "epoch": 0.5273254152891719, "grad_norm": 2.0734686851501465, "learning_rate": 1.9974971902161455e-05, "loss": 1.0222, "step": 3230 }, { "epoch": 0.5274886739316763, "grad_norm": 2.190962553024292, "learning_rate": 1.9974949459782547e-05, "loss": 0.8687, "step": 3231 }, { "epoch": 0.5276519325741806, "grad_norm": 2.356158971786499, "learning_rate": 1.9974927007358868e-05, "loss": 0.8767, "step": 3232 }, { "epoch": 0.5278151912166851, "grad_norm": 1.9763189554214478, "learning_rate": 1.997490454489044e-05, "loss": 0.815, "step": 3233 }, { "epoch": 0.5279784498591894, "grad_norm": 2.2305474281311035, "learning_rate": 1.9974882072377283e-05, "loss": 0.8647, "step": 3234 }, { "epoch": 0.5281417085016938, "grad_norm": 1.9637455940246582, "learning_rate": 1.9974859589819428e-05, "loss": 0.8085, "step": 3235 }, { "epoch": 0.5283049671441982, "grad_norm": 1.7090474367141724, "learning_rate": 1.9974837097216887e-05, "loss": 0.6628, "step": 3236 }, { "epoch": 0.5284682257867026, "grad_norm": 2.2208778858184814, "learning_rate": 1.997481459456969e-05, "loss": 0.7848, "step": 3237 }, { "epoch": 0.5286314844292069, "grad_norm": 2.433609962463379, "learning_rate": 1.9974792081877856e-05, "loss": 0.8681, "step": 3238 }, { "epoch": 0.5287947430717114, "grad_norm": 2.417046308517456, "learning_rate": 1.997476955914141e-05, "loss": 0.9345, "step": 3239 }, { "epoch": 0.5289580017142157, "grad_norm": 1.9577100276947021, "learning_rate": 1.9974747026360372e-05, "loss": 0.8468, "step": 3240 }, { "epoch": 0.5291212603567201, "grad_norm": 2.3044791221618652, "learning_rate": 1.9974724483534768e-05, "loss": 1.0128, "step": 3241 }, { "epoch": 0.5292845189992246, "grad_norm": 2.224580764770508, "learning_rate": 1.997470193066462e-05, "loss": 0.9383, "step": 3242 }, { "epoch": 0.5294477776417289, "grad_norm": 1.8904632329940796, "learning_rate": 1.997467936774995e-05, "loss": 0.6855, "step": 3243 }, { "epoch": 0.5296110362842333, "grad_norm": 2.261305809020996, "learning_rate": 1.9974656794790777e-05, "loss": 0.9835, "step": 3244 }, { "epoch": 0.5297742949267377, "grad_norm": 2.5073816776275635, "learning_rate": 1.997463421178713e-05, "loss": 1.045, "step": 3245 }, { "epoch": 0.5299375535692421, "grad_norm": 1.9100139141082764, "learning_rate": 1.997461161873903e-05, "loss": 0.8544, "step": 3246 }, { "epoch": 0.5301008122117464, "grad_norm": 2.0610477924346924, "learning_rate": 1.9974589015646494e-05, "loss": 0.9465, "step": 3247 }, { "epoch": 0.5302640708542509, "grad_norm": 2.2391514778137207, "learning_rate": 1.9974566402509556e-05, "loss": 1.0182, "step": 3248 }, { "epoch": 0.5304273294967552, "grad_norm": 2.4734034538269043, "learning_rate": 1.997454377932823e-05, "loss": 0.8235, "step": 3249 }, { "epoch": 0.5305905881392596, "grad_norm": 1.961127758026123, "learning_rate": 1.9974521146102535e-05, "loss": 0.8251, "step": 3250 }, { "epoch": 0.530753846781764, "grad_norm": 2.1797642707824707, "learning_rate": 1.9974498502832508e-05, "loss": 0.8809, "step": 3251 }, { "epoch": 0.5309171054242684, "grad_norm": 2.0917508602142334, "learning_rate": 1.9974475849518157e-05, "loss": 0.8194, "step": 3252 }, { "epoch": 0.5310803640667728, "grad_norm": 2.3419740200042725, "learning_rate": 1.9974453186159517e-05, "loss": 1.1124, "step": 3253 }, { "epoch": 0.5312436227092772, "grad_norm": 1.7141506671905518, "learning_rate": 1.9974430512756604e-05, "loss": 0.6427, "step": 3254 }, { "epoch": 0.5314068813517816, "grad_norm": 2.333385705947876, "learning_rate": 1.9974407829309442e-05, "loss": 0.8156, "step": 3255 }, { "epoch": 0.5315701399942859, "grad_norm": 2.105518102645874, "learning_rate": 1.9974385135818052e-05, "loss": 0.6289, "step": 3256 }, { "epoch": 0.5317333986367904, "grad_norm": 1.9439438581466675, "learning_rate": 1.997436243228246e-05, "loss": 0.7239, "step": 3257 }, { "epoch": 0.5318966572792947, "grad_norm": 1.995877742767334, "learning_rate": 1.9974339718702688e-05, "loss": 0.8766, "step": 3258 }, { "epoch": 0.5320599159217991, "grad_norm": 1.8711572885513306, "learning_rate": 1.9974316995078758e-05, "loss": 0.8147, "step": 3259 }, { "epoch": 0.5322231745643035, "grad_norm": 2.0966269969940186, "learning_rate": 1.9974294261410695e-05, "loss": 0.7724, "step": 3260 }, { "epoch": 0.5323864332068079, "grad_norm": 2.099099636077881, "learning_rate": 1.997427151769852e-05, "loss": 0.7857, "step": 3261 }, { "epoch": 0.5325496918493122, "grad_norm": 2.083033323287964, "learning_rate": 1.9974248763942255e-05, "loss": 0.834, "step": 3262 }, { "epoch": 0.5327129504918167, "grad_norm": 2.5886573791503906, "learning_rate": 1.9974226000141926e-05, "loss": 0.9564, "step": 3263 }, { "epoch": 0.5328762091343211, "grad_norm": 2.36087965965271, "learning_rate": 1.997420322629755e-05, "loss": 0.8632, "step": 3264 }, { "epoch": 0.5330394677768254, "grad_norm": 1.7981657981872559, "learning_rate": 1.9974180442409155e-05, "loss": 0.7335, "step": 3265 }, { "epoch": 0.5332027264193299, "grad_norm": 2.0183613300323486, "learning_rate": 1.9974157648476768e-05, "loss": 0.796, "step": 3266 }, { "epoch": 0.5333659850618342, "grad_norm": 2.4413630962371826, "learning_rate": 1.9974134844500402e-05, "loss": 0.848, "step": 3267 }, { "epoch": 0.5335292437043386, "grad_norm": 2.2178828716278076, "learning_rate": 1.997411203048009e-05, "loss": 0.9209, "step": 3268 }, { "epoch": 0.533692502346843, "grad_norm": 1.8141670227050781, "learning_rate": 1.9974089206415843e-05, "loss": 0.6697, "step": 3269 }, { "epoch": 0.5338557609893474, "grad_norm": 2.1023662090301514, "learning_rate": 1.9974066372307694e-05, "loss": 0.8987, "step": 3270 }, { "epoch": 0.5340190196318517, "grad_norm": 2.038614511489868, "learning_rate": 1.997404352815566e-05, "loss": 0.9465, "step": 3271 }, { "epoch": 0.5341822782743562, "grad_norm": 2.1336827278137207, "learning_rate": 1.997402067395977e-05, "loss": 1.1489, "step": 3272 }, { "epoch": 0.5343455369168605, "grad_norm": 1.8819364309310913, "learning_rate": 1.9973997809720045e-05, "loss": 0.8246, "step": 3273 }, { "epoch": 0.5345087955593649, "grad_norm": 2.729527711868286, "learning_rate": 1.9973974935436503e-05, "loss": 0.912, "step": 3274 }, { "epoch": 0.5346720542018694, "grad_norm": 2.132807970046997, "learning_rate": 1.9973952051109176e-05, "loss": 0.9008, "step": 3275 }, { "epoch": 0.5348353128443737, "grad_norm": 2.097180128097534, "learning_rate": 1.9973929156738078e-05, "loss": 0.9581, "step": 3276 }, { "epoch": 0.5349985714868781, "grad_norm": 2.1963400840759277, "learning_rate": 1.997390625232324e-05, "loss": 0.8222, "step": 3277 }, { "epoch": 0.5351618301293825, "grad_norm": 1.8913047313690186, "learning_rate": 1.9973883337864674e-05, "loss": 0.9136, "step": 3278 }, { "epoch": 0.5353250887718869, "grad_norm": 2.293325901031494, "learning_rate": 1.9973860413362418e-05, "loss": 0.8359, "step": 3279 }, { "epoch": 0.5354883474143912, "grad_norm": 2.0164906978607178, "learning_rate": 1.9973837478816483e-05, "loss": 0.8161, "step": 3280 }, { "epoch": 0.5356516060568957, "grad_norm": 1.9961150884628296, "learning_rate": 1.9973814534226895e-05, "loss": 0.8885, "step": 3281 }, { "epoch": 0.5358148646994, "grad_norm": 2.101891279220581, "learning_rate": 1.997379157959368e-05, "loss": 0.7739, "step": 3282 }, { "epoch": 0.5359781233419044, "grad_norm": 2.17558217048645, "learning_rate": 1.997376861491686e-05, "loss": 0.893, "step": 3283 }, { "epoch": 0.5361413819844087, "grad_norm": 2.166165351867676, "learning_rate": 1.9973745640196458e-05, "loss": 0.8295, "step": 3284 }, { "epoch": 0.5363046406269132, "grad_norm": 2.0578770637512207, "learning_rate": 1.9973722655432497e-05, "loss": 0.8408, "step": 3285 }, { "epoch": 0.5364678992694176, "grad_norm": 1.8979583978652954, "learning_rate": 1.9973699660625e-05, "loss": 0.8439, "step": 3286 }, { "epoch": 0.536631157911922, "grad_norm": 1.8617664575576782, "learning_rate": 1.9973676655773988e-05, "loss": 0.7567, "step": 3287 }, { "epoch": 0.5367944165544264, "grad_norm": 2.4459118843078613, "learning_rate": 1.9973653640879486e-05, "loss": 0.9096, "step": 3288 }, { "epoch": 0.5369576751969307, "grad_norm": 2.6515989303588867, "learning_rate": 1.997363061594152e-05, "loss": 0.8885, "step": 3289 }, { "epoch": 0.5371209338394352, "grad_norm": 3.1581332683563232, "learning_rate": 1.997360758096011e-05, "loss": 0.9964, "step": 3290 }, { "epoch": 0.5372841924819395, "grad_norm": 2.3488147258758545, "learning_rate": 1.9973584535935277e-05, "loss": 0.9653, "step": 3291 }, { "epoch": 0.5374474511244439, "grad_norm": 2.1410341262817383, "learning_rate": 1.997356148086705e-05, "loss": 0.7864, "step": 3292 }, { "epoch": 0.5376107097669482, "grad_norm": 2.797759771347046, "learning_rate": 1.9973538415755448e-05, "loss": 0.8193, "step": 3293 }, { "epoch": 0.5377739684094527, "grad_norm": 2.1440000534057617, "learning_rate": 1.99735153406005e-05, "loss": 0.8494, "step": 3294 }, { "epoch": 0.537937227051957, "grad_norm": 2.4268276691436768, "learning_rate": 1.9973492255402215e-05, "loss": 0.789, "step": 3295 }, { "epoch": 0.5381004856944615, "grad_norm": 1.965209722518921, "learning_rate": 1.9973469160160635e-05, "loss": 0.9018, "step": 3296 }, { "epoch": 0.5382637443369659, "grad_norm": 2.0752551555633545, "learning_rate": 1.997344605487577e-05, "loss": 0.8973, "step": 3297 }, { "epoch": 0.5384270029794702, "grad_norm": 2.0615603923797607, "learning_rate": 1.997342293954765e-05, "loss": 0.7705, "step": 3298 }, { "epoch": 0.5385902616219747, "grad_norm": 2.37383770942688, "learning_rate": 1.9973399814176293e-05, "loss": 0.7852, "step": 3299 }, { "epoch": 0.538753520264479, "grad_norm": 2.173271656036377, "learning_rate": 1.9973376678761726e-05, "loss": 0.9145, "step": 3300 }, { "epoch": 0.5389167789069834, "grad_norm": 2.4143214225769043, "learning_rate": 1.997335353330397e-05, "loss": 1.014, "step": 3301 }, { "epoch": 0.5390800375494877, "grad_norm": 2.2319889068603516, "learning_rate": 1.997333037780305e-05, "loss": 0.9775, "step": 3302 }, { "epoch": 0.5392432961919922, "grad_norm": 2.1811447143554688, "learning_rate": 1.997330721225899e-05, "loss": 0.8083, "step": 3303 }, { "epoch": 0.5394065548344965, "grad_norm": 2.50657057762146, "learning_rate": 1.9973284036671814e-05, "loss": 0.7693, "step": 3304 }, { "epoch": 0.539569813477001, "grad_norm": 2.503931999206543, "learning_rate": 1.9973260851041542e-05, "loss": 0.883, "step": 3305 }, { "epoch": 0.5397330721195053, "grad_norm": 2.1914658546447754, "learning_rate": 1.9973237655368197e-05, "loss": 1.0242, "step": 3306 }, { "epoch": 0.5398963307620097, "grad_norm": 2.1027865409851074, "learning_rate": 1.9973214449651806e-05, "loss": 0.9424, "step": 3307 }, { "epoch": 0.5400595894045142, "grad_norm": 1.8685299158096313, "learning_rate": 1.9973191233892393e-05, "loss": 0.9353, "step": 3308 }, { "epoch": 0.5402228480470185, "grad_norm": 1.9433985948562622, "learning_rate": 1.9973168008089977e-05, "loss": 0.7342, "step": 3309 }, { "epoch": 0.5403861066895229, "grad_norm": 2.336198091506958, "learning_rate": 1.997314477224458e-05, "loss": 1.0094, "step": 3310 }, { "epoch": 0.5405493653320272, "grad_norm": 2.2141501903533936, "learning_rate": 1.9973121526356236e-05, "loss": 0.8756, "step": 3311 }, { "epoch": 0.5407126239745317, "grad_norm": 2.107102394104004, "learning_rate": 1.9973098270424957e-05, "loss": 0.7689, "step": 3312 }, { "epoch": 0.540875882617036, "grad_norm": 2.1019861698150635, "learning_rate": 1.997307500445077e-05, "loss": 0.8061, "step": 3313 }, { "epoch": 0.5410391412595404, "grad_norm": 2.0350253582000732, "learning_rate": 1.99730517284337e-05, "loss": 0.9164, "step": 3314 }, { "epoch": 0.5412023999020448, "grad_norm": 1.9105134010314941, "learning_rate": 1.9973028442373768e-05, "loss": 0.6903, "step": 3315 }, { "epoch": 0.5413656585445492, "grad_norm": 2.3066296577453613, "learning_rate": 1.9973005146271003e-05, "loss": 0.9312, "step": 3316 }, { "epoch": 0.5415289171870535, "grad_norm": 2.2854549884796143, "learning_rate": 1.9972981840125422e-05, "loss": 0.8628, "step": 3317 }, { "epoch": 0.541692175829558, "grad_norm": 2.052642583847046, "learning_rate": 1.9972958523937053e-05, "loss": 0.882, "step": 3318 }, { "epoch": 0.5418554344720624, "grad_norm": 2.201667547225952, "learning_rate": 1.9972935197705915e-05, "loss": 0.7227, "step": 3319 }, { "epoch": 0.5420186931145667, "grad_norm": 1.9283092021942139, "learning_rate": 1.9972911861432033e-05, "loss": 0.9028, "step": 3320 }, { "epoch": 0.5421819517570712, "grad_norm": 2.266380548477173, "learning_rate": 1.9972888515115433e-05, "loss": 0.8383, "step": 3321 }, { "epoch": 0.5423452103995755, "grad_norm": 1.8276673555374146, "learning_rate": 1.9972865158756137e-05, "loss": 0.6794, "step": 3322 }, { "epoch": 0.5425084690420799, "grad_norm": 2.2632904052734375, "learning_rate": 1.997284179235417e-05, "loss": 0.9747, "step": 3323 }, { "epoch": 0.5426717276845843, "grad_norm": 2.114713668823242, "learning_rate": 1.997281841590955e-05, "loss": 0.7752, "step": 3324 }, { "epoch": 0.5428349863270887, "grad_norm": 1.948089361190796, "learning_rate": 1.997279502942231e-05, "loss": 0.8183, "step": 3325 }, { "epoch": 0.542998244969593, "grad_norm": 2.033433437347412, "learning_rate": 1.997277163289246e-05, "loss": 0.8493, "step": 3326 }, { "epoch": 0.5431615036120975, "grad_norm": 2.0917389392852783, "learning_rate": 1.997274822632004e-05, "loss": 0.9189, "step": 3327 }, { "epoch": 0.5433247622546018, "grad_norm": 2.043719530105591, "learning_rate": 1.997272480970506e-05, "loss": 0.8304, "step": 3328 }, { "epoch": 0.5434880208971062, "grad_norm": 2.3795104026794434, "learning_rate": 1.9972701383047552e-05, "loss": 0.8467, "step": 3329 }, { "epoch": 0.5436512795396107, "grad_norm": 2.2582433223724365, "learning_rate": 1.9972677946347536e-05, "loss": 0.8613, "step": 3330 }, { "epoch": 0.543814538182115, "grad_norm": 1.813295841217041, "learning_rate": 1.9972654499605034e-05, "loss": 0.7642, "step": 3331 }, { "epoch": 0.5439777968246194, "grad_norm": 2.236614227294922, "learning_rate": 1.997263104282007e-05, "loss": 0.8329, "step": 3332 }, { "epoch": 0.5441410554671238, "grad_norm": 1.9925373792648315, "learning_rate": 1.9972607575992672e-05, "loss": 0.9528, "step": 3333 }, { "epoch": 0.5443043141096282, "grad_norm": 1.9964669942855835, "learning_rate": 1.997258409912286e-05, "loss": 0.7323, "step": 3334 }, { "epoch": 0.5444675727521325, "grad_norm": 2.1535727977752686, "learning_rate": 1.997256061221066e-05, "loss": 0.8805, "step": 3335 }, { "epoch": 0.544630831394637, "grad_norm": 2.2644340991973877, "learning_rate": 1.9972537115256095e-05, "loss": 0.9324, "step": 3336 }, { "epoch": 0.5447940900371413, "grad_norm": 2.2130517959594727, "learning_rate": 1.9972513608259185e-05, "loss": 0.9007, "step": 3337 }, { "epoch": 0.5449573486796457, "grad_norm": 2.4157769680023193, "learning_rate": 1.9972490091219954e-05, "loss": 1.0026, "step": 3338 }, { "epoch": 0.5451206073221501, "grad_norm": 2.194753885269165, "learning_rate": 1.9972466564138433e-05, "loss": 0.9746, "step": 3339 }, { "epoch": 0.5452838659646545, "grad_norm": 2.1495954990386963, "learning_rate": 1.997244302701464e-05, "loss": 0.8154, "step": 3340 }, { "epoch": 0.5454471246071589, "grad_norm": 1.6893572807312012, "learning_rate": 1.9972419479848597e-05, "loss": 0.7939, "step": 3341 }, { "epoch": 0.5456103832496633, "grad_norm": 1.875591516494751, "learning_rate": 1.997239592264033e-05, "loss": 0.7619, "step": 3342 }, { "epoch": 0.5457736418921677, "grad_norm": 1.9269273281097412, "learning_rate": 1.997237235538987e-05, "loss": 0.7747, "step": 3343 }, { "epoch": 0.545936900534672, "grad_norm": 1.987618327140808, "learning_rate": 1.9972348778097225e-05, "loss": 0.8482, "step": 3344 }, { "epoch": 0.5461001591771765, "grad_norm": 2.366896152496338, "learning_rate": 1.997232519076243e-05, "loss": 0.9536, "step": 3345 }, { "epoch": 0.5462634178196808, "grad_norm": 2.162234306335449, "learning_rate": 1.9972301593385507e-05, "loss": 0.8529, "step": 3346 }, { "epoch": 0.5464266764621852, "grad_norm": 1.7913020849227905, "learning_rate": 1.9972277985966482e-05, "loss": 0.7579, "step": 3347 }, { "epoch": 0.5465899351046896, "grad_norm": 1.8930449485778809, "learning_rate": 1.997225436850537e-05, "loss": 0.7405, "step": 3348 }, { "epoch": 0.546753193747194, "grad_norm": 2.0314600467681885, "learning_rate": 1.9972230741002204e-05, "loss": 0.9037, "step": 3349 }, { "epoch": 0.5469164523896983, "grad_norm": 2.158505916595459, "learning_rate": 1.9972207103457e-05, "loss": 0.7989, "step": 3350 }, { "epoch": 0.5470797110322028, "grad_norm": 2.019305467605591, "learning_rate": 1.9972183455869793e-05, "loss": 0.7828, "step": 3351 }, { "epoch": 0.5472429696747072, "grad_norm": 2.5652377605438232, "learning_rate": 1.9972159798240596e-05, "loss": 0.9107, "step": 3352 }, { "epoch": 0.5474062283172115, "grad_norm": 1.9729971885681152, "learning_rate": 1.9972136130569438e-05, "loss": 0.8646, "step": 3353 }, { "epoch": 0.547569486959716, "grad_norm": 1.9854708909988403, "learning_rate": 1.997211245285634e-05, "loss": 0.8254, "step": 3354 }, { "epoch": 0.5477327456022203, "grad_norm": 2.113562822341919, "learning_rate": 1.9972088765101326e-05, "loss": 0.7603, "step": 3355 }, { "epoch": 0.5478960042447247, "grad_norm": 2.289031744003296, "learning_rate": 1.9972065067304424e-05, "loss": 0.9278, "step": 3356 }, { "epoch": 0.5480592628872291, "grad_norm": 2.354802131652832, "learning_rate": 1.9972041359465658e-05, "loss": 0.7929, "step": 3357 }, { "epoch": 0.5482225215297335, "grad_norm": 2.2339534759521484, "learning_rate": 1.9972017641585043e-05, "loss": 0.9376, "step": 3358 }, { "epoch": 0.5483857801722378, "grad_norm": 2.3014743328094482, "learning_rate": 1.997199391366261e-05, "loss": 0.8999, "step": 3359 }, { "epoch": 0.5485490388147423, "grad_norm": 2.0174829959869385, "learning_rate": 1.9971970175698387e-05, "loss": 0.7391, "step": 3360 }, { "epoch": 0.5487122974572466, "grad_norm": 2.2106211185455322, "learning_rate": 1.9971946427692387e-05, "loss": 0.9314, "step": 3361 }, { "epoch": 0.548875556099751, "grad_norm": 2.4365060329437256, "learning_rate": 1.9971922669644642e-05, "loss": 1.0066, "step": 3362 }, { "epoch": 0.5490388147422555, "grad_norm": 2.1324260234832764, "learning_rate": 1.9971898901555173e-05, "loss": 0.7237, "step": 3363 }, { "epoch": 0.5492020733847598, "grad_norm": 2.3569600582122803, "learning_rate": 1.9971875123424006e-05, "loss": 0.9277, "step": 3364 }, { "epoch": 0.5493653320272642, "grad_norm": 2.323190689086914, "learning_rate": 1.9971851335251162e-05, "loss": 0.8148, "step": 3365 }, { "epoch": 0.5495285906697686, "grad_norm": 2.846837282180786, "learning_rate": 1.9971827537036664e-05, "loss": 1.0011, "step": 3366 }, { "epoch": 0.549691849312273, "grad_norm": 2.1637978553771973, "learning_rate": 1.997180372878054e-05, "loss": 0.8356, "step": 3367 }, { "epoch": 0.5498551079547773, "grad_norm": 2.0121238231658936, "learning_rate": 1.997177991048281e-05, "loss": 0.7152, "step": 3368 }, { "epoch": 0.5500183665972818, "grad_norm": 2.341517210006714, "learning_rate": 1.9971756082143504e-05, "loss": 0.8444, "step": 3369 }, { "epoch": 0.5501816252397861, "grad_norm": 2.399437427520752, "learning_rate": 1.9971732243762643e-05, "loss": 0.908, "step": 3370 }, { "epoch": 0.5503448838822905, "grad_norm": 2.1694765090942383, "learning_rate": 1.9971708395340247e-05, "loss": 0.8396, "step": 3371 }, { "epoch": 0.5505081425247949, "grad_norm": 2.0025227069854736, "learning_rate": 1.9971684536876347e-05, "loss": 0.7701, "step": 3372 }, { "epoch": 0.5506714011672993, "grad_norm": 2.12707257270813, "learning_rate": 1.997166066837096e-05, "loss": 0.9347, "step": 3373 }, { "epoch": 0.5508346598098037, "grad_norm": 2.4536960124969482, "learning_rate": 1.9971636789824114e-05, "loss": 0.925, "step": 3374 }, { "epoch": 0.550997918452308, "grad_norm": 2.416557550430298, "learning_rate": 1.9971612901235832e-05, "loss": 0.9445, "step": 3375 }, { "epoch": 0.5511611770948125, "grad_norm": 1.9669743776321411, "learning_rate": 1.997158900260614e-05, "loss": 0.7123, "step": 3376 }, { "epoch": 0.5513244357373168, "grad_norm": 1.9152058362960815, "learning_rate": 1.997156509393506e-05, "loss": 0.9422, "step": 3377 }, { "epoch": 0.5514876943798213, "grad_norm": 2.189948558807373, "learning_rate": 1.9971541175222618e-05, "loss": 1.3452, "step": 3378 }, { "epoch": 0.5516509530223256, "grad_norm": 2.670701503753662, "learning_rate": 1.9971517246468834e-05, "loss": 0.9336, "step": 3379 }, { "epoch": 0.55181421166483, "grad_norm": 2.3493216037750244, "learning_rate": 1.9971493307673735e-05, "loss": 1.0201, "step": 3380 }, { "epoch": 0.5519774703073344, "grad_norm": 1.6180508136749268, "learning_rate": 1.9971469358837348e-05, "loss": 0.6577, "step": 3381 }, { "epoch": 0.5521407289498388, "grad_norm": 2.2462778091430664, "learning_rate": 1.997144539995969e-05, "loss": 0.8901, "step": 3382 }, { "epoch": 0.5523039875923431, "grad_norm": 1.9646728038787842, "learning_rate": 1.9971421431040793e-05, "loss": 0.9873, "step": 3383 }, { "epoch": 0.5524672462348476, "grad_norm": 1.9699361324310303, "learning_rate": 1.9971397452080673e-05, "loss": 0.8236, "step": 3384 }, { "epoch": 0.552630504877352, "grad_norm": 2.031480550765991, "learning_rate": 1.9971373463079363e-05, "loss": 0.8913, "step": 3385 }, { "epoch": 0.5527937635198563, "grad_norm": 2.3946778774261475, "learning_rate": 1.997134946403688e-05, "loss": 0.919, "step": 3386 }, { "epoch": 0.5529570221623608, "grad_norm": 2.1686277389526367, "learning_rate": 1.997132545495325e-05, "loss": 0.9801, "step": 3387 }, { "epoch": 0.5531202808048651, "grad_norm": 2.0874907970428467, "learning_rate": 1.99713014358285e-05, "loss": 0.9114, "step": 3388 }, { "epoch": 0.5532835394473695, "grad_norm": 2.0704283714294434, "learning_rate": 1.997127740666265e-05, "loss": 0.8794, "step": 3389 }, { "epoch": 0.5534467980898738, "grad_norm": 2.326942205429077, "learning_rate": 1.9971253367455728e-05, "loss": 1.0979, "step": 3390 }, { "epoch": 0.5536100567323783, "grad_norm": 1.8089680671691895, "learning_rate": 1.9971229318207753e-05, "loss": 0.8726, "step": 3391 }, { "epoch": 0.5537733153748826, "grad_norm": 2.3091816902160645, "learning_rate": 1.9971205258918758e-05, "loss": 1.4083, "step": 3392 }, { "epoch": 0.553936574017387, "grad_norm": 1.720828652381897, "learning_rate": 1.9971181189588756e-05, "loss": 0.7107, "step": 3393 }, { "epoch": 0.5540998326598914, "grad_norm": 1.8170922994613647, "learning_rate": 1.9971157110217782e-05, "loss": 0.7238, "step": 3394 }, { "epoch": 0.5542630913023958, "grad_norm": 2.1354002952575684, "learning_rate": 1.9971133020805856e-05, "loss": 0.8321, "step": 3395 }, { "epoch": 0.5544263499449003, "grad_norm": 2.0781266689300537, "learning_rate": 1.9971108921352998e-05, "loss": 0.6815, "step": 3396 }, { "epoch": 0.5545896085874046, "grad_norm": 2.0168750286102295, "learning_rate": 1.997108481185924e-05, "loss": 0.7504, "step": 3397 }, { "epoch": 0.554752867229909, "grad_norm": 2.2027952671051025, "learning_rate": 1.99710606923246e-05, "loss": 0.795, "step": 3398 }, { "epoch": 0.5549161258724133, "grad_norm": 2.126131057739258, "learning_rate": 1.99710365627491e-05, "loss": 0.7701, "step": 3399 }, { "epoch": 0.5550793845149178, "grad_norm": 1.6917164325714111, "learning_rate": 1.9971012423132776e-05, "loss": 0.7215, "step": 3400 }, { "epoch": 0.5552426431574221, "grad_norm": 1.962458848953247, "learning_rate": 1.9970988273475642e-05, "loss": 0.7456, "step": 3401 }, { "epoch": 0.5554059017999265, "grad_norm": 2.2335526943206787, "learning_rate": 1.9970964113777725e-05, "loss": 0.8518, "step": 3402 }, { "epoch": 0.5555691604424309, "grad_norm": 2.119255781173706, "learning_rate": 1.9970939944039052e-05, "loss": 0.9137, "step": 3403 }, { "epoch": 0.5557324190849353, "grad_norm": 2.1925292015075684, "learning_rate": 1.9970915764259644e-05, "loss": 0.7889, "step": 3404 }, { "epoch": 0.5558956777274396, "grad_norm": 2.054605484008789, "learning_rate": 1.9970891574439524e-05, "loss": 0.8136, "step": 3405 }, { "epoch": 0.5560589363699441, "grad_norm": 2.2178456783294678, "learning_rate": 1.9970867374578724e-05, "loss": 0.8551, "step": 3406 }, { "epoch": 0.5562221950124485, "grad_norm": 1.9237934350967407, "learning_rate": 1.9970843164677262e-05, "loss": 0.6965, "step": 3407 }, { "epoch": 0.5563854536549528, "grad_norm": 2.1611223220825195, "learning_rate": 1.997081894473516e-05, "loss": 0.834, "step": 3408 }, { "epoch": 0.5565487122974573, "grad_norm": 2.290708541870117, "learning_rate": 1.9970794714752448e-05, "loss": 0.8227, "step": 3409 }, { "epoch": 0.5567119709399616, "grad_norm": 2.1337385177612305, "learning_rate": 1.9970770474729146e-05, "loss": 0.8777, "step": 3410 }, { "epoch": 0.556875229582466, "grad_norm": 2.221975564956665, "learning_rate": 1.9970746224665282e-05, "loss": 0.9283, "step": 3411 }, { "epoch": 0.5570384882249704, "grad_norm": 2.21610426902771, "learning_rate": 1.9970721964560882e-05, "loss": 0.8254, "step": 3412 }, { "epoch": 0.5572017468674748, "grad_norm": 2.266871452331543, "learning_rate": 1.9970697694415967e-05, "loss": 0.9072, "step": 3413 }, { "epoch": 0.5573650055099791, "grad_norm": 2.2100205421447754, "learning_rate": 1.997067341423056e-05, "loss": 0.8356, "step": 3414 }, { "epoch": 0.5575282641524836, "grad_norm": 2.517849922180176, "learning_rate": 1.9970649124004687e-05, "loss": 1.0958, "step": 3415 }, { "epoch": 0.557691522794988, "grad_norm": 1.908424973487854, "learning_rate": 1.9970624823738376e-05, "loss": 0.719, "step": 3416 }, { "epoch": 0.5578547814374923, "grad_norm": 2.277839183807373, "learning_rate": 1.9970600513431645e-05, "loss": 0.9011, "step": 3417 }, { "epoch": 0.5580180400799968, "grad_norm": 1.965239405632019, "learning_rate": 1.9970576193084524e-05, "loss": 0.8449, "step": 3418 }, { "epoch": 0.5581812987225011, "grad_norm": 3.430443048477173, "learning_rate": 1.9970551862697037e-05, "loss": 1.0985, "step": 3419 }, { "epoch": 0.5583445573650055, "grad_norm": 2.2285091876983643, "learning_rate": 1.9970527522269204e-05, "loss": 0.7707, "step": 3420 }, { "epoch": 0.5585078160075099, "grad_norm": 1.9810905456542969, "learning_rate": 1.9970503171801053e-05, "loss": 0.8253, "step": 3421 }, { "epoch": 0.5586710746500143, "grad_norm": 2.501267671585083, "learning_rate": 1.997047881129261e-05, "loss": 0.9598, "step": 3422 }, { "epoch": 0.5588343332925186, "grad_norm": 2.3219518661499023, "learning_rate": 1.9970454440743893e-05, "loss": 0.9445, "step": 3423 }, { "epoch": 0.5589975919350231, "grad_norm": 2.236375570297241, "learning_rate": 1.9970430060154938e-05, "loss": 0.9506, "step": 3424 }, { "epoch": 0.5591608505775274, "grad_norm": 2.064094066619873, "learning_rate": 1.9970405669525756e-05, "loss": 0.7553, "step": 3425 }, { "epoch": 0.5593241092200318, "grad_norm": 2.0976550579071045, "learning_rate": 1.997038126885638e-05, "loss": 0.8452, "step": 3426 }, { "epoch": 0.5594873678625363, "grad_norm": 2.2344675064086914, "learning_rate": 1.9970356858146833e-05, "loss": 1.004, "step": 3427 }, { "epoch": 0.5596506265050406, "grad_norm": 1.7670211791992188, "learning_rate": 1.997033243739714e-05, "loss": 0.6927, "step": 3428 }, { "epoch": 0.559813885147545, "grad_norm": 1.8610122203826904, "learning_rate": 1.9970308006607327e-05, "loss": 0.8392, "step": 3429 }, { "epoch": 0.5599771437900494, "grad_norm": 2.0461933612823486, "learning_rate": 1.997028356577741e-05, "loss": 0.8206, "step": 3430 }, { "epoch": 0.5601404024325538, "grad_norm": 2.8661203384399414, "learning_rate": 1.9970259114907428e-05, "loss": 0.8657, "step": 3431 }, { "epoch": 0.5603036610750581, "grad_norm": 1.7290693521499634, "learning_rate": 1.9970234653997395e-05, "loss": 0.7571, "step": 3432 }, { "epoch": 0.5604669197175626, "grad_norm": 1.7906416654586792, "learning_rate": 1.9970210183047335e-05, "loss": 0.8287, "step": 3433 }, { "epoch": 0.5606301783600669, "grad_norm": 2.152283191680908, "learning_rate": 1.9970185702057278e-05, "loss": 0.8399, "step": 3434 }, { "epoch": 0.5607934370025713, "grad_norm": 2.637589454650879, "learning_rate": 1.9970161211027248e-05, "loss": 0.8584, "step": 3435 }, { "epoch": 0.5609566956450757, "grad_norm": 1.7848567962646484, "learning_rate": 1.9970136709957265e-05, "loss": 0.6849, "step": 3436 }, { "epoch": 0.5611199542875801, "grad_norm": 2.3367159366607666, "learning_rate": 1.997011219884736e-05, "loss": 0.7873, "step": 3437 }, { "epoch": 0.5612832129300845, "grad_norm": 2.27604341506958, "learning_rate": 1.997008767769755e-05, "loss": 0.9516, "step": 3438 }, { "epoch": 0.5614464715725889, "grad_norm": 2.1098456382751465, "learning_rate": 1.9970063146507873e-05, "loss": 0.8121, "step": 3439 }, { "epoch": 0.5616097302150933, "grad_norm": 2.8377089500427246, "learning_rate": 1.997003860527834e-05, "loss": 1.0012, "step": 3440 }, { "epoch": 0.5617729888575976, "grad_norm": 1.9472919702529907, "learning_rate": 1.997001405400898e-05, "loss": 0.7443, "step": 3441 }, { "epoch": 0.5619362475001021, "grad_norm": 2.575282573699951, "learning_rate": 1.996998949269982e-05, "loss": 1.0491, "step": 3442 }, { "epoch": 0.5620995061426064, "grad_norm": 1.89371657371521, "learning_rate": 1.996996492135088e-05, "loss": 0.7521, "step": 3443 }, { "epoch": 0.5622627647851108, "grad_norm": 2.2644269466400146, "learning_rate": 1.9969940339962192e-05, "loss": 0.9906, "step": 3444 }, { "epoch": 0.5624260234276152, "grad_norm": 1.9731885194778442, "learning_rate": 1.9969915748533774e-05, "loss": 0.873, "step": 3445 }, { "epoch": 0.5625892820701196, "grad_norm": 2.233092784881592, "learning_rate": 1.9969891147065657e-05, "loss": 0.8655, "step": 3446 }, { "epoch": 0.5627525407126239, "grad_norm": 1.8233085870742798, "learning_rate": 1.996986653555786e-05, "loss": 0.8324, "step": 3447 }, { "epoch": 0.5629157993551284, "grad_norm": 1.8706403970718384, "learning_rate": 1.996984191401041e-05, "loss": 0.7154, "step": 3448 }, { "epoch": 0.5630790579976328, "grad_norm": 1.9819782972335815, "learning_rate": 1.9969817282423332e-05, "loss": 0.8228, "step": 3449 }, { "epoch": 0.5632423166401371, "grad_norm": 1.7624868154525757, "learning_rate": 1.996979264079665e-05, "loss": 0.827, "step": 3450 }, { "epoch": 0.5634055752826416, "grad_norm": 2.0551652908325195, "learning_rate": 1.996976798913039e-05, "loss": 0.8641, "step": 3451 }, { "epoch": 0.5635688339251459, "grad_norm": 2.5862843990325928, "learning_rate": 1.9969743327424574e-05, "loss": 1.0906, "step": 3452 }, { "epoch": 0.5637320925676503, "grad_norm": 1.7549127340316772, "learning_rate": 1.9969718655679235e-05, "loss": 0.6891, "step": 3453 }, { "epoch": 0.5638953512101547, "grad_norm": 2.14625883102417, "learning_rate": 1.9969693973894387e-05, "loss": 0.8222, "step": 3454 }, { "epoch": 0.5640586098526591, "grad_norm": 2.0105526447296143, "learning_rate": 1.996966928207006e-05, "loss": 0.8607, "step": 3455 }, { "epoch": 0.5642218684951634, "grad_norm": 1.8692042827606201, "learning_rate": 1.996964458020628e-05, "loss": 0.6152, "step": 3456 }, { "epoch": 0.5643851271376679, "grad_norm": 2.189004421234131, "learning_rate": 1.996961986830307e-05, "loss": 0.9449, "step": 3457 }, { "epoch": 0.5645483857801722, "grad_norm": 2.206759452819824, "learning_rate": 1.996959514636046e-05, "loss": 0.8098, "step": 3458 }, { "epoch": 0.5647116444226766, "grad_norm": 1.8337886333465576, "learning_rate": 1.9969570414378463e-05, "loss": 0.7908, "step": 3459 }, { "epoch": 0.5648749030651811, "grad_norm": 2.0800657272338867, "learning_rate": 1.9969545672357117e-05, "loss": 0.8759, "step": 3460 }, { "epoch": 0.5650381617076854, "grad_norm": 2.2918269634246826, "learning_rate": 1.9969520920296436e-05, "loss": 0.8315, "step": 3461 }, { "epoch": 0.5652014203501898, "grad_norm": 2.3565824031829834, "learning_rate": 1.9969496158196452e-05, "loss": 0.8446, "step": 3462 }, { "epoch": 0.5653646789926942, "grad_norm": 2.054570436477661, "learning_rate": 1.996947138605719e-05, "loss": 0.8475, "step": 3463 }, { "epoch": 0.5655279376351986, "grad_norm": 1.9744058847427368, "learning_rate": 1.9969446603878673e-05, "loss": 0.8297, "step": 3464 }, { "epoch": 0.5656911962777029, "grad_norm": 2.1881208419799805, "learning_rate": 1.9969421811660922e-05, "loss": 1.0169, "step": 3465 }, { "epoch": 0.5658544549202074, "grad_norm": 1.7953952550888062, "learning_rate": 1.9969397009403967e-05, "loss": 0.7628, "step": 3466 }, { "epoch": 0.5660177135627117, "grad_norm": 2.4982221126556396, "learning_rate": 1.9969372197107835e-05, "loss": 0.8465, "step": 3467 }, { "epoch": 0.5661809722052161, "grad_norm": 2.2998929023742676, "learning_rate": 1.9969347374772547e-05, "loss": 1.0015, "step": 3468 }, { "epoch": 0.5663442308477205, "grad_norm": 1.9936132431030273, "learning_rate": 1.9969322542398126e-05, "loss": 0.7333, "step": 3469 }, { "epoch": 0.5665074894902249, "grad_norm": 1.7774120569229126, "learning_rate": 1.9969297699984606e-05, "loss": 0.7523, "step": 3470 }, { "epoch": 0.5666707481327293, "grad_norm": 2.0726122856140137, "learning_rate": 1.9969272847532e-05, "loss": 0.8299, "step": 3471 }, { "epoch": 0.5668340067752337, "grad_norm": 2.009347677230835, "learning_rate": 1.996924798504034e-05, "loss": 1.0534, "step": 3472 }, { "epoch": 0.5669972654177381, "grad_norm": 1.7794783115386963, "learning_rate": 1.996922311250965e-05, "loss": 0.6734, "step": 3473 }, { "epoch": 0.5671605240602424, "grad_norm": 2.088350534439087, "learning_rate": 1.9969198229939955e-05, "loss": 0.728, "step": 3474 }, { "epoch": 0.5673237827027469, "grad_norm": 2.16640567779541, "learning_rate": 1.9969173337331283e-05, "loss": 0.8941, "step": 3475 }, { "epoch": 0.5674870413452512, "grad_norm": 2.1613821983337402, "learning_rate": 1.996914843468365e-05, "loss": 0.8288, "step": 3476 }, { "epoch": 0.5676502999877556, "grad_norm": 2.108583688735962, "learning_rate": 1.9969123521997092e-05, "loss": 0.8162, "step": 3477 }, { "epoch": 0.56781355863026, "grad_norm": 2.116083860397339, "learning_rate": 1.996909859927163e-05, "loss": 0.9621, "step": 3478 }, { "epoch": 0.5679768172727644, "grad_norm": 2.1136112213134766, "learning_rate": 1.9969073666507283e-05, "loss": 0.798, "step": 3479 }, { "epoch": 0.5681400759152687, "grad_norm": 2.1535415649414062, "learning_rate": 1.996904872370409e-05, "loss": 0.9049, "step": 3480 }, { "epoch": 0.5683033345577732, "grad_norm": 2.131030559539795, "learning_rate": 1.996902377086206e-05, "loss": 0.7919, "step": 3481 }, { "epoch": 0.5684665932002776, "grad_norm": 1.9921826124191284, "learning_rate": 1.9968998807981224e-05, "loss": 0.7865, "step": 3482 }, { "epoch": 0.5686298518427819, "grad_norm": 2.3258039951324463, "learning_rate": 1.9968973835061615e-05, "loss": 0.8733, "step": 3483 }, { "epoch": 0.5687931104852864, "grad_norm": 2.5045089721679688, "learning_rate": 1.9968948852103252e-05, "loss": 0.8565, "step": 3484 }, { "epoch": 0.5689563691277907, "grad_norm": 2.0942046642303467, "learning_rate": 1.9968923859106156e-05, "loss": 0.9612, "step": 3485 }, { "epoch": 0.5691196277702951, "grad_norm": 1.9553985595703125, "learning_rate": 1.996889885607036e-05, "loss": 0.9012, "step": 3486 }, { "epoch": 0.5692828864127994, "grad_norm": 1.8133631944656372, "learning_rate": 1.9968873842995884e-05, "loss": 0.7124, "step": 3487 }, { "epoch": 0.5694461450553039, "grad_norm": 2.2276971340179443, "learning_rate": 1.9968848819882755e-05, "loss": 0.6877, "step": 3488 }, { "epoch": 0.5696094036978082, "grad_norm": 2.2094480991363525, "learning_rate": 1.9968823786730995e-05, "loss": 0.8508, "step": 3489 }, { "epoch": 0.5697726623403127, "grad_norm": 1.9274355173110962, "learning_rate": 1.9968798743540638e-05, "loss": 0.8486, "step": 3490 }, { "epoch": 0.569935920982817, "grad_norm": 2.271136522293091, "learning_rate": 1.9968773690311696e-05, "loss": 0.8311, "step": 3491 }, { "epoch": 0.5700991796253214, "grad_norm": 1.7240419387817383, "learning_rate": 1.996874862704421e-05, "loss": 0.7199, "step": 3492 }, { "epoch": 0.5702624382678259, "grad_norm": 2.30059814453125, "learning_rate": 1.996872355373819e-05, "loss": 0.9258, "step": 3493 }, { "epoch": 0.5704256969103302, "grad_norm": 2.240341901779175, "learning_rate": 1.996869847039367e-05, "loss": 0.727, "step": 3494 }, { "epoch": 0.5705889555528346, "grad_norm": 1.8499248027801514, "learning_rate": 1.9968673377010672e-05, "loss": 0.6413, "step": 3495 }, { "epoch": 0.570752214195339, "grad_norm": 1.9977656602859497, "learning_rate": 1.9968648273589225e-05, "loss": 0.9071, "step": 3496 }, { "epoch": 0.5709154728378434, "grad_norm": 1.923082947731018, "learning_rate": 1.9968623160129353e-05, "loss": 0.8189, "step": 3497 }, { "epoch": 0.5710787314803477, "grad_norm": 1.8215272426605225, "learning_rate": 1.9968598036631077e-05, "loss": 0.7802, "step": 3498 }, { "epoch": 0.5712419901228522, "grad_norm": 2.44059157371521, "learning_rate": 1.9968572903094427e-05, "loss": 0.8205, "step": 3499 }, { "epoch": 0.5714052487653565, "grad_norm": 1.7760988473892212, "learning_rate": 1.9968547759519426e-05, "loss": 0.7798, "step": 3500 }, { "epoch": 0.5715685074078609, "grad_norm": 2.06912899017334, "learning_rate": 1.9968522605906097e-05, "loss": 0.9287, "step": 3501 }, { "epoch": 0.5717317660503652, "grad_norm": 2.350764036178589, "learning_rate": 1.9968497442254474e-05, "loss": 0.9821, "step": 3502 }, { "epoch": 0.5718950246928697, "grad_norm": 2.0147688388824463, "learning_rate": 1.9968472268564573e-05, "loss": 0.8077, "step": 3503 }, { "epoch": 0.5720582833353741, "grad_norm": 2.2154200077056885, "learning_rate": 1.9968447084836423e-05, "loss": 0.8475, "step": 3504 }, { "epoch": 0.5722215419778784, "grad_norm": 2.128984212875366, "learning_rate": 1.9968421891070052e-05, "loss": 1.0071, "step": 3505 }, { "epoch": 0.5723848006203829, "grad_norm": 2.417509078979492, "learning_rate": 1.9968396687265483e-05, "loss": 0.9865, "step": 3506 }, { "epoch": 0.5725480592628872, "grad_norm": 2.044718027114868, "learning_rate": 1.9968371473422737e-05, "loss": 1.075, "step": 3507 }, { "epoch": 0.5727113179053916, "grad_norm": 2.431800365447998, "learning_rate": 1.9968346249541848e-05, "loss": 0.8866, "step": 3508 }, { "epoch": 0.572874576547896, "grad_norm": 2.267066240310669, "learning_rate": 1.9968321015622836e-05, "loss": 0.9074, "step": 3509 }, { "epoch": 0.5730378351904004, "grad_norm": 1.9672914743423462, "learning_rate": 1.9968295771665727e-05, "loss": 0.7516, "step": 3510 }, { "epoch": 0.5732010938329047, "grad_norm": 2.1989762783050537, "learning_rate": 1.9968270517670546e-05, "loss": 0.8349, "step": 3511 }, { "epoch": 0.5733643524754092, "grad_norm": 2.4410431385040283, "learning_rate": 1.996824525363732e-05, "loss": 0.918, "step": 3512 }, { "epoch": 0.5735276111179135, "grad_norm": 1.757378101348877, "learning_rate": 1.9968219979566073e-05, "loss": 0.818, "step": 3513 }, { "epoch": 0.5736908697604179, "grad_norm": 2.1554975509643555, "learning_rate": 1.996819469545683e-05, "loss": 1.0567, "step": 3514 }, { "epoch": 0.5738541284029224, "grad_norm": 2.4001452922821045, "learning_rate": 1.996816940130962e-05, "loss": 1.0932, "step": 3515 }, { "epoch": 0.5740173870454267, "grad_norm": 2.1639411449432373, "learning_rate": 1.9968144097124467e-05, "loss": 0.7229, "step": 3516 }, { "epoch": 0.5741806456879311, "grad_norm": 2.1429758071899414, "learning_rate": 1.9968118782901395e-05, "loss": 0.8223, "step": 3517 }, { "epoch": 0.5743439043304355, "grad_norm": 2.041686534881592, "learning_rate": 1.9968093458640426e-05, "loss": 0.9954, "step": 3518 }, { "epoch": 0.5745071629729399, "grad_norm": 1.8341180086135864, "learning_rate": 1.9968068124341593e-05, "loss": 0.6653, "step": 3519 }, { "epoch": 0.5746704216154442, "grad_norm": 2.1523661613464355, "learning_rate": 1.9968042780004917e-05, "loss": 1.047, "step": 3520 }, { "epoch": 0.5748336802579487, "grad_norm": 1.844008207321167, "learning_rate": 1.9968017425630426e-05, "loss": 0.7597, "step": 3521 }, { "epoch": 0.574996938900453, "grad_norm": 2.1349868774414062, "learning_rate": 1.996799206121814e-05, "loss": 0.7207, "step": 3522 }, { "epoch": 0.5751601975429574, "grad_norm": 2.0012898445129395, "learning_rate": 1.9967966686768096e-05, "loss": 0.7216, "step": 3523 }, { "epoch": 0.5753234561854618, "grad_norm": 2.0907328128814697, "learning_rate": 1.9967941302280307e-05, "loss": 0.9239, "step": 3524 }, { "epoch": 0.5754867148279662, "grad_norm": 1.8208011388778687, "learning_rate": 1.99679159077548e-05, "loss": 0.7155, "step": 3525 }, { "epoch": 0.5756499734704706, "grad_norm": 2.0465950965881348, "learning_rate": 1.9967890503191613e-05, "loss": 1.0453, "step": 3526 }, { "epoch": 0.575813232112975, "grad_norm": 1.9289677143096924, "learning_rate": 1.996786508859076e-05, "loss": 0.9206, "step": 3527 }, { "epoch": 0.5759764907554794, "grad_norm": 2.235194444656372, "learning_rate": 1.9967839663952267e-05, "loss": 0.9156, "step": 3528 }, { "epoch": 0.5761397493979837, "grad_norm": 1.7359317541122437, "learning_rate": 1.9967814229276163e-05, "loss": 0.7562, "step": 3529 }, { "epoch": 0.5763030080404882, "grad_norm": 1.7814279794692993, "learning_rate": 1.9967788784562474e-05, "loss": 0.6828, "step": 3530 }, { "epoch": 0.5764662666829925, "grad_norm": 2.071117639541626, "learning_rate": 1.9967763329811222e-05, "loss": 0.8092, "step": 3531 }, { "epoch": 0.5766295253254969, "grad_norm": 2.035979986190796, "learning_rate": 1.9967737865022436e-05, "loss": 0.8125, "step": 3532 }, { "epoch": 0.5767927839680013, "grad_norm": 2.5577938556671143, "learning_rate": 1.9967712390196144e-05, "loss": 0.7574, "step": 3533 }, { "epoch": 0.5769560426105057, "grad_norm": 1.939315915107727, "learning_rate": 1.9967686905332365e-05, "loss": 0.7368, "step": 3534 }, { "epoch": 0.57711930125301, "grad_norm": 1.9030582904815674, "learning_rate": 1.9967661410431128e-05, "loss": 0.7362, "step": 3535 }, { "epoch": 0.5772825598955145, "grad_norm": 1.9101777076721191, "learning_rate": 1.996763590549246e-05, "loss": 0.9099, "step": 3536 }, { "epoch": 0.5774458185380189, "grad_norm": 2.0086679458618164, "learning_rate": 1.9967610390516384e-05, "loss": 0.7953, "step": 3537 }, { "epoch": 0.5776090771805232, "grad_norm": 2.229421854019165, "learning_rate": 1.9967584865502925e-05, "loss": 0.9647, "step": 3538 }, { "epoch": 0.5777723358230277, "grad_norm": 1.8451991081237793, "learning_rate": 1.9967559330452113e-05, "loss": 0.7281, "step": 3539 }, { "epoch": 0.577935594465532, "grad_norm": 2.137878894805908, "learning_rate": 1.996753378536397e-05, "loss": 0.9125, "step": 3540 }, { "epoch": 0.5780988531080364, "grad_norm": 1.878936529159546, "learning_rate": 1.9967508230238524e-05, "loss": 0.7519, "step": 3541 }, { "epoch": 0.5782621117505408, "grad_norm": 2.218960762023926, "learning_rate": 1.9967482665075802e-05, "loss": 0.8717, "step": 3542 }, { "epoch": 0.5784253703930452, "grad_norm": 2.2280657291412354, "learning_rate": 1.9967457089875824e-05, "loss": 0.8948, "step": 3543 }, { "epoch": 0.5785886290355495, "grad_norm": 2.416912317276001, "learning_rate": 1.9967431504638624e-05, "loss": 0.9698, "step": 3544 }, { "epoch": 0.578751887678054, "grad_norm": 2.0678751468658447, "learning_rate": 1.9967405909364216e-05, "loss": 0.8646, "step": 3545 }, { "epoch": 0.5789151463205583, "grad_norm": 2.2173993587493896, "learning_rate": 1.996738030405264e-05, "loss": 0.8275, "step": 3546 }, { "epoch": 0.5790784049630627, "grad_norm": 1.7599059343338013, "learning_rate": 1.996735468870391e-05, "loss": 0.6167, "step": 3547 }, { "epoch": 0.5792416636055672, "grad_norm": 2.010413646697998, "learning_rate": 1.9967329063318058e-05, "loss": 0.7683, "step": 3548 }, { "epoch": 0.5794049222480715, "grad_norm": 1.968126654624939, "learning_rate": 1.996730342789511e-05, "loss": 0.8275, "step": 3549 }, { "epoch": 0.5795681808905759, "grad_norm": 2.081322431564331, "learning_rate": 1.996727778243509e-05, "loss": 0.8272, "step": 3550 }, { "epoch": 0.5797314395330803, "grad_norm": 2.2533774375915527, "learning_rate": 1.996725212693802e-05, "loss": 0.8824, "step": 3551 }, { "epoch": 0.5798946981755847, "grad_norm": 2.0859153270721436, "learning_rate": 1.9967226461403934e-05, "loss": 0.7182, "step": 3552 }, { "epoch": 0.580057956818089, "grad_norm": 1.9190319776535034, "learning_rate": 1.9967200785832853e-05, "loss": 0.825, "step": 3553 }, { "epoch": 0.5802212154605935, "grad_norm": 2.0208308696746826, "learning_rate": 1.9967175100224803e-05, "loss": 0.773, "step": 3554 }, { "epoch": 0.5803844741030978, "grad_norm": 2.127307653427124, "learning_rate": 1.996714940457981e-05, "loss": 0.813, "step": 3555 }, { "epoch": 0.5805477327456022, "grad_norm": 2.342010021209717, "learning_rate": 1.9967123698897896e-05, "loss": 0.9685, "step": 3556 }, { "epoch": 0.5807109913881066, "grad_norm": 2.3246588706970215, "learning_rate": 1.9967097983179096e-05, "loss": 0.7986, "step": 3557 }, { "epoch": 0.580874250030611, "grad_norm": 1.9008607864379883, "learning_rate": 1.996707225742343e-05, "loss": 0.7681, "step": 3558 }, { "epoch": 0.5810375086731154, "grad_norm": 2.0052239894866943, "learning_rate": 1.9967046521630925e-05, "loss": 0.9682, "step": 3559 }, { "epoch": 0.5812007673156198, "grad_norm": 1.9885621070861816, "learning_rate": 1.9967020775801605e-05, "loss": 0.9662, "step": 3560 }, { "epoch": 0.5813640259581242, "grad_norm": 2.164640426635742, "learning_rate": 1.99669950199355e-05, "loss": 0.9055, "step": 3561 }, { "epoch": 0.5815272846006285, "grad_norm": 2.1173012256622314, "learning_rate": 1.9966969254032637e-05, "loss": 0.8021, "step": 3562 }, { "epoch": 0.581690543243133, "grad_norm": 2.1482012271881104, "learning_rate": 1.996694347809303e-05, "loss": 0.6883, "step": 3563 }, { "epoch": 0.5818538018856373, "grad_norm": 1.790844202041626, "learning_rate": 1.996691769211672e-05, "loss": 0.7198, "step": 3564 }, { "epoch": 0.5820170605281417, "grad_norm": 1.9320993423461914, "learning_rate": 1.9966891896103723e-05, "loss": 0.8342, "step": 3565 }, { "epoch": 0.582180319170646, "grad_norm": 2.5462567806243896, "learning_rate": 1.996686609005407e-05, "loss": 0.9056, "step": 3566 }, { "epoch": 0.5823435778131505, "grad_norm": 1.8899462223052979, "learning_rate": 1.996684027396779e-05, "loss": 0.859, "step": 3567 }, { "epoch": 0.5825068364556548, "grad_norm": 1.9892734289169312, "learning_rate": 1.9966814447844898e-05, "loss": 0.8492, "step": 3568 }, { "epoch": 0.5826700950981593, "grad_norm": 2.21549129486084, "learning_rate": 1.996678861168543e-05, "loss": 0.9839, "step": 3569 }, { "epoch": 0.5828333537406637, "grad_norm": 1.7828598022460938, "learning_rate": 1.9966762765489407e-05, "loss": 0.8695, "step": 3570 }, { "epoch": 0.582996612383168, "grad_norm": 1.858689546585083, "learning_rate": 1.9966736909256857e-05, "loss": 0.9036, "step": 3571 }, { "epoch": 0.5831598710256725, "grad_norm": 2.165029764175415, "learning_rate": 1.9966711042987806e-05, "loss": 0.9676, "step": 3572 }, { "epoch": 0.5833231296681768, "grad_norm": 2.0980281829833984, "learning_rate": 1.9966685166682276e-05, "loss": 1.3083, "step": 3573 }, { "epoch": 0.5834863883106812, "grad_norm": 1.7611467838287354, "learning_rate": 1.99666592803403e-05, "loss": 0.8083, "step": 3574 }, { "epoch": 0.5836496469531856, "grad_norm": 2.7075891494750977, "learning_rate": 1.99666333839619e-05, "loss": 1.0356, "step": 3575 }, { "epoch": 0.58381290559569, "grad_norm": 1.8931360244750977, "learning_rate": 1.9966607477547105e-05, "loss": 0.7758, "step": 3576 }, { "epoch": 0.5839761642381943, "grad_norm": 1.759946584701538, "learning_rate": 1.9966581561095933e-05, "loss": 0.7916, "step": 3577 }, { "epoch": 0.5841394228806988, "grad_norm": 2.396160125732422, "learning_rate": 1.996655563460842e-05, "loss": 1.0775, "step": 3578 }, { "epoch": 0.5843026815232031, "grad_norm": 2.015117645263672, "learning_rate": 1.996652969808459e-05, "loss": 0.799, "step": 3579 }, { "epoch": 0.5844659401657075, "grad_norm": 1.8990167379379272, "learning_rate": 1.9966503751524467e-05, "loss": 0.8666, "step": 3580 }, { "epoch": 0.584629198808212, "grad_norm": 1.863648533821106, "learning_rate": 1.9966477794928078e-05, "loss": 0.8379, "step": 3581 }, { "epoch": 0.5847924574507163, "grad_norm": 2.1056289672851562, "learning_rate": 1.9966451828295445e-05, "loss": 0.8087, "step": 3582 }, { "epoch": 0.5849557160932207, "grad_norm": 2.0448837280273438, "learning_rate": 1.9966425851626598e-05, "loss": 0.7847, "step": 3583 }, { "epoch": 0.585118974735725, "grad_norm": 2.1308772563934326, "learning_rate": 1.9966399864921565e-05, "loss": 0.9436, "step": 3584 }, { "epoch": 0.5852822333782295, "grad_norm": 2.0299911499023438, "learning_rate": 1.9966373868180367e-05, "loss": 0.8106, "step": 3585 }, { "epoch": 0.5854454920207338, "grad_norm": 1.900641679763794, "learning_rate": 1.9966347861403035e-05, "loss": 0.8712, "step": 3586 }, { "epoch": 0.5856087506632383, "grad_norm": 2.2011184692382812, "learning_rate": 1.9966321844589592e-05, "loss": 0.9172, "step": 3587 }, { "epoch": 0.5857720093057426, "grad_norm": 2.1134426593780518, "learning_rate": 1.996629581774007e-05, "loss": 0.8899, "step": 3588 }, { "epoch": 0.585935267948247, "grad_norm": 1.8321781158447266, "learning_rate": 1.9966269780854487e-05, "loss": 0.7152, "step": 3589 }, { "epoch": 0.5860985265907513, "grad_norm": 2.057455062866211, "learning_rate": 1.9966243733932873e-05, "loss": 1.0119, "step": 3590 }, { "epoch": 0.5862617852332558, "grad_norm": 2.1324174404144287, "learning_rate": 1.9966217676975256e-05, "loss": 0.8262, "step": 3591 }, { "epoch": 0.5864250438757602, "grad_norm": 2.0259804725646973, "learning_rate": 1.9966191609981657e-05, "loss": 0.9146, "step": 3592 }, { "epoch": 0.5865883025182645, "grad_norm": 2.019693374633789, "learning_rate": 1.996616553295211e-05, "loss": 0.7471, "step": 3593 }, { "epoch": 0.586751561160769, "grad_norm": 1.9416102170944214, "learning_rate": 1.9966139445886633e-05, "loss": 0.7142, "step": 3594 }, { "epoch": 0.5869148198032733, "grad_norm": 1.8799490928649902, "learning_rate": 1.9966113348785258e-05, "loss": 0.7989, "step": 3595 }, { "epoch": 0.5870780784457778, "grad_norm": 1.9430004358291626, "learning_rate": 1.996608724164801e-05, "loss": 0.7038, "step": 3596 }, { "epoch": 0.5872413370882821, "grad_norm": 2.42558217048645, "learning_rate": 1.9966061124474912e-05, "loss": 1.408, "step": 3597 }, { "epoch": 0.5874045957307865, "grad_norm": 1.786195158958435, "learning_rate": 1.9966034997266e-05, "loss": 0.6356, "step": 3598 }, { "epoch": 0.5875678543732908, "grad_norm": 2.3067615032196045, "learning_rate": 1.9966008860021286e-05, "loss": 0.8633, "step": 3599 }, { "epoch": 0.5877311130157953, "grad_norm": 2.1207830905914307, "learning_rate": 1.996598271274081e-05, "loss": 0.8098, "step": 3600 }, { "epoch": 0.5878943716582996, "grad_norm": 2.233381986618042, "learning_rate": 1.9965956555424587e-05, "loss": 0.7689, "step": 3601 }, { "epoch": 0.588057630300804, "grad_norm": 2.062166213989258, "learning_rate": 1.996593038807265e-05, "loss": 0.7001, "step": 3602 }, { "epoch": 0.5882208889433085, "grad_norm": 1.9232172966003418, "learning_rate": 1.9965904210685025e-05, "loss": 0.886, "step": 3603 }, { "epoch": 0.5883841475858128, "grad_norm": 2.2392020225524902, "learning_rate": 1.996587802326173e-05, "loss": 0.928, "step": 3604 }, { "epoch": 0.5885474062283172, "grad_norm": 2.1075050830841064, "learning_rate": 1.996585182580281e-05, "loss": 0.9298, "step": 3605 }, { "epoch": 0.5887106648708216, "grad_norm": 1.9919886589050293, "learning_rate": 1.996582561830827e-05, "loss": 0.7881, "step": 3606 }, { "epoch": 0.588873923513326, "grad_norm": 2.227320909500122, "learning_rate": 1.9965799400778154e-05, "loss": 0.7981, "step": 3607 }, { "epoch": 0.5890371821558303, "grad_norm": 2.0313994884490967, "learning_rate": 1.9965773173212475e-05, "loss": 0.7718, "step": 3608 }, { "epoch": 0.5892004407983348, "grad_norm": 2.2753689289093018, "learning_rate": 1.996574693561127e-05, "loss": 0.9776, "step": 3609 }, { "epoch": 0.5893636994408391, "grad_norm": 2.286255359649658, "learning_rate": 1.9965720687974555e-05, "loss": 1.0616, "step": 3610 }, { "epoch": 0.5895269580833435, "grad_norm": 1.9817832708358765, "learning_rate": 1.9965694430302364e-05, "loss": 0.79, "step": 3611 }, { "epoch": 0.5896902167258479, "grad_norm": 2.2115752696990967, "learning_rate": 1.9965668162594723e-05, "loss": 0.9056, "step": 3612 }, { "epoch": 0.5898534753683523, "grad_norm": 1.92093026638031, "learning_rate": 1.9965641884851657e-05, "loss": 0.7717, "step": 3613 }, { "epoch": 0.5900167340108567, "grad_norm": 2.053100347518921, "learning_rate": 1.9965615597073188e-05, "loss": 0.8169, "step": 3614 }, { "epoch": 0.5901799926533611, "grad_norm": 2.5104873180389404, "learning_rate": 1.996558929925935e-05, "loss": 0.9791, "step": 3615 }, { "epoch": 0.5903432512958655, "grad_norm": 1.9146260023117065, "learning_rate": 1.9965562991410167e-05, "loss": 0.8574, "step": 3616 }, { "epoch": 0.5905065099383698, "grad_norm": 1.938597559928894, "learning_rate": 1.9965536673525664e-05, "loss": 0.7675, "step": 3617 }, { "epoch": 0.5906697685808743, "grad_norm": 2.1565463542938232, "learning_rate": 1.9965510345605866e-05, "loss": 1.0027, "step": 3618 }, { "epoch": 0.5908330272233786, "grad_norm": 1.985184669494629, "learning_rate": 1.9965484007650805e-05, "loss": 0.8628, "step": 3619 }, { "epoch": 0.590996285865883, "grad_norm": 2.17162823677063, "learning_rate": 1.9965457659660504e-05, "loss": 0.8638, "step": 3620 }, { "epoch": 0.5911595445083874, "grad_norm": 2.005338191986084, "learning_rate": 1.9965431301634987e-05, "loss": 1.0438, "step": 3621 }, { "epoch": 0.5913228031508918, "grad_norm": 1.822651982307434, "learning_rate": 1.9965404933574284e-05, "loss": 0.7258, "step": 3622 }, { "epoch": 0.5914860617933961, "grad_norm": 2.550746202468872, "learning_rate": 1.9965378555478423e-05, "loss": 0.9615, "step": 3623 }, { "epoch": 0.5916493204359006, "grad_norm": 1.7623876333236694, "learning_rate": 1.9965352167347428e-05, "loss": 0.7797, "step": 3624 }, { "epoch": 0.591812579078405, "grad_norm": 1.955164909362793, "learning_rate": 1.9965325769181324e-05, "loss": 0.8866, "step": 3625 }, { "epoch": 0.5919758377209093, "grad_norm": 1.9095629453659058, "learning_rate": 1.996529936098014e-05, "loss": 0.8066, "step": 3626 }, { "epoch": 0.5921390963634138, "grad_norm": 2.33553147315979, "learning_rate": 1.9965272942743903e-05, "loss": 0.8389, "step": 3627 }, { "epoch": 0.5923023550059181, "grad_norm": 1.7946014404296875, "learning_rate": 1.996524651447264e-05, "loss": 0.9017, "step": 3628 }, { "epoch": 0.5924656136484225, "grad_norm": 1.7187762260437012, "learning_rate": 1.9965220076166376e-05, "loss": 0.7062, "step": 3629 }, { "epoch": 0.5926288722909269, "grad_norm": 2.0866942405700684, "learning_rate": 1.9965193627825138e-05, "loss": 0.8384, "step": 3630 }, { "epoch": 0.5927921309334313, "grad_norm": 2.062596559524536, "learning_rate": 1.9965167169448947e-05, "loss": 1.0171, "step": 3631 }, { "epoch": 0.5929553895759356, "grad_norm": 2.073064088821411, "learning_rate": 1.9965140701037843e-05, "loss": 0.9276, "step": 3632 }, { "epoch": 0.5931186482184401, "grad_norm": 1.9622799158096313, "learning_rate": 1.996511422259184e-05, "loss": 0.8272, "step": 3633 }, { "epoch": 0.5932819068609444, "grad_norm": 1.9165892601013184, "learning_rate": 1.9965087734110974e-05, "loss": 0.7848, "step": 3634 }, { "epoch": 0.5934451655034488, "grad_norm": 2.123906373977661, "learning_rate": 1.9965061235595265e-05, "loss": 0.8469, "step": 3635 }, { "epoch": 0.5936084241459533, "grad_norm": 1.6614809036254883, "learning_rate": 1.9965034727044743e-05, "loss": 0.7089, "step": 3636 }, { "epoch": 0.5937716827884576, "grad_norm": 2.218162775039673, "learning_rate": 1.9965008208459434e-05, "loss": 0.9996, "step": 3637 }, { "epoch": 0.593934941430962, "grad_norm": 2.1402623653411865, "learning_rate": 1.996498167983936e-05, "loss": 0.8732, "step": 3638 }, { "epoch": 0.5940982000734664, "grad_norm": 2.083862781524658, "learning_rate": 1.9964955141184556e-05, "loss": 0.8834, "step": 3639 }, { "epoch": 0.5942614587159708, "grad_norm": 2.0694124698638916, "learning_rate": 1.9964928592495046e-05, "loss": 0.8436, "step": 3640 }, { "epoch": 0.5944247173584751, "grad_norm": 1.8595224618911743, "learning_rate": 1.9964902033770853e-05, "loss": 0.6515, "step": 3641 }, { "epoch": 0.5945879760009796, "grad_norm": 1.8203481435775757, "learning_rate": 1.9964875465012005e-05, "loss": 0.9081, "step": 3642 }, { "epoch": 0.5947512346434839, "grad_norm": 2.117825984954834, "learning_rate": 1.996484888621853e-05, "loss": 0.7962, "step": 3643 }, { "epoch": 0.5949144932859883, "grad_norm": 2.43833065032959, "learning_rate": 1.996482229739046e-05, "loss": 1.1591, "step": 3644 }, { "epoch": 0.5950777519284927, "grad_norm": 1.9372369050979614, "learning_rate": 1.9964795698527816e-05, "loss": 0.9037, "step": 3645 }, { "epoch": 0.5952410105709971, "grad_norm": 1.837344765663147, "learning_rate": 1.9964769089630622e-05, "loss": 0.7742, "step": 3646 }, { "epoch": 0.5954042692135015, "grad_norm": 2.213494300842285, "learning_rate": 1.9964742470698906e-05, "loss": 0.9234, "step": 3647 }, { "epoch": 0.5955675278560059, "grad_norm": 2.143841505050659, "learning_rate": 1.99647158417327e-05, "loss": 0.8837, "step": 3648 }, { "epoch": 0.5957307864985103, "grad_norm": 2.2131524085998535, "learning_rate": 1.996468920273203e-05, "loss": 0.7107, "step": 3649 }, { "epoch": 0.5958940451410146, "grad_norm": 2.2660155296325684, "learning_rate": 1.9964662553696915e-05, "loss": 1.0301, "step": 3650 }, { "epoch": 0.5960573037835191, "grad_norm": 2.46110200881958, "learning_rate": 1.996463589462739e-05, "loss": 0.8158, "step": 3651 }, { "epoch": 0.5962205624260234, "grad_norm": 1.8709434270858765, "learning_rate": 1.9964609225523484e-05, "loss": 0.7605, "step": 3652 }, { "epoch": 0.5963838210685278, "grad_norm": 1.6768497228622437, "learning_rate": 1.9964582546385212e-05, "loss": 0.6069, "step": 3653 }, { "epoch": 0.5965470797110322, "grad_norm": 1.8969913721084595, "learning_rate": 1.9964555857212612e-05, "loss": 0.8008, "step": 3654 }, { "epoch": 0.5967103383535366, "grad_norm": 1.8646571636199951, "learning_rate": 1.9964529158005707e-05, "loss": 0.7478, "step": 3655 }, { "epoch": 0.5968735969960409, "grad_norm": 2.075502872467041, "learning_rate": 1.9964502448764524e-05, "loss": 0.8409, "step": 3656 }, { "epoch": 0.5970368556385454, "grad_norm": 1.8992784023284912, "learning_rate": 1.9964475729489087e-05, "loss": 0.7192, "step": 3657 }, { "epoch": 0.5972001142810498, "grad_norm": 2.103079319000244, "learning_rate": 1.9964449000179428e-05, "loss": 0.9669, "step": 3658 }, { "epoch": 0.5973633729235541, "grad_norm": 1.7781537771224976, "learning_rate": 1.996442226083557e-05, "loss": 0.7325, "step": 3659 }, { "epoch": 0.5975266315660586, "grad_norm": 2.018824338912964, "learning_rate": 1.9964395511457543e-05, "loss": 0.7988, "step": 3660 }, { "epoch": 0.5976898902085629, "grad_norm": 2.1745574474334717, "learning_rate": 1.9964368752045372e-05, "loss": 0.7999, "step": 3661 }, { "epoch": 0.5978531488510673, "grad_norm": 2.1089706420898438, "learning_rate": 1.996434198259908e-05, "loss": 0.7532, "step": 3662 }, { "epoch": 0.5980164074935717, "grad_norm": 2.1655523777008057, "learning_rate": 1.9964315203118706e-05, "loss": 0.7481, "step": 3663 }, { "epoch": 0.5981796661360761, "grad_norm": 2.165886163711548, "learning_rate": 1.9964288413604262e-05, "loss": 0.7264, "step": 3664 }, { "epoch": 0.5983429247785804, "grad_norm": 2.5514092445373535, "learning_rate": 1.9964261614055788e-05, "loss": 0.9026, "step": 3665 }, { "epoch": 0.5985061834210849, "grad_norm": 2.8042421340942383, "learning_rate": 1.99642348044733e-05, "loss": 1.0191, "step": 3666 }, { "epoch": 0.5986694420635893, "grad_norm": 2.482532262802124, "learning_rate": 1.9964207984856833e-05, "loss": 0.9144, "step": 3667 }, { "epoch": 0.5988327007060936, "grad_norm": 1.8352465629577637, "learning_rate": 1.996418115520641e-05, "loss": 0.7136, "step": 3668 }, { "epoch": 0.5989959593485981, "grad_norm": 2.125253438949585, "learning_rate": 1.9964154315522062e-05, "loss": 0.7817, "step": 3669 }, { "epoch": 0.5991592179911024, "grad_norm": 2.0890777111053467, "learning_rate": 1.9964127465803812e-05, "loss": 0.8061, "step": 3670 }, { "epoch": 0.5993224766336068, "grad_norm": 1.7184405326843262, "learning_rate": 1.9964100606051685e-05, "loss": 0.7482, "step": 3671 }, { "epoch": 0.5994857352761112, "grad_norm": 2.1258692741394043, "learning_rate": 1.9964073736265717e-05, "loss": 0.8278, "step": 3672 }, { "epoch": 0.5996489939186156, "grad_norm": 2.428915023803711, "learning_rate": 1.9964046856445926e-05, "loss": 0.8707, "step": 3673 }, { "epoch": 0.5998122525611199, "grad_norm": 2.3831193447113037, "learning_rate": 1.996401996659234e-05, "loss": 0.7595, "step": 3674 }, { "epoch": 0.5999755112036244, "grad_norm": 2.1495227813720703, "learning_rate": 1.9963993066704995e-05, "loss": 0.8719, "step": 3675 }, { "epoch": 0.6001387698461287, "grad_norm": 2.247288942337036, "learning_rate": 1.9963966156783906e-05, "loss": 0.9942, "step": 3676 }, { "epoch": 0.6003020284886331, "grad_norm": 2.2114663124084473, "learning_rate": 1.9963939236829108e-05, "loss": 0.994, "step": 3677 }, { "epoch": 0.6004652871311376, "grad_norm": 2.113921880722046, "learning_rate": 1.9963912306840626e-05, "loss": 0.7393, "step": 3678 }, { "epoch": 0.6006285457736419, "grad_norm": 2.2942121028900146, "learning_rate": 1.9963885366818486e-05, "loss": 0.8665, "step": 3679 }, { "epoch": 0.6007918044161463, "grad_norm": 2.5849621295928955, "learning_rate": 1.996385841676272e-05, "loss": 0.8805, "step": 3680 }, { "epoch": 0.6009550630586507, "grad_norm": 1.9586313962936401, "learning_rate": 1.9963831456673346e-05, "loss": 0.8416, "step": 3681 }, { "epoch": 0.6011183217011551, "grad_norm": 1.8255609273910522, "learning_rate": 1.9963804486550397e-05, "loss": 0.8924, "step": 3682 }, { "epoch": 0.6012815803436594, "grad_norm": 2.106628894805908, "learning_rate": 1.99637775063939e-05, "loss": 0.9633, "step": 3683 }, { "epoch": 0.6014448389861639, "grad_norm": 1.8040400743484497, "learning_rate": 1.9963750516203887e-05, "loss": 0.8778, "step": 3684 }, { "epoch": 0.6016080976286682, "grad_norm": 1.8429375886917114, "learning_rate": 1.9963723515980372e-05, "loss": 0.8145, "step": 3685 }, { "epoch": 0.6017713562711726, "grad_norm": 1.9379334449768066, "learning_rate": 1.9963696505723392e-05, "loss": 0.7614, "step": 3686 }, { "epoch": 0.601934614913677, "grad_norm": 2.2609550952911377, "learning_rate": 1.9963669485432975e-05, "loss": 0.9209, "step": 3687 }, { "epoch": 0.6020978735561814, "grad_norm": 2.6682698726654053, "learning_rate": 1.9963642455109144e-05, "loss": 0.6736, "step": 3688 }, { "epoch": 0.6022611321986858, "grad_norm": 2.4525601863861084, "learning_rate": 1.996361541475193e-05, "loss": 1.0349, "step": 3689 }, { "epoch": 0.6024243908411901, "grad_norm": 2.488133430480957, "learning_rate": 1.9963588364361356e-05, "loss": 0.8848, "step": 3690 }, { "epoch": 0.6025876494836946, "grad_norm": 2.042724132537842, "learning_rate": 1.9963561303937447e-05, "loss": 0.7803, "step": 3691 }, { "epoch": 0.6027509081261989, "grad_norm": 2.018433094024658, "learning_rate": 1.996353423348024e-05, "loss": 0.7195, "step": 3692 }, { "epoch": 0.6029141667687034, "grad_norm": 1.9370495080947876, "learning_rate": 1.9963507152989755e-05, "loss": 0.8553, "step": 3693 }, { "epoch": 0.6030774254112077, "grad_norm": 2.1063125133514404, "learning_rate": 1.9963480062466022e-05, "loss": 0.8353, "step": 3694 }, { "epoch": 0.6032406840537121, "grad_norm": 2.00225830078125, "learning_rate": 1.9963452961909065e-05, "loss": 0.9269, "step": 3695 }, { "epoch": 0.6034039426962164, "grad_norm": 2.2192225456237793, "learning_rate": 1.9963425851318915e-05, "loss": 0.7334, "step": 3696 }, { "epoch": 0.6035672013387209, "grad_norm": 1.948266863822937, "learning_rate": 1.9963398730695598e-05, "loss": 0.7861, "step": 3697 }, { "epoch": 0.6037304599812252, "grad_norm": 2.260859966278076, "learning_rate": 1.996337160003914e-05, "loss": 0.9476, "step": 3698 }, { "epoch": 0.6038937186237296, "grad_norm": 1.8101441860198975, "learning_rate": 1.9963344459349572e-05, "loss": 0.8074, "step": 3699 }, { "epoch": 0.6040569772662341, "grad_norm": 2.1374411582946777, "learning_rate": 1.9963317308626916e-05, "loss": 0.8633, "step": 3700 }, { "epoch": 0.6042202359087384, "grad_norm": 2.003028631210327, "learning_rate": 1.9963290147871205e-05, "loss": 0.7181, "step": 3701 }, { "epoch": 0.6043834945512429, "grad_norm": 2.6432526111602783, "learning_rate": 1.996326297708246e-05, "loss": 0.8456, "step": 3702 }, { "epoch": 0.6045467531937472, "grad_norm": 2.264915704727173, "learning_rate": 1.9963235796260713e-05, "loss": 0.8801, "step": 3703 }, { "epoch": 0.6047100118362516, "grad_norm": 2.3882110118865967, "learning_rate": 1.996320860540599e-05, "loss": 0.9004, "step": 3704 }, { "epoch": 0.6048732704787559, "grad_norm": 1.9614765644073486, "learning_rate": 1.9963181404518318e-05, "loss": 0.8522, "step": 3705 }, { "epoch": 0.6050365291212604, "grad_norm": 2.125267744064331, "learning_rate": 1.9963154193597728e-05, "loss": 0.9304, "step": 3706 }, { "epoch": 0.6051997877637647, "grad_norm": 2.4708149433135986, "learning_rate": 1.9963126972644243e-05, "loss": 0.9468, "step": 3707 }, { "epoch": 0.6053630464062691, "grad_norm": 1.9877036809921265, "learning_rate": 1.9963099741657887e-05, "loss": 1.0306, "step": 3708 }, { "epoch": 0.6055263050487735, "grad_norm": 1.9365112781524658, "learning_rate": 1.9963072500638697e-05, "loss": 0.7073, "step": 3709 }, { "epoch": 0.6056895636912779, "grad_norm": 2.130544424057007, "learning_rate": 1.9963045249586696e-05, "loss": 0.8411, "step": 3710 }, { "epoch": 0.6058528223337823, "grad_norm": 1.9434716701507568, "learning_rate": 1.9963017988501908e-05, "loss": 0.7616, "step": 3711 }, { "epoch": 0.6060160809762867, "grad_norm": 2.1127660274505615, "learning_rate": 1.9962990717384368e-05, "loss": 0.821, "step": 3712 }, { "epoch": 0.6061793396187911, "grad_norm": 1.9306023120880127, "learning_rate": 1.9962963436234095e-05, "loss": 0.8825, "step": 3713 }, { "epoch": 0.6063425982612954, "grad_norm": 1.9786553382873535, "learning_rate": 1.9962936145051123e-05, "loss": 0.7718, "step": 3714 }, { "epoch": 0.6065058569037999, "grad_norm": 1.7668818235397339, "learning_rate": 1.9962908843835476e-05, "loss": 0.7322, "step": 3715 }, { "epoch": 0.6066691155463042, "grad_norm": 2.0161406993865967, "learning_rate": 1.996288153258718e-05, "loss": 0.8121, "step": 3716 }, { "epoch": 0.6068323741888086, "grad_norm": 2.028794765472412, "learning_rate": 1.996285421130627e-05, "loss": 0.8758, "step": 3717 }, { "epoch": 0.606995632831313, "grad_norm": 2.0440173149108887, "learning_rate": 1.9962826879992767e-05, "loss": 0.8263, "step": 3718 }, { "epoch": 0.6071588914738174, "grad_norm": 1.9800260066986084, "learning_rate": 1.9962799538646698e-05, "loss": 0.8812, "step": 3719 }, { "epoch": 0.6073221501163217, "grad_norm": 2.2526094913482666, "learning_rate": 1.9962772187268093e-05, "loss": 0.7996, "step": 3720 }, { "epoch": 0.6074854087588262, "grad_norm": 2.237852096557617, "learning_rate": 1.996274482585698e-05, "loss": 1.0548, "step": 3721 }, { "epoch": 0.6076486674013306, "grad_norm": 1.9838221073150635, "learning_rate": 1.9962717454413384e-05, "loss": 0.8007, "step": 3722 }, { "epoch": 0.6078119260438349, "grad_norm": 1.9112454652786255, "learning_rate": 1.9962690072937337e-05, "loss": 0.6838, "step": 3723 }, { "epoch": 0.6079751846863394, "grad_norm": 1.6715295314788818, "learning_rate": 1.996266268142886e-05, "loss": 0.6539, "step": 3724 }, { "epoch": 0.6081384433288437, "grad_norm": 1.7555391788482666, "learning_rate": 1.9962635279887987e-05, "loss": 0.8086, "step": 3725 }, { "epoch": 0.6083017019713481, "grad_norm": 2.1395602226257324, "learning_rate": 1.996260786831474e-05, "loss": 0.9322, "step": 3726 }, { "epoch": 0.6084649606138525, "grad_norm": 1.9704563617706299, "learning_rate": 1.9962580446709153e-05, "loss": 0.7355, "step": 3727 }, { "epoch": 0.6086282192563569, "grad_norm": 2.4036669731140137, "learning_rate": 1.996255301507125e-05, "loss": 0.9818, "step": 3728 }, { "epoch": 0.6087914778988612, "grad_norm": 2.23121976852417, "learning_rate": 1.9962525573401053e-05, "loss": 0.8791, "step": 3729 }, { "epoch": 0.6089547365413657, "grad_norm": 2.050804615020752, "learning_rate": 1.9962498121698602e-05, "loss": 0.8695, "step": 3730 }, { "epoch": 0.60911799518387, "grad_norm": 2.21913743019104, "learning_rate": 1.9962470659963914e-05, "loss": 0.7476, "step": 3731 }, { "epoch": 0.6092812538263744, "grad_norm": 2.0922482013702393, "learning_rate": 1.9962443188197024e-05, "loss": 0.8584, "step": 3732 }, { "epoch": 0.6094445124688789, "grad_norm": 2.075019359588623, "learning_rate": 1.9962415706397954e-05, "loss": 0.8137, "step": 3733 }, { "epoch": 0.6096077711113832, "grad_norm": 2.05233097076416, "learning_rate": 1.9962388214566738e-05, "loss": 0.8552, "step": 3734 }, { "epoch": 0.6097710297538876, "grad_norm": 2.4691267013549805, "learning_rate": 1.9962360712703396e-05, "loss": 1.0121, "step": 3735 }, { "epoch": 0.609934288396392, "grad_norm": 1.9852056503295898, "learning_rate": 1.9962333200807958e-05, "loss": 0.9552, "step": 3736 }, { "epoch": 0.6100975470388964, "grad_norm": 2.222888469696045, "learning_rate": 1.996230567888046e-05, "loss": 0.8516, "step": 3737 }, { "epoch": 0.6102608056814007, "grad_norm": 2.003159999847412, "learning_rate": 1.9962278146920914e-05, "loss": 0.8734, "step": 3738 }, { "epoch": 0.6104240643239052, "grad_norm": 2.2001168727874756, "learning_rate": 1.9962250604929362e-05, "loss": 0.9279, "step": 3739 }, { "epoch": 0.6105873229664095, "grad_norm": 1.823410153388977, "learning_rate": 1.9962223052905823e-05, "loss": 0.6804, "step": 3740 }, { "epoch": 0.6107505816089139, "grad_norm": 2.1045539379119873, "learning_rate": 1.996219549085033e-05, "loss": 0.7195, "step": 3741 }, { "epoch": 0.6109138402514183, "grad_norm": 1.905200481414795, "learning_rate": 1.996216791876291e-05, "loss": 0.8083, "step": 3742 }, { "epoch": 0.6110770988939227, "grad_norm": 1.8150074481964111, "learning_rate": 1.9962140336643588e-05, "loss": 0.8253, "step": 3743 }, { "epoch": 0.6112403575364271, "grad_norm": 2.5424857139587402, "learning_rate": 1.996211274449239e-05, "loss": 0.9048, "step": 3744 }, { "epoch": 0.6114036161789315, "grad_norm": 2.2649083137512207, "learning_rate": 1.9962085142309354e-05, "loss": 0.9257, "step": 3745 }, { "epoch": 0.6115668748214359, "grad_norm": 2.1318905353546143, "learning_rate": 1.9962057530094498e-05, "loss": 0.9251, "step": 3746 }, { "epoch": 0.6117301334639402, "grad_norm": 2.132192611694336, "learning_rate": 1.9962029907847852e-05, "loss": 0.8719, "step": 3747 }, { "epoch": 0.6118933921064447, "grad_norm": 1.7561556100845337, "learning_rate": 1.9962002275569445e-05, "loss": 0.7785, "step": 3748 }, { "epoch": 0.612056650748949, "grad_norm": 1.6951043605804443, "learning_rate": 1.9961974633259302e-05, "loss": 0.6695, "step": 3749 }, { "epoch": 0.6122199093914534, "grad_norm": 1.6717313528060913, "learning_rate": 1.9961946980917457e-05, "loss": 0.5896, "step": 3750 }, { "epoch": 0.6123831680339578, "grad_norm": 2.0419058799743652, "learning_rate": 1.9961919318543933e-05, "loss": 0.7776, "step": 3751 }, { "epoch": 0.6125464266764622, "grad_norm": 2.1031951904296875, "learning_rate": 1.9961891646138757e-05, "loss": 0.8297, "step": 3752 }, { "epoch": 0.6127096853189665, "grad_norm": 1.8134374618530273, "learning_rate": 1.9961863963701963e-05, "loss": 0.7281, "step": 3753 }, { "epoch": 0.612872943961471, "grad_norm": 2.130743980407715, "learning_rate": 1.9961836271233568e-05, "loss": 0.8177, "step": 3754 }, { "epoch": 0.6130362026039754, "grad_norm": 2.097658157348633, "learning_rate": 1.9961808568733612e-05, "loss": 0.8198, "step": 3755 }, { "epoch": 0.6131994612464797, "grad_norm": 2.359623432159424, "learning_rate": 1.9961780856202114e-05, "loss": 1.0107, "step": 3756 }, { "epoch": 0.6133627198889842, "grad_norm": 2.0818188190460205, "learning_rate": 1.996175313363911e-05, "loss": 0.8016, "step": 3757 }, { "epoch": 0.6135259785314885, "grad_norm": 2.3268625736236572, "learning_rate": 1.996172540104462e-05, "loss": 0.9352, "step": 3758 }, { "epoch": 0.6136892371739929, "grad_norm": 2.025575876235962, "learning_rate": 1.9961697658418674e-05, "loss": 0.8486, "step": 3759 }, { "epoch": 0.6138524958164973, "grad_norm": 2.0393269062042236, "learning_rate": 1.9961669905761303e-05, "loss": 0.8731, "step": 3760 }, { "epoch": 0.6140157544590017, "grad_norm": 1.8359060287475586, "learning_rate": 1.9961642143072532e-05, "loss": 0.747, "step": 3761 }, { "epoch": 0.614179013101506, "grad_norm": 2.3686137199401855, "learning_rate": 1.996161437035239e-05, "loss": 1.0249, "step": 3762 }, { "epoch": 0.6143422717440105, "grad_norm": 2.0227677822113037, "learning_rate": 1.9961586587600905e-05, "loss": 0.8272, "step": 3763 }, { "epoch": 0.6145055303865148, "grad_norm": 2.142469882965088, "learning_rate": 1.9961558794818107e-05, "loss": 0.8906, "step": 3764 }, { "epoch": 0.6146687890290192, "grad_norm": 2.2985129356384277, "learning_rate": 1.996153099200402e-05, "loss": 0.7387, "step": 3765 }, { "epoch": 0.6148320476715237, "grad_norm": 2.1139304637908936, "learning_rate": 1.9961503179158673e-05, "loss": 0.8451, "step": 3766 }, { "epoch": 0.614995306314028, "grad_norm": 1.921873927116394, "learning_rate": 1.9961475356282095e-05, "loss": 0.8413, "step": 3767 }, { "epoch": 0.6151585649565324, "grad_norm": 2.2924017906188965, "learning_rate": 1.9961447523374316e-05, "loss": 0.9166, "step": 3768 }, { "epoch": 0.6153218235990368, "grad_norm": 2.0093233585357666, "learning_rate": 1.996141968043536e-05, "loss": 0.8167, "step": 3769 }, { "epoch": 0.6154850822415412, "grad_norm": 2.016788959503174, "learning_rate": 1.996139182746526e-05, "loss": 0.8128, "step": 3770 }, { "epoch": 0.6156483408840455, "grad_norm": 1.9738140106201172, "learning_rate": 1.9961363964464037e-05, "loss": 0.7093, "step": 3771 }, { "epoch": 0.61581159952655, "grad_norm": 1.8268052339553833, "learning_rate": 1.9961336091431728e-05, "loss": 0.7611, "step": 3772 }, { "epoch": 0.6159748581690543, "grad_norm": 1.717950701713562, "learning_rate": 1.996130820836835e-05, "loss": 0.676, "step": 3773 }, { "epoch": 0.6161381168115587, "grad_norm": 2.321230173110962, "learning_rate": 1.9961280315273944e-05, "loss": 0.8696, "step": 3774 }, { "epoch": 0.616301375454063, "grad_norm": 2.2084743976593018, "learning_rate": 1.9961252412148526e-05, "loss": 0.7299, "step": 3775 }, { "epoch": 0.6164646340965675, "grad_norm": 1.9729336500167847, "learning_rate": 1.9961224498992134e-05, "loss": 0.9359, "step": 3776 }, { "epoch": 0.6166278927390719, "grad_norm": 2.1378586292266846, "learning_rate": 1.996119657580479e-05, "loss": 0.8089, "step": 3777 }, { "epoch": 0.6167911513815763, "grad_norm": 2.071220636367798, "learning_rate": 1.9961168642586523e-05, "loss": 0.8486, "step": 3778 }, { "epoch": 0.6169544100240807, "grad_norm": 2.0117225646972656, "learning_rate": 1.9961140699337358e-05, "loss": 0.7344, "step": 3779 }, { "epoch": 0.617117668666585, "grad_norm": 2.243736982345581, "learning_rate": 1.996111274605733e-05, "loss": 0.9864, "step": 3780 }, { "epoch": 0.6172809273090895, "grad_norm": 1.8221784830093384, "learning_rate": 1.9961084782746468e-05, "loss": 0.8879, "step": 3781 }, { "epoch": 0.6174441859515938, "grad_norm": 2.270043134689331, "learning_rate": 1.9961056809404793e-05, "loss": 0.8056, "step": 3782 }, { "epoch": 0.6176074445940982, "grad_norm": 2.108318567276001, "learning_rate": 1.9961028826032335e-05, "loss": 0.8774, "step": 3783 }, { "epoch": 0.6177707032366025, "grad_norm": 2.0278093814849854, "learning_rate": 1.9961000832629126e-05, "loss": 0.8867, "step": 3784 }, { "epoch": 0.617933961879107, "grad_norm": 2.036600112915039, "learning_rate": 1.996097282919519e-05, "loss": 0.8026, "step": 3785 }, { "epoch": 0.6180972205216113, "grad_norm": 2.1643218994140625, "learning_rate": 1.9960944815730558e-05, "loss": 0.9236, "step": 3786 }, { "epoch": 0.6182604791641158, "grad_norm": 2.492954969406128, "learning_rate": 1.9960916792235256e-05, "loss": 0.8474, "step": 3787 }, { "epoch": 0.6184237378066202, "grad_norm": 1.9372669458389282, "learning_rate": 1.9960888758709316e-05, "loss": 0.8115, "step": 3788 }, { "epoch": 0.6185869964491245, "grad_norm": 2.0380916595458984, "learning_rate": 1.996086071515276e-05, "loss": 0.8055, "step": 3789 }, { "epoch": 0.618750255091629, "grad_norm": 1.7349754571914673, "learning_rate": 1.9960832661565625e-05, "loss": 0.5634, "step": 3790 }, { "epoch": 0.6189135137341333, "grad_norm": 2.294201135635376, "learning_rate": 1.996080459794793e-05, "loss": 1.0117, "step": 3791 }, { "epoch": 0.6190767723766377, "grad_norm": 2.2213833332061768, "learning_rate": 1.9960776524299708e-05, "loss": 0.8685, "step": 3792 }, { "epoch": 0.619240031019142, "grad_norm": 2.023338556289673, "learning_rate": 1.9960748440620988e-05, "loss": 0.8657, "step": 3793 }, { "epoch": 0.6194032896616465, "grad_norm": 2.0414113998413086, "learning_rate": 1.9960720346911798e-05, "loss": 0.8349, "step": 3794 }, { "epoch": 0.6195665483041508, "grad_norm": 2.300799608230591, "learning_rate": 1.9960692243172163e-05, "loss": 0.842, "step": 3795 }, { "epoch": 0.6197298069466552, "grad_norm": 2.1251487731933594, "learning_rate": 1.9960664129402113e-05, "loss": 0.8899, "step": 3796 }, { "epoch": 0.6198930655891596, "grad_norm": 2.2507076263427734, "learning_rate": 1.9960636005601678e-05, "loss": 0.9363, "step": 3797 }, { "epoch": 0.620056324231664, "grad_norm": 2.0603346824645996, "learning_rate": 1.9960607871770886e-05, "loss": 0.9506, "step": 3798 }, { "epoch": 0.6202195828741685, "grad_norm": 2.081101417541504, "learning_rate": 1.9960579727909763e-05, "loss": 1.0015, "step": 3799 }, { "epoch": 0.6203828415166728, "grad_norm": 1.9013113975524902, "learning_rate": 1.996055157401834e-05, "loss": 0.6796, "step": 3800 }, { "epoch": 0.6205461001591772, "grad_norm": 2.277682304382324, "learning_rate": 1.9960523410096645e-05, "loss": 0.7942, "step": 3801 }, { "epoch": 0.6207093588016815, "grad_norm": 2.6727676391601562, "learning_rate": 1.9960495236144704e-05, "loss": 0.8886, "step": 3802 }, { "epoch": 0.620872617444186, "grad_norm": 2.2822396755218506, "learning_rate": 1.9960467052162548e-05, "loss": 0.9177, "step": 3803 }, { "epoch": 0.6210358760866903, "grad_norm": 1.922499418258667, "learning_rate": 1.99604388581502e-05, "loss": 0.807, "step": 3804 }, { "epoch": 0.6211991347291947, "grad_norm": 1.9836468696594238, "learning_rate": 1.99604106541077e-05, "loss": 0.8503, "step": 3805 }, { "epoch": 0.6213623933716991, "grad_norm": 1.9225810766220093, "learning_rate": 1.9960382440035063e-05, "loss": 0.9294, "step": 3806 }, { "epoch": 0.6215256520142035, "grad_norm": 1.9350652694702148, "learning_rate": 1.9960354215932324e-05, "loss": 0.8095, "step": 3807 }, { "epoch": 0.6216889106567078, "grad_norm": 2.0266857147216797, "learning_rate": 1.9960325981799516e-05, "loss": 0.8309, "step": 3808 }, { "epoch": 0.6218521692992123, "grad_norm": 2.285499334335327, "learning_rate": 1.9960297737636658e-05, "loss": 0.7849, "step": 3809 }, { "epoch": 0.6220154279417167, "grad_norm": 1.9849332571029663, "learning_rate": 1.9960269483443785e-05, "loss": 0.8395, "step": 3810 }, { "epoch": 0.622178686584221, "grad_norm": 1.8987846374511719, "learning_rate": 1.996024121922092e-05, "loss": 0.8899, "step": 3811 }, { "epoch": 0.6223419452267255, "grad_norm": 1.9115079641342163, "learning_rate": 1.9960212944968098e-05, "loss": 0.859, "step": 3812 }, { "epoch": 0.6225052038692298, "grad_norm": 1.9027541875839233, "learning_rate": 1.9960184660685345e-05, "loss": 0.7638, "step": 3813 }, { "epoch": 0.6226684625117342, "grad_norm": 1.9608430862426758, "learning_rate": 1.996015636637269e-05, "loss": 0.8208, "step": 3814 }, { "epoch": 0.6228317211542386, "grad_norm": 1.624884843826294, "learning_rate": 1.9960128062030153e-05, "loss": 0.6761, "step": 3815 }, { "epoch": 0.622994979796743, "grad_norm": 2.1514575481414795, "learning_rate": 1.9960099747657774e-05, "loss": 0.9092, "step": 3816 }, { "epoch": 0.6231582384392473, "grad_norm": 2.1382553577423096, "learning_rate": 1.9960071423255577e-05, "loss": 0.747, "step": 3817 }, { "epoch": 0.6233214970817518, "grad_norm": 2.1861326694488525, "learning_rate": 1.996004308882359e-05, "loss": 0.8345, "step": 3818 }, { "epoch": 0.6234847557242561, "grad_norm": 2.439685821533203, "learning_rate": 1.9960014744361844e-05, "loss": 1.1167, "step": 3819 }, { "epoch": 0.6236480143667605, "grad_norm": 2.220843553543091, "learning_rate": 1.9959986389870364e-05, "loss": 0.783, "step": 3820 }, { "epoch": 0.623811273009265, "grad_norm": 1.9950898885726929, "learning_rate": 1.995995802534918e-05, "loss": 0.7427, "step": 3821 }, { "epoch": 0.6239745316517693, "grad_norm": 2.029041051864624, "learning_rate": 1.9959929650798325e-05, "loss": 0.8306, "step": 3822 }, { "epoch": 0.6241377902942737, "grad_norm": 2.349630117416382, "learning_rate": 1.9959901266217818e-05, "loss": 0.9501, "step": 3823 }, { "epoch": 0.6243010489367781, "grad_norm": 2.2787833213806152, "learning_rate": 1.9959872871607696e-05, "loss": 0.8555, "step": 3824 }, { "epoch": 0.6244643075792825, "grad_norm": 2.0943055152893066, "learning_rate": 1.9959844466967985e-05, "loss": 0.6602, "step": 3825 }, { "epoch": 0.6246275662217868, "grad_norm": 2.1828486919403076, "learning_rate": 1.9959816052298713e-05, "loss": 0.8173, "step": 3826 }, { "epoch": 0.6247908248642913, "grad_norm": 2.2292890548706055, "learning_rate": 1.9959787627599907e-05, "loss": 0.9583, "step": 3827 }, { "epoch": 0.6249540835067956, "grad_norm": 1.7890475988388062, "learning_rate": 1.99597591928716e-05, "loss": 0.8043, "step": 3828 }, { "epoch": 0.6251173421493, "grad_norm": 2.1255531311035156, "learning_rate": 1.9959730748113814e-05, "loss": 0.7283, "step": 3829 }, { "epoch": 0.6252806007918044, "grad_norm": 1.5569697618484497, "learning_rate": 1.9959702293326585e-05, "loss": 0.5379, "step": 3830 }, { "epoch": 0.6254438594343088, "grad_norm": 2.226503610610962, "learning_rate": 1.995967382850994e-05, "loss": 0.7863, "step": 3831 }, { "epoch": 0.6256071180768132, "grad_norm": 2.0033159255981445, "learning_rate": 1.9959645353663904e-05, "loss": 0.8435, "step": 3832 }, { "epoch": 0.6257703767193176, "grad_norm": 2.1793508529663086, "learning_rate": 1.9959616868788506e-05, "loss": 0.7264, "step": 3833 }, { "epoch": 0.625933635361822, "grad_norm": 2.237682342529297, "learning_rate": 1.9959588373883784e-05, "loss": 0.8884, "step": 3834 }, { "epoch": 0.6260968940043263, "grad_norm": 2.142112970352173, "learning_rate": 1.995955986894975e-05, "loss": 0.7348, "step": 3835 }, { "epoch": 0.6262601526468308, "grad_norm": 1.862033724784851, "learning_rate": 1.9959531353986445e-05, "loss": 0.7222, "step": 3836 }, { "epoch": 0.6264234112893351, "grad_norm": 2.026902914047241, "learning_rate": 1.9959502828993896e-05, "loss": 0.8597, "step": 3837 }, { "epoch": 0.6265866699318395, "grad_norm": 2.1309542655944824, "learning_rate": 1.995947429397213e-05, "loss": 0.8716, "step": 3838 }, { "epoch": 0.6267499285743439, "grad_norm": 2.212858200073242, "learning_rate": 1.9959445748921176e-05, "loss": 0.9834, "step": 3839 }, { "epoch": 0.6269131872168483, "grad_norm": 2.2014424800872803, "learning_rate": 1.995941719384106e-05, "loss": 0.9188, "step": 3840 }, { "epoch": 0.6270764458593526, "grad_norm": 2.1766600608825684, "learning_rate": 1.9959388628731816e-05, "loss": 0.8088, "step": 3841 }, { "epoch": 0.6272397045018571, "grad_norm": 1.9613821506500244, "learning_rate": 1.9959360053593473e-05, "loss": 0.7239, "step": 3842 }, { "epoch": 0.6274029631443615, "grad_norm": 2.1956701278686523, "learning_rate": 1.9959331468426054e-05, "loss": 0.9328, "step": 3843 }, { "epoch": 0.6275662217868658, "grad_norm": 2.0547103881835938, "learning_rate": 1.995930287322959e-05, "loss": 0.7885, "step": 3844 }, { "epoch": 0.6277294804293703, "grad_norm": 2.6029984951019287, "learning_rate": 1.995927426800411e-05, "loss": 0.82, "step": 3845 }, { "epoch": 0.6278927390718746, "grad_norm": 2.6842284202575684, "learning_rate": 1.9959245652749647e-05, "loss": 0.9036, "step": 3846 }, { "epoch": 0.628055997714379, "grad_norm": 1.9465556144714355, "learning_rate": 1.9959217027466226e-05, "loss": 0.8334, "step": 3847 }, { "epoch": 0.6282192563568834, "grad_norm": 2.2189435958862305, "learning_rate": 1.9959188392153873e-05, "loss": 0.6879, "step": 3848 }, { "epoch": 0.6283825149993878, "grad_norm": 1.9280937910079956, "learning_rate": 1.995915974681262e-05, "loss": 0.8867, "step": 3849 }, { "epoch": 0.6285457736418921, "grad_norm": 2.1597084999084473, "learning_rate": 1.9959131091442497e-05, "loss": 0.8926, "step": 3850 }, { "epoch": 0.6287090322843966, "grad_norm": 2.059746742248535, "learning_rate": 1.9959102426043534e-05, "loss": 0.9488, "step": 3851 }, { "epoch": 0.6288722909269009, "grad_norm": 1.934052586555481, "learning_rate": 1.9959073750615756e-05, "loss": 0.8256, "step": 3852 }, { "epoch": 0.6290355495694053, "grad_norm": 2.3877503871917725, "learning_rate": 1.995904506515919e-05, "loss": 0.951, "step": 3853 }, { "epoch": 0.6291988082119098, "grad_norm": 2.0603699684143066, "learning_rate": 1.9959016369673873e-05, "loss": 0.9771, "step": 3854 }, { "epoch": 0.6293620668544141, "grad_norm": 2.3064584732055664, "learning_rate": 1.9958987664159826e-05, "loss": 0.8445, "step": 3855 }, { "epoch": 0.6295253254969185, "grad_norm": 1.651580810546875, "learning_rate": 1.9958958948617082e-05, "loss": 0.6746, "step": 3856 }, { "epoch": 0.6296885841394229, "grad_norm": 2.1996777057647705, "learning_rate": 1.995893022304567e-05, "loss": 0.9707, "step": 3857 }, { "epoch": 0.6298518427819273, "grad_norm": 2.047375202178955, "learning_rate": 1.9958901487445613e-05, "loss": 0.8364, "step": 3858 }, { "epoch": 0.6300151014244316, "grad_norm": 2.2454278469085693, "learning_rate": 1.995887274181695e-05, "loss": 0.9821, "step": 3859 }, { "epoch": 0.6301783600669361, "grad_norm": 2.129563331604004, "learning_rate": 1.9958843986159705e-05, "loss": 0.7714, "step": 3860 }, { "epoch": 0.6303416187094404, "grad_norm": 2.073392152786255, "learning_rate": 1.9958815220473905e-05, "loss": 0.7857, "step": 3861 }, { "epoch": 0.6305048773519448, "grad_norm": 1.8291730880737305, "learning_rate": 1.995878644475958e-05, "loss": 0.7052, "step": 3862 }, { "epoch": 0.6306681359944492, "grad_norm": 2.301786422729492, "learning_rate": 1.9958757659016765e-05, "loss": 0.8392, "step": 3863 }, { "epoch": 0.6308313946369536, "grad_norm": 1.9461169242858887, "learning_rate": 1.9958728863245475e-05, "loss": 0.8542, "step": 3864 }, { "epoch": 0.630994653279458, "grad_norm": 1.9911609888076782, "learning_rate": 1.9958700057445753e-05, "loss": 0.7682, "step": 3865 }, { "epoch": 0.6311579119219624, "grad_norm": 1.770376205444336, "learning_rate": 1.9958671241617625e-05, "loss": 0.6403, "step": 3866 }, { "epoch": 0.6313211705644668, "grad_norm": 1.987022876739502, "learning_rate": 1.9958642415761115e-05, "loss": 0.757, "step": 3867 }, { "epoch": 0.6314844292069711, "grad_norm": 2.4466052055358887, "learning_rate": 1.9958613579876253e-05, "loss": 0.9384, "step": 3868 }, { "epoch": 0.6316476878494756, "grad_norm": 2.070402145385742, "learning_rate": 1.995858473396307e-05, "loss": 0.8061, "step": 3869 }, { "epoch": 0.6318109464919799, "grad_norm": 2.2122299671173096, "learning_rate": 1.9958555878021596e-05, "loss": 0.6943, "step": 3870 }, { "epoch": 0.6319742051344843, "grad_norm": 1.8979194164276123, "learning_rate": 1.995852701205186e-05, "loss": 0.8448, "step": 3871 }, { "epoch": 0.6321374637769887, "grad_norm": 1.987113356590271, "learning_rate": 1.9958498136053888e-05, "loss": 0.855, "step": 3872 }, { "epoch": 0.6323007224194931, "grad_norm": 2.451205015182495, "learning_rate": 1.995846925002771e-05, "loss": 0.8958, "step": 3873 }, { "epoch": 0.6324639810619974, "grad_norm": 2.0886292457580566, "learning_rate": 1.995844035397336e-05, "loss": 0.7626, "step": 3874 }, { "epoch": 0.6326272397045019, "grad_norm": 1.9825876951217651, "learning_rate": 1.995841144789086e-05, "loss": 0.7961, "step": 3875 }, { "epoch": 0.6327904983470063, "grad_norm": 1.8546133041381836, "learning_rate": 1.9958382531780243e-05, "loss": 0.8598, "step": 3876 }, { "epoch": 0.6329537569895106, "grad_norm": 2.2103657722473145, "learning_rate": 1.9958353605641537e-05, "loss": 0.7279, "step": 3877 }, { "epoch": 0.6331170156320151, "grad_norm": 2.257733106613159, "learning_rate": 1.9958324669474774e-05, "loss": 0.8375, "step": 3878 }, { "epoch": 0.6332802742745194, "grad_norm": 1.9387556314468384, "learning_rate": 1.9958295723279978e-05, "loss": 0.6344, "step": 3879 }, { "epoch": 0.6334435329170238, "grad_norm": 2.170278549194336, "learning_rate": 1.9958266767057183e-05, "loss": 0.9048, "step": 3880 }, { "epoch": 0.6336067915595281, "grad_norm": 2.1518657207489014, "learning_rate": 1.995823780080641e-05, "loss": 0.9136, "step": 3881 }, { "epoch": 0.6337700502020326, "grad_norm": 1.9989204406738281, "learning_rate": 1.9958208824527702e-05, "loss": 0.7156, "step": 3882 }, { "epoch": 0.6339333088445369, "grad_norm": 1.9998705387115479, "learning_rate": 1.9958179838221078e-05, "loss": 0.8506, "step": 3883 }, { "epoch": 0.6340965674870414, "grad_norm": 2.1180222034454346, "learning_rate": 1.995815084188657e-05, "loss": 0.7945, "step": 3884 }, { "epoch": 0.6342598261295457, "grad_norm": 2.1036112308502197, "learning_rate": 1.9958121835524204e-05, "loss": 0.7738, "step": 3885 }, { "epoch": 0.6344230847720501, "grad_norm": 2.1715307235717773, "learning_rate": 1.9958092819134012e-05, "loss": 0.9625, "step": 3886 }, { "epoch": 0.6345863434145546, "grad_norm": 1.6309186220169067, "learning_rate": 1.9958063792716028e-05, "loss": 0.6469, "step": 3887 }, { "epoch": 0.6347496020570589, "grad_norm": 2.154336452484131, "learning_rate": 1.995803475627027e-05, "loss": 0.772, "step": 3888 }, { "epoch": 0.6349128606995633, "grad_norm": 1.9058438539505005, "learning_rate": 1.995800570979678e-05, "loss": 0.6833, "step": 3889 }, { "epoch": 0.6350761193420676, "grad_norm": 2.0721659660339355, "learning_rate": 1.9957976653295576e-05, "loss": 0.8417, "step": 3890 }, { "epoch": 0.6352393779845721, "grad_norm": 3.318566083908081, "learning_rate": 1.9957947586766695e-05, "loss": 0.9382, "step": 3891 }, { "epoch": 0.6354026366270764, "grad_norm": 1.9561172723770142, "learning_rate": 1.995791851021016e-05, "loss": 0.7667, "step": 3892 }, { "epoch": 0.6355658952695808, "grad_norm": 2.0578510761260986, "learning_rate": 1.9957889423626006e-05, "loss": 0.9117, "step": 3893 }, { "epoch": 0.6357291539120852, "grad_norm": 2.0947399139404297, "learning_rate": 1.9957860327014262e-05, "loss": 0.976, "step": 3894 }, { "epoch": 0.6358924125545896, "grad_norm": 2.0819363594055176, "learning_rate": 1.9957831220374953e-05, "loss": 0.8592, "step": 3895 }, { "epoch": 0.6360556711970939, "grad_norm": 2.1190083026885986, "learning_rate": 1.995780210370811e-05, "loss": 0.8897, "step": 3896 }, { "epoch": 0.6362189298395984, "grad_norm": 2.0488765239715576, "learning_rate": 1.9957772977013765e-05, "loss": 0.7649, "step": 3897 }, { "epoch": 0.6363821884821028, "grad_norm": 2.5979888439178467, "learning_rate": 1.9957743840291942e-05, "loss": 0.9276, "step": 3898 }, { "epoch": 0.6365454471246071, "grad_norm": 3.41827130317688, "learning_rate": 1.9957714693542678e-05, "loss": 1.1754, "step": 3899 }, { "epoch": 0.6367087057671116, "grad_norm": 2.1105499267578125, "learning_rate": 1.9957685536765998e-05, "loss": 0.8533, "step": 3900 }, { "epoch": 0.6368719644096159, "grad_norm": 1.8626378774642944, "learning_rate": 1.9957656369961928e-05, "loss": 0.6931, "step": 3901 }, { "epoch": 0.6370352230521203, "grad_norm": 1.8937807083129883, "learning_rate": 1.99576271931305e-05, "loss": 0.8176, "step": 3902 }, { "epoch": 0.6371984816946247, "grad_norm": 2.338810443878174, "learning_rate": 1.9957598006271745e-05, "loss": 0.9986, "step": 3903 }, { "epoch": 0.6373617403371291, "grad_norm": 2.3824877738952637, "learning_rate": 1.9957568809385693e-05, "loss": 0.7272, "step": 3904 }, { "epoch": 0.6375249989796334, "grad_norm": 2.1532349586486816, "learning_rate": 1.995753960247237e-05, "loss": 0.821, "step": 3905 }, { "epoch": 0.6376882576221379, "grad_norm": 2.2774415016174316, "learning_rate": 1.995751038553181e-05, "loss": 0.7677, "step": 3906 }, { "epoch": 0.6378515162646422, "grad_norm": 2.0611109733581543, "learning_rate": 1.9957481158564037e-05, "loss": 0.9521, "step": 3907 }, { "epoch": 0.6380147749071466, "grad_norm": 2.0877084732055664, "learning_rate": 1.9957451921569084e-05, "loss": 0.9196, "step": 3908 }, { "epoch": 0.6381780335496511, "grad_norm": 2.128732204437256, "learning_rate": 1.995742267454698e-05, "loss": 0.914, "step": 3909 }, { "epoch": 0.6383412921921554, "grad_norm": 2.6169135570526123, "learning_rate": 1.9957393417497753e-05, "loss": 0.8113, "step": 3910 }, { "epoch": 0.6385045508346598, "grad_norm": 2.217097759246826, "learning_rate": 1.9957364150421435e-05, "loss": 0.8716, "step": 3911 }, { "epoch": 0.6386678094771642, "grad_norm": 1.9963403940200806, "learning_rate": 1.9957334873318052e-05, "loss": 0.867, "step": 3912 }, { "epoch": 0.6388310681196686, "grad_norm": 1.984139323234558, "learning_rate": 1.9957305586187636e-05, "loss": 0.8685, "step": 3913 }, { "epoch": 0.6389943267621729, "grad_norm": 2.370695114135742, "learning_rate": 1.9957276289030217e-05, "loss": 1.0087, "step": 3914 }, { "epoch": 0.6391575854046774, "grad_norm": 2.2320048809051514, "learning_rate": 1.9957246981845825e-05, "loss": 0.9583, "step": 3915 }, { "epoch": 0.6393208440471817, "grad_norm": 2.2000739574432373, "learning_rate": 1.9957217664634484e-05, "loss": 0.8816, "step": 3916 }, { "epoch": 0.6394841026896861, "grad_norm": 2.422565460205078, "learning_rate": 1.9957188337396228e-05, "loss": 0.776, "step": 3917 }, { "epoch": 0.6396473613321906, "grad_norm": 2.2651000022888184, "learning_rate": 1.9957159000131087e-05, "loss": 0.8537, "step": 3918 }, { "epoch": 0.6398106199746949, "grad_norm": 2.1559486389160156, "learning_rate": 1.9957129652839092e-05, "loss": 0.8695, "step": 3919 }, { "epoch": 0.6399738786171993, "grad_norm": 2.0040483474731445, "learning_rate": 1.9957100295520267e-05, "loss": 0.783, "step": 3920 }, { "epoch": 0.6401371372597037, "grad_norm": 2.025927782058716, "learning_rate": 1.9957070928174645e-05, "loss": 0.7896, "step": 3921 }, { "epoch": 0.6403003959022081, "grad_norm": 2.081263780593872, "learning_rate": 1.9957041550802257e-05, "loss": 0.8882, "step": 3922 }, { "epoch": 0.6404636545447124, "grad_norm": 2.5506882667541504, "learning_rate": 1.995701216340313e-05, "loss": 0.8313, "step": 3923 }, { "epoch": 0.6406269131872169, "grad_norm": 1.8875963687896729, "learning_rate": 1.995698276597729e-05, "loss": 0.895, "step": 3924 }, { "epoch": 0.6407901718297212, "grad_norm": 1.805955171585083, "learning_rate": 1.9956953358524774e-05, "loss": 0.7252, "step": 3925 }, { "epoch": 0.6409534304722256, "grad_norm": 2.4521286487579346, "learning_rate": 1.9956923941045613e-05, "loss": 1.0291, "step": 3926 }, { "epoch": 0.64111668911473, "grad_norm": 2.2084898948669434, "learning_rate": 1.995689451353983e-05, "loss": 0.9433, "step": 3927 }, { "epoch": 0.6412799477572344, "grad_norm": 1.8056122064590454, "learning_rate": 1.995686507600745e-05, "loss": 0.6321, "step": 3928 }, { "epoch": 0.6414432063997388, "grad_norm": 2.013023853302002, "learning_rate": 1.995683562844852e-05, "loss": 0.8773, "step": 3929 }, { "epoch": 0.6416064650422432, "grad_norm": 1.9381183385849, "learning_rate": 1.995680617086305e-05, "loss": 0.736, "step": 3930 }, { "epoch": 0.6417697236847476, "grad_norm": 2.0055699348449707, "learning_rate": 1.9956776703251083e-05, "loss": 0.7631, "step": 3931 }, { "epoch": 0.6419329823272519, "grad_norm": 1.858241319656372, "learning_rate": 1.9956747225612643e-05, "loss": 0.7891, "step": 3932 }, { "epoch": 0.6420962409697564, "grad_norm": 2.552084445953369, "learning_rate": 1.9956717737947766e-05, "loss": 0.89, "step": 3933 }, { "epoch": 0.6422594996122607, "grad_norm": 2.1156399250030518, "learning_rate": 1.9956688240256473e-05, "loss": 0.882, "step": 3934 }, { "epoch": 0.6424227582547651, "grad_norm": 1.8388075828552246, "learning_rate": 1.99566587325388e-05, "loss": 0.7159, "step": 3935 }, { "epoch": 0.6425860168972695, "grad_norm": 2.6280083656311035, "learning_rate": 1.9956629214794773e-05, "loss": 0.9021, "step": 3936 }, { "epoch": 0.6427492755397739, "grad_norm": 1.8802495002746582, "learning_rate": 1.995659968702442e-05, "loss": 0.8772, "step": 3937 }, { "epoch": 0.6429125341822782, "grad_norm": 1.6542173624038696, "learning_rate": 1.9956570149227777e-05, "loss": 0.6515, "step": 3938 }, { "epoch": 0.6430757928247827, "grad_norm": 2.072232484817505, "learning_rate": 1.995654060140487e-05, "loss": 0.817, "step": 3939 }, { "epoch": 0.6432390514672871, "grad_norm": 2.2016494274139404, "learning_rate": 1.995651104355573e-05, "loss": 0.8919, "step": 3940 }, { "epoch": 0.6434023101097914, "grad_norm": 2.393803834915161, "learning_rate": 1.9956481475680384e-05, "loss": 0.8891, "step": 3941 }, { "epoch": 0.6435655687522959, "grad_norm": 2.1537551879882812, "learning_rate": 1.9956451897778864e-05, "loss": 0.8584, "step": 3942 }, { "epoch": 0.6437288273948002, "grad_norm": 2.1254405975341797, "learning_rate": 1.99564223098512e-05, "loss": 0.7949, "step": 3943 }, { "epoch": 0.6438920860373046, "grad_norm": 2.1835193634033203, "learning_rate": 1.995639271189742e-05, "loss": 0.8324, "step": 3944 }, { "epoch": 0.644055344679809, "grad_norm": 2.6670496463775635, "learning_rate": 1.995636310391756e-05, "loss": 0.8234, "step": 3945 }, { "epoch": 0.6442186033223134, "grad_norm": 1.982601523399353, "learning_rate": 1.9956333485911642e-05, "loss": 0.7973, "step": 3946 }, { "epoch": 0.6443818619648177, "grad_norm": 2.302584648132324, "learning_rate": 1.9956303857879698e-05, "loss": 0.7846, "step": 3947 }, { "epoch": 0.6445451206073222, "grad_norm": 2.196352958679199, "learning_rate": 1.995627421982176e-05, "loss": 0.8901, "step": 3948 }, { "epoch": 0.6447083792498265, "grad_norm": 2.1761200428009033, "learning_rate": 1.9956244571737855e-05, "loss": 0.9241, "step": 3949 }, { "epoch": 0.6448716378923309, "grad_norm": 2.5097243785858154, "learning_rate": 1.9956214913628015e-05, "loss": 0.8693, "step": 3950 }, { "epoch": 0.6450348965348354, "grad_norm": 1.8800350427627563, "learning_rate": 1.995618524549227e-05, "loss": 0.8748, "step": 3951 }, { "epoch": 0.6451981551773397, "grad_norm": 1.6585696935653687, "learning_rate": 1.9956155567330648e-05, "loss": 0.6693, "step": 3952 }, { "epoch": 0.6453614138198441, "grad_norm": 2.103243589401245, "learning_rate": 1.995612587914318e-05, "loss": 0.9704, "step": 3953 }, { "epoch": 0.6455246724623485, "grad_norm": 2.090947389602661, "learning_rate": 1.99560961809299e-05, "loss": 0.9566, "step": 3954 }, { "epoch": 0.6456879311048529, "grad_norm": 1.9720873832702637, "learning_rate": 1.9956066472690826e-05, "loss": 0.7959, "step": 3955 }, { "epoch": 0.6458511897473572, "grad_norm": 1.688127875328064, "learning_rate": 1.9956036754426004e-05, "loss": 0.7929, "step": 3956 }, { "epoch": 0.6460144483898617, "grad_norm": 1.9238029718399048, "learning_rate": 1.9956007026135448e-05, "loss": 0.7159, "step": 3957 }, { "epoch": 0.646177707032366, "grad_norm": 2.5537431240081787, "learning_rate": 1.99559772878192e-05, "loss": 1.0091, "step": 3958 }, { "epoch": 0.6463409656748704, "grad_norm": 1.8655478954315186, "learning_rate": 1.9955947539477285e-05, "loss": 0.7544, "step": 3959 }, { "epoch": 0.6465042243173748, "grad_norm": 2.066704034805298, "learning_rate": 1.995591778110973e-05, "loss": 0.7309, "step": 3960 }, { "epoch": 0.6466674829598792, "grad_norm": 2.049016237258911, "learning_rate": 1.9955888012716574e-05, "loss": 0.8132, "step": 3961 }, { "epoch": 0.6468307416023836, "grad_norm": 2.3270530700683594, "learning_rate": 1.995585823429784e-05, "loss": 0.9067, "step": 3962 }, { "epoch": 0.646994000244888, "grad_norm": 2.370431661605835, "learning_rate": 1.995582844585356e-05, "loss": 1.0286, "step": 3963 }, { "epoch": 0.6471572588873924, "grad_norm": 1.9145859479904175, "learning_rate": 1.995579864738376e-05, "loss": 0.7595, "step": 3964 }, { "epoch": 0.6473205175298967, "grad_norm": 2.0103914737701416, "learning_rate": 1.9955768838888473e-05, "loss": 0.7468, "step": 3965 }, { "epoch": 0.6474837761724012, "grad_norm": 2.1546573638916016, "learning_rate": 1.9955739020367733e-05, "loss": 0.819, "step": 3966 }, { "epoch": 0.6476470348149055, "grad_norm": 2.047351837158203, "learning_rate": 1.9955709191821565e-05, "loss": 0.8153, "step": 3967 }, { "epoch": 0.6478102934574099, "grad_norm": 2.5663466453552246, "learning_rate": 1.995567935325e-05, "loss": 0.9616, "step": 3968 }, { "epoch": 0.6479735520999143, "grad_norm": 2.136587142944336, "learning_rate": 1.995564950465307e-05, "loss": 0.7275, "step": 3969 }, { "epoch": 0.6481368107424187, "grad_norm": 2.013615846633911, "learning_rate": 1.99556196460308e-05, "loss": 0.7894, "step": 3970 }, { "epoch": 0.648300069384923, "grad_norm": 2.1864113807678223, "learning_rate": 1.995558977738323e-05, "loss": 0.823, "step": 3971 }, { "epoch": 0.6484633280274275, "grad_norm": 2.0250179767608643, "learning_rate": 1.9955559898710377e-05, "loss": 0.8342, "step": 3972 }, { "epoch": 0.6486265866699319, "grad_norm": 2.1324098110198975, "learning_rate": 1.9955530010012283e-05, "loss": 0.9318, "step": 3973 }, { "epoch": 0.6487898453124362, "grad_norm": 1.7510625123977661, "learning_rate": 1.995550011128897e-05, "loss": 0.7564, "step": 3974 }, { "epoch": 0.6489531039549407, "grad_norm": 2.1405553817749023, "learning_rate": 1.9955470202540472e-05, "loss": 0.8508, "step": 3975 }, { "epoch": 0.649116362597445, "grad_norm": 2.23287296295166, "learning_rate": 1.995544028376682e-05, "loss": 0.9902, "step": 3976 }, { "epoch": 0.6492796212399494, "grad_norm": 1.8539090156555176, "learning_rate": 1.9955410354968038e-05, "loss": 0.8302, "step": 3977 }, { "epoch": 0.6494428798824537, "grad_norm": 1.7731791734695435, "learning_rate": 1.9955380416144164e-05, "loss": 0.6716, "step": 3978 }, { "epoch": 0.6496061385249582, "grad_norm": 1.912860631942749, "learning_rate": 1.995535046729522e-05, "loss": 0.8558, "step": 3979 }, { "epoch": 0.6497693971674625, "grad_norm": 2.142641305923462, "learning_rate": 1.9955320508421247e-05, "loss": 0.9237, "step": 3980 }, { "epoch": 0.649932655809967, "grad_norm": 1.9753752946853638, "learning_rate": 1.9955290539522262e-05, "loss": 0.7924, "step": 3981 }, { "epoch": 0.6500959144524713, "grad_norm": 1.8638825416564941, "learning_rate": 1.9955260560598306e-05, "loss": 0.7445, "step": 3982 }, { "epoch": 0.6502591730949757, "grad_norm": 2.375680685043335, "learning_rate": 1.9955230571649407e-05, "loss": 1.0534, "step": 3983 }, { "epoch": 0.6504224317374802, "grad_norm": 2.331519842147827, "learning_rate": 1.9955200572675593e-05, "loss": 0.7965, "step": 3984 }, { "epoch": 0.6505856903799845, "grad_norm": 1.7263696193695068, "learning_rate": 1.9955170563676892e-05, "loss": 0.673, "step": 3985 }, { "epoch": 0.6507489490224889, "grad_norm": 1.8704642057418823, "learning_rate": 1.9955140544653336e-05, "loss": 1.0084, "step": 3986 }, { "epoch": 0.6509122076649932, "grad_norm": 1.694795846939087, "learning_rate": 1.995511051560496e-05, "loss": 0.7092, "step": 3987 }, { "epoch": 0.6510754663074977, "grad_norm": 2.0374913215637207, "learning_rate": 1.9955080476531788e-05, "loss": 0.8642, "step": 3988 }, { "epoch": 0.651238724950002, "grad_norm": 1.8005086183547974, "learning_rate": 1.9955050427433857e-05, "loss": 0.7632, "step": 3989 }, { "epoch": 0.6514019835925065, "grad_norm": 2.0485095977783203, "learning_rate": 1.9955020368311185e-05, "loss": 0.7562, "step": 3990 }, { "epoch": 0.6515652422350108, "grad_norm": 1.7770757675170898, "learning_rate": 1.9954990299163814e-05, "loss": 0.6999, "step": 3991 }, { "epoch": 0.6517285008775152, "grad_norm": 1.9794975519180298, "learning_rate": 1.995496021999177e-05, "loss": 0.7433, "step": 3992 }, { "epoch": 0.6518917595200195, "grad_norm": 2.2751927375793457, "learning_rate": 1.9954930130795084e-05, "loss": 0.9534, "step": 3993 }, { "epoch": 0.652055018162524, "grad_norm": 2.1149494647979736, "learning_rate": 1.9954900031573788e-05, "loss": 0.8748, "step": 3994 }, { "epoch": 0.6522182768050284, "grad_norm": 2.0718464851379395, "learning_rate": 1.9954869922327908e-05, "loss": 0.8226, "step": 3995 }, { "epoch": 0.6523815354475327, "grad_norm": 1.923607349395752, "learning_rate": 1.9954839803057478e-05, "loss": 0.7806, "step": 3996 }, { "epoch": 0.6525447940900372, "grad_norm": 2.0130627155303955, "learning_rate": 1.9954809673762528e-05, "loss": 0.8563, "step": 3997 }, { "epoch": 0.6527080527325415, "grad_norm": 2.1710920333862305, "learning_rate": 1.995477953444308e-05, "loss": 0.9389, "step": 3998 }, { "epoch": 0.652871311375046, "grad_norm": 1.9817372560501099, "learning_rate": 1.995474938509918e-05, "loss": 0.8255, "step": 3999 }, { "epoch": 0.6530345700175503, "grad_norm": 2.010631799697876, "learning_rate": 1.9954719225730847e-05, "loss": 0.6978, "step": 4000 }, { "epoch": 0.6531978286600547, "grad_norm": 1.9969439506530762, "learning_rate": 1.9954689056338113e-05, "loss": 0.7017, "step": 4001 }, { "epoch": 0.653361087302559, "grad_norm": 1.8172589540481567, "learning_rate": 1.9954658876921012e-05, "loss": 0.6477, "step": 4002 }, { "epoch": 0.6535243459450635, "grad_norm": 1.9709951877593994, "learning_rate": 1.995462868747957e-05, "loss": 0.8902, "step": 4003 }, { "epoch": 0.6536876045875678, "grad_norm": 2.084540605545044, "learning_rate": 1.9954598488013826e-05, "loss": 0.8218, "step": 4004 }, { "epoch": 0.6538508632300722, "grad_norm": 2.008793354034424, "learning_rate": 1.9954568278523796e-05, "loss": 0.8614, "step": 4005 }, { "epoch": 0.6540141218725767, "grad_norm": 2.1246957778930664, "learning_rate": 1.9954538059009523e-05, "loss": 0.8835, "step": 4006 }, { "epoch": 0.654177380515081, "grad_norm": 2.1777400970458984, "learning_rate": 1.995450782947103e-05, "loss": 1.0186, "step": 4007 }, { "epoch": 0.6543406391575854, "grad_norm": 2.5215978622436523, "learning_rate": 1.995447758990835e-05, "loss": 0.9352, "step": 4008 }, { "epoch": 0.6545038978000898, "grad_norm": 2.022606372833252, "learning_rate": 1.9954447340321516e-05, "loss": 0.9196, "step": 4009 }, { "epoch": 0.6546671564425942, "grad_norm": 1.7668960094451904, "learning_rate": 1.9954417080710557e-05, "loss": 0.748, "step": 4010 }, { "epoch": 0.6548304150850985, "grad_norm": 2.0687551498413086, "learning_rate": 1.9954386811075502e-05, "loss": 0.9036, "step": 4011 }, { "epoch": 0.654993673727603, "grad_norm": 1.5938286781311035, "learning_rate": 1.995435653141638e-05, "loss": 0.6414, "step": 4012 }, { "epoch": 0.6551569323701073, "grad_norm": 2.12983775138855, "learning_rate": 1.9954326241733223e-05, "loss": 0.9285, "step": 4013 }, { "epoch": 0.6553201910126117, "grad_norm": 2.091355085372925, "learning_rate": 1.9954295942026065e-05, "loss": 0.9115, "step": 4014 }, { "epoch": 0.6554834496551161, "grad_norm": 1.7807918787002563, "learning_rate": 1.995426563229493e-05, "loss": 0.6876, "step": 4015 }, { "epoch": 0.6556467082976205, "grad_norm": 2.3356451988220215, "learning_rate": 1.9954235312539855e-05, "loss": 0.6976, "step": 4016 }, { "epoch": 0.6558099669401249, "grad_norm": 2.1427462100982666, "learning_rate": 1.995420498276087e-05, "loss": 0.9099, "step": 4017 }, { "epoch": 0.6559732255826293, "grad_norm": 1.99425208568573, "learning_rate": 1.9954174642958e-05, "loss": 0.8985, "step": 4018 }, { "epoch": 0.6561364842251337, "grad_norm": 2.2237167358398438, "learning_rate": 1.9954144293131275e-05, "loss": 0.7749, "step": 4019 }, { "epoch": 0.656299742867638, "grad_norm": 1.7875380516052246, "learning_rate": 1.9954113933280737e-05, "loss": 0.7482, "step": 4020 }, { "epoch": 0.6564630015101425, "grad_norm": 1.9485515356063843, "learning_rate": 1.9954083563406407e-05, "loss": 0.8339, "step": 4021 }, { "epoch": 0.6566262601526468, "grad_norm": 1.8322675228118896, "learning_rate": 1.9954053183508317e-05, "loss": 0.7418, "step": 4022 }, { "epoch": 0.6567895187951512, "grad_norm": 1.6777312755584717, "learning_rate": 1.9954022793586492e-05, "loss": 0.7214, "step": 4023 }, { "epoch": 0.6569527774376556, "grad_norm": 2.226450204849243, "learning_rate": 1.9953992393640975e-05, "loss": 0.753, "step": 4024 }, { "epoch": 0.65711603608016, "grad_norm": 1.9682592153549194, "learning_rate": 1.9953961983671792e-05, "loss": 0.7863, "step": 4025 }, { "epoch": 0.6572792947226643, "grad_norm": 2.1388142108917236, "learning_rate": 1.9953931563678966e-05, "loss": 0.7784, "step": 4026 }, { "epoch": 0.6574425533651688, "grad_norm": 2.3465826511383057, "learning_rate": 1.995390113366254e-05, "loss": 0.8246, "step": 4027 }, { "epoch": 0.6576058120076732, "grad_norm": 1.8669836521148682, "learning_rate": 1.995387069362253e-05, "loss": 0.7372, "step": 4028 }, { "epoch": 0.6577690706501775, "grad_norm": 2.10113787651062, "learning_rate": 1.9953840243558982e-05, "loss": 0.8374, "step": 4029 }, { "epoch": 0.657932329292682, "grad_norm": 1.9991933107376099, "learning_rate": 1.9953809783471917e-05, "loss": 0.8547, "step": 4030 }, { "epoch": 0.6580955879351863, "grad_norm": 1.760053277015686, "learning_rate": 1.9953779313361368e-05, "loss": 0.8386, "step": 4031 }, { "epoch": 0.6582588465776907, "grad_norm": 2.0603861808776855, "learning_rate": 1.9953748833227364e-05, "loss": 0.7094, "step": 4032 }, { "epoch": 0.6584221052201951, "grad_norm": 1.8455878496170044, "learning_rate": 1.995371834306994e-05, "loss": 0.7988, "step": 4033 }, { "epoch": 0.6585853638626995, "grad_norm": 2.02435040473938, "learning_rate": 1.9953687842889126e-05, "loss": 0.7722, "step": 4034 }, { "epoch": 0.6587486225052038, "grad_norm": 1.9577025175094604, "learning_rate": 1.9953657332684947e-05, "loss": 0.8516, "step": 4035 }, { "epoch": 0.6589118811477083, "grad_norm": 1.9614293575286865, "learning_rate": 1.995362681245744e-05, "loss": 0.7988, "step": 4036 }, { "epoch": 0.6590751397902126, "grad_norm": 1.9903169870376587, "learning_rate": 1.995359628220663e-05, "loss": 0.8822, "step": 4037 }, { "epoch": 0.659238398432717, "grad_norm": 2.0466933250427246, "learning_rate": 1.995356574193256e-05, "loss": 0.7112, "step": 4038 }, { "epoch": 0.6594016570752215, "grad_norm": 2.1602303981781006, "learning_rate": 1.9953535191635245e-05, "loss": 0.8392, "step": 4039 }, { "epoch": 0.6595649157177258, "grad_norm": 2.1674540042877197, "learning_rate": 1.9953504631314722e-05, "loss": 0.911, "step": 4040 }, { "epoch": 0.6597281743602302, "grad_norm": 1.9284535646438599, "learning_rate": 1.9953474060971024e-05, "loss": 0.8454, "step": 4041 }, { "epoch": 0.6598914330027346, "grad_norm": 1.9467206001281738, "learning_rate": 1.9953443480604182e-05, "loss": 0.8372, "step": 4042 }, { "epoch": 0.660054691645239, "grad_norm": 1.886176347732544, "learning_rate": 1.9953412890214223e-05, "loss": 0.6558, "step": 4043 }, { "epoch": 0.6602179502877433, "grad_norm": 1.801827311515808, "learning_rate": 1.995338228980118e-05, "loss": 0.7936, "step": 4044 }, { "epoch": 0.6603812089302478, "grad_norm": 2.0684685707092285, "learning_rate": 1.9953351679365086e-05, "loss": 0.8093, "step": 4045 }, { "epoch": 0.6605444675727521, "grad_norm": 2.105072498321533, "learning_rate": 1.995332105890597e-05, "loss": 0.8979, "step": 4046 }, { "epoch": 0.6607077262152565, "grad_norm": 2.3216841220855713, "learning_rate": 1.9953290428423857e-05, "loss": 0.9275, "step": 4047 }, { "epoch": 0.6608709848577609, "grad_norm": 2.2121288776397705, "learning_rate": 1.9953259787918788e-05, "loss": 0.8287, "step": 4048 }, { "epoch": 0.6610342435002653, "grad_norm": 2.5402417182922363, "learning_rate": 1.9953229137390787e-05, "loss": 0.6928, "step": 4049 }, { "epoch": 0.6611975021427697, "grad_norm": 2.049816846847534, "learning_rate": 1.9953198476839886e-05, "loss": 0.8361, "step": 4050 }, { "epoch": 0.6613607607852741, "grad_norm": 1.8441988229751587, "learning_rate": 1.995316780626612e-05, "loss": 0.7696, "step": 4051 }, { "epoch": 0.6615240194277785, "grad_norm": 2.2757208347320557, "learning_rate": 1.9953137125669513e-05, "loss": 0.8383, "step": 4052 }, { "epoch": 0.6616872780702828, "grad_norm": 1.8649359941482544, "learning_rate": 1.99531064350501e-05, "loss": 0.7691, "step": 4053 }, { "epoch": 0.6618505367127873, "grad_norm": 1.9752105474472046, "learning_rate": 1.9953075734407915e-05, "loss": 0.8499, "step": 4054 }, { "epoch": 0.6620137953552916, "grad_norm": 2.0030970573425293, "learning_rate": 1.9953045023742984e-05, "loss": 0.857, "step": 4055 }, { "epoch": 0.662177053997796, "grad_norm": 2.2038145065307617, "learning_rate": 1.9953014303055336e-05, "loss": 0.9591, "step": 4056 }, { "epoch": 0.6623403126403004, "grad_norm": 1.8572810888290405, "learning_rate": 1.9952983572345008e-05, "loss": 0.8644, "step": 4057 }, { "epoch": 0.6625035712828048, "grad_norm": 1.8880647420883179, "learning_rate": 1.9952952831612027e-05, "loss": 0.7326, "step": 4058 }, { "epoch": 0.6626668299253091, "grad_norm": 1.7331615686416626, "learning_rate": 1.9952922080856427e-05, "loss": 0.7337, "step": 4059 }, { "epoch": 0.6628300885678136, "grad_norm": 1.9105256795883179, "learning_rate": 1.9952891320078235e-05, "loss": 0.7964, "step": 4060 }, { "epoch": 0.662993347210318, "grad_norm": 1.8040122985839844, "learning_rate": 1.9952860549277485e-05, "loss": 0.629, "step": 4061 }, { "epoch": 0.6631566058528223, "grad_norm": 2.099109649658203, "learning_rate": 1.9952829768454208e-05, "loss": 0.8116, "step": 4062 }, { "epoch": 0.6633198644953268, "grad_norm": 2.135161876678467, "learning_rate": 1.995279897760843e-05, "loss": 0.8711, "step": 4063 }, { "epoch": 0.6634831231378311, "grad_norm": 2.1493113040924072, "learning_rate": 1.9952768176740193e-05, "loss": 0.8911, "step": 4064 }, { "epoch": 0.6636463817803355, "grad_norm": 2.266852855682373, "learning_rate": 1.9952737365849516e-05, "loss": 0.9986, "step": 4065 }, { "epoch": 0.6638096404228399, "grad_norm": 2.052060604095459, "learning_rate": 1.9952706544936437e-05, "loss": 0.8148, "step": 4066 }, { "epoch": 0.6639728990653443, "grad_norm": 1.8236923217773438, "learning_rate": 1.9952675714000983e-05, "loss": 0.7644, "step": 4067 }, { "epoch": 0.6641361577078486, "grad_norm": 1.8633387088775635, "learning_rate": 1.995264487304319e-05, "loss": 0.7575, "step": 4068 }, { "epoch": 0.6642994163503531, "grad_norm": 2.448219060897827, "learning_rate": 1.9952614022063085e-05, "loss": 0.9747, "step": 4069 }, { "epoch": 0.6644626749928574, "grad_norm": 2.0879111289978027, "learning_rate": 1.9952583161060702e-05, "loss": 0.8959, "step": 4070 }, { "epoch": 0.6646259336353618, "grad_norm": 2.194457530975342, "learning_rate": 1.9952552290036066e-05, "loss": 1.5685, "step": 4071 }, { "epoch": 0.6647891922778663, "grad_norm": 2.1314384937286377, "learning_rate": 1.9952521408989215e-05, "loss": 0.8554, "step": 4072 }, { "epoch": 0.6649524509203706, "grad_norm": 1.9167932271957397, "learning_rate": 1.9952490517920178e-05, "loss": 0.8732, "step": 4073 }, { "epoch": 0.665115709562875, "grad_norm": 2.3634629249572754, "learning_rate": 1.9952459616828986e-05, "loss": 0.7868, "step": 4074 }, { "epoch": 0.6652789682053794, "grad_norm": 2.101855993270874, "learning_rate": 1.995242870571567e-05, "loss": 0.8442, "step": 4075 }, { "epoch": 0.6654422268478838, "grad_norm": 1.9536573886871338, "learning_rate": 1.995239778458026e-05, "loss": 0.8829, "step": 4076 }, { "epoch": 0.6656054854903881, "grad_norm": 1.6369829177856445, "learning_rate": 1.995236685342279e-05, "loss": 0.7549, "step": 4077 }, { "epoch": 0.6657687441328926, "grad_norm": 2.187455654144287, "learning_rate": 1.9952335912243284e-05, "loss": 0.7359, "step": 4078 }, { "epoch": 0.6659320027753969, "grad_norm": 2.4019758701324463, "learning_rate": 1.9952304961041783e-05, "loss": 0.9003, "step": 4079 }, { "epoch": 0.6660952614179013, "grad_norm": 1.9343417882919312, "learning_rate": 1.9952273999818312e-05, "loss": 0.7625, "step": 4080 }, { "epoch": 0.6662585200604056, "grad_norm": 2.135756015777588, "learning_rate": 1.9952243028572904e-05, "loss": 0.9449, "step": 4081 }, { "epoch": 0.6664217787029101, "grad_norm": 2.0714633464813232, "learning_rate": 1.9952212047305592e-05, "loss": 0.7356, "step": 4082 }, { "epoch": 0.6665850373454145, "grad_norm": 1.6581432819366455, "learning_rate": 1.9952181056016403e-05, "loss": 0.76, "step": 4083 }, { "epoch": 0.6667482959879188, "grad_norm": 2.813175678253174, "learning_rate": 1.995215005470537e-05, "loss": 0.9086, "step": 4084 }, { "epoch": 0.6669115546304233, "grad_norm": 2.511840343475342, "learning_rate": 1.9952119043372526e-05, "loss": 0.9545, "step": 4085 }, { "epoch": 0.6670748132729276, "grad_norm": 1.8923448324203491, "learning_rate": 1.99520880220179e-05, "loss": 0.7757, "step": 4086 }, { "epoch": 0.667238071915432, "grad_norm": 1.7587465047836304, "learning_rate": 1.9952056990641523e-05, "loss": 0.5989, "step": 4087 }, { "epoch": 0.6674013305579364, "grad_norm": 1.9205251932144165, "learning_rate": 1.9952025949243427e-05, "loss": 0.6685, "step": 4088 }, { "epoch": 0.6675645892004408, "grad_norm": 2.4398887157440186, "learning_rate": 1.9951994897823647e-05, "loss": 0.9893, "step": 4089 }, { "epoch": 0.6677278478429451, "grad_norm": 2.3446192741394043, "learning_rate": 1.9951963836382206e-05, "loss": 0.7545, "step": 4090 }, { "epoch": 0.6678911064854496, "grad_norm": 1.7858868837356567, "learning_rate": 1.9951932764919143e-05, "loss": 0.6628, "step": 4091 }, { "epoch": 0.6680543651279539, "grad_norm": 1.98744535446167, "learning_rate": 1.9951901683434487e-05, "loss": 0.7449, "step": 4092 }, { "epoch": 0.6682176237704583, "grad_norm": 1.9634106159210205, "learning_rate": 1.9951870591928266e-05, "loss": 0.9274, "step": 4093 }, { "epoch": 0.6683808824129628, "grad_norm": 1.6654844284057617, "learning_rate": 1.9951839490400514e-05, "loss": 0.6702, "step": 4094 }, { "epoch": 0.6685441410554671, "grad_norm": 1.8778430223464966, "learning_rate": 1.9951808378851264e-05, "loss": 0.7187, "step": 4095 }, { "epoch": 0.6687073996979715, "grad_norm": 1.9327142238616943, "learning_rate": 1.9951777257280547e-05, "loss": 0.907, "step": 4096 }, { "epoch": 0.6688706583404759, "grad_norm": 1.9685276746749878, "learning_rate": 1.995174612568839e-05, "loss": 0.7314, "step": 4097 }, { "epoch": 0.6690339169829803, "grad_norm": 1.7189851999282837, "learning_rate": 1.995171498407483e-05, "loss": 0.8234, "step": 4098 }, { "epoch": 0.6691971756254846, "grad_norm": 1.8868552446365356, "learning_rate": 1.9951683832439892e-05, "loss": 0.8507, "step": 4099 }, { "epoch": 0.6693604342679891, "grad_norm": 2.4335665702819824, "learning_rate": 1.9951652670783615e-05, "loss": 0.8789, "step": 4100 }, { "epoch": 0.6695236929104934, "grad_norm": 2.0909385681152344, "learning_rate": 1.9951621499106024e-05, "loss": 0.7983, "step": 4101 }, { "epoch": 0.6696869515529978, "grad_norm": 1.7976733446121216, "learning_rate": 1.9951590317407152e-05, "loss": 0.6667, "step": 4102 }, { "epoch": 0.6698502101955022, "grad_norm": 2.168764352798462, "learning_rate": 1.9951559125687033e-05, "loss": 0.7478, "step": 4103 }, { "epoch": 0.6700134688380066, "grad_norm": 1.8159606456756592, "learning_rate": 1.9951527923945698e-05, "loss": 0.7724, "step": 4104 }, { "epoch": 0.670176727480511, "grad_norm": 1.8856970071792603, "learning_rate": 1.9951496712183177e-05, "loss": 0.7946, "step": 4105 }, { "epoch": 0.6703399861230154, "grad_norm": 1.654207706451416, "learning_rate": 1.9951465490399497e-05, "loss": 0.6858, "step": 4106 }, { "epoch": 0.6705032447655198, "grad_norm": 1.956335425376892, "learning_rate": 1.9951434258594696e-05, "loss": 0.8269, "step": 4107 }, { "epoch": 0.6706665034080241, "grad_norm": 1.9062511920928955, "learning_rate": 1.9951403016768804e-05, "loss": 0.7978, "step": 4108 }, { "epoch": 0.6708297620505286, "grad_norm": 1.8814045190811157, "learning_rate": 1.9951371764921852e-05, "loss": 0.8816, "step": 4109 }, { "epoch": 0.6709930206930329, "grad_norm": 2.0050930976867676, "learning_rate": 1.995134050305387e-05, "loss": 0.8296, "step": 4110 }, { "epoch": 0.6711562793355373, "grad_norm": 2.005835771560669, "learning_rate": 1.995130923116489e-05, "loss": 0.791, "step": 4111 }, { "epoch": 0.6713195379780417, "grad_norm": 1.7616783380508423, "learning_rate": 1.9951277949254947e-05, "loss": 0.6316, "step": 4112 }, { "epoch": 0.6714827966205461, "grad_norm": 2.390568733215332, "learning_rate": 1.995124665732407e-05, "loss": 0.7934, "step": 4113 }, { "epoch": 0.6716460552630504, "grad_norm": 2.151191473007202, "learning_rate": 1.9951215355372287e-05, "loss": 0.9134, "step": 4114 }, { "epoch": 0.6718093139055549, "grad_norm": 1.9410500526428223, "learning_rate": 1.9951184043399636e-05, "loss": 0.7673, "step": 4115 }, { "epoch": 0.6719725725480593, "grad_norm": 2.383833408355713, "learning_rate": 1.9951152721406145e-05, "loss": 0.7873, "step": 4116 }, { "epoch": 0.6721358311905636, "grad_norm": 1.8013930320739746, "learning_rate": 1.9951121389391844e-05, "loss": 0.7774, "step": 4117 }, { "epoch": 0.6722990898330681, "grad_norm": 1.7600305080413818, "learning_rate": 1.9951090047356767e-05, "loss": 0.6834, "step": 4118 }, { "epoch": 0.6724623484755724, "grad_norm": 1.7398566007614136, "learning_rate": 1.9951058695300945e-05, "loss": 0.6073, "step": 4119 }, { "epoch": 0.6726256071180768, "grad_norm": 1.8340108394622803, "learning_rate": 1.995102733322441e-05, "loss": 0.7637, "step": 4120 }, { "epoch": 0.6727888657605812, "grad_norm": 1.9868963956832886, "learning_rate": 1.9950995961127193e-05, "loss": 0.969, "step": 4121 }, { "epoch": 0.6729521244030856, "grad_norm": 2.232551097869873, "learning_rate": 1.9950964579009328e-05, "loss": 0.7824, "step": 4122 }, { "epoch": 0.6731153830455899, "grad_norm": 1.8406410217285156, "learning_rate": 1.995093318687084e-05, "loss": 0.7621, "step": 4123 }, { "epoch": 0.6732786416880944, "grad_norm": 1.7530088424682617, "learning_rate": 1.9950901784711765e-05, "loss": 0.7559, "step": 4124 }, { "epoch": 0.6734419003305987, "grad_norm": 2.102611541748047, "learning_rate": 1.9950870372532138e-05, "loss": 0.7479, "step": 4125 }, { "epoch": 0.6736051589731031, "grad_norm": 1.8543158769607544, "learning_rate": 1.9950838950331986e-05, "loss": 0.8004, "step": 4126 }, { "epoch": 0.6737684176156076, "grad_norm": 1.7378910779953003, "learning_rate": 1.9950807518111342e-05, "loss": 0.8071, "step": 4127 }, { "epoch": 0.6739316762581119, "grad_norm": 2.0280206203460693, "learning_rate": 1.9950776075870235e-05, "loss": 0.8395, "step": 4128 }, { "epoch": 0.6740949349006163, "grad_norm": 2.075049638748169, "learning_rate": 1.99507446236087e-05, "loss": 0.9797, "step": 4129 }, { "epoch": 0.6742581935431207, "grad_norm": 1.668195128440857, "learning_rate": 1.995071316132677e-05, "loss": 0.6875, "step": 4130 }, { "epoch": 0.6744214521856251, "grad_norm": 2.2895493507385254, "learning_rate": 1.995068168902447e-05, "loss": 0.9833, "step": 4131 }, { "epoch": 0.6745847108281294, "grad_norm": 1.9771149158477783, "learning_rate": 1.995065020670184e-05, "loss": 0.7198, "step": 4132 }, { "epoch": 0.6747479694706339, "grad_norm": 1.9430091381072998, "learning_rate": 1.9950618714358908e-05, "loss": 0.8313, "step": 4133 }, { "epoch": 0.6749112281131382, "grad_norm": 1.8720214366912842, "learning_rate": 1.9950587211995707e-05, "loss": 0.6809, "step": 4134 }, { "epoch": 0.6750744867556426, "grad_norm": 2.122695207595825, "learning_rate": 1.9950555699612265e-05, "loss": 0.9174, "step": 4135 }, { "epoch": 0.675237745398147, "grad_norm": 2.1797494888305664, "learning_rate": 1.9950524177208614e-05, "loss": 0.7063, "step": 4136 }, { "epoch": 0.6754010040406514, "grad_norm": 1.9694017171859741, "learning_rate": 1.9950492644784793e-05, "loss": 0.7874, "step": 4137 }, { "epoch": 0.6755642626831558, "grad_norm": 1.819265604019165, "learning_rate": 1.9950461102340823e-05, "loss": 0.6381, "step": 4138 }, { "epoch": 0.6757275213256602, "grad_norm": 2.2781858444213867, "learning_rate": 1.9950429549876748e-05, "loss": 0.7844, "step": 4139 }, { "epoch": 0.6758907799681646, "grad_norm": 2.3974759578704834, "learning_rate": 1.9950397987392588e-05, "loss": 0.8841, "step": 4140 }, { "epoch": 0.6760540386106689, "grad_norm": 2.2151107788085938, "learning_rate": 1.995036641488838e-05, "loss": 0.8983, "step": 4141 }, { "epoch": 0.6762172972531734, "grad_norm": 2.004086494445801, "learning_rate": 1.9950334832364157e-05, "loss": 0.8643, "step": 4142 }, { "epoch": 0.6763805558956777, "grad_norm": 2.504765272140503, "learning_rate": 1.9950303239819946e-05, "loss": 0.9706, "step": 4143 }, { "epoch": 0.6765438145381821, "grad_norm": 1.9661954641342163, "learning_rate": 1.9950271637255787e-05, "loss": 0.9314, "step": 4144 }, { "epoch": 0.6767070731806865, "grad_norm": 1.8291016817092896, "learning_rate": 1.9950240024671705e-05, "loss": 0.7036, "step": 4145 }, { "epoch": 0.6768703318231909, "grad_norm": 1.9649732112884521, "learning_rate": 1.9950208402067735e-05, "loss": 0.8265, "step": 4146 }, { "epoch": 0.6770335904656952, "grad_norm": 1.8695887327194214, "learning_rate": 1.9950176769443908e-05, "loss": 0.7423, "step": 4147 }, { "epoch": 0.6771968491081997, "grad_norm": 2.0008432865142822, "learning_rate": 1.9950145126800253e-05, "loss": 0.7986, "step": 4148 }, { "epoch": 0.6773601077507041, "grad_norm": 1.8796616792678833, "learning_rate": 1.9950113474136805e-05, "loss": 0.7066, "step": 4149 }, { "epoch": 0.6775233663932084, "grad_norm": 1.6945209503173828, "learning_rate": 1.9950081811453598e-05, "loss": 0.8076, "step": 4150 }, { "epoch": 0.6776866250357129, "grad_norm": 1.897228479385376, "learning_rate": 1.9950050138750662e-05, "loss": 0.708, "step": 4151 }, { "epoch": 0.6778498836782172, "grad_norm": 1.8645113706588745, "learning_rate": 1.9950018456028024e-05, "loss": 0.8532, "step": 4152 }, { "epoch": 0.6780131423207216, "grad_norm": 2.045151948928833, "learning_rate": 1.994998676328572e-05, "loss": 0.7437, "step": 4153 }, { "epoch": 0.678176400963226, "grad_norm": 1.9175703525543213, "learning_rate": 1.9949955060523784e-05, "loss": 0.8108, "step": 4154 }, { "epoch": 0.6783396596057304, "grad_norm": 2.1187527179718018, "learning_rate": 1.9949923347742248e-05, "loss": 0.8112, "step": 4155 }, { "epoch": 0.6785029182482347, "grad_norm": 1.9319872856140137, "learning_rate": 1.9949891624941138e-05, "loss": 0.8092, "step": 4156 }, { "epoch": 0.6786661768907392, "grad_norm": 2.0780091285705566, "learning_rate": 1.9949859892120492e-05, "loss": 0.7458, "step": 4157 }, { "epoch": 0.6788294355332435, "grad_norm": 2.0599660873413086, "learning_rate": 1.9949828149280338e-05, "loss": 0.903, "step": 4158 }, { "epoch": 0.6789926941757479, "grad_norm": 2.172431230545044, "learning_rate": 1.9949796396420712e-05, "loss": 0.8421, "step": 4159 }, { "epoch": 0.6791559528182524, "grad_norm": 1.6010431051254272, "learning_rate": 1.9949764633541645e-05, "loss": 0.6038, "step": 4160 }, { "epoch": 0.6793192114607567, "grad_norm": 1.9309957027435303, "learning_rate": 1.994973286064316e-05, "loss": 0.9091, "step": 4161 }, { "epoch": 0.6794824701032611, "grad_norm": 1.9657368659973145, "learning_rate": 1.9949701077725304e-05, "loss": 0.7011, "step": 4162 }, { "epoch": 0.6796457287457655, "grad_norm": 2.3205385208129883, "learning_rate": 1.99496692847881e-05, "loss": 1.0717, "step": 4163 }, { "epoch": 0.6798089873882699, "grad_norm": 1.9440120458602905, "learning_rate": 1.994963748183158e-05, "loss": 0.7, "step": 4164 }, { "epoch": 0.6799722460307742, "grad_norm": 1.8481014966964722, "learning_rate": 1.994960566885578e-05, "loss": 0.9877, "step": 4165 }, { "epoch": 0.6801355046732787, "grad_norm": 2.0271425247192383, "learning_rate": 1.9949573845860727e-05, "loss": 0.9173, "step": 4166 }, { "epoch": 0.680298763315783, "grad_norm": 1.9089303016662598, "learning_rate": 1.994954201284646e-05, "loss": 0.6098, "step": 4167 }, { "epoch": 0.6804620219582874, "grad_norm": 2.172492742538452, "learning_rate": 1.9949510169813006e-05, "loss": 0.8572, "step": 4168 }, { "epoch": 0.6806252806007919, "grad_norm": 1.8873982429504395, "learning_rate": 1.9949478316760394e-05, "loss": 0.757, "step": 4169 }, { "epoch": 0.6807885392432962, "grad_norm": 1.919996976852417, "learning_rate": 1.994944645368866e-05, "loss": 1.0166, "step": 4170 }, { "epoch": 0.6809517978858006, "grad_norm": 2.1866002082824707, "learning_rate": 1.994941458059784e-05, "loss": 0.9012, "step": 4171 }, { "epoch": 0.681115056528305, "grad_norm": 1.8691411018371582, "learning_rate": 1.994938269748796e-05, "loss": 0.6651, "step": 4172 }, { "epoch": 0.6812783151708094, "grad_norm": 2.1386609077453613, "learning_rate": 1.9949350804359057e-05, "loss": 0.7147, "step": 4173 }, { "epoch": 0.6814415738133137, "grad_norm": 1.673698902130127, "learning_rate": 1.9949318901211155e-05, "loss": 0.7399, "step": 4174 }, { "epoch": 0.6816048324558182, "grad_norm": 2.0039803981781006, "learning_rate": 1.9949286988044293e-05, "loss": 0.8556, "step": 4175 }, { "epoch": 0.6817680910983225, "grad_norm": 1.7296161651611328, "learning_rate": 1.9949255064858505e-05, "loss": 0.5359, "step": 4176 }, { "epoch": 0.6819313497408269, "grad_norm": 2.1121556758880615, "learning_rate": 1.9949223131653818e-05, "loss": 0.8199, "step": 4177 }, { "epoch": 0.6820946083833312, "grad_norm": 2.249812602996826, "learning_rate": 1.9949191188430266e-05, "loss": 0.7857, "step": 4178 }, { "epoch": 0.6822578670258357, "grad_norm": 1.7473031282424927, "learning_rate": 1.994915923518788e-05, "loss": 0.6729, "step": 4179 }, { "epoch": 0.6824211256683401, "grad_norm": 2.116805076599121, "learning_rate": 1.9949127271926696e-05, "loss": 0.8303, "step": 4180 }, { "epoch": 0.6825843843108444, "grad_norm": 1.9649015665054321, "learning_rate": 1.994909529864674e-05, "loss": 0.7353, "step": 4181 }, { "epoch": 0.6827476429533489, "grad_norm": 2.056251287460327, "learning_rate": 1.994906331534805e-05, "loss": 0.8865, "step": 4182 }, { "epoch": 0.6829109015958532, "grad_norm": 2.0325703620910645, "learning_rate": 1.9949031322030654e-05, "loss": 0.7782, "step": 4183 }, { "epoch": 0.6830741602383577, "grad_norm": 1.781244158744812, "learning_rate": 1.994899931869459e-05, "loss": 0.6667, "step": 4184 }, { "epoch": 0.683237418880862, "grad_norm": 2.1162655353546143, "learning_rate": 1.994896730533988e-05, "loss": 0.8312, "step": 4185 }, { "epoch": 0.6834006775233664, "grad_norm": 1.973695158958435, "learning_rate": 1.994893528196657e-05, "loss": 0.8368, "step": 4186 }, { "epoch": 0.6835639361658707, "grad_norm": 1.8510578870773315, "learning_rate": 1.994890324857468e-05, "loss": 0.8579, "step": 4187 }, { "epoch": 0.6837271948083752, "grad_norm": 2.1016201972961426, "learning_rate": 1.994887120516425e-05, "loss": 0.8654, "step": 4188 }, { "epoch": 0.6838904534508795, "grad_norm": 2.2568845748901367, "learning_rate": 1.9948839151735305e-05, "loss": 1.0038, "step": 4189 }, { "epoch": 0.684053712093384, "grad_norm": 2.1826863288879395, "learning_rate": 1.9948807088287884e-05, "loss": 0.885, "step": 4190 }, { "epoch": 0.6842169707358884, "grad_norm": 2.1392626762390137, "learning_rate": 1.9948775014822016e-05, "loss": 0.7477, "step": 4191 }, { "epoch": 0.6843802293783927, "grad_norm": 2.1337029933929443, "learning_rate": 1.9948742931337736e-05, "loss": 0.83, "step": 4192 }, { "epoch": 0.6845434880208972, "grad_norm": 2.0879764556884766, "learning_rate": 1.9948710837835072e-05, "loss": 0.7925, "step": 4193 }, { "epoch": 0.6847067466634015, "grad_norm": 2.0691444873809814, "learning_rate": 1.9948678734314062e-05, "loss": 0.835, "step": 4194 }, { "epoch": 0.6848700053059059, "grad_norm": 1.8288135528564453, "learning_rate": 1.9948646620774733e-05, "loss": 0.7324, "step": 4195 }, { "epoch": 0.6850332639484102, "grad_norm": 1.9561655521392822, "learning_rate": 1.994861449721712e-05, "loss": 0.8832, "step": 4196 }, { "epoch": 0.6851965225909147, "grad_norm": 2.3786544799804688, "learning_rate": 1.9948582363641254e-05, "loss": 0.9468, "step": 4197 }, { "epoch": 0.685359781233419, "grad_norm": 1.624997615814209, "learning_rate": 1.9948550220047173e-05, "loss": 0.6864, "step": 4198 }, { "epoch": 0.6855230398759234, "grad_norm": 2.4162254333496094, "learning_rate": 1.9948518066434898e-05, "loss": 1.0107, "step": 4199 }, { "epoch": 0.6856862985184278, "grad_norm": 1.8422808647155762, "learning_rate": 1.9948485902804472e-05, "loss": 0.7443, "step": 4200 }, { "epoch": 0.6858495571609322, "grad_norm": 1.6834032535552979, "learning_rate": 1.994845372915592e-05, "loss": 0.7019, "step": 4201 }, { "epoch": 0.6860128158034366, "grad_norm": 1.7679576873779297, "learning_rate": 1.994842154548928e-05, "loss": 0.6552, "step": 4202 }, { "epoch": 0.686176074445941, "grad_norm": 1.9738646745681763, "learning_rate": 1.994838935180458e-05, "loss": 0.9114, "step": 4203 }, { "epoch": 0.6863393330884454, "grad_norm": 1.7693486213684082, "learning_rate": 1.994835714810186e-05, "loss": 0.761, "step": 4204 }, { "epoch": 0.6865025917309497, "grad_norm": 2.203340530395508, "learning_rate": 1.9948324934381142e-05, "loss": 1.3468, "step": 4205 }, { "epoch": 0.6866658503734542, "grad_norm": 1.9346898794174194, "learning_rate": 1.9948292710642464e-05, "loss": 0.8564, "step": 4206 }, { "epoch": 0.6868291090159585, "grad_norm": 2.4551684856414795, "learning_rate": 1.994826047688586e-05, "loss": 0.9484, "step": 4207 }, { "epoch": 0.6869923676584629, "grad_norm": 2.2222578525543213, "learning_rate": 1.994822823311136e-05, "loss": 0.7566, "step": 4208 }, { "epoch": 0.6871556263009673, "grad_norm": 1.671071171760559, "learning_rate": 1.9948195979318995e-05, "loss": 0.7242, "step": 4209 }, { "epoch": 0.6873188849434717, "grad_norm": 2.14620304107666, "learning_rate": 1.99481637155088e-05, "loss": 0.8451, "step": 4210 }, { "epoch": 0.687482143585976, "grad_norm": 2.0595812797546387, "learning_rate": 1.994813144168081e-05, "loss": 0.9181, "step": 4211 }, { "epoch": 0.6876454022284805, "grad_norm": 1.9789401292800903, "learning_rate": 1.994809915783505e-05, "loss": 0.7538, "step": 4212 }, { "epoch": 0.6878086608709849, "grad_norm": 1.8737579584121704, "learning_rate": 1.9948066863971556e-05, "loss": 0.7529, "step": 4213 }, { "epoch": 0.6879719195134892, "grad_norm": 1.8760442733764648, "learning_rate": 1.9948034560090364e-05, "loss": 0.944, "step": 4214 }, { "epoch": 0.6881351781559937, "grad_norm": 2.2963998317718506, "learning_rate": 1.9948002246191503e-05, "loss": 1.4175, "step": 4215 }, { "epoch": 0.688298436798498, "grad_norm": 1.8749713897705078, "learning_rate": 1.9947969922275007e-05, "loss": 0.82, "step": 4216 }, { "epoch": 0.6884616954410024, "grad_norm": 2.7313199043273926, "learning_rate": 1.9947937588340907e-05, "loss": 0.9915, "step": 4217 }, { "epoch": 0.6886249540835068, "grad_norm": 1.8387986421585083, "learning_rate": 1.9947905244389235e-05, "loss": 0.6928, "step": 4218 }, { "epoch": 0.6887882127260112, "grad_norm": 1.822058081626892, "learning_rate": 1.994787289042003e-05, "loss": 0.857, "step": 4219 }, { "epoch": 0.6889514713685155, "grad_norm": 2.0355279445648193, "learning_rate": 1.9947840526433316e-05, "loss": 0.8637, "step": 4220 }, { "epoch": 0.68911473001102, "grad_norm": 2.2732481956481934, "learning_rate": 1.994780815242913e-05, "loss": 0.8268, "step": 4221 }, { "epoch": 0.6892779886535243, "grad_norm": 2.1448006629943848, "learning_rate": 1.9947775768407504e-05, "loss": 0.6977, "step": 4222 }, { "epoch": 0.6894412472960287, "grad_norm": 2.252592086791992, "learning_rate": 1.9947743374368467e-05, "loss": 0.8878, "step": 4223 }, { "epoch": 0.6896045059385332, "grad_norm": 2.1373229026794434, "learning_rate": 1.994771097031206e-05, "loss": 0.8448, "step": 4224 }, { "epoch": 0.6897677645810375, "grad_norm": 1.992712140083313, "learning_rate": 1.994767855623831e-05, "loss": 0.9574, "step": 4225 }, { "epoch": 0.6899310232235419, "grad_norm": 1.8699966669082642, "learning_rate": 1.9947646132147248e-05, "loss": 0.834, "step": 4226 }, { "epoch": 0.6900942818660463, "grad_norm": 1.740078091621399, "learning_rate": 1.994761369803891e-05, "loss": 0.7823, "step": 4227 }, { "epoch": 0.6902575405085507, "grad_norm": 1.994172215461731, "learning_rate": 1.994758125391333e-05, "loss": 0.7185, "step": 4228 }, { "epoch": 0.690420799151055, "grad_norm": 1.887046456336975, "learning_rate": 1.9947548799770535e-05, "loss": 0.8063, "step": 4229 }, { "epoch": 0.6905840577935595, "grad_norm": 2.1959755420684814, "learning_rate": 1.9947516335610563e-05, "loss": 1.3874, "step": 4230 }, { "epoch": 0.6907473164360638, "grad_norm": 2.1626553535461426, "learning_rate": 1.9947483861433444e-05, "loss": 0.8176, "step": 4231 }, { "epoch": 0.6909105750785682, "grad_norm": 2.0679123401641846, "learning_rate": 1.9947451377239212e-05, "loss": 0.7845, "step": 4232 }, { "epoch": 0.6910738337210726, "grad_norm": 2.1547374725341797, "learning_rate": 1.9947418883027894e-05, "loss": 0.8036, "step": 4233 }, { "epoch": 0.691237092363577, "grad_norm": 2.0083796977996826, "learning_rate": 1.9947386378799534e-05, "loss": 0.8588, "step": 4234 }, { "epoch": 0.6914003510060814, "grad_norm": 2.129823684692383, "learning_rate": 1.994735386455416e-05, "loss": 0.8286, "step": 4235 }, { "epoch": 0.6915636096485858, "grad_norm": 1.9867092370986938, "learning_rate": 1.99473213402918e-05, "loss": 0.7711, "step": 4236 }, { "epoch": 0.6917268682910902, "grad_norm": 1.8291975259780884, "learning_rate": 1.994728880601249e-05, "loss": 0.7234, "step": 4237 }, { "epoch": 0.6918901269335945, "grad_norm": 2.030412435531616, "learning_rate": 1.994725626171626e-05, "loss": 0.8301, "step": 4238 }, { "epoch": 0.692053385576099, "grad_norm": 2.0023584365844727, "learning_rate": 1.9947223707403148e-05, "loss": 0.7924, "step": 4239 }, { "epoch": 0.6922166442186033, "grad_norm": 1.7801696062088013, "learning_rate": 1.9947191143073185e-05, "loss": 0.7787, "step": 4240 }, { "epoch": 0.6923799028611077, "grad_norm": 1.8166919946670532, "learning_rate": 1.9947158568726404e-05, "loss": 0.7034, "step": 4241 }, { "epoch": 0.6925431615036121, "grad_norm": 2.013502359390259, "learning_rate": 1.9947125984362835e-05, "loss": 0.7165, "step": 4242 }, { "epoch": 0.6927064201461165, "grad_norm": 2.4362733364105225, "learning_rate": 1.9947093389982515e-05, "loss": 0.9341, "step": 4243 }, { "epoch": 0.6928696787886208, "grad_norm": 1.9451409578323364, "learning_rate": 1.9947060785585472e-05, "loss": 0.697, "step": 4244 }, { "epoch": 0.6930329374311253, "grad_norm": 1.8926961421966553, "learning_rate": 1.9947028171171742e-05, "loss": 0.7593, "step": 4245 }, { "epoch": 0.6931961960736297, "grad_norm": 2.113431930541992, "learning_rate": 1.994699554674136e-05, "loss": 0.9076, "step": 4246 }, { "epoch": 0.693359454716134, "grad_norm": 1.7997654676437378, "learning_rate": 1.9946962912294356e-05, "loss": 0.7661, "step": 4247 }, { "epoch": 0.6935227133586385, "grad_norm": 2.130751848220825, "learning_rate": 1.9946930267830757e-05, "loss": 0.9527, "step": 4248 }, { "epoch": 0.6936859720011428, "grad_norm": 2.097210168838501, "learning_rate": 1.9946897613350607e-05, "loss": 0.8686, "step": 4249 }, { "epoch": 0.6938492306436472, "grad_norm": 2.080037832260132, "learning_rate": 1.9946864948853936e-05, "loss": 0.9064, "step": 4250 }, { "epoch": 0.6940124892861516, "grad_norm": 2.0504324436187744, "learning_rate": 1.994683227434077e-05, "loss": 0.8105, "step": 4251 }, { "epoch": 0.694175747928656, "grad_norm": 1.935913324356079, "learning_rate": 1.9946799589811146e-05, "loss": 0.8818, "step": 4252 }, { "epoch": 0.6943390065711603, "grad_norm": 1.9660416841506958, "learning_rate": 1.99467668952651e-05, "loss": 0.8194, "step": 4253 }, { "epoch": 0.6945022652136648, "grad_norm": 1.6792112588882446, "learning_rate": 1.9946734190702664e-05, "loss": 0.7723, "step": 4254 }, { "epoch": 0.6946655238561691, "grad_norm": 1.9246500730514526, "learning_rate": 1.9946701476123867e-05, "loss": 0.8484, "step": 4255 }, { "epoch": 0.6948287824986735, "grad_norm": 2.087454080581665, "learning_rate": 1.9946668751528745e-05, "loss": 0.9466, "step": 4256 }, { "epoch": 0.694992041141178, "grad_norm": 1.8705824613571167, "learning_rate": 1.9946636016917326e-05, "loss": 0.9323, "step": 4257 }, { "epoch": 0.6951552997836823, "grad_norm": 1.7846181392669678, "learning_rate": 1.9946603272289652e-05, "loss": 0.8027, "step": 4258 }, { "epoch": 0.6953185584261867, "grad_norm": 1.8184690475463867, "learning_rate": 1.994657051764575e-05, "loss": 0.8243, "step": 4259 }, { "epoch": 0.695481817068691, "grad_norm": 1.768057107925415, "learning_rate": 1.9946537752985653e-05, "loss": 0.8454, "step": 4260 }, { "epoch": 0.6956450757111955, "grad_norm": 1.7855664491653442, "learning_rate": 1.9946504978309397e-05, "loss": 0.8301, "step": 4261 }, { "epoch": 0.6958083343536998, "grad_norm": 1.9005568027496338, "learning_rate": 1.994647219361701e-05, "loss": 0.984, "step": 4262 }, { "epoch": 0.6959715929962043, "grad_norm": 1.8614243268966675, "learning_rate": 1.9946439398908533e-05, "loss": 0.7467, "step": 4263 }, { "epoch": 0.6961348516387086, "grad_norm": 1.6746809482574463, "learning_rate": 1.9946406594183993e-05, "loss": 0.7641, "step": 4264 }, { "epoch": 0.696298110281213, "grad_norm": 1.6465895175933838, "learning_rate": 1.994637377944342e-05, "loss": 0.6649, "step": 4265 }, { "epoch": 0.6964613689237173, "grad_norm": 1.7396831512451172, "learning_rate": 1.9946340954686852e-05, "loss": 0.8663, "step": 4266 }, { "epoch": 0.6966246275662218, "grad_norm": 1.9153978824615479, "learning_rate": 1.9946308119914323e-05, "loss": 0.7456, "step": 4267 }, { "epoch": 0.6967878862087262, "grad_norm": 2.133854627609253, "learning_rate": 1.9946275275125867e-05, "loss": 0.7657, "step": 4268 }, { "epoch": 0.6969511448512306, "grad_norm": 1.8593735694885254, "learning_rate": 1.9946242420321513e-05, "loss": 0.8253, "step": 4269 }, { "epoch": 0.697114403493735, "grad_norm": 1.7236478328704834, "learning_rate": 1.9946209555501293e-05, "loss": 0.7209, "step": 4270 }, { "epoch": 0.6972776621362393, "grad_norm": 2.262835741043091, "learning_rate": 1.9946176680665244e-05, "loss": 0.7842, "step": 4271 }, { "epoch": 0.6974409207787438, "grad_norm": 2.00213623046875, "learning_rate": 1.9946143795813396e-05, "loss": 0.8801, "step": 4272 }, { "epoch": 0.6976041794212481, "grad_norm": 1.6758482456207275, "learning_rate": 1.9946110900945787e-05, "loss": 0.7961, "step": 4273 }, { "epoch": 0.6977674380637525, "grad_norm": 1.9252508878707886, "learning_rate": 1.9946077996062447e-05, "loss": 0.7542, "step": 4274 }, { "epoch": 0.6979306967062568, "grad_norm": 2.0963165760040283, "learning_rate": 1.9946045081163407e-05, "loss": 0.9548, "step": 4275 }, { "epoch": 0.6980939553487613, "grad_norm": 1.7537860870361328, "learning_rate": 1.9946012156248703e-05, "loss": 0.7042, "step": 4276 }, { "epoch": 0.6982572139912656, "grad_norm": 2.062753200531006, "learning_rate": 1.9945979221318367e-05, "loss": 0.8515, "step": 4277 }, { "epoch": 0.69842047263377, "grad_norm": 1.8563761711120605, "learning_rate": 1.9945946276372435e-05, "loss": 0.768, "step": 4278 }, { "epoch": 0.6985837312762745, "grad_norm": 2.1685030460357666, "learning_rate": 1.9945913321410935e-05, "loss": 0.9328, "step": 4279 }, { "epoch": 0.6987469899187788, "grad_norm": 1.908599853515625, "learning_rate": 1.9945880356433904e-05, "loss": 0.8296, "step": 4280 }, { "epoch": 0.6989102485612833, "grad_norm": 2.0254435539245605, "learning_rate": 1.9945847381441372e-05, "loss": 0.8294, "step": 4281 }, { "epoch": 0.6990735072037876, "grad_norm": 1.7319930791854858, "learning_rate": 1.9945814396433377e-05, "loss": 0.7415, "step": 4282 }, { "epoch": 0.699236765846292, "grad_norm": 2.027949571609497, "learning_rate": 1.9945781401409946e-05, "loss": 0.8439, "step": 4283 }, { "epoch": 0.6994000244887963, "grad_norm": 2.514307975769043, "learning_rate": 1.994574839637112e-05, "loss": 0.8266, "step": 4284 }, { "epoch": 0.6995632831313008, "grad_norm": 2.7291760444641113, "learning_rate": 1.994571538131693e-05, "loss": 0.958, "step": 4285 }, { "epoch": 0.6997265417738051, "grad_norm": 1.8221714496612549, "learning_rate": 1.99456823562474e-05, "loss": 0.8429, "step": 4286 }, { "epoch": 0.6998898004163095, "grad_norm": 1.8138916492462158, "learning_rate": 1.9945649321162576e-05, "loss": 0.7084, "step": 4287 }, { "epoch": 0.7000530590588139, "grad_norm": 2.039774179458618, "learning_rate": 1.9945616276062482e-05, "loss": 0.8453, "step": 4288 }, { "epoch": 0.7002163177013183, "grad_norm": 1.8851399421691895, "learning_rate": 1.9945583220947156e-05, "loss": 0.8044, "step": 4289 }, { "epoch": 0.7003795763438228, "grad_norm": 1.7872333526611328, "learning_rate": 1.9945550155816634e-05, "loss": 0.7419, "step": 4290 }, { "epoch": 0.7005428349863271, "grad_norm": 2.0840182304382324, "learning_rate": 1.994551708067094e-05, "loss": 0.8663, "step": 4291 }, { "epoch": 0.7007060936288315, "grad_norm": 2.122913360595703, "learning_rate": 1.994548399551012e-05, "loss": 0.963, "step": 4292 }, { "epoch": 0.7008693522713358, "grad_norm": 1.951949119567871, "learning_rate": 1.9945450900334197e-05, "loss": 0.8804, "step": 4293 }, { "epoch": 0.7010326109138403, "grad_norm": 1.8915026187896729, "learning_rate": 1.99454177951432e-05, "loss": 0.7281, "step": 4294 }, { "epoch": 0.7011958695563446, "grad_norm": 1.8705130815505981, "learning_rate": 1.9945384679937182e-05, "loss": 0.7097, "step": 4295 }, { "epoch": 0.701359128198849, "grad_norm": 2.0616843700408936, "learning_rate": 1.9945351554716158e-05, "loss": 0.8083, "step": 4296 }, { "epoch": 0.7015223868413534, "grad_norm": 2.316213369369507, "learning_rate": 1.9945318419480168e-05, "loss": 1.0206, "step": 4297 }, { "epoch": 0.7016856454838578, "grad_norm": 1.6560720205307007, "learning_rate": 1.9945285274229244e-05, "loss": 0.7628, "step": 4298 }, { "epoch": 0.7018489041263621, "grad_norm": 1.5189416408538818, "learning_rate": 1.994525211896342e-05, "loss": 0.6112, "step": 4299 }, { "epoch": 0.7020121627688666, "grad_norm": 1.9120960235595703, "learning_rate": 1.9945218953682736e-05, "loss": 0.7637, "step": 4300 }, { "epoch": 0.702175421411371, "grad_norm": 1.6891099214553833, "learning_rate": 1.9945185778387214e-05, "loss": 0.6498, "step": 4301 }, { "epoch": 0.7023386800538753, "grad_norm": 2.311800956726074, "learning_rate": 1.9945152593076893e-05, "loss": 0.9035, "step": 4302 }, { "epoch": 0.7025019386963798, "grad_norm": 2.07788348197937, "learning_rate": 1.9945119397751807e-05, "loss": 1.0844, "step": 4303 }, { "epoch": 0.7026651973388841, "grad_norm": 2.0255422592163086, "learning_rate": 1.9945086192411986e-05, "loss": 0.7657, "step": 4304 }, { "epoch": 0.7028284559813885, "grad_norm": 1.9278734922409058, "learning_rate": 1.994505297705747e-05, "loss": 0.9272, "step": 4305 }, { "epoch": 0.7029917146238929, "grad_norm": 2.594517469406128, "learning_rate": 1.9945019751688284e-05, "loss": 0.9448, "step": 4306 }, { "epoch": 0.7031549732663973, "grad_norm": 1.808044195175171, "learning_rate": 1.994498651630447e-05, "loss": 0.848, "step": 4307 }, { "epoch": 0.7033182319089016, "grad_norm": 1.9005550146102905, "learning_rate": 1.9944953270906054e-05, "loss": 0.7092, "step": 4308 }, { "epoch": 0.7034814905514061, "grad_norm": 2.1721482276916504, "learning_rate": 1.9944920015493074e-05, "loss": 0.8934, "step": 4309 }, { "epoch": 0.7036447491939104, "grad_norm": 1.7311025857925415, "learning_rate": 1.994488675006556e-05, "loss": 0.8381, "step": 4310 }, { "epoch": 0.7038080078364148, "grad_norm": 1.9951573610305786, "learning_rate": 1.994485347462355e-05, "loss": 0.8851, "step": 4311 }, { "epoch": 0.7039712664789193, "grad_norm": 2.0537636280059814, "learning_rate": 1.9944820189167076e-05, "loss": 0.9549, "step": 4312 }, { "epoch": 0.7041345251214236, "grad_norm": 2.0764970779418945, "learning_rate": 1.9944786893696166e-05, "loss": 0.975, "step": 4313 }, { "epoch": 0.704297783763928, "grad_norm": 1.805821180343628, "learning_rate": 1.9944753588210864e-05, "loss": 0.8177, "step": 4314 }, { "epoch": 0.7044610424064324, "grad_norm": 1.931982398033142, "learning_rate": 1.9944720272711192e-05, "loss": 0.9035, "step": 4315 }, { "epoch": 0.7046243010489368, "grad_norm": 2.203350305557251, "learning_rate": 1.9944686947197196e-05, "loss": 1.0634, "step": 4316 }, { "epoch": 0.7047875596914411, "grad_norm": 2.152621269226074, "learning_rate": 1.99446536116689e-05, "loss": 0.9628, "step": 4317 }, { "epoch": 0.7049508183339456, "grad_norm": 2.0857505798339844, "learning_rate": 1.9944620266126338e-05, "loss": 0.7802, "step": 4318 }, { "epoch": 0.7051140769764499, "grad_norm": 1.7998846769332886, "learning_rate": 1.9944586910569546e-05, "loss": 0.7594, "step": 4319 }, { "epoch": 0.7052773356189543, "grad_norm": 1.6078064441680908, "learning_rate": 1.9944553544998563e-05, "loss": 0.6447, "step": 4320 }, { "epoch": 0.7054405942614587, "grad_norm": 1.8381255865097046, "learning_rate": 1.9944520169413413e-05, "loss": 0.6851, "step": 4321 }, { "epoch": 0.7056038529039631, "grad_norm": 1.9162471294403076, "learning_rate": 1.9944486783814135e-05, "loss": 0.8674, "step": 4322 }, { "epoch": 0.7057671115464675, "grad_norm": 1.7429533004760742, "learning_rate": 1.994445338820076e-05, "loss": 0.8149, "step": 4323 }, { "epoch": 0.7059303701889719, "grad_norm": 2.092479944229126, "learning_rate": 1.9944419982573323e-05, "loss": 0.7385, "step": 4324 }, { "epoch": 0.7060936288314763, "grad_norm": 1.873106837272644, "learning_rate": 1.994438656693186e-05, "loss": 0.8601, "step": 4325 }, { "epoch": 0.7062568874739806, "grad_norm": 1.8368573188781738, "learning_rate": 1.99443531412764e-05, "loss": 0.7698, "step": 4326 }, { "epoch": 0.7064201461164851, "grad_norm": 1.8464480638504028, "learning_rate": 1.9944319705606983e-05, "loss": 0.8331, "step": 4327 }, { "epoch": 0.7065834047589894, "grad_norm": 2.2949576377868652, "learning_rate": 1.9944286259923637e-05, "loss": 0.9281, "step": 4328 }, { "epoch": 0.7067466634014938, "grad_norm": 2.095341205596924, "learning_rate": 1.9944252804226393e-05, "loss": 0.878, "step": 4329 }, { "epoch": 0.7069099220439982, "grad_norm": 2.058976888656616, "learning_rate": 1.994421933851529e-05, "loss": 0.9978, "step": 4330 }, { "epoch": 0.7070731806865026, "grad_norm": 2.2933664321899414, "learning_rate": 1.9944185862790366e-05, "loss": 0.9138, "step": 4331 }, { "epoch": 0.7072364393290069, "grad_norm": 2.0721192359924316, "learning_rate": 1.9944152377051648e-05, "loss": 0.9309, "step": 4332 }, { "epoch": 0.7073996979715114, "grad_norm": 1.724638819694519, "learning_rate": 1.9944118881299167e-05, "loss": 0.7519, "step": 4333 }, { "epoch": 0.7075629566140158, "grad_norm": 1.9347200393676758, "learning_rate": 1.9944085375532965e-05, "loss": 0.8812, "step": 4334 }, { "epoch": 0.7077262152565201, "grad_norm": 2.1574108600616455, "learning_rate": 1.9944051859753072e-05, "loss": 0.7688, "step": 4335 }, { "epoch": 0.7078894738990246, "grad_norm": 1.8231219053268433, "learning_rate": 1.9944018333959518e-05, "loss": 0.6678, "step": 4336 }, { "epoch": 0.7080527325415289, "grad_norm": 2.307121515274048, "learning_rate": 1.9943984798152343e-05, "loss": 0.9512, "step": 4337 }, { "epoch": 0.7082159911840333, "grad_norm": 1.6214911937713623, "learning_rate": 1.9943951252331576e-05, "loss": 0.6637, "step": 4338 }, { "epoch": 0.7083792498265377, "grad_norm": 1.7350754737854004, "learning_rate": 1.9943917696497256e-05, "loss": 0.8309, "step": 4339 }, { "epoch": 0.7085425084690421, "grad_norm": 2.1449248790740967, "learning_rate": 1.9943884130649408e-05, "loss": 0.9577, "step": 4340 }, { "epoch": 0.7087057671115464, "grad_norm": 1.7031553983688354, "learning_rate": 1.9943850554788075e-05, "loss": 0.7667, "step": 4341 }, { "epoch": 0.7088690257540509, "grad_norm": 2.3525102138519287, "learning_rate": 1.9943816968913288e-05, "loss": 0.8443, "step": 4342 }, { "epoch": 0.7090322843965552, "grad_norm": 2.133125066757202, "learning_rate": 1.9943783373025077e-05, "loss": 0.9871, "step": 4343 }, { "epoch": 0.7091955430390596, "grad_norm": 2.244927406311035, "learning_rate": 1.994374976712348e-05, "loss": 0.9959, "step": 4344 }, { "epoch": 0.7093588016815641, "grad_norm": 1.6296930313110352, "learning_rate": 1.994371615120853e-05, "loss": 0.6568, "step": 4345 }, { "epoch": 0.7095220603240684, "grad_norm": 1.7802700996398926, "learning_rate": 1.9943682525280263e-05, "loss": 0.7199, "step": 4346 }, { "epoch": 0.7096853189665728, "grad_norm": 2.2932846546173096, "learning_rate": 1.9943648889338707e-05, "loss": 0.8624, "step": 4347 }, { "epoch": 0.7098485776090772, "grad_norm": 2.2595794200897217, "learning_rate": 1.9943615243383897e-05, "loss": 0.9352, "step": 4348 }, { "epoch": 0.7100118362515816, "grad_norm": 2.022770881652832, "learning_rate": 1.9943581587415873e-05, "loss": 0.8571, "step": 4349 }, { "epoch": 0.7101750948940859, "grad_norm": 2.03802752494812, "learning_rate": 1.9943547921434666e-05, "loss": 0.7644, "step": 4350 }, { "epoch": 0.7103383535365904, "grad_norm": 1.731443166732788, "learning_rate": 1.9943514245440307e-05, "loss": 0.6777, "step": 4351 }, { "epoch": 0.7105016121790947, "grad_norm": 1.8085764646530151, "learning_rate": 1.9943480559432832e-05, "loss": 0.7689, "step": 4352 }, { "epoch": 0.7106648708215991, "grad_norm": 1.8456294536590576, "learning_rate": 1.9943446863412276e-05, "loss": 0.7884, "step": 4353 }, { "epoch": 0.7108281294641035, "grad_norm": 1.7384699583053589, "learning_rate": 1.994341315737867e-05, "loss": 0.6381, "step": 4354 }, { "epoch": 0.7109913881066079, "grad_norm": 1.8413194417953491, "learning_rate": 1.994337944133205e-05, "loss": 0.6429, "step": 4355 }, { "epoch": 0.7111546467491123, "grad_norm": 2.1186680793762207, "learning_rate": 1.9943345715272445e-05, "loss": 0.8943, "step": 4356 }, { "epoch": 0.7113179053916167, "grad_norm": 1.9528472423553467, "learning_rate": 1.99433119791999e-05, "loss": 0.8599, "step": 4357 }, { "epoch": 0.7114811640341211, "grad_norm": 2.0833191871643066, "learning_rate": 1.994327823311444e-05, "loss": 0.8997, "step": 4358 }, { "epoch": 0.7116444226766254, "grad_norm": 2.3430826663970947, "learning_rate": 1.99432444770161e-05, "loss": 0.8764, "step": 4359 }, { "epoch": 0.7118076813191299, "grad_norm": 2.094642400741577, "learning_rate": 1.994321071090492e-05, "loss": 0.8559, "step": 4360 }, { "epoch": 0.7119709399616342, "grad_norm": 2.1967456340789795, "learning_rate": 1.9943176934780926e-05, "loss": 0.8555, "step": 4361 }, { "epoch": 0.7121341986041386, "grad_norm": 1.8970474004745483, "learning_rate": 1.9943143148644155e-05, "loss": 0.9391, "step": 4362 }, { "epoch": 0.712297457246643, "grad_norm": 1.9624043703079224, "learning_rate": 1.994310935249464e-05, "loss": 0.8835, "step": 4363 }, { "epoch": 0.7124607158891474, "grad_norm": 2.1123692989349365, "learning_rate": 1.9943075546332423e-05, "loss": 0.8153, "step": 4364 }, { "epoch": 0.7126239745316517, "grad_norm": 1.8385090827941895, "learning_rate": 1.9943041730157526e-05, "loss": 0.8518, "step": 4365 }, { "epoch": 0.7127872331741562, "grad_norm": 1.819939136505127, "learning_rate": 1.994300790396999e-05, "loss": 0.7119, "step": 4366 }, { "epoch": 0.7129504918166606, "grad_norm": 2.3111627101898193, "learning_rate": 1.9942974067769847e-05, "loss": 0.7905, "step": 4367 }, { "epoch": 0.7131137504591649, "grad_norm": 2.173330783843994, "learning_rate": 1.9942940221557135e-05, "loss": 0.9446, "step": 4368 }, { "epoch": 0.7132770091016694, "grad_norm": 1.9439023733139038, "learning_rate": 1.994290636533188e-05, "loss": 0.8962, "step": 4369 }, { "epoch": 0.7134402677441737, "grad_norm": 2.0761358737945557, "learning_rate": 1.9942872499094125e-05, "loss": 0.9764, "step": 4370 }, { "epoch": 0.7136035263866781, "grad_norm": 2.273097515106201, "learning_rate": 1.9942838622843898e-05, "loss": 1.0192, "step": 4371 }, { "epoch": 0.7137667850291824, "grad_norm": 1.8375968933105469, "learning_rate": 1.9942804736581236e-05, "loss": 0.8392, "step": 4372 }, { "epoch": 0.7139300436716869, "grad_norm": 1.7532120943069458, "learning_rate": 1.994277084030617e-05, "loss": 0.9312, "step": 4373 }, { "epoch": 0.7140933023141912, "grad_norm": 1.9342118501663208, "learning_rate": 1.994273693401874e-05, "loss": 0.9675, "step": 4374 }, { "epoch": 0.7142565609566957, "grad_norm": 1.7692539691925049, "learning_rate": 1.9942703017718977e-05, "loss": 0.7715, "step": 4375 }, { "epoch": 0.7144198195992, "grad_norm": 1.4650812149047852, "learning_rate": 1.994266909140691e-05, "loss": 0.6356, "step": 4376 }, { "epoch": 0.7145830782417044, "grad_norm": 1.6834259033203125, "learning_rate": 1.994263515508258e-05, "loss": 0.7278, "step": 4377 }, { "epoch": 0.7147463368842089, "grad_norm": 1.982469081878662, "learning_rate": 1.994260120874602e-05, "loss": 0.9722, "step": 4378 }, { "epoch": 0.7149095955267132, "grad_norm": 2.1274068355560303, "learning_rate": 1.9942567252397262e-05, "loss": 0.8126, "step": 4379 }, { "epoch": 0.7150728541692176, "grad_norm": 2.203218698501587, "learning_rate": 1.9942533286036343e-05, "loss": 0.9513, "step": 4380 }, { "epoch": 0.715236112811722, "grad_norm": 2.0004398822784424, "learning_rate": 1.9942499309663294e-05, "loss": 0.6825, "step": 4381 }, { "epoch": 0.7153993714542264, "grad_norm": 1.9553112983703613, "learning_rate": 1.9942465323278153e-05, "loss": 0.84, "step": 4382 }, { "epoch": 0.7155626300967307, "grad_norm": 1.8179467916488647, "learning_rate": 1.994243132688095e-05, "loss": 0.782, "step": 4383 }, { "epoch": 0.7157258887392351, "grad_norm": 2.235081911087036, "learning_rate": 1.9942397320471723e-05, "loss": 0.7786, "step": 4384 }, { "epoch": 0.7158891473817395, "grad_norm": 1.925100326538086, "learning_rate": 1.99423633040505e-05, "loss": 0.8828, "step": 4385 }, { "epoch": 0.7160524060242439, "grad_norm": 2.014695405960083, "learning_rate": 1.9942329277617325e-05, "loss": 0.8665, "step": 4386 }, { "epoch": 0.7162156646667482, "grad_norm": 1.8004367351531982, "learning_rate": 1.9942295241172225e-05, "loss": 0.6948, "step": 4387 }, { "epoch": 0.7163789233092527, "grad_norm": 1.9331480264663696, "learning_rate": 1.9942261194715236e-05, "loss": 0.7294, "step": 4388 }, { "epoch": 0.7165421819517571, "grad_norm": 1.7150667905807495, "learning_rate": 1.9942227138246393e-05, "loss": 0.6843, "step": 4389 }, { "epoch": 0.7167054405942614, "grad_norm": 1.5684446096420288, "learning_rate": 1.9942193071765728e-05, "loss": 0.6938, "step": 4390 }, { "epoch": 0.7168686992367659, "grad_norm": 1.931045651435852, "learning_rate": 1.9942158995273283e-05, "loss": 0.8214, "step": 4391 }, { "epoch": 0.7170319578792702, "grad_norm": 2.2262299060821533, "learning_rate": 1.994212490876908e-05, "loss": 0.8845, "step": 4392 }, { "epoch": 0.7171952165217746, "grad_norm": 1.936834692955017, "learning_rate": 1.9942090812253164e-05, "loss": 0.7213, "step": 4393 }, { "epoch": 0.717358475164279, "grad_norm": 2.140897750854492, "learning_rate": 1.9942056705725564e-05, "loss": 0.9362, "step": 4394 }, { "epoch": 0.7175217338067834, "grad_norm": 1.9691821336746216, "learning_rate": 1.9942022589186316e-05, "loss": 0.8167, "step": 4395 }, { "epoch": 0.7176849924492877, "grad_norm": 1.8906147480010986, "learning_rate": 1.994198846263545e-05, "loss": 0.9226, "step": 4396 }, { "epoch": 0.7178482510917922, "grad_norm": 2.099067449569702, "learning_rate": 1.994195432607301e-05, "loss": 0.9348, "step": 4397 }, { "epoch": 0.7180115097342965, "grad_norm": 1.9238884449005127, "learning_rate": 1.9941920179499022e-05, "loss": 0.7465, "step": 4398 }, { "epoch": 0.7181747683768009, "grad_norm": 1.9999345541000366, "learning_rate": 1.9941886022913523e-05, "loss": 0.8098, "step": 4399 }, { "epoch": 0.7183380270193054, "grad_norm": 2.05483341217041, "learning_rate": 1.994185185631655e-05, "loss": 0.7072, "step": 4400 }, { "epoch": 0.7185012856618097, "grad_norm": 2.3453845977783203, "learning_rate": 1.994181767970813e-05, "loss": 0.8156, "step": 4401 }, { "epoch": 0.7186645443043141, "grad_norm": 2.759714126586914, "learning_rate": 1.9941783493088304e-05, "loss": 0.923, "step": 4402 }, { "epoch": 0.7188278029468185, "grad_norm": 2.065807819366455, "learning_rate": 1.9941749296457103e-05, "loss": 0.8615, "step": 4403 }, { "epoch": 0.7189910615893229, "grad_norm": 1.7405942678451538, "learning_rate": 1.994171508981457e-05, "loss": 0.6686, "step": 4404 }, { "epoch": 0.7191543202318272, "grad_norm": 2.803520441055298, "learning_rate": 1.9941680873160727e-05, "loss": 0.8708, "step": 4405 }, { "epoch": 0.7193175788743317, "grad_norm": 1.911081314086914, "learning_rate": 1.9941646646495615e-05, "loss": 0.8535, "step": 4406 }, { "epoch": 0.719480837516836, "grad_norm": 1.9501444101333618, "learning_rate": 1.9941612409819265e-05, "loss": 0.7194, "step": 4407 }, { "epoch": 0.7196440961593404, "grad_norm": 1.9938591718673706, "learning_rate": 1.9941578163131717e-05, "loss": 0.8317, "step": 4408 }, { "epoch": 0.7198073548018448, "grad_norm": 1.904526948928833, "learning_rate": 1.9941543906433003e-05, "loss": 0.7517, "step": 4409 }, { "epoch": 0.7199706134443492, "grad_norm": 2.1266376972198486, "learning_rate": 1.9941509639723155e-05, "loss": 0.7963, "step": 4410 }, { "epoch": 0.7201338720868536, "grad_norm": 1.8949412107467651, "learning_rate": 1.994147536300221e-05, "loss": 0.8006, "step": 4411 }, { "epoch": 0.720297130729358, "grad_norm": 1.9772933721542358, "learning_rate": 1.99414410762702e-05, "loss": 1.0022, "step": 4412 }, { "epoch": 0.7204603893718624, "grad_norm": 1.732645034790039, "learning_rate": 1.9941406779527167e-05, "loss": 0.7157, "step": 4413 }, { "epoch": 0.7206236480143667, "grad_norm": 1.798436164855957, "learning_rate": 1.994137247277314e-05, "loss": 0.6785, "step": 4414 }, { "epoch": 0.7207869066568712, "grad_norm": 1.6932530403137207, "learning_rate": 1.9941338156008147e-05, "loss": 0.786, "step": 4415 }, { "epoch": 0.7209501652993755, "grad_norm": 1.8624871969223022, "learning_rate": 1.994130382923223e-05, "loss": 0.7434, "step": 4416 }, { "epoch": 0.7211134239418799, "grad_norm": 1.823852777481079, "learning_rate": 1.9941269492445427e-05, "loss": 0.8213, "step": 4417 }, { "epoch": 0.7212766825843843, "grad_norm": 1.9162726402282715, "learning_rate": 1.9941235145647763e-05, "loss": 0.824, "step": 4418 }, { "epoch": 0.7214399412268887, "grad_norm": 1.8077019453048706, "learning_rate": 1.9941200788839285e-05, "loss": 0.9382, "step": 4419 }, { "epoch": 0.721603199869393, "grad_norm": 1.7717796564102173, "learning_rate": 1.9941166422020016e-05, "loss": 0.6571, "step": 4420 }, { "epoch": 0.7217664585118975, "grad_norm": 2.067509889602661, "learning_rate": 1.9941132045189993e-05, "loss": 0.8508, "step": 4421 }, { "epoch": 0.7219297171544019, "grad_norm": 2.26788592338562, "learning_rate": 1.9941097658349256e-05, "loss": 0.8283, "step": 4422 }, { "epoch": 0.7220929757969062, "grad_norm": 1.917021632194519, "learning_rate": 1.9941063261497838e-05, "loss": 0.7346, "step": 4423 }, { "epoch": 0.7222562344394107, "grad_norm": 1.7852272987365723, "learning_rate": 1.9941028854635767e-05, "loss": 0.7495, "step": 4424 }, { "epoch": 0.722419493081915, "grad_norm": 1.7005994319915771, "learning_rate": 1.9940994437763085e-05, "loss": 0.6463, "step": 4425 }, { "epoch": 0.7225827517244194, "grad_norm": 2.0141804218292236, "learning_rate": 1.9940960010879825e-05, "loss": 0.7524, "step": 4426 }, { "epoch": 0.7227460103669238, "grad_norm": 2.0254030227661133, "learning_rate": 1.9940925573986018e-05, "loss": 0.8772, "step": 4427 }, { "epoch": 0.7229092690094282, "grad_norm": 1.6054635047912598, "learning_rate": 1.9940891127081704e-05, "loss": 0.6519, "step": 4428 }, { "epoch": 0.7230725276519325, "grad_norm": 2.340158700942993, "learning_rate": 1.9940856670166915e-05, "loss": 0.9471, "step": 4429 }, { "epoch": 0.723235786294437, "grad_norm": 1.890354037284851, "learning_rate": 1.9940822203241684e-05, "loss": 0.7177, "step": 4430 }, { "epoch": 0.7233990449369414, "grad_norm": 2.3571619987487793, "learning_rate": 1.994078772630605e-05, "loss": 0.6998, "step": 4431 }, { "epoch": 0.7235623035794457, "grad_norm": 2.3782622814178467, "learning_rate": 1.9940753239360047e-05, "loss": 0.8007, "step": 4432 }, { "epoch": 0.7237255622219502, "grad_norm": 2.500842571258545, "learning_rate": 1.9940718742403707e-05, "loss": 0.7501, "step": 4433 }, { "epoch": 0.7238888208644545, "grad_norm": 1.7871969938278198, "learning_rate": 1.9940684235437063e-05, "loss": 0.6593, "step": 4434 }, { "epoch": 0.7240520795069589, "grad_norm": 1.8737636804580688, "learning_rate": 1.9940649718460157e-05, "loss": 0.7737, "step": 4435 }, { "epoch": 0.7242153381494633, "grad_norm": 2.200291395187378, "learning_rate": 1.9940615191473015e-05, "loss": 0.8039, "step": 4436 }, { "epoch": 0.7243785967919677, "grad_norm": 1.956521987915039, "learning_rate": 1.994058065447568e-05, "loss": 0.7505, "step": 4437 }, { "epoch": 0.724541855434472, "grad_norm": 1.8933959007263184, "learning_rate": 1.994054610746818e-05, "loss": 0.7759, "step": 4438 }, { "epoch": 0.7247051140769765, "grad_norm": 2.11024808883667, "learning_rate": 1.9940511550450554e-05, "loss": 0.8867, "step": 4439 }, { "epoch": 0.7248683727194808, "grad_norm": 1.61370050907135, "learning_rate": 1.9940476983422834e-05, "loss": 0.6572, "step": 4440 }, { "epoch": 0.7250316313619852, "grad_norm": 1.6555801630020142, "learning_rate": 1.9940442406385057e-05, "loss": 0.8441, "step": 4441 }, { "epoch": 0.7251948900044897, "grad_norm": 2.0151753425598145, "learning_rate": 1.994040781933726e-05, "loss": 0.9194, "step": 4442 }, { "epoch": 0.725358148646994, "grad_norm": 2.0465376377105713, "learning_rate": 1.9940373222279473e-05, "loss": 0.8144, "step": 4443 }, { "epoch": 0.7255214072894984, "grad_norm": 2.4903249740600586, "learning_rate": 1.9940338615211732e-05, "loss": 0.8174, "step": 4444 }, { "epoch": 0.7256846659320028, "grad_norm": 1.8807685375213623, "learning_rate": 1.9940303998134075e-05, "loss": 0.8298, "step": 4445 }, { "epoch": 0.7258479245745072, "grad_norm": 2.66568922996521, "learning_rate": 1.994026937104653e-05, "loss": 0.9182, "step": 4446 }, { "epoch": 0.7260111832170115, "grad_norm": 1.9964847564697266, "learning_rate": 1.994023473394914e-05, "loss": 0.8194, "step": 4447 }, { "epoch": 0.726174441859516, "grad_norm": 2.089733123779297, "learning_rate": 1.9940200086841934e-05, "loss": 0.79, "step": 4448 }, { "epoch": 0.7263377005020203, "grad_norm": 1.882120966911316, "learning_rate": 1.9940165429724948e-05, "loss": 0.7946, "step": 4449 }, { "epoch": 0.7265009591445247, "grad_norm": 1.7768546342849731, "learning_rate": 1.9940130762598224e-05, "loss": 0.8142, "step": 4450 }, { "epoch": 0.726664217787029, "grad_norm": 2.1513869762420654, "learning_rate": 1.9940096085461787e-05, "loss": 1.0164, "step": 4451 }, { "epoch": 0.7268274764295335, "grad_norm": 1.8286242485046387, "learning_rate": 1.9940061398315674e-05, "loss": 0.7776, "step": 4452 }, { "epoch": 0.7269907350720379, "grad_norm": 1.778072714805603, "learning_rate": 1.9940026701159928e-05, "loss": 0.7149, "step": 4453 }, { "epoch": 0.7271539937145423, "grad_norm": 2.6977362632751465, "learning_rate": 1.993999199399457e-05, "loss": 0.8836, "step": 4454 }, { "epoch": 0.7273172523570467, "grad_norm": 1.6712161302566528, "learning_rate": 1.993995727681965e-05, "loss": 0.7576, "step": 4455 }, { "epoch": 0.727480510999551, "grad_norm": 2.0784361362457275, "learning_rate": 1.9939922549635192e-05, "loss": 0.8192, "step": 4456 }, { "epoch": 0.7276437696420555, "grad_norm": 1.839468002319336, "learning_rate": 1.9939887812441233e-05, "loss": 0.8589, "step": 4457 }, { "epoch": 0.7278070282845598, "grad_norm": 1.9465668201446533, "learning_rate": 1.993985306523781e-05, "loss": 0.8724, "step": 4458 }, { "epoch": 0.7279702869270642, "grad_norm": 2.0373129844665527, "learning_rate": 1.9939818308024962e-05, "loss": 0.894, "step": 4459 }, { "epoch": 0.7281335455695686, "grad_norm": 1.747261643409729, "learning_rate": 1.9939783540802715e-05, "loss": 0.7493, "step": 4460 }, { "epoch": 0.728296804212073, "grad_norm": 1.9915575981140137, "learning_rate": 1.993974876357111e-05, "loss": 0.9033, "step": 4461 }, { "epoch": 0.7284600628545773, "grad_norm": 2.0744011402130127, "learning_rate": 1.9939713976330182e-05, "loss": 0.7934, "step": 4462 }, { "epoch": 0.7286233214970818, "grad_norm": 1.8379077911376953, "learning_rate": 1.9939679179079964e-05, "loss": 0.7784, "step": 4463 }, { "epoch": 0.7287865801395862, "grad_norm": 2.0709240436553955, "learning_rate": 1.993964437182049e-05, "loss": 0.6351, "step": 4464 }, { "epoch": 0.7289498387820905, "grad_norm": 1.9836870431900024, "learning_rate": 1.99396095545518e-05, "loss": 0.8069, "step": 4465 }, { "epoch": 0.729113097424595, "grad_norm": 1.9226022958755493, "learning_rate": 1.9939574727273924e-05, "loss": 0.6556, "step": 4466 }, { "epoch": 0.7292763560670993, "grad_norm": 1.9196112155914307, "learning_rate": 1.9939539889986897e-05, "loss": 0.7753, "step": 4467 }, { "epoch": 0.7294396147096037, "grad_norm": 1.849435567855835, "learning_rate": 1.993950504269076e-05, "loss": 0.5915, "step": 4468 }, { "epoch": 0.729602873352108, "grad_norm": 2.0433857440948486, "learning_rate": 1.993947018538554e-05, "loss": 0.9091, "step": 4469 }, { "epoch": 0.7297661319946125, "grad_norm": 2.2304375171661377, "learning_rate": 1.9939435318071277e-05, "loss": 0.8822, "step": 4470 }, { "epoch": 0.7299293906371168, "grad_norm": 2.138397216796875, "learning_rate": 1.9939400440748008e-05, "loss": 0.8578, "step": 4471 }, { "epoch": 0.7300926492796213, "grad_norm": 1.9320099353790283, "learning_rate": 1.993936555341576e-05, "loss": 0.7436, "step": 4472 }, { "epoch": 0.7302559079221256, "grad_norm": 2.055783271789551, "learning_rate": 1.993933065607458e-05, "loss": 0.9926, "step": 4473 }, { "epoch": 0.73041916656463, "grad_norm": 2.0557861328125, "learning_rate": 1.9939295748724494e-05, "loss": 0.738, "step": 4474 }, { "epoch": 0.7305824252071345, "grad_norm": 2.1829559803009033, "learning_rate": 1.993926083136554e-05, "loss": 0.9589, "step": 4475 }, { "epoch": 0.7307456838496388, "grad_norm": 1.9490411281585693, "learning_rate": 1.9939225903997748e-05, "loss": 0.8352, "step": 4476 }, { "epoch": 0.7309089424921432, "grad_norm": 1.7162531614303589, "learning_rate": 1.993919096662117e-05, "loss": 0.663, "step": 4477 }, { "epoch": 0.7310722011346475, "grad_norm": 2.1170153617858887, "learning_rate": 1.9939156019235817e-05, "loss": 0.8473, "step": 4478 }, { "epoch": 0.731235459777152, "grad_norm": 2.079963207244873, "learning_rate": 1.993912106184174e-05, "loss": 0.8273, "step": 4479 }, { "epoch": 0.7313987184196563, "grad_norm": 1.9364622831344604, "learning_rate": 1.9939086094438975e-05, "loss": 0.8535, "step": 4480 }, { "epoch": 0.7315619770621608, "grad_norm": 2.1018033027648926, "learning_rate": 1.993905111702755e-05, "loss": 0.8389, "step": 4481 }, { "epoch": 0.7317252357046651, "grad_norm": 3.277167320251465, "learning_rate": 1.9939016129607503e-05, "loss": 0.9941, "step": 4482 }, { "epoch": 0.7318884943471695, "grad_norm": 2.1185555458068848, "learning_rate": 1.993898113217887e-05, "loss": 0.934, "step": 4483 }, { "epoch": 0.7320517529896738, "grad_norm": 2.1316545009613037, "learning_rate": 1.9938946124741684e-05, "loss": 0.893, "step": 4484 }, { "epoch": 0.7322150116321783, "grad_norm": 1.7952659130096436, "learning_rate": 1.9938911107295984e-05, "loss": 0.7511, "step": 4485 }, { "epoch": 0.7323782702746827, "grad_norm": 2.254964590072632, "learning_rate": 1.9938876079841804e-05, "loss": 0.8298, "step": 4486 }, { "epoch": 0.732541528917187, "grad_norm": 1.7881091833114624, "learning_rate": 1.9938841042379174e-05, "loss": 0.9017, "step": 4487 }, { "epoch": 0.7327047875596915, "grad_norm": 1.9769015312194824, "learning_rate": 1.993880599490814e-05, "loss": 0.8554, "step": 4488 }, { "epoch": 0.7328680462021958, "grad_norm": 2.071852207183838, "learning_rate": 1.9938770937428728e-05, "loss": 0.8367, "step": 4489 }, { "epoch": 0.7330313048447002, "grad_norm": 1.9506504535675049, "learning_rate": 1.9938735869940972e-05, "loss": 0.8202, "step": 4490 }, { "epoch": 0.7331945634872046, "grad_norm": 1.6314549446105957, "learning_rate": 1.993870079244492e-05, "loss": 0.7745, "step": 4491 }, { "epoch": 0.733357822129709, "grad_norm": 1.9646632671356201, "learning_rate": 1.9938665704940592e-05, "loss": 0.7403, "step": 4492 }, { "epoch": 0.7335210807722133, "grad_norm": 1.8802987337112427, "learning_rate": 1.9938630607428033e-05, "loss": 0.7444, "step": 4493 }, { "epoch": 0.7336843394147178, "grad_norm": 1.703447937965393, "learning_rate": 1.9938595499907274e-05, "loss": 0.8167, "step": 4494 }, { "epoch": 0.7338475980572221, "grad_norm": 1.7272857427597046, "learning_rate": 1.9938560382378353e-05, "loss": 0.6824, "step": 4495 }, { "epoch": 0.7340108566997265, "grad_norm": 2.1873269081115723, "learning_rate": 1.9938525254841305e-05, "loss": 0.8032, "step": 4496 }, { "epoch": 0.734174115342231, "grad_norm": 1.997214674949646, "learning_rate": 1.9938490117296165e-05, "loss": 0.7912, "step": 4497 }, { "epoch": 0.7343373739847353, "grad_norm": 2.073227882385254, "learning_rate": 1.993845496974297e-05, "loss": 0.8176, "step": 4498 }, { "epoch": 0.7345006326272397, "grad_norm": 2.1228950023651123, "learning_rate": 1.9938419812181747e-05, "loss": 0.8408, "step": 4499 }, { "epoch": 0.7346638912697441, "grad_norm": 1.8168696165084839, "learning_rate": 1.9938384644612542e-05, "loss": 0.817, "step": 4500 }, { "epoch": 0.7348271499122485, "grad_norm": 1.7455905675888062, "learning_rate": 1.9938349467035386e-05, "loss": 0.8409, "step": 4501 }, { "epoch": 0.7349904085547528, "grad_norm": 1.759324312210083, "learning_rate": 1.9938314279450318e-05, "loss": 0.6535, "step": 4502 }, { "epoch": 0.7351536671972573, "grad_norm": 1.982790470123291, "learning_rate": 1.9938279081857367e-05, "loss": 0.8982, "step": 4503 }, { "epoch": 0.7353169258397616, "grad_norm": 2.3180794715881348, "learning_rate": 1.9938243874256572e-05, "loss": 0.6466, "step": 4504 }, { "epoch": 0.735480184482266, "grad_norm": 1.9459507465362549, "learning_rate": 1.9938208656647964e-05, "loss": 0.8078, "step": 4505 }, { "epoch": 0.7356434431247704, "grad_norm": 1.7770904302597046, "learning_rate": 1.9938173429031588e-05, "loss": 0.8085, "step": 4506 }, { "epoch": 0.7358067017672748, "grad_norm": 1.8279656171798706, "learning_rate": 1.9938138191407473e-05, "loss": 0.7013, "step": 4507 }, { "epoch": 0.7359699604097792, "grad_norm": 2.4340391159057617, "learning_rate": 1.9938102943775653e-05, "loss": 0.8248, "step": 4508 }, { "epoch": 0.7361332190522836, "grad_norm": 2.24891996383667, "learning_rate": 1.9938067686136167e-05, "loss": 0.8393, "step": 4509 }, { "epoch": 0.736296477694788, "grad_norm": 1.69552743434906, "learning_rate": 1.993803241848905e-05, "loss": 0.7323, "step": 4510 }, { "epoch": 0.7364597363372923, "grad_norm": 2.2317330837249756, "learning_rate": 1.9937997140834338e-05, "loss": 0.9625, "step": 4511 }, { "epoch": 0.7366229949797968, "grad_norm": 2.0568790435791016, "learning_rate": 1.9937961853172064e-05, "loss": 0.9577, "step": 4512 }, { "epoch": 0.7367862536223011, "grad_norm": 1.8632373809814453, "learning_rate": 1.9937926555502268e-05, "loss": 0.7166, "step": 4513 }, { "epoch": 0.7369495122648055, "grad_norm": 2.076192617416382, "learning_rate": 1.9937891247824977e-05, "loss": 0.9299, "step": 4514 }, { "epoch": 0.7371127709073099, "grad_norm": 1.8990850448608398, "learning_rate": 1.9937855930140237e-05, "loss": 0.8555, "step": 4515 }, { "epoch": 0.7372760295498143, "grad_norm": 1.7650129795074463, "learning_rate": 1.9937820602448076e-05, "loss": 0.6615, "step": 4516 }, { "epoch": 0.7374392881923186, "grad_norm": 1.987490177154541, "learning_rate": 1.9937785264748536e-05, "loss": 0.9224, "step": 4517 }, { "epoch": 0.7376025468348231, "grad_norm": 1.9159023761749268, "learning_rate": 1.9937749917041645e-05, "loss": 0.7918, "step": 4518 }, { "epoch": 0.7377658054773275, "grad_norm": 1.8243857622146606, "learning_rate": 1.9937714559327445e-05, "loss": 0.8178, "step": 4519 }, { "epoch": 0.7379290641198318, "grad_norm": 1.8879094123840332, "learning_rate": 1.9937679191605964e-05, "loss": 0.879, "step": 4520 }, { "epoch": 0.7380923227623363, "grad_norm": 1.7971030473709106, "learning_rate": 1.9937643813877247e-05, "loss": 0.8109, "step": 4521 }, { "epoch": 0.7382555814048406, "grad_norm": 2.050477981567383, "learning_rate": 1.9937608426141326e-05, "loss": 0.8226, "step": 4522 }, { "epoch": 0.738418840047345, "grad_norm": 1.600393533706665, "learning_rate": 1.9937573028398233e-05, "loss": 0.664, "step": 4523 }, { "epoch": 0.7385820986898494, "grad_norm": 2.0359764099121094, "learning_rate": 1.993753762064801e-05, "loss": 0.8488, "step": 4524 }, { "epoch": 0.7387453573323538, "grad_norm": 1.86660897731781, "learning_rate": 1.993750220289069e-05, "loss": 0.8534, "step": 4525 }, { "epoch": 0.7389086159748581, "grad_norm": 2.136317491531372, "learning_rate": 1.9937466775126305e-05, "loss": 0.8335, "step": 4526 }, { "epoch": 0.7390718746173626, "grad_norm": 2.17376971244812, "learning_rate": 1.9937431337354894e-05, "loss": 0.9482, "step": 4527 }, { "epoch": 0.7392351332598669, "grad_norm": 2.136197566986084, "learning_rate": 1.993739588957649e-05, "loss": 0.8684, "step": 4528 }, { "epoch": 0.7393983919023713, "grad_norm": 1.655583143234253, "learning_rate": 1.9937360431791136e-05, "loss": 0.7033, "step": 4529 }, { "epoch": 0.7395616505448758, "grad_norm": 2.09268856048584, "learning_rate": 1.9937324963998858e-05, "loss": 0.8713, "step": 4530 }, { "epoch": 0.7397249091873801, "grad_norm": 1.774221420288086, "learning_rate": 1.9937289486199696e-05, "loss": 0.6822, "step": 4531 }, { "epoch": 0.7398881678298845, "grad_norm": 2.0438027381896973, "learning_rate": 1.993725399839369e-05, "loss": 0.7528, "step": 4532 }, { "epoch": 0.7400514264723889, "grad_norm": 1.654295563697815, "learning_rate": 1.993721850058087e-05, "loss": 0.6219, "step": 4533 }, { "epoch": 0.7402146851148933, "grad_norm": 1.7724084854125977, "learning_rate": 1.9937182992761276e-05, "loss": 0.84, "step": 4534 }, { "epoch": 0.7403779437573976, "grad_norm": 1.8691307306289673, "learning_rate": 1.9937147474934936e-05, "loss": 0.9156, "step": 4535 }, { "epoch": 0.7405412023999021, "grad_norm": 1.8682236671447754, "learning_rate": 1.9937111947101897e-05, "loss": 0.7704, "step": 4536 }, { "epoch": 0.7407044610424064, "grad_norm": 1.8265695571899414, "learning_rate": 1.9937076409262187e-05, "loss": 0.8736, "step": 4537 }, { "epoch": 0.7408677196849108, "grad_norm": 2.2403807640075684, "learning_rate": 1.9937040861415843e-05, "loss": 1.1012, "step": 4538 }, { "epoch": 0.7410309783274152, "grad_norm": 2.3451006412506104, "learning_rate": 1.9937005303562903e-05, "loss": 1.0008, "step": 4539 }, { "epoch": 0.7411942369699196, "grad_norm": 1.5230185985565186, "learning_rate": 1.9936969735703396e-05, "loss": 0.5971, "step": 4540 }, { "epoch": 0.741357495612424, "grad_norm": 2.101780414581299, "learning_rate": 1.9936934157837368e-05, "loss": 0.9586, "step": 4541 }, { "epoch": 0.7415207542549284, "grad_norm": 2.001110315322876, "learning_rate": 1.993689856996485e-05, "loss": 0.8577, "step": 4542 }, { "epoch": 0.7416840128974328, "grad_norm": 1.8413203954696655, "learning_rate": 1.9936862972085874e-05, "loss": 0.8253, "step": 4543 }, { "epoch": 0.7418472715399371, "grad_norm": 2.646864175796509, "learning_rate": 1.9936827364200483e-05, "loss": 0.9044, "step": 4544 }, { "epoch": 0.7420105301824416, "grad_norm": 1.8788542747497559, "learning_rate": 1.993679174630871e-05, "loss": 0.8153, "step": 4545 }, { "epoch": 0.7421737888249459, "grad_norm": 1.7516509294509888, "learning_rate": 1.993675611841059e-05, "loss": 0.7019, "step": 4546 }, { "epoch": 0.7423370474674503, "grad_norm": 2.01668119430542, "learning_rate": 1.9936720480506158e-05, "loss": 0.7598, "step": 4547 }, { "epoch": 0.7425003061099547, "grad_norm": 2.1090736389160156, "learning_rate": 1.993668483259545e-05, "loss": 0.9602, "step": 4548 }, { "epoch": 0.7426635647524591, "grad_norm": 2.140662431716919, "learning_rate": 1.9936649174678508e-05, "loss": 0.7182, "step": 4549 }, { "epoch": 0.7428268233949634, "grad_norm": 2.0196878910064697, "learning_rate": 1.9936613506755357e-05, "loss": 0.8091, "step": 4550 }, { "epoch": 0.7429900820374679, "grad_norm": 2.254681348800659, "learning_rate": 1.993657782882604e-05, "loss": 0.9833, "step": 4551 }, { "epoch": 0.7431533406799723, "grad_norm": 1.9238251447677612, "learning_rate": 1.9936542140890595e-05, "loss": 0.8406, "step": 4552 }, { "epoch": 0.7433165993224766, "grad_norm": 1.9392521381378174, "learning_rate": 1.9936506442949054e-05, "loss": 0.7429, "step": 4553 }, { "epoch": 0.7434798579649811, "grad_norm": 1.7947956323623657, "learning_rate": 1.9936470735001448e-05, "loss": 0.7023, "step": 4554 }, { "epoch": 0.7436431166074854, "grad_norm": 5.196544170379639, "learning_rate": 1.9936435017047826e-05, "loss": 0.9003, "step": 4555 }, { "epoch": 0.7438063752499898, "grad_norm": 2.1067655086517334, "learning_rate": 1.9936399289088213e-05, "loss": 0.8855, "step": 4556 }, { "epoch": 0.7439696338924942, "grad_norm": 1.9620591402053833, "learning_rate": 1.993636355112265e-05, "loss": 0.7034, "step": 4557 }, { "epoch": 0.7441328925349986, "grad_norm": 1.7370753288269043, "learning_rate": 1.993632780315117e-05, "loss": 0.6889, "step": 4558 }, { "epoch": 0.7442961511775029, "grad_norm": 1.8719536066055298, "learning_rate": 1.993629204517381e-05, "loss": 0.8508, "step": 4559 }, { "epoch": 0.7444594098200074, "grad_norm": 2.041933536529541, "learning_rate": 1.9936256277190608e-05, "loss": 0.8183, "step": 4560 }, { "epoch": 0.7446226684625117, "grad_norm": 2.0351321697235107, "learning_rate": 1.99362204992016e-05, "loss": 0.8701, "step": 4561 }, { "epoch": 0.7447859271050161, "grad_norm": 1.898887038230896, "learning_rate": 1.993618471120682e-05, "loss": 0.7686, "step": 4562 }, { "epoch": 0.7449491857475206, "grad_norm": 2.1133129596710205, "learning_rate": 1.99361489132063e-05, "loss": 0.7291, "step": 4563 }, { "epoch": 0.7451124443900249, "grad_norm": 1.756273627281189, "learning_rate": 1.9936113105200085e-05, "loss": 0.7525, "step": 4564 }, { "epoch": 0.7452757030325293, "grad_norm": 1.82601797580719, "learning_rate": 1.9936077287188206e-05, "loss": 0.6829, "step": 4565 }, { "epoch": 0.7454389616750337, "grad_norm": 1.9084341526031494, "learning_rate": 1.99360414591707e-05, "loss": 0.9182, "step": 4566 }, { "epoch": 0.7456022203175381, "grad_norm": 1.4208651781082153, "learning_rate": 1.9936005621147604e-05, "loss": 0.5942, "step": 4567 }, { "epoch": 0.7457654789600424, "grad_norm": 1.8099150657653809, "learning_rate": 1.993596977311895e-05, "loss": 0.7734, "step": 4568 }, { "epoch": 0.7459287376025469, "grad_norm": 1.9993034601211548, "learning_rate": 1.993593391508478e-05, "loss": 0.8243, "step": 4569 }, { "epoch": 0.7460919962450512, "grad_norm": 2.488537311553955, "learning_rate": 1.9935898047045126e-05, "loss": 0.8958, "step": 4570 }, { "epoch": 0.7462552548875556, "grad_norm": 2.1328558921813965, "learning_rate": 1.9935862169000023e-05, "loss": 0.9041, "step": 4571 }, { "epoch": 0.74641851353006, "grad_norm": 1.924705982208252, "learning_rate": 1.9935826280949513e-05, "loss": 0.8744, "step": 4572 }, { "epoch": 0.7465817721725644, "grad_norm": 1.9434748888015747, "learning_rate": 1.9935790382893626e-05, "loss": 0.7799, "step": 4573 }, { "epoch": 0.7467450308150688, "grad_norm": 2.048208475112915, "learning_rate": 1.9935754474832403e-05, "loss": 0.8689, "step": 4574 }, { "epoch": 0.7469082894575731, "grad_norm": 2.0442686080932617, "learning_rate": 1.9935718556765878e-05, "loss": 0.8505, "step": 4575 }, { "epoch": 0.7470715481000776, "grad_norm": 2.0635268688201904, "learning_rate": 1.9935682628694085e-05, "loss": 0.8212, "step": 4576 }, { "epoch": 0.7472348067425819, "grad_norm": 2.116694927215576, "learning_rate": 1.9935646690617063e-05, "loss": 0.8075, "step": 4577 }, { "epoch": 0.7473980653850864, "grad_norm": 1.9998581409454346, "learning_rate": 1.9935610742534845e-05, "loss": 0.878, "step": 4578 }, { "epoch": 0.7475613240275907, "grad_norm": 2.115821123123169, "learning_rate": 1.9935574784447473e-05, "loss": 0.6966, "step": 4579 }, { "epoch": 0.7477245826700951, "grad_norm": 1.9248027801513672, "learning_rate": 1.993553881635498e-05, "loss": 0.7794, "step": 4580 }, { "epoch": 0.7478878413125994, "grad_norm": 2.1643948554992676, "learning_rate": 1.9935502838257403e-05, "loss": 0.7905, "step": 4581 }, { "epoch": 0.7480510999551039, "grad_norm": 2.010206699371338, "learning_rate": 1.993546685015477e-05, "loss": 0.7532, "step": 4582 }, { "epoch": 0.7482143585976082, "grad_norm": 1.829881191253662, "learning_rate": 1.9935430852047133e-05, "loss": 0.8127, "step": 4583 }, { "epoch": 0.7483776172401126, "grad_norm": 1.9014230966567993, "learning_rate": 1.9935394843934513e-05, "loss": 0.6807, "step": 4584 }, { "epoch": 0.7485408758826171, "grad_norm": 1.834202766418457, "learning_rate": 1.9935358825816958e-05, "loss": 0.7037, "step": 4585 }, { "epoch": 0.7487041345251214, "grad_norm": 1.898146390914917, "learning_rate": 1.99353227976945e-05, "loss": 0.755, "step": 4586 }, { "epoch": 0.7488673931676258, "grad_norm": 2.0748255252838135, "learning_rate": 1.993528675956717e-05, "loss": 0.8054, "step": 4587 }, { "epoch": 0.7490306518101302, "grad_norm": 1.707938551902771, "learning_rate": 1.9935250711435016e-05, "loss": 0.6984, "step": 4588 }, { "epoch": 0.7491939104526346, "grad_norm": 2.3744587898254395, "learning_rate": 1.9935214653298057e-05, "loss": 0.7162, "step": 4589 }, { "epoch": 0.7493571690951389, "grad_norm": 2.039430856704712, "learning_rate": 1.993517858515635e-05, "loss": 0.8329, "step": 4590 }, { "epoch": 0.7495204277376434, "grad_norm": 2.0659639835357666, "learning_rate": 1.9935142507009914e-05, "loss": 0.9205, "step": 4591 }, { "epoch": 0.7496836863801477, "grad_norm": 2.235739231109619, "learning_rate": 1.9935106418858793e-05, "loss": 0.882, "step": 4592 }, { "epoch": 0.7498469450226521, "grad_norm": 1.8451383113861084, "learning_rate": 1.993507032070302e-05, "loss": 0.7907, "step": 4593 }, { "epoch": 0.7500102036651565, "grad_norm": 1.8548585176467896, "learning_rate": 1.993503421254264e-05, "loss": 0.8617, "step": 4594 }, { "epoch": 0.7501734623076609, "grad_norm": 2.1970179080963135, "learning_rate": 1.993499809437768e-05, "loss": 0.7481, "step": 4595 }, { "epoch": 0.7503367209501653, "grad_norm": 1.9062014818191528, "learning_rate": 1.993496196620818e-05, "loss": 0.6913, "step": 4596 }, { "epoch": 0.7504999795926697, "grad_norm": 1.8940649032592773, "learning_rate": 1.9934925828034174e-05, "loss": 0.8878, "step": 4597 }, { "epoch": 0.7506632382351741, "grad_norm": 1.7794382572174072, "learning_rate": 1.9934889679855706e-05, "loss": 0.8393, "step": 4598 }, { "epoch": 0.7508264968776784, "grad_norm": 1.7150719165802002, "learning_rate": 1.9934853521672802e-05, "loss": 0.8862, "step": 4599 }, { "epoch": 0.7509897555201829, "grad_norm": 2.082970380783081, "learning_rate": 1.99348173534855e-05, "loss": 0.8946, "step": 4600 }, { "epoch": 0.7511530141626872, "grad_norm": 1.7865692377090454, "learning_rate": 1.9934781175293847e-05, "loss": 0.653, "step": 4601 }, { "epoch": 0.7513162728051916, "grad_norm": 2.137843132019043, "learning_rate": 1.993474498709787e-05, "loss": 0.7865, "step": 4602 }, { "epoch": 0.751479531447696, "grad_norm": 2.069361686706543, "learning_rate": 1.9934708788897606e-05, "loss": 0.9781, "step": 4603 }, { "epoch": 0.7516427900902004, "grad_norm": 2.074106454849243, "learning_rate": 1.993467258069309e-05, "loss": 0.7962, "step": 4604 }, { "epoch": 0.7518060487327047, "grad_norm": 1.688706874847412, "learning_rate": 1.9934636362484364e-05, "loss": 0.6754, "step": 4605 }, { "epoch": 0.7519693073752092, "grad_norm": 1.7413018941879272, "learning_rate": 1.9934600134271463e-05, "loss": 0.74, "step": 4606 }, { "epoch": 0.7521325660177136, "grad_norm": 1.9484628438949585, "learning_rate": 1.9934563896054423e-05, "loss": 0.7287, "step": 4607 }, { "epoch": 0.7522958246602179, "grad_norm": 1.9032983779907227, "learning_rate": 1.9934527647833276e-05, "loss": 0.6762, "step": 4608 }, { "epoch": 0.7524590833027224, "grad_norm": 1.8567625284194946, "learning_rate": 1.9934491389608067e-05, "loss": 0.736, "step": 4609 }, { "epoch": 0.7526223419452267, "grad_norm": 2.129917621612549, "learning_rate": 1.9934455121378822e-05, "loss": 0.7626, "step": 4610 }, { "epoch": 0.7527856005877311, "grad_norm": 1.8982903957366943, "learning_rate": 1.9934418843145587e-05, "loss": 0.8415, "step": 4611 }, { "epoch": 0.7529488592302355, "grad_norm": 2.0184431076049805, "learning_rate": 1.9934382554908395e-05, "loss": 0.8828, "step": 4612 }, { "epoch": 0.7531121178727399, "grad_norm": 2.0772407054901123, "learning_rate": 1.9934346256667282e-05, "loss": 0.6562, "step": 4613 }, { "epoch": 0.7532753765152442, "grad_norm": 2.028277635574341, "learning_rate": 1.9934309948422287e-05, "loss": 0.9042, "step": 4614 }, { "epoch": 0.7534386351577487, "grad_norm": 1.9939104318618774, "learning_rate": 1.993427363017344e-05, "loss": 0.7808, "step": 4615 }, { "epoch": 0.753601893800253, "grad_norm": 1.923258662223816, "learning_rate": 1.9934237301920785e-05, "loss": 0.8258, "step": 4616 }, { "epoch": 0.7537651524427574, "grad_norm": 1.8246545791625977, "learning_rate": 1.9934200963664356e-05, "loss": 0.7033, "step": 4617 }, { "epoch": 0.7539284110852619, "grad_norm": 2.1785573959350586, "learning_rate": 1.993416461540419e-05, "loss": 0.8981, "step": 4618 }, { "epoch": 0.7540916697277662, "grad_norm": 2.1557085514068604, "learning_rate": 1.993412825714032e-05, "loss": 0.9544, "step": 4619 }, { "epoch": 0.7542549283702706, "grad_norm": 2.243162155151367, "learning_rate": 1.9934091888872785e-05, "loss": 0.8437, "step": 4620 }, { "epoch": 0.754418187012775, "grad_norm": 1.753284215927124, "learning_rate": 1.9934055510601625e-05, "loss": 0.7302, "step": 4621 }, { "epoch": 0.7545814456552794, "grad_norm": 1.8497236967086792, "learning_rate": 1.9934019122326873e-05, "loss": 0.7348, "step": 4622 }, { "epoch": 0.7547447042977837, "grad_norm": 1.950338363647461, "learning_rate": 1.9933982724048568e-05, "loss": 0.673, "step": 4623 }, { "epoch": 0.7549079629402882, "grad_norm": 1.8835318088531494, "learning_rate": 1.9933946315766742e-05, "loss": 0.7368, "step": 4624 }, { "epoch": 0.7550712215827925, "grad_norm": 2.0387613773345947, "learning_rate": 1.9933909897481434e-05, "loss": 0.7548, "step": 4625 }, { "epoch": 0.7552344802252969, "grad_norm": 1.9279372692108154, "learning_rate": 1.9933873469192683e-05, "loss": 0.7164, "step": 4626 }, { "epoch": 0.7553977388678013, "grad_norm": 2.3233654499053955, "learning_rate": 1.993383703090052e-05, "loss": 0.8276, "step": 4627 }, { "epoch": 0.7555609975103057, "grad_norm": 2.139484167098999, "learning_rate": 1.9933800582604994e-05, "loss": 0.8029, "step": 4628 }, { "epoch": 0.7557242561528101, "grad_norm": 1.6661276817321777, "learning_rate": 1.993376412430613e-05, "loss": 0.5543, "step": 4629 }, { "epoch": 0.7558875147953145, "grad_norm": 2.394298553466797, "learning_rate": 1.9933727656003964e-05, "loss": 0.9944, "step": 4630 }, { "epoch": 0.7560507734378189, "grad_norm": 2.443758249282837, "learning_rate": 1.993369117769854e-05, "loss": 1.0998, "step": 4631 }, { "epoch": 0.7562140320803232, "grad_norm": 1.943636178970337, "learning_rate": 1.9933654689389893e-05, "loss": 0.7691, "step": 4632 }, { "epoch": 0.7563772907228277, "grad_norm": 2.244948387145996, "learning_rate": 1.993361819107806e-05, "loss": 1.0348, "step": 4633 }, { "epoch": 0.756540549365332, "grad_norm": 2.144317626953125, "learning_rate": 1.993358168276307e-05, "loss": 0.7825, "step": 4634 }, { "epoch": 0.7567038080078364, "grad_norm": 2.0784707069396973, "learning_rate": 1.9933545164444973e-05, "loss": 0.7839, "step": 4635 }, { "epoch": 0.7568670666503408, "grad_norm": 1.8936043977737427, "learning_rate": 1.9933508636123793e-05, "loss": 0.6154, "step": 4636 }, { "epoch": 0.7570303252928452, "grad_norm": 1.9787250757217407, "learning_rate": 1.9933472097799574e-05, "loss": 0.878, "step": 4637 }, { "epoch": 0.7571935839353495, "grad_norm": 2.2274301052093506, "learning_rate": 1.9933435549472354e-05, "loss": 0.902, "step": 4638 }, { "epoch": 0.757356842577854, "grad_norm": 2.0145561695098877, "learning_rate": 1.993339899114216e-05, "loss": 0.8602, "step": 4639 }, { "epoch": 0.7575201012203584, "grad_norm": 1.7700239419937134, "learning_rate": 1.9933362422809043e-05, "loss": 0.7816, "step": 4640 }, { "epoch": 0.7576833598628627, "grad_norm": 1.8755545616149902, "learning_rate": 1.993332584447303e-05, "loss": 0.7278, "step": 4641 }, { "epoch": 0.7578466185053672, "grad_norm": 1.8476940393447876, "learning_rate": 1.9933289256134162e-05, "loss": 0.764, "step": 4642 }, { "epoch": 0.7580098771478715, "grad_norm": 1.626434564590454, "learning_rate": 1.993325265779247e-05, "loss": 0.5623, "step": 4643 }, { "epoch": 0.7581731357903759, "grad_norm": 1.8222821950912476, "learning_rate": 1.9933216049448003e-05, "loss": 0.7164, "step": 4644 }, { "epoch": 0.7583363944328803, "grad_norm": 1.6290500164031982, "learning_rate": 1.9933179431100783e-05, "loss": 0.5175, "step": 4645 }, { "epoch": 0.7584996530753847, "grad_norm": 1.8754215240478516, "learning_rate": 1.9933142802750856e-05, "loss": 0.7563, "step": 4646 }, { "epoch": 0.758662911717889, "grad_norm": 1.8576394319534302, "learning_rate": 1.9933106164398257e-05, "loss": 0.7555, "step": 4647 }, { "epoch": 0.7588261703603935, "grad_norm": 1.7232359647750854, "learning_rate": 1.993306951604302e-05, "loss": 0.612, "step": 4648 }, { "epoch": 0.7589894290028978, "grad_norm": 1.851973295211792, "learning_rate": 1.9933032857685187e-05, "loss": 0.7261, "step": 4649 }, { "epoch": 0.7591526876454022, "grad_norm": 1.763386607170105, "learning_rate": 1.9932996189324796e-05, "loss": 0.7674, "step": 4650 }, { "epoch": 0.7593159462879067, "grad_norm": 1.9506514072418213, "learning_rate": 1.9932959510961877e-05, "loss": 0.7898, "step": 4651 }, { "epoch": 0.759479204930411, "grad_norm": 1.608551263809204, "learning_rate": 1.993292282259647e-05, "loss": 0.6283, "step": 4652 }, { "epoch": 0.7596424635729154, "grad_norm": 1.7226485013961792, "learning_rate": 1.9932886124228615e-05, "loss": 0.6519, "step": 4653 }, { "epoch": 0.7598057222154198, "grad_norm": 2.018169641494751, "learning_rate": 1.9932849415858344e-05, "loss": 0.6938, "step": 4654 }, { "epoch": 0.7599689808579242, "grad_norm": 2.0303313732147217, "learning_rate": 1.9932812697485695e-05, "loss": 0.9363, "step": 4655 }, { "epoch": 0.7601322395004285, "grad_norm": 1.918703556060791, "learning_rate": 1.993277596911071e-05, "loss": 0.8338, "step": 4656 }, { "epoch": 0.760295498142933, "grad_norm": 2.071023464202881, "learning_rate": 1.993273923073342e-05, "loss": 0.8917, "step": 4657 }, { "epoch": 0.7604587567854373, "grad_norm": 1.9688845872879028, "learning_rate": 1.9932702482353864e-05, "loss": 0.7896, "step": 4658 }, { "epoch": 0.7606220154279417, "grad_norm": 1.9989173412322998, "learning_rate": 1.993266572397208e-05, "loss": 0.8769, "step": 4659 }, { "epoch": 0.760785274070446, "grad_norm": 2.200631856918335, "learning_rate": 1.9932628955588103e-05, "loss": 0.7868, "step": 4660 }, { "epoch": 0.7609485327129505, "grad_norm": 2.1566972732543945, "learning_rate": 1.9932592177201974e-05, "loss": 0.7045, "step": 4661 }, { "epoch": 0.7611117913554549, "grad_norm": 1.9910906553268433, "learning_rate": 1.9932555388813727e-05, "loss": 0.8742, "step": 4662 }, { "epoch": 0.7612750499979593, "grad_norm": 1.8481934070587158, "learning_rate": 1.9932518590423396e-05, "loss": 0.737, "step": 4663 }, { "epoch": 0.7614383086404637, "grad_norm": 2.0628092288970947, "learning_rate": 1.9932481782031023e-05, "loss": 0.8166, "step": 4664 }, { "epoch": 0.761601567282968, "grad_norm": 2.189767360687256, "learning_rate": 1.9932444963636644e-05, "loss": 0.869, "step": 4665 }, { "epoch": 0.7617648259254725, "grad_norm": 2.3621294498443604, "learning_rate": 1.9932408135240297e-05, "loss": 0.9071, "step": 4666 }, { "epoch": 0.7619280845679768, "grad_norm": 1.9231842756271362, "learning_rate": 1.9932371296842015e-05, "loss": 0.8447, "step": 4667 }, { "epoch": 0.7620913432104812, "grad_norm": 1.7949057817459106, "learning_rate": 1.993233444844184e-05, "loss": 0.7444, "step": 4668 }, { "epoch": 0.7622546018529855, "grad_norm": 1.9224872589111328, "learning_rate": 1.9932297590039804e-05, "loss": 0.8879, "step": 4669 }, { "epoch": 0.76241786049549, "grad_norm": 1.7565805912017822, "learning_rate": 1.9932260721635946e-05, "loss": 0.6991, "step": 4670 }, { "epoch": 0.7625811191379943, "grad_norm": 2.1124050617218018, "learning_rate": 1.993222384323031e-05, "loss": 0.9798, "step": 4671 }, { "epoch": 0.7627443777804988, "grad_norm": 1.6439214944839478, "learning_rate": 1.993218695482292e-05, "loss": 0.6485, "step": 4672 }, { "epoch": 0.7629076364230032, "grad_norm": 2.0205078125, "learning_rate": 1.993215005641383e-05, "loss": 0.872, "step": 4673 }, { "epoch": 0.7630708950655075, "grad_norm": 1.8556334972381592, "learning_rate": 1.9932113148003057e-05, "loss": 0.686, "step": 4674 }, { "epoch": 0.763234153708012, "grad_norm": 1.7366294860839844, "learning_rate": 1.9932076229590655e-05, "loss": 0.812, "step": 4675 }, { "epoch": 0.7633974123505163, "grad_norm": 1.9102355241775513, "learning_rate": 1.9932039301176654e-05, "loss": 0.8177, "step": 4676 }, { "epoch": 0.7635606709930207, "grad_norm": 1.694512128829956, "learning_rate": 1.993200236276109e-05, "loss": 0.6744, "step": 4677 }, { "epoch": 0.763723929635525, "grad_norm": 2.0839977264404297, "learning_rate": 1.9931965414344004e-05, "loss": 0.8284, "step": 4678 }, { "epoch": 0.7638871882780295, "grad_norm": 1.9487155675888062, "learning_rate": 1.9931928455925433e-05, "loss": 0.7822, "step": 4679 }, { "epoch": 0.7640504469205338, "grad_norm": 1.6613975763320923, "learning_rate": 1.993189148750541e-05, "loss": 0.564, "step": 4680 }, { "epoch": 0.7642137055630382, "grad_norm": 2.0825307369232178, "learning_rate": 1.9931854509083975e-05, "loss": 0.7952, "step": 4681 }, { "epoch": 0.7643769642055427, "grad_norm": 1.6338077783584595, "learning_rate": 1.9931817520661165e-05, "loss": 0.6926, "step": 4682 }, { "epoch": 0.764540222848047, "grad_norm": 1.9700241088867188, "learning_rate": 1.9931780522237018e-05, "loss": 0.8455, "step": 4683 }, { "epoch": 0.7647034814905515, "grad_norm": 2.0447447299957275, "learning_rate": 1.9931743513811573e-05, "loss": 0.8625, "step": 4684 }, { "epoch": 0.7648667401330558, "grad_norm": 1.913874864578247, "learning_rate": 1.9931706495384865e-05, "loss": 0.7448, "step": 4685 }, { "epoch": 0.7650299987755602, "grad_norm": 2.4198153018951416, "learning_rate": 1.9931669466956927e-05, "loss": 0.8871, "step": 4686 }, { "epoch": 0.7651932574180645, "grad_norm": 2.0204646587371826, "learning_rate": 1.9931632428527803e-05, "loss": 0.8744, "step": 4687 }, { "epoch": 0.765356516060569, "grad_norm": 1.941763997077942, "learning_rate": 1.9931595380097524e-05, "loss": 0.8268, "step": 4688 }, { "epoch": 0.7655197747030733, "grad_norm": 1.6127785444259644, "learning_rate": 1.9931558321666134e-05, "loss": 0.6734, "step": 4689 }, { "epoch": 0.7656830333455777, "grad_norm": 1.7478084564208984, "learning_rate": 1.993152125323367e-05, "loss": 0.728, "step": 4690 }, { "epoch": 0.7658462919880821, "grad_norm": 2.241806745529175, "learning_rate": 1.9931484174800163e-05, "loss": 0.8486, "step": 4691 }, { "epoch": 0.7660095506305865, "grad_norm": 2.177450180053711, "learning_rate": 1.9931447086365657e-05, "loss": 0.8279, "step": 4692 }, { "epoch": 0.766172809273091, "grad_norm": 2.2248799800872803, "learning_rate": 1.9931409987930185e-05, "loss": 1.0418, "step": 4693 }, { "epoch": 0.7663360679155953, "grad_norm": 1.9920138120651245, "learning_rate": 1.9931372879493788e-05, "loss": 0.5998, "step": 4694 }, { "epoch": 0.7664993265580997, "grad_norm": 1.8563570976257324, "learning_rate": 1.9931335761056497e-05, "loss": 0.8525, "step": 4695 }, { "epoch": 0.766662585200604, "grad_norm": 1.9834612607955933, "learning_rate": 1.9931298632618355e-05, "loss": 0.7254, "step": 4696 }, { "epoch": 0.7668258438431085, "grad_norm": 1.8087358474731445, "learning_rate": 1.9931261494179398e-05, "loss": 0.7608, "step": 4697 }, { "epoch": 0.7669891024856128, "grad_norm": 2.1756250858306885, "learning_rate": 1.9931224345739664e-05, "loss": 0.7805, "step": 4698 }, { "epoch": 0.7671523611281172, "grad_norm": 2.1738898754119873, "learning_rate": 1.993118718729919e-05, "loss": 0.7425, "step": 4699 }, { "epoch": 0.7673156197706216, "grad_norm": 1.6675218343734741, "learning_rate": 1.9931150018858013e-05, "loss": 0.7173, "step": 4700 }, { "epoch": 0.767478878413126, "grad_norm": 1.959957480430603, "learning_rate": 1.993111284041617e-05, "loss": 0.9401, "step": 4701 }, { "epoch": 0.7676421370556303, "grad_norm": 2.18278431892395, "learning_rate": 1.99310756519737e-05, "loss": 0.7895, "step": 4702 }, { "epoch": 0.7678053956981348, "grad_norm": 1.9910626411437988, "learning_rate": 1.993103845353064e-05, "loss": 0.8391, "step": 4703 }, { "epoch": 0.7679686543406392, "grad_norm": 1.7729636430740356, "learning_rate": 1.9931001245087024e-05, "loss": 0.7613, "step": 4704 }, { "epoch": 0.7681319129831435, "grad_norm": 2.279572010040283, "learning_rate": 1.993096402664289e-05, "loss": 0.7986, "step": 4705 }, { "epoch": 0.768295171625648, "grad_norm": 1.9377883672714233, "learning_rate": 1.9930926798198286e-05, "loss": 0.8515, "step": 4706 }, { "epoch": 0.7684584302681523, "grad_norm": 1.964151382446289, "learning_rate": 1.9930889559753235e-05, "loss": 0.6435, "step": 4707 }, { "epoch": 0.7686216889106567, "grad_norm": 2.023728370666504, "learning_rate": 1.993085231130778e-05, "loss": 0.7854, "step": 4708 }, { "epoch": 0.7687849475531611, "grad_norm": 1.7447378635406494, "learning_rate": 1.9930815052861964e-05, "loss": 0.6915, "step": 4709 }, { "epoch": 0.7689482061956655, "grad_norm": 2.0710904598236084, "learning_rate": 1.993077778441582e-05, "loss": 0.8041, "step": 4710 }, { "epoch": 0.7691114648381698, "grad_norm": 1.5173877477645874, "learning_rate": 1.9930740505969383e-05, "loss": 0.6818, "step": 4711 }, { "epoch": 0.7692747234806743, "grad_norm": 1.8935774564743042, "learning_rate": 1.9930703217522693e-05, "loss": 0.7381, "step": 4712 }, { "epoch": 0.7694379821231786, "grad_norm": 1.7899399995803833, "learning_rate": 1.993066591907579e-05, "loss": 0.6607, "step": 4713 }, { "epoch": 0.769601240765683, "grad_norm": 2.166910171508789, "learning_rate": 1.9930628610628703e-05, "loss": 0.7556, "step": 4714 }, { "epoch": 0.7697644994081875, "grad_norm": 1.7777835130691528, "learning_rate": 1.993059129218148e-05, "loss": 0.6343, "step": 4715 }, { "epoch": 0.7699277580506918, "grad_norm": 1.8069733381271362, "learning_rate": 1.9930553963734155e-05, "loss": 0.8172, "step": 4716 }, { "epoch": 0.7700910166931962, "grad_norm": 2.0318892002105713, "learning_rate": 1.9930516625286764e-05, "loss": 0.7681, "step": 4717 }, { "epoch": 0.7702542753357006, "grad_norm": 2.02823543548584, "learning_rate": 1.9930479276839347e-05, "loss": 0.8, "step": 4718 }, { "epoch": 0.770417533978205, "grad_norm": 1.9297562837600708, "learning_rate": 1.9930441918391933e-05, "loss": 0.7559, "step": 4719 }, { "epoch": 0.7705807926207093, "grad_norm": 2.3635354042053223, "learning_rate": 1.9930404549944577e-05, "loss": 0.9671, "step": 4720 }, { "epoch": 0.7707440512632138, "grad_norm": 2.5022261142730713, "learning_rate": 1.99303671714973e-05, "loss": 0.8454, "step": 4721 }, { "epoch": 0.7709073099057181, "grad_norm": 2.095069169998169, "learning_rate": 1.9930329783050146e-05, "loss": 0.7869, "step": 4722 }, { "epoch": 0.7710705685482225, "grad_norm": 2.1346852779388428, "learning_rate": 1.9930292384603153e-05, "loss": 1.0629, "step": 4723 }, { "epoch": 0.7712338271907269, "grad_norm": 1.9955021142959595, "learning_rate": 1.993025497615636e-05, "loss": 0.8815, "step": 4724 }, { "epoch": 0.7713970858332313, "grad_norm": 1.7011231184005737, "learning_rate": 1.99302175577098e-05, "loss": 0.6644, "step": 4725 }, { "epoch": 0.7715603444757357, "grad_norm": 1.6701023578643799, "learning_rate": 1.9930180129263516e-05, "loss": 0.7036, "step": 4726 }, { "epoch": 0.7717236031182401, "grad_norm": 2.1168112754821777, "learning_rate": 1.993014269081754e-05, "loss": 0.8638, "step": 4727 }, { "epoch": 0.7718868617607445, "grad_norm": 1.7615264654159546, "learning_rate": 1.9930105242371916e-05, "loss": 0.6784, "step": 4728 }, { "epoch": 0.7720501204032488, "grad_norm": 1.9493588209152222, "learning_rate": 1.9930067783926676e-05, "loss": 0.7227, "step": 4729 }, { "epoch": 0.7722133790457533, "grad_norm": 2.3278698921203613, "learning_rate": 1.9930030315481862e-05, "loss": 0.8652, "step": 4730 }, { "epoch": 0.7723766376882576, "grad_norm": 2.0557870864868164, "learning_rate": 1.992999283703751e-05, "loss": 0.7313, "step": 4731 }, { "epoch": 0.772539896330762, "grad_norm": 1.932987928390503, "learning_rate": 1.992995534859366e-05, "loss": 0.9973, "step": 4732 }, { "epoch": 0.7727031549732664, "grad_norm": 2.2142040729522705, "learning_rate": 1.9929917850150344e-05, "loss": 0.8792, "step": 4733 }, { "epoch": 0.7728664136157708, "grad_norm": 2.124579906463623, "learning_rate": 1.9929880341707605e-05, "loss": 0.9059, "step": 4734 }, { "epoch": 0.7730296722582751, "grad_norm": 1.956519365310669, "learning_rate": 1.992984282326548e-05, "loss": 0.8621, "step": 4735 }, { "epoch": 0.7731929309007796, "grad_norm": 1.8126347064971924, "learning_rate": 1.9929805294824004e-05, "loss": 0.7112, "step": 4736 }, { "epoch": 0.773356189543284, "grad_norm": 2.1600191593170166, "learning_rate": 1.9929767756383217e-05, "loss": 0.9415, "step": 4737 }, { "epoch": 0.7735194481857883, "grad_norm": 2.1210319995880127, "learning_rate": 1.992973020794316e-05, "loss": 0.7454, "step": 4738 }, { "epoch": 0.7736827068282928, "grad_norm": 2.0887176990509033, "learning_rate": 1.9929692649503866e-05, "loss": 0.9213, "step": 4739 }, { "epoch": 0.7738459654707971, "grad_norm": 2.2875921726226807, "learning_rate": 1.992965508106537e-05, "loss": 1.0257, "step": 4740 }, { "epoch": 0.7740092241133015, "grad_norm": 1.7170697450637817, "learning_rate": 1.9929617502627717e-05, "loss": 0.7716, "step": 4741 }, { "epoch": 0.7741724827558059, "grad_norm": 2.331270933151245, "learning_rate": 1.9929579914190943e-05, "loss": 1.0241, "step": 4742 }, { "epoch": 0.7743357413983103, "grad_norm": 1.8073878288269043, "learning_rate": 1.9929542315755083e-05, "loss": 0.7575, "step": 4743 }, { "epoch": 0.7744990000408146, "grad_norm": 2.0143723487854004, "learning_rate": 1.9929504707320176e-05, "loss": 0.9307, "step": 4744 }, { "epoch": 0.7746622586833191, "grad_norm": 1.470751404762268, "learning_rate": 1.992946708888626e-05, "loss": 0.625, "step": 4745 }, { "epoch": 0.7748255173258234, "grad_norm": 2.1794958114624023, "learning_rate": 1.9929429460453377e-05, "loss": 0.9388, "step": 4746 }, { "epoch": 0.7749887759683278, "grad_norm": 2.0387630462646484, "learning_rate": 1.9929391822021556e-05, "loss": 0.8402, "step": 4747 }, { "epoch": 0.7751520346108323, "grad_norm": 2.002237319946289, "learning_rate": 1.9929354173590844e-05, "loss": 0.8698, "step": 4748 }, { "epoch": 0.7753152932533366, "grad_norm": 1.8765798807144165, "learning_rate": 1.9929316515161274e-05, "loss": 0.8289, "step": 4749 }, { "epoch": 0.775478551895841, "grad_norm": 2.0631752014160156, "learning_rate": 1.9929278846732883e-05, "loss": 0.7836, "step": 4750 }, { "epoch": 0.7756418105383454, "grad_norm": 1.8475792407989502, "learning_rate": 1.9929241168305715e-05, "loss": 0.7923, "step": 4751 }, { "epoch": 0.7758050691808498, "grad_norm": 1.8382411003112793, "learning_rate": 1.9929203479879798e-05, "loss": 0.7335, "step": 4752 }, { "epoch": 0.7759683278233541, "grad_norm": 2.214232921600342, "learning_rate": 1.992916578145518e-05, "loss": 0.7798, "step": 4753 }, { "epoch": 0.7761315864658586, "grad_norm": 1.7648773193359375, "learning_rate": 1.9929128073031894e-05, "loss": 0.7423, "step": 4754 }, { "epoch": 0.7762948451083629, "grad_norm": 1.9422231912612915, "learning_rate": 1.992909035460998e-05, "loss": 0.7501, "step": 4755 }, { "epoch": 0.7764581037508673, "grad_norm": 1.9625016450881958, "learning_rate": 1.992905262618947e-05, "loss": 0.7894, "step": 4756 }, { "epoch": 0.7766213623933717, "grad_norm": 1.9452979564666748, "learning_rate": 1.992901488777041e-05, "loss": 1.0352, "step": 4757 }, { "epoch": 0.7767846210358761, "grad_norm": 2.0180585384368896, "learning_rate": 1.9928977139352836e-05, "loss": 0.9555, "step": 4758 }, { "epoch": 0.7769478796783805, "grad_norm": 2.206775665283203, "learning_rate": 1.992893938093678e-05, "loss": 0.9121, "step": 4759 }, { "epoch": 0.7771111383208849, "grad_norm": 2.0719492435455322, "learning_rate": 1.992890161252229e-05, "loss": 1.0306, "step": 4760 }, { "epoch": 0.7772743969633893, "grad_norm": 1.9695645570755005, "learning_rate": 1.9928863834109397e-05, "loss": 0.7072, "step": 4761 }, { "epoch": 0.7774376556058936, "grad_norm": 2.0175983905792236, "learning_rate": 1.9928826045698138e-05, "loss": 0.7949, "step": 4762 }, { "epoch": 0.7776009142483981, "grad_norm": 1.643094539642334, "learning_rate": 1.9928788247288557e-05, "loss": 0.8299, "step": 4763 }, { "epoch": 0.7777641728909024, "grad_norm": 1.770142674446106, "learning_rate": 1.9928750438880687e-05, "loss": 0.6598, "step": 4764 }, { "epoch": 0.7779274315334068, "grad_norm": 2.1587982177734375, "learning_rate": 1.992871262047457e-05, "loss": 0.8087, "step": 4765 }, { "epoch": 0.7780906901759111, "grad_norm": 1.7740154266357422, "learning_rate": 1.992867479207024e-05, "loss": 0.8044, "step": 4766 }, { "epoch": 0.7782539488184156, "grad_norm": 1.7717618942260742, "learning_rate": 1.9928636953667734e-05, "loss": 0.7726, "step": 4767 }, { "epoch": 0.7784172074609199, "grad_norm": 2.0453851222991943, "learning_rate": 1.9928599105267098e-05, "loss": 0.8473, "step": 4768 }, { "epoch": 0.7785804661034244, "grad_norm": 1.8253381252288818, "learning_rate": 1.9928561246868367e-05, "loss": 0.6764, "step": 4769 }, { "epoch": 0.7787437247459288, "grad_norm": 1.5525867938995361, "learning_rate": 1.9928523378471573e-05, "loss": 0.7257, "step": 4770 }, { "epoch": 0.7789069833884331, "grad_norm": 2.183997631072998, "learning_rate": 1.992848550007676e-05, "loss": 0.8833, "step": 4771 }, { "epoch": 0.7790702420309376, "grad_norm": 2.07392954826355, "learning_rate": 1.9928447611683964e-05, "loss": 1.0782, "step": 4772 }, { "epoch": 0.7792335006734419, "grad_norm": 1.9873499870300293, "learning_rate": 1.9928409713293226e-05, "loss": 0.7835, "step": 4773 }, { "epoch": 0.7793967593159463, "grad_norm": 1.7642195224761963, "learning_rate": 1.992837180490458e-05, "loss": 0.6764, "step": 4774 }, { "epoch": 0.7795600179584506, "grad_norm": 1.6348297595977783, "learning_rate": 1.992833388651807e-05, "loss": 0.7414, "step": 4775 }, { "epoch": 0.7797232766009551, "grad_norm": 2.057547092437744, "learning_rate": 1.9928295958133726e-05, "loss": 0.7635, "step": 4776 }, { "epoch": 0.7798865352434594, "grad_norm": 2.053492784500122, "learning_rate": 1.992825801975159e-05, "loss": 0.7378, "step": 4777 }, { "epoch": 0.7800497938859638, "grad_norm": 1.648028016090393, "learning_rate": 1.9928220071371706e-05, "loss": 0.7207, "step": 4778 }, { "epoch": 0.7802130525284682, "grad_norm": 1.8631021976470947, "learning_rate": 1.9928182112994105e-05, "loss": 0.782, "step": 4779 }, { "epoch": 0.7803763111709726, "grad_norm": 1.5458502769470215, "learning_rate": 1.9928144144618824e-05, "loss": 0.6672, "step": 4780 }, { "epoch": 0.780539569813477, "grad_norm": 1.768155574798584, "learning_rate": 1.9928106166245906e-05, "loss": 0.7477, "step": 4781 }, { "epoch": 0.7807028284559814, "grad_norm": 2.1225619316101074, "learning_rate": 1.992806817787539e-05, "loss": 0.9158, "step": 4782 }, { "epoch": 0.7808660870984858, "grad_norm": 2.369610071182251, "learning_rate": 1.992803017950731e-05, "loss": 0.9061, "step": 4783 }, { "epoch": 0.7810293457409901, "grad_norm": 2.3653757572174072, "learning_rate": 1.9927992171141707e-05, "loss": 0.9392, "step": 4784 }, { "epoch": 0.7811926043834946, "grad_norm": 1.8838850259780884, "learning_rate": 1.9927954152778618e-05, "loss": 0.8839, "step": 4785 }, { "epoch": 0.7813558630259989, "grad_norm": 1.7974094152450562, "learning_rate": 1.9927916124418084e-05, "loss": 0.7637, "step": 4786 }, { "epoch": 0.7815191216685033, "grad_norm": 1.8508896827697754, "learning_rate": 1.9927878086060136e-05, "loss": 0.7516, "step": 4787 }, { "epoch": 0.7816823803110077, "grad_norm": 1.7573643922805786, "learning_rate": 1.9927840037704823e-05, "loss": 0.8523, "step": 4788 }, { "epoch": 0.7818456389535121, "grad_norm": 1.8613853454589844, "learning_rate": 1.9927801979352174e-05, "loss": 0.8423, "step": 4789 }, { "epoch": 0.7820088975960164, "grad_norm": 1.8870909214019775, "learning_rate": 1.9927763911002232e-05, "loss": 0.9155, "step": 4790 }, { "epoch": 0.7821721562385209, "grad_norm": 1.7808561325073242, "learning_rate": 1.9927725832655035e-05, "loss": 0.6944, "step": 4791 }, { "epoch": 0.7823354148810253, "grad_norm": 1.8420054912567139, "learning_rate": 1.992768774431062e-05, "loss": 0.7751, "step": 4792 }, { "epoch": 0.7824986735235296, "grad_norm": 1.9556150436401367, "learning_rate": 1.9927649645969026e-05, "loss": 0.7477, "step": 4793 }, { "epoch": 0.7826619321660341, "grad_norm": 2.1239070892333984, "learning_rate": 1.9927611537630293e-05, "loss": 0.7029, "step": 4794 }, { "epoch": 0.7828251908085384, "grad_norm": 2.495624542236328, "learning_rate": 1.9927573419294456e-05, "loss": 0.8544, "step": 4795 }, { "epoch": 0.7829884494510428, "grad_norm": 1.9147332906723022, "learning_rate": 1.9927535290961558e-05, "loss": 0.8788, "step": 4796 }, { "epoch": 0.7831517080935472, "grad_norm": 1.638369083404541, "learning_rate": 1.992749715263163e-05, "loss": 0.6212, "step": 4797 }, { "epoch": 0.7833149667360516, "grad_norm": 1.7015105485916138, "learning_rate": 1.992745900430472e-05, "loss": 0.6655, "step": 4798 }, { "epoch": 0.7834782253785559, "grad_norm": 1.9030895233154297, "learning_rate": 1.9927420845980857e-05, "loss": 0.8161, "step": 4799 }, { "epoch": 0.7836414840210604, "grad_norm": 1.6917569637298584, "learning_rate": 1.992738267766009e-05, "loss": 0.8033, "step": 4800 }, { "epoch": 0.7838047426635647, "grad_norm": 1.9744513034820557, "learning_rate": 1.992734449934244e-05, "loss": 0.8762, "step": 4801 }, { "epoch": 0.7839680013060691, "grad_norm": 2.2937164306640625, "learning_rate": 1.992730631102797e-05, "loss": 1.0305, "step": 4802 }, { "epoch": 0.7841312599485736, "grad_norm": 2.205116033554077, "learning_rate": 1.9927268112716698e-05, "loss": 0.8747, "step": 4803 }, { "epoch": 0.7842945185910779, "grad_norm": 1.6136376857757568, "learning_rate": 1.992722990440867e-05, "loss": 0.673, "step": 4804 }, { "epoch": 0.7844577772335823, "grad_norm": 1.7520679235458374, "learning_rate": 1.9927191686103924e-05, "loss": 0.8504, "step": 4805 }, { "epoch": 0.7846210358760867, "grad_norm": 1.8477044105529785, "learning_rate": 1.99271534578025e-05, "loss": 0.8235, "step": 4806 }, { "epoch": 0.7847842945185911, "grad_norm": 1.7849783897399902, "learning_rate": 1.9927115219504433e-05, "loss": 0.9249, "step": 4807 }, { "epoch": 0.7849475531610954, "grad_norm": 1.8515146970748901, "learning_rate": 1.9927076971209765e-05, "loss": 0.7988, "step": 4808 }, { "epoch": 0.7851108118035999, "grad_norm": 2.2251012325286865, "learning_rate": 1.9927038712918532e-05, "loss": 0.825, "step": 4809 }, { "epoch": 0.7852740704461042, "grad_norm": 1.7487651109695435, "learning_rate": 1.9927000444630776e-05, "loss": 0.9116, "step": 4810 }, { "epoch": 0.7854373290886086, "grad_norm": 2.046638250350952, "learning_rate": 1.992696216634653e-05, "loss": 0.7812, "step": 4811 }, { "epoch": 0.785600587731113, "grad_norm": 2.1835498809814453, "learning_rate": 1.992692387806584e-05, "loss": 0.8792, "step": 4812 }, { "epoch": 0.7857638463736174, "grad_norm": 1.9345844984054565, "learning_rate": 1.9926885579788736e-05, "loss": 0.7299, "step": 4813 }, { "epoch": 0.7859271050161218, "grad_norm": 1.4853423833847046, "learning_rate": 1.9926847271515265e-05, "loss": 0.7281, "step": 4814 }, { "epoch": 0.7860903636586262, "grad_norm": 1.7897347211837769, "learning_rate": 1.9926808953245457e-05, "loss": 0.7749, "step": 4815 }, { "epoch": 0.7862536223011306, "grad_norm": 2.2011101245880127, "learning_rate": 1.992677062497936e-05, "loss": 0.9332, "step": 4816 }, { "epoch": 0.7864168809436349, "grad_norm": 1.8387035131454468, "learning_rate": 1.9926732286717005e-05, "loss": 0.732, "step": 4817 }, { "epoch": 0.7865801395861394, "grad_norm": 1.8893117904663086, "learning_rate": 1.9926693938458432e-05, "loss": 0.8071, "step": 4818 }, { "epoch": 0.7867433982286437, "grad_norm": 2.084770679473877, "learning_rate": 1.992665558020368e-05, "loss": 0.9329, "step": 4819 }, { "epoch": 0.7869066568711481, "grad_norm": 2.0545647144317627, "learning_rate": 1.9926617211952793e-05, "loss": 0.8625, "step": 4820 }, { "epoch": 0.7870699155136525, "grad_norm": 1.4702197313308716, "learning_rate": 1.9926578833705802e-05, "loss": 0.6962, "step": 4821 }, { "epoch": 0.7872331741561569, "grad_norm": 2.0203728675842285, "learning_rate": 1.992654044546275e-05, "loss": 0.74, "step": 4822 }, { "epoch": 0.7873964327986612, "grad_norm": 1.8938812017440796, "learning_rate": 1.9926502047223674e-05, "loss": 0.8473, "step": 4823 }, { "epoch": 0.7875596914411657, "grad_norm": 2.1865949630737305, "learning_rate": 1.992646363898861e-05, "loss": 0.7746, "step": 4824 }, { "epoch": 0.7877229500836701, "grad_norm": 1.8459864854812622, "learning_rate": 1.9926425220757607e-05, "loss": 0.786, "step": 4825 }, { "epoch": 0.7878862087261744, "grad_norm": 2.072862386703491, "learning_rate": 1.992638679253069e-05, "loss": 0.9947, "step": 4826 }, { "epoch": 0.7880494673686789, "grad_norm": 2.2285473346710205, "learning_rate": 1.9926348354307906e-05, "loss": 0.7423, "step": 4827 }, { "epoch": 0.7882127260111832, "grad_norm": 1.7071491479873657, "learning_rate": 1.992630990608929e-05, "loss": 0.7249, "step": 4828 }, { "epoch": 0.7883759846536876, "grad_norm": 2.0336320400238037, "learning_rate": 1.9926271447874885e-05, "loss": 0.7545, "step": 4829 }, { "epoch": 0.788539243296192, "grad_norm": 1.8453136682510376, "learning_rate": 1.9926232979664727e-05, "loss": 0.7947, "step": 4830 }, { "epoch": 0.7887025019386964, "grad_norm": 1.8886222839355469, "learning_rate": 1.9926194501458856e-05, "loss": 0.8197, "step": 4831 }, { "epoch": 0.7888657605812007, "grad_norm": 2.2592084407806396, "learning_rate": 1.992615601325731e-05, "loss": 0.8515, "step": 4832 }, { "epoch": 0.7890290192237052, "grad_norm": 2.0957882404327393, "learning_rate": 1.9926117515060124e-05, "loss": 0.844, "step": 4833 }, { "epoch": 0.7891922778662095, "grad_norm": 2.1119346618652344, "learning_rate": 1.992607900686734e-05, "loss": 0.9145, "step": 4834 }, { "epoch": 0.7893555365087139, "grad_norm": 1.541168212890625, "learning_rate": 1.9926040488679e-05, "loss": 0.6781, "step": 4835 }, { "epoch": 0.7895187951512184, "grad_norm": 2.149961233139038, "learning_rate": 1.992600196049514e-05, "loss": 0.8346, "step": 4836 }, { "epoch": 0.7896820537937227, "grad_norm": 2.000056266784668, "learning_rate": 1.99259634223158e-05, "loss": 0.9461, "step": 4837 }, { "epoch": 0.7898453124362271, "grad_norm": 1.895493745803833, "learning_rate": 1.9925924874141014e-05, "loss": 0.924, "step": 4838 }, { "epoch": 0.7900085710787315, "grad_norm": 2.0710206031799316, "learning_rate": 1.9925886315970825e-05, "loss": 1.0897, "step": 4839 }, { "epoch": 0.7901718297212359, "grad_norm": 1.845704436302185, "learning_rate": 1.9925847747805274e-05, "loss": 0.7711, "step": 4840 }, { "epoch": 0.7903350883637402, "grad_norm": 1.8371671438217163, "learning_rate": 1.9925809169644395e-05, "loss": 0.6374, "step": 4841 }, { "epoch": 0.7904983470062447, "grad_norm": 1.8138757944107056, "learning_rate": 1.9925770581488226e-05, "loss": 0.8355, "step": 4842 }, { "epoch": 0.790661605648749, "grad_norm": 2.237771511077881, "learning_rate": 1.9925731983336814e-05, "loss": 0.9534, "step": 4843 }, { "epoch": 0.7908248642912534, "grad_norm": 2.1276376247406006, "learning_rate": 1.9925693375190187e-05, "loss": 0.7991, "step": 4844 }, { "epoch": 0.7909881229337578, "grad_norm": 2.7150862216949463, "learning_rate": 1.9925654757048394e-05, "loss": 0.9995, "step": 4845 }, { "epoch": 0.7911513815762622, "grad_norm": 2.2937488555908203, "learning_rate": 1.9925616128911467e-05, "loss": 1.0305, "step": 4846 }, { "epoch": 0.7913146402187666, "grad_norm": 1.6939880847930908, "learning_rate": 1.992557749077945e-05, "loss": 0.6885, "step": 4847 }, { "epoch": 0.791477898861271, "grad_norm": 1.8745018243789673, "learning_rate": 1.9925538842652376e-05, "loss": 0.7845, "step": 4848 }, { "epoch": 0.7916411575037754, "grad_norm": 1.8437085151672363, "learning_rate": 1.9925500184530286e-05, "loss": 0.8282, "step": 4849 }, { "epoch": 0.7918044161462797, "grad_norm": 1.677315354347229, "learning_rate": 1.9925461516413224e-05, "loss": 0.7354, "step": 4850 }, { "epoch": 0.7919676747887842, "grad_norm": 1.949843168258667, "learning_rate": 1.992542283830122e-05, "loss": 0.7897, "step": 4851 }, { "epoch": 0.7921309334312885, "grad_norm": 1.8098435401916504, "learning_rate": 1.992538415019432e-05, "loss": 0.9394, "step": 4852 }, { "epoch": 0.7922941920737929, "grad_norm": 1.827813744544983, "learning_rate": 1.992534545209256e-05, "loss": 0.9133, "step": 4853 }, { "epoch": 0.7924574507162973, "grad_norm": 2.09625506401062, "learning_rate": 1.9925306743995984e-05, "loss": 0.8424, "step": 4854 }, { "epoch": 0.7926207093588017, "grad_norm": 1.937185287475586, "learning_rate": 1.9925268025904622e-05, "loss": 0.8087, "step": 4855 }, { "epoch": 0.792783968001306, "grad_norm": 2.0197770595550537, "learning_rate": 1.992522929781852e-05, "loss": 0.8478, "step": 4856 }, { "epoch": 0.7929472266438105, "grad_norm": 1.6214985847473145, "learning_rate": 1.9925190559737714e-05, "loss": 0.5859, "step": 4857 }, { "epoch": 0.7931104852863149, "grad_norm": 2.090153694152832, "learning_rate": 1.9925151811662243e-05, "loss": 0.9524, "step": 4858 }, { "epoch": 0.7932737439288192, "grad_norm": 1.8472976684570312, "learning_rate": 1.992511305359215e-05, "loss": 0.7586, "step": 4859 }, { "epoch": 0.7934370025713237, "grad_norm": 2.0367839336395264, "learning_rate": 1.9925074285527467e-05, "loss": 0.7588, "step": 4860 }, { "epoch": 0.793600261213828, "grad_norm": 1.6678805351257324, "learning_rate": 1.992503550746824e-05, "loss": 0.7385, "step": 4861 }, { "epoch": 0.7937635198563324, "grad_norm": 1.7842092514038086, "learning_rate": 1.9924996719414503e-05, "loss": 0.7824, "step": 4862 }, { "epoch": 0.7939267784988367, "grad_norm": 2.0741941928863525, "learning_rate": 1.9924957921366298e-05, "loss": 0.9028, "step": 4863 }, { "epoch": 0.7940900371413412, "grad_norm": 2.0686750411987305, "learning_rate": 1.992491911332366e-05, "loss": 0.8946, "step": 4864 }, { "epoch": 0.7942532957838455, "grad_norm": 1.8333791494369507, "learning_rate": 1.9924880295286634e-05, "loss": 0.7942, "step": 4865 }, { "epoch": 0.79441655442635, "grad_norm": 1.8150825500488281, "learning_rate": 1.9924841467255254e-05, "loss": 0.7016, "step": 4866 }, { "epoch": 0.7945798130688543, "grad_norm": 1.9416899681091309, "learning_rate": 1.9924802629229563e-05, "loss": 0.7524, "step": 4867 }, { "epoch": 0.7947430717113587, "grad_norm": 1.6752439737319946, "learning_rate": 1.99247637812096e-05, "loss": 0.777, "step": 4868 }, { "epoch": 0.7949063303538632, "grad_norm": 2.1142969131469727, "learning_rate": 1.9924724923195397e-05, "loss": 0.9306, "step": 4869 }, { "epoch": 0.7950695889963675, "grad_norm": 1.9637212753295898, "learning_rate": 1.9924686055187003e-05, "loss": 0.8004, "step": 4870 }, { "epoch": 0.7952328476388719, "grad_norm": 1.7740687131881714, "learning_rate": 1.9924647177184453e-05, "loss": 0.6823, "step": 4871 }, { "epoch": 0.7953961062813762, "grad_norm": 1.6591767072677612, "learning_rate": 1.9924608289187786e-05, "loss": 0.7141, "step": 4872 }, { "epoch": 0.7955593649238807, "grad_norm": 1.8841469287872314, "learning_rate": 1.9924569391197038e-05, "loss": 0.7758, "step": 4873 }, { "epoch": 0.795722623566385, "grad_norm": 1.7900296449661255, "learning_rate": 1.9924530483212253e-05, "loss": 0.7176, "step": 4874 }, { "epoch": 0.7958858822088895, "grad_norm": 2.041778802871704, "learning_rate": 1.992449156523347e-05, "loss": 0.7577, "step": 4875 }, { "epoch": 0.7960491408513938, "grad_norm": 1.648495078086853, "learning_rate": 1.9924452637260726e-05, "loss": 0.6537, "step": 4876 }, { "epoch": 0.7962123994938982, "grad_norm": 1.8487268686294556, "learning_rate": 1.9924413699294058e-05, "loss": 0.7848, "step": 4877 }, { "epoch": 0.7963756581364025, "grad_norm": 2.021528482437134, "learning_rate": 1.9924374751333512e-05, "loss": 0.8082, "step": 4878 }, { "epoch": 0.796538916778907, "grad_norm": 2.3087358474731445, "learning_rate": 1.9924335793379123e-05, "loss": 0.7887, "step": 4879 }, { "epoch": 0.7967021754214114, "grad_norm": 2.0047802925109863, "learning_rate": 1.9924296825430928e-05, "loss": 0.8623, "step": 4880 }, { "epoch": 0.7968654340639157, "grad_norm": 1.678239107131958, "learning_rate": 1.9924257847488967e-05, "loss": 0.7003, "step": 4881 }, { "epoch": 0.7970286927064202, "grad_norm": 1.731492519378662, "learning_rate": 1.9924218859553288e-05, "loss": 0.708, "step": 4882 }, { "epoch": 0.7971919513489245, "grad_norm": 2.324237585067749, "learning_rate": 1.9924179861623917e-05, "loss": 0.836, "step": 4883 }, { "epoch": 0.797355209991429, "grad_norm": 1.8351635932922363, "learning_rate": 1.99241408537009e-05, "loss": 0.6634, "step": 4884 }, { "epoch": 0.7975184686339333, "grad_norm": 2.070648670196533, "learning_rate": 1.9924101835784276e-05, "loss": 0.8651, "step": 4885 }, { "epoch": 0.7976817272764377, "grad_norm": 1.9946140050888062, "learning_rate": 1.9924062807874086e-05, "loss": 0.8188, "step": 4886 }, { "epoch": 0.797844985918942, "grad_norm": 1.9841363430023193, "learning_rate": 1.9924023769970368e-05, "loss": 0.891, "step": 4887 }, { "epoch": 0.7980082445614465, "grad_norm": 2.1555466651916504, "learning_rate": 1.992398472207316e-05, "loss": 0.9885, "step": 4888 }, { "epoch": 0.7981715032039508, "grad_norm": 2.10235857963562, "learning_rate": 1.99239456641825e-05, "loss": 0.8864, "step": 4889 }, { "epoch": 0.7983347618464552, "grad_norm": 2.0317468643188477, "learning_rate": 1.992390659629843e-05, "loss": 0.6946, "step": 4890 }, { "epoch": 0.7984980204889597, "grad_norm": 1.592612385749817, "learning_rate": 1.9923867518420993e-05, "loss": 0.5966, "step": 4891 }, { "epoch": 0.798661279131464, "grad_norm": 1.9969428777694702, "learning_rate": 1.9923828430550215e-05, "loss": 0.7439, "step": 4892 }, { "epoch": 0.7988245377739684, "grad_norm": 1.888777256011963, "learning_rate": 1.9923789332686153e-05, "loss": 0.8761, "step": 4893 }, { "epoch": 0.7989877964164728, "grad_norm": 1.9656144380569458, "learning_rate": 1.9923750224828833e-05, "loss": 0.8355, "step": 4894 }, { "epoch": 0.7991510550589772, "grad_norm": 1.9136943817138672, "learning_rate": 1.99237111069783e-05, "loss": 0.8629, "step": 4895 }, { "epoch": 0.7993143137014815, "grad_norm": 1.7606338262557983, "learning_rate": 1.9923671979134594e-05, "loss": 0.7806, "step": 4896 }, { "epoch": 0.799477572343986, "grad_norm": 1.8844459056854248, "learning_rate": 1.9923632841297753e-05, "loss": 0.8732, "step": 4897 }, { "epoch": 0.7996408309864903, "grad_norm": 1.7217713594436646, "learning_rate": 1.9923593693467816e-05, "loss": 0.6952, "step": 4898 }, { "epoch": 0.7998040896289947, "grad_norm": 1.9081617593765259, "learning_rate": 1.9923554535644823e-05, "loss": 0.9705, "step": 4899 }, { "epoch": 0.7999673482714991, "grad_norm": 1.782641887664795, "learning_rate": 1.9923515367828812e-05, "loss": 0.7567, "step": 4900 }, { "epoch": 0.8001306069140035, "grad_norm": 1.5512231588363647, "learning_rate": 1.9923476190019825e-05, "loss": 0.5722, "step": 4901 }, { "epoch": 0.8002938655565079, "grad_norm": 1.762524962425232, "learning_rate": 1.99234370022179e-05, "loss": 0.683, "step": 4902 }, { "epoch": 0.8004571241990123, "grad_norm": 1.9655622243881226, "learning_rate": 1.9923397804423075e-05, "loss": 0.8997, "step": 4903 }, { "epoch": 0.8006203828415167, "grad_norm": 2.107661008834839, "learning_rate": 1.9923358596635393e-05, "loss": 0.8201, "step": 4904 }, { "epoch": 0.800783641484021, "grad_norm": 2.104870080947876, "learning_rate": 1.9923319378854888e-05, "loss": 0.9219, "step": 4905 }, { "epoch": 0.8009469001265255, "grad_norm": 2.1182050704956055, "learning_rate": 1.992328015108161e-05, "loss": 0.8201, "step": 4906 }, { "epoch": 0.8011101587690298, "grad_norm": 1.873949646949768, "learning_rate": 1.9923240913315585e-05, "loss": 0.8274, "step": 4907 }, { "epoch": 0.8012734174115342, "grad_norm": 1.82496178150177, "learning_rate": 1.992320166555686e-05, "loss": 0.8341, "step": 4908 }, { "epoch": 0.8014366760540386, "grad_norm": 2.264035940170288, "learning_rate": 1.9923162407805475e-05, "loss": 0.8222, "step": 4909 }, { "epoch": 0.801599934696543, "grad_norm": 1.9848077297210693, "learning_rate": 1.9923123140061467e-05, "loss": 0.9032, "step": 4910 }, { "epoch": 0.8017631933390473, "grad_norm": 1.8136736154556274, "learning_rate": 1.9923083862324876e-05, "loss": 0.8664, "step": 4911 }, { "epoch": 0.8019264519815518, "grad_norm": 1.7448256015777588, "learning_rate": 1.9923044574595746e-05, "loss": 0.7248, "step": 4912 }, { "epoch": 0.8020897106240562, "grad_norm": 2.084658622741699, "learning_rate": 1.992300527687411e-05, "loss": 0.8148, "step": 4913 }, { "epoch": 0.8022529692665605, "grad_norm": 1.652200698852539, "learning_rate": 1.9922965969160007e-05, "loss": 0.7118, "step": 4914 }, { "epoch": 0.802416227909065, "grad_norm": 1.9420719146728516, "learning_rate": 1.9922926651453487e-05, "loss": 0.8443, "step": 4915 }, { "epoch": 0.8025794865515693, "grad_norm": 2.0187103748321533, "learning_rate": 1.992288732375458e-05, "loss": 0.7311, "step": 4916 }, { "epoch": 0.8027427451940737, "grad_norm": 2.107795476913452, "learning_rate": 1.9922847986063326e-05, "loss": 0.6756, "step": 4917 }, { "epoch": 0.8029060038365781, "grad_norm": 2.0052661895751953, "learning_rate": 1.9922808638379767e-05, "loss": 0.7797, "step": 4918 }, { "epoch": 0.8030692624790825, "grad_norm": 1.8871933221817017, "learning_rate": 1.9922769280703944e-05, "loss": 0.7946, "step": 4919 }, { "epoch": 0.8032325211215868, "grad_norm": 1.9102355241775513, "learning_rate": 1.9922729913035893e-05, "loss": 0.9598, "step": 4920 }, { "epoch": 0.8033957797640913, "grad_norm": 1.4432445764541626, "learning_rate": 1.9922690535375656e-05, "loss": 0.6567, "step": 4921 }, { "epoch": 0.8035590384065956, "grad_norm": 1.935724139213562, "learning_rate": 1.9922651147723275e-05, "loss": 0.8524, "step": 4922 }, { "epoch": 0.8037222970491, "grad_norm": 1.9747815132141113, "learning_rate": 1.9922611750078783e-05, "loss": 0.8371, "step": 4923 }, { "epoch": 0.8038855556916045, "grad_norm": 2.0641162395477295, "learning_rate": 1.9922572342442225e-05, "loss": 0.5564, "step": 4924 }, { "epoch": 0.8040488143341088, "grad_norm": 1.9731000661849976, "learning_rate": 1.992253292481364e-05, "loss": 0.8307, "step": 4925 }, { "epoch": 0.8042120729766132, "grad_norm": 2.0066750049591064, "learning_rate": 1.992249349719307e-05, "loss": 0.8237, "step": 4926 }, { "epoch": 0.8043753316191176, "grad_norm": 2.580623149871826, "learning_rate": 1.9922454059580543e-05, "loss": 0.7891, "step": 4927 }, { "epoch": 0.804538590261622, "grad_norm": 2.139864206314087, "learning_rate": 1.9922414611976116e-05, "loss": 0.8092, "step": 4928 }, { "epoch": 0.8047018489041263, "grad_norm": 1.8119847774505615, "learning_rate": 1.9922375154379818e-05, "loss": 0.8028, "step": 4929 }, { "epoch": 0.8048651075466308, "grad_norm": 1.9565775394439697, "learning_rate": 1.992233568679169e-05, "loss": 0.807, "step": 4930 }, { "epoch": 0.8050283661891351, "grad_norm": 1.834346890449524, "learning_rate": 1.992229620921177e-05, "loss": 0.8014, "step": 4931 }, { "epoch": 0.8051916248316395, "grad_norm": 1.9296114444732666, "learning_rate": 1.9922256721640104e-05, "loss": 0.7247, "step": 4932 }, { "epoch": 0.805354883474144, "grad_norm": 1.9737929105758667, "learning_rate": 1.992221722407673e-05, "loss": 0.7077, "step": 4933 }, { "epoch": 0.8055181421166483, "grad_norm": 2.2124643325805664, "learning_rate": 1.9922177716521678e-05, "loss": 0.9552, "step": 4934 }, { "epoch": 0.8056814007591527, "grad_norm": 1.9864575862884521, "learning_rate": 1.9922138198975003e-05, "loss": 0.7147, "step": 4935 }, { "epoch": 0.8058446594016571, "grad_norm": 2.0567729473114014, "learning_rate": 1.9922098671436734e-05, "loss": 0.8462, "step": 4936 }, { "epoch": 0.8060079180441615, "grad_norm": 1.8009942770004272, "learning_rate": 1.9922059133906915e-05, "loss": 0.9017, "step": 4937 }, { "epoch": 0.8061711766866658, "grad_norm": 1.920516848564148, "learning_rate": 1.9922019586385587e-05, "loss": 0.7646, "step": 4938 }, { "epoch": 0.8063344353291703, "grad_norm": 1.9540677070617676, "learning_rate": 1.9921980028872784e-05, "loss": 0.8017, "step": 4939 }, { "epoch": 0.8064976939716746, "grad_norm": 1.7938027381896973, "learning_rate": 1.9921940461368552e-05, "loss": 0.838, "step": 4940 }, { "epoch": 0.806660952614179, "grad_norm": 1.694833755493164, "learning_rate": 1.9921900883872927e-05, "loss": 0.6853, "step": 4941 }, { "epoch": 0.8068242112566834, "grad_norm": 1.9588202238082886, "learning_rate": 1.9921861296385952e-05, "loss": 0.8206, "step": 4942 }, { "epoch": 0.8069874698991878, "grad_norm": 2.0975844860076904, "learning_rate": 1.9921821698907668e-05, "loss": 0.8198, "step": 4943 }, { "epoch": 0.8071507285416922, "grad_norm": 1.7575784921646118, "learning_rate": 1.9921782091438108e-05, "loss": 0.7722, "step": 4944 }, { "epoch": 0.8073139871841966, "grad_norm": 1.9443762302398682, "learning_rate": 1.9921742473977317e-05, "loss": 0.7943, "step": 4945 }, { "epoch": 0.807477245826701, "grad_norm": 1.9529401063919067, "learning_rate": 1.9921702846525335e-05, "loss": 0.8789, "step": 4946 }, { "epoch": 0.8076405044692053, "grad_norm": 1.9852482080459595, "learning_rate": 1.99216632090822e-05, "loss": 0.8449, "step": 4947 }, { "epoch": 0.8078037631117098, "grad_norm": 1.942453145980835, "learning_rate": 1.9921623561647952e-05, "loss": 0.7073, "step": 4948 }, { "epoch": 0.8079670217542141, "grad_norm": 1.9876902103424072, "learning_rate": 1.9921583904222636e-05, "loss": 0.8965, "step": 4949 }, { "epoch": 0.8081302803967185, "grad_norm": 2.0401511192321777, "learning_rate": 1.9921544236806284e-05, "loss": 0.9623, "step": 4950 }, { "epoch": 0.8082935390392229, "grad_norm": 1.8046239614486694, "learning_rate": 1.992150455939894e-05, "loss": 0.6899, "step": 4951 }, { "epoch": 0.8084567976817273, "grad_norm": 1.8443927764892578, "learning_rate": 1.9921464872000643e-05, "loss": 0.8912, "step": 4952 }, { "epoch": 0.8086200563242316, "grad_norm": 2.1088807582855225, "learning_rate": 1.9921425174611435e-05, "loss": 0.7481, "step": 4953 }, { "epoch": 0.808783314966736, "grad_norm": 2.0166337490081787, "learning_rate": 1.992138546723135e-05, "loss": 0.858, "step": 4954 }, { "epoch": 0.8089465736092405, "grad_norm": 1.9046247005462646, "learning_rate": 1.9921345749860438e-05, "loss": 0.8141, "step": 4955 }, { "epoch": 0.8091098322517448, "grad_norm": 2.1999142169952393, "learning_rate": 1.992130602249873e-05, "loss": 0.8141, "step": 4956 }, { "epoch": 0.8092730908942493, "grad_norm": 1.3705949783325195, "learning_rate": 1.992126628514627e-05, "loss": 0.5902, "step": 4957 }, { "epoch": 0.8094363495367536, "grad_norm": 1.646638035774231, "learning_rate": 1.99212265378031e-05, "loss": 0.6318, "step": 4958 }, { "epoch": 0.809599608179258, "grad_norm": 1.901304006576538, "learning_rate": 1.9921186780469256e-05, "loss": 0.8359, "step": 4959 }, { "epoch": 0.8097628668217624, "grad_norm": 2.0890119075775146, "learning_rate": 1.9921147013144782e-05, "loss": 0.7419, "step": 4960 }, { "epoch": 0.8099261254642668, "grad_norm": 2.219465732574463, "learning_rate": 1.992110723582971e-05, "loss": 0.7566, "step": 4961 }, { "epoch": 0.8100893841067711, "grad_norm": 2.0533900260925293, "learning_rate": 1.992106744852409e-05, "loss": 0.8146, "step": 4962 }, { "epoch": 0.8102526427492756, "grad_norm": 1.624820590019226, "learning_rate": 1.9921027651227954e-05, "loss": 0.7129, "step": 4963 }, { "epoch": 0.8104159013917799, "grad_norm": 1.7947266101837158, "learning_rate": 1.992098784394135e-05, "loss": 0.7161, "step": 4964 }, { "epoch": 0.8105791600342843, "grad_norm": 2.0412700176239014, "learning_rate": 1.992094802666431e-05, "loss": 0.8682, "step": 4965 }, { "epoch": 0.8107424186767888, "grad_norm": 2.7092697620391846, "learning_rate": 1.992090819939688e-05, "loss": 0.7957, "step": 4966 }, { "epoch": 0.8109056773192931, "grad_norm": 1.8372575044631958, "learning_rate": 1.99208683621391e-05, "loss": 0.731, "step": 4967 }, { "epoch": 0.8110689359617975, "grad_norm": 2.0023632049560547, "learning_rate": 1.9920828514891007e-05, "loss": 0.7215, "step": 4968 }, { "epoch": 0.8112321946043018, "grad_norm": 1.9355003833770752, "learning_rate": 1.992078865765264e-05, "loss": 0.8644, "step": 4969 }, { "epoch": 0.8113954532468063, "grad_norm": 1.875082015991211, "learning_rate": 1.9920748790424043e-05, "loss": 0.8122, "step": 4970 }, { "epoch": 0.8115587118893106, "grad_norm": 1.6935220956802368, "learning_rate": 1.9920708913205254e-05, "loss": 0.718, "step": 4971 }, { "epoch": 0.811721970531815, "grad_norm": 1.9387755393981934, "learning_rate": 1.9920669025996314e-05, "loss": 0.7114, "step": 4972 }, { "epoch": 0.8118852291743194, "grad_norm": 2.08963680267334, "learning_rate": 1.9920629128797265e-05, "loss": 0.81, "step": 4973 }, { "epoch": 0.8120484878168238, "grad_norm": 1.8642210960388184, "learning_rate": 1.9920589221608143e-05, "loss": 0.9308, "step": 4974 }, { "epoch": 0.8122117464593281, "grad_norm": 2.2590415477752686, "learning_rate": 1.9920549304428992e-05, "loss": 0.6884, "step": 4975 }, { "epoch": 0.8123750051018326, "grad_norm": 2.1210553646087646, "learning_rate": 1.992050937725985e-05, "loss": 1.0297, "step": 4976 }, { "epoch": 0.812538263744337, "grad_norm": 2.174837350845337, "learning_rate": 1.9920469440100757e-05, "loss": 0.9024, "step": 4977 }, { "epoch": 0.8127015223868413, "grad_norm": 2.0452444553375244, "learning_rate": 1.992042949295175e-05, "loss": 1.0108, "step": 4978 }, { "epoch": 0.8128647810293458, "grad_norm": 1.9397871494293213, "learning_rate": 1.992038953581288e-05, "loss": 0.7674, "step": 4979 }, { "epoch": 0.8130280396718501, "grad_norm": 2.1064770221710205, "learning_rate": 1.992034956868418e-05, "loss": 0.8802, "step": 4980 }, { "epoch": 0.8131912983143545, "grad_norm": 2.225207805633545, "learning_rate": 1.9920309591565684e-05, "loss": 0.8788, "step": 4981 }, { "epoch": 0.8133545569568589, "grad_norm": 1.7960642576217651, "learning_rate": 1.9920269604457444e-05, "loss": 0.8891, "step": 4982 }, { "epoch": 0.8135178155993633, "grad_norm": 2.098869800567627, "learning_rate": 1.9920229607359495e-05, "loss": 0.7232, "step": 4983 }, { "epoch": 0.8136810742418676, "grad_norm": 1.8331701755523682, "learning_rate": 1.9920189600271876e-05, "loss": 0.8301, "step": 4984 }, { "epoch": 0.8138443328843721, "grad_norm": 1.993514060974121, "learning_rate": 1.992014958319463e-05, "loss": 0.9245, "step": 4985 }, { "epoch": 0.8140075915268764, "grad_norm": 1.4990359544754028, "learning_rate": 1.9920109556127793e-05, "loss": 0.6753, "step": 4986 }, { "epoch": 0.8141708501693808, "grad_norm": 1.8884570598602295, "learning_rate": 1.9920069519071414e-05, "loss": 0.7855, "step": 4987 }, { "epoch": 0.8143341088118853, "grad_norm": 2.021421194076538, "learning_rate": 1.992002947202552e-05, "loss": 0.8976, "step": 4988 }, { "epoch": 0.8144973674543896, "grad_norm": 1.9134283065795898, "learning_rate": 1.9919989414990164e-05, "loss": 0.7849, "step": 4989 }, { "epoch": 0.814660626096894, "grad_norm": 1.9964030981063843, "learning_rate": 1.991994934796538e-05, "loss": 0.8339, "step": 4990 }, { "epoch": 0.8148238847393984, "grad_norm": 1.9525470733642578, "learning_rate": 1.991990927095121e-05, "loss": 0.868, "step": 4991 }, { "epoch": 0.8149871433819028, "grad_norm": 2.0830307006835938, "learning_rate": 1.9919869183947693e-05, "loss": 0.9768, "step": 4992 }, { "epoch": 0.8151504020244071, "grad_norm": 1.8301317691802979, "learning_rate": 1.9919829086954872e-05, "loss": 0.7571, "step": 4993 }, { "epoch": 0.8153136606669116, "grad_norm": 1.6224919557571411, "learning_rate": 1.9919788979972785e-05, "loss": 0.6798, "step": 4994 }, { "epoch": 0.8154769193094159, "grad_norm": 1.7822777032852173, "learning_rate": 1.9919748863001473e-05, "loss": 0.7875, "step": 4995 }, { "epoch": 0.8156401779519203, "grad_norm": 1.8727947473526, "learning_rate": 1.9919708736040976e-05, "loss": 0.9101, "step": 4996 }, { "epoch": 0.8158034365944247, "grad_norm": 1.812536597251892, "learning_rate": 1.9919668599091334e-05, "loss": 0.7774, "step": 4997 }, { "epoch": 0.8159666952369291, "grad_norm": 1.6802173852920532, "learning_rate": 1.9919628452152592e-05, "loss": 0.6975, "step": 4998 }, { "epoch": 0.8161299538794335, "grad_norm": 2.2278597354888916, "learning_rate": 1.9919588295224784e-05, "loss": 0.7392, "step": 4999 }, { "epoch": 0.8162932125219379, "grad_norm": 1.9044816493988037, "learning_rate": 1.9919548128307954e-05, "loss": 0.8257, "step": 5000 }, { "epoch": 0.8164564711644423, "grad_norm": 1.8235116004943848, "learning_rate": 1.9919507951402142e-05, "loss": 0.8094, "step": 5001 }, { "epoch": 0.8166197298069466, "grad_norm": 1.6744729280471802, "learning_rate": 1.991946776450739e-05, "loss": 0.6615, "step": 5002 }, { "epoch": 0.8167829884494511, "grad_norm": 2.3734798431396484, "learning_rate": 1.9919427567623732e-05, "loss": 0.873, "step": 5003 }, { "epoch": 0.8169462470919554, "grad_norm": 1.7986395359039307, "learning_rate": 1.9919387360751216e-05, "loss": 0.7157, "step": 5004 }, { "epoch": 0.8171095057344598, "grad_norm": 1.9178245067596436, "learning_rate": 1.9919347143889877e-05, "loss": 0.793, "step": 5005 }, { "epoch": 0.8172727643769642, "grad_norm": 1.9701792001724243, "learning_rate": 1.991930691703976e-05, "loss": 0.8186, "step": 5006 }, { "epoch": 0.8174360230194686, "grad_norm": 2.0716731548309326, "learning_rate": 1.9919266680200905e-05, "loss": 0.7329, "step": 5007 }, { "epoch": 0.8175992816619729, "grad_norm": 1.9646869897842407, "learning_rate": 1.991922643337335e-05, "loss": 0.8934, "step": 5008 }, { "epoch": 0.8177625403044774, "grad_norm": 1.7814162969589233, "learning_rate": 1.9919186176557136e-05, "loss": 0.7497, "step": 5009 }, { "epoch": 0.8179257989469818, "grad_norm": 1.6358851194381714, "learning_rate": 1.9919145909752305e-05, "loss": 0.7428, "step": 5010 }, { "epoch": 0.8180890575894861, "grad_norm": 1.7643718719482422, "learning_rate": 1.9919105632958896e-05, "loss": 0.7474, "step": 5011 }, { "epoch": 0.8182523162319906, "grad_norm": 1.8401768207550049, "learning_rate": 1.991906534617695e-05, "loss": 0.7184, "step": 5012 }, { "epoch": 0.8184155748744949, "grad_norm": 1.85053551197052, "learning_rate": 1.991902504940651e-05, "loss": 0.6583, "step": 5013 }, { "epoch": 0.8185788335169993, "grad_norm": 1.9926639795303345, "learning_rate": 1.991898474264761e-05, "loss": 0.814, "step": 5014 }, { "epoch": 0.8187420921595037, "grad_norm": 1.7965377569198608, "learning_rate": 1.99189444259003e-05, "loss": 0.8565, "step": 5015 }, { "epoch": 0.8189053508020081, "grad_norm": 2.0575406551361084, "learning_rate": 1.991890409916461e-05, "loss": 0.947, "step": 5016 }, { "epoch": 0.8190686094445124, "grad_norm": 2.526061773300171, "learning_rate": 1.991886376244059e-05, "loss": 0.9665, "step": 5017 }, { "epoch": 0.8192318680870169, "grad_norm": 1.7794110774993896, "learning_rate": 1.9918823415728276e-05, "loss": 0.674, "step": 5018 }, { "epoch": 0.8193951267295212, "grad_norm": 1.7379111051559448, "learning_rate": 1.991878305902771e-05, "loss": 0.6696, "step": 5019 }, { "epoch": 0.8195583853720256, "grad_norm": 1.908668041229248, "learning_rate": 1.991874269233893e-05, "loss": 0.8622, "step": 5020 }, { "epoch": 0.8197216440145301, "grad_norm": 2.2182705402374268, "learning_rate": 1.9918702315661985e-05, "loss": 0.84, "step": 5021 }, { "epoch": 0.8198849026570344, "grad_norm": 2.102921962738037, "learning_rate": 1.9918661928996903e-05, "loss": 0.8621, "step": 5022 }, { "epoch": 0.8200481612995388, "grad_norm": 1.9957351684570312, "learning_rate": 1.991862153234373e-05, "loss": 0.9756, "step": 5023 }, { "epoch": 0.8202114199420432, "grad_norm": 2.015124797821045, "learning_rate": 1.9918581125702512e-05, "loss": 0.7818, "step": 5024 }, { "epoch": 0.8203746785845476, "grad_norm": 1.8854998350143433, "learning_rate": 1.9918540709073288e-05, "loss": 0.7965, "step": 5025 }, { "epoch": 0.8205379372270519, "grad_norm": 2.047118663787842, "learning_rate": 1.991850028245609e-05, "loss": 0.6867, "step": 5026 }, { "epoch": 0.8207011958695564, "grad_norm": 1.8411861658096313, "learning_rate": 1.9918459845850967e-05, "loss": 0.7055, "step": 5027 }, { "epoch": 0.8208644545120607, "grad_norm": 2.222003221511841, "learning_rate": 1.991841939925796e-05, "loss": 0.8281, "step": 5028 }, { "epoch": 0.8210277131545651, "grad_norm": 1.9511771202087402, "learning_rate": 1.99183789426771e-05, "loss": 0.8471, "step": 5029 }, { "epoch": 0.8211909717970695, "grad_norm": 1.9045811891555786, "learning_rate": 1.991833847610844e-05, "loss": 0.8528, "step": 5030 }, { "epoch": 0.8213542304395739, "grad_norm": 1.8613239526748657, "learning_rate": 1.9918297999552018e-05, "loss": 0.7263, "step": 5031 }, { "epoch": 0.8215174890820783, "grad_norm": 1.8274565935134888, "learning_rate": 1.991825751300787e-05, "loss": 0.7779, "step": 5032 }, { "epoch": 0.8216807477245827, "grad_norm": 2.3139355182647705, "learning_rate": 1.991821701647604e-05, "loss": 0.727, "step": 5033 }, { "epoch": 0.8218440063670871, "grad_norm": 2.1294779777526855, "learning_rate": 1.9918176509956568e-05, "loss": 0.9566, "step": 5034 }, { "epoch": 0.8220072650095914, "grad_norm": 2.0486795902252197, "learning_rate": 1.9918135993449494e-05, "loss": 0.8147, "step": 5035 }, { "epoch": 0.8221705236520959, "grad_norm": 2.0120131969451904, "learning_rate": 1.9918095466954862e-05, "loss": 0.7275, "step": 5036 }, { "epoch": 0.8223337822946002, "grad_norm": 1.8299384117126465, "learning_rate": 1.991805493047271e-05, "loss": 0.8452, "step": 5037 }, { "epoch": 0.8224970409371046, "grad_norm": 1.8235822916030884, "learning_rate": 1.9918014384003074e-05, "loss": 0.7923, "step": 5038 }, { "epoch": 0.822660299579609, "grad_norm": 1.9736616611480713, "learning_rate": 1.9917973827546006e-05, "loss": 0.6447, "step": 5039 }, { "epoch": 0.8228235582221134, "grad_norm": 1.6450153589248657, "learning_rate": 1.991793326110154e-05, "loss": 0.6996, "step": 5040 }, { "epoch": 0.8229868168646177, "grad_norm": 2.1436688899993896, "learning_rate": 1.9917892684669717e-05, "loss": 0.9107, "step": 5041 }, { "epoch": 0.8231500755071222, "grad_norm": 1.8279258012771606, "learning_rate": 1.9917852098250577e-05, "loss": 0.6802, "step": 5042 }, { "epoch": 0.8233133341496266, "grad_norm": 2.0657525062561035, "learning_rate": 1.9917811501844166e-05, "loss": 0.6491, "step": 5043 }, { "epoch": 0.8234765927921309, "grad_norm": 1.9762307405471802, "learning_rate": 1.9917770895450518e-05, "loss": 0.699, "step": 5044 }, { "epoch": 0.8236398514346354, "grad_norm": 2.1198737621307373, "learning_rate": 1.991773027906968e-05, "loss": 0.8205, "step": 5045 }, { "epoch": 0.8238031100771397, "grad_norm": 2.019064426422119, "learning_rate": 1.991768965270169e-05, "loss": 0.8918, "step": 5046 }, { "epoch": 0.8239663687196441, "grad_norm": 2.2834441661834717, "learning_rate": 1.991764901634659e-05, "loss": 1.3521, "step": 5047 }, { "epoch": 0.8241296273621485, "grad_norm": 1.8330230712890625, "learning_rate": 1.9917608370004417e-05, "loss": 0.7895, "step": 5048 }, { "epoch": 0.8242928860046529, "grad_norm": 1.563226580619812, "learning_rate": 1.9917567713675216e-05, "loss": 0.6555, "step": 5049 }, { "epoch": 0.8244561446471572, "grad_norm": 1.8024563789367676, "learning_rate": 1.991752704735903e-05, "loss": 0.7995, "step": 5050 }, { "epoch": 0.8246194032896617, "grad_norm": 1.7589255571365356, "learning_rate": 1.9917486371055893e-05, "loss": 0.7803, "step": 5051 }, { "epoch": 0.824782661932166, "grad_norm": 2.030750036239624, "learning_rate": 1.9917445684765853e-05, "loss": 0.8942, "step": 5052 }, { "epoch": 0.8249459205746704, "grad_norm": 1.9861804246902466, "learning_rate": 1.9917404988488945e-05, "loss": 0.7845, "step": 5053 }, { "epoch": 0.8251091792171749, "grad_norm": 1.822818398475647, "learning_rate": 1.9917364282225213e-05, "loss": 0.7473, "step": 5054 }, { "epoch": 0.8252724378596792, "grad_norm": 1.8324793577194214, "learning_rate": 1.99173235659747e-05, "loss": 0.7691, "step": 5055 }, { "epoch": 0.8254356965021836, "grad_norm": 1.6955355405807495, "learning_rate": 1.9917282839737443e-05, "loss": 0.6456, "step": 5056 }, { "epoch": 0.825598955144688, "grad_norm": 1.7857342958450317, "learning_rate": 1.9917242103513485e-05, "loss": 0.7328, "step": 5057 }, { "epoch": 0.8257622137871924, "grad_norm": 2.206937551498413, "learning_rate": 1.9917201357302867e-05, "loss": 0.9463, "step": 5058 }, { "epoch": 0.8259254724296967, "grad_norm": 1.7162189483642578, "learning_rate": 1.9917160601105632e-05, "loss": 0.7151, "step": 5059 }, { "epoch": 0.8260887310722012, "grad_norm": 1.7935936450958252, "learning_rate": 1.9917119834921818e-05, "loss": 0.7918, "step": 5060 }, { "epoch": 0.8262519897147055, "grad_norm": 2.0773730278015137, "learning_rate": 1.9917079058751464e-05, "loss": 0.8335, "step": 5061 }, { "epoch": 0.8264152483572099, "grad_norm": 1.9841821193695068, "learning_rate": 1.9917038272594616e-05, "loss": 0.7716, "step": 5062 }, { "epoch": 0.8265785069997142, "grad_norm": 2.1464812755584717, "learning_rate": 1.991699747645131e-05, "loss": 0.8795, "step": 5063 }, { "epoch": 0.8267417656422187, "grad_norm": 2.1702659130096436, "learning_rate": 1.9916956670321595e-05, "loss": 0.6632, "step": 5064 }, { "epoch": 0.8269050242847231, "grad_norm": 1.7651402950286865, "learning_rate": 1.9916915854205504e-05, "loss": 0.7662, "step": 5065 }, { "epoch": 0.8270682829272274, "grad_norm": 1.8429718017578125, "learning_rate": 1.9916875028103083e-05, "loss": 0.7271, "step": 5066 }, { "epoch": 0.8272315415697319, "grad_norm": 2.1390669345855713, "learning_rate": 1.9916834192014375e-05, "loss": 0.7916, "step": 5067 }, { "epoch": 0.8273948002122362, "grad_norm": 2.1748156547546387, "learning_rate": 1.9916793345939412e-05, "loss": 0.854, "step": 5068 }, { "epoch": 0.8275580588547407, "grad_norm": 2.016902446746826, "learning_rate": 1.9916752489878243e-05, "loss": 0.8834, "step": 5069 }, { "epoch": 0.827721317497245, "grad_norm": 1.7721856832504272, "learning_rate": 1.9916711623830904e-05, "loss": 0.6287, "step": 5070 }, { "epoch": 0.8278845761397494, "grad_norm": 1.9506150484085083, "learning_rate": 1.9916670747797444e-05, "loss": 0.888, "step": 5071 }, { "epoch": 0.8280478347822537, "grad_norm": 2.0228545665740967, "learning_rate": 1.9916629861777898e-05, "loss": 0.7147, "step": 5072 }, { "epoch": 0.8282110934247582, "grad_norm": 1.7982124090194702, "learning_rate": 1.9916588965772305e-05, "loss": 0.667, "step": 5073 }, { "epoch": 0.8283743520672625, "grad_norm": 2.1498124599456787, "learning_rate": 1.9916548059780712e-05, "loss": 0.8328, "step": 5074 }, { "epoch": 0.828537610709767, "grad_norm": 1.9324437379837036, "learning_rate": 1.991650714380316e-05, "loss": 0.8766, "step": 5075 }, { "epoch": 0.8287008693522714, "grad_norm": 2.0160040855407715, "learning_rate": 1.9916466217839684e-05, "loss": 0.8178, "step": 5076 }, { "epoch": 0.8288641279947757, "grad_norm": 1.7618170976638794, "learning_rate": 1.991642528189033e-05, "loss": 0.7313, "step": 5077 }, { "epoch": 0.8290273866372802, "grad_norm": 2.2003722190856934, "learning_rate": 1.9916384335955138e-05, "loss": 0.8097, "step": 5078 }, { "epoch": 0.8291906452797845, "grad_norm": 1.9265284538269043, "learning_rate": 1.991634338003415e-05, "loss": 0.8687, "step": 5079 }, { "epoch": 0.8293539039222889, "grad_norm": 2.032991409301758, "learning_rate": 1.9916302414127408e-05, "loss": 0.808, "step": 5080 }, { "epoch": 0.8295171625647932, "grad_norm": 2.3782904148101807, "learning_rate": 1.9916261438234953e-05, "loss": 0.8414, "step": 5081 }, { "epoch": 0.8296804212072977, "grad_norm": 1.792364239692688, "learning_rate": 1.991622045235682e-05, "loss": 0.78, "step": 5082 }, { "epoch": 0.829843679849802, "grad_norm": 1.7115353345870972, "learning_rate": 1.9916179456493062e-05, "loss": 0.717, "step": 5083 }, { "epoch": 0.8300069384923064, "grad_norm": 1.7939060926437378, "learning_rate": 1.991613845064371e-05, "loss": 0.7764, "step": 5084 }, { "epoch": 0.8301701971348108, "grad_norm": 1.9377961158752441, "learning_rate": 1.991609743480881e-05, "loss": 0.9417, "step": 5085 }, { "epoch": 0.8303334557773152, "grad_norm": 1.8728289604187012, "learning_rate": 1.9916056408988402e-05, "loss": 0.7574, "step": 5086 }, { "epoch": 0.8304967144198196, "grad_norm": 1.7909367084503174, "learning_rate": 1.9916015373182528e-05, "loss": 0.7087, "step": 5087 }, { "epoch": 0.830659973062324, "grad_norm": 1.8567538261413574, "learning_rate": 1.9915974327391234e-05, "loss": 0.7226, "step": 5088 }, { "epoch": 0.8308232317048284, "grad_norm": 1.6564449071884155, "learning_rate": 1.991593327161455e-05, "loss": 0.6412, "step": 5089 }, { "epoch": 0.8309864903473327, "grad_norm": 2.0899055004119873, "learning_rate": 1.9915892205852528e-05, "loss": 0.8715, "step": 5090 }, { "epoch": 0.8311497489898372, "grad_norm": 1.9624117612838745, "learning_rate": 1.9915851130105205e-05, "loss": 0.8584, "step": 5091 }, { "epoch": 0.8313130076323415, "grad_norm": 2.044499158859253, "learning_rate": 1.9915810044372618e-05, "loss": 0.7197, "step": 5092 }, { "epoch": 0.8314762662748459, "grad_norm": 1.7407574653625488, "learning_rate": 1.9915768948654816e-05, "loss": 0.8142, "step": 5093 }, { "epoch": 0.8316395249173503, "grad_norm": 1.9463224411010742, "learning_rate": 1.9915727842951838e-05, "loss": 0.7875, "step": 5094 }, { "epoch": 0.8318027835598547, "grad_norm": 1.6472877264022827, "learning_rate": 1.9915686727263723e-05, "loss": 0.5924, "step": 5095 }, { "epoch": 0.831966042202359, "grad_norm": 1.7957106828689575, "learning_rate": 1.9915645601590517e-05, "loss": 0.6818, "step": 5096 }, { "epoch": 0.8321293008448635, "grad_norm": 1.837754487991333, "learning_rate": 1.9915604465932255e-05, "loss": 0.6845, "step": 5097 }, { "epoch": 0.8322925594873679, "grad_norm": 1.650193452835083, "learning_rate": 1.9915563320288983e-05, "loss": 0.6541, "step": 5098 }, { "epoch": 0.8324558181298722, "grad_norm": 2.0429975986480713, "learning_rate": 1.991552216466074e-05, "loss": 0.7325, "step": 5099 }, { "epoch": 0.8326190767723767, "grad_norm": 1.5354825258255005, "learning_rate": 1.9915480999047573e-05, "loss": 0.6282, "step": 5100 }, { "epoch": 0.832782335414881, "grad_norm": 1.7072590589523315, "learning_rate": 1.991543982344952e-05, "loss": 0.7017, "step": 5101 }, { "epoch": 0.8329455940573854, "grad_norm": 1.7479791641235352, "learning_rate": 1.9915398637866615e-05, "loss": 0.8186, "step": 5102 }, { "epoch": 0.8331088526998898, "grad_norm": 1.655551791191101, "learning_rate": 1.991535744229891e-05, "loss": 0.7231, "step": 5103 }, { "epoch": 0.8332721113423942, "grad_norm": 1.872882604598999, "learning_rate": 1.9915316236746443e-05, "loss": 0.96, "step": 5104 }, { "epoch": 0.8334353699848985, "grad_norm": 2.03730845451355, "learning_rate": 1.9915275021209255e-05, "loss": 0.7652, "step": 5105 }, { "epoch": 0.833598628627403, "grad_norm": 1.7753304243087769, "learning_rate": 1.9915233795687388e-05, "loss": 0.6701, "step": 5106 }, { "epoch": 0.8337618872699073, "grad_norm": 2.580536365509033, "learning_rate": 1.991519256018088e-05, "loss": 0.8883, "step": 5107 }, { "epoch": 0.8339251459124117, "grad_norm": 2.025146007537842, "learning_rate": 1.991515131468978e-05, "loss": 0.8724, "step": 5108 }, { "epoch": 0.8340884045549162, "grad_norm": 1.9715062379837036, "learning_rate": 1.9915110059214124e-05, "loss": 0.9495, "step": 5109 }, { "epoch": 0.8342516631974205, "grad_norm": 1.925430178642273, "learning_rate": 1.9915068793753952e-05, "loss": 0.6901, "step": 5110 }, { "epoch": 0.8344149218399249, "grad_norm": 1.6726386547088623, "learning_rate": 1.9915027518309312e-05, "loss": 0.7338, "step": 5111 }, { "epoch": 0.8345781804824293, "grad_norm": 1.8051495552062988, "learning_rate": 1.991498623288024e-05, "loss": 0.7517, "step": 5112 }, { "epoch": 0.8347414391249337, "grad_norm": 1.6939828395843506, "learning_rate": 1.9914944937466784e-05, "loss": 0.7633, "step": 5113 }, { "epoch": 0.834904697767438, "grad_norm": 1.7901723384857178, "learning_rate": 1.9914903632068975e-05, "loss": 0.7185, "step": 5114 }, { "epoch": 0.8350679564099425, "grad_norm": 1.998066782951355, "learning_rate": 1.9914862316686863e-05, "loss": 0.7844, "step": 5115 }, { "epoch": 0.8352312150524468, "grad_norm": 1.554466724395752, "learning_rate": 1.9914820991320486e-05, "loss": 0.6126, "step": 5116 }, { "epoch": 0.8353944736949512, "grad_norm": 1.8795663118362427, "learning_rate": 1.991477965596989e-05, "loss": 0.9675, "step": 5117 }, { "epoch": 0.8355577323374556, "grad_norm": 1.7835410833358765, "learning_rate": 1.991473831063511e-05, "loss": 0.8659, "step": 5118 }, { "epoch": 0.83572099097996, "grad_norm": 2.07253098487854, "learning_rate": 1.9914696955316192e-05, "loss": 0.8165, "step": 5119 }, { "epoch": 0.8358842496224644, "grad_norm": 1.766620397567749, "learning_rate": 1.9914655590013177e-05, "loss": 0.7845, "step": 5120 }, { "epoch": 0.8360475082649688, "grad_norm": 2.0287749767303467, "learning_rate": 1.991461421472611e-05, "loss": 0.8063, "step": 5121 }, { "epoch": 0.8362107669074732, "grad_norm": 1.842419147491455, "learning_rate": 1.9914572829455022e-05, "loss": 0.7971, "step": 5122 }, { "epoch": 0.8363740255499775, "grad_norm": 2.0687546730041504, "learning_rate": 1.991453143419997e-05, "loss": 0.7034, "step": 5123 }, { "epoch": 0.836537284192482, "grad_norm": 1.5977431535720825, "learning_rate": 1.991449002896098e-05, "loss": 0.6752, "step": 5124 }, { "epoch": 0.8367005428349863, "grad_norm": 2.0692272186279297, "learning_rate": 1.9914448613738107e-05, "loss": 0.9777, "step": 5125 }, { "epoch": 0.8368638014774907, "grad_norm": 1.600207805633545, "learning_rate": 1.9914407188531383e-05, "loss": 0.6891, "step": 5126 }, { "epoch": 0.8370270601199951, "grad_norm": 1.544081211090088, "learning_rate": 1.9914365753340855e-05, "loss": 0.7329, "step": 5127 }, { "epoch": 0.8371903187624995, "grad_norm": 2.2848329544067383, "learning_rate": 1.9914324308166564e-05, "loss": 0.8599, "step": 5128 }, { "epoch": 0.8373535774050038, "grad_norm": 2.0703237056732178, "learning_rate": 1.9914282853008552e-05, "loss": 0.9249, "step": 5129 }, { "epoch": 0.8375168360475083, "grad_norm": 2.01664137840271, "learning_rate": 1.9914241387866858e-05, "loss": 0.7939, "step": 5130 }, { "epoch": 0.8376800946900127, "grad_norm": 1.5371699333190918, "learning_rate": 1.9914199912741522e-05, "loss": 0.6422, "step": 5131 }, { "epoch": 0.837843353332517, "grad_norm": 1.905791997909546, "learning_rate": 1.9914158427632595e-05, "loss": 0.7184, "step": 5132 }, { "epoch": 0.8380066119750215, "grad_norm": 1.9164390563964844, "learning_rate": 1.991411693254011e-05, "loss": 0.8402, "step": 5133 }, { "epoch": 0.8381698706175258, "grad_norm": 2.4472849369049072, "learning_rate": 1.9914075427464113e-05, "loss": 0.9627, "step": 5134 }, { "epoch": 0.8383331292600302, "grad_norm": 2.0392396450042725, "learning_rate": 1.9914033912404646e-05, "loss": 0.9576, "step": 5135 }, { "epoch": 0.8384963879025346, "grad_norm": 1.824885606765747, "learning_rate": 1.9913992387361747e-05, "loss": 0.658, "step": 5136 }, { "epoch": 0.838659646545039, "grad_norm": 1.7902811765670776, "learning_rate": 1.991395085233546e-05, "loss": 0.7542, "step": 5137 }, { "epoch": 0.8388229051875433, "grad_norm": 2.062926769256592, "learning_rate": 1.991390930732583e-05, "loss": 0.8996, "step": 5138 }, { "epoch": 0.8389861638300478, "grad_norm": 1.8213695287704468, "learning_rate": 1.9913867752332897e-05, "loss": 0.768, "step": 5139 }, { "epoch": 0.8391494224725521, "grad_norm": 1.50505793094635, "learning_rate": 1.99138261873567e-05, "loss": 0.7062, "step": 5140 }, { "epoch": 0.8393126811150565, "grad_norm": 1.6011627912521362, "learning_rate": 1.991378461239728e-05, "loss": 0.6055, "step": 5141 }, { "epoch": 0.839475939757561, "grad_norm": 1.6268272399902344, "learning_rate": 1.9913743027454686e-05, "loss": 0.5225, "step": 5142 }, { "epoch": 0.8396391984000653, "grad_norm": 2.160813331604004, "learning_rate": 1.991370143252895e-05, "loss": 0.8309, "step": 5143 }, { "epoch": 0.8398024570425697, "grad_norm": 1.8993743658065796, "learning_rate": 1.991365982762012e-05, "loss": 0.9248, "step": 5144 }, { "epoch": 0.839965715685074, "grad_norm": 1.8001459836959839, "learning_rate": 1.991361821272824e-05, "loss": 0.7515, "step": 5145 }, { "epoch": 0.8401289743275785, "grad_norm": 1.91147780418396, "learning_rate": 1.991357658785335e-05, "loss": 0.8288, "step": 5146 }, { "epoch": 0.8402922329700828, "grad_norm": 1.7198907136917114, "learning_rate": 1.991353495299549e-05, "loss": 0.7039, "step": 5147 }, { "epoch": 0.8404554916125873, "grad_norm": 1.782019019126892, "learning_rate": 1.9913493308154702e-05, "loss": 0.7034, "step": 5148 }, { "epoch": 0.8406187502550916, "grad_norm": 1.6733627319335938, "learning_rate": 1.9913451653331028e-05, "loss": 0.6633, "step": 5149 }, { "epoch": 0.840782008897596, "grad_norm": 1.7069876194000244, "learning_rate": 1.991340998852451e-05, "loss": 0.8409, "step": 5150 }, { "epoch": 0.8409452675401003, "grad_norm": 2.3522868156433105, "learning_rate": 1.9913368313735194e-05, "loss": 0.9128, "step": 5151 }, { "epoch": 0.8411085261826048, "grad_norm": 2.088667154312134, "learning_rate": 1.9913326628963118e-05, "loss": 0.819, "step": 5152 }, { "epoch": 0.8412717848251092, "grad_norm": 2.356287717819214, "learning_rate": 1.9913284934208325e-05, "loss": 0.9845, "step": 5153 }, { "epoch": 0.8414350434676136, "grad_norm": 2.047569751739502, "learning_rate": 1.9913243229470857e-05, "loss": 0.7949, "step": 5154 }, { "epoch": 0.841598302110118, "grad_norm": 2.131227493286133, "learning_rate": 1.9913201514750752e-05, "loss": 0.8029, "step": 5155 }, { "epoch": 0.8417615607526223, "grad_norm": 1.8882328271865845, "learning_rate": 1.991315979004806e-05, "loss": 0.7818, "step": 5156 }, { "epoch": 0.8419248193951268, "grad_norm": 2.1638076305389404, "learning_rate": 1.991311805536282e-05, "loss": 0.9051, "step": 5157 }, { "epoch": 0.8420880780376311, "grad_norm": 1.791635274887085, "learning_rate": 1.9913076310695068e-05, "loss": 0.9033, "step": 5158 }, { "epoch": 0.8422513366801355, "grad_norm": 1.9655120372772217, "learning_rate": 1.991303455604485e-05, "loss": 0.8673, "step": 5159 }, { "epoch": 0.8424145953226398, "grad_norm": 1.9005126953125, "learning_rate": 1.9912992791412212e-05, "loss": 0.8878, "step": 5160 }, { "epoch": 0.8425778539651443, "grad_norm": 1.7389261722564697, "learning_rate": 1.9912951016797195e-05, "loss": 0.8836, "step": 5161 }, { "epoch": 0.8427411126076486, "grad_norm": 1.581194519996643, "learning_rate": 1.9912909232199834e-05, "loss": 0.6804, "step": 5162 }, { "epoch": 0.842904371250153, "grad_norm": 1.9976918697357178, "learning_rate": 1.991286743762018e-05, "loss": 0.7426, "step": 5163 }, { "epoch": 0.8430676298926575, "grad_norm": 1.8329269886016846, "learning_rate": 1.991282563305827e-05, "loss": 0.7062, "step": 5164 }, { "epoch": 0.8432308885351618, "grad_norm": 1.6030261516571045, "learning_rate": 1.9912783818514144e-05, "loss": 0.6584, "step": 5165 }, { "epoch": 0.8433941471776663, "grad_norm": 1.8029874563217163, "learning_rate": 1.9912741993987853e-05, "loss": 0.7975, "step": 5166 }, { "epoch": 0.8435574058201706, "grad_norm": 2.0740880966186523, "learning_rate": 1.9912700159479428e-05, "loss": 0.8719, "step": 5167 }, { "epoch": 0.843720664462675, "grad_norm": 1.8785903453826904, "learning_rate": 1.991265831498892e-05, "loss": 0.7047, "step": 5168 }, { "epoch": 0.8438839231051793, "grad_norm": 2.018407106399536, "learning_rate": 1.9912616460516364e-05, "loss": 0.9659, "step": 5169 }, { "epoch": 0.8440471817476838, "grad_norm": 1.6844274997711182, "learning_rate": 1.991257459606181e-05, "loss": 0.6877, "step": 5170 }, { "epoch": 0.8442104403901881, "grad_norm": 1.733445167541504, "learning_rate": 1.9912532721625295e-05, "loss": 0.7463, "step": 5171 }, { "epoch": 0.8443736990326925, "grad_norm": 1.9155004024505615, "learning_rate": 1.9912490837206862e-05, "loss": 0.7527, "step": 5172 }, { "epoch": 0.8445369576751969, "grad_norm": 1.95585036277771, "learning_rate": 1.9912448942806553e-05, "loss": 0.7729, "step": 5173 }, { "epoch": 0.8447002163177013, "grad_norm": 1.7079719305038452, "learning_rate": 1.991240703842441e-05, "loss": 0.628, "step": 5174 }, { "epoch": 0.8448634749602058, "grad_norm": 1.6530548334121704, "learning_rate": 1.9912365124060478e-05, "loss": 0.7373, "step": 5175 }, { "epoch": 0.8450267336027101, "grad_norm": 2.1432971954345703, "learning_rate": 1.9912323199714796e-05, "loss": 0.9194, "step": 5176 }, { "epoch": 0.8451899922452145, "grad_norm": 1.7289494276046753, "learning_rate": 1.9912281265387407e-05, "loss": 0.6558, "step": 5177 }, { "epoch": 0.8453532508877188, "grad_norm": 2.2957234382629395, "learning_rate": 1.9912239321078354e-05, "loss": 0.8881, "step": 5178 }, { "epoch": 0.8455165095302233, "grad_norm": 2.081101894378662, "learning_rate": 1.9912197366787676e-05, "loss": 0.8361, "step": 5179 }, { "epoch": 0.8456797681727276, "grad_norm": 1.7224607467651367, "learning_rate": 1.991215540251542e-05, "loss": 0.7167, "step": 5180 }, { "epoch": 0.845843026815232, "grad_norm": 1.725548267364502, "learning_rate": 1.9912113428261624e-05, "loss": 0.5677, "step": 5181 }, { "epoch": 0.8460062854577364, "grad_norm": 2.104229211807251, "learning_rate": 1.9912071444026332e-05, "loss": 0.9158, "step": 5182 }, { "epoch": 0.8461695441002408, "grad_norm": 1.826003909111023, "learning_rate": 1.991202944980959e-05, "loss": 0.7274, "step": 5183 }, { "epoch": 0.8463328027427452, "grad_norm": 1.8764857053756714, "learning_rate": 1.991198744561144e-05, "loss": 0.722, "step": 5184 }, { "epoch": 0.8464960613852496, "grad_norm": 1.919197916984558, "learning_rate": 1.9911945431431913e-05, "loss": 0.7334, "step": 5185 }, { "epoch": 0.846659320027754, "grad_norm": 1.8895585536956787, "learning_rate": 1.9911903407271062e-05, "loss": 0.733, "step": 5186 }, { "epoch": 0.8468225786702583, "grad_norm": 1.8721678256988525, "learning_rate": 1.991186137312893e-05, "loss": 0.8117, "step": 5187 }, { "epoch": 0.8469858373127628, "grad_norm": 1.9103262424468994, "learning_rate": 1.9911819329005554e-05, "loss": 0.828, "step": 5188 }, { "epoch": 0.8471490959552671, "grad_norm": 1.7443757057189941, "learning_rate": 1.991177727490098e-05, "loss": 0.7869, "step": 5189 }, { "epoch": 0.8473123545977715, "grad_norm": 2.033560276031494, "learning_rate": 1.991173521081525e-05, "loss": 0.8675, "step": 5190 }, { "epoch": 0.8474756132402759, "grad_norm": 2.1890907287597656, "learning_rate": 1.9911693136748403e-05, "loss": 0.7402, "step": 5191 }, { "epoch": 0.8476388718827803, "grad_norm": 1.9003220796585083, "learning_rate": 1.9911651052700483e-05, "loss": 0.741, "step": 5192 }, { "epoch": 0.8478021305252846, "grad_norm": 1.8407002687454224, "learning_rate": 1.9911608958671533e-05, "loss": 0.7796, "step": 5193 }, { "epoch": 0.8479653891677891, "grad_norm": 2.300396680831909, "learning_rate": 1.9911566854661598e-05, "loss": 0.7326, "step": 5194 }, { "epoch": 0.8481286478102935, "grad_norm": 1.9121159315109253, "learning_rate": 1.9911524740670715e-05, "loss": 0.6953, "step": 5195 }, { "epoch": 0.8482919064527978, "grad_norm": 1.9805223941802979, "learning_rate": 1.991148261669893e-05, "loss": 0.8407, "step": 5196 }, { "epoch": 0.8484551650953023, "grad_norm": 1.764054775238037, "learning_rate": 1.9911440482746286e-05, "loss": 0.7854, "step": 5197 }, { "epoch": 0.8486184237378066, "grad_norm": 1.6168181896209717, "learning_rate": 1.9911398338812825e-05, "loss": 0.7899, "step": 5198 }, { "epoch": 0.848781682380311, "grad_norm": 1.5975042581558228, "learning_rate": 1.9911356184898585e-05, "loss": 0.6038, "step": 5199 }, { "epoch": 0.8489449410228154, "grad_norm": 1.6113570928573608, "learning_rate": 1.9911314021003614e-05, "loss": 0.7886, "step": 5200 }, { "epoch": 0.8491081996653198, "grad_norm": 2.0364503860473633, "learning_rate": 1.991127184712795e-05, "loss": 0.9071, "step": 5201 }, { "epoch": 0.8492714583078241, "grad_norm": 2.098405122756958, "learning_rate": 1.991122966327164e-05, "loss": 0.9129, "step": 5202 }, { "epoch": 0.8494347169503286, "grad_norm": 2.038407325744629, "learning_rate": 1.9911187469434724e-05, "loss": 0.9639, "step": 5203 }, { "epoch": 0.8495979755928329, "grad_norm": 1.9227811098098755, "learning_rate": 1.9911145265617246e-05, "loss": 0.8318, "step": 5204 }, { "epoch": 0.8497612342353373, "grad_norm": 1.8247493505477905, "learning_rate": 1.9911103051819248e-05, "loss": 0.797, "step": 5205 }, { "epoch": 0.8499244928778418, "grad_norm": 1.9497145414352417, "learning_rate": 1.9911060828040768e-05, "loss": 0.8014, "step": 5206 }, { "epoch": 0.8500877515203461, "grad_norm": 2.2105038166046143, "learning_rate": 1.9911018594281855e-05, "loss": 0.8664, "step": 5207 }, { "epoch": 0.8502510101628505, "grad_norm": 1.912534236907959, "learning_rate": 1.991097635054255e-05, "loss": 0.7798, "step": 5208 }, { "epoch": 0.8504142688053549, "grad_norm": 1.8173553943634033, "learning_rate": 1.9910934096822895e-05, "loss": 0.7025, "step": 5209 }, { "epoch": 0.8505775274478593, "grad_norm": 2.2416248321533203, "learning_rate": 1.9910891833122926e-05, "loss": 0.7768, "step": 5210 }, { "epoch": 0.8507407860903636, "grad_norm": 1.8488212823867798, "learning_rate": 1.9910849559442697e-05, "loss": 0.766, "step": 5211 }, { "epoch": 0.8509040447328681, "grad_norm": 1.8061200380325317, "learning_rate": 1.9910807275782244e-05, "loss": 0.8663, "step": 5212 }, { "epoch": 0.8510673033753724, "grad_norm": 1.764367938041687, "learning_rate": 1.991076498214161e-05, "loss": 0.7467, "step": 5213 }, { "epoch": 0.8512305620178768, "grad_norm": 1.757871389389038, "learning_rate": 1.991072267852084e-05, "loss": 0.8953, "step": 5214 }, { "epoch": 0.8513938206603812, "grad_norm": 1.8776533603668213, "learning_rate": 1.9910680364919975e-05, "loss": 0.8229, "step": 5215 }, { "epoch": 0.8515570793028856, "grad_norm": 1.6906567811965942, "learning_rate": 1.9910638041339053e-05, "loss": 0.7582, "step": 5216 }, { "epoch": 0.85172033794539, "grad_norm": 1.802294135093689, "learning_rate": 1.9910595707778127e-05, "loss": 0.9735, "step": 5217 }, { "epoch": 0.8518835965878944, "grad_norm": 2.0317392349243164, "learning_rate": 1.9910553364237232e-05, "loss": 0.7421, "step": 5218 }, { "epoch": 0.8520468552303988, "grad_norm": 2.0111281871795654, "learning_rate": 1.9910511010716405e-05, "loss": 0.8753, "step": 5219 }, { "epoch": 0.8522101138729031, "grad_norm": 1.837296485900879, "learning_rate": 1.9910468647215708e-05, "loss": 0.7092, "step": 5220 }, { "epoch": 0.8523733725154076, "grad_norm": 1.9931890964508057, "learning_rate": 1.9910426273735163e-05, "loss": 0.8305, "step": 5221 }, { "epoch": 0.8525366311579119, "grad_norm": 1.766592264175415, "learning_rate": 1.9910383890274825e-05, "loss": 0.7077, "step": 5222 }, { "epoch": 0.8526998898004163, "grad_norm": 2.0314688682556152, "learning_rate": 1.9910341496834734e-05, "loss": 0.7143, "step": 5223 }, { "epoch": 0.8528631484429207, "grad_norm": 1.7196743488311768, "learning_rate": 1.991029909341493e-05, "loss": 0.7315, "step": 5224 }, { "epoch": 0.8530264070854251, "grad_norm": 1.6130890846252441, "learning_rate": 1.991025668001546e-05, "loss": 0.7469, "step": 5225 }, { "epoch": 0.8531896657279294, "grad_norm": 2.028916120529175, "learning_rate": 1.991021425663636e-05, "loss": 0.7618, "step": 5226 }, { "epoch": 0.8533529243704339, "grad_norm": 1.955233097076416, "learning_rate": 1.9910171823277676e-05, "loss": 0.8082, "step": 5227 }, { "epoch": 0.8535161830129383, "grad_norm": 1.7255843877792358, "learning_rate": 1.9910129379939455e-05, "loss": 0.6573, "step": 5228 }, { "epoch": 0.8536794416554426, "grad_norm": 1.9246351718902588, "learning_rate": 1.9910086926621738e-05, "loss": 0.7637, "step": 5229 }, { "epoch": 0.8538427002979471, "grad_norm": 1.8635032176971436, "learning_rate": 1.9910044463324564e-05, "loss": 0.7794, "step": 5230 }, { "epoch": 0.8540059589404514, "grad_norm": 1.5943204164505005, "learning_rate": 1.9910001990047976e-05, "loss": 0.7402, "step": 5231 }, { "epoch": 0.8541692175829558, "grad_norm": 1.635206937789917, "learning_rate": 1.9909959506792022e-05, "loss": 0.6256, "step": 5232 }, { "epoch": 0.8543324762254602, "grad_norm": 1.894118070602417, "learning_rate": 1.990991701355674e-05, "loss": 0.8568, "step": 5233 }, { "epoch": 0.8544957348679646, "grad_norm": 2.069542169570923, "learning_rate": 1.990987451034217e-05, "loss": 0.9139, "step": 5234 }, { "epoch": 0.8546589935104689, "grad_norm": 1.9148963689804077, "learning_rate": 1.9909831997148363e-05, "loss": 0.7141, "step": 5235 }, { "epoch": 0.8548222521529734, "grad_norm": 1.4539021253585815, "learning_rate": 1.9909789473975358e-05, "loss": 0.5316, "step": 5236 }, { "epoch": 0.8549855107954777, "grad_norm": 2.009082078933716, "learning_rate": 1.9909746940823197e-05, "loss": 0.724, "step": 5237 }, { "epoch": 0.8551487694379821, "grad_norm": 1.810773253440857, "learning_rate": 1.9909704397691924e-05, "loss": 0.7616, "step": 5238 }, { "epoch": 0.8553120280804866, "grad_norm": 1.774091362953186, "learning_rate": 1.990966184458158e-05, "loss": 0.7153, "step": 5239 }, { "epoch": 0.8554752867229909, "grad_norm": 2.0093867778778076, "learning_rate": 1.990961928149221e-05, "loss": 0.8105, "step": 5240 }, { "epoch": 0.8556385453654953, "grad_norm": 1.7953866720199585, "learning_rate": 1.9909576708423857e-05, "loss": 0.6886, "step": 5241 }, { "epoch": 0.8558018040079997, "grad_norm": 1.8970423936843872, "learning_rate": 1.9909534125376563e-05, "loss": 0.8574, "step": 5242 }, { "epoch": 0.8559650626505041, "grad_norm": 1.8539543151855469, "learning_rate": 1.990949153235037e-05, "loss": 0.7792, "step": 5243 }, { "epoch": 0.8561283212930084, "grad_norm": 1.7712501287460327, "learning_rate": 1.990944892934532e-05, "loss": 0.5931, "step": 5244 }, { "epoch": 0.8562915799355129, "grad_norm": 1.84429132938385, "learning_rate": 1.990940631636146e-05, "loss": 0.6989, "step": 5245 }, { "epoch": 0.8564548385780172, "grad_norm": 1.8634265661239624, "learning_rate": 1.9909363693398828e-05, "loss": 0.7841, "step": 5246 }, { "epoch": 0.8566180972205216, "grad_norm": 1.753777265548706, "learning_rate": 1.9909321060457475e-05, "loss": 0.6982, "step": 5247 }, { "epoch": 0.856781355863026, "grad_norm": 1.595438838005066, "learning_rate": 1.9909278417537433e-05, "loss": 0.6368, "step": 5248 }, { "epoch": 0.8569446145055304, "grad_norm": 2.764350414276123, "learning_rate": 1.990923576463875e-05, "loss": 0.9282, "step": 5249 }, { "epoch": 0.8571078731480348, "grad_norm": 1.5892215967178345, "learning_rate": 1.990919310176147e-05, "loss": 0.7189, "step": 5250 }, { "epoch": 0.8572711317905392, "grad_norm": 2.046966552734375, "learning_rate": 1.990915042890564e-05, "loss": 0.6612, "step": 5251 }, { "epoch": 0.8574343904330436, "grad_norm": 2.2284348011016846, "learning_rate": 1.9909107746071294e-05, "loss": 0.8726, "step": 5252 }, { "epoch": 0.8575976490755479, "grad_norm": 1.8981080055236816, "learning_rate": 1.9909065053258477e-05, "loss": 0.7297, "step": 5253 }, { "epoch": 0.8577609077180524, "grad_norm": 2.076026201248169, "learning_rate": 1.9909022350467236e-05, "loss": 0.8593, "step": 5254 }, { "epoch": 0.8579241663605567, "grad_norm": 2.0104031562805176, "learning_rate": 1.9908979637697612e-05, "loss": 0.7239, "step": 5255 }, { "epoch": 0.8580874250030611, "grad_norm": 1.6701771020889282, "learning_rate": 1.990893691494965e-05, "loss": 0.7287, "step": 5256 }, { "epoch": 0.8582506836455654, "grad_norm": 1.668904185295105, "learning_rate": 1.990889418222339e-05, "loss": 0.6737, "step": 5257 }, { "epoch": 0.8584139422880699, "grad_norm": 1.901370882987976, "learning_rate": 1.9908851439518875e-05, "loss": 0.6895, "step": 5258 }, { "epoch": 0.8585772009305742, "grad_norm": 2.0240578651428223, "learning_rate": 1.990880868683615e-05, "loss": 0.9721, "step": 5259 }, { "epoch": 0.8587404595730787, "grad_norm": 1.9102860689163208, "learning_rate": 1.9908765924175256e-05, "loss": 0.7693, "step": 5260 }, { "epoch": 0.8589037182155831, "grad_norm": 2.2018327713012695, "learning_rate": 1.990872315153624e-05, "loss": 0.9316, "step": 5261 }, { "epoch": 0.8590669768580874, "grad_norm": 2.16581130027771, "learning_rate": 1.990868036891914e-05, "loss": 0.8588, "step": 5262 }, { "epoch": 0.8592302355005919, "grad_norm": 2.003629207611084, "learning_rate": 1.9908637576324005e-05, "loss": 0.8266, "step": 5263 }, { "epoch": 0.8593934941430962, "grad_norm": 2.2723209857940674, "learning_rate": 1.990859477375087e-05, "loss": 0.7091, "step": 5264 }, { "epoch": 0.8595567527856006, "grad_norm": 1.9962843656539917, "learning_rate": 1.9908551961199785e-05, "loss": 0.7861, "step": 5265 }, { "epoch": 0.859720011428105, "grad_norm": 1.9620423316955566, "learning_rate": 1.990850913867079e-05, "loss": 0.8081, "step": 5266 }, { "epoch": 0.8598832700706094, "grad_norm": 1.8551784753799438, "learning_rate": 1.990846630616393e-05, "loss": 0.8188, "step": 5267 }, { "epoch": 0.8600465287131137, "grad_norm": 1.8519151210784912, "learning_rate": 1.9908423463679246e-05, "loss": 0.7708, "step": 5268 }, { "epoch": 0.8602097873556181, "grad_norm": 1.7020376920700073, "learning_rate": 1.9908380611216785e-05, "loss": 0.6531, "step": 5269 }, { "epoch": 0.8603730459981225, "grad_norm": 1.87134850025177, "learning_rate": 1.9908337748776585e-05, "loss": 0.7429, "step": 5270 }, { "epoch": 0.8605363046406269, "grad_norm": 2.0105204582214355, "learning_rate": 1.9908294876358692e-05, "loss": 0.7262, "step": 5271 }, { "epoch": 0.8606995632831314, "grad_norm": 2.0024731159210205, "learning_rate": 1.9908251993963148e-05, "loss": 0.8756, "step": 5272 }, { "epoch": 0.8608628219256357, "grad_norm": 1.6941802501678467, "learning_rate": 1.9908209101589996e-05, "loss": 0.7362, "step": 5273 }, { "epoch": 0.8610260805681401, "grad_norm": 1.6821627616882324, "learning_rate": 1.9908166199239284e-05, "loss": 0.7683, "step": 5274 }, { "epoch": 0.8611893392106444, "grad_norm": 1.6837925910949707, "learning_rate": 1.990812328691105e-05, "loss": 0.7419, "step": 5275 }, { "epoch": 0.8613525978531489, "grad_norm": 1.6824650764465332, "learning_rate": 1.9908080364605334e-05, "loss": 0.7107, "step": 5276 }, { "epoch": 0.8615158564956532, "grad_norm": 1.8458701372146606, "learning_rate": 1.990803743232219e-05, "loss": 0.8471, "step": 5277 }, { "epoch": 0.8616791151381576, "grad_norm": 1.8365248441696167, "learning_rate": 1.990799449006165e-05, "loss": 0.6981, "step": 5278 }, { "epoch": 0.861842373780662, "grad_norm": 1.8982356786727905, "learning_rate": 1.9907951537823762e-05, "loss": 0.896, "step": 5279 }, { "epoch": 0.8620056324231664, "grad_norm": 2.1281328201293945, "learning_rate": 1.9907908575608573e-05, "loss": 0.8622, "step": 5280 }, { "epoch": 0.8621688910656707, "grad_norm": 1.3575146198272705, "learning_rate": 1.990786560341612e-05, "loss": 0.5932, "step": 5281 }, { "epoch": 0.8623321497081752, "grad_norm": 1.8779218196868896, "learning_rate": 1.9907822621246452e-05, "loss": 0.9159, "step": 5282 }, { "epoch": 0.8624954083506796, "grad_norm": 1.809924602508545, "learning_rate": 1.9907779629099605e-05, "loss": 0.7411, "step": 5283 }, { "epoch": 0.8626586669931839, "grad_norm": 1.6161234378814697, "learning_rate": 1.990773662697563e-05, "loss": 0.757, "step": 5284 }, { "epoch": 0.8628219256356884, "grad_norm": 1.9763113260269165, "learning_rate": 1.9907693614874566e-05, "loss": 0.6941, "step": 5285 }, { "epoch": 0.8629851842781927, "grad_norm": 1.9395538568496704, "learning_rate": 1.9907650592796454e-05, "loss": 0.7952, "step": 5286 }, { "epoch": 0.8631484429206971, "grad_norm": 1.9521416425704956, "learning_rate": 1.9907607560741345e-05, "loss": 0.7401, "step": 5287 }, { "epoch": 0.8633117015632015, "grad_norm": 1.8089687824249268, "learning_rate": 1.9907564518709276e-05, "loss": 0.7943, "step": 5288 }, { "epoch": 0.8634749602057059, "grad_norm": 1.4096678495407104, "learning_rate": 1.9907521466700293e-05, "loss": 0.5581, "step": 5289 }, { "epoch": 0.8636382188482102, "grad_norm": 1.72004234790802, "learning_rate": 1.9907478404714438e-05, "loss": 0.783, "step": 5290 }, { "epoch": 0.8638014774907147, "grad_norm": 1.8231446743011475, "learning_rate": 1.9907435332751754e-05, "loss": 0.8052, "step": 5291 }, { "epoch": 0.863964736133219, "grad_norm": 2.055612087249756, "learning_rate": 1.9907392250812287e-05, "loss": 0.7756, "step": 5292 }, { "epoch": 0.8641279947757234, "grad_norm": 1.622261643409729, "learning_rate": 1.9907349158896075e-05, "loss": 0.7184, "step": 5293 }, { "epoch": 0.8642912534182279, "grad_norm": 1.9895159006118774, "learning_rate": 1.9907306057003167e-05, "loss": 0.8028, "step": 5294 }, { "epoch": 0.8644545120607322, "grad_norm": 2.29270076751709, "learning_rate": 1.9907262945133607e-05, "loss": 0.8341, "step": 5295 }, { "epoch": 0.8646177707032366, "grad_norm": 1.69044828414917, "learning_rate": 1.9907219823287436e-05, "loss": 0.7895, "step": 5296 }, { "epoch": 0.864781029345741, "grad_norm": 1.723405122756958, "learning_rate": 1.9907176691464693e-05, "loss": 0.8586, "step": 5297 }, { "epoch": 0.8649442879882454, "grad_norm": 2.107980251312256, "learning_rate": 1.990713354966543e-05, "loss": 0.7913, "step": 5298 }, { "epoch": 0.8651075466307497, "grad_norm": 1.6097900867462158, "learning_rate": 1.9907090397889682e-05, "loss": 0.7168, "step": 5299 }, { "epoch": 0.8652708052732542, "grad_norm": 1.8492448329925537, "learning_rate": 1.99070472361375e-05, "loss": 0.629, "step": 5300 }, { "epoch": 0.8654340639157585, "grad_norm": 2.362110137939453, "learning_rate": 1.9907004064408923e-05, "loss": 0.7905, "step": 5301 }, { "epoch": 0.8655973225582629, "grad_norm": 1.621491551399231, "learning_rate": 1.9906960882703993e-05, "loss": 0.741, "step": 5302 }, { "epoch": 0.8657605812007673, "grad_norm": 1.8737967014312744, "learning_rate": 1.9906917691022757e-05, "loss": 0.7164, "step": 5303 }, { "epoch": 0.8659238398432717, "grad_norm": 1.8647116422653198, "learning_rate": 1.990687448936526e-05, "loss": 0.704, "step": 5304 }, { "epoch": 0.8660870984857761, "grad_norm": 1.8268877267837524, "learning_rate": 1.990683127773154e-05, "loss": 0.6797, "step": 5305 }, { "epoch": 0.8662503571282805, "grad_norm": 2.1025314331054688, "learning_rate": 1.9906788056121646e-05, "loss": 0.8211, "step": 5306 }, { "epoch": 0.8664136157707849, "grad_norm": 1.8377305269241333, "learning_rate": 1.990674482453562e-05, "loss": 0.7841, "step": 5307 }, { "epoch": 0.8665768744132892, "grad_norm": 1.6889852285385132, "learning_rate": 1.9906701582973502e-05, "loss": 0.7654, "step": 5308 }, { "epoch": 0.8667401330557937, "grad_norm": 1.8103327751159668, "learning_rate": 1.9906658331435338e-05, "loss": 0.5858, "step": 5309 }, { "epoch": 0.866903391698298, "grad_norm": 1.8921058177947998, "learning_rate": 1.9906615069921175e-05, "loss": 0.8204, "step": 5310 }, { "epoch": 0.8670666503408024, "grad_norm": 1.5741298198699951, "learning_rate": 1.990657179843105e-05, "loss": 0.6562, "step": 5311 }, { "epoch": 0.8672299089833068, "grad_norm": 1.8704915046691895, "learning_rate": 1.990652851696501e-05, "loss": 0.79, "step": 5312 }, { "epoch": 0.8673931676258112, "grad_norm": 1.824502944946289, "learning_rate": 1.99064852255231e-05, "loss": 0.8256, "step": 5313 }, { "epoch": 0.8675564262683155, "grad_norm": 1.771916151046753, "learning_rate": 1.990644192410536e-05, "loss": 0.7333, "step": 5314 }, { "epoch": 0.86771968491082, "grad_norm": 2.2634685039520264, "learning_rate": 1.9906398612711837e-05, "loss": 0.7429, "step": 5315 }, { "epoch": 0.8678829435533244, "grad_norm": 2.1489388942718506, "learning_rate": 1.9906355291342573e-05, "loss": 0.811, "step": 5316 }, { "epoch": 0.8680462021958287, "grad_norm": 1.9035115242004395, "learning_rate": 1.9906311959997614e-05, "loss": 0.7779, "step": 5317 }, { "epoch": 0.8682094608383332, "grad_norm": 1.9477570056915283, "learning_rate": 1.9906268618676997e-05, "loss": 1.0146, "step": 5318 }, { "epoch": 0.8683727194808375, "grad_norm": 2.302804470062256, "learning_rate": 1.9906225267380774e-05, "loss": 0.9743, "step": 5319 }, { "epoch": 0.8685359781233419, "grad_norm": 1.984257698059082, "learning_rate": 1.9906181906108983e-05, "loss": 0.9706, "step": 5320 }, { "epoch": 0.8686992367658463, "grad_norm": 2.1444292068481445, "learning_rate": 1.990613853486167e-05, "loss": 1.0123, "step": 5321 }, { "epoch": 0.8688624954083507, "grad_norm": 1.9393644332885742, "learning_rate": 1.9906095153638877e-05, "loss": 0.7344, "step": 5322 }, { "epoch": 0.869025754050855, "grad_norm": 1.7750895023345947, "learning_rate": 1.990605176244065e-05, "loss": 0.6801, "step": 5323 }, { "epoch": 0.8691890126933595, "grad_norm": 1.9480575323104858, "learning_rate": 1.990600836126703e-05, "loss": 0.6649, "step": 5324 }, { "epoch": 0.8693522713358638, "grad_norm": 1.7845743894577026, "learning_rate": 1.9905964950118063e-05, "loss": 0.7911, "step": 5325 }, { "epoch": 0.8695155299783682, "grad_norm": 1.681143879890442, "learning_rate": 1.9905921528993796e-05, "loss": 0.8538, "step": 5326 }, { "epoch": 0.8696787886208727, "grad_norm": 1.5916496515274048, "learning_rate": 1.9905878097894263e-05, "loss": 0.7182, "step": 5327 }, { "epoch": 0.869842047263377, "grad_norm": 1.7842963933944702, "learning_rate": 1.9905834656819513e-05, "loss": 0.7537, "step": 5328 }, { "epoch": 0.8700053059058814, "grad_norm": 1.7643011808395386, "learning_rate": 1.9905791205769596e-05, "loss": 0.6886, "step": 5329 }, { "epoch": 0.8701685645483858, "grad_norm": 1.9059230089187622, "learning_rate": 1.9905747744744547e-05, "loss": 0.8013, "step": 5330 }, { "epoch": 0.8703318231908902, "grad_norm": 1.6504101753234863, "learning_rate": 1.990570427374441e-05, "loss": 0.6243, "step": 5331 }, { "epoch": 0.8704950818333945, "grad_norm": 1.8690545558929443, "learning_rate": 1.9905660792769234e-05, "loss": 0.7277, "step": 5332 }, { "epoch": 0.870658340475899, "grad_norm": 1.6733988523483276, "learning_rate": 1.9905617301819057e-05, "loss": 0.7448, "step": 5333 }, { "epoch": 0.8708215991184033, "grad_norm": 2.0021817684173584, "learning_rate": 1.990557380089393e-05, "loss": 1.0192, "step": 5334 }, { "epoch": 0.8709848577609077, "grad_norm": 2.192671298980713, "learning_rate": 1.990553028999389e-05, "loss": 0.7175, "step": 5335 }, { "epoch": 0.871148116403412, "grad_norm": 1.4054670333862305, "learning_rate": 1.9905486769118987e-05, "loss": 0.5802, "step": 5336 }, { "epoch": 0.8713113750459165, "grad_norm": 1.6612898111343384, "learning_rate": 1.990544323826926e-05, "loss": 0.7542, "step": 5337 }, { "epoch": 0.8714746336884209, "grad_norm": 1.7611727714538574, "learning_rate": 1.990539969744475e-05, "loss": 0.8509, "step": 5338 }, { "epoch": 0.8716378923309253, "grad_norm": 1.9242517948150635, "learning_rate": 1.9905356146645514e-05, "loss": 0.7966, "step": 5339 }, { "epoch": 0.8718011509734297, "grad_norm": 2.21624493598938, "learning_rate": 1.990531258587158e-05, "loss": 0.9661, "step": 5340 }, { "epoch": 0.871964409615934, "grad_norm": 1.9838974475860596, "learning_rate": 1.9905269015123e-05, "loss": 0.9176, "step": 5341 }, { "epoch": 0.8721276682584385, "grad_norm": 1.858581304550171, "learning_rate": 1.990522543439982e-05, "loss": 0.6789, "step": 5342 }, { "epoch": 0.8722909269009428, "grad_norm": 1.9639451503753662, "learning_rate": 1.9905181843702073e-05, "loss": 0.8564, "step": 5343 }, { "epoch": 0.8724541855434472, "grad_norm": 2.051272392272949, "learning_rate": 1.990513824302982e-05, "loss": 0.8163, "step": 5344 }, { "epoch": 0.8726174441859516, "grad_norm": 1.8524880409240723, "learning_rate": 1.990509463238309e-05, "loss": 0.8013, "step": 5345 }, { "epoch": 0.872780702828456, "grad_norm": 1.9199869632720947, "learning_rate": 1.9905051011761933e-05, "loss": 0.726, "step": 5346 }, { "epoch": 0.8729439614709603, "grad_norm": 2.0056276321411133, "learning_rate": 1.9905007381166394e-05, "loss": 1.2549, "step": 5347 }, { "epoch": 0.8731072201134648, "grad_norm": 2.1515982151031494, "learning_rate": 1.9904963740596514e-05, "loss": 0.8041, "step": 5348 }, { "epoch": 0.8732704787559692, "grad_norm": 2.007981538772583, "learning_rate": 1.9904920090052336e-05, "loss": 0.8187, "step": 5349 }, { "epoch": 0.8734337373984735, "grad_norm": 1.9774667024612427, "learning_rate": 1.9904876429533912e-05, "loss": 0.6288, "step": 5350 }, { "epoch": 0.873596996040978, "grad_norm": 1.6390999555587769, "learning_rate": 1.990483275904127e-05, "loss": 0.6677, "step": 5351 }, { "epoch": 0.8737602546834823, "grad_norm": 1.9435499906539917, "learning_rate": 1.9904789078574472e-05, "loss": 0.7302, "step": 5352 }, { "epoch": 0.8739235133259867, "grad_norm": 1.7114388942718506, "learning_rate": 1.9904745388133552e-05, "loss": 0.8115, "step": 5353 }, { "epoch": 0.874086771968491, "grad_norm": 2.0061943531036377, "learning_rate": 1.9904701687718558e-05, "loss": 0.9039, "step": 5354 }, { "epoch": 0.8742500306109955, "grad_norm": 1.8601288795471191, "learning_rate": 1.990465797732953e-05, "loss": 0.9263, "step": 5355 }, { "epoch": 0.8744132892534998, "grad_norm": 2.5641002655029297, "learning_rate": 1.9904614256966514e-05, "loss": 0.7432, "step": 5356 }, { "epoch": 0.8745765478960043, "grad_norm": 2.044412136077881, "learning_rate": 1.9904570526629556e-05, "loss": 0.7277, "step": 5357 }, { "epoch": 0.8747398065385086, "grad_norm": 2.10294508934021, "learning_rate": 1.9904526786318693e-05, "loss": 0.8701, "step": 5358 }, { "epoch": 0.874903065181013, "grad_norm": 1.6787285804748535, "learning_rate": 1.990448303603398e-05, "loss": 0.6683, "step": 5359 }, { "epoch": 0.8750663238235175, "grad_norm": 1.6135354042053223, "learning_rate": 1.990443927577545e-05, "loss": 0.7424, "step": 5360 }, { "epoch": 0.8752295824660218, "grad_norm": 1.7826166152954102, "learning_rate": 1.9904395505543156e-05, "loss": 0.7811, "step": 5361 }, { "epoch": 0.8753928411085262, "grad_norm": 1.9096801280975342, "learning_rate": 1.990435172533714e-05, "loss": 0.6655, "step": 5362 }, { "epoch": 0.8755560997510305, "grad_norm": 1.8651432991027832, "learning_rate": 1.990430793515744e-05, "loss": 0.8022, "step": 5363 }, { "epoch": 0.875719358393535, "grad_norm": 1.912885069847107, "learning_rate": 1.990426413500411e-05, "loss": 0.9028, "step": 5364 }, { "epoch": 0.8758826170360393, "grad_norm": 1.8068113327026367, "learning_rate": 1.9904220324877183e-05, "loss": 0.7282, "step": 5365 }, { "epoch": 0.8760458756785438, "grad_norm": 2.1601099967956543, "learning_rate": 1.9904176504776707e-05, "loss": 0.7525, "step": 5366 }, { "epoch": 0.8762091343210481, "grad_norm": 2.074791669845581, "learning_rate": 1.9904132674702734e-05, "loss": 0.9304, "step": 5367 }, { "epoch": 0.8763723929635525, "grad_norm": 1.9066044092178345, "learning_rate": 1.99040888346553e-05, "loss": 0.8537, "step": 5368 }, { "epoch": 0.8765356516060568, "grad_norm": 1.7449294328689575, "learning_rate": 1.990404498463445e-05, "loss": 0.6296, "step": 5369 }, { "epoch": 0.8766989102485613, "grad_norm": 1.786722183227539, "learning_rate": 1.990400112464023e-05, "loss": 0.7452, "step": 5370 }, { "epoch": 0.8768621688910657, "grad_norm": 2.097872018814087, "learning_rate": 1.9903957254672687e-05, "loss": 0.8531, "step": 5371 }, { "epoch": 0.87702542753357, "grad_norm": 2.0409600734710693, "learning_rate": 1.9903913374731858e-05, "loss": 0.8803, "step": 5372 }, { "epoch": 0.8771886861760745, "grad_norm": 1.5766253471374512, "learning_rate": 1.990386948481779e-05, "loss": 0.6391, "step": 5373 }, { "epoch": 0.8773519448185788, "grad_norm": 1.8633641004562378, "learning_rate": 1.9903825584930527e-05, "loss": 0.6918, "step": 5374 }, { "epoch": 0.8775152034610832, "grad_norm": 2.0771825313568115, "learning_rate": 1.9903781675070116e-05, "loss": 0.9161, "step": 5375 }, { "epoch": 0.8776784621035876, "grad_norm": 1.7609386444091797, "learning_rate": 1.9903737755236604e-05, "loss": 0.7634, "step": 5376 }, { "epoch": 0.877841720746092, "grad_norm": 1.960425615310669, "learning_rate": 1.9903693825430027e-05, "loss": 0.8728, "step": 5377 }, { "epoch": 0.8780049793885963, "grad_norm": 1.8830480575561523, "learning_rate": 1.990364988565043e-05, "loss": 0.7346, "step": 5378 }, { "epoch": 0.8781682380311008, "grad_norm": 1.8873202800750732, "learning_rate": 1.990360593589786e-05, "loss": 0.7891, "step": 5379 }, { "epoch": 0.8783314966736051, "grad_norm": 1.6141752004623413, "learning_rate": 1.9903561976172367e-05, "loss": 0.927, "step": 5380 }, { "epoch": 0.8784947553161095, "grad_norm": 1.7607455253601074, "learning_rate": 1.9903518006473987e-05, "loss": 0.8128, "step": 5381 }, { "epoch": 0.878658013958614, "grad_norm": 1.829599380493164, "learning_rate": 1.9903474026802768e-05, "loss": 0.763, "step": 5382 }, { "epoch": 0.8788212726011183, "grad_norm": 2.2322628498077393, "learning_rate": 1.9903430037158755e-05, "loss": 0.7634, "step": 5383 }, { "epoch": 0.8789845312436227, "grad_norm": 1.962327480316162, "learning_rate": 1.9903386037541986e-05, "loss": 0.8711, "step": 5384 }, { "epoch": 0.8791477898861271, "grad_norm": 2.259624719619751, "learning_rate": 1.9903342027952512e-05, "loss": 0.846, "step": 5385 }, { "epoch": 0.8793110485286315, "grad_norm": 1.8829166889190674, "learning_rate": 1.9903298008390374e-05, "loss": 0.8885, "step": 5386 }, { "epoch": 0.8794743071711358, "grad_norm": 1.8962863683700562, "learning_rate": 1.990325397885562e-05, "loss": 0.8238, "step": 5387 }, { "epoch": 0.8796375658136403, "grad_norm": 1.6922260522842407, "learning_rate": 1.990320993934829e-05, "loss": 0.6972, "step": 5388 }, { "epoch": 0.8798008244561446, "grad_norm": 1.8908518552780151, "learning_rate": 1.990316588986843e-05, "loss": 0.7384, "step": 5389 }, { "epoch": 0.879964083098649, "grad_norm": 2.1582534313201904, "learning_rate": 1.9903121830416085e-05, "loss": 0.9081, "step": 5390 }, { "epoch": 0.8801273417411534, "grad_norm": 1.9674888849258423, "learning_rate": 1.99030777609913e-05, "loss": 0.8014, "step": 5391 }, { "epoch": 0.8802906003836578, "grad_norm": 1.6110748052597046, "learning_rate": 1.9903033681594117e-05, "loss": 0.7339, "step": 5392 }, { "epoch": 0.8804538590261622, "grad_norm": 1.792742133140564, "learning_rate": 1.990298959222458e-05, "loss": 0.7502, "step": 5393 }, { "epoch": 0.8806171176686666, "grad_norm": 1.8898837566375732, "learning_rate": 1.9902945492882738e-05, "loss": 0.7956, "step": 5394 }, { "epoch": 0.880780376311171, "grad_norm": 1.7538906335830688, "learning_rate": 1.9902901383568633e-05, "loss": 0.9086, "step": 5395 }, { "epoch": 0.8809436349536753, "grad_norm": 1.9342808723449707, "learning_rate": 1.9902857264282307e-05, "loss": 0.8356, "step": 5396 }, { "epoch": 0.8811068935961798, "grad_norm": 1.7108091115951538, "learning_rate": 1.9902813135023805e-05, "loss": 0.6881, "step": 5397 }, { "epoch": 0.8812701522386841, "grad_norm": 1.8676313161849976, "learning_rate": 1.9902768995793177e-05, "loss": 0.8874, "step": 5398 }, { "epoch": 0.8814334108811885, "grad_norm": 1.487164855003357, "learning_rate": 1.990272484659046e-05, "loss": 0.7379, "step": 5399 }, { "epoch": 0.8815966695236929, "grad_norm": 1.4870518445968628, "learning_rate": 1.9902680687415704e-05, "loss": 0.6003, "step": 5400 }, { "epoch": 0.8817599281661973, "grad_norm": 1.7540218830108643, "learning_rate": 1.990263651826895e-05, "loss": 0.8199, "step": 5401 }, { "epoch": 0.8819231868087016, "grad_norm": 1.9954863786697388, "learning_rate": 1.9902592339150243e-05, "loss": 0.8971, "step": 5402 }, { "epoch": 0.8820864454512061, "grad_norm": 1.8785138130187988, "learning_rate": 1.990254815005963e-05, "loss": 0.843, "step": 5403 }, { "epoch": 0.8822497040937105, "grad_norm": 1.6195671558380127, "learning_rate": 1.9902503950997154e-05, "loss": 0.6432, "step": 5404 }, { "epoch": 0.8824129627362148, "grad_norm": 1.957841396331787, "learning_rate": 1.9902459741962856e-05, "loss": 0.6774, "step": 5405 }, { "epoch": 0.8825762213787193, "grad_norm": 1.8928015232086182, "learning_rate": 1.9902415522956785e-05, "loss": 0.7582, "step": 5406 }, { "epoch": 0.8827394800212236, "grad_norm": 2.031609058380127, "learning_rate": 1.9902371293978985e-05, "loss": 0.8242, "step": 5407 }, { "epoch": 0.882902738663728, "grad_norm": 1.8119521141052246, "learning_rate": 1.99023270550295e-05, "loss": 0.8074, "step": 5408 }, { "epoch": 0.8830659973062324, "grad_norm": 2.09952449798584, "learning_rate": 1.9902282806108372e-05, "loss": 0.8683, "step": 5409 }, { "epoch": 0.8832292559487368, "grad_norm": 1.6313326358795166, "learning_rate": 1.9902238547215652e-05, "loss": 0.7289, "step": 5410 }, { "epoch": 0.8833925145912411, "grad_norm": 2.0314393043518066, "learning_rate": 1.9902194278351375e-05, "loss": 0.7627, "step": 5411 }, { "epoch": 0.8835557732337456, "grad_norm": 1.949706792831421, "learning_rate": 1.9902149999515593e-05, "loss": 0.7523, "step": 5412 }, { "epoch": 0.8837190318762499, "grad_norm": 1.7507826089859009, "learning_rate": 1.9902105710708352e-05, "loss": 0.8117, "step": 5413 }, { "epoch": 0.8838822905187543, "grad_norm": 2.0059726238250732, "learning_rate": 1.990206141192969e-05, "loss": 0.7035, "step": 5414 }, { "epoch": 0.8840455491612588, "grad_norm": 1.7472230195999146, "learning_rate": 1.9902017103179655e-05, "loss": 0.7694, "step": 5415 }, { "epoch": 0.8842088078037631, "grad_norm": 1.9423251152038574, "learning_rate": 1.990197278445829e-05, "loss": 0.7938, "step": 5416 }, { "epoch": 0.8843720664462675, "grad_norm": 2.387195587158203, "learning_rate": 1.9901928455765644e-05, "loss": 0.7778, "step": 5417 }, { "epoch": 0.8845353250887719, "grad_norm": 2.100958824157715, "learning_rate": 1.990188411710176e-05, "loss": 0.8895, "step": 5418 }, { "epoch": 0.8846985837312763, "grad_norm": 1.7963606119155884, "learning_rate": 1.9901839768466677e-05, "loss": 0.7657, "step": 5419 }, { "epoch": 0.8848618423737806, "grad_norm": 1.9964659214019775, "learning_rate": 1.9901795409860445e-05, "loss": 0.8749, "step": 5420 }, { "epoch": 0.8850251010162851, "grad_norm": 1.9309360980987549, "learning_rate": 1.9901751041283108e-05, "loss": 0.8827, "step": 5421 }, { "epoch": 0.8851883596587894, "grad_norm": 1.8912358283996582, "learning_rate": 1.990170666273471e-05, "loss": 0.7277, "step": 5422 }, { "epoch": 0.8853516183012938, "grad_norm": 1.998364806175232, "learning_rate": 1.9901662274215298e-05, "loss": 0.7564, "step": 5423 }, { "epoch": 0.8855148769437982, "grad_norm": 2.4111011028289795, "learning_rate": 1.9901617875724914e-05, "loss": 0.9263, "step": 5424 }, { "epoch": 0.8856781355863026, "grad_norm": 1.7396621704101562, "learning_rate": 1.9901573467263603e-05, "loss": 0.6814, "step": 5425 }, { "epoch": 0.885841394228807, "grad_norm": 1.8112989664077759, "learning_rate": 1.9901529048831407e-05, "loss": 0.819, "step": 5426 }, { "epoch": 0.8860046528713114, "grad_norm": 2.6161205768585205, "learning_rate": 1.990148462042838e-05, "loss": 0.7833, "step": 5427 }, { "epoch": 0.8861679115138158, "grad_norm": 1.402883529663086, "learning_rate": 1.9901440182054555e-05, "loss": 0.5433, "step": 5428 }, { "epoch": 0.8863311701563201, "grad_norm": 1.7268433570861816, "learning_rate": 1.9901395733709987e-05, "loss": 0.7172, "step": 5429 }, { "epoch": 0.8864944287988246, "grad_norm": 1.9710057973861694, "learning_rate": 1.9901351275394712e-05, "loss": 0.7811, "step": 5430 }, { "epoch": 0.8866576874413289, "grad_norm": 1.5883653163909912, "learning_rate": 1.9901306807108783e-05, "loss": 0.614, "step": 5431 }, { "epoch": 0.8868209460838333, "grad_norm": 1.7911334037780762, "learning_rate": 1.990126232885224e-05, "loss": 0.737, "step": 5432 }, { "epoch": 0.8869842047263377, "grad_norm": 1.984209418296814, "learning_rate": 1.990121784062512e-05, "loss": 0.841, "step": 5433 }, { "epoch": 0.8871474633688421, "grad_norm": 1.9430984258651733, "learning_rate": 1.9901173342427487e-05, "loss": 0.7965, "step": 5434 }, { "epoch": 0.8873107220113465, "grad_norm": 1.787397861480713, "learning_rate": 1.990112883425937e-05, "loss": 0.6982, "step": 5435 }, { "epoch": 0.8874739806538509, "grad_norm": 1.8417294025421143, "learning_rate": 1.990108431612082e-05, "loss": 0.8915, "step": 5436 }, { "epoch": 0.8876372392963553, "grad_norm": 1.75802743434906, "learning_rate": 1.9901039788011883e-05, "loss": 0.7694, "step": 5437 }, { "epoch": 0.8878004979388596, "grad_norm": 1.8526890277862549, "learning_rate": 1.99009952499326e-05, "loss": 0.846, "step": 5438 }, { "epoch": 0.8879637565813641, "grad_norm": 1.3793190717697144, "learning_rate": 1.9900950701883014e-05, "loss": 0.6121, "step": 5439 }, { "epoch": 0.8881270152238684, "grad_norm": 1.8159807920455933, "learning_rate": 1.990090614386318e-05, "loss": 0.8367, "step": 5440 }, { "epoch": 0.8882902738663728, "grad_norm": 1.8311798572540283, "learning_rate": 1.9900861575873128e-05, "loss": 0.7265, "step": 5441 }, { "epoch": 0.8884535325088772, "grad_norm": 1.9749536514282227, "learning_rate": 1.990081699791292e-05, "loss": 0.7355, "step": 5442 }, { "epoch": 0.8886167911513816, "grad_norm": 1.6932133436203003, "learning_rate": 1.9900772409982582e-05, "loss": 0.6625, "step": 5443 }, { "epoch": 0.8887800497938859, "grad_norm": 1.8621851205825806, "learning_rate": 1.9900727812082177e-05, "loss": 0.8148, "step": 5444 }, { "epoch": 0.8889433084363904, "grad_norm": 1.9090908765792847, "learning_rate": 1.9900683204211737e-05, "loss": 0.7586, "step": 5445 }, { "epoch": 0.8891065670788948, "grad_norm": 1.8892121315002441, "learning_rate": 1.9900638586371315e-05, "loss": 0.7747, "step": 5446 }, { "epoch": 0.8892698257213991, "grad_norm": 2.0352370738983154, "learning_rate": 1.9900593958560953e-05, "loss": 0.856, "step": 5447 }, { "epoch": 0.8894330843639036, "grad_norm": 1.8563554286956787, "learning_rate": 1.9900549320780692e-05, "loss": 0.7798, "step": 5448 }, { "epoch": 0.8895963430064079, "grad_norm": 1.806465983390808, "learning_rate": 1.9900504673030582e-05, "loss": 0.693, "step": 5449 }, { "epoch": 0.8897596016489123, "grad_norm": 1.9736226797103882, "learning_rate": 1.9900460015310667e-05, "loss": 0.8577, "step": 5450 }, { "epoch": 0.8899228602914167, "grad_norm": 1.6509897708892822, "learning_rate": 1.9900415347620988e-05, "loss": 0.8087, "step": 5451 }, { "epoch": 0.8900861189339211, "grad_norm": 1.6485549211502075, "learning_rate": 1.99003706699616e-05, "loss": 0.6334, "step": 5452 }, { "epoch": 0.8902493775764254, "grad_norm": 2.000493049621582, "learning_rate": 1.9900325982332537e-05, "loss": 0.7635, "step": 5453 }, { "epoch": 0.8904126362189299, "grad_norm": 2.075079917907715, "learning_rate": 1.9900281284733847e-05, "loss": 0.777, "step": 5454 }, { "epoch": 0.8905758948614342, "grad_norm": 1.787853479385376, "learning_rate": 1.990023657716558e-05, "loss": 0.8209, "step": 5455 }, { "epoch": 0.8907391535039386, "grad_norm": 1.917310118675232, "learning_rate": 1.9900191859627773e-05, "loss": 0.7167, "step": 5456 }, { "epoch": 0.8909024121464431, "grad_norm": 1.845294713973999, "learning_rate": 1.9900147132120478e-05, "loss": 0.6278, "step": 5457 }, { "epoch": 0.8910656707889474, "grad_norm": 2.0440807342529297, "learning_rate": 1.9900102394643738e-05, "loss": 0.788, "step": 5458 }, { "epoch": 0.8912289294314518, "grad_norm": 2.147150754928589, "learning_rate": 1.9900057647197595e-05, "loss": 0.9297, "step": 5459 }, { "epoch": 0.8913921880739561, "grad_norm": 1.6638108491897583, "learning_rate": 1.99000128897821e-05, "loss": 0.7667, "step": 5460 }, { "epoch": 0.8915554467164606, "grad_norm": 2.0987725257873535, "learning_rate": 1.9899968122397293e-05, "loss": 0.8054, "step": 5461 }, { "epoch": 0.8917187053589649, "grad_norm": 1.8221789598464966, "learning_rate": 1.989992334504322e-05, "loss": 0.6381, "step": 5462 }, { "epoch": 0.8918819640014694, "grad_norm": 1.7546521425247192, "learning_rate": 1.989987855771993e-05, "loss": 0.6581, "step": 5463 }, { "epoch": 0.8920452226439737, "grad_norm": 1.8613066673278809, "learning_rate": 1.989983376042746e-05, "loss": 0.7308, "step": 5464 }, { "epoch": 0.8922084812864781, "grad_norm": 2.0199716091156006, "learning_rate": 1.9899788953165863e-05, "loss": 0.9019, "step": 5465 }, { "epoch": 0.8923717399289824, "grad_norm": 1.5132046937942505, "learning_rate": 1.989974413593518e-05, "loss": 0.6306, "step": 5466 }, { "epoch": 0.8925349985714869, "grad_norm": 1.5626327991485596, "learning_rate": 1.9899699308735458e-05, "loss": 0.6759, "step": 5467 }, { "epoch": 0.8926982572139913, "grad_norm": 1.9975075721740723, "learning_rate": 1.989965447156674e-05, "loss": 0.815, "step": 5468 }, { "epoch": 0.8928615158564956, "grad_norm": 1.8193795680999756, "learning_rate": 1.9899609624429077e-05, "loss": 0.7413, "step": 5469 }, { "epoch": 0.8930247744990001, "grad_norm": 1.783850908279419, "learning_rate": 1.989956476732251e-05, "loss": 0.7752, "step": 5470 }, { "epoch": 0.8931880331415044, "grad_norm": 1.9898436069488525, "learning_rate": 1.9899519900247077e-05, "loss": 0.8762, "step": 5471 }, { "epoch": 0.8933512917840088, "grad_norm": 1.9051214456558228, "learning_rate": 1.9899475023202838e-05, "loss": 0.7845, "step": 5472 }, { "epoch": 0.8935145504265132, "grad_norm": 1.774433970451355, "learning_rate": 1.9899430136189823e-05, "loss": 0.7743, "step": 5473 }, { "epoch": 0.8936778090690176, "grad_norm": 2.087049961090088, "learning_rate": 1.9899385239208088e-05, "loss": 0.8172, "step": 5474 }, { "epoch": 0.8938410677115219, "grad_norm": 1.5513324737548828, "learning_rate": 1.9899340332257676e-05, "loss": 0.734, "step": 5475 }, { "epoch": 0.8940043263540264, "grad_norm": 1.6456248760223389, "learning_rate": 1.989929541533863e-05, "loss": 0.8016, "step": 5476 }, { "epoch": 0.8941675849965307, "grad_norm": 1.6411423683166504, "learning_rate": 1.9899250488450993e-05, "loss": 0.708, "step": 5477 }, { "epoch": 0.8943308436390351, "grad_norm": 1.74517023563385, "learning_rate": 1.9899205551594816e-05, "loss": 0.7394, "step": 5478 }, { "epoch": 0.8944941022815396, "grad_norm": 1.9233719110488892, "learning_rate": 1.989916060477014e-05, "loss": 0.7487, "step": 5479 }, { "epoch": 0.8946573609240439, "grad_norm": 2.1521646976470947, "learning_rate": 1.9899115647977015e-05, "loss": 0.7456, "step": 5480 }, { "epoch": 0.8948206195665483, "grad_norm": 1.9030790328979492, "learning_rate": 1.989907068121548e-05, "loss": 0.699, "step": 5481 }, { "epoch": 0.8949838782090527, "grad_norm": 1.69891357421875, "learning_rate": 1.9899025704485585e-05, "loss": 0.8169, "step": 5482 }, { "epoch": 0.8951471368515571, "grad_norm": 2.1596570014953613, "learning_rate": 1.9898980717787374e-05, "loss": 0.904, "step": 5483 }, { "epoch": 0.8953103954940614, "grad_norm": 2.141803741455078, "learning_rate": 1.9898935721120892e-05, "loss": 0.6872, "step": 5484 }, { "epoch": 0.8954736541365659, "grad_norm": 1.8917534351348877, "learning_rate": 1.9898890714486182e-05, "loss": 0.8662, "step": 5485 }, { "epoch": 0.8956369127790702, "grad_norm": 1.811232566833496, "learning_rate": 1.9898845697883296e-05, "loss": 0.8044, "step": 5486 }, { "epoch": 0.8958001714215746, "grad_norm": 1.9061232805252075, "learning_rate": 1.9898800671312273e-05, "loss": 0.69, "step": 5487 }, { "epoch": 0.895963430064079, "grad_norm": 1.9330824613571167, "learning_rate": 1.989875563477316e-05, "loss": 0.8772, "step": 5488 }, { "epoch": 0.8961266887065834, "grad_norm": 1.7840790748596191, "learning_rate": 1.9898710588266002e-05, "loss": 0.7764, "step": 5489 }, { "epoch": 0.8962899473490878, "grad_norm": 1.415550708770752, "learning_rate": 1.9898665531790846e-05, "loss": 0.5857, "step": 5490 }, { "epoch": 0.8964532059915922, "grad_norm": 1.6764709949493408, "learning_rate": 1.9898620465347735e-05, "loss": 0.6888, "step": 5491 }, { "epoch": 0.8966164646340966, "grad_norm": 1.9158803224563599, "learning_rate": 1.9898575388936717e-05, "loss": 0.7512, "step": 5492 }, { "epoch": 0.8967797232766009, "grad_norm": 2.0202760696411133, "learning_rate": 1.9898530302557836e-05, "loss": 0.818, "step": 5493 }, { "epoch": 0.8969429819191054, "grad_norm": 1.768582820892334, "learning_rate": 1.9898485206211138e-05, "loss": 0.7236, "step": 5494 }, { "epoch": 0.8971062405616097, "grad_norm": 1.9406176805496216, "learning_rate": 1.989844009989667e-05, "loss": 0.8035, "step": 5495 }, { "epoch": 0.8972694992041141, "grad_norm": 2.0315680503845215, "learning_rate": 1.989839498361447e-05, "loss": 0.7808, "step": 5496 }, { "epoch": 0.8974327578466185, "grad_norm": 1.77961266040802, "learning_rate": 1.9898349857364594e-05, "loss": 0.7149, "step": 5497 }, { "epoch": 0.8975960164891229, "grad_norm": 2.0483458042144775, "learning_rate": 1.989830472114708e-05, "loss": 0.8953, "step": 5498 }, { "epoch": 0.8977592751316272, "grad_norm": 2.363706350326538, "learning_rate": 1.9898259574961977e-05, "loss": 0.854, "step": 5499 }, { "epoch": 0.8979225337741317, "grad_norm": 2.0008885860443115, "learning_rate": 1.989821441880933e-05, "loss": 0.7716, "step": 5500 }, { "epoch": 0.8980857924166361, "grad_norm": 1.823672890663147, "learning_rate": 1.989816925268918e-05, "loss": 0.6495, "step": 5501 }, { "epoch": 0.8982490510591404, "grad_norm": 1.8799450397491455, "learning_rate": 1.9898124076601578e-05, "loss": 0.8637, "step": 5502 }, { "epoch": 0.8984123097016449, "grad_norm": 1.800307035446167, "learning_rate": 1.989807889054657e-05, "loss": 0.7077, "step": 5503 }, { "epoch": 0.8985755683441492, "grad_norm": 1.9598329067230225, "learning_rate": 1.9898033694524196e-05, "loss": 0.7876, "step": 5504 }, { "epoch": 0.8987388269866536, "grad_norm": 1.796534776687622, "learning_rate": 1.9897988488534508e-05, "loss": 0.6227, "step": 5505 }, { "epoch": 0.898902085629158, "grad_norm": 1.6570171117782593, "learning_rate": 1.9897943272577546e-05, "loss": 0.7332, "step": 5506 }, { "epoch": 0.8990653442716624, "grad_norm": 1.778590440750122, "learning_rate": 1.9897898046653358e-05, "loss": 0.7386, "step": 5507 }, { "epoch": 0.8992286029141667, "grad_norm": 1.4596481323242188, "learning_rate": 1.9897852810761987e-05, "loss": 0.6164, "step": 5508 }, { "epoch": 0.8993918615566712, "grad_norm": 1.9816324710845947, "learning_rate": 1.9897807564903485e-05, "loss": 0.8267, "step": 5509 }, { "epoch": 0.8995551201991755, "grad_norm": 1.7053388357162476, "learning_rate": 1.989776230907789e-05, "loss": 0.7752, "step": 5510 }, { "epoch": 0.8997183788416799, "grad_norm": 2.085296392440796, "learning_rate": 1.9897717043285255e-05, "loss": 0.8386, "step": 5511 }, { "epoch": 0.8998816374841844, "grad_norm": 1.8707584142684937, "learning_rate": 1.989767176752562e-05, "loss": 0.8964, "step": 5512 }, { "epoch": 0.9000448961266887, "grad_norm": 1.8359700441360474, "learning_rate": 1.989762648179903e-05, "loss": 0.7131, "step": 5513 }, { "epoch": 0.9002081547691931, "grad_norm": 1.7875401973724365, "learning_rate": 1.989758118610553e-05, "loss": 0.7612, "step": 5514 }, { "epoch": 0.9003714134116975, "grad_norm": 1.903703212738037, "learning_rate": 1.9897535880445174e-05, "loss": 0.8097, "step": 5515 }, { "epoch": 0.9005346720542019, "grad_norm": 2.0160763263702393, "learning_rate": 1.9897490564818e-05, "loss": 0.7252, "step": 5516 }, { "epoch": 0.9006979306967062, "grad_norm": 1.8471753597259521, "learning_rate": 1.989744523922406e-05, "loss": 0.7367, "step": 5517 }, { "epoch": 0.9008611893392107, "grad_norm": 1.9934808015823364, "learning_rate": 1.989739990366339e-05, "loss": 0.7985, "step": 5518 }, { "epoch": 0.901024447981715, "grad_norm": 2.1980695724487305, "learning_rate": 1.989735455813604e-05, "loss": 0.9086, "step": 5519 }, { "epoch": 0.9011877066242194, "grad_norm": 1.5855870246887207, "learning_rate": 1.989730920264206e-05, "loss": 0.7498, "step": 5520 }, { "epoch": 0.9013509652667238, "grad_norm": 1.9430112838745117, "learning_rate": 1.9897263837181492e-05, "loss": 0.6587, "step": 5521 }, { "epoch": 0.9015142239092282, "grad_norm": 1.9637012481689453, "learning_rate": 1.989721846175438e-05, "loss": 0.7715, "step": 5522 }, { "epoch": 0.9016774825517326, "grad_norm": 1.6537851095199585, "learning_rate": 1.989717307636077e-05, "loss": 0.7607, "step": 5523 }, { "epoch": 0.901840741194237, "grad_norm": 2.33439302444458, "learning_rate": 1.9897127681000714e-05, "loss": 0.9548, "step": 5524 }, { "epoch": 0.9020039998367414, "grad_norm": 1.4771759510040283, "learning_rate": 1.9897082275674252e-05, "loss": 0.6319, "step": 5525 }, { "epoch": 0.9021672584792457, "grad_norm": 1.8442362546920776, "learning_rate": 1.989703686038143e-05, "loss": 0.7391, "step": 5526 }, { "epoch": 0.9023305171217502, "grad_norm": 2.017043113708496, "learning_rate": 1.9896991435122294e-05, "loss": 0.7818, "step": 5527 }, { "epoch": 0.9024937757642545, "grad_norm": 2.36134672164917, "learning_rate": 1.9896945999896887e-05, "loss": 1.4161, "step": 5528 }, { "epoch": 0.9026570344067589, "grad_norm": 1.7070966958999634, "learning_rate": 1.9896900554705264e-05, "loss": 0.7795, "step": 5529 }, { "epoch": 0.9028202930492633, "grad_norm": 2.256730794906616, "learning_rate": 1.9896855099547462e-05, "loss": 0.895, "step": 5530 }, { "epoch": 0.9029835516917677, "grad_norm": 2.093231201171875, "learning_rate": 1.989680963442353e-05, "loss": 0.7501, "step": 5531 }, { "epoch": 0.903146810334272, "grad_norm": 2.1362226009368896, "learning_rate": 1.989676415933351e-05, "loss": 0.8682, "step": 5532 }, { "epoch": 0.9033100689767765, "grad_norm": 1.9808677434921265, "learning_rate": 1.989671867427746e-05, "loss": 0.8651, "step": 5533 }, { "epoch": 0.9034733276192809, "grad_norm": 1.87907874584198, "learning_rate": 1.989667317925541e-05, "loss": 0.8305, "step": 5534 }, { "epoch": 0.9036365862617852, "grad_norm": 1.8500330448150635, "learning_rate": 1.9896627674267414e-05, "loss": 0.8166, "step": 5535 }, { "epoch": 0.9037998449042897, "grad_norm": 2.022334098815918, "learning_rate": 1.9896582159313517e-05, "loss": 0.9245, "step": 5536 }, { "epoch": 0.903963103546794, "grad_norm": 2.089448928833008, "learning_rate": 1.9896536634393762e-05, "loss": 0.7491, "step": 5537 }, { "epoch": 0.9041263621892984, "grad_norm": 1.7539336681365967, "learning_rate": 1.98964910995082e-05, "loss": 0.8298, "step": 5538 }, { "epoch": 0.9042896208318028, "grad_norm": 1.7201881408691406, "learning_rate": 1.989644555465687e-05, "loss": 0.9033, "step": 5539 }, { "epoch": 0.9044528794743072, "grad_norm": 1.544772744178772, "learning_rate": 1.9896399999839828e-05, "loss": 0.6645, "step": 5540 }, { "epoch": 0.9046161381168115, "grad_norm": 1.993411660194397, "learning_rate": 1.989635443505711e-05, "loss": 0.6866, "step": 5541 }, { "epoch": 0.904779396759316, "grad_norm": 1.773689866065979, "learning_rate": 1.989630886030877e-05, "loss": 0.6561, "step": 5542 }, { "epoch": 0.9049426554018203, "grad_norm": 1.8346515893936157, "learning_rate": 1.989626327559484e-05, "loss": 0.8791, "step": 5543 }, { "epoch": 0.9051059140443247, "grad_norm": 1.9989535808563232, "learning_rate": 1.9896217680915388e-05, "loss": 0.7248, "step": 5544 }, { "epoch": 0.9052691726868292, "grad_norm": 1.7388193607330322, "learning_rate": 1.9896172076270436e-05, "loss": 0.6694, "step": 5545 }, { "epoch": 0.9054324313293335, "grad_norm": 1.682182788848877, "learning_rate": 1.989612646166005e-05, "loss": 0.8053, "step": 5546 }, { "epoch": 0.9055956899718379, "grad_norm": 1.5830105543136597, "learning_rate": 1.989608083708426e-05, "loss": 0.7209, "step": 5547 }, { "epoch": 0.9057589486143423, "grad_norm": 1.7821048498153687, "learning_rate": 1.9896035202543124e-05, "loss": 0.745, "step": 5548 }, { "epoch": 0.9059222072568467, "grad_norm": 1.872824788093567, "learning_rate": 1.9895989558036684e-05, "loss": 0.7568, "step": 5549 }, { "epoch": 0.906085465899351, "grad_norm": 1.8558952808380127, "learning_rate": 1.989594390356498e-05, "loss": 0.6495, "step": 5550 }, { "epoch": 0.9062487245418555, "grad_norm": 1.6698013544082642, "learning_rate": 1.9895898239128072e-05, "loss": 0.6811, "step": 5551 }, { "epoch": 0.9064119831843598, "grad_norm": 1.519544243812561, "learning_rate": 1.989585256472599e-05, "loss": 0.6459, "step": 5552 }, { "epoch": 0.9065752418268642, "grad_norm": 1.902032494544983, "learning_rate": 1.9895806880358788e-05, "loss": 0.9176, "step": 5553 }, { "epoch": 0.9067385004693685, "grad_norm": 1.7615669965744019, "learning_rate": 1.989576118602651e-05, "loss": 0.9291, "step": 5554 }, { "epoch": 0.906901759111873, "grad_norm": 1.9209628105163574, "learning_rate": 1.9895715481729207e-05, "loss": 0.7969, "step": 5555 }, { "epoch": 0.9070650177543774, "grad_norm": 1.75411057472229, "learning_rate": 1.989566976746692e-05, "loss": 0.6233, "step": 5556 }, { "epoch": 0.9072282763968817, "grad_norm": 1.8862360715866089, "learning_rate": 1.9895624043239696e-05, "loss": 0.7266, "step": 5557 }, { "epoch": 0.9073915350393862, "grad_norm": 1.9569650888442993, "learning_rate": 1.9895578309047577e-05, "loss": 0.8652, "step": 5558 }, { "epoch": 0.9075547936818905, "grad_norm": 2.200761079788208, "learning_rate": 1.9895532564890616e-05, "loss": 1.0836, "step": 5559 }, { "epoch": 0.907718052324395, "grad_norm": 2.2112808227539062, "learning_rate": 1.989548681076886e-05, "loss": 0.8381, "step": 5560 }, { "epoch": 0.9078813109668993, "grad_norm": 1.8368815183639526, "learning_rate": 1.9895441046682345e-05, "loss": 0.7016, "step": 5561 }, { "epoch": 0.9080445696094037, "grad_norm": 2.1557767391204834, "learning_rate": 1.989539527263113e-05, "loss": 0.7825, "step": 5562 }, { "epoch": 0.908207828251908, "grad_norm": 1.9988001585006714, "learning_rate": 1.9895349488615248e-05, "loss": 1.0115, "step": 5563 }, { "epoch": 0.9083710868944125, "grad_norm": 1.884873390197754, "learning_rate": 1.9895303694634756e-05, "loss": 0.7757, "step": 5564 }, { "epoch": 0.9085343455369168, "grad_norm": 1.9829567670822144, "learning_rate": 1.9895257890689698e-05, "loss": 0.8484, "step": 5565 }, { "epoch": 0.9086976041794212, "grad_norm": 1.9736597537994385, "learning_rate": 1.9895212076780113e-05, "loss": 0.8061, "step": 5566 }, { "epoch": 0.9088608628219257, "grad_norm": 1.860804557800293, "learning_rate": 1.9895166252906053e-05, "loss": 0.9104, "step": 5567 }, { "epoch": 0.90902412146443, "grad_norm": 2.1515371799468994, "learning_rate": 1.9895120419067565e-05, "loss": 0.8432, "step": 5568 }, { "epoch": 0.9091873801069345, "grad_norm": 2.1283607482910156, "learning_rate": 1.9895074575264694e-05, "loss": 0.6855, "step": 5569 }, { "epoch": 0.9093506387494388, "grad_norm": 1.998544454574585, "learning_rate": 1.9895028721497482e-05, "loss": 0.7761, "step": 5570 }, { "epoch": 0.9095138973919432, "grad_norm": 1.8876937627792358, "learning_rate": 1.9894982857765982e-05, "loss": 0.8624, "step": 5571 }, { "epoch": 0.9096771560344475, "grad_norm": 1.6461340188980103, "learning_rate": 1.9894936984070234e-05, "loss": 0.7732, "step": 5572 }, { "epoch": 0.909840414676952, "grad_norm": 2.3960626125335693, "learning_rate": 1.989489110041029e-05, "loss": 0.7479, "step": 5573 }, { "epoch": 0.9100036733194563, "grad_norm": 1.6843887567520142, "learning_rate": 1.9894845206786192e-05, "loss": 0.6725, "step": 5574 }, { "epoch": 0.9101669319619607, "grad_norm": 2.2510833740234375, "learning_rate": 1.9894799303197987e-05, "loss": 0.7124, "step": 5575 }, { "epoch": 0.9103301906044651, "grad_norm": 1.7971994876861572, "learning_rate": 1.9894753389645723e-05, "loss": 0.7557, "step": 5576 }, { "epoch": 0.9104934492469695, "grad_norm": 1.6881722211837769, "learning_rate": 1.9894707466129444e-05, "loss": 0.6575, "step": 5577 }, { "epoch": 0.910656707889474, "grad_norm": 1.8837109804153442, "learning_rate": 1.98946615326492e-05, "loss": 0.6754, "step": 5578 }, { "epoch": 0.9108199665319783, "grad_norm": 2.2239246368408203, "learning_rate": 1.9894615589205027e-05, "loss": 0.882, "step": 5579 }, { "epoch": 0.9109832251744827, "grad_norm": 1.5459859371185303, "learning_rate": 1.9894569635796987e-05, "loss": 0.6854, "step": 5580 }, { "epoch": 0.911146483816987, "grad_norm": 1.7673524618148804, "learning_rate": 1.989452367242511e-05, "loss": 0.715, "step": 5581 }, { "epoch": 0.9113097424594915, "grad_norm": 2.1071817874908447, "learning_rate": 1.989447769908946e-05, "loss": 0.801, "step": 5582 }, { "epoch": 0.9114730011019958, "grad_norm": 1.8170974254608154, "learning_rate": 1.989443171579007e-05, "loss": 0.6932, "step": 5583 }, { "epoch": 0.9116362597445002, "grad_norm": 1.7549247741699219, "learning_rate": 1.9894385722526984e-05, "loss": 0.7693, "step": 5584 }, { "epoch": 0.9117995183870046, "grad_norm": 1.7795149087905884, "learning_rate": 1.989433971930026e-05, "loss": 0.834, "step": 5585 }, { "epoch": 0.911962777029509, "grad_norm": 1.735330581665039, "learning_rate": 1.9894293706109936e-05, "loss": 0.657, "step": 5586 }, { "epoch": 0.9121260356720133, "grad_norm": 2.176182508468628, "learning_rate": 1.9894247682956064e-05, "loss": 0.8754, "step": 5587 }, { "epoch": 0.9122892943145178, "grad_norm": 1.7211261987686157, "learning_rate": 1.9894201649838686e-05, "loss": 0.7445, "step": 5588 }, { "epoch": 0.9124525529570222, "grad_norm": 1.876654028892517, "learning_rate": 1.9894155606757846e-05, "loss": 0.7932, "step": 5589 }, { "epoch": 0.9126158115995265, "grad_norm": 2.7031610012054443, "learning_rate": 1.9894109553713597e-05, "loss": 0.7621, "step": 5590 }, { "epoch": 0.912779070242031, "grad_norm": 1.8352471590042114, "learning_rate": 1.9894063490705982e-05, "loss": 0.7445, "step": 5591 }, { "epoch": 0.9129423288845353, "grad_norm": 1.967947006225586, "learning_rate": 1.9894017417735046e-05, "loss": 0.6841, "step": 5592 }, { "epoch": 0.9131055875270397, "grad_norm": 1.8985121250152588, "learning_rate": 1.989397133480084e-05, "loss": 0.8033, "step": 5593 }, { "epoch": 0.9132688461695441, "grad_norm": 1.9212956428527832, "learning_rate": 1.9893925241903405e-05, "loss": 0.8779, "step": 5594 }, { "epoch": 0.9134321048120485, "grad_norm": 2.0976109504699707, "learning_rate": 1.9893879139042795e-05, "loss": 0.8377, "step": 5595 }, { "epoch": 0.9135953634545528, "grad_norm": 1.8942443132400513, "learning_rate": 1.9893833026219044e-05, "loss": 0.7014, "step": 5596 }, { "epoch": 0.9137586220970573, "grad_norm": 2.207634687423706, "learning_rate": 1.989378690343221e-05, "loss": 0.7141, "step": 5597 }, { "epoch": 0.9139218807395616, "grad_norm": 1.9334526062011719, "learning_rate": 1.9893740770682334e-05, "loss": 0.9303, "step": 5598 }, { "epoch": 0.914085139382066, "grad_norm": 1.7855514287948608, "learning_rate": 1.9893694627969464e-05, "loss": 0.6626, "step": 5599 }, { "epoch": 0.9142483980245705, "grad_norm": 1.9264668226242065, "learning_rate": 1.9893648475293646e-05, "loss": 0.7269, "step": 5600 }, { "epoch": 0.9144116566670748, "grad_norm": 1.5973302125930786, "learning_rate": 1.989360231265493e-05, "loss": 0.6111, "step": 5601 }, { "epoch": 0.9145749153095792, "grad_norm": 2.0852298736572266, "learning_rate": 1.9893556140053352e-05, "loss": 0.949, "step": 5602 }, { "epoch": 0.9147381739520836, "grad_norm": 2.078183889389038, "learning_rate": 1.989350995748897e-05, "loss": 0.8123, "step": 5603 }, { "epoch": 0.914901432594588, "grad_norm": 1.89762544631958, "learning_rate": 1.989346376496183e-05, "loss": 0.8302, "step": 5604 }, { "epoch": 0.9150646912370923, "grad_norm": 1.9650306701660156, "learning_rate": 1.9893417562471966e-05, "loss": 0.8212, "step": 5605 }, { "epoch": 0.9152279498795968, "grad_norm": 2.1041293144226074, "learning_rate": 1.989337135001944e-05, "loss": 1.0749, "step": 5606 }, { "epoch": 0.9153912085221011, "grad_norm": 1.7313833236694336, "learning_rate": 1.9893325127604287e-05, "loss": 0.6885, "step": 5607 }, { "epoch": 0.9155544671646055, "grad_norm": 1.7360668182373047, "learning_rate": 1.989327889522656e-05, "loss": 0.6979, "step": 5608 }, { "epoch": 0.9157177258071099, "grad_norm": 2.13582706451416, "learning_rate": 1.9893232652886306e-05, "loss": 0.8396, "step": 5609 }, { "epoch": 0.9158809844496143, "grad_norm": 1.4471087455749512, "learning_rate": 1.9893186400583568e-05, "loss": 0.5425, "step": 5610 }, { "epoch": 0.9160442430921187, "grad_norm": 1.9984731674194336, "learning_rate": 1.9893140138318394e-05, "loss": 0.793, "step": 5611 }, { "epoch": 0.9162075017346231, "grad_norm": 1.6998302936553955, "learning_rate": 1.9893093866090828e-05, "loss": 0.75, "step": 5612 }, { "epoch": 0.9163707603771275, "grad_norm": 1.7694296836853027, "learning_rate": 1.9893047583900918e-05, "loss": 0.7792, "step": 5613 }, { "epoch": 0.9165340190196318, "grad_norm": 1.841478943824768, "learning_rate": 1.9893001291748715e-05, "loss": 0.6653, "step": 5614 }, { "epoch": 0.9166972776621363, "grad_norm": 1.9038026332855225, "learning_rate": 1.9892954989634263e-05, "loss": 0.7349, "step": 5615 }, { "epoch": 0.9168605363046406, "grad_norm": 1.9097278118133545, "learning_rate": 1.989290867755761e-05, "loss": 0.7502, "step": 5616 }, { "epoch": 0.917023794947145, "grad_norm": 1.634843349456787, "learning_rate": 1.9892862355518793e-05, "loss": 0.695, "step": 5617 }, { "epoch": 0.9171870535896494, "grad_norm": 1.6406042575836182, "learning_rate": 1.9892816023517874e-05, "loss": 0.7695, "step": 5618 }, { "epoch": 0.9173503122321538, "grad_norm": 2.134953737258911, "learning_rate": 1.9892769681554885e-05, "loss": 0.7551, "step": 5619 }, { "epoch": 0.9175135708746581, "grad_norm": 1.660075306892395, "learning_rate": 1.9892723329629885e-05, "loss": 0.6419, "step": 5620 }, { "epoch": 0.9176768295171626, "grad_norm": 2.1675283908843994, "learning_rate": 1.9892676967742912e-05, "loss": 0.9836, "step": 5621 }, { "epoch": 0.917840088159667, "grad_norm": 1.8870595693588257, "learning_rate": 1.989263059589402e-05, "loss": 0.8409, "step": 5622 }, { "epoch": 0.9180033468021713, "grad_norm": 1.700561285018921, "learning_rate": 1.9892584214083247e-05, "loss": 0.7769, "step": 5623 }, { "epoch": 0.9181666054446758, "grad_norm": 2.0961501598358154, "learning_rate": 1.989253782231065e-05, "loss": 0.953, "step": 5624 }, { "epoch": 0.9183298640871801, "grad_norm": 1.9159821271896362, "learning_rate": 1.9892491420576265e-05, "loss": 0.8213, "step": 5625 }, { "epoch": 0.9184931227296845, "grad_norm": 1.900506615638733, "learning_rate": 1.989244500888014e-05, "loss": 0.8112, "step": 5626 }, { "epoch": 0.9186563813721889, "grad_norm": 2.0761029720306396, "learning_rate": 1.9892398587222336e-05, "loss": 0.8352, "step": 5627 }, { "epoch": 0.9188196400146933, "grad_norm": 1.85646390914917, "learning_rate": 1.989235215560288e-05, "loss": 0.6232, "step": 5628 }, { "epoch": 0.9189828986571976, "grad_norm": 1.524402141571045, "learning_rate": 1.9892305714021832e-05, "loss": 0.6017, "step": 5629 }, { "epoch": 0.9191461572997021, "grad_norm": 1.9812732934951782, "learning_rate": 1.9892259262479238e-05, "loss": 0.6919, "step": 5630 }, { "epoch": 0.9193094159422064, "grad_norm": 1.702762484550476, "learning_rate": 1.9892212800975136e-05, "loss": 0.8046, "step": 5631 }, { "epoch": 0.9194726745847108, "grad_norm": 1.7462058067321777, "learning_rate": 1.989216632950958e-05, "loss": 0.7064, "step": 5632 }, { "epoch": 0.9196359332272153, "grad_norm": 1.877686619758606, "learning_rate": 1.9892119848082615e-05, "loss": 0.875, "step": 5633 }, { "epoch": 0.9197991918697196, "grad_norm": 1.9059504270553589, "learning_rate": 1.9892073356694287e-05, "loss": 0.7883, "step": 5634 }, { "epoch": 0.919962450512224, "grad_norm": 1.660733699798584, "learning_rate": 1.9892026855344648e-05, "loss": 0.7094, "step": 5635 }, { "epoch": 0.9201257091547284, "grad_norm": 1.8124175071716309, "learning_rate": 1.989198034403374e-05, "loss": 0.7422, "step": 5636 }, { "epoch": 0.9202889677972328, "grad_norm": 1.7042763233184814, "learning_rate": 1.9891933822761603e-05, "loss": 0.724, "step": 5637 }, { "epoch": 0.9204522264397371, "grad_norm": 1.8302861452102661, "learning_rate": 1.98918872915283e-05, "loss": 0.6867, "step": 5638 }, { "epoch": 0.9206154850822416, "grad_norm": 2.1161882877349854, "learning_rate": 1.9891840750333864e-05, "loss": 0.8932, "step": 5639 }, { "epoch": 0.9207787437247459, "grad_norm": 1.601354718208313, "learning_rate": 1.989179419917835e-05, "loss": 0.5357, "step": 5640 }, { "epoch": 0.9209420023672503, "grad_norm": 1.6711939573287964, "learning_rate": 1.98917476380618e-05, "loss": 0.6833, "step": 5641 }, { "epoch": 0.9211052610097547, "grad_norm": 1.7503052949905396, "learning_rate": 1.9891701066984264e-05, "loss": 0.7157, "step": 5642 }, { "epoch": 0.9212685196522591, "grad_norm": 1.9473214149475098, "learning_rate": 1.9891654485945786e-05, "loss": 0.9499, "step": 5643 }, { "epoch": 0.9214317782947635, "grad_norm": 1.9269390106201172, "learning_rate": 1.9891607894946413e-05, "loss": 0.7534, "step": 5644 }, { "epoch": 0.9215950369372679, "grad_norm": 1.6406761407852173, "learning_rate": 1.9891561293986197e-05, "loss": 0.7171, "step": 5645 }, { "epoch": 0.9217582955797723, "grad_norm": 2.1362133026123047, "learning_rate": 1.9891514683065183e-05, "loss": 0.9696, "step": 5646 }, { "epoch": 0.9219215542222766, "grad_norm": 1.5797624588012695, "learning_rate": 1.9891468062183413e-05, "loss": 0.6037, "step": 5647 }, { "epoch": 0.9220848128647811, "grad_norm": 1.6846390962600708, "learning_rate": 1.989142143134094e-05, "loss": 0.7527, "step": 5648 }, { "epoch": 0.9222480715072854, "grad_norm": 2.2771694660186768, "learning_rate": 1.9891374790537804e-05, "loss": 1.0315, "step": 5649 }, { "epoch": 0.9224113301497898, "grad_norm": 1.8506041765213013, "learning_rate": 1.9891328139774057e-05, "loss": 0.7388, "step": 5650 }, { "epoch": 0.9225745887922941, "grad_norm": 1.8565744161605835, "learning_rate": 1.9891281479049748e-05, "loss": 0.6494, "step": 5651 }, { "epoch": 0.9227378474347986, "grad_norm": 1.9432196617126465, "learning_rate": 1.9891234808364917e-05, "loss": 0.8302, "step": 5652 }, { "epoch": 0.9229011060773029, "grad_norm": 1.4637361764907837, "learning_rate": 1.989118812771962e-05, "loss": 0.5649, "step": 5653 }, { "epoch": 0.9230643647198074, "grad_norm": 1.5249428749084473, "learning_rate": 1.9891141437113896e-05, "loss": 0.6706, "step": 5654 }, { "epoch": 0.9232276233623118, "grad_norm": 1.7894009351730347, "learning_rate": 1.9891094736547796e-05, "loss": 0.7278, "step": 5655 }, { "epoch": 0.9233908820048161, "grad_norm": 1.8121771812438965, "learning_rate": 1.9891048026021368e-05, "loss": 0.8781, "step": 5656 }, { "epoch": 0.9235541406473206, "grad_norm": 2.038479804992676, "learning_rate": 1.9891001305534656e-05, "loss": 0.8222, "step": 5657 }, { "epoch": 0.9237173992898249, "grad_norm": 1.6146109104156494, "learning_rate": 1.9890954575087708e-05, "loss": 0.6848, "step": 5658 }, { "epoch": 0.9238806579323293, "grad_norm": 1.6325279474258423, "learning_rate": 1.989090783468057e-05, "loss": 0.7504, "step": 5659 }, { "epoch": 0.9240439165748336, "grad_norm": 1.873058795928955, "learning_rate": 1.9890861084313293e-05, "loss": 0.7952, "step": 5660 }, { "epoch": 0.9242071752173381, "grad_norm": 2.0763649940490723, "learning_rate": 1.989081432398592e-05, "loss": 0.9199, "step": 5661 }, { "epoch": 0.9243704338598424, "grad_norm": 1.7875324487686157, "learning_rate": 1.98907675536985e-05, "loss": 0.7985, "step": 5662 }, { "epoch": 0.9245336925023468, "grad_norm": 1.8535127639770508, "learning_rate": 1.989072077345108e-05, "loss": 0.8475, "step": 5663 }, { "epoch": 0.9246969511448512, "grad_norm": 1.922010898590088, "learning_rate": 1.9890673983243708e-05, "loss": 0.7158, "step": 5664 }, { "epoch": 0.9248602097873556, "grad_norm": 1.8820998668670654, "learning_rate": 1.9890627183076427e-05, "loss": 0.784, "step": 5665 }, { "epoch": 0.92502346842986, "grad_norm": 1.6809495687484741, "learning_rate": 1.989058037294929e-05, "loss": 0.6742, "step": 5666 }, { "epoch": 0.9251867270723644, "grad_norm": 2.1938772201538086, "learning_rate": 1.9890533552862337e-05, "loss": 1.6547, "step": 5667 }, { "epoch": 0.9253499857148688, "grad_norm": 1.977703332901001, "learning_rate": 1.9890486722815624e-05, "loss": 0.9057, "step": 5668 }, { "epoch": 0.9255132443573731, "grad_norm": 1.9104596376419067, "learning_rate": 1.989043988280919e-05, "loss": 0.832, "step": 5669 }, { "epoch": 0.9256765029998776, "grad_norm": 1.937463641166687, "learning_rate": 1.989039303284309e-05, "loss": 0.7084, "step": 5670 }, { "epoch": 0.9258397616423819, "grad_norm": 1.9445589780807495, "learning_rate": 1.9890346172917362e-05, "loss": 0.812, "step": 5671 }, { "epoch": 0.9260030202848863, "grad_norm": 1.9386945962905884, "learning_rate": 1.989029930303206e-05, "loss": 0.8995, "step": 5672 }, { "epoch": 0.9261662789273907, "grad_norm": 2.154684066772461, "learning_rate": 1.989025242318723e-05, "loss": 0.9429, "step": 5673 }, { "epoch": 0.9263295375698951, "grad_norm": 1.7646502256393433, "learning_rate": 1.9890205533382917e-05, "loss": 0.8514, "step": 5674 }, { "epoch": 0.9264927962123994, "grad_norm": 1.8757656812667847, "learning_rate": 1.989015863361917e-05, "loss": 0.8768, "step": 5675 }, { "epoch": 0.9266560548549039, "grad_norm": 1.8330364227294922, "learning_rate": 1.989011172389604e-05, "loss": 0.7772, "step": 5676 }, { "epoch": 0.9268193134974083, "grad_norm": 1.7209479808807373, "learning_rate": 1.989006480421356e-05, "loss": 0.7909, "step": 5677 }, { "epoch": 0.9269825721399126, "grad_norm": 1.7864714860916138, "learning_rate": 1.9890017874571795e-05, "loss": 0.7267, "step": 5678 }, { "epoch": 0.9271458307824171, "grad_norm": 2.228712797164917, "learning_rate": 1.9889970934970785e-05, "loss": 0.7427, "step": 5679 }, { "epoch": 0.9273090894249214, "grad_norm": 1.5044628381729126, "learning_rate": 1.9889923985410576e-05, "loss": 0.6222, "step": 5680 }, { "epoch": 0.9274723480674258, "grad_norm": 1.8296196460723877, "learning_rate": 1.9889877025891217e-05, "loss": 0.7642, "step": 5681 }, { "epoch": 0.9276356067099302, "grad_norm": 1.8356233835220337, "learning_rate": 1.988983005641275e-05, "loss": 0.8395, "step": 5682 }, { "epoch": 0.9277988653524346, "grad_norm": 1.589242696762085, "learning_rate": 1.988978307697523e-05, "loss": 0.7172, "step": 5683 }, { "epoch": 0.9279621239949389, "grad_norm": 1.811841607093811, "learning_rate": 1.9889736087578703e-05, "loss": 0.7679, "step": 5684 }, { "epoch": 0.9281253826374434, "grad_norm": 1.8375786542892456, "learning_rate": 1.9889689088223208e-05, "loss": 0.8319, "step": 5685 }, { "epoch": 0.9282886412799478, "grad_norm": 1.5653473138809204, "learning_rate": 1.9889642078908805e-05, "loss": 0.6628, "step": 5686 }, { "epoch": 0.9284518999224521, "grad_norm": 1.9220969676971436, "learning_rate": 1.988959505963553e-05, "loss": 0.8849, "step": 5687 }, { "epoch": 0.9286151585649566, "grad_norm": 1.7848671674728394, "learning_rate": 1.988954803040344e-05, "loss": 0.8502, "step": 5688 }, { "epoch": 0.9287784172074609, "grad_norm": 1.5070096254348755, "learning_rate": 1.9889500991212575e-05, "loss": 0.6433, "step": 5689 }, { "epoch": 0.9289416758499653, "grad_norm": 1.5660334825515747, "learning_rate": 1.9889453942062988e-05, "loss": 0.6326, "step": 5690 }, { "epoch": 0.9291049344924697, "grad_norm": 1.6906564235687256, "learning_rate": 1.988940688295472e-05, "loss": 0.7252, "step": 5691 }, { "epoch": 0.9292681931349741, "grad_norm": 1.730214238166809, "learning_rate": 1.9889359813887824e-05, "loss": 0.8148, "step": 5692 }, { "epoch": 0.9294314517774784, "grad_norm": 1.95001220703125, "learning_rate": 1.9889312734862345e-05, "loss": 0.7926, "step": 5693 }, { "epoch": 0.9295947104199829, "grad_norm": 1.7198446989059448, "learning_rate": 1.988926564587833e-05, "loss": 0.7987, "step": 5694 }, { "epoch": 0.9297579690624872, "grad_norm": 1.6887645721435547, "learning_rate": 1.9889218546935827e-05, "loss": 0.7883, "step": 5695 }, { "epoch": 0.9299212277049916, "grad_norm": 1.9154633283615112, "learning_rate": 1.9889171438034886e-05, "loss": 0.819, "step": 5696 }, { "epoch": 0.9300844863474961, "grad_norm": 1.5947593450546265, "learning_rate": 1.9889124319175548e-05, "loss": 0.6547, "step": 5697 }, { "epoch": 0.9302477449900004, "grad_norm": 1.7725056409835815, "learning_rate": 1.9889077190357868e-05, "loss": 0.7414, "step": 5698 }, { "epoch": 0.9304110036325048, "grad_norm": 2.019804000854492, "learning_rate": 1.9889030051581888e-05, "loss": 0.5944, "step": 5699 }, { "epoch": 0.9305742622750092, "grad_norm": 1.9094572067260742, "learning_rate": 1.9888982902847658e-05, "loss": 0.8468, "step": 5700 }, { "epoch": 0.9307375209175136, "grad_norm": 1.995947003364563, "learning_rate": 1.9888935744155223e-05, "loss": 0.8157, "step": 5701 }, { "epoch": 0.9309007795600179, "grad_norm": 1.7261426448822021, "learning_rate": 1.9888888575504636e-05, "loss": 0.7469, "step": 5702 }, { "epoch": 0.9310640382025224, "grad_norm": 2.261383056640625, "learning_rate": 1.988884139689594e-05, "loss": 0.7302, "step": 5703 }, { "epoch": 0.9312272968450267, "grad_norm": 1.7354803085327148, "learning_rate": 1.9888794208329182e-05, "loss": 0.6641, "step": 5704 }, { "epoch": 0.9313905554875311, "grad_norm": 1.998819351196289, "learning_rate": 1.988874700980441e-05, "loss": 0.8802, "step": 5705 }, { "epoch": 0.9315538141300355, "grad_norm": 1.815419316291809, "learning_rate": 1.9888699801321675e-05, "loss": 0.6646, "step": 5706 }, { "epoch": 0.9317170727725399, "grad_norm": 2.088097095489502, "learning_rate": 1.9888652582881017e-05, "loss": 0.8867, "step": 5707 }, { "epoch": 0.9318803314150443, "grad_norm": 1.8003586530685425, "learning_rate": 1.9888605354482494e-05, "loss": 0.7524, "step": 5708 }, { "epoch": 0.9320435900575487, "grad_norm": 1.787760853767395, "learning_rate": 1.9888558116126143e-05, "loss": 0.7341, "step": 5709 }, { "epoch": 0.9322068487000531, "grad_norm": 1.8817999362945557, "learning_rate": 1.9888510867812022e-05, "loss": 0.6457, "step": 5710 }, { "epoch": 0.9323701073425574, "grad_norm": 1.9737091064453125, "learning_rate": 1.988846360954017e-05, "loss": 0.7561, "step": 5711 }, { "epoch": 0.9325333659850619, "grad_norm": 2.175656795501709, "learning_rate": 1.9888416341310637e-05, "loss": 0.8052, "step": 5712 }, { "epoch": 0.9326966246275662, "grad_norm": 1.705418586730957, "learning_rate": 1.9888369063123473e-05, "loss": 0.6683, "step": 5713 }, { "epoch": 0.9328598832700706, "grad_norm": 1.9361976385116577, "learning_rate": 1.9888321774978726e-05, "loss": 0.7575, "step": 5714 }, { "epoch": 0.933023141912575, "grad_norm": 1.9550631046295166, "learning_rate": 1.988827447687644e-05, "loss": 0.6993, "step": 5715 }, { "epoch": 0.9331864005550794, "grad_norm": 1.8096879720687866, "learning_rate": 1.988822716881666e-05, "loss": 0.7994, "step": 5716 }, { "epoch": 0.9333496591975837, "grad_norm": 1.5592663288116455, "learning_rate": 1.988817985079944e-05, "loss": 0.5859, "step": 5717 }, { "epoch": 0.9335129178400882, "grad_norm": 2.107410192489624, "learning_rate": 1.988813252282483e-05, "loss": 0.8668, "step": 5718 }, { "epoch": 0.9336761764825926, "grad_norm": 1.9437669515609741, "learning_rate": 1.9888085184892868e-05, "loss": 0.7966, "step": 5719 }, { "epoch": 0.9338394351250969, "grad_norm": 1.9885010719299316, "learning_rate": 1.988803783700361e-05, "loss": 0.8751, "step": 5720 }, { "epoch": 0.9340026937676014, "grad_norm": 2.0310909748077393, "learning_rate": 1.9887990479157098e-05, "loss": 0.8305, "step": 5721 }, { "epoch": 0.9341659524101057, "grad_norm": 1.7244991064071655, "learning_rate": 1.9887943111353385e-05, "loss": 0.7848, "step": 5722 }, { "epoch": 0.9343292110526101, "grad_norm": 1.6967183351516724, "learning_rate": 1.9887895733592514e-05, "loss": 0.7127, "step": 5723 }, { "epoch": 0.9344924696951145, "grad_norm": 1.746645450592041, "learning_rate": 1.9887848345874538e-05, "loss": 0.7447, "step": 5724 }, { "epoch": 0.9346557283376189, "grad_norm": 1.96803879737854, "learning_rate": 1.9887800948199496e-05, "loss": 0.7861, "step": 5725 }, { "epoch": 0.9348189869801232, "grad_norm": 1.7303558588027954, "learning_rate": 1.9887753540567446e-05, "loss": 0.732, "step": 5726 }, { "epoch": 0.9349822456226277, "grad_norm": 1.9768098592758179, "learning_rate": 1.9887706122978426e-05, "loss": 0.7366, "step": 5727 }, { "epoch": 0.935145504265132, "grad_norm": 1.7681853771209717, "learning_rate": 1.988765869543249e-05, "loss": 0.6297, "step": 5728 }, { "epoch": 0.9353087629076364, "grad_norm": 1.7637391090393066, "learning_rate": 1.9887611257929687e-05, "loss": 0.7391, "step": 5729 }, { "epoch": 0.9354720215501409, "grad_norm": 1.6085448265075684, "learning_rate": 1.988756381047006e-05, "loss": 0.655, "step": 5730 }, { "epoch": 0.9356352801926452, "grad_norm": 1.8517422676086426, "learning_rate": 1.988751635305366e-05, "loss": 0.8333, "step": 5731 }, { "epoch": 0.9357985388351496, "grad_norm": 1.7325329780578613, "learning_rate": 1.988746888568053e-05, "loss": 0.732, "step": 5732 }, { "epoch": 0.935961797477654, "grad_norm": 1.9422686100006104, "learning_rate": 1.9887421408350728e-05, "loss": 0.9066, "step": 5733 }, { "epoch": 0.9361250561201584, "grad_norm": 1.6214052438735962, "learning_rate": 1.988737392106429e-05, "loss": 0.8074, "step": 5734 }, { "epoch": 0.9362883147626627, "grad_norm": 1.999585747718811, "learning_rate": 1.988732642382127e-05, "loss": 1.0196, "step": 5735 }, { "epoch": 0.9364515734051672, "grad_norm": 1.8567827939987183, "learning_rate": 1.9887278916621717e-05, "loss": 0.8236, "step": 5736 }, { "epoch": 0.9366148320476715, "grad_norm": 2.0515315532684326, "learning_rate": 1.9887231399465678e-05, "loss": 0.8267, "step": 5737 }, { "epoch": 0.9367780906901759, "grad_norm": 1.78919517993927, "learning_rate": 1.9887183872353197e-05, "loss": 0.7893, "step": 5738 }, { "epoch": 0.9369413493326803, "grad_norm": 1.8790026903152466, "learning_rate": 1.988713633528432e-05, "loss": 0.9207, "step": 5739 }, { "epoch": 0.9371046079751847, "grad_norm": 2.102926254272461, "learning_rate": 1.9887088788259105e-05, "loss": 0.7826, "step": 5740 }, { "epoch": 0.9372678666176891, "grad_norm": 1.7118357419967651, "learning_rate": 1.9887041231277593e-05, "loss": 0.6542, "step": 5741 }, { "epoch": 0.9374311252601935, "grad_norm": 1.5889919996261597, "learning_rate": 1.988699366433983e-05, "loss": 0.6889, "step": 5742 }, { "epoch": 0.9375943839026979, "grad_norm": 1.6478652954101562, "learning_rate": 1.9886946087445872e-05, "loss": 0.7139, "step": 5743 }, { "epoch": 0.9377576425452022, "grad_norm": 1.866075038909912, "learning_rate": 1.9886898500595763e-05, "loss": 0.7629, "step": 5744 }, { "epoch": 0.9379209011877067, "grad_norm": 2.1026644706726074, "learning_rate": 1.9886850903789546e-05, "loss": 0.8349, "step": 5745 }, { "epoch": 0.938084159830211, "grad_norm": 1.798527479171753, "learning_rate": 1.988680329702727e-05, "loss": 0.7032, "step": 5746 }, { "epoch": 0.9382474184727154, "grad_norm": 1.8757797479629517, "learning_rate": 1.988675568030899e-05, "loss": 0.8275, "step": 5747 }, { "epoch": 0.9384106771152197, "grad_norm": 1.7428367137908936, "learning_rate": 1.9886708053634752e-05, "loss": 0.7727, "step": 5748 }, { "epoch": 0.9385739357577242, "grad_norm": 1.7845141887664795, "learning_rate": 1.9886660417004594e-05, "loss": 0.7315, "step": 5749 }, { "epoch": 0.9387371944002285, "grad_norm": 2.002915620803833, "learning_rate": 1.988661277041858e-05, "loss": 0.8609, "step": 5750 }, { "epoch": 0.938900453042733, "grad_norm": 1.6306358575820923, "learning_rate": 1.9886565113876744e-05, "loss": 0.6831, "step": 5751 }, { "epoch": 0.9390637116852374, "grad_norm": 1.8465707302093506, "learning_rate": 1.988651744737914e-05, "loss": 0.8808, "step": 5752 }, { "epoch": 0.9392269703277417, "grad_norm": 1.8070030212402344, "learning_rate": 1.988646977092582e-05, "loss": 0.587, "step": 5753 }, { "epoch": 0.9393902289702462, "grad_norm": 2.4189624786376953, "learning_rate": 1.9886422084516822e-05, "loss": 0.8934, "step": 5754 }, { "epoch": 0.9395534876127505, "grad_norm": 1.87638258934021, "learning_rate": 1.9886374388152203e-05, "loss": 0.7656, "step": 5755 }, { "epoch": 0.9397167462552549, "grad_norm": 1.7015703916549683, "learning_rate": 1.9886326681832006e-05, "loss": 0.7307, "step": 5756 }, { "epoch": 0.9398800048977592, "grad_norm": 2.803229331970215, "learning_rate": 1.988627896555628e-05, "loss": 0.8285, "step": 5757 }, { "epoch": 0.9400432635402637, "grad_norm": 2.406782627105713, "learning_rate": 1.9886231239325074e-05, "loss": 0.8408, "step": 5758 }, { "epoch": 0.940206522182768, "grad_norm": 1.8035948276519775, "learning_rate": 1.9886183503138438e-05, "loss": 0.7883, "step": 5759 }, { "epoch": 0.9403697808252724, "grad_norm": 1.8613662719726562, "learning_rate": 1.988613575699642e-05, "loss": 0.9293, "step": 5760 }, { "epoch": 0.9405330394677768, "grad_norm": 1.7085455656051636, "learning_rate": 1.988608800089906e-05, "loss": 0.7528, "step": 5761 }, { "epoch": 0.9406962981102812, "grad_norm": 1.8669358491897583, "learning_rate": 1.9886040234846415e-05, "loss": 0.7261, "step": 5762 }, { "epoch": 0.9408595567527857, "grad_norm": 2.2036235332489014, "learning_rate": 1.9885992458838527e-05, "loss": 0.9291, "step": 5763 }, { "epoch": 0.94102281539529, "grad_norm": 1.8173681497573853, "learning_rate": 1.988594467287545e-05, "loss": 0.9374, "step": 5764 }, { "epoch": 0.9411860740377944, "grad_norm": 1.7223553657531738, "learning_rate": 1.988589687695723e-05, "loss": 0.9217, "step": 5765 }, { "epoch": 0.9413493326802987, "grad_norm": 1.607495903968811, "learning_rate": 1.9885849071083912e-05, "loss": 0.7183, "step": 5766 }, { "epoch": 0.9415125913228032, "grad_norm": 1.7953224182128906, "learning_rate": 1.9885801255255552e-05, "loss": 0.7187, "step": 5767 }, { "epoch": 0.9416758499653075, "grad_norm": 1.5894697904586792, "learning_rate": 1.988575342947219e-05, "loss": 0.6975, "step": 5768 }, { "epoch": 0.941839108607812, "grad_norm": 2.0527658462524414, "learning_rate": 1.9885705593733872e-05, "loss": 0.7373, "step": 5769 }, { "epoch": 0.9420023672503163, "grad_norm": 1.8515516519546509, "learning_rate": 1.9885657748040655e-05, "loss": 0.6878, "step": 5770 }, { "epoch": 0.9421656258928207, "grad_norm": 1.92622971534729, "learning_rate": 1.9885609892392584e-05, "loss": 0.8359, "step": 5771 }, { "epoch": 0.942328884535325, "grad_norm": 1.627686858177185, "learning_rate": 1.9885562026789705e-05, "loss": 0.6738, "step": 5772 }, { "epoch": 0.9424921431778295, "grad_norm": 1.826431155204773, "learning_rate": 1.9885514151232067e-05, "loss": 0.7694, "step": 5773 }, { "epoch": 0.9426554018203339, "grad_norm": 2.256471633911133, "learning_rate": 1.9885466265719723e-05, "loss": 0.8479, "step": 5774 }, { "epoch": 0.9428186604628382, "grad_norm": 1.654866337776184, "learning_rate": 1.9885418370252715e-05, "loss": 0.6293, "step": 5775 }, { "epoch": 0.9429819191053427, "grad_norm": 1.6537717580795288, "learning_rate": 1.988537046483109e-05, "loss": 0.7651, "step": 5776 }, { "epoch": 0.943145177747847, "grad_norm": 1.6033003330230713, "learning_rate": 1.9885322549454905e-05, "loss": 0.9255, "step": 5777 }, { "epoch": 0.9433084363903514, "grad_norm": 1.7734872102737427, "learning_rate": 1.98852746241242e-05, "loss": 0.7852, "step": 5778 }, { "epoch": 0.9434716950328558, "grad_norm": 1.9155776500701904, "learning_rate": 1.9885226688839023e-05, "loss": 1.1201, "step": 5779 }, { "epoch": 0.9436349536753602, "grad_norm": 1.5816771984100342, "learning_rate": 1.988517874359943e-05, "loss": 0.5468, "step": 5780 }, { "epoch": 0.9437982123178645, "grad_norm": 2.1904280185699463, "learning_rate": 1.9885130788405463e-05, "loss": 0.7584, "step": 5781 }, { "epoch": 0.943961470960369, "grad_norm": 2.0713632106781006, "learning_rate": 1.988508282325717e-05, "loss": 0.7614, "step": 5782 }, { "epoch": 0.9441247296028733, "grad_norm": 2.3178539276123047, "learning_rate": 1.9885034848154605e-05, "loss": 0.9069, "step": 5783 }, { "epoch": 0.9442879882453777, "grad_norm": 2.1415598392486572, "learning_rate": 1.988498686309781e-05, "loss": 0.7871, "step": 5784 }, { "epoch": 0.9444512468878822, "grad_norm": 1.6048065423965454, "learning_rate": 1.9884938868086836e-05, "loss": 0.709, "step": 5785 }, { "epoch": 0.9446145055303865, "grad_norm": 1.8477904796600342, "learning_rate": 1.9884890863121734e-05, "loss": 0.8037, "step": 5786 }, { "epoch": 0.9447777641728909, "grad_norm": 2.049612045288086, "learning_rate": 1.9884842848202545e-05, "loss": 0.9281, "step": 5787 }, { "epoch": 0.9449410228153953, "grad_norm": 1.8600860834121704, "learning_rate": 1.9884794823329327e-05, "loss": 0.7258, "step": 5788 }, { "epoch": 0.9451042814578997, "grad_norm": 1.9998430013656616, "learning_rate": 1.988474678850212e-05, "loss": 0.8283, "step": 5789 }, { "epoch": 0.945267540100404, "grad_norm": 1.780461072921753, "learning_rate": 1.9884698743720973e-05, "loss": 0.6849, "step": 5790 }, { "epoch": 0.9454307987429085, "grad_norm": 1.8567193746566772, "learning_rate": 1.9884650688985943e-05, "loss": 0.6133, "step": 5791 }, { "epoch": 0.9455940573854128, "grad_norm": 2.219817876815796, "learning_rate": 1.988460262429707e-05, "loss": 0.7821, "step": 5792 }, { "epoch": 0.9457573160279172, "grad_norm": 1.7746769189834595, "learning_rate": 1.98845545496544e-05, "loss": 0.826, "step": 5793 }, { "epoch": 0.9459205746704216, "grad_norm": 1.9887628555297852, "learning_rate": 1.988450646505799e-05, "loss": 0.7882, "step": 5794 }, { "epoch": 0.946083833312926, "grad_norm": 1.7247023582458496, "learning_rate": 1.9884458370507886e-05, "loss": 0.7313, "step": 5795 }, { "epoch": 0.9462470919554304, "grad_norm": 2.068726062774658, "learning_rate": 1.9884410266004134e-05, "loss": 0.7834, "step": 5796 }, { "epoch": 0.9464103505979348, "grad_norm": 2.4623751640319824, "learning_rate": 1.9884362151546783e-05, "loss": 0.8833, "step": 5797 }, { "epoch": 0.9465736092404392, "grad_norm": 2.131727457046509, "learning_rate": 1.9884314027135883e-05, "loss": 0.8595, "step": 5798 }, { "epoch": 0.9467368678829435, "grad_norm": 2.0381524562835693, "learning_rate": 1.9884265892771483e-05, "loss": 1.3767, "step": 5799 }, { "epoch": 0.946900126525448, "grad_norm": 1.7590593099594116, "learning_rate": 1.9884217748453625e-05, "loss": 0.7559, "step": 5800 }, { "epoch": 0.9470633851679523, "grad_norm": 1.6981379985809326, "learning_rate": 1.9884169594182364e-05, "loss": 0.7696, "step": 5801 }, { "epoch": 0.9472266438104567, "grad_norm": 1.8272701501846313, "learning_rate": 1.988412142995775e-05, "loss": 0.7707, "step": 5802 }, { "epoch": 0.9473899024529611, "grad_norm": 1.9308500289916992, "learning_rate": 1.9884073255779824e-05, "loss": 0.7492, "step": 5803 }, { "epoch": 0.9475531610954655, "grad_norm": 1.7385494709014893, "learning_rate": 1.9884025071648643e-05, "loss": 0.7122, "step": 5804 }, { "epoch": 0.9477164197379698, "grad_norm": 1.829702615737915, "learning_rate": 1.988397687756425e-05, "loss": 0.7234, "step": 5805 }, { "epoch": 0.9478796783804743, "grad_norm": 1.8464897871017456, "learning_rate": 1.9883928673526692e-05, "loss": 0.7385, "step": 5806 }, { "epoch": 0.9480429370229787, "grad_norm": 1.8805749416351318, "learning_rate": 1.9883880459536024e-05, "loss": 0.8518, "step": 5807 }, { "epoch": 0.948206195665483, "grad_norm": 2.240586519241333, "learning_rate": 1.988383223559229e-05, "loss": 0.8711, "step": 5808 }, { "epoch": 0.9483694543079875, "grad_norm": 2.2569186687469482, "learning_rate": 1.988378400169554e-05, "loss": 0.7033, "step": 5809 }, { "epoch": 0.9485327129504918, "grad_norm": 1.9544212818145752, "learning_rate": 1.9883735757845822e-05, "loss": 0.8219, "step": 5810 }, { "epoch": 0.9486959715929962, "grad_norm": 1.8035211563110352, "learning_rate": 1.9883687504043183e-05, "loss": 0.6754, "step": 5811 }, { "epoch": 0.9488592302355006, "grad_norm": 1.9403053522109985, "learning_rate": 1.9883639240287676e-05, "loss": 0.7626, "step": 5812 }, { "epoch": 0.949022488878005, "grad_norm": 1.8226954936981201, "learning_rate": 1.9883590966579342e-05, "loss": 0.7701, "step": 5813 }, { "epoch": 0.9491857475205093, "grad_norm": 1.732888102531433, "learning_rate": 1.988354268291824e-05, "loss": 0.7134, "step": 5814 }, { "epoch": 0.9493490061630138, "grad_norm": 1.760650873184204, "learning_rate": 1.988349438930441e-05, "loss": 0.8481, "step": 5815 }, { "epoch": 0.9495122648055181, "grad_norm": 1.6131792068481445, "learning_rate": 1.9883446085737904e-05, "loss": 0.6023, "step": 5816 }, { "epoch": 0.9496755234480225, "grad_norm": 1.8252925872802734, "learning_rate": 1.988339777221877e-05, "loss": 0.6659, "step": 5817 }, { "epoch": 0.949838782090527, "grad_norm": 1.9540563821792603, "learning_rate": 1.988334944874706e-05, "loss": 0.7254, "step": 5818 }, { "epoch": 0.9500020407330313, "grad_norm": 2.14513897895813, "learning_rate": 1.9883301115322817e-05, "loss": 0.78, "step": 5819 }, { "epoch": 0.9501652993755357, "grad_norm": 1.8827747106552124, "learning_rate": 1.9883252771946094e-05, "loss": 0.6161, "step": 5820 }, { "epoch": 0.9503285580180401, "grad_norm": 2.076988935470581, "learning_rate": 1.988320441861694e-05, "loss": 0.8529, "step": 5821 }, { "epoch": 0.9504918166605445, "grad_norm": 2.323436975479126, "learning_rate": 1.9883156055335398e-05, "loss": 0.8945, "step": 5822 }, { "epoch": 0.9506550753030488, "grad_norm": 2.1867728233337402, "learning_rate": 1.9883107682101523e-05, "loss": 0.8147, "step": 5823 }, { "epoch": 0.9508183339455533, "grad_norm": 2.0491859912872314, "learning_rate": 1.988305929891536e-05, "loss": 0.8346, "step": 5824 }, { "epoch": 0.9509815925880576, "grad_norm": 1.8013696670532227, "learning_rate": 1.9883010905776955e-05, "loss": 0.8056, "step": 5825 }, { "epoch": 0.951144851230562, "grad_norm": 1.7445491552352905, "learning_rate": 1.988296250268637e-05, "loss": 0.7998, "step": 5826 }, { "epoch": 0.9513081098730664, "grad_norm": 1.6402561664581299, "learning_rate": 1.9882914089643635e-05, "loss": 0.7034, "step": 5827 }, { "epoch": 0.9514713685155708, "grad_norm": 2.0749800205230713, "learning_rate": 1.9882865666648814e-05, "loss": 0.8125, "step": 5828 }, { "epoch": 0.9516346271580752, "grad_norm": 1.8249925374984741, "learning_rate": 1.988281723370195e-05, "loss": 0.7257, "step": 5829 }, { "epoch": 0.9517978858005796, "grad_norm": 2.4648420810699463, "learning_rate": 1.9882768790803086e-05, "loss": 1.1782, "step": 5830 }, { "epoch": 0.951961144443084, "grad_norm": 1.930014967918396, "learning_rate": 1.9882720337952278e-05, "loss": 0.7643, "step": 5831 }, { "epoch": 0.9521244030855883, "grad_norm": 1.962272047996521, "learning_rate": 1.988267187514958e-05, "loss": 0.7616, "step": 5832 }, { "epoch": 0.9522876617280928, "grad_norm": 1.3929247856140137, "learning_rate": 1.9882623402395027e-05, "loss": 0.5807, "step": 5833 }, { "epoch": 0.9524509203705971, "grad_norm": 1.6981593370437622, "learning_rate": 1.9882574919688676e-05, "loss": 0.717, "step": 5834 }, { "epoch": 0.9526141790131015, "grad_norm": 1.6585187911987305, "learning_rate": 1.988252642703058e-05, "loss": 0.8325, "step": 5835 }, { "epoch": 0.9527774376556059, "grad_norm": 1.8912004232406616, "learning_rate": 1.9882477924420773e-05, "loss": 0.7601, "step": 5836 }, { "epoch": 0.9529406962981103, "grad_norm": 1.9597433805465698, "learning_rate": 1.9882429411859322e-05, "loss": 0.7029, "step": 5837 }, { "epoch": 0.9531039549406146, "grad_norm": 1.9821723699569702, "learning_rate": 1.988238088934626e-05, "loss": 0.8478, "step": 5838 }, { "epoch": 0.953267213583119, "grad_norm": 2.066716432571411, "learning_rate": 1.9882332356881647e-05, "loss": 0.7375, "step": 5839 }, { "epoch": 0.9534304722256235, "grad_norm": 1.8429127931594849, "learning_rate": 1.988228381446553e-05, "loss": 0.7104, "step": 5840 }, { "epoch": 0.9535937308681278, "grad_norm": 1.911085605621338, "learning_rate": 1.9882235262097954e-05, "loss": 0.8169, "step": 5841 }, { "epoch": 0.9537569895106323, "grad_norm": 1.6619443893432617, "learning_rate": 1.988218669977897e-05, "loss": 0.8408, "step": 5842 }, { "epoch": 0.9539202481531366, "grad_norm": 1.5614078044891357, "learning_rate": 1.9882138127508624e-05, "loss": 0.673, "step": 5843 }, { "epoch": 0.954083506795641, "grad_norm": 1.6054402589797974, "learning_rate": 1.9882089545286967e-05, "loss": 0.6165, "step": 5844 }, { "epoch": 0.9542467654381454, "grad_norm": 2.0731489658355713, "learning_rate": 1.9882040953114056e-05, "loss": 0.9001, "step": 5845 }, { "epoch": 0.9544100240806498, "grad_norm": 1.7190436124801636, "learning_rate": 1.9881992350989927e-05, "loss": 0.7477, "step": 5846 }, { "epoch": 0.9545732827231541, "grad_norm": 2.2513763904571533, "learning_rate": 1.9881943738914634e-05, "loss": 0.6684, "step": 5847 }, { "epoch": 0.9547365413656586, "grad_norm": 1.3994193077087402, "learning_rate": 1.988189511688823e-05, "loss": 0.6258, "step": 5848 }, { "epoch": 0.9548998000081629, "grad_norm": 1.9851990938186646, "learning_rate": 1.9881846484910752e-05, "loss": 0.7173, "step": 5849 }, { "epoch": 0.9550630586506673, "grad_norm": 1.8960609436035156, "learning_rate": 1.9881797842982265e-05, "loss": 1.039, "step": 5850 }, { "epoch": 0.9552263172931718, "grad_norm": 1.6367026567459106, "learning_rate": 1.9881749191102807e-05, "loss": 0.6784, "step": 5851 }, { "epoch": 0.9553895759356761, "grad_norm": 1.9023123979568481, "learning_rate": 1.988170052927243e-05, "loss": 0.6968, "step": 5852 }, { "epoch": 0.9555528345781805, "grad_norm": 1.7912591695785522, "learning_rate": 1.9881651857491184e-05, "loss": 0.9238, "step": 5853 }, { "epoch": 0.9557160932206848, "grad_norm": 1.9536640644073486, "learning_rate": 1.9881603175759117e-05, "loss": 0.8356, "step": 5854 }, { "epoch": 0.9558793518631893, "grad_norm": 2.178215503692627, "learning_rate": 1.988155448407628e-05, "loss": 0.7426, "step": 5855 }, { "epoch": 0.9560426105056936, "grad_norm": 2.2000627517700195, "learning_rate": 1.9881505782442717e-05, "loss": 0.6558, "step": 5856 }, { "epoch": 0.956205869148198, "grad_norm": 1.9184342622756958, "learning_rate": 1.9881457070858482e-05, "loss": 0.8036, "step": 5857 }, { "epoch": 0.9563691277907024, "grad_norm": 1.7472063302993774, "learning_rate": 1.9881408349323622e-05, "loss": 0.6824, "step": 5858 }, { "epoch": 0.9565323864332068, "grad_norm": 1.871843695640564, "learning_rate": 1.988135961783819e-05, "loss": 0.8832, "step": 5859 }, { "epoch": 0.9566956450757111, "grad_norm": 1.9221521615982056, "learning_rate": 1.9881310876402225e-05, "loss": 0.8213, "step": 5860 }, { "epoch": 0.9568589037182156, "grad_norm": 2.058335065841675, "learning_rate": 1.9881262125015786e-05, "loss": 0.8761, "step": 5861 }, { "epoch": 0.95702216236072, "grad_norm": 1.7215598821640015, "learning_rate": 1.988121336367892e-05, "loss": 0.6424, "step": 5862 }, { "epoch": 0.9571854210032243, "grad_norm": 1.4935752153396606, "learning_rate": 1.9881164592391672e-05, "loss": 0.5509, "step": 5863 }, { "epoch": 0.9573486796457288, "grad_norm": 1.992567777633667, "learning_rate": 1.9881115811154098e-05, "loss": 0.9443, "step": 5864 }, { "epoch": 0.9575119382882331, "grad_norm": 1.592947244644165, "learning_rate": 1.988106701996624e-05, "loss": 0.6793, "step": 5865 }, { "epoch": 0.9576751969307375, "grad_norm": 1.5262306928634644, "learning_rate": 1.9881018218828147e-05, "loss": 0.7548, "step": 5866 }, { "epoch": 0.9578384555732419, "grad_norm": 1.7564045190811157, "learning_rate": 1.9880969407739875e-05, "loss": 0.6788, "step": 5867 }, { "epoch": 0.9580017142157463, "grad_norm": 2.1623117923736572, "learning_rate": 1.988092058670147e-05, "loss": 1.0269, "step": 5868 }, { "epoch": 0.9581649728582506, "grad_norm": 2.032778024673462, "learning_rate": 1.988087175571298e-05, "loss": 0.7619, "step": 5869 }, { "epoch": 0.9583282315007551, "grad_norm": 1.6946464776992798, "learning_rate": 1.9880822914774453e-05, "loss": 0.6284, "step": 5870 }, { "epoch": 0.9584914901432594, "grad_norm": 1.8071743249893188, "learning_rate": 1.9880774063885942e-05, "loss": 0.8879, "step": 5871 }, { "epoch": 0.9586547487857638, "grad_norm": 1.8844603300094604, "learning_rate": 1.988072520304749e-05, "loss": 0.6178, "step": 5872 }, { "epoch": 0.9588180074282683, "grad_norm": 2.064663887023926, "learning_rate": 1.9880676332259155e-05, "loss": 0.9803, "step": 5873 }, { "epoch": 0.9589812660707726, "grad_norm": 1.6574041843414307, "learning_rate": 1.9880627451520983e-05, "loss": 0.6165, "step": 5874 }, { "epoch": 0.959144524713277, "grad_norm": 1.7718545198440552, "learning_rate": 1.9880578560833017e-05, "loss": 0.7296, "step": 5875 }, { "epoch": 0.9593077833557814, "grad_norm": 2.145753860473633, "learning_rate": 1.9880529660195314e-05, "loss": 0.7915, "step": 5876 }, { "epoch": 0.9594710419982858, "grad_norm": 2.0439789295196533, "learning_rate": 1.988048074960792e-05, "loss": 0.7655, "step": 5877 }, { "epoch": 0.9596343006407901, "grad_norm": 1.9380052089691162, "learning_rate": 1.988043182907088e-05, "loss": 0.8097, "step": 5878 }, { "epoch": 0.9597975592832946, "grad_norm": 1.8143365383148193, "learning_rate": 1.9880382898584254e-05, "loss": 0.6877, "step": 5879 }, { "epoch": 0.9599608179257989, "grad_norm": 1.7788065671920776, "learning_rate": 1.9880333958148085e-05, "loss": 0.8665, "step": 5880 }, { "epoch": 0.9601240765683033, "grad_norm": 1.9629443883895874, "learning_rate": 1.988028500776242e-05, "loss": 0.7228, "step": 5881 }, { "epoch": 0.9602873352108077, "grad_norm": 1.7812095880508423, "learning_rate": 1.9880236047427308e-05, "loss": 0.7608, "step": 5882 }, { "epoch": 0.9604505938533121, "grad_norm": 1.9366093873977661, "learning_rate": 1.98801870771428e-05, "loss": 0.874, "step": 5883 }, { "epoch": 0.9606138524958165, "grad_norm": 1.9091942310333252, "learning_rate": 1.9880138096908955e-05, "loss": 0.7725, "step": 5884 }, { "epoch": 0.9607771111383209, "grad_norm": 1.7375255823135376, "learning_rate": 1.9880089106725805e-05, "loss": 0.7603, "step": 5885 }, { "epoch": 0.9609403697808253, "grad_norm": 1.4243805408477783, "learning_rate": 1.9880040106593413e-05, "loss": 0.6183, "step": 5886 }, { "epoch": 0.9611036284233296, "grad_norm": 1.7426848411560059, "learning_rate": 1.987999109651182e-05, "loss": 0.8244, "step": 5887 }, { "epoch": 0.9612668870658341, "grad_norm": 1.4907567501068115, "learning_rate": 1.9879942076481082e-05, "loss": 0.6861, "step": 5888 }, { "epoch": 0.9614301457083384, "grad_norm": 1.6635817289352417, "learning_rate": 1.987989304650124e-05, "loss": 0.7142, "step": 5889 }, { "epoch": 0.9615934043508428, "grad_norm": 1.504945158958435, "learning_rate": 1.987984400657235e-05, "loss": 0.6635, "step": 5890 }, { "epoch": 0.9617566629933472, "grad_norm": 1.8974223136901855, "learning_rate": 1.9879794956694463e-05, "loss": 0.9495, "step": 5891 }, { "epoch": 0.9619199216358516, "grad_norm": 1.4257986545562744, "learning_rate": 1.9879745896867624e-05, "loss": 0.6576, "step": 5892 }, { "epoch": 0.9620831802783559, "grad_norm": 1.6958147287368774, "learning_rate": 1.9879696827091882e-05, "loss": 0.8572, "step": 5893 }, { "epoch": 0.9622464389208604, "grad_norm": 1.65938401222229, "learning_rate": 1.987964774736729e-05, "loss": 0.6801, "step": 5894 }, { "epoch": 0.9624096975633648, "grad_norm": 1.9055495262145996, "learning_rate": 1.9879598657693894e-05, "loss": 0.8883, "step": 5895 }, { "epoch": 0.9625729562058691, "grad_norm": 2.2240192890167236, "learning_rate": 1.9879549558071742e-05, "loss": 0.7431, "step": 5896 }, { "epoch": 0.9627362148483736, "grad_norm": 1.4063907861709595, "learning_rate": 1.987950044850089e-05, "loss": 0.6701, "step": 5897 }, { "epoch": 0.9628994734908779, "grad_norm": 1.8229234218597412, "learning_rate": 1.9879451328981384e-05, "loss": 0.8331, "step": 5898 }, { "epoch": 0.9630627321333823, "grad_norm": 1.8376917839050293, "learning_rate": 1.987940219951327e-05, "loss": 0.7301, "step": 5899 }, { "epoch": 0.9632259907758867, "grad_norm": 2.0361239910125732, "learning_rate": 1.98793530600966e-05, "loss": 0.8292, "step": 5900 }, { "epoch": 0.9633892494183911, "grad_norm": 1.9441180229187012, "learning_rate": 1.987930391073143e-05, "loss": 0.7768, "step": 5901 }, { "epoch": 0.9635525080608954, "grad_norm": 2.0011613368988037, "learning_rate": 1.9879254751417797e-05, "loss": 0.7991, "step": 5902 }, { "epoch": 0.9637157667033999, "grad_norm": 1.787095546722412, "learning_rate": 1.987920558215576e-05, "loss": 0.8272, "step": 5903 }, { "epoch": 0.9638790253459042, "grad_norm": 1.8058454990386963, "learning_rate": 1.9879156402945368e-05, "loss": 0.7298, "step": 5904 }, { "epoch": 0.9640422839884086, "grad_norm": 2.158674716949463, "learning_rate": 1.9879107213786667e-05, "loss": 0.9092, "step": 5905 }, { "epoch": 0.9642055426309131, "grad_norm": 1.6222621202468872, "learning_rate": 1.9879058014679704e-05, "loss": 0.6428, "step": 5906 }, { "epoch": 0.9643688012734174, "grad_norm": 2.0902462005615234, "learning_rate": 1.9879008805624535e-05, "loss": 0.6753, "step": 5907 }, { "epoch": 0.9645320599159218, "grad_norm": 1.9423555135726929, "learning_rate": 1.9878959586621204e-05, "loss": 0.8046, "step": 5908 }, { "epoch": 0.9646953185584262, "grad_norm": 1.8502558469772339, "learning_rate": 1.9878910357669766e-05, "loss": 0.8262, "step": 5909 }, { "epoch": 0.9648585772009306, "grad_norm": 2.2460103034973145, "learning_rate": 1.987886111877027e-05, "loss": 0.9176, "step": 5910 }, { "epoch": 0.9650218358434349, "grad_norm": 1.8354326486587524, "learning_rate": 1.987881186992276e-05, "loss": 0.9197, "step": 5911 }, { "epoch": 0.9651850944859394, "grad_norm": 1.7509719133377075, "learning_rate": 1.9878762611127288e-05, "loss": 0.8023, "step": 5912 }, { "epoch": 0.9653483531284437, "grad_norm": 1.6554336547851562, "learning_rate": 1.987871334238391e-05, "loss": 0.7153, "step": 5913 }, { "epoch": 0.9655116117709481, "grad_norm": 1.7806309461593628, "learning_rate": 1.9878664063692664e-05, "loss": 0.6852, "step": 5914 }, { "epoch": 0.9656748704134525, "grad_norm": 1.6907514333724976, "learning_rate": 1.987861477505361e-05, "loss": 0.7683, "step": 5915 }, { "epoch": 0.9658381290559569, "grad_norm": 2.20597243309021, "learning_rate": 1.987856547646679e-05, "loss": 0.8144, "step": 5916 }, { "epoch": 0.9660013876984613, "grad_norm": 1.771627426147461, "learning_rate": 1.987851616793226e-05, "loss": 0.8175, "step": 5917 }, { "epoch": 0.9661646463409657, "grad_norm": 1.6341692209243774, "learning_rate": 1.9878466849450067e-05, "loss": 0.694, "step": 5918 }, { "epoch": 0.9663279049834701, "grad_norm": 1.7307456731796265, "learning_rate": 1.987841752102026e-05, "loss": 0.6035, "step": 5919 }, { "epoch": 0.9664911636259744, "grad_norm": 1.8764612674713135, "learning_rate": 1.987836818264289e-05, "loss": 0.7312, "step": 5920 }, { "epoch": 0.9666544222684789, "grad_norm": 1.9953573942184448, "learning_rate": 1.9878318834318005e-05, "loss": 0.892, "step": 5921 }, { "epoch": 0.9668176809109832, "grad_norm": 2.0008957386016846, "learning_rate": 1.9878269476045656e-05, "loss": 0.9351, "step": 5922 }, { "epoch": 0.9669809395534876, "grad_norm": 1.5710453987121582, "learning_rate": 1.9878220107825892e-05, "loss": 0.6207, "step": 5923 }, { "epoch": 0.967144198195992, "grad_norm": 1.9178194999694824, "learning_rate": 1.9878170729658762e-05, "loss": 0.8535, "step": 5924 }, { "epoch": 0.9673074568384964, "grad_norm": 1.4948939085006714, "learning_rate": 1.9878121341544317e-05, "loss": 0.6455, "step": 5925 }, { "epoch": 0.9674707154810007, "grad_norm": 1.773796796798706, "learning_rate": 1.987807194348261e-05, "loss": 0.71, "step": 5926 }, { "epoch": 0.9676339741235052, "grad_norm": 2.001394748687744, "learning_rate": 1.9878022535473682e-05, "loss": 0.9076, "step": 5927 }, { "epoch": 0.9677972327660096, "grad_norm": 1.7112550735473633, "learning_rate": 1.987797311751759e-05, "loss": 0.7714, "step": 5928 }, { "epoch": 0.9679604914085139, "grad_norm": 1.954550862312317, "learning_rate": 1.9877923689614382e-05, "loss": 0.8302, "step": 5929 }, { "epoch": 0.9681237500510184, "grad_norm": 2.1458640098571777, "learning_rate": 1.9877874251764108e-05, "loss": 0.843, "step": 5930 }, { "epoch": 0.9682870086935227, "grad_norm": 1.9994858503341675, "learning_rate": 1.9877824803966818e-05, "loss": 0.838, "step": 5931 }, { "epoch": 0.9684502673360271, "grad_norm": 1.6891282796859741, "learning_rate": 1.987777534622256e-05, "loss": 0.6915, "step": 5932 }, { "epoch": 0.9686135259785315, "grad_norm": 1.8957222700119019, "learning_rate": 1.987772587853138e-05, "loss": 0.7644, "step": 5933 }, { "epoch": 0.9687767846210359, "grad_norm": 1.6775943040847778, "learning_rate": 1.987767640089334e-05, "loss": 0.7307, "step": 5934 }, { "epoch": 0.9689400432635402, "grad_norm": 1.917017936706543, "learning_rate": 1.987762691330848e-05, "loss": 0.9459, "step": 5935 }, { "epoch": 0.9691033019060447, "grad_norm": 1.6119433641433716, "learning_rate": 1.987757741577685e-05, "loss": 0.684, "step": 5936 }, { "epoch": 0.9692665605485491, "grad_norm": 1.8658279180526733, "learning_rate": 1.9877527908298503e-05, "loss": 0.8066, "step": 5937 }, { "epoch": 0.9694298191910534, "grad_norm": 1.8829654455184937, "learning_rate": 1.987747839087349e-05, "loss": 0.8023, "step": 5938 }, { "epoch": 0.9695930778335579, "grad_norm": 1.4931589365005493, "learning_rate": 1.9877428863501857e-05, "loss": 0.6303, "step": 5939 }, { "epoch": 0.9697563364760622, "grad_norm": 1.7371169328689575, "learning_rate": 1.9877379326183656e-05, "loss": 0.6894, "step": 5940 }, { "epoch": 0.9699195951185666, "grad_norm": 1.8759992122650146, "learning_rate": 1.9877329778918938e-05, "loss": 0.861, "step": 5941 }, { "epoch": 0.970082853761071, "grad_norm": 1.5274263620376587, "learning_rate": 1.9877280221707752e-05, "loss": 0.6898, "step": 5942 }, { "epoch": 0.9702461124035754, "grad_norm": 1.8152787685394287, "learning_rate": 1.9877230654550143e-05, "loss": 0.8718, "step": 5943 }, { "epoch": 0.9704093710460797, "grad_norm": 1.7864818572998047, "learning_rate": 1.9877181077446172e-05, "loss": 0.8864, "step": 5944 }, { "epoch": 0.9705726296885842, "grad_norm": 2.062959671020508, "learning_rate": 1.987713149039588e-05, "loss": 1.026, "step": 5945 }, { "epoch": 0.9707358883310885, "grad_norm": 1.7025115489959717, "learning_rate": 1.9877081893399315e-05, "loss": 0.711, "step": 5946 }, { "epoch": 0.9708991469735929, "grad_norm": 1.8405207395553589, "learning_rate": 1.9877032286456535e-05, "loss": 0.8961, "step": 5947 }, { "epoch": 0.9710624056160974, "grad_norm": 1.8922622203826904, "learning_rate": 1.9876982669567585e-05, "loss": 0.7401, "step": 5948 }, { "epoch": 0.9712256642586017, "grad_norm": 1.89198637008667, "learning_rate": 1.9876933042732517e-05, "loss": 0.9072, "step": 5949 }, { "epoch": 0.9713889229011061, "grad_norm": 1.819451093673706, "learning_rate": 1.9876883405951378e-05, "loss": 0.8554, "step": 5950 }, { "epoch": 0.9715521815436104, "grad_norm": 2.102905511856079, "learning_rate": 1.9876833759224223e-05, "loss": 0.9314, "step": 5951 }, { "epoch": 0.9717154401861149, "grad_norm": 1.8134114742279053, "learning_rate": 1.9876784102551098e-05, "loss": 0.6426, "step": 5952 }, { "epoch": 0.9718786988286192, "grad_norm": 1.63399338722229, "learning_rate": 1.987673443593205e-05, "loss": 0.7231, "step": 5953 }, { "epoch": 0.9720419574711237, "grad_norm": 1.5192047357559204, "learning_rate": 1.987668475936714e-05, "loss": 0.6071, "step": 5954 }, { "epoch": 0.972205216113628, "grad_norm": 1.8394055366516113, "learning_rate": 1.987663507285641e-05, "loss": 0.8735, "step": 5955 }, { "epoch": 0.9723684747561324, "grad_norm": 1.5879292488098145, "learning_rate": 1.9876585376399904e-05, "loss": 0.6748, "step": 5956 }, { "epoch": 0.9725317333986367, "grad_norm": 1.635048270225525, "learning_rate": 1.9876535669997685e-05, "loss": 0.7705, "step": 5957 }, { "epoch": 0.9726949920411412, "grad_norm": 1.8931376934051514, "learning_rate": 1.9876485953649795e-05, "loss": 0.7077, "step": 5958 }, { "epoch": 0.9728582506836456, "grad_norm": 1.8646506071090698, "learning_rate": 1.9876436227356288e-05, "loss": 0.7208, "step": 5959 }, { "epoch": 0.97302150932615, "grad_norm": 1.542148232460022, "learning_rate": 1.987638649111721e-05, "loss": 0.6866, "step": 5960 }, { "epoch": 0.9731847679686544, "grad_norm": 1.799831748008728, "learning_rate": 1.9876336744932616e-05, "loss": 0.8734, "step": 5961 }, { "epoch": 0.9733480266111587, "grad_norm": 1.745521068572998, "learning_rate": 1.9876286988802552e-05, "loss": 0.7456, "step": 5962 }, { "epoch": 0.9735112852536632, "grad_norm": 1.6802253723144531, "learning_rate": 1.9876237222727072e-05, "loss": 0.806, "step": 5963 }, { "epoch": 0.9736745438961675, "grad_norm": 1.6264944076538086, "learning_rate": 1.9876187446706222e-05, "loss": 0.7477, "step": 5964 }, { "epoch": 0.9738378025386719, "grad_norm": 1.580211877822876, "learning_rate": 1.9876137660740054e-05, "loss": 0.6917, "step": 5965 }, { "epoch": 0.9740010611811762, "grad_norm": 1.4693100452423096, "learning_rate": 1.9876087864828617e-05, "loss": 0.681, "step": 5966 }, { "epoch": 0.9741643198236807, "grad_norm": 1.7324347496032715, "learning_rate": 1.9876038058971963e-05, "loss": 0.7502, "step": 5967 }, { "epoch": 0.974327578466185, "grad_norm": 1.9221038818359375, "learning_rate": 1.987598824317014e-05, "loss": 0.8654, "step": 5968 }, { "epoch": 0.9744908371086894, "grad_norm": 2.1047980785369873, "learning_rate": 1.98759384174232e-05, "loss": 0.7481, "step": 5969 }, { "epoch": 0.9746540957511939, "grad_norm": 1.6862722635269165, "learning_rate": 1.9875888581731194e-05, "loss": 0.7664, "step": 5970 }, { "epoch": 0.9748173543936982, "grad_norm": 1.769893765449524, "learning_rate": 1.9875838736094173e-05, "loss": 0.7537, "step": 5971 }, { "epoch": 0.9749806130362026, "grad_norm": 2.2298662662506104, "learning_rate": 1.9875788880512183e-05, "loss": 0.8482, "step": 5972 }, { "epoch": 0.975143871678707, "grad_norm": 2.0298850536346436, "learning_rate": 1.9875739014985273e-05, "loss": 0.8494, "step": 5973 }, { "epoch": 0.9753071303212114, "grad_norm": 1.6761415004730225, "learning_rate": 1.98756891395135e-05, "loss": 0.6759, "step": 5974 }, { "epoch": 0.9754703889637157, "grad_norm": 1.8522803783416748, "learning_rate": 1.9875639254096908e-05, "loss": 0.7023, "step": 5975 }, { "epoch": 0.9756336476062202, "grad_norm": 2.02699613571167, "learning_rate": 1.9875589358735553e-05, "loss": 0.9165, "step": 5976 }, { "epoch": 0.9757969062487245, "grad_norm": 1.7743932008743286, "learning_rate": 1.987553945342948e-05, "loss": 0.6396, "step": 5977 }, { "epoch": 0.9759601648912289, "grad_norm": 1.6438225507736206, "learning_rate": 1.987548953817874e-05, "loss": 0.6977, "step": 5978 }, { "epoch": 0.9761234235337333, "grad_norm": 1.67996084690094, "learning_rate": 1.9875439612983388e-05, "loss": 0.6831, "step": 5979 }, { "epoch": 0.9762866821762377, "grad_norm": 1.6769899129867554, "learning_rate": 1.987538967784347e-05, "loss": 0.5868, "step": 5980 }, { "epoch": 0.9764499408187421, "grad_norm": 1.9860998392105103, "learning_rate": 1.9875339732759037e-05, "loss": 0.7618, "step": 5981 }, { "epoch": 0.9766131994612465, "grad_norm": 1.8570220470428467, "learning_rate": 1.9875289777730137e-05, "loss": 0.7333, "step": 5982 }, { "epoch": 0.9767764581037509, "grad_norm": 2.0959367752075195, "learning_rate": 1.9875239812756826e-05, "loss": 0.8431, "step": 5983 }, { "epoch": 0.9769397167462552, "grad_norm": 1.5867465734481812, "learning_rate": 1.987518983783915e-05, "loss": 0.5917, "step": 5984 }, { "epoch": 0.9771029753887597, "grad_norm": 2.1152360439300537, "learning_rate": 1.9875139852977162e-05, "loss": 0.8539, "step": 5985 }, { "epoch": 0.977266234031264, "grad_norm": 1.6980233192443848, "learning_rate": 1.9875089858170907e-05, "loss": 0.6967, "step": 5986 }, { "epoch": 0.9774294926737684, "grad_norm": 1.9800946712493896, "learning_rate": 1.9875039853420445e-05, "loss": 0.8382, "step": 5987 }, { "epoch": 0.9775927513162728, "grad_norm": 1.9031760692596436, "learning_rate": 1.9874989838725812e-05, "loss": 0.8271, "step": 5988 }, { "epoch": 0.9777560099587772, "grad_norm": 1.8787875175476074, "learning_rate": 1.9874939814087074e-05, "loss": 0.8431, "step": 5989 }, { "epoch": 0.9779192686012815, "grad_norm": 1.6274372339248657, "learning_rate": 1.9874889779504274e-05, "loss": 0.6774, "step": 5990 }, { "epoch": 0.978082527243786, "grad_norm": 2.103212356567383, "learning_rate": 1.987483973497746e-05, "loss": 0.9484, "step": 5991 }, { "epoch": 0.9782457858862904, "grad_norm": 2.074878454208374, "learning_rate": 1.9874789680506685e-05, "loss": 0.8176, "step": 5992 }, { "epoch": 0.9784090445287947, "grad_norm": 1.7970536947250366, "learning_rate": 1.9874739616092e-05, "loss": 0.7182, "step": 5993 }, { "epoch": 0.9785723031712992, "grad_norm": 1.6808812618255615, "learning_rate": 1.9874689541733455e-05, "loss": 0.7016, "step": 5994 }, { "epoch": 0.9787355618138035, "grad_norm": 1.6821799278259277, "learning_rate": 1.98746394574311e-05, "loss": 0.7992, "step": 5995 }, { "epoch": 0.9788988204563079, "grad_norm": 1.5709832906723022, "learning_rate": 1.9874589363184988e-05, "loss": 0.7204, "step": 5996 }, { "epoch": 0.9790620790988123, "grad_norm": 1.7833362817764282, "learning_rate": 1.987453925899516e-05, "loss": 0.8071, "step": 5997 }, { "epoch": 0.9792253377413167, "grad_norm": 1.6405885219573975, "learning_rate": 1.9874489144861683e-05, "loss": 0.7564, "step": 5998 }, { "epoch": 0.979388596383821, "grad_norm": 1.953478217124939, "learning_rate": 1.987443902078459e-05, "loss": 0.7531, "step": 5999 }, { "epoch": 0.9795518550263255, "grad_norm": 1.6311215162277222, "learning_rate": 1.9874388886763944e-05, "loss": 0.6578, "step": 6000 }, { "epoch": 0.9797151136688298, "grad_norm": 1.6680793762207031, "learning_rate": 1.987433874279979e-05, "loss": 0.7281, "step": 6001 }, { "epoch": 0.9798783723113342, "grad_norm": 1.6072957515716553, "learning_rate": 1.987428858889218e-05, "loss": 0.6269, "step": 6002 }, { "epoch": 0.9800416309538387, "grad_norm": 1.8018102645874023, "learning_rate": 1.9874238425041164e-05, "loss": 0.8311, "step": 6003 }, { "epoch": 0.980204889596343, "grad_norm": 1.9095489978790283, "learning_rate": 1.987418825124679e-05, "loss": 0.8519, "step": 6004 }, { "epoch": 0.9803681482388474, "grad_norm": 1.6413098573684692, "learning_rate": 1.9874138067509116e-05, "loss": 0.6749, "step": 6005 }, { "epoch": 0.9805314068813518, "grad_norm": 1.5669972896575928, "learning_rate": 1.9874087873828185e-05, "loss": 0.6804, "step": 6006 }, { "epoch": 0.9806946655238562, "grad_norm": 1.9253073930740356, "learning_rate": 1.987403767020405e-05, "loss": 0.8547, "step": 6007 }, { "epoch": 0.9808579241663605, "grad_norm": 1.6400352716445923, "learning_rate": 1.987398745663676e-05, "loss": 0.6881, "step": 6008 }, { "epoch": 0.981021182808865, "grad_norm": 1.9098623991012573, "learning_rate": 1.987393723312637e-05, "loss": 0.728, "step": 6009 }, { "epoch": 0.9811844414513693, "grad_norm": 1.4521044492721558, "learning_rate": 1.9873886999672927e-05, "loss": 0.5883, "step": 6010 }, { "epoch": 0.9813477000938737, "grad_norm": 1.7754608392715454, "learning_rate": 1.9873836756276482e-05, "loss": 0.7166, "step": 6011 }, { "epoch": 0.9815109587363781, "grad_norm": 1.4899892807006836, "learning_rate": 1.9873786502937086e-05, "loss": 0.7209, "step": 6012 }, { "epoch": 0.9816742173788825, "grad_norm": 1.8073278665542603, "learning_rate": 1.9873736239654787e-05, "loss": 0.6165, "step": 6013 }, { "epoch": 0.9818374760213869, "grad_norm": 2.1747896671295166, "learning_rate": 1.9873685966429646e-05, "loss": 0.9362, "step": 6014 }, { "epoch": 0.9820007346638913, "grad_norm": 1.6571619510650635, "learning_rate": 1.98736356832617e-05, "loss": 0.7174, "step": 6015 }, { "epoch": 0.9821639933063957, "grad_norm": 1.764414668083191, "learning_rate": 1.9873585390151003e-05, "loss": 0.6344, "step": 6016 }, { "epoch": 0.9823272519489, "grad_norm": 1.7066560983657837, "learning_rate": 1.9873535087097614e-05, "loss": 0.6145, "step": 6017 }, { "epoch": 0.9824905105914045, "grad_norm": 1.8135972023010254, "learning_rate": 1.9873484774101576e-05, "loss": 0.7636, "step": 6018 }, { "epoch": 0.9826537692339088, "grad_norm": 2.093597412109375, "learning_rate": 1.987343445116294e-05, "loss": 0.9134, "step": 6019 }, { "epoch": 0.9828170278764132, "grad_norm": 1.9487853050231934, "learning_rate": 1.987338411828176e-05, "loss": 0.7286, "step": 6020 }, { "epoch": 0.9829802865189176, "grad_norm": 2.1160693168640137, "learning_rate": 1.9873333775458082e-05, "loss": 0.8427, "step": 6021 }, { "epoch": 0.983143545161422, "grad_norm": 2.046687126159668, "learning_rate": 1.987328342269196e-05, "loss": 0.7701, "step": 6022 }, { "epoch": 0.9833068038039263, "grad_norm": 2.279602527618408, "learning_rate": 1.9873233059983446e-05, "loss": 0.8699, "step": 6023 }, { "epoch": 0.9834700624464308, "grad_norm": 1.8102754354476929, "learning_rate": 1.987318268733259e-05, "loss": 0.7004, "step": 6024 }, { "epoch": 0.9836333210889352, "grad_norm": 1.9187486171722412, "learning_rate": 1.987313230473944e-05, "loss": 0.7657, "step": 6025 }, { "epoch": 0.9837965797314395, "grad_norm": 2.090390682220459, "learning_rate": 1.9873081912204048e-05, "loss": 1.0659, "step": 6026 }, { "epoch": 0.983959838373944, "grad_norm": 1.8013267517089844, "learning_rate": 1.9873031509726463e-05, "loss": 0.8419, "step": 6027 }, { "epoch": 0.9841230970164483, "grad_norm": 2.1475915908813477, "learning_rate": 1.987298109730674e-05, "loss": 0.9188, "step": 6028 }, { "epoch": 0.9842863556589527, "grad_norm": 2.16963529586792, "learning_rate": 1.987293067494493e-05, "loss": 0.8846, "step": 6029 }, { "epoch": 0.984449614301457, "grad_norm": 2.2411036491394043, "learning_rate": 1.9872880242641078e-05, "loss": 0.7537, "step": 6030 }, { "epoch": 0.9846128729439615, "grad_norm": 1.4837383031845093, "learning_rate": 1.987282980039524e-05, "loss": 0.6128, "step": 6031 }, { "epoch": 0.9847761315864658, "grad_norm": 1.7958338260650635, "learning_rate": 1.9872779348207465e-05, "loss": 0.8465, "step": 6032 }, { "epoch": 0.9849393902289703, "grad_norm": 1.8919084072113037, "learning_rate": 1.9872728886077802e-05, "loss": 0.6389, "step": 6033 }, { "epoch": 0.9851026488714746, "grad_norm": 1.8457815647125244, "learning_rate": 1.9872678414006306e-05, "loss": 0.7177, "step": 6034 }, { "epoch": 0.985265907513979, "grad_norm": 1.7271432876586914, "learning_rate": 1.9872627931993026e-05, "loss": 0.6808, "step": 6035 }, { "epoch": 0.9854291661564835, "grad_norm": 1.746943473815918, "learning_rate": 1.987257744003801e-05, "loss": 0.7321, "step": 6036 }, { "epoch": 0.9855924247989878, "grad_norm": 1.8206374645233154, "learning_rate": 1.9872526938141313e-05, "loss": 0.7911, "step": 6037 }, { "epoch": 0.9857556834414922, "grad_norm": 1.9376498460769653, "learning_rate": 1.9872476426302983e-05, "loss": 0.6821, "step": 6038 }, { "epoch": 0.9859189420839966, "grad_norm": 1.9760218858718872, "learning_rate": 1.987242590452307e-05, "loss": 0.8867, "step": 6039 }, { "epoch": 0.986082200726501, "grad_norm": 1.9285062551498413, "learning_rate": 1.9872375372801627e-05, "loss": 0.7281, "step": 6040 }, { "epoch": 0.9862454593690053, "grad_norm": 1.731630563735962, "learning_rate": 1.987232483113871e-05, "loss": 0.6779, "step": 6041 }, { "epoch": 0.9864087180115098, "grad_norm": 1.5643675327301025, "learning_rate": 1.987227427953436e-05, "loss": 0.6119, "step": 6042 }, { "epoch": 0.9865719766540141, "grad_norm": 1.5938082933425903, "learning_rate": 1.9872223717988632e-05, "loss": 0.7078, "step": 6043 }, { "epoch": 0.9867352352965185, "grad_norm": 1.920799970626831, "learning_rate": 1.9872173146501577e-05, "loss": 0.7572, "step": 6044 }, { "epoch": 0.9868984939390228, "grad_norm": 2.1344242095947266, "learning_rate": 1.987212256507325e-05, "loss": 0.984, "step": 6045 }, { "epoch": 0.9870617525815273, "grad_norm": 1.501420021057129, "learning_rate": 1.9872071973703695e-05, "loss": 0.6223, "step": 6046 }, { "epoch": 0.9872250112240317, "grad_norm": 2.3442068099975586, "learning_rate": 1.987202137239297e-05, "loss": 0.7741, "step": 6047 }, { "epoch": 0.987388269866536, "grad_norm": 1.8293782472610474, "learning_rate": 1.9871970761141116e-05, "loss": 0.7081, "step": 6048 }, { "epoch": 0.9875515285090405, "grad_norm": 2.0163910388946533, "learning_rate": 1.9871920139948193e-05, "loss": 0.6837, "step": 6049 }, { "epoch": 0.9877147871515448, "grad_norm": 1.9918174743652344, "learning_rate": 1.987186950881425e-05, "loss": 0.6608, "step": 6050 }, { "epoch": 0.9878780457940493, "grad_norm": 1.8569176197052002, "learning_rate": 1.9871818867739336e-05, "loss": 0.7203, "step": 6051 }, { "epoch": 0.9880413044365536, "grad_norm": 1.4728494882583618, "learning_rate": 1.9871768216723504e-05, "loss": 0.6783, "step": 6052 }, { "epoch": 0.988204563079058, "grad_norm": 1.806503415107727, "learning_rate": 1.9871717555766802e-05, "loss": 0.7128, "step": 6053 }, { "epoch": 0.9883678217215623, "grad_norm": 1.6558796167373657, "learning_rate": 1.9871666884869284e-05, "loss": 0.7896, "step": 6054 }, { "epoch": 0.9885310803640668, "grad_norm": 1.5507030487060547, "learning_rate": 1.9871616204031e-05, "loss": 0.757, "step": 6055 }, { "epoch": 0.9886943390065711, "grad_norm": 2.423875331878662, "learning_rate": 1.9871565513252004e-05, "loss": 0.9425, "step": 6056 }, { "epoch": 0.9888575976490755, "grad_norm": 1.7223186492919922, "learning_rate": 1.9871514812532343e-05, "loss": 0.9469, "step": 6057 }, { "epoch": 0.98902085629158, "grad_norm": 1.9923958778381348, "learning_rate": 1.9871464101872066e-05, "loss": 0.9279, "step": 6058 }, { "epoch": 0.9891841149340843, "grad_norm": 1.9704676866531372, "learning_rate": 1.9871413381271227e-05, "loss": 0.7176, "step": 6059 }, { "epoch": 0.9893473735765888, "grad_norm": 1.9575555324554443, "learning_rate": 1.987136265072988e-05, "loss": 0.8085, "step": 6060 }, { "epoch": 0.9895106322190931, "grad_norm": 1.8110486268997192, "learning_rate": 1.9871311910248074e-05, "loss": 0.8116, "step": 6061 }, { "epoch": 0.9896738908615975, "grad_norm": 1.7949093580245972, "learning_rate": 1.9871261159825858e-05, "loss": 0.8535, "step": 6062 }, { "epoch": 0.9898371495041018, "grad_norm": 1.7336087226867676, "learning_rate": 1.9871210399463287e-05, "loss": 0.833, "step": 6063 }, { "epoch": 0.9900004081466063, "grad_norm": 1.7822155952453613, "learning_rate": 1.9871159629160405e-05, "loss": 0.6829, "step": 6064 }, { "epoch": 0.9901636667891106, "grad_norm": 1.9211289882659912, "learning_rate": 1.9871108848917272e-05, "loss": 0.8141, "step": 6065 }, { "epoch": 0.990326925431615, "grad_norm": 1.6113996505737305, "learning_rate": 1.987105805873393e-05, "loss": 0.7642, "step": 6066 }, { "epoch": 0.9904901840741194, "grad_norm": 1.684557557106018, "learning_rate": 1.987100725861044e-05, "loss": 0.7923, "step": 6067 }, { "epoch": 0.9906534427166238, "grad_norm": 1.6841557025909424, "learning_rate": 1.987095644854685e-05, "loss": 0.692, "step": 6068 }, { "epoch": 0.9908167013591282, "grad_norm": 1.7554411888122559, "learning_rate": 1.9870905628543204e-05, "loss": 0.7748, "step": 6069 }, { "epoch": 0.9909799600016326, "grad_norm": 2.031614065170288, "learning_rate": 1.9870854798599563e-05, "loss": 0.8937, "step": 6070 }, { "epoch": 0.991143218644137, "grad_norm": 2.0891551971435547, "learning_rate": 1.987080395871597e-05, "loss": 0.8405, "step": 6071 }, { "epoch": 0.9913064772866413, "grad_norm": 2.0731348991394043, "learning_rate": 1.9870753108892483e-05, "loss": 0.7968, "step": 6072 }, { "epoch": 0.9914697359291458, "grad_norm": 1.9249430894851685, "learning_rate": 1.987070224912915e-05, "loss": 0.7778, "step": 6073 }, { "epoch": 0.9916329945716501, "grad_norm": 1.6516939401626587, "learning_rate": 1.9870651379426022e-05, "loss": 0.6959, "step": 6074 }, { "epoch": 0.9917962532141545, "grad_norm": 1.8190982341766357, "learning_rate": 1.987060049978315e-05, "loss": 0.7352, "step": 6075 }, { "epoch": 0.9919595118566589, "grad_norm": 1.8975927829742432, "learning_rate": 1.9870549610200587e-05, "loss": 0.8362, "step": 6076 }, { "epoch": 0.9921227704991633, "grad_norm": 1.655179500579834, "learning_rate": 1.9870498710678383e-05, "loss": 0.7377, "step": 6077 }, { "epoch": 0.9922860291416676, "grad_norm": 1.9465856552124023, "learning_rate": 1.987044780121659e-05, "loss": 0.9519, "step": 6078 }, { "epoch": 0.9924492877841721, "grad_norm": 1.9071656465530396, "learning_rate": 1.987039688181526e-05, "loss": 0.8051, "step": 6079 }, { "epoch": 0.9926125464266765, "grad_norm": 1.632247805595398, "learning_rate": 1.9870345952474436e-05, "loss": 0.6219, "step": 6080 }, { "epoch": 0.9927758050691808, "grad_norm": 1.689948320388794, "learning_rate": 1.9870295013194183e-05, "loss": 0.6615, "step": 6081 }, { "epoch": 0.9929390637116853, "grad_norm": 2.5762290954589844, "learning_rate": 1.987024406397454e-05, "loss": 0.7496, "step": 6082 }, { "epoch": 0.9931023223541896, "grad_norm": 1.4671343564987183, "learning_rate": 1.987019310481557e-05, "loss": 0.5928, "step": 6083 }, { "epoch": 0.993265580996694, "grad_norm": 1.5476707220077515, "learning_rate": 1.9870142135717314e-05, "loss": 0.6122, "step": 6084 }, { "epoch": 0.9934288396391984, "grad_norm": 1.7163381576538086, "learning_rate": 1.9870091156679828e-05, "loss": 0.6932, "step": 6085 }, { "epoch": 0.9935920982817028, "grad_norm": 1.8964236974716187, "learning_rate": 1.9870040167703166e-05, "loss": 0.8945, "step": 6086 }, { "epoch": 0.9937553569242071, "grad_norm": 1.7007569074630737, "learning_rate": 1.9869989168787372e-05, "loss": 0.8272, "step": 6087 }, { "epoch": 0.9939186155667116, "grad_norm": 2.0102131366729736, "learning_rate": 1.9869938159932504e-05, "loss": 0.8839, "step": 6088 }, { "epoch": 0.9940818742092159, "grad_norm": 1.9050819873809814, "learning_rate": 1.9869887141138612e-05, "loss": 0.6724, "step": 6089 }, { "epoch": 0.9942451328517203, "grad_norm": 3.572845458984375, "learning_rate": 1.9869836112405744e-05, "loss": 0.8055, "step": 6090 }, { "epoch": 0.9944083914942248, "grad_norm": 1.8550292253494263, "learning_rate": 1.9869785073733953e-05, "loss": 0.838, "step": 6091 }, { "epoch": 0.9945716501367291, "grad_norm": 1.958802580833435, "learning_rate": 1.9869734025123292e-05, "loss": 0.7308, "step": 6092 }, { "epoch": 0.9947349087792335, "grad_norm": 2.0518805980682373, "learning_rate": 1.9869682966573814e-05, "loss": 0.8529, "step": 6093 }, { "epoch": 0.9948981674217379, "grad_norm": 1.6574863195419312, "learning_rate": 1.9869631898085563e-05, "loss": 0.7876, "step": 6094 }, { "epoch": 0.9950614260642423, "grad_norm": 2.1080191135406494, "learning_rate": 1.98695808196586e-05, "loss": 0.7873, "step": 6095 }, { "epoch": 0.9952246847067466, "grad_norm": 1.8979504108428955, "learning_rate": 1.986952973129297e-05, "loss": 0.8885, "step": 6096 }, { "epoch": 0.9953879433492511, "grad_norm": 1.9292210340499878, "learning_rate": 1.9869478632988724e-05, "loss": 0.7992, "step": 6097 }, { "epoch": 0.9955512019917554, "grad_norm": 1.9728667736053467, "learning_rate": 1.986942752474592e-05, "loss": 0.9235, "step": 6098 }, { "epoch": 0.9957144606342598, "grad_norm": 1.4399513006210327, "learning_rate": 1.98693764065646e-05, "loss": 0.5895, "step": 6099 }, { "epoch": 0.9958777192767642, "grad_norm": 1.7028934955596924, "learning_rate": 1.9869325278444824e-05, "loss": 0.595, "step": 6100 }, { "epoch": 0.9960409779192686, "grad_norm": 1.6393214464187622, "learning_rate": 1.986927414038664e-05, "loss": 0.6776, "step": 6101 }, { "epoch": 0.996204236561773, "grad_norm": 4.110625267028809, "learning_rate": 1.98692229923901e-05, "loss": 0.7828, "step": 6102 }, { "epoch": 0.9963674952042774, "grad_norm": 1.804225206375122, "learning_rate": 1.9869171834455253e-05, "loss": 0.7869, "step": 6103 }, { "epoch": 0.9965307538467818, "grad_norm": 1.6790324449539185, "learning_rate": 1.9869120666582153e-05, "loss": 0.6917, "step": 6104 }, { "epoch": 0.9966940124892861, "grad_norm": 1.5569666624069214, "learning_rate": 1.9869069488770853e-05, "loss": 0.7547, "step": 6105 }, { "epoch": 0.9968572711317906, "grad_norm": 1.550554871559143, "learning_rate": 1.98690183010214e-05, "loss": 0.6964, "step": 6106 }, { "epoch": 0.9970205297742949, "grad_norm": 1.5028797388076782, "learning_rate": 1.986896710333385e-05, "loss": 0.7396, "step": 6107 }, { "epoch": 0.9971837884167993, "grad_norm": 1.733398675918579, "learning_rate": 1.986891589570825e-05, "loss": 0.7703, "step": 6108 }, { "epoch": 0.9973470470593037, "grad_norm": 2.112023115158081, "learning_rate": 1.9868864678144658e-05, "loss": 0.7964, "step": 6109 }, { "epoch": 0.9975103057018081, "grad_norm": 1.8998324871063232, "learning_rate": 1.9868813450643118e-05, "loss": 0.7696, "step": 6110 }, { "epoch": 0.9976735643443124, "grad_norm": 2.2745769023895264, "learning_rate": 1.986876221320369e-05, "loss": 0.9555, "step": 6111 }, { "epoch": 0.9978368229868169, "grad_norm": 1.8462913036346436, "learning_rate": 1.986871096582642e-05, "loss": 0.7811, "step": 6112 }, { "epoch": 0.9980000816293213, "grad_norm": 1.734758734703064, "learning_rate": 1.9868659708511357e-05, "loss": 0.6561, "step": 6113 }, { "epoch": 0.9981633402718256, "grad_norm": 1.6623098850250244, "learning_rate": 1.9868608441258558e-05, "loss": 0.6602, "step": 6114 }, { "epoch": 0.9983265989143301, "grad_norm": 1.757601261138916, "learning_rate": 1.9868557164068073e-05, "loss": 0.7259, "step": 6115 }, { "epoch": 0.9984898575568344, "grad_norm": 2.138526439666748, "learning_rate": 1.9868505876939954e-05, "loss": 0.8225, "step": 6116 }, { "epoch": 0.9986531161993388, "grad_norm": 1.7662920951843262, "learning_rate": 1.9868454579874255e-05, "loss": 0.723, "step": 6117 }, { "epoch": 0.9988163748418432, "grad_norm": 1.8942501544952393, "learning_rate": 1.9868403272871023e-05, "loss": 0.771, "step": 6118 }, { "epoch": 0.9989796334843476, "grad_norm": 2.097524642944336, "learning_rate": 1.986835195593031e-05, "loss": 0.8086, "step": 6119 }, { "epoch": 0.9991428921268519, "grad_norm": 1.8089945316314697, "learning_rate": 1.986830062905217e-05, "loss": 0.7581, "step": 6120 }, { "epoch": 0.9993061507693564, "grad_norm": 1.7585166692733765, "learning_rate": 1.986824929223665e-05, "loss": 0.8141, "step": 6121 }, { "epoch": 0.9994694094118607, "grad_norm": 1.9036017656326294, "learning_rate": 1.9868197945483813e-05, "loss": 0.7553, "step": 6122 }, { "epoch": 0.9996326680543651, "grad_norm": 1.3933123350143433, "learning_rate": 1.98681465887937e-05, "loss": 0.5856, "step": 6123 }, { "epoch": 0.9997959266968696, "grad_norm": 1.8385380506515503, "learning_rate": 1.9868095222166364e-05, "loss": 0.6761, "step": 6124 }, { "epoch": 0.9999591853393739, "grad_norm": 1.7274433374404907, "learning_rate": 1.9868043845601863e-05, "loss": 0.7228, "step": 6125 }, { "epoch": 1.0, "grad_norm": 4.170963287353516, "learning_rate": 1.986799245910024e-05, "loss": 0.9424, "step": 6126 }, { "epoch": 1.0001632586425044, "grad_norm": 1.607887864112854, "learning_rate": 1.9867941062661555e-05, "loss": 0.5744, "step": 6127 }, { "epoch": 1.0003265172850089, "grad_norm": 1.720959186553955, "learning_rate": 1.9867889656285854e-05, "loss": 0.7406, "step": 6128 }, { "epoch": 1.000489775927513, "grad_norm": 1.9235025644302368, "learning_rate": 1.986783823997319e-05, "loss": 0.8196, "step": 6129 }, { "epoch": 1.0006530345700175, "grad_norm": 2.0748870372772217, "learning_rate": 1.9867786813723615e-05, "loss": 0.7751, "step": 6130 }, { "epoch": 1.000816293212522, "grad_norm": 1.7143235206604004, "learning_rate": 1.9867735377537186e-05, "loss": 0.7054, "step": 6131 }, { "epoch": 1.0009795518550264, "grad_norm": 1.737565040588379, "learning_rate": 1.9867683931413942e-05, "loss": 0.6615, "step": 6132 }, { "epoch": 1.0011428104975306, "grad_norm": 1.840692162513733, "learning_rate": 1.986763247535395e-05, "loss": 0.6369, "step": 6133 }, { "epoch": 1.001306069140035, "grad_norm": 1.8484382629394531, "learning_rate": 1.986758100935725e-05, "loss": 0.8318, "step": 6134 }, { "epoch": 1.0014693277825395, "grad_norm": 1.841214895248413, "learning_rate": 1.9867529533423903e-05, "loss": 0.7698, "step": 6135 }, { "epoch": 1.001632586425044, "grad_norm": 2.356635808944702, "learning_rate": 1.9867478047553957e-05, "loss": 0.9288, "step": 6136 }, { "epoch": 1.0017958450675484, "grad_norm": 1.6894954442977905, "learning_rate": 1.9867426551747457e-05, "loss": 0.7143, "step": 6137 }, { "epoch": 1.0019591037100526, "grad_norm": 1.8318668603897095, "learning_rate": 1.9867375046004467e-05, "loss": 0.7396, "step": 6138 }, { "epoch": 1.002122362352557, "grad_norm": 1.9993305206298828, "learning_rate": 1.986732353032503e-05, "loss": 0.753, "step": 6139 }, { "epoch": 1.0022856209950615, "grad_norm": 1.6902981996536255, "learning_rate": 1.98672720047092e-05, "loss": 0.5345, "step": 6140 }, { "epoch": 1.002448879637566, "grad_norm": 1.8155654668807983, "learning_rate": 1.9867220469157035e-05, "loss": 0.7035, "step": 6141 }, { "epoch": 1.0026121382800701, "grad_norm": 1.6589241027832031, "learning_rate": 1.9867168923668573e-05, "loss": 0.6555, "step": 6142 }, { "epoch": 1.0027753969225746, "grad_norm": 2.3357906341552734, "learning_rate": 1.986711736824388e-05, "loss": 0.8881, "step": 6143 }, { "epoch": 1.002938655565079, "grad_norm": 1.7395215034484863, "learning_rate": 1.9867065802883004e-05, "loss": 0.7562, "step": 6144 }, { "epoch": 1.0031019142075834, "grad_norm": 1.889523983001709, "learning_rate": 1.9867014227585992e-05, "loss": 0.7889, "step": 6145 }, { "epoch": 1.0032651728500876, "grad_norm": 1.6654964685440063, "learning_rate": 1.98669626423529e-05, "loss": 0.5987, "step": 6146 }, { "epoch": 1.003428431492592, "grad_norm": 1.7797136306762695, "learning_rate": 1.9866911047183782e-05, "loss": 0.669, "step": 6147 }, { "epoch": 1.0035916901350965, "grad_norm": 2.866408586502075, "learning_rate": 1.986685944207868e-05, "loss": 0.7728, "step": 6148 }, { "epoch": 1.003754948777601, "grad_norm": 1.8780059814453125, "learning_rate": 1.986680782703766e-05, "loss": 0.6784, "step": 6149 }, { "epoch": 1.0039182074201054, "grad_norm": 1.9424911737442017, "learning_rate": 1.9866756202060764e-05, "loss": 0.7224, "step": 6150 }, { "epoch": 1.0040814660626096, "grad_norm": 1.5416868925094604, "learning_rate": 1.986670456714805e-05, "loss": 0.7022, "step": 6151 }, { "epoch": 1.004244724705114, "grad_norm": 1.6087270975112915, "learning_rate": 1.9866652922299563e-05, "loss": 0.682, "step": 6152 }, { "epoch": 1.0044079833476185, "grad_norm": 2.1810262203216553, "learning_rate": 1.9866601267515363e-05, "loss": 0.8818, "step": 6153 }, { "epoch": 1.004571241990123, "grad_norm": 2.1084063053131104, "learning_rate": 1.9866549602795494e-05, "loss": 0.8209, "step": 6154 }, { "epoch": 1.0047345006326271, "grad_norm": 1.9017915725708008, "learning_rate": 1.9866497928140017e-05, "loss": 0.8047, "step": 6155 }, { "epoch": 1.0048977592751316, "grad_norm": 1.7743273973464966, "learning_rate": 1.986644624354898e-05, "loss": 0.6562, "step": 6156 }, { "epoch": 1.005061017917636, "grad_norm": 1.8690946102142334, "learning_rate": 1.986639454902243e-05, "loss": 0.7204, "step": 6157 }, { "epoch": 1.0052242765601405, "grad_norm": 1.727832555770874, "learning_rate": 1.9866342844560422e-05, "loss": 0.6859, "step": 6158 }, { "epoch": 1.005387535202645, "grad_norm": 1.7663624286651611, "learning_rate": 1.9866291130163013e-05, "loss": 0.7951, "step": 6159 }, { "epoch": 1.0055507938451491, "grad_norm": 1.5686070919036865, "learning_rate": 1.986623940583025e-05, "loss": 0.6854, "step": 6160 }, { "epoch": 1.0057140524876536, "grad_norm": 1.9059823751449585, "learning_rate": 1.9866187671562185e-05, "loss": 0.7349, "step": 6161 }, { "epoch": 1.005877311130158, "grad_norm": 1.5141847133636475, "learning_rate": 1.9866135927358872e-05, "loss": 0.5795, "step": 6162 }, { "epoch": 1.0060405697726624, "grad_norm": 1.505925178527832, "learning_rate": 1.9866084173220364e-05, "loss": 0.5691, "step": 6163 }, { "epoch": 1.0062038284151666, "grad_norm": 1.7032802104949951, "learning_rate": 1.9866032409146716e-05, "loss": 0.7281, "step": 6164 }, { "epoch": 1.006367087057671, "grad_norm": 1.741918683052063, "learning_rate": 1.986598063513797e-05, "loss": 0.7371, "step": 6165 }, { "epoch": 1.0065303457001755, "grad_norm": 1.6459757089614868, "learning_rate": 1.986592885119419e-05, "loss": 0.6124, "step": 6166 }, { "epoch": 1.00669360434268, "grad_norm": 1.9404174089431763, "learning_rate": 1.9865877057315416e-05, "loss": 0.7492, "step": 6167 }, { "epoch": 1.0068568629851842, "grad_norm": 1.5976184606552124, "learning_rate": 1.9865825253501708e-05, "loss": 0.5577, "step": 6168 }, { "epoch": 1.0070201216276886, "grad_norm": 1.6678483486175537, "learning_rate": 1.9865773439753118e-05, "loss": 0.6452, "step": 6169 }, { "epoch": 1.007183380270193, "grad_norm": 1.5222314596176147, "learning_rate": 1.9865721616069695e-05, "loss": 0.6555, "step": 6170 }, { "epoch": 1.0073466389126975, "grad_norm": 1.7038832902908325, "learning_rate": 1.9865669782451493e-05, "loss": 0.798, "step": 6171 }, { "epoch": 1.007509897555202, "grad_norm": 1.8904991149902344, "learning_rate": 1.9865617938898568e-05, "loss": 0.8571, "step": 6172 }, { "epoch": 1.0076731561977061, "grad_norm": 1.5897445678710938, "learning_rate": 1.9865566085410966e-05, "loss": 0.6717, "step": 6173 }, { "epoch": 1.0078364148402106, "grad_norm": 1.865080714225769, "learning_rate": 1.986551422198874e-05, "loss": 0.803, "step": 6174 }, { "epoch": 1.007999673482715, "grad_norm": 1.6964479684829712, "learning_rate": 1.9865462348631945e-05, "loss": 0.6113, "step": 6175 }, { "epoch": 1.0081629321252195, "grad_norm": 1.9108397960662842, "learning_rate": 1.9865410465340635e-05, "loss": 0.7235, "step": 6176 }, { "epoch": 1.0083261907677237, "grad_norm": 2.0547409057617188, "learning_rate": 1.9865358572114855e-05, "loss": 0.779, "step": 6177 }, { "epoch": 1.0084894494102281, "grad_norm": 1.8377087116241455, "learning_rate": 1.9865306668954662e-05, "loss": 0.678, "step": 6178 }, { "epoch": 1.0086527080527325, "grad_norm": 1.8337879180908203, "learning_rate": 1.9865254755860105e-05, "loss": 0.6748, "step": 6179 }, { "epoch": 1.008815966695237, "grad_norm": 1.7516900300979614, "learning_rate": 1.9865202832831243e-05, "loss": 0.7469, "step": 6180 }, { "epoch": 1.0089792253377414, "grad_norm": 2.2443320751190186, "learning_rate": 1.9865150899868126e-05, "loss": 0.6674, "step": 6181 }, { "epoch": 1.0091424839802456, "grad_norm": 1.7198677062988281, "learning_rate": 1.9865098956970802e-05, "loss": 0.607, "step": 6182 }, { "epoch": 1.00930574262275, "grad_norm": 1.375084400177002, "learning_rate": 1.9865047004139327e-05, "loss": 0.4885, "step": 6183 }, { "epoch": 1.0094690012652545, "grad_norm": 1.7029366493225098, "learning_rate": 1.9864995041373755e-05, "loss": 0.6846, "step": 6184 }, { "epoch": 1.009632259907759, "grad_norm": 1.5492157936096191, "learning_rate": 1.986494306867413e-05, "loss": 0.5639, "step": 6185 }, { "epoch": 1.0097955185502632, "grad_norm": 1.7408475875854492, "learning_rate": 1.9864891086040515e-05, "loss": 0.9219, "step": 6186 }, { "epoch": 1.0099587771927676, "grad_norm": 1.8497453927993774, "learning_rate": 1.9864839093472952e-05, "loss": 0.6364, "step": 6187 }, { "epoch": 1.010122035835272, "grad_norm": 1.5509625673294067, "learning_rate": 1.9864787090971502e-05, "loss": 0.6101, "step": 6188 }, { "epoch": 1.0102852944777765, "grad_norm": 1.7902127504348755, "learning_rate": 1.9864735078536213e-05, "loss": 0.6635, "step": 6189 }, { "epoch": 1.0104485531202807, "grad_norm": 2.0763742923736572, "learning_rate": 1.9864683056167137e-05, "loss": 0.767, "step": 6190 }, { "epoch": 1.0106118117627851, "grad_norm": 1.8266562223434448, "learning_rate": 1.986463102386433e-05, "loss": 0.6773, "step": 6191 }, { "epoch": 1.0107750704052896, "grad_norm": 2.068077325820923, "learning_rate": 1.9864578981627844e-05, "loss": 0.834, "step": 6192 }, { "epoch": 1.010938329047794, "grad_norm": 1.7848087549209595, "learning_rate": 1.9864526929457727e-05, "loss": 0.7425, "step": 6193 }, { "epoch": 1.0111015876902985, "grad_norm": 1.8313685655593872, "learning_rate": 1.9864474867354035e-05, "loss": 0.6767, "step": 6194 }, { "epoch": 1.0112648463328027, "grad_norm": 1.8804821968078613, "learning_rate": 1.9864422795316818e-05, "loss": 0.6778, "step": 6195 }, { "epoch": 1.011428104975307, "grad_norm": 1.555083155632019, "learning_rate": 1.986437071334613e-05, "loss": 0.615, "step": 6196 }, { "epoch": 1.0115913636178115, "grad_norm": 1.920152187347412, "learning_rate": 1.9864318621442024e-05, "loss": 0.8044, "step": 6197 }, { "epoch": 1.011754622260316, "grad_norm": 1.6950316429138184, "learning_rate": 1.9864266519604554e-05, "loss": 0.6972, "step": 6198 }, { "epoch": 1.0119178809028202, "grad_norm": 2.0170254707336426, "learning_rate": 1.9864214407833767e-05, "loss": 0.8063, "step": 6199 }, { "epoch": 1.0120811395453246, "grad_norm": 1.9544119834899902, "learning_rate": 1.986416228612972e-05, "loss": 0.8022, "step": 6200 }, { "epoch": 1.012244398187829, "grad_norm": 1.925719141960144, "learning_rate": 1.9864110154492463e-05, "loss": 0.7458, "step": 6201 }, { "epoch": 1.0124076568303335, "grad_norm": 2.0820651054382324, "learning_rate": 1.986405801292205e-05, "loss": 0.8773, "step": 6202 }, { "epoch": 1.012570915472838, "grad_norm": 1.848922848701477, "learning_rate": 1.9864005861418537e-05, "loss": 0.6056, "step": 6203 }, { "epoch": 1.0127341741153422, "grad_norm": 1.9207857847213745, "learning_rate": 1.9863953699981966e-05, "loss": 0.7101, "step": 6204 }, { "epoch": 1.0128974327578466, "grad_norm": 1.673141360282898, "learning_rate": 1.9863901528612402e-05, "loss": 0.6234, "step": 6205 }, { "epoch": 1.013060691400351, "grad_norm": 1.8589849472045898, "learning_rate": 1.986384934730989e-05, "loss": 0.7866, "step": 6206 }, { "epoch": 1.0132239500428555, "grad_norm": 2.122983932495117, "learning_rate": 1.9863797156074484e-05, "loss": 0.7326, "step": 6207 }, { "epoch": 1.0133872086853597, "grad_norm": 1.9577350616455078, "learning_rate": 1.9863744954906233e-05, "loss": 0.7529, "step": 6208 }, { "epoch": 1.0135504673278641, "grad_norm": 1.7594752311706543, "learning_rate": 1.98636927438052e-05, "loss": 0.7112, "step": 6209 }, { "epoch": 1.0137137259703686, "grad_norm": 1.7207964658737183, "learning_rate": 1.986364052277143e-05, "loss": 0.6229, "step": 6210 }, { "epoch": 1.013876984612873, "grad_norm": 1.7278751134872437, "learning_rate": 1.9863588291804973e-05, "loss": 0.838, "step": 6211 }, { "epoch": 1.0140402432553772, "grad_norm": 1.7588289976119995, "learning_rate": 1.9863536050905886e-05, "loss": 0.6742, "step": 6212 }, { "epoch": 1.0142035018978817, "grad_norm": 1.84909188747406, "learning_rate": 1.986348380007422e-05, "loss": 0.6163, "step": 6213 }, { "epoch": 1.014366760540386, "grad_norm": 1.7126491069793701, "learning_rate": 1.9863431539310033e-05, "loss": 0.6796, "step": 6214 }, { "epoch": 1.0145300191828905, "grad_norm": 1.6727677583694458, "learning_rate": 1.986337926861337e-05, "loss": 0.6452, "step": 6215 }, { "epoch": 1.014693277825395, "grad_norm": 1.600612998008728, "learning_rate": 1.9863326987984286e-05, "loss": 0.667, "step": 6216 }, { "epoch": 1.0148565364678992, "grad_norm": 1.7464735507965088, "learning_rate": 1.9863274697422835e-05, "loss": 0.6719, "step": 6217 }, { "epoch": 1.0150197951104036, "grad_norm": 1.9114577770233154, "learning_rate": 1.9863222396929068e-05, "loss": 0.7442, "step": 6218 }, { "epoch": 1.015183053752908, "grad_norm": 1.8738914728164673, "learning_rate": 1.9863170086503044e-05, "loss": 0.6257, "step": 6219 }, { "epoch": 1.0153463123954125, "grad_norm": 1.7792226076126099, "learning_rate": 1.9863117766144807e-05, "loss": 0.7197, "step": 6220 }, { "epoch": 1.0155095710379167, "grad_norm": 1.628347396850586, "learning_rate": 1.986306543585441e-05, "loss": 0.6001, "step": 6221 }, { "epoch": 1.0156728296804212, "grad_norm": 2.1227853298187256, "learning_rate": 1.9863013095631912e-05, "loss": 0.7956, "step": 6222 }, { "epoch": 1.0158360883229256, "grad_norm": 1.9262471199035645, "learning_rate": 1.986296074547736e-05, "loss": 0.8666, "step": 6223 }, { "epoch": 1.01599934696543, "grad_norm": 1.8963056802749634, "learning_rate": 1.986290838539081e-05, "loss": 0.7368, "step": 6224 }, { "epoch": 1.0161626056079345, "grad_norm": 1.8621140718460083, "learning_rate": 1.9862856015372315e-05, "loss": 0.827, "step": 6225 }, { "epoch": 1.0163258642504387, "grad_norm": 1.6787586212158203, "learning_rate": 1.986280363542193e-05, "loss": 0.6474, "step": 6226 }, { "epoch": 1.0164891228929431, "grad_norm": 1.8509377241134644, "learning_rate": 1.9862751245539698e-05, "loss": 0.6509, "step": 6227 }, { "epoch": 1.0166523815354476, "grad_norm": 1.5820976495742798, "learning_rate": 1.9862698845725677e-05, "loss": 0.5479, "step": 6228 }, { "epoch": 1.016815640177952, "grad_norm": 1.8056031465530396, "learning_rate": 1.9862646435979928e-05, "loss": 0.6627, "step": 6229 }, { "epoch": 1.0169788988204562, "grad_norm": 2.0000226497650146, "learning_rate": 1.9862594016302493e-05, "loss": 0.7842, "step": 6230 }, { "epoch": 1.0171421574629607, "grad_norm": 2.152437686920166, "learning_rate": 1.9862541586693428e-05, "loss": 0.7431, "step": 6231 }, { "epoch": 1.017305416105465, "grad_norm": 1.9848170280456543, "learning_rate": 1.9862489147152786e-05, "loss": 0.7583, "step": 6232 }, { "epoch": 1.0174686747479695, "grad_norm": 1.7627274990081787, "learning_rate": 1.986243669768062e-05, "loss": 0.7277, "step": 6233 }, { "epoch": 1.0176319333904738, "grad_norm": 1.600558876991272, "learning_rate": 1.9862384238276986e-05, "loss": 0.6123, "step": 6234 }, { "epoch": 1.0177951920329782, "grad_norm": 2.0893051624298096, "learning_rate": 1.986233176894193e-05, "loss": 0.8007, "step": 6235 }, { "epoch": 1.0179584506754826, "grad_norm": 1.8009285926818848, "learning_rate": 1.986227928967551e-05, "loss": 0.6951, "step": 6236 }, { "epoch": 1.018121709317987, "grad_norm": 2.0503101348876953, "learning_rate": 1.986222680047778e-05, "loss": 0.7789, "step": 6237 }, { "epoch": 1.0182849679604915, "grad_norm": 1.8305678367614746, "learning_rate": 1.9862174301348783e-05, "loss": 0.723, "step": 6238 }, { "epoch": 1.0184482266029957, "grad_norm": 1.906397819519043, "learning_rate": 1.9862121792288586e-05, "loss": 0.7958, "step": 6239 }, { "epoch": 1.0186114852455002, "grad_norm": 2.1317081451416016, "learning_rate": 1.9862069273297233e-05, "loss": 0.6731, "step": 6240 }, { "epoch": 1.0187747438880046, "grad_norm": 2.286470413208008, "learning_rate": 1.9862016744374778e-05, "loss": 0.7268, "step": 6241 }, { "epoch": 1.018938002530509, "grad_norm": 1.6786940097808838, "learning_rate": 1.9861964205521274e-05, "loss": 0.635, "step": 6242 }, { "epoch": 1.0191012611730133, "grad_norm": 1.821219563484192, "learning_rate": 1.9861911656736778e-05, "loss": 0.7595, "step": 6243 }, { "epoch": 1.0192645198155177, "grad_norm": 1.8271708488464355, "learning_rate": 1.9861859098021338e-05, "loss": 0.6791, "step": 6244 }, { "epoch": 1.0194277784580221, "grad_norm": 1.7191015481948853, "learning_rate": 1.9861806529375006e-05, "loss": 0.647, "step": 6245 }, { "epoch": 1.0195910371005266, "grad_norm": 1.7316787242889404, "learning_rate": 1.986175395079784e-05, "loss": 0.702, "step": 6246 }, { "epoch": 1.019754295743031, "grad_norm": 1.6700059175491333, "learning_rate": 1.9861701362289892e-05, "loss": 0.5623, "step": 6247 }, { "epoch": 1.0199175543855352, "grad_norm": 1.7218483686447144, "learning_rate": 1.986164876385121e-05, "loss": 0.6324, "step": 6248 }, { "epoch": 1.0200808130280397, "grad_norm": 1.9733182191848755, "learning_rate": 1.986159615548185e-05, "loss": 0.8765, "step": 6249 }, { "epoch": 1.020244071670544, "grad_norm": 1.9796265363693237, "learning_rate": 1.986154353718187e-05, "loss": 0.7411, "step": 6250 }, { "epoch": 1.0204073303130485, "grad_norm": 1.5361871719360352, "learning_rate": 1.9861490908951312e-05, "loss": 0.6553, "step": 6251 }, { "epoch": 1.0205705889555527, "grad_norm": 2.1760194301605225, "learning_rate": 1.986143827079024e-05, "loss": 0.8365, "step": 6252 }, { "epoch": 1.0207338475980572, "grad_norm": 1.891868233680725, "learning_rate": 1.98613856226987e-05, "loss": 0.7455, "step": 6253 }, { "epoch": 1.0208971062405616, "grad_norm": 1.712141513824463, "learning_rate": 1.9861332964676747e-05, "loss": 0.6784, "step": 6254 }, { "epoch": 1.021060364883066, "grad_norm": 2.3060672283172607, "learning_rate": 1.9861280296724438e-05, "loss": 0.6736, "step": 6255 }, { "epoch": 1.0212236235255703, "grad_norm": 1.726008415222168, "learning_rate": 1.9861227618841817e-05, "loss": 0.6931, "step": 6256 }, { "epoch": 1.0213868821680747, "grad_norm": 1.6330381631851196, "learning_rate": 1.9861174931028943e-05, "loss": 0.5625, "step": 6257 }, { "epoch": 1.0215501408105792, "grad_norm": 2.276740074157715, "learning_rate": 1.9861122233285873e-05, "loss": 0.816, "step": 6258 }, { "epoch": 1.0217133994530836, "grad_norm": 2.1491215229034424, "learning_rate": 1.9861069525612652e-05, "loss": 0.8122, "step": 6259 }, { "epoch": 1.021876658095588, "grad_norm": 1.961983561515808, "learning_rate": 1.9861016808009335e-05, "loss": 0.711, "step": 6260 }, { "epoch": 1.0220399167380922, "grad_norm": 1.800482988357544, "learning_rate": 1.986096408047598e-05, "loss": 0.6433, "step": 6261 }, { "epoch": 1.0222031753805967, "grad_norm": 1.8586786985397339, "learning_rate": 1.9860911343012638e-05, "loss": 0.6463, "step": 6262 }, { "epoch": 1.0223664340231011, "grad_norm": 1.9219046831130981, "learning_rate": 1.986085859561936e-05, "loss": 0.5772, "step": 6263 }, { "epoch": 1.0225296926656056, "grad_norm": 2.07528018951416, "learning_rate": 1.9860805838296197e-05, "loss": 0.7872, "step": 6264 }, { "epoch": 1.0226929513081098, "grad_norm": 2.04831862449646, "learning_rate": 1.9860753071043207e-05, "loss": 0.6821, "step": 6265 }, { "epoch": 1.0228562099506142, "grad_norm": 2.2361581325531006, "learning_rate": 1.9860700293860444e-05, "loss": 0.692, "step": 6266 }, { "epoch": 1.0230194685931187, "grad_norm": 2.163529396057129, "learning_rate": 1.9860647506747953e-05, "loss": 0.7447, "step": 6267 }, { "epoch": 1.023182727235623, "grad_norm": 2.1037120819091797, "learning_rate": 1.9860594709705797e-05, "loss": 0.9543, "step": 6268 }, { "epoch": 1.0233459858781275, "grad_norm": 1.9110475778579712, "learning_rate": 1.9860541902734023e-05, "loss": 0.7081, "step": 6269 }, { "epoch": 1.0235092445206317, "grad_norm": 1.7073440551757812, "learning_rate": 1.9860489085832685e-05, "loss": 0.6178, "step": 6270 }, { "epoch": 1.0236725031631362, "grad_norm": 1.8464313745498657, "learning_rate": 1.9860436259001837e-05, "loss": 0.7466, "step": 6271 }, { "epoch": 1.0238357618056406, "grad_norm": 1.9696903228759766, "learning_rate": 1.9860383422241534e-05, "loss": 0.7249, "step": 6272 }, { "epoch": 1.023999020448145, "grad_norm": 2.1451330184936523, "learning_rate": 1.9860330575551826e-05, "loss": 0.7367, "step": 6273 }, { "epoch": 1.0241622790906493, "grad_norm": 1.6812928915023804, "learning_rate": 1.986027771893277e-05, "loss": 0.6759, "step": 6274 }, { "epoch": 1.0243255377331537, "grad_norm": 1.6618458032608032, "learning_rate": 1.9860224852384416e-05, "loss": 0.7328, "step": 6275 }, { "epoch": 1.0244887963756582, "grad_norm": 2.396059513092041, "learning_rate": 1.9860171975906815e-05, "loss": 0.847, "step": 6276 }, { "epoch": 1.0246520550181626, "grad_norm": 1.9034167528152466, "learning_rate": 1.986011908950003e-05, "loss": 0.6862, "step": 6277 }, { "epoch": 1.0248153136606668, "grad_norm": 1.7377004623413086, "learning_rate": 1.9860066193164102e-05, "loss": 0.7185, "step": 6278 }, { "epoch": 1.0249785723031712, "grad_norm": 2.1314213275909424, "learning_rate": 1.986001328689909e-05, "loss": 0.8233, "step": 6279 }, { "epoch": 1.0251418309456757, "grad_norm": 1.7401275634765625, "learning_rate": 1.985996037070505e-05, "loss": 0.7173, "step": 6280 }, { "epoch": 1.0253050895881801, "grad_norm": 1.8057104349136353, "learning_rate": 1.9859907444582032e-05, "loss": 0.6848, "step": 6281 }, { "epoch": 1.0254683482306846, "grad_norm": 1.7019447088241577, "learning_rate": 1.985985450853009e-05, "loss": 0.716, "step": 6282 }, { "epoch": 1.0256316068731888, "grad_norm": 1.6462762355804443, "learning_rate": 1.9859801562549277e-05, "loss": 0.6676, "step": 6283 }, { "epoch": 1.0257948655156932, "grad_norm": 1.7857152223587036, "learning_rate": 1.9859748606639644e-05, "loss": 0.6541, "step": 6284 }, { "epoch": 1.0259581241581976, "grad_norm": 1.4962654113769531, "learning_rate": 1.985969564080125e-05, "loss": 0.6499, "step": 6285 }, { "epoch": 1.026121382800702, "grad_norm": 1.9302033185958862, "learning_rate": 1.9859642665034146e-05, "loss": 0.7586, "step": 6286 }, { "epoch": 1.0262846414432063, "grad_norm": 1.5182029008865356, "learning_rate": 1.985958967933838e-05, "loss": 0.7166, "step": 6287 }, { "epoch": 1.0264479000857107, "grad_norm": 1.6593518257141113, "learning_rate": 1.9859536683714014e-05, "loss": 0.7362, "step": 6288 }, { "epoch": 1.0266111587282152, "grad_norm": 1.9190257787704468, "learning_rate": 1.9859483678161092e-05, "loss": 0.7138, "step": 6289 }, { "epoch": 1.0267744173707196, "grad_norm": 1.7671862840652466, "learning_rate": 1.9859430662679676e-05, "loss": 0.6131, "step": 6290 }, { "epoch": 1.026937676013224, "grad_norm": 1.673975944519043, "learning_rate": 1.9859377637269817e-05, "loss": 0.5403, "step": 6291 }, { "epoch": 1.0271009346557283, "grad_norm": 1.8193539381027222, "learning_rate": 1.9859324601931567e-05, "loss": 0.7647, "step": 6292 }, { "epoch": 1.0272641932982327, "grad_norm": 1.7898828983306885, "learning_rate": 1.985927155666498e-05, "loss": 0.7991, "step": 6293 }, { "epoch": 1.0274274519407371, "grad_norm": 1.6811909675598145, "learning_rate": 1.9859218501470105e-05, "loss": 0.58, "step": 6294 }, { "epoch": 1.0275907105832416, "grad_norm": 1.752687931060791, "learning_rate": 1.9859165436347006e-05, "loss": 0.7363, "step": 6295 }, { "epoch": 1.0277539692257458, "grad_norm": 1.6997674703598022, "learning_rate": 1.9859112361295724e-05, "loss": 0.6956, "step": 6296 }, { "epoch": 1.0279172278682502, "grad_norm": 1.5739259719848633, "learning_rate": 1.9859059276316322e-05, "loss": 0.6887, "step": 6297 }, { "epoch": 1.0280804865107547, "grad_norm": 1.8841649293899536, "learning_rate": 1.9859006181408847e-05, "loss": 0.7953, "step": 6298 }, { "epoch": 1.0282437451532591, "grad_norm": 1.8035109043121338, "learning_rate": 1.985895307657336e-05, "loss": 0.7746, "step": 6299 }, { "epoch": 1.0284070037957633, "grad_norm": 1.9514487981796265, "learning_rate": 1.9858899961809904e-05, "loss": 0.7103, "step": 6300 }, { "epoch": 1.0285702624382678, "grad_norm": 1.7052786350250244, "learning_rate": 1.9858846837118545e-05, "loss": 0.7442, "step": 6301 }, { "epoch": 1.0287335210807722, "grad_norm": 1.7776670455932617, "learning_rate": 1.9858793702499322e-05, "loss": 0.6829, "step": 6302 }, { "epoch": 1.0288967797232766, "grad_norm": 1.647672414779663, "learning_rate": 1.9858740557952304e-05, "loss": 0.6907, "step": 6303 }, { "epoch": 1.029060038365781, "grad_norm": 1.936865210533142, "learning_rate": 1.9858687403477535e-05, "loss": 0.8271, "step": 6304 }, { "epoch": 1.0292232970082853, "grad_norm": 1.9570997953414917, "learning_rate": 1.9858634239075066e-05, "loss": 0.7679, "step": 6305 }, { "epoch": 1.0293865556507897, "grad_norm": 1.9535030126571655, "learning_rate": 1.985858106474496e-05, "loss": 0.7658, "step": 6306 }, { "epoch": 1.0295498142932942, "grad_norm": 1.6436525583267212, "learning_rate": 1.9858527880487263e-05, "loss": 0.6313, "step": 6307 }, { "epoch": 1.0297130729357986, "grad_norm": 1.9210878610610962, "learning_rate": 1.985847468630203e-05, "loss": 0.6596, "step": 6308 }, { "epoch": 1.0298763315783028, "grad_norm": 1.8534647226333618, "learning_rate": 1.9858421482189318e-05, "loss": 0.7182, "step": 6309 }, { "epoch": 1.0300395902208073, "grad_norm": 2.131354570388794, "learning_rate": 1.985836826814918e-05, "loss": 0.8707, "step": 6310 }, { "epoch": 1.0302028488633117, "grad_norm": 1.7080912590026855, "learning_rate": 1.9858315044181666e-05, "loss": 0.5776, "step": 6311 }, { "epoch": 1.0303661075058161, "grad_norm": 1.8762413263320923, "learning_rate": 1.9858261810286828e-05, "loss": 0.6788, "step": 6312 }, { "epoch": 1.0305293661483206, "grad_norm": 2.174556255340576, "learning_rate": 1.9858208566464726e-05, "loss": 0.8724, "step": 6313 }, { "epoch": 1.0306926247908248, "grad_norm": 2.1115877628326416, "learning_rate": 1.985815531271541e-05, "loss": 0.6962, "step": 6314 }, { "epoch": 1.0308558834333292, "grad_norm": 1.936353325843811, "learning_rate": 1.9858102049038933e-05, "loss": 0.7639, "step": 6315 }, { "epoch": 1.0310191420758337, "grad_norm": 2.2395389080047607, "learning_rate": 1.9858048775435353e-05, "loss": 0.6992, "step": 6316 }, { "epoch": 1.031182400718338, "grad_norm": 2.0371451377868652, "learning_rate": 1.985799549190472e-05, "loss": 0.6941, "step": 6317 }, { "epoch": 1.0313456593608423, "grad_norm": 2.105923652648926, "learning_rate": 1.9857942198447084e-05, "loss": 0.8578, "step": 6318 }, { "epoch": 1.0315089180033468, "grad_norm": 1.6982399225234985, "learning_rate": 1.9857888895062506e-05, "loss": 0.5982, "step": 6319 }, { "epoch": 1.0316721766458512, "grad_norm": 1.8045004606246948, "learning_rate": 1.9857835581751038e-05, "loss": 0.6575, "step": 6320 }, { "epoch": 1.0318354352883556, "grad_norm": 1.7899636030197144, "learning_rate": 1.9857782258512726e-05, "loss": 0.5769, "step": 6321 }, { "epoch": 1.03199869393086, "grad_norm": 2.0876801013946533, "learning_rate": 1.9857728925347636e-05, "loss": 0.7906, "step": 6322 }, { "epoch": 1.0321619525733643, "grad_norm": 1.9150800704956055, "learning_rate": 1.9857675582255814e-05, "loss": 0.7853, "step": 6323 }, { "epoch": 1.0323252112158687, "grad_norm": 1.9456051588058472, "learning_rate": 1.9857622229237315e-05, "loss": 0.8225, "step": 6324 }, { "epoch": 1.0324884698583732, "grad_norm": 1.776223063468933, "learning_rate": 1.9857568866292193e-05, "loss": 0.7346, "step": 6325 }, { "epoch": 1.0326517285008776, "grad_norm": 1.9625061750411987, "learning_rate": 1.9857515493420502e-05, "loss": 0.7034, "step": 6326 }, { "epoch": 1.0328149871433818, "grad_norm": 1.841731071472168, "learning_rate": 1.9857462110622293e-05, "loss": 0.625, "step": 6327 }, { "epoch": 1.0329782457858863, "grad_norm": 1.9265470504760742, "learning_rate": 1.9857408717897627e-05, "loss": 0.7014, "step": 6328 }, { "epoch": 1.0331415044283907, "grad_norm": 1.795730471611023, "learning_rate": 1.985735531524655e-05, "loss": 0.6803, "step": 6329 }, { "epoch": 1.0333047630708951, "grad_norm": 1.9166145324707031, "learning_rate": 1.985730190266912e-05, "loss": 0.653, "step": 6330 }, { "epoch": 1.0334680217133994, "grad_norm": 1.8467719554901123, "learning_rate": 1.9857248480165388e-05, "loss": 0.6842, "step": 6331 }, { "epoch": 1.0336312803559038, "grad_norm": 2.0247559547424316, "learning_rate": 1.9857195047735412e-05, "loss": 0.6433, "step": 6332 }, { "epoch": 1.0337945389984082, "grad_norm": 2.146075487136841, "learning_rate": 1.985714160537924e-05, "loss": 0.7956, "step": 6333 }, { "epoch": 1.0339577976409127, "grad_norm": 1.7712678909301758, "learning_rate": 1.985708815309693e-05, "loss": 0.5848, "step": 6334 }, { "epoch": 1.034121056283417, "grad_norm": 1.7628885507583618, "learning_rate": 1.985703469088854e-05, "loss": 0.7027, "step": 6335 }, { "epoch": 1.0342843149259213, "grad_norm": 2.074078321456909, "learning_rate": 1.985698121875411e-05, "loss": 0.9274, "step": 6336 }, { "epoch": 1.0344475735684258, "grad_norm": 1.541096806526184, "learning_rate": 1.9856927736693706e-05, "loss": 0.6185, "step": 6337 }, { "epoch": 1.0346108322109302, "grad_norm": 2.016784191131592, "learning_rate": 1.9856874244707383e-05, "loss": 0.7035, "step": 6338 }, { "epoch": 1.0347740908534346, "grad_norm": 2.2863566875457764, "learning_rate": 1.9856820742795183e-05, "loss": 0.8084, "step": 6339 }, { "epoch": 1.0349373494959389, "grad_norm": 1.9221899509429932, "learning_rate": 1.9856767230957173e-05, "loss": 0.8719, "step": 6340 }, { "epoch": 1.0351006081384433, "grad_norm": 1.6395528316497803, "learning_rate": 1.9856713709193397e-05, "loss": 0.7319, "step": 6341 }, { "epoch": 1.0352638667809477, "grad_norm": 2.3567001819610596, "learning_rate": 1.9856660177503916e-05, "loss": 0.9236, "step": 6342 }, { "epoch": 1.0354271254234522, "grad_norm": 2.0187878608703613, "learning_rate": 1.9856606635888777e-05, "loss": 0.8507, "step": 6343 }, { "epoch": 1.0355903840659564, "grad_norm": 2.2076609134674072, "learning_rate": 1.985655308434804e-05, "loss": 0.7411, "step": 6344 }, { "epoch": 1.0357536427084608, "grad_norm": 1.939016342163086, "learning_rate": 1.985649952288176e-05, "loss": 0.657, "step": 6345 }, { "epoch": 1.0359169013509653, "grad_norm": 1.7595069408416748, "learning_rate": 1.9856445951489984e-05, "loss": 0.7662, "step": 6346 }, { "epoch": 1.0360801599934697, "grad_norm": 1.9005506038665771, "learning_rate": 1.9856392370172768e-05, "loss": 0.7351, "step": 6347 }, { "epoch": 1.0362434186359741, "grad_norm": 2.0733301639556885, "learning_rate": 1.985633877893017e-05, "loss": 0.8138, "step": 6348 }, { "epoch": 1.0364066772784783, "grad_norm": 1.927930474281311, "learning_rate": 1.985628517776224e-05, "loss": 0.7378, "step": 6349 }, { "epoch": 1.0365699359209828, "grad_norm": 1.5964467525482178, "learning_rate": 1.9856231566669036e-05, "loss": 0.6966, "step": 6350 }, { "epoch": 1.0367331945634872, "grad_norm": 1.8646775484085083, "learning_rate": 1.9856177945650607e-05, "loss": 0.7725, "step": 6351 }, { "epoch": 1.0368964532059917, "grad_norm": 1.489858865737915, "learning_rate": 1.9856124314707008e-05, "loss": 0.6389, "step": 6352 }, { "epoch": 1.0370597118484959, "grad_norm": 2.1021125316619873, "learning_rate": 1.9856070673838297e-05, "loss": 0.7079, "step": 6353 }, { "epoch": 1.0372229704910003, "grad_norm": 1.8392387628555298, "learning_rate": 1.9856017023044525e-05, "loss": 0.6704, "step": 6354 }, { "epoch": 1.0373862291335048, "grad_norm": 1.7700083255767822, "learning_rate": 1.9855963362325746e-05, "loss": 0.6521, "step": 6355 }, { "epoch": 1.0375494877760092, "grad_norm": 1.7325944900512695, "learning_rate": 1.9855909691682014e-05, "loss": 0.7651, "step": 6356 }, { "epoch": 1.0377127464185136, "grad_norm": 1.5628496408462524, "learning_rate": 1.9855856011113384e-05, "loss": 0.6273, "step": 6357 }, { "epoch": 1.0378760050610178, "grad_norm": 1.9006133079528809, "learning_rate": 1.9855802320619913e-05, "loss": 0.7559, "step": 6358 }, { "epoch": 1.0380392637035223, "grad_norm": 1.7265238761901855, "learning_rate": 1.9855748620201646e-05, "loss": 0.6388, "step": 6359 }, { "epoch": 1.0382025223460267, "grad_norm": 1.879228949546814, "learning_rate": 1.9855694909858645e-05, "loss": 0.7162, "step": 6360 }, { "epoch": 1.0383657809885312, "grad_norm": 2.1982500553131104, "learning_rate": 1.9855641189590963e-05, "loss": 0.9425, "step": 6361 }, { "epoch": 1.0385290396310354, "grad_norm": 1.7462934255599976, "learning_rate": 1.9855587459398654e-05, "loss": 0.6618, "step": 6362 }, { "epoch": 1.0386922982735398, "grad_norm": 1.8214459419250488, "learning_rate": 1.9855533719281768e-05, "loss": 0.6754, "step": 6363 }, { "epoch": 1.0388555569160443, "grad_norm": 1.4484686851501465, "learning_rate": 1.9855479969240363e-05, "loss": 0.5025, "step": 6364 }, { "epoch": 1.0390188155585487, "grad_norm": 1.9273101091384888, "learning_rate": 1.9855426209274493e-05, "loss": 0.7877, "step": 6365 }, { "epoch": 1.0391820742010531, "grad_norm": 1.6532548666000366, "learning_rate": 1.985537243938421e-05, "loss": 0.6323, "step": 6366 }, { "epoch": 1.0393453328435573, "grad_norm": 1.9814331531524658, "learning_rate": 1.9855318659569572e-05, "loss": 0.7283, "step": 6367 }, { "epoch": 1.0395085914860618, "grad_norm": 1.765197515487671, "learning_rate": 1.985526486983063e-05, "loss": 0.7197, "step": 6368 }, { "epoch": 1.0396718501285662, "grad_norm": 1.5571547746658325, "learning_rate": 1.985521107016744e-05, "loss": 0.6677, "step": 6369 }, { "epoch": 1.0398351087710707, "grad_norm": 1.9828777313232422, "learning_rate": 1.9855157260580052e-05, "loss": 0.8341, "step": 6370 }, { "epoch": 1.0399983674135749, "grad_norm": 1.760911464691162, "learning_rate": 1.9855103441068525e-05, "loss": 0.7723, "step": 6371 }, { "epoch": 1.0401616260560793, "grad_norm": 1.6029481887817383, "learning_rate": 1.985504961163291e-05, "loss": 0.8017, "step": 6372 }, { "epoch": 1.0403248846985838, "grad_norm": 1.9062387943267822, "learning_rate": 1.9854995772273266e-05, "loss": 0.7894, "step": 6373 }, { "epoch": 1.0404881433410882, "grad_norm": 1.6903984546661377, "learning_rate": 1.9854941922989638e-05, "loss": 0.6404, "step": 6374 }, { "epoch": 1.0406514019835924, "grad_norm": 1.6882866621017456, "learning_rate": 1.9854888063782088e-05, "loss": 0.7031, "step": 6375 }, { "epoch": 1.0408146606260968, "grad_norm": 2.1223995685577393, "learning_rate": 1.9854834194650673e-05, "loss": 0.8229, "step": 6376 }, { "epoch": 1.0409779192686013, "grad_norm": 1.3748483657836914, "learning_rate": 1.985478031559544e-05, "loss": 0.5831, "step": 6377 }, { "epoch": 1.0411411779111057, "grad_norm": 1.6554031372070312, "learning_rate": 1.9854726426616447e-05, "loss": 0.6084, "step": 6378 }, { "epoch": 1.0413044365536102, "grad_norm": 1.5632424354553223, "learning_rate": 1.9854672527713745e-05, "loss": 0.6284, "step": 6379 }, { "epoch": 1.0414676951961144, "grad_norm": 2.5645785331726074, "learning_rate": 1.985461861888739e-05, "loss": 0.9476, "step": 6380 }, { "epoch": 1.0416309538386188, "grad_norm": 1.9751784801483154, "learning_rate": 1.9854564700137437e-05, "loss": 0.691, "step": 6381 }, { "epoch": 1.0417942124811232, "grad_norm": 1.5935426950454712, "learning_rate": 1.9854510771463942e-05, "loss": 0.7099, "step": 6382 }, { "epoch": 1.0419574711236277, "grad_norm": 2.161407470703125, "learning_rate": 1.9854456832866956e-05, "loss": 0.8541, "step": 6383 }, { "epoch": 1.042120729766132, "grad_norm": 1.9132003784179688, "learning_rate": 1.9854402884346534e-05, "loss": 0.6346, "step": 6384 }, { "epoch": 1.0422839884086363, "grad_norm": 1.8029053211212158, "learning_rate": 1.985434892590273e-05, "loss": 0.7083, "step": 6385 }, { "epoch": 1.0424472470511408, "grad_norm": 1.8897643089294434, "learning_rate": 1.98542949575356e-05, "loss": 0.667, "step": 6386 }, { "epoch": 1.0426105056936452, "grad_norm": 2.5108985900878906, "learning_rate": 1.98542409792452e-05, "loss": 0.8482, "step": 6387 }, { "epoch": 1.0427737643361494, "grad_norm": 1.5588014125823975, "learning_rate": 1.985418699103158e-05, "loss": 0.6847, "step": 6388 }, { "epoch": 1.0429370229786539, "grad_norm": 1.939668893814087, "learning_rate": 1.9854132992894793e-05, "loss": 0.6844, "step": 6389 }, { "epoch": 1.0431002816211583, "grad_norm": 1.7362983226776123, "learning_rate": 1.9854078984834904e-05, "loss": 0.6969, "step": 6390 }, { "epoch": 1.0432635402636627, "grad_norm": 2.0692286491394043, "learning_rate": 1.9854024966851953e-05, "loss": 0.8139, "step": 6391 }, { "epoch": 1.0434267989061672, "grad_norm": 1.7947496175765991, "learning_rate": 1.9853970938946005e-05, "loss": 0.5697, "step": 6392 }, { "epoch": 1.0435900575486714, "grad_norm": 1.83210027217865, "learning_rate": 1.9853916901117112e-05, "loss": 0.6931, "step": 6393 }, { "epoch": 1.0437533161911758, "grad_norm": 2.1277687549591064, "learning_rate": 1.9853862853365324e-05, "loss": 0.726, "step": 6394 }, { "epoch": 1.0439165748336803, "grad_norm": 1.94338858127594, "learning_rate": 1.9853808795690704e-05, "loss": 0.7794, "step": 6395 }, { "epoch": 1.0440798334761847, "grad_norm": 2.382124185562134, "learning_rate": 1.9853754728093296e-05, "loss": 0.7814, "step": 6396 }, { "epoch": 1.044243092118689, "grad_norm": 1.559651255607605, "learning_rate": 1.985370065057316e-05, "loss": 0.6314, "step": 6397 }, { "epoch": 1.0444063507611934, "grad_norm": 1.9126427173614502, "learning_rate": 1.9853646563130355e-05, "loss": 0.7189, "step": 6398 }, { "epoch": 1.0445696094036978, "grad_norm": 1.7692419290542603, "learning_rate": 1.9853592465764926e-05, "loss": 0.6914, "step": 6399 }, { "epoch": 1.0447328680462022, "grad_norm": 1.9816216230392456, "learning_rate": 1.9853538358476933e-05, "loss": 0.8409, "step": 6400 }, { "epoch": 1.0448961266887067, "grad_norm": 1.821016788482666, "learning_rate": 1.985348424126643e-05, "loss": 0.8012, "step": 6401 }, { "epoch": 1.045059385331211, "grad_norm": 1.792432427406311, "learning_rate": 1.985343011413347e-05, "loss": 0.7281, "step": 6402 }, { "epoch": 1.0452226439737153, "grad_norm": 1.7540838718414307, "learning_rate": 1.985337597707811e-05, "loss": 0.7064, "step": 6403 }, { "epoch": 1.0453859026162198, "grad_norm": 1.9724094867706299, "learning_rate": 1.98533218301004e-05, "loss": 0.8457, "step": 6404 }, { "epoch": 1.0455491612587242, "grad_norm": 1.963816523551941, "learning_rate": 1.98532676732004e-05, "loss": 0.8454, "step": 6405 }, { "epoch": 1.0457124199012284, "grad_norm": 1.8810901641845703, "learning_rate": 1.9853213506378163e-05, "loss": 0.7963, "step": 6406 }, { "epoch": 1.0458756785437329, "grad_norm": 1.9094598293304443, "learning_rate": 1.9853159329633737e-05, "loss": 0.8457, "step": 6407 }, { "epoch": 1.0460389371862373, "grad_norm": 1.7724827527999878, "learning_rate": 1.985310514296719e-05, "loss": 0.8429, "step": 6408 }, { "epoch": 1.0462021958287417, "grad_norm": 1.3906306028366089, "learning_rate": 1.9853050946378563e-05, "loss": 0.5326, "step": 6409 }, { "epoch": 1.0463654544712462, "grad_norm": 1.5561398267745972, "learning_rate": 1.9852996739867918e-05, "loss": 0.6433, "step": 6410 }, { "epoch": 1.0465287131137504, "grad_norm": 1.5456129312515259, "learning_rate": 1.985294252343531e-05, "loss": 0.6315, "step": 6411 }, { "epoch": 1.0466919717562548, "grad_norm": 1.8530503511428833, "learning_rate": 1.985288829708079e-05, "loss": 0.7208, "step": 6412 }, { "epoch": 1.0468552303987593, "grad_norm": 1.6401225328445435, "learning_rate": 1.985283406080441e-05, "loss": 0.7369, "step": 6413 }, { "epoch": 1.0470184890412637, "grad_norm": 2.065150499343872, "learning_rate": 1.9852779814606232e-05, "loss": 0.825, "step": 6414 }, { "epoch": 1.047181747683768, "grad_norm": 1.6929408311843872, "learning_rate": 1.985272555848631e-05, "loss": 0.7282, "step": 6415 }, { "epoch": 1.0473450063262724, "grad_norm": 1.9294968843460083, "learning_rate": 1.9852671292444692e-05, "loss": 0.7184, "step": 6416 }, { "epoch": 1.0475082649687768, "grad_norm": 2.0008933544158936, "learning_rate": 1.9852617016481442e-05, "loss": 0.8084, "step": 6417 }, { "epoch": 1.0476715236112812, "grad_norm": 2.3468823432922363, "learning_rate": 1.9852562730596606e-05, "loss": 0.8604, "step": 6418 }, { "epoch": 1.0478347822537855, "grad_norm": 1.7941347360610962, "learning_rate": 1.985250843479024e-05, "loss": 0.7205, "step": 6419 }, { "epoch": 1.04799804089629, "grad_norm": 1.568460464477539, "learning_rate": 1.98524541290624e-05, "loss": 0.6615, "step": 6420 }, { "epoch": 1.0481612995387943, "grad_norm": 1.4633936882019043, "learning_rate": 1.9852399813413146e-05, "loss": 0.6811, "step": 6421 }, { "epoch": 1.0483245581812988, "grad_norm": 1.7978830337524414, "learning_rate": 1.9852345487842527e-05, "loss": 0.6925, "step": 6422 }, { "epoch": 1.0484878168238032, "grad_norm": 1.621343731880188, "learning_rate": 1.9852291152350593e-05, "loss": 0.5299, "step": 6423 }, { "epoch": 1.0486510754663074, "grad_norm": 2.0123748779296875, "learning_rate": 1.985223680693741e-05, "loss": 0.7217, "step": 6424 }, { "epoch": 1.0488143341088119, "grad_norm": 2.2097108364105225, "learning_rate": 1.9852182451603026e-05, "loss": 0.9622, "step": 6425 }, { "epoch": 1.0489775927513163, "grad_norm": 1.7552664279937744, "learning_rate": 1.9852128086347497e-05, "loss": 0.7548, "step": 6426 }, { "epoch": 1.0491408513938207, "grad_norm": 2.169128179550171, "learning_rate": 1.9852073711170877e-05, "loss": 0.9252, "step": 6427 }, { "epoch": 1.049304110036325, "grad_norm": 2.043309450149536, "learning_rate": 1.985201932607322e-05, "loss": 0.7248, "step": 6428 }, { "epoch": 1.0494673686788294, "grad_norm": 1.878727674484253, "learning_rate": 1.9851964931054584e-05, "loss": 0.7259, "step": 6429 }, { "epoch": 1.0496306273213338, "grad_norm": 1.5973033905029297, "learning_rate": 1.9851910526115023e-05, "loss": 0.5529, "step": 6430 }, { "epoch": 1.0497938859638383, "grad_norm": 1.636610507965088, "learning_rate": 1.985185611125459e-05, "loss": 0.6918, "step": 6431 }, { "epoch": 1.0499571446063425, "grad_norm": 2.177405595779419, "learning_rate": 1.985180168647334e-05, "loss": 0.8603, "step": 6432 }, { "epoch": 1.050120403248847, "grad_norm": 2.079627513885498, "learning_rate": 1.9851747251771328e-05, "loss": 0.7971, "step": 6433 }, { "epoch": 1.0502836618913514, "grad_norm": 1.9238831996917725, "learning_rate": 1.9851692807148612e-05, "loss": 0.8452, "step": 6434 }, { "epoch": 1.0504469205338558, "grad_norm": 1.85007643699646, "learning_rate": 1.985163835260524e-05, "loss": 0.7016, "step": 6435 }, { "epoch": 1.0506101791763602, "grad_norm": 1.5897303819656372, "learning_rate": 1.9851583888141274e-05, "loss": 0.6901, "step": 6436 }, { "epoch": 1.0507734378188645, "grad_norm": 1.5689098834991455, "learning_rate": 1.985152941375676e-05, "loss": 0.6482, "step": 6437 }, { "epoch": 1.050936696461369, "grad_norm": 1.6794500350952148, "learning_rate": 1.9851474929451764e-05, "loss": 0.6576, "step": 6438 }, { "epoch": 1.0510999551038733, "grad_norm": 1.7521792650222778, "learning_rate": 1.9851420435226334e-05, "loss": 0.6242, "step": 6439 }, { "epoch": 1.0512632137463778, "grad_norm": 1.6738594770431519, "learning_rate": 1.9851365931080525e-05, "loss": 0.608, "step": 6440 }, { "epoch": 1.051426472388882, "grad_norm": 1.6715350151062012, "learning_rate": 1.9851311417014396e-05, "loss": 0.6584, "step": 6441 }, { "epoch": 1.0515897310313864, "grad_norm": 1.8142921924591064, "learning_rate": 1.9851256893027996e-05, "loss": 0.6869, "step": 6442 }, { "epoch": 1.0517529896738909, "grad_norm": 1.7631057500839233, "learning_rate": 1.9851202359121383e-05, "loss": 0.7282, "step": 6443 }, { "epoch": 1.0519162483163953, "grad_norm": 1.8243625164031982, "learning_rate": 1.9851147815294615e-05, "loss": 0.6897, "step": 6444 }, { "epoch": 1.0520795069588997, "grad_norm": 1.9253132343292236, "learning_rate": 1.985109326154774e-05, "loss": 0.8799, "step": 6445 }, { "epoch": 1.052242765601404, "grad_norm": 1.9539350271224976, "learning_rate": 1.9851038697880817e-05, "loss": 0.581, "step": 6446 }, { "epoch": 1.0524060242439084, "grad_norm": 2.1103930473327637, "learning_rate": 1.9850984124293902e-05, "loss": 0.7538, "step": 6447 }, { "epoch": 1.0525692828864128, "grad_norm": 3.306098699569702, "learning_rate": 1.985092954078705e-05, "loss": 0.7329, "step": 6448 }, { "epoch": 1.0527325415289173, "grad_norm": 1.8497775793075562, "learning_rate": 1.9850874947360316e-05, "loss": 0.6954, "step": 6449 }, { "epoch": 1.0528958001714215, "grad_norm": 1.8238327503204346, "learning_rate": 1.985082034401375e-05, "loss": 0.6843, "step": 6450 }, { "epoch": 1.053059058813926, "grad_norm": 2.092165470123291, "learning_rate": 1.985076573074741e-05, "loss": 0.6028, "step": 6451 }, { "epoch": 1.0532223174564304, "grad_norm": 1.6197607517242432, "learning_rate": 1.9850711107561353e-05, "loss": 0.5844, "step": 6452 }, { "epoch": 1.0533855760989348, "grad_norm": 1.900766372680664, "learning_rate": 1.985065647445563e-05, "loss": 0.6243, "step": 6453 }, { "epoch": 1.0535488347414392, "grad_norm": 1.6840955018997192, "learning_rate": 1.9850601831430304e-05, "loss": 0.6925, "step": 6454 }, { "epoch": 1.0537120933839434, "grad_norm": 1.6519290208816528, "learning_rate": 1.9850547178485418e-05, "loss": 0.7135, "step": 6455 }, { "epoch": 1.0538753520264479, "grad_norm": 1.8017761707305908, "learning_rate": 1.9850492515621038e-05, "loss": 0.7089, "step": 6456 }, { "epoch": 1.0540386106689523, "grad_norm": 2.288264274597168, "learning_rate": 1.9850437842837214e-05, "loss": 0.7643, "step": 6457 }, { "epoch": 1.0542018693114568, "grad_norm": 1.6951135396957397, "learning_rate": 1.9850383160134e-05, "loss": 0.6662, "step": 6458 }, { "epoch": 1.054365127953961, "grad_norm": 1.6866683959960938, "learning_rate": 1.985032846751146e-05, "loss": 0.5699, "step": 6459 }, { "epoch": 1.0545283865964654, "grad_norm": 2.023360013961792, "learning_rate": 1.9850273764969632e-05, "loss": 0.8211, "step": 6460 }, { "epoch": 1.0546916452389699, "grad_norm": 1.8363749980926514, "learning_rate": 1.9850219052508586e-05, "loss": 0.7046, "step": 6461 }, { "epoch": 1.0548549038814743, "grad_norm": 1.6069679260253906, "learning_rate": 1.985016433012837e-05, "loss": 0.5969, "step": 6462 }, { "epoch": 1.0550181625239785, "grad_norm": 1.8681252002716064, "learning_rate": 1.985010959782904e-05, "loss": 0.6333, "step": 6463 }, { "epoch": 1.055181421166483, "grad_norm": 2.0554378032684326, "learning_rate": 1.9850054855610656e-05, "loss": 0.6216, "step": 6464 }, { "epoch": 1.0553446798089874, "grad_norm": 1.68414306640625, "learning_rate": 1.985000010347327e-05, "loss": 0.5856, "step": 6465 }, { "epoch": 1.0555079384514918, "grad_norm": 2.0159666538238525, "learning_rate": 1.9849945341416932e-05, "loss": 0.7287, "step": 6466 }, { "epoch": 1.0556711970939963, "grad_norm": 1.9936645030975342, "learning_rate": 1.9849890569441704e-05, "loss": 0.6266, "step": 6467 }, { "epoch": 1.0558344557365005, "grad_norm": 2.7062675952911377, "learning_rate": 1.984983578754764e-05, "loss": 0.6609, "step": 6468 }, { "epoch": 1.055997714379005, "grad_norm": 1.8133025169372559, "learning_rate": 1.984978099573479e-05, "loss": 0.6778, "step": 6469 }, { "epoch": 1.0561609730215094, "grad_norm": 1.7791633605957031, "learning_rate": 1.9849726194003215e-05, "loss": 0.7104, "step": 6470 }, { "epoch": 1.0563242316640138, "grad_norm": 1.6535755395889282, "learning_rate": 1.984967138235297e-05, "loss": 0.7184, "step": 6471 }, { "epoch": 1.056487490306518, "grad_norm": 1.9171730279922485, "learning_rate": 1.984961656078411e-05, "loss": 0.7616, "step": 6472 }, { "epoch": 1.0566507489490224, "grad_norm": 1.7422312498092651, "learning_rate": 1.9849561729296686e-05, "loss": 0.6773, "step": 6473 }, { "epoch": 1.0568140075915269, "grad_norm": 1.9317115545272827, "learning_rate": 1.9849506887890754e-05, "loss": 0.702, "step": 6474 }, { "epoch": 1.0569772662340313, "grad_norm": 1.7813512086868286, "learning_rate": 1.9849452036566374e-05, "loss": 0.7586, "step": 6475 }, { "epoch": 1.0571405248765358, "grad_norm": 2.199392795562744, "learning_rate": 1.9849397175323598e-05, "loss": 0.9489, "step": 6476 }, { "epoch": 1.05730378351904, "grad_norm": 1.9289523363113403, "learning_rate": 1.9849342304162482e-05, "loss": 0.8421, "step": 6477 }, { "epoch": 1.0574670421615444, "grad_norm": 1.667407512664795, "learning_rate": 1.984928742308308e-05, "loss": 0.6283, "step": 6478 }, { "epoch": 1.0576303008040489, "grad_norm": 1.8135247230529785, "learning_rate": 1.9849232532085447e-05, "loss": 0.6939, "step": 6479 }, { "epoch": 1.0577935594465533, "grad_norm": 1.814348816871643, "learning_rate": 1.9849177631169643e-05, "loss": 0.7166, "step": 6480 }, { "epoch": 1.0579568180890575, "grad_norm": 2.018916368484497, "learning_rate": 1.9849122720335717e-05, "loss": 0.7593, "step": 6481 }, { "epoch": 1.058120076731562, "grad_norm": 2.4966375827789307, "learning_rate": 1.984906779958373e-05, "loss": 0.728, "step": 6482 }, { "epoch": 1.0582833353740664, "grad_norm": 2.3089301586151123, "learning_rate": 1.984901286891373e-05, "loss": 0.7133, "step": 6483 }, { "epoch": 1.0584465940165708, "grad_norm": 1.7044378519058228, "learning_rate": 1.9848957928325777e-05, "loss": 0.6748, "step": 6484 }, { "epoch": 1.058609852659075, "grad_norm": 1.6541234254837036, "learning_rate": 1.984890297781993e-05, "loss": 0.6403, "step": 6485 }, { "epoch": 1.0587731113015795, "grad_norm": 2.038078784942627, "learning_rate": 1.9848848017396237e-05, "loss": 0.7525, "step": 6486 }, { "epoch": 1.058936369944084, "grad_norm": 1.9732763767242432, "learning_rate": 1.984879304705476e-05, "loss": 0.6197, "step": 6487 }, { "epoch": 1.0590996285865883, "grad_norm": 1.8807592391967773, "learning_rate": 1.9848738066795547e-05, "loss": 0.7507, "step": 6488 }, { "epoch": 1.0592628872290928, "grad_norm": 1.9210035800933838, "learning_rate": 1.984868307661866e-05, "loss": 0.6903, "step": 6489 }, { "epoch": 1.059426145871597, "grad_norm": 1.615373969078064, "learning_rate": 1.984862807652415e-05, "loss": 0.5683, "step": 6490 }, { "epoch": 1.0595894045141014, "grad_norm": 1.8936339616775513, "learning_rate": 1.9848573066512074e-05, "loss": 0.5732, "step": 6491 }, { "epoch": 1.0597526631566059, "grad_norm": 1.8804913759231567, "learning_rate": 1.984851804658249e-05, "loss": 0.8062, "step": 6492 }, { "epoch": 1.0599159217991103, "grad_norm": 1.891694188117981, "learning_rate": 1.984846301673545e-05, "loss": 0.5941, "step": 6493 }, { "epoch": 1.0600791804416145, "grad_norm": 1.7877391576766968, "learning_rate": 1.984840797697101e-05, "loss": 0.6229, "step": 6494 }, { "epoch": 1.060242439084119, "grad_norm": 2.062567949295044, "learning_rate": 1.9848352927289224e-05, "loss": 0.8307, "step": 6495 }, { "epoch": 1.0604056977266234, "grad_norm": 1.7834426164627075, "learning_rate": 1.9848297867690152e-05, "loss": 0.6428, "step": 6496 }, { "epoch": 1.0605689563691278, "grad_norm": 2.1293575763702393, "learning_rate": 1.9848242798173846e-05, "loss": 0.8863, "step": 6497 }, { "epoch": 1.0607322150116323, "grad_norm": 1.8757915496826172, "learning_rate": 1.9848187718740363e-05, "loss": 0.7395, "step": 6498 }, { "epoch": 1.0608954736541365, "grad_norm": 1.5795472860336304, "learning_rate": 1.9848132629389757e-05, "loss": 0.632, "step": 6499 }, { "epoch": 1.061058732296641, "grad_norm": 1.74015212059021, "learning_rate": 1.9848077530122083e-05, "loss": 0.6635, "step": 6500 }, { "epoch": 1.0612219909391454, "grad_norm": 1.6464121341705322, "learning_rate": 1.9848022420937398e-05, "loss": 0.6114, "step": 6501 }, { "epoch": 1.0613852495816498, "grad_norm": 1.8635272979736328, "learning_rate": 1.9847967301835756e-05, "loss": 0.6404, "step": 6502 }, { "epoch": 1.061548508224154, "grad_norm": 1.721967101097107, "learning_rate": 1.9847912172817215e-05, "loss": 0.7079, "step": 6503 }, { "epoch": 1.0617117668666585, "grad_norm": 1.7030422687530518, "learning_rate": 1.984785703388183e-05, "loss": 0.6446, "step": 6504 }, { "epoch": 1.061875025509163, "grad_norm": 1.8117716312408447, "learning_rate": 1.9847801885029652e-05, "loss": 0.7431, "step": 6505 }, { "epoch": 1.0620382841516673, "grad_norm": 1.884779453277588, "learning_rate": 1.9847746726260742e-05, "loss": 0.6949, "step": 6506 }, { "epoch": 1.0622015427941716, "grad_norm": 1.7023359537124634, "learning_rate": 1.9847691557575153e-05, "loss": 0.7352, "step": 6507 }, { "epoch": 1.062364801436676, "grad_norm": 2.025390148162842, "learning_rate": 1.9847636378972944e-05, "loss": 0.7879, "step": 6508 }, { "epoch": 1.0625280600791804, "grad_norm": 1.7900786399841309, "learning_rate": 1.9847581190454166e-05, "loss": 0.7449, "step": 6509 }, { "epoch": 1.0626913187216849, "grad_norm": 1.8524688482284546, "learning_rate": 1.9847525992018877e-05, "loss": 0.5716, "step": 6510 }, { "epoch": 1.0628545773641893, "grad_norm": 2.3486416339874268, "learning_rate": 1.9847470783667128e-05, "loss": 0.8288, "step": 6511 }, { "epoch": 1.0630178360066935, "grad_norm": 1.7486556768417358, "learning_rate": 1.9847415565398985e-05, "loss": 0.6412, "step": 6512 }, { "epoch": 1.063181094649198, "grad_norm": 2.2853293418884277, "learning_rate": 1.9847360337214492e-05, "loss": 0.8744, "step": 6513 }, { "epoch": 1.0633443532917024, "grad_norm": 1.9964988231658936, "learning_rate": 1.9847305099113713e-05, "loss": 0.8406, "step": 6514 }, { "epoch": 1.0635076119342068, "grad_norm": 1.9495255947113037, "learning_rate": 1.9847249851096696e-05, "loss": 0.6728, "step": 6515 }, { "epoch": 1.063670870576711, "grad_norm": 2.054844617843628, "learning_rate": 1.9847194593163504e-05, "loss": 0.8373, "step": 6516 }, { "epoch": 1.0638341292192155, "grad_norm": 1.7116367816925049, "learning_rate": 1.984713932531419e-05, "loss": 0.5758, "step": 6517 }, { "epoch": 1.06399738786172, "grad_norm": 1.9038788080215454, "learning_rate": 1.984708404754881e-05, "loss": 0.8737, "step": 6518 }, { "epoch": 1.0641606465042244, "grad_norm": 1.902789831161499, "learning_rate": 1.9847028759867417e-05, "loss": 0.8002, "step": 6519 }, { "epoch": 1.0643239051467286, "grad_norm": 2.3554506301879883, "learning_rate": 1.984697346227007e-05, "loss": 0.8593, "step": 6520 }, { "epoch": 1.064487163789233, "grad_norm": 1.751593828201294, "learning_rate": 1.9846918154756823e-05, "loss": 0.7558, "step": 6521 }, { "epoch": 1.0646504224317375, "grad_norm": 1.6111421585083008, "learning_rate": 1.9846862837327733e-05, "loss": 0.6807, "step": 6522 }, { "epoch": 1.064813681074242, "grad_norm": 1.8458054065704346, "learning_rate": 1.9846807509982854e-05, "loss": 0.6997, "step": 6523 }, { "epoch": 1.0649769397167463, "grad_norm": 1.7240962982177734, "learning_rate": 1.9846752172722242e-05, "loss": 0.6612, "step": 6524 }, { "epoch": 1.0651401983592506, "grad_norm": 1.822969913482666, "learning_rate": 1.984669682554595e-05, "loss": 0.732, "step": 6525 }, { "epoch": 1.065303457001755, "grad_norm": 2.0365076065063477, "learning_rate": 1.984664146845404e-05, "loss": 0.7476, "step": 6526 }, { "epoch": 1.0654667156442594, "grad_norm": 2.104407548904419, "learning_rate": 1.9846586101446567e-05, "loss": 0.7717, "step": 6527 }, { "epoch": 1.0656299742867639, "grad_norm": 1.7804725170135498, "learning_rate": 1.9846530724523583e-05, "loss": 0.7185, "step": 6528 }, { "epoch": 1.065793232929268, "grad_norm": 1.8573238849639893, "learning_rate": 1.9846475337685143e-05, "loss": 0.7154, "step": 6529 }, { "epoch": 1.0659564915717725, "grad_norm": 1.7709726095199585, "learning_rate": 1.9846419940931305e-05, "loss": 0.723, "step": 6530 }, { "epoch": 1.066119750214277, "grad_norm": 1.8850891590118408, "learning_rate": 1.9846364534262127e-05, "loss": 0.9694, "step": 6531 }, { "epoch": 1.0662830088567814, "grad_norm": 1.6783599853515625, "learning_rate": 1.9846309117677662e-05, "loss": 0.6751, "step": 6532 }, { "epoch": 1.0664462674992858, "grad_norm": 2.040072441101074, "learning_rate": 1.9846253691177965e-05, "loss": 0.7128, "step": 6533 }, { "epoch": 1.06660952614179, "grad_norm": 1.5698174238204956, "learning_rate": 1.9846198254763097e-05, "loss": 0.5784, "step": 6534 }, { "epoch": 1.0667727847842945, "grad_norm": 1.7402125597000122, "learning_rate": 1.9846142808433108e-05, "loss": 0.7817, "step": 6535 }, { "epoch": 1.066936043426799, "grad_norm": 1.799605369567871, "learning_rate": 1.9846087352188053e-05, "loss": 0.7424, "step": 6536 }, { "epoch": 1.0670993020693034, "grad_norm": 1.6422302722930908, "learning_rate": 1.9846031886027994e-05, "loss": 0.6358, "step": 6537 }, { "epoch": 1.0672625607118076, "grad_norm": 1.985588788986206, "learning_rate": 1.984597640995298e-05, "loss": 0.7278, "step": 6538 }, { "epoch": 1.067425819354312, "grad_norm": 1.6890209913253784, "learning_rate": 1.9845920923963072e-05, "loss": 0.6707, "step": 6539 }, { "epoch": 1.0675890779968165, "grad_norm": 2.059126853942871, "learning_rate": 1.9845865428058325e-05, "loss": 0.8534, "step": 6540 }, { "epoch": 1.067752336639321, "grad_norm": 1.724249243736267, "learning_rate": 1.9845809922238794e-05, "loss": 0.7028, "step": 6541 }, { "epoch": 1.0679155952818253, "grad_norm": 1.9257467985153198, "learning_rate": 1.9845754406504535e-05, "loss": 0.7676, "step": 6542 }, { "epoch": 1.0680788539243296, "grad_norm": 1.7396624088287354, "learning_rate": 1.9845698880855603e-05, "loss": 0.7737, "step": 6543 }, { "epoch": 1.068242112566834, "grad_norm": 1.868084192276001, "learning_rate": 1.9845643345292055e-05, "loss": 0.6294, "step": 6544 }, { "epoch": 1.0684053712093384, "grad_norm": 2.02315092086792, "learning_rate": 1.9845587799813947e-05, "loss": 0.6757, "step": 6545 }, { "epoch": 1.0685686298518429, "grad_norm": 1.730357050895691, "learning_rate": 1.9845532244421334e-05, "loss": 0.7325, "step": 6546 }, { "epoch": 1.068731888494347, "grad_norm": 1.9987250566482544, "learning_rate": 1.9845476679114276e-05, "loss": 0.632, "step": 6547 }, { "epoch": 1.0688951471368515, "grad_norm": 2.0440049171447754, "learning_rate": 1.984542110389282e-05, "loss": 0.7285, "step": 6548 }, { "epoch": 1.069058405779356, "grad_norm": 1.7097535133361816, "learning_rate": 1.9845365518757033e-05, "loss": 0.654, "step": 6549 }, { "epoch": 1.0692216644218604, "grad_norm": 1.4935684204101562, "learning_rate": 1.9845309923706965e-05, "loss": 0.556, "step": 6550 }, { "epoch": 1.0693849230643648, "grad_norm": 1.8168354034423828, "learning_rate": 1.9845254318742668e-05, "loss": 0.6495, "step": 6551 }, { "epoch": 1.069548181706869, "grad_norm": 1.7025024890899658, "learning_rate": 1.9845198703864205e-05, "loss": 0.5683, "step": 6552 }, { "epoch": 1.0697114403493735, "grad_norm": 2.0251781940460205, "learning_rate": 1.9845143079071632e-05, "loss": 0.8429, "step": 6553 }, { "epoch": 1.069874698991878, "grad_norm": 1.65981924533844, "learning_rate": 1.9845087444365002e-05, "loss": 0.6155, "step": 6554 }, { "epoch": 1.0700379576343824, "grad_norm": 2.0557613372802734, "learning_rate": 1.9845031799744367e-05, "loss": 0.7641, "step": 6555 }, { "epoch": 1.0702012162768866, "grad_norm": 1.9714964628219604, "learning_rate": 1.984497614520979e-05, "loss": 0.6825, "step": 6556 }, { "epoch": 1.070364474919391, "grad_norm": 1.9259599447250366, "learning_rate": 1.9844920480761327e-05, "loss": 0.6782, "step": 6557 }, { "epoch": 1.0705277335618955, "grad_norm": 1.8293898105621338, "learning_rate": 1.984486480639903e-05, "loss": 0.7082, "step": 6558 }, { "epoch": 1.0706909922044, "grad_norm": 1.7960492372512817, "learning_rate": 1.9844809122122955e-05, "loss": 0.7651, "step": 6559 }, { "epoch": 1.070854250846904, "grad_norm": 1.6418561935424805, "learning_rate": 1.9844753427933163e-05, "loss": 0.5683, "step": 6560 }, { "epoch": 1.0710175094894085, "grad_norm": 2.100053310394287, "learning_rate": 1.9844697723829703e-05, "loss": 0.7285, "step": 6561 }, { "epoch": 1.071180768131913, "grad_norm": 2.004775285720825, "learning_rate": 1.9844642009812637e-05, "loss": 0.8311, "step": 6562 }, { "epoch": 1.0713440267744174, "grad_norm": 1.6053495407104492, "learning_rate": 1.984458628588202e-05, "loss": 0.5773, "step": 6563 }, { "epoch": 1.0715072854169219, "grad_norm": 1.648263931274414, "learning_rate": 1.984453055203791e-05, "loss": 0.5895, "step": 6564 }, { "epoch": 1.071670544059426, "grad_norm": 2.4731173515319824, "learning_rate": 1.9844474808280355e-05, "loss": 0.7566, "step": 6565 }, { "epoch": 1.0718338027019305, "grad_norm": 1.8368890285491943, "learning_rate": 1.9844419054609418e-05, "loss": 0.6419, "step": 6566 }, { "epoch": 1.071997061344435, "grad_norm": 1.9592753648757935, "learning_rate": 1.9844363291025154e-05, "loss": 0.6806, "step": 6567 }, { "epoch": 1.0721603199869394, "grad_norm": 2.030353546142578, "learning_rate": 1.9844307517527622e-05, "loss": 0.8372, "step": 6568 }, { "epoch": 1.0723235786294436, "grad_norm": 2.108290195465088, "learning_rate": 1.9844251734116867e-05, "loss": 0.6782, "step": 6569 }, { "epoch": 1.072486837271948, "grad_norm": 1.9641799926757812, "learning_rate": 1.984419594079296e-05, "loss": 0.855, "step": 6570 }, { "epoch": 1.0726500959144525, "grad_norm": 2.051434278488159, "learning_rate": 1.9844140137555946e-05, "loss": 0.7145, "step": 6571 }, { "epoch": 1.072813354556957, "grad_norm": 1.8669847249984741, "learning_rate": 1.984408432440589e-05, "loss": 0.6731, "step": 6572 }, { "epoch": 1.0729766131994611, "grad_norm": 1.75222909450531, "learning_rate": 1.984402850134284e-05, "loss": 0.6348, "step": 6573 }, { "epoch": 1.0731398718419656, "grad_norm": 1.7861241102218628, "learning_rate": 1.984397266836686e-05, "loss": 0.7489, "step": 6574 }, { "epoch": 1.07330313048447, "grad_norm": 1.9785913228988647, "learning_rate": 1.9843916825477997e-05, "loss": 0.6934, "step": 6575 }, { "epoch": 1.0734663891269745, "grad_norm": 1.661584496498108, "learning_rate": 1.9843860972676318e-05, "loss": 0.6539, "step": 6576 }, { "epoch": 1.073629647769479, "grad_norm": 2.224846601486206, "learning_rate": 1.984380510996187e-05, "loss": 0.9455, "step": 6577 }, { "epoch": 1.073792906411983, "grad_norm": 1.7586872577667236, "learning_rate": 1.9843749237334714e-05, "loss": 0.6399, "step": 6578 }, { "epoch": 1.0739561650544875, "grad_norm": 1.882796049118042, "learning_rate": 1.98436933547949e-05, "loss": 0.672, "step": 6579 }, { "epoch": 1.074119423696992, "grad_norm": 1.898260235786438, "learning_rate": 1.9843637462342498e-05, "loss": 0.7816, "step": 6580 }, { "epoch": 1.0742826823394964, "grad_norm": 1.8620870113372803, "learning_rate": 1.9843581559977553e-05, "loss": 0.7026, "step": 6581 }, { "epoch": 1.0744459409820006, "grad_norm": 1.5727565288543701, "learning_rate": 1.984352564770012e-05, "loss": 0.5662, "step": 6582 }, { "epoch": 1.074609199624505, "grad_norm": 1.3328168392181396, "learning_rate": 1.984346972551026e-05, "loss": 0.5136, "step": 6583 }, { "epoch": 1.0747724582670095, "grad_norm": 2.0531411170959473, "learning_rate": 1.984341379340803e-05, "loss": 0.726, "step": 6584 }, { "epoch": 1.074935716909514, "grad_norm": 2.056612253189087, "learning_rate": 1.984335785139349e-05, "loss": 0.8, "step": 6585 }, { "epoch": 1.0750989755520184, "grad_norm": 1.8384311199188232, "learning_rate": 1.9843301899466682e-05, "loss": 0.706, "step": 6586 }, { "epoch": 1.0752622341945226, "grad_norm": 2.2263176441192627, "learning_rate": 1.984324593762768e-05, "loss": 0.8344, "step": 6587 }, { "epoch": 1.075425492837027, "grad_norm": 1.9422591924667358, "learning_rate": 1.9843189965876525e-05, "loss": 0.71, "step": 6588 }, { "epoch": 1.0755887514795315, "grad_norm": 1.6306496858596802, "learning_rate": 1.9843133984213284e-05, "loss": 0.5425, "step": 6589 }, { "epoch": 1.075752010122036, "grad_norm": 2.018303155899048, "learning_rate": 1.984307799263801e-05, "loss": 0.7406, "step": 6590 }, { "epoch": 1.0759152687645401, "grad_norm": 1.973543643951416, "learning_rate": 1.984302199115076e-05, "loss": 0.6378, "step": 6591 }, { "epoch": 1.0760785274070446, "grad_norm": 2.021148443222046, "learning_rate": 1.9842965979751586e-05, "loss": 0.7338, "step": 6592 }, { "epoch": 1.076241786049549, "grad_norm": 1.8328499794006348, "learning_rate": 1.9842909958440552e-05, "loss": 0.7935, "step": 6593 }, { "epoch": 1.0764050446920534, "grad_norm": 1.772855520248413, "learning_rate": 1.9842853927217708e-05, "loss": 0.5776, "step": 6594 }, { "epoch": 1.0765683033345579, "grad_norm": 1.8396480083465576, "learning_rate": 1.984279788608311e-05, "loss": 0.6988, "step": 6595 }, { "epoch": 1.076731561977062, "grad_norm": 2.009335517883301, "learning_rate": 1.9842741835036817e-05, "loss": 0.7981, "step": 6596 }, { "epoch": 1.0768948206195665, "grad_norm": 1.584448218345642, "learning_rate": 1.984268577407889e-05, "loss": 0.67, "step": 6597 }, { "epoch": 1.077058079262071, "grad_norm": 2.006624460220337, "learning_rate": 1.984262970320938e-05, "loss": 0.787, "step": 6598 }, { "epoch": 1.0772213379045754, "grad_norm": 1.5471735000610352, "learning_rate": 1.9842573622428346e-05, "loss": 0.5102, "step": 6599 }, { "epoch": 1.0773845965470796, "grad_norm": 1.5162869691848755, "learning_rate": 1.9842517531735837e-05, "loss": 0.5633, "step": 6600 }, { "epoch": 1.077547855189584, "grad_norm": 1.5768474340438843, "learning_rate": 1.9842461431131922e-05, "loss": 0.6684, "step": 6601 }, { "epoch": 1.0777111138320885, "grad_norm": 1.9167141914367676, "learning_rate": 1.9842405320616647e-05, "loss": 0.7495, "step": 6602 }, { "epoch": 1.077874372474593, "grad_norm": 2.0941808223724365, "learning_rate": 1.9842349200190073e-05, "loss": 0.7309, "step": 6603 }, { "epoch": 1.0780376311170972, "grad_norm": 1.6425756216049194, "learning_rate": 1.9842293069852258e-05, "loss": 0.5606, "step": 6604 }, { "epoch": 1.0782008897596016, "grad_norm": 2.034040689468384, "learning_rate": 1.9842236929603253e-05, "loss": 0.7984, "step": 6605 }, { "epoch": 1.078364148402106, "grad_norm": 1.6111505031585693, "learning_rate": 1.984218077944312e-05, "loss": 0.6204, "step": 6606 }, { "epoch": 1.0785274070446105, "grad_norm": 1.7184796333312988, "learning_rate": 1.9842124619371918e-05, "loss": 0.6918, "step": 6607 }, { "epoch": 1.078690665687115, "grad_norm": 1.7415813207626343, "learning_rate": 1.984206844938969e-05, "loss": 0.6403, "step": 6608 }, { "epoch": 1.0788539243296191, "grad_norm": 1.9236661195755005, "learning_rate": 1.984201226949651e-05, "loss": 0.6855, "step": 6609 }, { "epoch": 1.0790171829721236, "grad_norm": 1.8409168720245361, "learning_rate": 1.984195607969242e-05, "loss": 0.6473, "step": 6610 }, { "epoch": 1.079180441614628, "grad_norm": 1.800718069076538, "learning_rate": 1.9841899879977485e-05, "loss": 0.7693, "step": 6611 }, { "epoch": 1.0793437002571324, "grad_norm": 2.3091940879821777, "learning_rate": 1.9841843670351762e-05, "loss": 0.7447, "step": 6612 }, { "epoch": 1.0795069588996367, "grad_norm": 1.992472529411316, "learning_rate": 1.9841787450815303e-05, "loss": 0.7803, "step": 6613 }, { "epoch": 1.079670217542141, "grad_norm": 2.1091084480285645, "learning_rate": 1.9841731221368166e-05, "loss": 0.7881, "step": 6614 }, { "epoch": 1.0798334761846455, "grad_norm": 1.6654093265533447, "learning_rate": 1.9841674982010408e-05, "loss": 0.6653, "step": 6615 }, { "epoch": 1.07999673482715, "grad_norm": 1.7147135734558105, "learning_rate": 1.984161873274209e-05, "loss": 0.6555, "step": 6616 }, { "epoch": 1.0801599934696542, "grad_norm": 1.780023217201233, "learning_rate": 1.984156247356326e-05, "loss": 0.6606, "step": 6617 }, { "epoch": 1.0803232521121586, "grad_norm": 1.875758171081543, "learning_rate": 1.984150620447398e-05, "loss": 0.6295, "step": 6618 }, { "epoch": 1.080486510754663, "grad_norm": 1.5792806148529053, "learning_rate": 1.9841449925474307e-05, "loss": 0.7325, "step": 6619 }, { "epoch": 1.0806497693971675, "grad_norm": 1.7335821390151978, "learning_rate": 1.9841393636564295e-05, "loss": 0.6596, "step": 6620 }, { "epoch": 1.080813028039672, "grad_norm": 1.9912279844284058, "learning_rate": 1.9841337337744004e-05, "loss": 0.6337, "step": 6621 }, { "epoch": 1.0809762866821762, "grad_norm": 1.6391801834106445, "learning_rate": 1.9841281029013488e-05, "loss": 0.6601, "step": 6622 }, { "epoch": 1.0811395453246806, "grad_norm": 2.2245352268218994, "learning_rate": 1.9841224710372805e-05, "loss": 0.7679, "step": 6623 }, { "epoch": 1.081302803967185, "grad_norm": 1.8162120580673218, "learning_rate": 1.984116838182201e-05, "loss": 0.8066, "step": 6624 }, { "epoch": 1.0814660626096895, "grad_norm": 2.0029609203338623, "learning_rate": 1.984111204336116e-05, "loss": 0.7806, "step": 6625 }, { "epoch": 1.0816293212521937, "grad_norm": 1.8661329746246338, "learning_rate": 1.9841055694990315e-05, "loss": 0.8323, "step": 6626 }, { "epoch": 1.0817925798946981, "grad_norm": 2.160513162612915, "learning_rate": 1.984099933670953e-05, "loss": 0.82, "step": 6627 }, { "epoch": 1.0819558385372026, "grad_norm": 2.184948682785034, "learning_rate": 1.984094296851886e-05, "loss": 0.7703, "step": 6628 }, { "epoch": 1.082119097179707, "grad_norm": 1.5797338485717773, "learning_rate": 1.9840886590418366e-05, "loss": 0.5749, "step": 6629 }, { "epoch": 1.0822823558222114, "grad_norm": 2.1470658779144287, "learning_rate": 1.98408302024081e-05, "loss": 0.7633, "step": 6630 }, { "epoch": 1.0824456144647157, "grad_norm": 1.7773462533950806, "learning_rate": 1.984077380448812e-05, "loss": 0.7237, "step": 6631 }, { "epoch": 1.08260887310722, "grad_norm": 1.9119062423706055, "learning_rate": 1.9840717396658483e-05, "loss": 0.6825, "step": 6632 }, { "epoch": 1.0827721317497245, "grad_norm": 1.8408288955688477, "learning_rate": 1.984066097891925e-05, "loss": 0.614, "step": 6633 }, { "epoch": 1.082935390392229, "grad_norm": 1.7402695417404175, "learning_rate": 1.9840604551270467e-05, "loss": 0.7003, "step": 6634 }, { "epoch": 1.0830986490347332, "grad_norm": 1.747113585472107, "learning_rate": 1.98405481137122e-05, "loss": 0.7841, "step": 6635 }, { "epoch": 1.0832619076772376, "grad_norm": 2.0237865447998047, "learning_rate": 1.9840491666244508e-05, "loss": 0.7104, "step": 6636 }, { "epoch": 1.083425166319742, "grad_norm": 1.9401023387908936, "learning_rate": 1.984043520886744e-05, "loss": 0.8054, "step": 6637 }, { "epoch": 1.0835884249622465, "grad_norm": 1.8557326793670654, "learning_rate": 1.984037874158106e-05, "loss": 0.6534, "step": 6638 }, { "epoch": 1.083751683604751, "grad_norm": 1.9136977195739746, "learning_rate": 1.9840322264385418e-05, "loss": 0.6825, "step": 6639 }, { "epoch": 1.0839149422472552, "grad_norm": 1.8995481729507446, "learning_rate": 1.984026577728057e-05, "loss": 0.7275, "step": 6640 }, { "epoch": 1.0840782008897596, "grad_norm": 1.9766427278518677, "learning_rate": 1.9840209280266585e-05, "loss": 0.6974, "step": 6641 }, { "epoch": 1.084241459532264, "grad_norm": 1.9898711442947388, "learning_rate": 1.9840152773343506e-05, "loss": 0.7818, "step": 6642 }, { "epoch": 1.0844047181747685, "grad_norm": 1.780605435371399, "learning_rate": 1.9840096256511398e-05, "loss": 0.7254, "step": 6643 }, { "epoch": 1.0845679768172727, "grad_norm": 2.2149736881256104, "learning_rate": 1.9840039729770316e-05, "loss": 0.7445, "step": 6644 }, { "epoch": 1.0847312354597771, "grad_norm": 2.1186225414276123, "learning_rate": 1.9839983193120317e-05, "loss": 0.8586, "step": 6645 }, { "epoch": 1.0848944941022816, "grad_norm": 1.6025733947753906, "learning_rate": 1.9839926646561456e-05, "loss": 0.5964, "step": 6646 }, { "epoch": 1.085057752744786, "grad_norm": 1.7277116775512695, "learning_rate": 1.9839870090093793e-05, "loss": 0.706, "step": 6647 }, { "epoch": 1.0852210113872902, "grad_norm": 1.8434275388717651, "learning_rate": 1.9839813523717383e-05, "loss": 0.5565, "step": 6648 }, { "epoch": 1.0853842700297947, "grad_norm": 1.7056304216384888, "learning_rate": 1.9839756947432283e-05, "loss": 0.6498, "step": 6649 }, { "epoch": 1.085547528672299, "grad_norm": 1.8487529754638672, "learning_rate": 1.9839700361238548e-05, "loss": 0.8111, "step": 6650 }, { "epoch": 1.0857107873148035, "grad_norm": 1.73468017578125, "learning_rate": 1.9839643765136242e-05, "loss": 0.6909, "step": 6651 }, { "epoch": 1.085874045957308, "grad_norm": 1.5972963571548462, "learning_rate": 1.9839587159125415e-05, "loss": 0.6562, "step": 6652 }, { "epoch": 1.0860373045998122, "grad_norm": 1.9899357557296753, "learning_rate": 1.9839530543206126e-05, "loss": 0.7233, "step": 6653 }, { "epoch": 1.0862005632423166, "grad_norm": 1.976824164390564, "learning_rate": 1.9839473917378432e-05, "loss": 0.7709, "step": 6654 }, { "epoch": 1.086363821884821, "grad_norm": 1.8326069116592407, "learning_rate": 1.9839417281642394e-05, "loss": 0.7178, "step": 6655 }, { "epoch": 1.0865270805273255, "grad_norm": 1.8693517446517944, "learning_rate": 1.9839360635998062e-05, "loss": 0.6715, "step": 6656 }, { "epoch": 1.0866903391698297, "grad_norm": 1.7195690870285034, "learning_rate": 1.9839303980445498e-05, "loss": 0.5223, "step": 6657 }, { "epoch": 1.0868535978123341, "grad_norm": 2.2858951091766357, "learning_rate": 1.9839247314984756e-05, "loss": 0.8267, "step": 6658 }, { "epoch": 1.0870168564548386, "grad_norm": 1.7932099103927612, "learning_rate": 1.9839190639615894e-05, "loss": 0.6833, "step": 6659 }, { "epoch": 1.087180115097343, "grad_norm": 1.4854425191879272, "learning_rate": 1.9839133954338972e-05, "loss": 0.5713, "step": 6660 }, { "epoch": 1.0873433737398472, "grad_norm": 1.6302744150161743, "learning_rate": 1.983907725915404e-05, "loss": 0.5961, "step": 6661 }, { "epoch": 1.0875066323823517, "grad_norm": 1.8020672798156738, "learning_rate": 1.9839020554061167e-05, "loss": 0.9097, "step": 6662 }, { "epoch": 1.0876698910248561, "grad_norm": 1.9074314832687378, "learning_rate": 1.9838963839060395e-05, "loss": 0.7573, "step": 6663 }, { "epoch": 1.0878331496673606, "grad_norm": 1.7917287349700928, "learning_rate": 1.9838907114151794e-05, "loss": 0.5639, "step": 6664 }, { "epoch": 1.087996408309865, "grad_norm": 1.9362990856170654, "learning_rate": 1.983885037933542e-05, "loss": 0.8913, "step": 6665 }, { "epoch": 1.0881596669523692, "grad_norm": 1.9013866186141968, "learning_rate": 1.983879363461132e-05, "loss": 0.79, "step": 6666 }, { "epoch": 1.0883229255948736, "grad_norm": 1.799142837524414, "learning_rate": 1.983873687997956e-05, "loss": 0.6431, "step": 6667 }, { "epoch": 1.088486184237378, "grad_norm": 1.4831737279891968, "learning_rate": 1.983868011544019e-05, "loss": 0.5704, "step": 6668 }, { "epoch": 1.0886494428798825, "grad_norm": 2.0343286991119385, "learning_rate": 1.983862334099328e-05, "loss": 0.8351, "step": 6669 }, { "epoch": 1.0888127015223867, "grad_norm": 1.5970900058746338, "learning_rate": 1.9838566556638872e-05, "loss": 0.5001, "step": 6670 }, { "epoch": 1.0889759601648912, "grad_norm": 2.1249027252197266, "learning_rate": 1.9838509762377033e-05, "loss": 0.7897, "step": 6671 }, { "epoch": 1.0891392188073956, "grad_norm": 1.9271678924560547, "learning_rate": 1.9838452958207816e-05, "loss": 0.6199, "step": 6672 }, { "epoch": 1.0893024774499, "grad_norm": 2.1807351112365723, "learning_rate": 1.983839614413128e-05, "loss": 1.1245, "step": 6673 }, { "epoch": 1.0894657360924045, "grad_norm": 2.3089358806610107, "learning_rate": 1.9838339320147483e-05, "loss": 0.7157, "step": 6674 }, { "epoch": 1.0896289947349087, "grad_norm": 1.7235743999481201, "learning_rate": 1.983828248625648e-05, "loss": 0.6675, "step": 6675 }, { "epoch": 1.0897922533774131, "grad_norm": 2.100759744644165, "learning_rate": 1.983822564245833e-05, "loss": 0.6916, "step": 6676 }, { "epoch": 1.0899555120199176, "grad_norm": 1.8754830360412598, "learning_rate": 1.9838168788753088e-05, "loss": 0.8093, "step": 6677 }, { "epoch": 1.090118770662422, "grad_norm": 1.998581051826477, "learning_rate": 1.983811192514081e-05, "loss": 0.6973, "step": 6678 }, { "epoch": 1.0902820293049262, "grad_norm": 1.96390962600708, "learning_rate": 1.983805505162156e-05, "loss": 0.7281, "step": 6679 }, { "epoch": 1.0904452879474307, "grad_norm": 1.7440420389175415, "learning_rate": 1.983799816819539e-05, "loss": 0.6581, "step": 6680 }, { "epoch": 1.0906085465899351, "grad_norm": 1.7326900959014893, "learning_rate": 1.9837941274862358e-05, "loss": 0.7065, "step": 6681 }, { "epoch": 1.0907718052324396, "grad_norm": 1.9566411972045898, "learning_rate": 1.9837884371622524e-05, "loss": 0.6938, "step": 6682 }, { "epoch": 1.090935063874944, "grad_norm": 1.9278169870376587, "learning_rate": 1.983782745847594e-05, "loss": 0.7652, "step": 6683 }, { "epoch": 1.0910983225174482, "grad_norm": 1.527809500694275, "learning_rate": 1.9837770535422668e-05, "loss": 0.5814, "step": 6684 }, { "epoch": 1.0912615811599526, "grad_norm": 1.8250073194503784, "learning_rate": 1.9837713602462762e-05, "loss": 0.732, "step": 6685 }, { "epoch": 1.091424839802457, "grad_norm": 1.928114652633667, "learning_rate": 1.9837656659596283e-05, "loss": 0.737, "step": 6686 }, { "epoch": 1.0915880984449615, "grad_norm": 1.8044427633285522, "learning_rate": 1.9837599706823284e-05, "loss": 0.6829, "step": 6687 }, { "epoch": 1.0917513570874657, "grad_norm": 2.185000419616699, "learning_rate": 1.9837542744143827e-05, "loss": 0.7051, "step": 6688 }, { "epoch": 1.0919146157299702, "grad_norm": 1.6885191202163696, "learning_rate": 1.9837485771557968e-05, "loss": 0.5987, "step": 6689 }, { "epoch": 1.0920778743724746, "grad_norm": 1.9911586046218872, "learning_rate": 1.983742878906576e-05, "loss": 0.7108, "step": 6690 }, { "epoch": 1.092241133014979, "grad_norm": 1.8788814544677734, "learning_rate": 1.9837371796667265e-05, "loss": 0.5904, "step": 6691 }, { "epoch": 1.0924043916574833, "grad_norm": 2.096450090408325, "learning_rate": 1.983731479436254e-05, "loss": 0.7639, "step": 6692 }, { "epoch": 1.0925676502999877, "grad_norm": 1.6665771007537842, "learning_rate": 1.9837257782151643e-05, "loss": 0.5899, "step": 6693 }, { "epoch": 1.0927309089424921, "grad_norm": 2.044680118560791, "learning_rate": 1.9837200760034627e-05, "loss": 0.8299, "step": 6694 }, { "epoch": 1.0928941675849966, "grad_norm": 1.6426305770874023, "learning_rate": 1.9837143728011555e-05, "loss": 0.652, "step": 6695 }, { "epoch": 1.093057426227501, "grad_norm": 1.7755138874053955, "learning_rate": 1.983708668608248e-05, "loss": 0.6615, "step": 6696 }, { "epoch": 1.0932206848700052, "grad_norm": 2.2207517623901367, "learning_rate": 1.9837029634247465e-05, "loss": 0.7706, "step": 6697 }, { "epoch": 1.0933839435125097, "grad_norm": 1.7230175733566284, "learning_rate": 1.9836972572506557e-05, "loss": 0.7526, "step": 6698 }, { "epoch": 1.093547202155014, "grad_norm": 1.8972721099853516, "learning_rate": 1.9836915500859825e-05, "loss": 0.7701, "step": 6699 }, { "epoch": 1.0937104607975185, "grad_norm": 1.9653180837631226, "learning_rate": 1.9836858419307325e-05, "loss": 0.7227, "step": 6700 }, { "epoch": 1.0938737194400228, "grad_norm": 2.07397198677063, "learning_rate": 1.9836801327849105e-05, "loss": 0.7456, "step": 6701 }, { "epoch": 1.0940369780825272, "grad_norm": 1.9206992387771606, "learning_rate": 1.9836744226485232e-05, "loss": 0.6984, "step": 6702 }, { "epoch": 1.0942002367250316, "grad_norm": 2.120697259902954, "learning_rate": 1.983668711521576e-05, "loss": 0.8265, "step": 6703 }, { "epoch": 1.094363495367536, "grad_norm": 1.6795170307159424, "learning_rate": 1.983662999404074e-05, "loss": 0.7036, "step": 6704 }, { "epoch": 1.0945267540100403, "grad_norm": 1.9468168020248413, "learning_rate": 1.9836572862960242e-05, "loss": 0.7614, "step": 6705 }, { "epoch": 1.0946900126525447, "grad_norm": 1.7960861921310425, "learning_rate": 1.983651572197432e-05, "loss": 0.7041, "step": 6706 }, { "epoch": 1.0948532712950492, "grad_norm": 1.7694761753082275, "learning_rate": 1.9836458571083027e-05, "loss": 0.7586, "step": 6707 }, { "epoch": 1.0950165299375536, "grad_norm": 1.8289145231246948, "learning_rate": 1.983640141028642e-05, "loss": 0.8302, "step": 6708 }, { "epoch": 1.095179788580058, "grad_norm": 1.9909168481826782, "learning_rate": 1.9836344239584566e-05, "loss": 0.817, "step": 6709 }, { "epoch": 1.0953430472225623, "grad_norm": 1.8028284311294556, "learning_rate": 1.983628705897751e-05, "loss": 0.7386, "step": 6710 }, { "epoch": 1.0955063058650667, "grad_norm": 1.9929534196853638, "learning_rate": 1.9836229868465318e-05, "loss": 0.7484, "step": 6711 }, { "epoch": 1.0956695645075711, "grad_norm": 2.0461676120758057, "learning_rate": 1.9836172668048043e-05, "loss": 1.3219, "step": 6712 }, { "epoch": 1.0958328231500756, "grad_norm": 1.7320629358291626, "learning_rate": 1.9836115457725745e-05, "loss": 0.7366, "step": 6713 }, { "epoch": 1.0959960817925798, "grad_norm": 2.1333844661712646, "learning_rate": 1.9836058237498487e-05, "loss": 1.1913, "step": 6714 }, { "epoch": 1.0961593404350842, "grad_norm": 1.4836969375610352, "learning_rate": 1.9836001007366317e-05, "loss": 0.5272, "step": 6715 }, { "epoch": 1.0963225990775887, "grad_norm": 1.635049819946289, "learning_rate": 1.9835943767329297e-05, "loss": 0.5816, "step": 6716 }, { "epoch": 1.096485857720093, "grad_norm": 1.8632649183273315, "learning_rate": 1.9835886517387483e-05, "loss": 0.7868, "step": 6717 }, { "epoch": 1.0966491163625975, "grad_norm": 1.587315559387207, "learning_rate": 1.9835829257540933e-05, "loss": 0.61, "step": 6718 }, { "epoch": 1.0968123750051018, "grad_norm": 1.6050032377243042, "learning_rate": 1.9835771987789706e-05, "loss": 0.6116, "step": 6719 }, { "epoch": 1.0969756336476062, "grad_norm": 2.281839370727539, "learning_rate": 1.983571470813386e-05, "loss": 0.7951, "step": 6720 }, { "epoch": 1.0971388922901106, "grad_norm": 1.5932621955871582, "learning_rate": 1.9835657418573453e-05, "loss": 0.6, "step": 6721 }, { "epoch": 1.097302150932615, "grad_norm": 1.9839459657669067, "learning_rate": 1.983560011910854e-05, "loss": 0.7756, "step": 6722 }, { "epoch": 1.0974654095751193, "grad_norm": 2.067816734313965, "learning_rate": 1.9835542809739183e-05, "loss": 0.6887, "step": 6723 }, { "epoch": 1.0976286682176237, "grad_norm": 1.8881237506866455, "learning_rate": 1.9835485490465435e-05, "loss": 0.8247, "step": 6724 }, { "epoch": 1.0977919268601282, "grad_norm": 1.849304437637329, "learning_rate": 1.9835428161287355e-05, "loss": 0.6657, "step": 6725 }, { "epoch": 1.0979551855026326, "grad_norm": 1.6956336498260498, "learning_rate": 1.9835370822205e-05, "loss": 0.6685, "step": 6726 }, { "epoch": 1.098118444145137, "grad_norm": 1.6432068347930908, "learning_rate": 1.983531347321843e-05, "loss": 0.6313, "step": 6727 }, { "epoch": 1.0982817027876413, "grad_norm": 1.582260012626648, "learning_rate": 1.9835256114327706e-05, "loss": 0.5925, "step": 6728 }, { "epoch": 1.0984449614301457, "grad_norm": 2.0107574462890625, "learning_rate": 1.983519874553288e-05, "loss": 0.7573, "step": 6729 }, { "epoch": 1.0986082200726501, "grad_norm": 1.5984526872634888, "learning_rate": 1.9835141366834006e-05, "loss": 0.664, "step": 6730 }, { "epoch": 1.0987714787151546, "grad_norm": 1.8791431188583374, "learning_rate": 1.9835083978231157e-05, "loss": 0.626, "step": 6731 }, { "epoch": 1.0989347373576588, "grad_norm": 1.9137969017028809, "learning_rate": 1.9835026579724372e-05, "loss": 0.8716, "step": 6732 }, { "epoch": 1.0990979960001632, "grad_norm": 1.7189158201217651, "learning_rate": 1.9834969171313722e-05, "loss": 0.6836, "step": 6733 }, { "epoch": 1.0992612546426677, "grad_norm": 2.041017532348633, "learning_rate": 1.983491175299926e-05, "loss": 0.7335, "step": 6734 }, { "epoch": 1.099424513285172, "grad_norm": 1.7586238384246826, "learning_rate": 1.9834854324781044e-05, "loss": 0.6964, "step": 6735 }, { "epoch": 1.0995877719276765, "grad_norm": 3.443878412246704, "learning_rate": 1.9834796886659135e-05, "loss": 0.8846, "step": 6736 }, { "epoch": 1.0997510305701808, "grad_norm": 1.6668941974639893, "learning_rate": 1.9834739438633584e-05, "loss": 0.5762, "step": 6737 }, { "epoch": 1.0999142892126852, "grad_norm": 2.1670210361480713, "learning_rate": 1.9834681980704456e-05, "loss": 0.7511, "step": 6738 }, { "epoch": 1.1000775478551896, "grad_norm": 1.9482485055923462, "learning_rate": 1.9834624512871806e-05, "loss": 0.7927, "step": 6739 }, { "epoch": 1.100240806497694, "grad_norm": 1.5745545625686646, "learning_rate": 1.983456703513569e-05, "loss": 0.5912, "step": 6740 }, { "epoch": 1.1004040651401983, "grad_norm": 1.7891712188720703, "learning_rate": 1.9834509547496167e-05, "loss": 0.719, "step": 6741 }, { "epoch": 1.1005673237827027, "grad_norm": 1.8192144632339478, "learning_rate": 1.98344520499533e-05, "loss": 0.7566, "step": 6742 }, { "epoch": 1.1007305824252072, "grad_norm": 1.8048319816589355, "learning_rate": 1.9834394542507138e-05, "loss": 0.7487, "step": 6743 }, { "epoch": 1.1008938410677116, "grad_norm": 2.1811366081237793, "learning_rate": 1.9834337025157745e-05, "loss": 0.7314, "step": 6744 }, { "epoch": 1.1010570997102158, "grad_norm": 1.6925514936447144, "learning_rate": 1.9834279497905177e-05, "loss": 0.6426, "step": 6745 }, { "epoch": 1.1012203583527203, "grad_norm": 1.8848170042037964, "learning_rate": 1.983422196074949e-05, "loss": 0.6891, "step": 6746 }, { "epoch": 1.1013836169952247, "grad_norm": 1.9195700883865356, "learning_rate": 1.9834164413690748e-05, "loss": 0.794, "step": 6747 }, { "epoch": 1.1015468756377291, "grad_norm": 2.074894428253174, "learning_rate": 1.9834106856729e-05, "loss": 0.8428, "step": 6748 }, { "epoch": 1.1017101342802333, "grad_norm": 2.0318331718444824, "learning_rate": 1.983404928986431e-05, "loss": 0.8508, "step": 6749 }, { "epoch": 1.1018733929227378, "grad_norm": 2.1480116844177246, "learning_rate": 1.9833991713096742e-05, "loss": 0.7183, "step": 6750 }, { "epoch": 1.1020366515652422, "grad_norm": 2.1867852210998535, "learning_rate": 1.9833934126426338e-05, "loss": 0.7751, "step": 6751 }, { "epoch": 1.1021999102077467, "grad_norm": 2.1268882751464844, "learning_rate": 1.983387652985317e-05, "loss": 0.7264, "step": 6752 }, { "epoch": 1.102363168850251, "grad_norm": 1.6672708988189697, "learning_rate": 1.9833818923377293e-05, "loss": 0.7489, "step": 6753 }, { "epoch": 1.1025264274927553, "grad_norm": 1.8561550378799438, "learning_rate": 1.9833761306998757e-05, "loss": 0.7351, "step": 6754 }, { "epoch": 1.1026896861352598, "grad_norm": 1.5503875017166138, "learning_rate": 1.983370368071763e-05, "loss": 0.6543, "step": 6755 }, { "epoch": 1.1028529447777642, "grad_norm": 2.2268285751342773, "learning_rate": 1.9833646044533962e-05, "loss": 0.756, "step": 6756 }, { "epoch": 1.1030162034202686, "grad_norm": 1.775679349899292, "learning_rate": 1.9833588398447822e-05, "loss": 0.579, "step": 6757 }, { "epoch": 1.1031794620627728, "grad_norm": 1.82843816280365, "learning_rate": 1.9833530742459253e-05, "loss": 0.6761, "step": 6758 }, { "epoch": 1.1033427207052773, "grad_norm": 1.8779449462890625, "learning_rate": 1.9833473076568328e-05, "loss": 0.7181, "step": 6759 }, { "epoch": 1.1035059793477817, "grad_norm": 2.030198574066162, "learning_rate": 1.9833415400775092e-05, "loss": 0.817, "step": 6760 }, { "epoch": 1.1036692379902862, "grad_norm": 1.7051976919174194, "learning_rate": 1.9833357715079615e-05, "loss": 0.7074, "step": 6761 }, { "epoch": 1.1038324966327906, "grad_norm": 1.8799012899398804, "learning_rate": 1.9833300019481946e-05, "loss": 0.6918, "step": 6762 }, { "epoch": 1.1039957552752948, "grad_norm": 1.8130028247833252, "learning_rate": 1.9833242313982147e-05, "loss": 0.6648, "step": 6763 }, { "epoch": 1.1041590139177992, "grad_norm": 1.9656734466552734, "learning_rate": 1.983318459858028e-05, "loss": 0.7135, "step": 6764 }, { "epoch": 1.1043222725603037, "grad_norm": 1.6402291059494019, "learning_rate": 1.9833126873276392e-05, "loss": 0.8452, "step": 6765 }, { "epoch": 1.1044855312028081, "grad_norm": 1.6347730159759521, "learning_rate": 1.983306913807055e-05, "loss": 0.627, "step": 6766 }, { "epoch": 1.1046487898453123, "grad_norm": 1.9236478805541992, "learning_rate": 1.983301139296281e-05, "loss": 0.6807, "step": 6767 }, { "epoch": 1.1048120484878168, "grad_norm": 2.181328773498535, "learning_rate": 1.983295363795323e-05, "loss": 0.8568, "step": 6768 }, { "epoch": 1.1049753071303212, "grad_norm": 1.7246285676956177, "learning_rate": 1.983289587304187e-05, "loss": 0.5648, "step": 6769 }, { "epoch": 1.1051385657728257, "grad_norm": 1.7487621307373047, "learning_rate": 1.9832838098228786e-05, "loss": 0.696, "step": 6770 }, { "epoch": 1.10530182441533, "grad_norm": 1.9745725393295288, "learning_rate": 1.9832780313514036e-05, "loss": 0.753, "step": 6771 }, { "epoch": 1.1054650830578343, "grad_norm": 1.7327139377593994, "learning_rate": 1.983272251889768e-05, "loss": 0.7353, "step": 6772 }, { "epoch": 1.1056283417003387, "grad_norm": 1.7728241682052612, "learning_rate": 1.9832664714379774e-05, "loss": 0.7146, "step": 6773 }, { "epoch": 1.1057916003428432, "grad_norm": 1.7190454006195068, "learning_rate": 1.9832606899960377e-05, "loss": 0.6562, "step": 6774 }, { "epoch": 1.1059548589853476, "grad_norm": 1.6471067667007446, "learning_rate": 1.983254907563955e-05, "loss": 0.6505, "step": 6775 }, { "epoch": 1.1061181176278518, "grad_norm": 1.693933129310608, "learning_rate": 1.9832491241417345e-05, "loss": 0.5357, "step": 6776 }, { "epoch": 1.1062813762703563, "grad_norm": 1.8552799224853516, "learning_rate": 1.9832433397293825e-05, "loss": 0.7356, "step": 6777 }, { "epoch": 1.1064446349128607, "grad_norm": 1.868455410003662, "learning_rate": 1.9832375543269048e-05, "loss": 0.6095, "step": 6778 }, { "epoch": 1.1066078935553652, "grad_norm": 1.9555726051330566, "learning_rate": 1.983231767934307e-05, "loss": 0.8013, "step": 6779 }, { "epoch": 1.1067711521978696, "grad_norm": 1.9471238851547241, "learning_rate": 1.9832259805515954e-05, "loss": 0.7786, "step": 6780 }, { "epoch": 1.1069344108403738, "grad_norm": 2.1935322284698486, "learning_rate": 1.983220192178775e-05, "loss": 0.7145, "step": 6781 }, { "epoch": 1.1070976694828782, "grad_norm": 1.7586286067962646, "learning_rate": 1.9832144028158523e-05, "loss": 0.5735, "step": 6782 }, { "epoch": 1.1072609281253827, "grad_norm": 1.8207159042358398, "learning_rate": 1.9832086124628333e-05, "loss": 0.7068, "step": 6783 }, { "epoch": 1.1074241867678871, "grad_norm": 1.9500941038131714, "learning_rate": 1.983202821119723e-05, "loss": 0.6724, "step": 6784 }, { "epoch": 1.1075874454103913, "grad_norm": 1.7856190204620361, "learning_rate": 1.983197028786528e-05, "loss": 0.7329, "step": 6785 }, { "epoch": 1.1077507040528958, "grad_norm": 1.9484974145889282, "learning_rate": 1.9831912354632537e-05, "loss": 0.9002, "step": 6786 }, { "epoch": 1.1079139626954002, "grad_norm": 2.2160184383392334, "learning_rate": 1.983185441149906e-05, "loss": 0.7222, "step": 6787 }, { "epoch": 1.1080772213379046, "grad_norm": 1.9722622632980347, "learning_rate": 1.9831796458464915e-05, "loss": 0.7333, "step": 6788 }, { "epoch": 1.1082404799804089, "grad_norm": 1.6124356985092163, "learning_rate": 1.9831738495530147e-05, "loss": 0.6508, "step": 6789 }, { "epoch": 1.1084037386229133, "grad_norm": 2.224576950073242, "learning_rate": 1.9831680522694823e-05, "loss": 1.2893, "step": 6790 }, { "epoch": 1.1085669972654177, "grad_norm": 1.9055266380310059, "learning_rate": 1.9831622539958996e-05, "loss": 0.7919, "step": 6791 }, { "epoch": 1.1087302559079222, "grad_norm": 1.8393490314483643, "learning_rate": 1.9831564547322733e-05, "loss": 0.7646, "step": 6792 }, { "epoch": 1.1088935145504264, "grad_norm": 1.7349016666412354, "learning_rate": 1.9831506544786087e-05, "loss": 0.8456, "step": 6793 }, { "epoch": 1.1090567731929308, "grad_norm": 1.774179220199585, "learning_rate": 1.983144853234911e-05, "loss": 0.7852, "step": 6794 }, { "epoch": 1.1092200318354353, "grad_norm": 1.796922206878662, "learning_rate": 1.9831390510011874e-05, "loss": 0.7277, "step": 6795 }, { "epoch": 1.1093832904779397, "grad_norm": 1.7811744213104248, "learning_rate": 1.9831332477774428e-05, "loss": 0.7266, "step": 6796 }, { "epoch": 1.1095465491204441, "grad_norm": 1.5025882720947266, "learning_rate": 1.983127443563683e-05, "loss": 0.6427, "step": 6797 }, { "epoch": 1.1097098077629484, "grad_norm": 1.9513026475906372, "learning_rate": 1.9831216383599146e-05, "loss": 0.8602, "step": 6798 }, { "epoch": 1.1098730664054528, "grad_norm": 1.953722596168518, "learning_rate": 1.9831158321661425e-05, "loss": 0.7889, "step": 6799 }, { "epoch": 1.1100363250479572, "grad_norm": 1.7625430822372437, "learning_rate": 1.9831100249823732e-05, "loss": 0.6398, "step": 6800 }, { "epoch": 1.1101995836904617, "grad_norm": 1.7166260480880737, "learning_rate": 1.9831042168086125e-05, "loss": 0.6793, "step": 6801 }, { "epoch": 1.110362842332966, "grad_norm": 1.7962063550949097, "learning_rate": 1.983098407644866e-05, "loss": 0.6801, "step": 6802 }, { "epoch": 1.1105261009754703, "grad_norm": 1.418904185295105, "learning_rate": 1.98309259749114e-05, "loss": 0.5075, "step": 6803 }, { "epoch": 1.1106893596179748, "grad_norm": 1.7665963172912598, "learning_rate": 1.9830867863474395e-05, "loss": 0.6998, "step": 6804 }, { "epoch": 1.1108526182604792, "grad_norm": 2.005910634994507, "learning_rate": 1.983080974213771e-05, "loss": 0.7852, "step": 6805 }, { "epoch": 1.1110158769029836, "grad_norm": 1.6989368200302124, "learning_rate": 1.9830751610901404e-05, "loss": 0.6165, "step": 6806 }, { "epoch": 1.1111791355454879, "grad_norm": 1.8590635061264038, "learning_rate": 1.9830693469765534e-05, "loss": 0.7582, "step": 6807 }, { "epoch": 1.1113423941879923, "grad_norm": 2.2276952266693115, "learning_rate": 1.9830635318730155e-05, "loss": 0.7856, "step": 6808 }, { "epoch": 1.1115056528304967, "grad_norm": 1.7470234632492065, "learning_rate": 1.9830577157795333e-05, "loss": 0.5781, "step": 6809 }, { "epoch": 1.1116689114730012, "grad_norm": 1.6071053743362427, "learning_rate": 1.983051898696112e-05, "loss": 0.6398, "step": 6810 }, { "epoch": 1.1118321701155054, "grad_norm": 1.5759453773498535, "learning_rate": 1.9830460806227575e-05, "loss": 0.7185, "step": 6811 }, { "epoch": 1.1119954287580098, "grad_norm": 1.892674446105957, "learning_rate": 1.9830402615594765e-05, "loss": 0.698, "step": 6812 }, { "epoch": 1.1121586874005143, "grad_norm": 2.023233652114868, "learning_rate": 1.9830344415062735e-05, "loss": 0.7472, "step": 6813 }, { "epoch": 1.1123219460430187, "grad_norm": 1.8775781393051147, "learning_rate": 1.9830286204631556e-05, "loss": 0.7512, "step": 6814 }, { "epoch": 1.1124852046855231, "grad_norm": 2.041203498840332, "learning_rate": 1.9830227984301276e-05, "loss": 0.7348, "step": 6815 }, { "epoch": 1.1126484633280274, "grad_norm": 1.5735390186309814, "learning_rate": 1.9830169754071963e-05, "loss": 0.7381, "step": 6816 }, { "epoch": 1.1128117219705318, "grad_norm": 1.788425326347351, "learning_rate": 1.9830111513943673e-05, "loss": 0.7612, "step": 6817 }, { "epoch": 1.1129749806130362, "grad_norm": 1.8437559604644775, "learning_rate": 1.983005326391646e-05, "loss": 0.78, "step": 6818 }, { "epoch": 1.1131382392555407, "grad_norm": 1.8134492635726929, "learning_rate": 1.9829995003990387e-05, "loss": 0.7635, "step": 6819 }, { "epoch": 1.113301497898045, "grad_norm": 1.636628270149231, "learning_rate": 1.9829936734165512e-05, "loss": 0.7704, "step": 6820 }, { "epoch": 1.1134647565405493, "grad_norm": 1.6763769388198853, "learning_rate": 1.982987845444189e-05, "loss": 0.6359, "step": 6821 }, { "epoch": 1.1136280151830538, "grad_norm": 1.7487847805023193, "learning_rate": 1.982982016481959e-05, "loss": 0.8518, "step": 6822 }, { "epoch": 1.1137912738255582, "grad_norm": 2.1973586082458496, "learning_rate": 1.9829761865298658e-05, "loss": 0.7173, "step": 6823 }, { "epoch": 1.1139545324680626, "grad_norm": 1.5976512432098389, "learning_rate": 1.982970355587916e-05, "loss": 0.5756, "step": 6824 }, { "epoch": 1.1141177911105669, "grad_norm": 1.6528211832046509, "learning_rate": 1.9829645236561154e-05, "loss": 0.6819, "step": 6825 }, { "epoch": 1.1142810497530713, "grad_norm": 1.726807713508606, "learning_rate": 1.9829586907344697e-05, "loss": 0.7371, "step": 6826 }, { "epoch": 1.1144443083955757, "grad_norm": 1.6804795265197754, "learning_rate": 1.982952856822985e-05, "loss": 0.5118, "step": 6827 }, { "epoch": 1.1146075670380802, "grad_norm": 1.7619603872299194, "learning_rate": 1.982947021921667e-05, "loss": 0.6357, "step": 6828 }, { "epoch": 1.1147708256805844, "grad_norm": 1.529826045036316, "learning_rate": 1.9829411860305215e-05, "loss": 0.5842, "step": 6829 }, { "epoch": 1.1149340843230888, "grad_norm": 1.8984102010726929, "learning_rate": 1.9829353491495545e-05, "loss": 0.7354, "step": 6830 }, { "epoch": 1.1150973429655933, "grad_norm": 1.5339477062225342, "learning_rate": 1.982929511278772e-05, "loss": 0.6427, "step": 6831 }, { "epoch": 1.1152606016080977, "grad_norm": 1.8459383249282837, "learning_rate": 1.9829236724181794e-05, "loss": 0.7263, "step": 6832 }, { "epoch": 1.115423860250602, "grad_norm": 1.7333714962005615, "learning_rate": 1.9829178325677832e-05, "loss": 0.6706, "step": 6833 }, { "epoch": 1.1155871188931064, "grad_norm": 1.841983437538147, "learning_rate": 1.9829119917275887e-05, "loss": 0.8563, "step": 6834 }, { "epoch": 1.1157503775356108, "grad_norm": 1.87663733959198, "learning_rate": 1.9829061498976026e-05, "loss": 0.6598, "step": 6835 }, { "epoch": 1.1159136361781152, "grad_norm": 1.8269145488739014, "learning_rate": 1.9829003070778298e-05, "loss": 0.6476, "step": 6836 }, { "epoch": 1.1160768948206197, "grad_norm": 1.9679805040359497, "learning_rate": 1.982894463268277e-05, "loss": 0.7917, "step": 6837 }, { "epoch": 1.1162401534631239, "grad_norm": 2.2173662185668945, "learning_rate": 1.9828886184689494e-05, "loss": 0.7781, "step": 6838 }, { "epoch": 1.1164034121056283, "grad_norm": 2.1593503952026367, "learning_rate": 1.9828827726798538e-05, "loss": 0.8614, "step": 6839 }, { "epoch": 1.1165666707481328, "grad_norm": 2.1898624897003174, "learning_rate": 1.9828769259009947e-05, "loss": 0.6893, "step": 6840 }, { "epoch": 1.1167299293906372, "grad_norm": 1.8274704217910767, "learning_rate": 1.9828710781323793e-05, "loss": 0.6357, "step": 6841 }, { "epoch": 1.1168931880331414, "grad_norm": 1.6828792095184326, "learning_rate": 1.982865229374013e-05, "loss": 0.5303, "step": 6842 }, { "epoch": 1.1170564466756459, "grad_norm": 1.3662415742874146, "learning_rate": 1.9828593796259013e-05, "loss": 0.4519, "step": 6843 }, { "epoch": 1.1172197053181503, "grad_norm": 1.6215028762817383, "learning_rate": 1.982853528888051e-05, "loss": 0.6232, "step": 6844 }, { "epoch": 1.1173829639606547, "grad_norm": 1.80733323097229, "learning_rate": 1.9828476771604673e-05, "loss": 0.7416, "step": 6845 }, { "epoch": 1.117546222603159, "grad_norm": 2.053267478942871, "learning_rate": 1.982841824443156e-05, "loss": 0.6888, "step": 6846 }, { "epoch": 1.1177094812456634, "grad_norm": 2.2380573749542236, "learning_rate": 1.9828359707361232e-05, "loss": 0.8681, "step": 6847 }, { "epoch": 1.1178727398881678, "grad_norm": 1.7616380453109741, "learning_rate": 1.9828301160393753e-05, "loss": 0.5855, "step": 6848 }, { "epoch": 1.1180359985306723, "grad_norm": 2.110002040863037, "learning_rate": 1.9828242603529175e-05, "loss": 0.8064, "step": 6849 }, { "epoch": 1.1181992571731767, "grad_norm": 1.8278428316116333, "learning_rate": 1.9828184036767556e-05, "loss": 0.6246, "step": 6850 }, { "epoch": 1.118362515815681, "grad_norm": 1.9694550037384033, "learning_rate": 1.9828125460108964e-05, "loss": 0.6068, "step": 6851 }, { "epoch": 1.1185257744581854, "grad_norm": 2.0617997646331787, "learning_rate": 1.982806687355345e-05, "loss": 0.6433, "step": 6852 }, { "epoch": 1.1186890331006898, "grad_norm": 2.086246967315674, "learning_rate": 1.9828008277101075e-05, "loss": 0.8958, "step": 6853 }, { "epoch": 1.1188522917431942, "grad_norm": 2.010230541229248, "learning_rate": 1.9827949670751897e-05, "loss": 0.7442, "step": 6854 }, { "epoch": 1.1190155503856984, "grad_norm": 1.8758224248886108, "learning_rate": 1.9827891054505976e-05, "loss": 0.7477, "step": 6855 }, { "epoch": 1.1191788090282029, "grad_norm": 2.02659010887146, "learning_rate": 1.9827832428363373e-05, "loss": 0.7538, "step": 6856 }, { "epoch": 1.1193420676707073, "grad_norm": 2.0248045921325684, "learning_rate": 1.9827773792324146e-05, "loss": 0.8045, "step": 6857 }, { "epoch": 1.1195053263132118, "grad_norm": 1.6483783721923828, "learning_rate": 1.982771514638835e-05, "loss": 0.6714, "step": 6858 }, { "epoch": 1.1196685849557162, "grad_norm": 2.2637393474578857, "learning_rate": 1.982765649055605e-05, "loss": 0.898, "step": 6859 }, { "epoch": 1.1198318435982204, "grad_norm": 2.0730416774749756, "learning_rate": 1.9827597824827306e-05, "loss": 0.7739, "step": 6860 }, { "epoch": 1.1199951022407248, "grad_norm": 1.6173166036605835, "learning_rate": 1.982753914920217e-05, "loss": 0.5648, "step": 6861 }, { "epoch": 1.1201583608832293, "grad_norm": 1.6421947479248047, "learning_rate": 1.98274804636807e-05, "loss": 0.6001, "step": 6862 }, { "epoch": 1.1203216195257337, "grad_norm": 1.5277462005615234, "learning_rate": 1.9827421768262966e-05, "loss": 0.6189, "step": 6863 }, { "epoch": 1.120484878168238, "grad_norm": 2.0833053588867188, "learning_rate": 1.982736306294902e-05, "loss": 0.7544, "step": 6864 }, { "epoch": 1.1206481368107424, "grad_norm": 1.8574639558792114, "learning_rate": 1.982730434773892e-05, "loss": 0.5558, "step": 6865 }, { "epoch": 1.1208113954532468, "grad_norm": 1.820709228515625, "learning_rate": 1.982724562263273e-05, "loss": 0.6608, "step": 6866 }, { "epoch": 1.1209746540957513, "grad_norm": 1.5128601789474487, "learning_rate": 1.9827186887630505e-05, "loss": 0.5032, "step": 6867 }, { "epoch": 1.1211379127382557, "grad_norm": 2.0339748859405518, "learning_rate": 1.9827128142732304e-05, "loss": 0.7245, "step": 6868 }, { "epoch": 1.12130117138076, "grad_norm": 1.9182184934616089, "learning_rate": 1.9827069387938187e-05, "loss": 0.7249, "step": 6869 }, { "epoch": 1.1214644300232643, "grad_norm": 1.9852222204208374, "learning_rate": 1.9827010623248217e-05, "loss": 0.7908, "step": 6870 }, { "epoch": 1.1216276886657688, "grad_norm": 2.0301201343536377, "learning_rate": 1.9826951848662447e-05, "loss": 0.7463, "step": 6871 }, { "epoch": 1.1217909473082732, "grad_norm": 2.267519235610962, "learning_rate": 1.9826893064180942e-05, "loss": 1.2568, "step": 6872 }, { "epoch": 1.1219542059507774, "grad_norm": 2.128164529800415, "learning_rate": 1.9826834269803756e-05, "loss": 0.897, "step": 6873 }, { "epoch": 1.1221174645932819, "grad_norm": 1.8943778276443481, "learning_rate": 1.982677546553095e-05, "loss": 0.6516, "step": 6874 }, { "epoch": 1.1222807232357863, "grad_norm": 1.9815822839736938, "learning_rate": 1.9826716651362585e-05, "loss": 0.7964, "step": 6875 }, { "epoch": 1.1224439818782908, "grad_norm": 1.4860695600509644, "learning_rate": 1.982665782729872e-05, "loss": 0.6318, "step": 6876 }, { "epoch": 1.122607240520795, "grad_norm": 2.015270471572876, "learning_rate": 1.9826598993339412e-05, "loss": 0.7627, "step": 6877 }, { "epoch": 1.1227704991632994, "grad_norm": 1.7218314409255981, "learning_rate": 1.982654014948472e-05, "loss": 0.5873, "step": 6878 }, { "epoch": 1.1229337578058038, "grad_norm": 1.775505781173706, "learning_rate": 1.9826481295734708e-05, "loss": 0.6014, "step": 6879 }, { "epoch": 1.1230970164483083, "grad_norm": 1.2582354545593262, "learning_rate": 1.982642243208943e-05, "loss": 0.4573, "step": 6880 }, { "epoch": 1.1232602750908127, "grad_norm": 1.7087342739105225, "learning_rate": 1.9826363558548947e-05, "loss": 0.6562, "step": 6881 }, { "epoch": 1.123423533733317, "grad_norm": 1.7182787656784058, "learning_rate": 1.982630467511332e-05, "loss": 0.6444, "step": 6882 }, { "epoch": 1.1235867923758214, "grad_norm": 1.9483979940414429, "learning_rate": 1.9826245781782604e-05, "loss": 0.7652, "step": 6883 }, { "epoch": 1.1237500510183258, "grad_norm": 2.0538694858551025, "learning_rate": 1.9826186878556862e-05, "loss": 0.8064, "step": 6884 }, { "epoch": 1.1239133096608303, "grad_norm": 1.6622323989868164, "learning_rate": 1.9826127965436153e-05, "loss": 0.698, "step": 6885 }, { "epoch": 1.1240765683033345, "grad_norm": 1.5758603811264038, "learning_rate": 1.9826069042420537e-05, "loss": 0.7471, "step": 6886 }, { "epoch": 1.124239826945839, "grad_norm": 2.111633539199829, "learning_rate": 1.982601010951007e-05, "loss": 0.7788, "step": 6887 }, { "epoch": 1.1244030855883433, "grad_norm": 1.8844319581985474, "learning_rate": 1.9825951166704814e-05, "loss": 0.7439, "step": 6888 }, { "epoch": 1.1245663442308478, "grad_norm": 1.818794846534729, "learning_rate": 1.982589221400483e-05, "loss": 0.6433, "step": 6889 }, { "epoch": 1.124729602873352, "grad_norm": 1.547567367553711, "learning_rate": 1.9825833251410173e-05, "loss": 0.6391, "step": 6890 }, { "epoch": 1.1248928615158564, "grad_norm": 1.7218223810195923, "learning_rate": 1.9825774278920904e-05, "loss": 0.7365, "step": 6891 }, { "epoch": 1.1250561201583609, "grad_norm": 1.700149655342102, "learning_rate": 1.9825715296537083e-05, "loss": 0.6649, "step": 6892 }, { "epoch": 1.1252193788008653, "grad_norm": 1.910664439201355, "learning_rate": 1.982565630425877e-05, "loss": 0.6421, "step": 6893 }, { "epoch": 1.1253826374433697, "grad_norm": 1.937506079673767, "learning_rate": 1.9825597302086024e-05, "loss": 0.6958, "step": 6894 }, { "epoch": 1.125545896085874, "grad_norm": 1.831484079360962, "learning_rate": 1.9825538290018903e-05, "loss": 0.7641, "step": 6895 }, { "epoch": 1.1257091547283784, "grad_norm": 1.9198724031448364, "learning_rate": 1.982547926805747e-05, "loss": 0.768, "step": 6896 }, { "epoch": 1.1258724133708828, "grad_norm": 1.6930948495864868, "learning_rate": 1.982542023620178e-05, "loss": 0.6376, "step": 6897 }, { "epoch": 1.1260356720133873, "grad_norm": 1.803470492362976, "learning_rate": 1.9825361194451895e-05, "loss": 0.7914, "step": 6898 }, { "epoch": 1.1261989306558915, "grad_norm": 1.538540244102478, "learning_rate": 1.982530214280787e-05, "loss": 0.571, "step": 6899 }, { "epoch": 1.126362189298396, "grad_norm": 1.7105906009674072, "learning_rate": 1.9825243081269778e-05, "loss": 0.6593, "step": 6900 }, { "epoch": 1.1265254479409004, "grad_norm": 1.6625746488571167, "learning_rate": 1.982518400983766e-05, "loss": 0.6883, "step": 6901 }, { "epoch": 1.1266887065834048, "grad_norm": 1.6381312608718872, "learning_rate": 1.9825124928511588e-05, "loss": 0.7135, "step": 6902 }, { "epoch": 1.1268519652259092, "grad_norm": 1.7359404563903809, "learning_rate": 1.9825065837291616e-05, "loss": 0.7304, "step": 6903 }, { "epoch": 1.1270152238684135, "grad_norm": 2.1568503379821777, "learning_rate": 1.982500673617781e-05, "loss": 0.7397, "step": 6904 }, { "epoch": 1.127178482510918, "grad_norm": 1.876868486404419, "learning_rate": 1.9824947625170216e-05, "loss": 0.7381, "step": 6905 }, { "epoch": 1.1273417411534223, "grad_norm": 1.8094137907028198, "learning_rate": 1.982488850426891e-05, "loss": 0.7562, "step": 6906 }, { "epoch": 1.1275049997959268, "grad_norm": 1.7405377626419067, "learning_rate": 1.9824829373473943e-05, "loss": 0.6241, "step": 6907 }, { "epoch": 1.127668258438431, "grad_norm": 1.9996654987335205, "learning_rate": 1.982477023278537e-05, "loss": 0.7316, "step": 6908 }, { "epoch": 1.1278315170809354, "grad_norm": 2.032029867172241, "learning_rate": 1.982471108220326e-05, "loss": 0.6398, "step": 6909 }, { "epoch": 1.1279947757234399, "grad_norm": 1.5801384449005127, "learning_rate": 1.982465192172767e-05, "loss": 0.5926, "step": 6910 }, { "epoch": 1.1281580343659443, "grad_norm": 1.6958065032958984, "learning_rate": 1.9824592751358656e-05, "loss": 0.5564, "step": 6911 }, { "epoch": 1.1283212930084487, "grad_norm": 1.8347259759902954, "learning_rate": 1.9824533571096278e-05, "loss": 0.6802, "step": 6912 }, { "epoch": 1.128484551650953, "grad_norm": 1.6899250745773315, "learning_rate": 1.9824474380940598e-05, "loss": 0.5774, "step": 6913 }, { "epoch": 1.1286478102934574, "grad_norm": 1.5960321426391602, "learning_rate": 1.982441518089168e-05, "loss": 0.6177, "step": 6914 }, { "epoch": 1.1288110689359618, "grad_norm": 1.7834609746932983, "learning_rate": 1.9824355970949574e-05, "loss": 0.752, "step": 6915 }, { "epoch": 1.1289743275784663, "grad_norm": 2.014601469039917, "learning_rate": 1.9824296751114345e-05, "loss": 0.8105, "step": 6916 }, { "epoch": 1.1291375862209705, "grad_norm": 1.9496270418167114, "learning_rate": 1.9824237521386052e-05, "loss": 0.7297, "step": 6917 }, { "epoch": 1.129300844863475, "grad_norm": 1.708036184310913, "learning_rate": 1.9824178281764753e-05, "loss": 0.6581, "step": 6918 }, { "epoch": 1.1294641035059794, "grad_norm": 1.8401415348052979, "learning_rate": 1.982411903225051e-05, "loss": 0.6348, "step": 6919 }, { "epoch": 1.1296273621484838, "grad_norm": 1.9307568073272705, "learning_rate": 1.982405977284338e-05, "loss": 0.7795, "step": 6920 }, { "epoch": 1.1297906207909882, "grad_norm": 2.1299707889556885, "learning_rate": 1.9824000503543427e-05, "loss": 0.7177, "step": 6921 }, { "epoch": 1.1299538794334925, "grad_norm": 1.9480713605880737, "learning_rate": 1.982394122435071e-05, "loss": 0.7472, "step": 6922 }, { "epoch": 1.130117138075997, "grad_norm": 2.2438302040100098, "learning_rate": 1.9823881935265283e-05, "loss": 0.5738, "step": 6923 }, { "epoch": 1.1302803967185013, "grad_norm": 1.8637434244155884, "learning_rate": 1.982382263628721e-05, "loss": 0.6903, "step": 6924 }, { "epoch": 1.1304436553610056, "grad_norm": 1.7981104850769043, "learning_rate": 1.982376332741655e-05, "loss": 0.6454, "step": 6925 }, { "epoch": 1.13060691400351, "grad_norm": 1.9311376810073853, "learning_rate": 1.9823704008653365e-05, "loss": 0.6819, "step": 6926 }, { "epoch": 1.1307701726460144, "grad_norm": 1.835029125213623, "learning_rate": 1.9823644679997713e-05, "loss": 0.666, "step": 6927 }, { "epoch": 1.1309334312885189, "grad_norm": 1.8951245546340942, "learning_rate": 1.982358534144965e-05, "loss": 0.6749, "step": 6928 }, { "epoch": 1.1310966899310233, "grad_norm": 1.8287206888198853, "learning_rate": 1.9823525993009243e-05, "loss": 0.6901, "step": 6929 }, { "epoch": 1.1312599485735275, "grad_norm": 2.0255959033966064, "learning_rate": 1.9823466634676544e-05, "loss": 0.69, "step": 6930 }, { "epoch": 1.131423207216032, "grad_norm": 1.9068493843078613, "learning_rate": 1.982340726645162e-05, "loss": 0.6113, "step": 6931 }, { "epoch": 1.1315864658585364, "grad_norm": 1.5973666906356812, "learning_rate": 1.9823347888334527e-05, "loss": 0.6205, "step": 6932 }, { "epoch": 1.1317497245010408, "grad_norm": 2.0450289249420166, "learning_rate": 1.9823288500325324e-05, "loss": 0.7166, "step": 6933 }, { "epoch": 1.131912983143545, "grad_norm": 2.0293185710906982, "learning_rate": 1.9823229102424074e-05, "loss": 0.7862, "step": 6934 }, { "epoch": 1.1320762417860495, "grad_norm": 1.8720734119415283, "learning_rate": 1.9823169694630834e-05, "loss": 0.688, "step": 6935 }, { "epoch": 1.132239500428554, "grad_norm": 2.0822865962982178, "learning_rate": 1.9823110276945663e-05, "loss": 0.6992, "step": 6936 }, { "epoch": 1.1324027590710584, "grad_norm": 2.3082008361816406, "learning_rate": 1.9823050849368624e-05, "loss": 0.8385, "step": 6937 }, { "epoch": 1.1325660177135628, "grad_norm": 1.9729644060134888, "learning_rate": 1.9822991411899774e-05, "loss": 0.7139, "step": 6938 }, { "epoch": 1.132729276356067, "grad_norm": 1.766169548034668, "learning_rate": 1.9822931964539176e-05, "loss": 0.6878, "step": 6939 }, { "epoch": 1.1328925349985715, "grad_norm": 1.7650502920150757, "learning_rate": 1.982287250728689e-05, "loss": 0.6276, "step": 6940 }, { "epoch": 1.133055793641076, "grad_norm": 1.562251091003418, "learning_rate": 1.982281304014297e-05, "loss": 0.6002, "step": 6941 }, { "epoch": 1.1332190522835803, "grad_norm": 1.899066686630249, "learning_rate": 1.982275356310748e-05, "loss": 0.6751, "step": 6942 }, { "epoch": 1.1333823109260845, "grad_norm": 1.3933321237564087, "learning_rate": 1.9822694076180486e-05, "loss": 0.506, "step": 6943 }, { "epoch": 1.133545569568589, "grad_norm": 1.668020248413086, "learning_rate": 1.9822634579362034e-05, "loss": 0.6858, "step": 6944 }, { "epoch": 1.1337088282110934, "grad_norm": 2.193957805633545, "learning_rate": 1.9822575072652195e-05, "loss": 0.8343, "step": 6945 }, { "epoch": 1.1338720868535979, "grad_norm": 2.141026020050049, "learning_rate": 1.9822515556051024e-05, "loss": 0.7218, "step": 6946 }, { "epoch": 1.1340353454961023, "grad_norm": 2.370328664779663, "learning_rate": 1.9822456029558582e-05, "loss": 0.8837, "step": 6947 }, { "epoch": 1.1341986041386065, "grad_norm": 1.751165509223938, "learning_rate": 1.9822396493174933e-05, "loss": 0.7019, "step": 6948 }, { "epoch": 1.134361862781111, "grad_norm": 2.104771375656128, "learning_rate": 1.982233694690013e-05, "loss": 0.8495, "step": 6949 }, { "epoch": 1.1345251214236154, "grad_norm": 1.864200472831726, "learning_rate": 1.982227739073424e-05, "loss": 0.7545, "step": 6950 }, { "epoch": 1.1346883800661198, "grad_norm": 1.9873508214950562, "learning_rate": 1.9822217824677313e-05, "loss": 0.6727, "step": 6951 }, { "epoch": 1.134851638708624, "grad_norm": 1.7002352476119995, "learning_rate": 1.9822158248729422e-05, "loss": 0.6552, "step": 6952 }, { "epoch": 1.1350148973511285, "grad_norm": 1.9444044828414917, "learning_rate": 1.9822098662890616e-05, "loss": 0.7116, "step": 6953 }, { "epoch": 1.135178155993633, "grad_norm": 1.8556920289993286, "learning_rate": 1.9822039067160962e-05, "loss": 0.6784, "step": 6954 }, { "epoch": 1.1353414146361374, "grad_norm": 1.7590546607971191, "learning_rate": 1.9821979461540515e-05, "loss": 0.6829, "step": 6955 }, { "epoch": 1.1355046732786418, "grad_norm": 1.465651512145996, "learning_rate": 1.9821919846029338e-05, "loss": 0.5867, "step": 6956 }, { "epoch": 1.135667931921146, "grad_norm": 1.775283932685852, "learning_rate": 1.982186022062749e-05, "loss": 0.6874, "step": 6957 }, { "epoch": 1.1358311905636505, "grad_norm": 1.9980140924453735, "learning_rate": 1.982180058533503e-05, "loss": 0.7226, "step": 6958 }, { "epoch": 1.1359944492061549, "grad_norm": 1.4668790102005005, "learning_rate": 1.9821740940152022e-05, "loss": 0.6143, "step": 6959 }, { "epoch": 1.1361577078486593, "grad_norm": 2.04335355758667, "learning_rate": 1.9821681285078522e-05, "loss": 0.7531, "step": 6960 }, { "epoch": 1.1363209664911635, "grad_norm": 1.9430917501449585, "learning_rate": 1.9821621620114594e-05, "loss": 0.7174, "step": 6961 }, { "epoch": 1.136484225133668, "grad_norm": 1.930712342262268, "learning_rate": 1.9821561945260292e-05, "loss": 0.7249, "step": 6962 }, { "epoch": 1.1366474837761724, "grad_norm": 1.9937719106674194, "learning_rate": 1.982150226051568e-05, "loss": 0.6685, "step": 6963 }, { "epoch": 1.1368107424186769, "grad_norm": 1.8762524127960205, "learning_rate": 1.9821442565880823e-05, "loss": 0.7068, "step": 6964 }, { "epoch": 1.1369740010611813, "grad_norm": 1.6878684759140015, "learning_rate": 1.982138286135577e-05, "loss": 0.6735, "step": 6965 }, { "epoch": 1.1371372597036855, "grad_norm": 1.6689066886901855, "learning_rate": 1.982132314694059e-05, "loss": 0.7429, "step": 6966 }, { "epoch": 1.13730051834619, "grad_norm": 1.8950519561767578, "learning_rate": 1.982126342263534e-05, "loss": 0.749, "step": 6967 }, { "epoch": 1.1374637769886944, "grad_norm": 1.7202976942062378, "learning_rate": 1.982120368844008e-05, "loss": 0.6911, "step": 6968 }, { "epoch": 1.1376270356311986, "grad_norm": 1.565660834312439, "learning_rate": 1.982114394435487e-05, "loss": 0.6263, "step": 6969 }, { "epoch": 1.137790294273703, "grad_norm": 1.8304948806762695, "learning_rate": 1.982108419037977e-05, "loss": 0.6627, "step": 6970 }, { "epoch": 1.1379535529162075, "grad_norm": 1.6983762979507446, "learning_rate": 1.9821024426514843e-05, "loss": 0.6886, "step": 6971 }, { "epoch": 1.138116811558712, "grad_norm": 1.974568486213684, "learning_rate": 1.9820964652760147e-05, "loss": 0.6252, "step": 6972 }, { "epoch": 1.1382800702012164, "grad_norm": 1.5335415601730347, "learning_rate": 1.982090486911574e-05, "loss": 0.4782, "step": 6973 }, { "epoch": 1.1384433288437206, "grad_norm": 2.145669460296631, "learning_rate": 1.9820845075581686e-05, "loss": 0.793, "step": 6974 }, { "epoch": 1.138606587486225, "grad_norm": 1.9396700859069824, "learning_rate": 1.9820785272158043e-05, "loss": 0.8486, "step": 6975 }, { "epoch": 1.1387698461287294, "grad_norm": 1.717542290687561, "learning_rate": 1.9820725458844873e-05, "loss": 0.6977, "step": 6976 }, { "epoch": 1.1389331047712339, "grad_norm": 1.8559787273406982, "learning_rate": 1.982066563564223e-05, "loss": 0.6823, "step": 6977 }, { "epoch": 1.139096363413738, "grad_norm": 1.7434706687927246, "learning_rate": 1.9820605802550187e-05, "loss": 0.6563, "step": 6978 }, { "epoch": 1.1392596220562425, "grad_norm": 1.943930745124817, "learning_rate": 1.9820545959568793e-05, "loss": 0.87, "step": 6979 }, { "epoch": 1.139422880698747, "grad_norm": 1.7395485639572144, "learning_rate": 1.982048610669811e-05, "loss": 0.6553, "step": 6980 }, { "epoch": 1.1395861393412514, "grad_norm": 2.004188060760498, "learning_rate": 1.9820426243938203e-05, "loss": 0.7147, "step": 6981 }, { "epoch": 1.1397493979837559, "grad_norm": 1.7561771869659424, "learning_rate": 1.9820366371289128e-05, "loss": 0.7855, "step": 6982 }, { "epoch": 1.13991265662626, "grad_norm": 1.744246006011963, "learning_rate": 1.9820306488750947e-05, "loss": 0.7072, "step": 6983 }, { "epoch": 1.1400759152687645, "grad_norm": 1.7740131616592407, "learning_rate": 1.982024659632372e-05, "loss": 0.6253, "step": 6984 }, { "epoch": 1.140239173911269, "grad_norm": 2.2510316371917725, "learning_rate": 1.9820186694007506e-05, "loss": 0.6562, "step": 6985 }, { "epoch": 1.1404024325537734, "grad_norm": 1.735775113105774, "learning_rate": 1.9820126781802365e-05, "loss": 0.6647, "step": 6986 }, { "epoch": 1.1405656911962776, "grad_norm": 1.6217494010925293, "learning_rate": 1.9820066859708366e-05, "loss": 0.6782, "step": 6987 }, { "epoch": 1.140728949838782, "grad_norm": 1.8319271802902222, "learning_rate": 1.9820006927725558e-05, "loss": 0.6714, "step": 6988 }, { "epoch": 1.1408922084812865, "grad_norm": 1.9567840099334717, "learning_rate": 1.9819946985854003e-05, "loss": 0.5958, "step": 6989 }, { "epoch": 1.141055467123791, "grad_norm": 1.9703575372695923, "learning_rate": 1.9819887034093768e-05, "loss": 0.715, "step": 6990 }, { "epoch": 1.1412187257662953, "grad_norm": 2.015397310256958, "learning_rate": 1.9819827072444905e-05, "loss": 0.8536, "step": 6991 }, { "epoch": 1.1413819844087996, "grad_norm": 2.131084442138672, "learning_rate": 1.9819767100907485e-05, "loss": 0.9671, "step": 6992 }, { "epoch": 1.141545243051304, "grad_norm": 1.6590217351913452, "learning_rate": 1.9819707119481558e-05, "loss": 0.6252, "step": 6993 }, { "epoch": 1.1417085016938084, "grad_norm": 1.8757551908493042, "learning_rate": 1.981964712816719e-05, "loss": 0.6873, "step": 6994 }, { "epoch": 1.1418717603363129, "grad_norm": 1.8032195568084717, "learning_rate": 1.981958712696444e-05, "loss": 0.719, "step": 6995 }, { "epoch": 1.142035018978817, "grad_norm": 1.5081963539123535, "learning_rate": 1.9819527115873365e-05, "loss": 0.6256, "step": 6996 }, { "epoch": 1.1421982776213215, "grad_norm": 2.0163052082061768, "learning_rate": 1.981946709489403e-05, "loss": 0.768, "step": 6997 }, { "epoch": 1.142361536263826, "grad_norm": 1.6024318933486938, "learning_rate": 1.9819407064026497e-05, "loss": 0.5547, "step": 6998 }, { "epoch": 1.1425247949063304, "grad_norm": 1.6843394041061401, "learning_rate": 1.9819347023270825e-05, "loss": 0.6469, "step": 6999 }, { "epoch": 1.1426880535488348, "grad_norm": 1.9185224771499634, "learning_rate": 1.9819286972627066e-05, "loss": 0.7551, "step": 7000 }, { "epoch": 1.142851312191339, "grad_norm": 1.907623052597046, "learning_rate": 1.9819226912095296e-05, "loss": 0.7487, "step": 7001 }, { "epoch": 1.1430145708338435, "grad_norm": 1.6331384181976318, "learning_rate": 1.981916684167556e-05, "loss": 0.6189, "step": 7002 }, { "epoch": 1.143177829476348, "grad_norm": 1.7796825170516968, "learning_rate": 1.981910676136793e-05, "loss": 0.7001, "step": 7003 }, { "epoch": 1.1433410881188524, "grad_norm": 1.947471261024475, "learning_rate": 1.9819046671172462e-05, "loss": 0.6548, "step": 7004 }, { "epoch": 1.1435043467613566, "grad_norm": 1.510225534439087, "learning_rate": 1.9818986571089213e-05, "loss": 0.6269, "step": 7005 }, { "epoch": 1.143667605403861, "grad_norm": 2.231996536254883, "learning_rate": 1.9818926461118254e-05, "loss": 0.6445, "step": 7006 }, { "epoch": 1.1438308640463655, "grad_norm": 1.8167731761932373, "learning_rate": 1.981886634125963e-05, "loss": 0.746, "step": 7007 }, { "epoch": 1.14399412268887, "grad_norm": 1.9367519617080688, "learning_rate": 1.9818806211513414e-05, "loss": 0.773, "step": 7008 }, { "epoch": 1.1441573813313743, "grad_norm": 1.8689509630203247, "learning_rate": 1.9818746071879666e-05, "loss": 0.7417, "step": 7009 }, { "epoch": 1.1443206399738786, "grad_norm": 1.78598153591156, "learning_rate": 1.981868592235844e-05, "loss": 0.6231, "step": 7010 }, { "epoch": 1.144483898616383, "grad_norm": 2.1730446815490723, "learning_rate": 1.98186257629498e-05, "loss": 0.8011, "step": 7011 }, { "epoch": 1.1446471572588874, "grad_norm": 1.4840019941329956, "learning_rate": 1.981856559365381e-05, "loss": 0.6577, "step": 7012 }, { "epoch": 1.1448104159013919, "grad_norm": 1.9703127145767212, "learning_rate": 1.981850541447052e-05, "loss": 0.8455, "step": 7013 }, { "epoch": 1.144973674543896, "grad_norm": 1.7421993017196655, "learning_rate": 1.9818445225400004e-05, "loss": 0.6875, "step": 7014 }, { "epoch": 1.1451369331864005, "grad_norm": 1.710767388343811, "learning_rate": 1.9818385026442314e-05, "loss": 0.6877, "step": 7015 }, { "epoch": 1.145300191828905, "grad_norm": 1.7985020875930786, "learning_rate": 1.981832481759751e-05, "loss": 0.6734, "step": 7016 }, { "epoch": 1.1454634504714094, "grad_norm": 1.6702628135681152, "learning_rate": 1.981826459886566e-05, "loss": 0.6299, "step": 7017 }, { "epoch": 1.1456267091139136, "grad_norm": 1.8287038803100586, "learning_rate": 1.981820437024682e-05, "loss": 0.6199, "step": 7018 }, { "epoch": 1.145789967756418, "grad_norm": 1.996075987815857, "learning_rate": 1.981814413174105e-05, "loss": 0.7018, "step": 7019 }, { "epoch": 1.1459532263989225, "grad_norm": 1.8047541379928589, "learning_rate": 1.981808388334841e-05, "loss": 0.6946, "step": 7020 }, { "epoch": 1.146116485041427, "grad_norm": 1.895222783088684, "learning_rate": 1.981802362506896e-05, "loss": 0.7188, "step": 7021 }, { "epoch": 1.1462797436839312, "grad_norm": 1.7218928337097168, "learning_rate": 1.9817963356902768e-05, "loss": 0.6843, "step": 7022 }, { "epoch": 1.1464430023264356, "grad_norm": 2.0415101051330566, "learning_rate": 1.9817903078849884e-05, "loss": 0.706, "step": 7023 }, { "epoch": 1.14660626096894, "grad_norm": 2.0164825916290283, "learning_rate": 1.981784279091038e-05, "loss": 0.7478, "step": 7024 }, { "epoch": 1.1467695196114445, "grad_norm": 1.7963260412216187, "learning_rate": 1.981778249308431e-05, "loss": 0.7191, "step": 7025 }, { "epoch": 1.146932778253949, "grad_norm": 1.9597198963165283, "learning_rate": 1.9817722185371733e-05, "loss": 0.6932, "step": 7026 }, { "epoch": 1.1470960368964531, "grad_norm": 2.1291117668151855, "learning_rate": 1.981766186777271e-05, "loss": 0.828, "step": 7027 }, { "epoch": 1.1472592955389576, "grad_norm": 1.851850152015686, "learning_rate": 1.981760154028731e-05, "loss": 0.6523, "step": 7028 }, { "epoch": 1.147422554181462, "grad_norm": 1.885074496269226, "learning_rate": 1.9817541202915586e-05, "loss": 0.6985, "step": 7029 }, { "epoch": 1.1475858128239664, "grad_norm": 1.7407574653625488, "learning_rate": 1.98174808556576e-05, "loss": 0.6714, "step": 7030 }, { "epoch": 1.1477490714664706, "grad_norm": 2.3079099655151367, "learning_rate": 1.981742049851341e-05, "loss": 0.8315, "step": 7031 }, { "epoch": 1.147912330108975, "grad_norm": 2.117048501968384, "learning_rate": 1.9817360131483086e-05, "loss": 0.8309, "step": 7032 }, { "epoch": 1.1480755887514795, "grad_norm": 1.8486849069595337, "learning_rate": 1.981729975456668e-05, "loss": 0.7698, "step": 7033 }, { "epoch": 1.148238847393984, "grad_norm": 1.8877975940704346, "learning_rate": 1.9817239367764257e-05, "loss": 0.7791, "step": 7034 }, { "epoch": 1.1484021060364884, "grad_norm": 2.020747184753418, "learning_rate": 1.981717897107587e-05, "loss": 0.74, "step": 7035 }, { "epoch": 1.1485653646789926, "grad_norm": 2.266770601272583, "learning_rate": 1.9817118564501597e-05, "loss": 0.7876, "step": 7036 }, { "epoch": 1.148728623321497, "grad_norm": 1.847443699836731, "learning_rate": 1.981705814804148e-05, "loss": 0.7213, "step": 7037 }, { "epoch": 1.1488918819640015, "grad_norm": 1.7767583131790161, "learning_rate": 1.9816997721695593e-05, "loss": 0.7806, "step": 7038 }, { "epoch": 1.149055140606506, "grad_norm": 1.6770697832107544, "learning_rate": 1.9816937285463992e-05, "loss": 0.6639, "step": 7039 }, { "epoch": 1.1492183992490101, "grad_norm": 1.8690392971038818, "learning_rate": 1.9816876839346734e-05, "loss": 0.8025, "step": 7040 }, { "epoch": 1.1493816578915146, "grad_norm": 2.367093324661255, "learning_rate": 1.9816816383343886e-05, "loss": 0.607, "step": 7041 }, { "epoch": 1.149544916534019, "grad_norm": 1.757157325744629, "learning_rate": 1.9816755917455507e-05, "loss": 0.663, "step": 7042 }, { "epoch": 1.1497081751765235, "grad_norm": 1.756466269493103, "learning_rate": 1.9816695441681653e-05, "loss": 0.858, "step": 7043 }, { "epoch": 1.149871433819028, "grad_norm": 1.6544139385223389, "learning_rate": 1.9816634956022397e-05, "loss": 0.7734, "step": 7044 }, { "epoch": 1.1500346924615321, "grad_norm": 1.8360061645507812, "learning_rate": 1.9816574460477788e-05, "loss": 0.7147, "step": 7045 }, { "epoch": 1.1501979511040366, "grad_norm": 1.7487989664077759, "learning_rate": 1.9816513955047888e-05, "loss": 0.6487, "step": 7046 }, { "epoch": 1.150361209746541, "grad_norm": 2.075815439224243, "learning_rate": 1.9816453439732764e-05, "loss": 0.7866, "step": 7047 }, { "epoch": 1.1505244683890454, "grad_norm": 1.8106496334075928, "learning_rate": 1.9816392914532475e-05, "loss": 0.7671, "step": 7048 }, { "epoch": 1.1506877270315496, "grad_norm": 1.978122591972351, "learning_rate": 1.981633237944708e-05, "loss": 0.6661, "step": 7049 }, { "epoch": 1.150850985674054, "grad_norm": 1.87139093875885, "learning_rate": 1.9816271834476642e-05, "loss": 0.7516, "step": 7050 }, { "epoch": 1.1510142443165585, "grad_norm": 2.12522292137146, "learning_rate": 1.981621127962122e-05, "loss": 0.73, "step": 7051 }, { "epoch": 1.151177502959063, "grad_norm": 1.7314891815185547, "learning_rate": 1.9816150714880874e-05, "loss": 0.7127, "step": 7052 }, { "epoch": 1.1513407616015674, "grad_norm": 1.875504970550537, "learning_rate": 1.9816090140255667e-05, "loss": 0.7278, "step": 7053 }, { "epoch": 1.1515040202440716, "grad_norm": 1.7297595739364624, "learning_rate": 1.9816029555745663e-05, "loss": 0.719, "step": 7054 }, { "epoch": 1.151667278886576, "grad_norm": 1.7305926084518433, "learning_rate": 1.9815968961350916e-05, "loss": 0.7204, "step": 7055 }, { "epoch": 1.1518305375290805, "grad_norm": 2.2047476768493652, "learning_rate": 1.9815908357071496e-05, "loss": 0.8031, "step": 7056 }, { "epoch": 1.151993796171585, "grad_norm": 1.5716166496276855, "learning_rate": 1.9815847742907458e-05, "loss": 0.6744, "step": 7057 }, { "epoch": 1.1521570548140891, "grad_norm": 1.672675609588623, "learning_rate": 1.9815787118858857e-05, "loss": 0.6443, "step": 7058 }, { "epoch": 1.1523203134565936, "grad_norm": 1.9502196311950684, "learning_rate": 1.9815726484925768e-05, "loss": 0.7718, "step": 7059 }, { "epoch": 1.152483572099098, "grad_norm": 1.7567880153656006, "learning_rate": 1.981566584110824e-05, "loss": 0.6525, "step": 7060 }, { "epoch": 1.1526468307416025, "grad_norm": 1.9837549924850464, "learning_rate": 1.9815605187406345e-05, "loss": 0.7317, "step": 7061 }, { "epoch": 1.152810089384107, "grad_norm": 1.8186602592468262, "learning_rate": 1.9815544523820134e-05, "loss": 0.7096, "step": 7062 }, { "epoch": 1.1529733480266111, "grad_norm": 1.7165849208831787, "learning_rate": 1.9815483850349675e-05, "loss": 0.7509, "step": 7063 }, { "epoch": 1.1531366066691155, "grad_norm": 1.9084129333496094, "learning_rate": 1.9815423166995025e-05, "loss": 0.7224, "step": 7064 }, { "epoch": 1.15329986531162, "grad_norm": 2.2137844562530518, "learning_rate": 1.9815362473756247e-05, "loss": 0.8647, "step": 7065 }, { "epoch": 1.1534631239541242, "grad_norm": 2.0009689331054688, "learning_rate": 1.98153017706334e-05, "loss": 0.7212, "step": 7066 }, { "epoch": 1.1536263825966286, "grad_norm": 1.6575508117675781, "learning_rate": 1.9815241057626547e-05, "loss": 0.746, "step": 7067 }, { "epoch": 1.153789641239133, "grad_norm": 1.8437013626098633, "learning_rate": 1.981518033473575e-05, "loss": 0.7092, "step": 7068 }, { "epoch": 1.1539528998816375, "grad_norm": 1.8319963216781616, "learning_rate": 1.981511960196107e-05, "loss": 0.7405, "step": 7069 }, { "epoch": 1.154116158524142, "grad_norm": 1.8586317300796509, "learning_rate": 1.9815058859302563e-05, "loss": 0.668, "step": 7070 }, { "epoch": 1.1542794171666462, "grad_norm": 1.9352737665176392, "learning_rate": 1.9814998106760297e-05, "loss": 0.8728, "step": 7071 }, { "epoch": 1.1544426758091506, "grad_norm": 1.8377000093460083, "learning_rate": 1.981493734433433e-05, "loss": 0.6248, "step": 7072 }, { "epoch": 1.154605934451655, "grad_norm": 1.7349605560302734, "learning_rate": 1.9814876572024727e-05, "loss": 0.7057, "step": 7073 }, { "epoch": 1.1547691930941595, "grad_norm": 2.0657670497894287, "learning_rate": 1.981481578983154e-05, "loss": 0.6793, "step": 7074 }, { "epoch": 1.1549324517366637, "grad_norm": 1.988625168800354, "learning_rate": 1.981475499775484e-05, "loss": 0.7845, "step": 7075 }, { "epoch": 1.1550957103791681, "grad_norm": 1.8430049419403076, "learning_rate": 1.9814694195794683e-05, "loss": 0.7203, "step": 7076 }, { "epoch": 1.1552589690216726, "grad_norm": 1.4854589700698853, "learning_rate": 1.9814633383951133e-05, "loss": 0.5439, "step": 7077 }, { "epoch": 1.155422227664177, "grad_norm": 1.7490549087524414, "learning_rate": 1.981457256222425e-05, "loss": 0.5914, "step": 7078 }, { "epoch": 1.1555854863066815, "grad_norm": 2.2118566036224365, "learning_rate": 1.981451173061409e-05, "loss": 0.678, "step": 7079 }, { "epoch": 1.1557487449491857, "grad_norm": 1.812168002128601, "learning_rate": 1.9814450889120725e-05, "loss": 0.6848, "step": 7080 }, { "epoch": 1.15591200359169, "grad_norm": 1.582257628440857, "learning_rate": 1.981439003774421e-05, "loss": 0.7262, "step": 7081 }, { "epoch": 1.1560752622341945, "grad_norm": 1.7081013917922974, "learning_rate": 1.9814329176484604e-05, "loss": 0.6169, "step": 7082 }, { "epoch": 1.156238520876699, "grad_norm": 1.8678871393203735, "learning_rate": 1.9814268305341974e-05, "loss": 0.6994, "step": 7083 }, { "epoch": 1.1564017795192032, "grad_norm": 1.8009268045425415, "learning_rate": 1.9814207424316378e-05, "loss": 0.6198, "step": 7084 }, { "epoch": 1.1565650381617076, "grad_norm": 1.7349498271942139, "learning_rate": 1.9814146533407875e-05, "loss": 0.6545, "step": 7085 }, { "epoch": 1.156728296804212, "grad_norm": 1.4686235189437866, "learning_rate": 1.981408563261653e-05, "loss": 0.5118, "step": 7086 }, { "epoch": 1.1568915554467165, "grad_norm": 2.028930902481079, "learning_rate": 1.9814024721942403e-05, "loss": 0.7321, "step": 7087 }, { "epoch": 1.157054814089221, "grad_norm": 1.8459935188293457, "learning_rate": 1.9813963801385558e-05, "loss": 0.6153, "step": 7088 }, { "epoch": 1.1572180727317252, "grad_norm": 2.0305066108703613, "learning_rate": 1.9813902870946055e-05, "loss": 0.7784, "step": 7089 }, { "epoch": 1.1573813313742296, "grad_norm": 1.7402966022491455, "learning_rate": 1.981384193062395e-05, "loss": 0.6681, "step": 7090 }, { "epoch": 1.157544590016734, "grad_norm": 1.8188283443450928, "learning_rate": 1.981378098041931e-05, "loss": 0.6946, "step": 7091 }, { "epoch": 1.1577078486592385, "grad_norm": 2.270312786102295, "learning_rate": 1.98137200203322e-05, "loss": 0.8649, "step": 7092 }, { "epoch": 1.1578711073017427, "grad_norm": 1.8433678150177002, "learning_rate": 1.981365905036267e-05, "loss": 0.8585, "step": 7093 }, { "epoch": 1.1580343659442471, "grad_norm": 2.6229636669158936, "learning_rate": 1.981359807051079e-05, "loss": 0.8618, "step": 7094 }, { "epoch": 1.1581976245867516, "grad_norm": 2.3593556880950928, "learning_rate": 1.981353708077662e-05, "loss": 0.9448, "step": 7095 }, { "epoch": 1.158360883229256, "grad_norm": 1.933457374572754, "learning_rate": 1.981347608116022e-05, "loss": 0.7786, "step": 7096 }, { "epoch": 1.1585241418717604, "grad_norm": 1.6249339580535889, "learning_rate": 1.9813415071661657e-05, "loss": 0.5231, "step": 7097 }, { "epoch": 1.1586874005142647, "grad_norm": 1.7684881687164307, "learning_rate": 1.981335405228098e-05, "loss": 0.6819, "step": 7098 }, { "epoch": 1.158850659156769, "grad_norm": 2.005373239517212, "learning_rate": 1.9813293023018266e-05, "loss": 0.7291, "step": 7099 }, { "epoch": 1.1590139177992735, "grad_norm": 1.5159815549850464, "learning_rate": 1.9813231983873563e-05, "loss": 0.6002, "step": 7100 }, { "epoch": 1.159177176441778, "grad_norm": 1.6615958213806152, "learning_rate": 1.9813170934846937e-05, "loss": 0.6358, "step": 7101 }, { "epoch": 1.1593404350842822, "grad_norm": 1.8222267627716064, "learning_rate": 1.9813109875938455e-05, "loss": 0.7597, "step": 7102 }, { "epoch": 1.1595036937267866, "grad_norm": 1.5389870405197144, "learning_rate": 1.981304880714817e-05, "loss": 0.7253, "step": 7103 }, { "epoch": 1.159666952369291, "grad_norm": 1.5359859466552734, "learning_rate": 1.981298772847615e-05, "loss": 0.7347, "step": 7104 }, { "epoch": 1.1598302110117955, "grad_norm": 1.9134548902511597, "learning_rate": 1.981292663992245e-05, "loss": 0.7431, "step": 7105 }, { "epoch": 1.1599934696543, "grad_norm": 1.759498119354248, "learning_rate": 1.9812865541487142e-05, "loss": 0.6685, "step": 7106 }, { "epoch": 1.1601567282968042, "grad_norm": 1.4908102750778198, "learning_rate": 1.9812804433170276e-05, "loss": 0.5898, "step": 7107 }, { "epoch": 1.1603199869393086, "grad_norm": 1.699192762374878, "learning_rate": 1.981274331497192e-05, "loss": 0.6301, "step": 7108 }, { "epoch": 1.160483245581813, "grad_norm": 2.5019733905792236, "learning_rate": 1.9812682186892136e-05, "loss": 0.8335, "step": 7109 }, { "epoch": 1.1606465042243173, "grad_norm": 1.8765603303909302, "learning_rate": 1.981262104893098e-05, "loss": 0.7594, "step": 7110 }, { "epoch": 1.1608097628668217, "grad_norm": 1.539469838142395, "learning_rate": 1.9812559901088518e-05, "loss": 0.5843, "step": 7111 }, { "epoch": 1.1609730215093261, "grad_norm": 2.524442434310913, "learning_rate": 1.9812498743364814e-05, "loss": 0.8364, "step": 7112 }, { "epoch": 1.1611362801518306, "grad_norm": 1.8964955806732178, "learning_rate": 1.9812437575759924e-05, "loss": 0.7686, "step": 7113 }, { "epoch": 1.161299538794335, "grad_norm": 1.8319218158721924, "learning_rate": 1.9812376398273914e-05, "loss": 0.7677, "step": 7114 }, { "epoch": 1.1614627974368392, "grad_norm": 2.158236265182495, "learning_rate": 1.981231521090684e-05, "loss": 0.7319, "step": 7115 }, { "epoch": 1.1616260560793437, "grad_norm": 2.2673230171203613, "learning_rate": 1.981225401365877e-05, "loss": 0.8539, "step": 7116 }, { "epoch": 1.161789314721848, "grad_norm": 1.8791080713272095, "learning_rate": 1.981219280652976e-05, "loss": 0.7454, "step": 7117 }, { "epoch": 1.1619525733643525, "grad_norm": 1.9308931827545166, "learning_rate": 1.9812131589519876e-05, "loss": 0.7057, "step": 7118 }, { "epoch": 1.1621158320068568, "grad_norm": 1.5517247915267944, "learning_rate": 1.981207036262918e-05, "loss": 0.5228, "step": 7119 }, { "epoch": 1.1622790906493612, "grad_norm": 1.773578405380249, "learning_rate": 1.981200912585773e-05, "loss": 0.6027, "step": 7120 }, { "epoch": 1.1624423492918656, "grad_norm": 1.693326473236084, "learning_rate": 1.981194787920559e-05, "loss": 0.7101, "step": 7121 }, { "epoch": 1.16260560793437, "grad_norm": 1.614210605621338, "learning_rate": 1.981188662267282e-05, "loss": 0.5765, "step": 7122 }, { "epoch": 1.1627688665768745, "grad_norm": 1.8770637512207031, "learning_rate": 1.9811825356259483e-05, "loss": 0.6339, "step": 7123 }, { "epoch": 1.1629321252193787, "grad_norm": 1.9384770393371582, "learning_rate": 1.981176407996564e-05, "loss": 0.6792, "step": 7124 }, { "epoch": 1.1630953838618832, "grad_norm": 2.167144775390625, "learning_rate": 1.9811702793791357e-05, "loss": 0.6558, "step": 7125 }, { "epoch": 1.1632586425043876, "grad_norm": 2.067514419555664, "learning_rate": 1.9811641497736686e-05, "loss": 0.8768, "step": 7126 }, { "epoch": 1.163421901146892, "grad_norm": 1.8030084371566772, "learning_rate": 1.9811580191801697e-05, "loss": 0.6911, "step": 7127 }, { "epoch": 1.1635851597893963, "grad_norm": 2.123981475830078, "learning_rate": 1.9811518875986452e-05, "loss": 0.6905, "step": 7128 }, { "epoch": 1.1637484184319007, "grad_norm": 1.6889708042144775, "learning_rate": 1.9811457550291008e-05, "loss": 0.604, "step": 7129 }, { "epoch": 1.1639116770744051, "grad_norm": 1.786833643913269, "learning_rate": 1.981139621471543e-05, "loss": 0.7208, "step": 7130 }, { "epoch": 1.1640749357169096, "grad_norm": 2.3827736377716064, "learning_rate": 1.9811334869259774e-05, "loss": 0.688, "step": 7131 }, { "epoch": 1.164238194359414, "grad_norm": 1.7948240041732788, "learning_rate": 1.9811273513924112e-05, "loss": 0.6452, "step": 7132 }, { "epoch": 1.1644014530019182, "grad_norm": 1.8083299398422241, "learning_rate": 1.9811212148708496e-05, "loss": 0.6758, "step": 7133 }, { "epoch": 1.1645647116444227, "grad_norm": 1.6124064922332764, "learning_rate": 1.9811150773612996e-05, "loss": 0.6251, "step": 7134 }, { "epoch": 1.164727970286927, "grad_norm": 1.8798433542251587, "learning_rate": 1.9811089388637667e-05, "loss": 0.6362, "step": 7135 }, { "epoch": 1.1648912289294315, "grad_norm": 1.766593098640442, "learning_rate": 1.9811027993782575e-05, "loss": 0.7453, "step": 7136 }, { "epoch": 1.1650544875719357, "grad_norm": 1.8099079132080078, "learning_rate": 1.981096658904778e-05, "loss": 0.705, "step": 7137 }, { "epoch": 1.1652177462144402, "grad_norm": 1.97365403175354, "learning_rate": 1.981090517443334e-05, "loss": 0.7224, "step": 7138 }, { "epoch": 1.1653810048569446, "grad_norm": 1.5255546569824219, "learning_rate": 1.9810843749939327e-05, "loss": 0.614, "step": 7139 }, { "epoch": 1.165544263499449, "grad_norm": 2.3167102336883545, "learning_rate": 1.9810782315565795e-05, "loss": 0.7847, "step": 7140 }, { "epoch": 1.1657075221419535, "grad_norm": 1.9067007303237915, "learning_rate": 1.9810720871312806e-05, "loss": 0.5857, "step": 7141 }, { "epoch": 1.1658707807844577, "grad_norm": 1.5282741785049438, "learning_rate": 1.9810659417180428e-05, "loss": 0.6213, "step": 7142 }, { "epoch": 1.1660340394269622, "grad_norm": 1.8993463516235352, "learning_rate": 1.9810597953168715e-05, "loss": 0.7016, "step": 7143 }, { "epoch": 1.1661972980694666, "grad_norm": 2.0074756145477295, "learning_rate": 1.9810536479277735e-05, "loss": 0.8246, "step": 7144 }, { "epoch": 1.166360556711971, "grad_norm": 1.938750147819519, "learning_rate": 1.9810474995507545e-05, "loss": 0.7049, "step": 7145 }, { "epoch": 1.1665238153544752, "grad_norm": 1.690386414527893, "learning_rate": 1.981041350185821e-05, "loss": 0.5837, "step": 7146 }, { "epoch": 1.1666870739969797, "grad_norm": 1.9899473190307617, "learning_rate": 1.981035199832979e-05, "loss": 0.6591, "step": 7147 }, { "epoch": 1.1668503326394841, "grad_norm": 1.6416652202606201, "learning_rate": 1.9810290484922352e-05, "loss": 0.7038, "step": 7148 }, { "epoch": 1.1670135912819886, "grad_norm": 1.7029551267623901, "learning_rate": 1.981022896163595e-05, "loss": 0.6901, "step": 7149 }, { "epoch": 1.167176849924493, "grad_norm": 1.770218014717102, "learning_rate": 1.9810167428470653e-05, "loss": 0.7067, "step": 7150 }, { "epoch": 1.1673401085669972, "grad_norm": 1.8394699096679688, "learning_rate": 1.9810105885426517e-05, "loss": 0.627, "step": 7151 }, { "epoch": 1.1675033672095017, "grad_norm": 1.7034099102020264, "learning_rate": 1.9810044332503612e-05, "loss": 0.6397, "step": 7152 }, { "epoch": 1.167666625852006, "grad_norm": 1.5363147258758545, "learning_rate": 1.980998276970199e-05, "loss": 0.5937, "step": 7153 }, { "epoch": 1.1678298844945103, "grad_norm": 1.547714352607727, "learning_rate": 1.980992119702172e-05, "loss": 0.5913, "step": 7154 }, { "epoch": 1.1679931431370147, "grad_norm": 1.6826876401901245, "learning_rate": 1.980985961446286e-05, "loss": 0.61, "step": 7155 }, { "epoch": 1.1681564017795192, "grad_norm": 2.023994207382202, "learning_rate": 1.9809798022025475e-05, "loss": 0.6641, "step": 7156 }, { "epoch": 1.1683196604220236, "grad_norm": 1.753090739250183, "learning_rate": 1.9809736419709626e-05, "loss": 0.6604, "step": 7157 }, { "epoch": 1.168482919064528, "grad_norm": 2.070065975189209, "learning_rate": 1.9809674807515374e-05, "loss": 0.7202, "step": 7158 }, { "epoch": 1.1686461777070323, "grad_norm": 2.0827131271362305, "learning_rate": 1.9809613185442783e-05, "loss": 0.8166, "step": 7159 }, { "epoch": 1.1688094363495367, "grad_norm": 1.8187520503997803, "learning_rate": 1.9809551553491918e-05, "loss": 0.614, "step": 7160 }, { "epoch": 1.1689726949920412, "grad_norm": 1.7988108396530151, "learning_rate": 1.9809489911662832e-05, "loss": 0.5808, "step": 7161 }, { "epoch": 1.1691359536345456, "grad_norm": 1.5943621397018433, "learning_rate": 1.9809428259955594e-05, "loss": 0.5103, "step": 7162 }, { "epoch": 1.1692992122770498, "grad_norm": 2.5683858394622803, "learning_rate": 1.9809366598370265e-05, "loss": 0.8241, "step": 7163 }, { "epoch": 1.1694624709195542, "grad_norm": 1.8795560598373413, "learning_rate": 1.980930492690691e-05, "loss": 0.7316, "step": 7164 }, { "epoch": 1.1696257295620587, "grad_norm": 1.7260730266571045, "learning_rate": 1.9809243245565583e-05, "loss": 0.6325, "step": 7165 }, { "epoch": 1.1697889882045631, "grad_norm": 2.002794027328491, "learning_rate": 1.9809181554346348e-05, "loss": 0.8228, "step": 7166 }, { "epoch": 1.1699522468470676, "grad_norm": 1.757325291633606, "learning_rate": 1.9809119853249273e-05, "loss": 0.5626, "step": 7167 }, { "epoch": 1.1701155054895718, "grad_norm": 2.499027967453003, "learning_rate": 1.9809058142274415e-05, "loss": 0.6445, "step": 7168 }, { "epoch": 1.1702787641320762, "grad_norm": 1.6577476263046265, "learning_rate": 1.980899642142184e-05, "loss": 0.5466, "step": 7169 }, { "epoch": 1.1704420227745806, "grad_norm": 1.8361775875091553, "learning_rate": 1.980893469069161e-05, "loss": 0.6365, "step": 7170 }, { "epoch": 1.170605281417085, "grad_norm": 1.7803117036819458, "learning_rate": 1.9808872950083785e-05, "loss": 0.6641, "step": 7171 }, { "epoch": 1.1707685400595893, "grad_norm": 1.928062081336975, "learning_rate": 1.9808811199598424e-05, "loss": 0.6656, "step": 7172 }, { "epoch": 1.1709317987020937, "grad_norm": 1.6976220607757568, "learning_rate": 1.9808749439235595e-05, "loss": 0.6067, "step": 7173 }, { "epoch": 1.1710950573445982, "grad_norm": 1.857807993888855, "learning_rate": 1.9808687668995356e-05, "loss": 0.6502, "step": 7174 }, { "epoch": 1.1712583159871026, "grad_norm": 1.7393978834152222, "learning_rate": 1.9808625888877775e-05, "loss": 0.6222, "step": 7175 }, { "epoch": 1.171421574629607, "grad_norm": 1.9331141710281372, "learning_rate": 1.9808564098882908e-05, "loss": 0.6521, "step": 7176 }, { "epoch": 1.1715848332721113, "grad_norm": 1.793966293334961, "learning_rate": 1.9808502299010817e-05, "loss": 0.6389, "step": 7177 }, { "epoch": 1.1717480919146157, "grad_norm": 1.8630450963974, "learning_rate": 1.980844048926157e-05, "loss": 0.7533, "step": 7178 }, { "epoch": 1.1719113505571201, "grad_norm": 1.7854267358779907, "learning_rate": 1.9808378669635227e-05, "loss": 0.7015, "step": 7179 }, { "epoch": 1.1720746091996246, "grad_norm": 1.8687618970870972, "learning_rate": 1.9808316840131846e-05, "loss": 0.6534, "step": 7180 }, { "epoch": 1.1722378678421288, "grad_norm": 1.9814318418502808, "learning_rate": 1.9808255000751496e-05, "loss": 0.7849, "step": 7181 }, { "epoch": 1.1724011264846332, "grad_norm": 1.961942195892334, "learning_rate": 1.9808193151494233e-05, "loss": 0.7385, "step": 7182 }, { "epoch": 1.1725643851271377, "grad_norm": 1.8831777572631836, "learning_rate": 1.9808131292360123e-05, "loss": 0.6908, "step": 7183 }, { "epoch": 1.1727276437696421, "grad_norm": 1.4788898229599, "learning_rate": 1.9808069423349228e-05, "loss": 0.5731, "step": 7184 }, { "epoch": 1.1728909024121466, "grad_norm": 1.8709713220596313, "learning_rate": 1.9808007544461608e-05, "loss": 0.7836, "step": 7185 }, { "epoch": 1.1730541610546508, "grad_norm": 1.7641911506652832, "learning_rate": 1.980794565569733e-05, "loss": 0.6387, "step": 7186 }, { "epoch": 1.1732174196971552, "grad_norm": 1.7214722633361816, "learning_rate": 1.980788375705645e-05, "loss": 0.6267, "step": 7187 }, { "epoch": 1.1733806783396596, "grad_norm": 1.7868555784225464, "learning_rate": 1.9807821848539034e-05, "loss": 0.7555, "step": 7188 }, { "epoch": 1.173543936982164, "grad_norm": 1.7970112562179565, "learning_rate": 1.9807759930145146e-05, "loss": 0.7551, "step": 7189 }, { "epoch": 1.1737071956246683, "grad_norm": 2.7307162284851074, "learning_rate": 1.9807698001874848e-05, "loss": 0.853, "step": 7190 }, { "epoch": 1.1738704542671727, "grad_norm": 1.9283397197723389, "learning_rate": 1.9807636063728196e-05, "loss": 0.6791, "step": 7191 }, { "epoch": 1.1740337129096772, "grad_norm": 1.6616342067718506, "learning_rate": 1.980757411570526e-05, "loss": 0.6663, "step": 7192 }, { "epoch": 1.1741969715521816, "grad_norm": 1.6126534938812256, "learning_rate": 1.98075121578061e-05, "loss": 0.5349, "step": 7193 }, { "epoch": 1.174360230194686, "grad_norm": 1.5531113147735596, "learning_rate": 1.9807450190030777e-05, "loss": 0.6795, "step": 7194 }, { "epoch": 1.1745234888371903, "grad_norm": 2.0304970741271973, "learning_rate": 1.9807388212379352e-05, "loss": 0.7633, "step": 7195 }, { "epoch": 1.1746867474796947, "grad_norm": 1.7407114505767822, "learning_rate": 1.980732622485189e-05, "loss": 0.7003, "step": 7196 }, { "epoch": 1.1748500061221991, "grad_norm": 1.731769323348999, "learning_rate": 1.9807264227448456e-05, "loss": 0.7286, "step": 7197 }, { "epoch": 1.1750132647647034, "grad_norm": 1.772623062133789, "learning_rate": 1.980720222016911e-05, "loss": 0.7105, "step": 7198 }, { "epoch": 1.1751765234072078, "grad_norm": 1.680614948272705, "learning_rate": 1.9807140203013914e-05, "loss": 0.6188, "step": 7199 }, { "epoch": 1.1753397820497122, "grad_norm": 1.6053986549377441, "learning_rate": 1.9807078175982925e-05, "loss": 0.6323, "step": 7200 }, { "epoch": 1.1755030406922167, "grad_norm": 1.564581274986267, "learning_rate": 1.9807016139076216e-05, "loss": 0.5888, "step": 7201 }, { "epoch": 1.175666299334721, "grad_norm": 1.6494383811950684, "learning_rate": 1.980695409229384e-05, "loss": 0.6808, "step": 7202 }, { "epoch": 1.1758295579772253, "grad_norm": 2.1700117588043213, "learning_rate": 1.980689203563587e-05, "loss": 0.6326, "step": 7203 }, { "epoch": 1.1759928166197298, "grad_norm": 2.066725015640259, "learning_rate": 1.9806829969102356e-05, "loss": 0.7514, "step": 7204 }, { "epoch": 1.1761560752622342, "grad_norm": 1.9142897129058838, "learning_rate": 1.980676789269337e-05, "loss": 0.6411, "step": 7205 }, { "epoch": 1.1763193339047386, "grad_norm": 1.920900821685791, "learning_rate": 1.980670580640897e-05, "loss": 0.7701, "step": 7206 }, { "epoch": 1.1764825925472429, "grad_norm": 1.7631487846374512, "learning_rate": 1.9806643710249224e-05, "loss": 0.6583, "step": 7207 }, { "epoch": 1.1766458511897473, "grad_norm": 2.0889523029327393, "learning_rate": 1.9806581604214184e-05, "loss": 0.7652, "step": 7208 }, { "epoch": 1.1768091098322517, "grad_norm": 1.8101285696029663, "learning_rate": 1.9806519488303926e-05, "loss": 0.6677, "step": 7209 }, { "epoch": 1.1769723684747562, "grad_norm": 2.164069414138794, "learning_rate": 1.98064573625185e-05, "loss": 0.9241, "step": 7210 }, { "epoch": 1.1771356271172606, "grad_norm": 2.1131157875061035, "learning_rate": 1.9806395226857975e-05, "loss": 0.8211, "step": 7211 }, { "epoch": 1.1772988857597648, "grad_norm": 1.6238783597946167, "learning_rate": 1.9806333081322414e-05, "loss": 0.6878, "step": 7212 }, { "epoch": 1.1774621444022693, "grad_norm": 1.96669340133667, "learning_rate": 1.980627092591188e-05, "loss": 0.6012, "step": 7213 }, { "epoch": 1.1776254030447737, "grad_norm": 1.8521089553833008, "learning_rate": 1.980620876062643e-05, "loss": 0.6935, "step": 7214 }, { "epoch": 1.1777886616872781, "grad_norm": 2.254666328430176, "learning_rate": 1.980614658546613e-05, "loss": 0.6948, "step": 7215 }, { "epoch": 1.1779519203297824, "grad_norm": 1.829430341720581, "learning_rate": 1.9806084400431048e-05, "loss": 0.6497, "step": 7216 }, { "epoch": 1.1781151789722868, "grad_norm": 1.8784542083740234, "learning_rate": 1.9806022205521235e-05, "loss": 0.6555, "step": 7217 }, { "epoch": 1.1782784376147912, "grad_norm": 1.8420171737670898, "learning_rate": 1.9805960000736767e-05, "loss": 0.6848, "step": 7218 }, { "epoch": 1.1784416962572957, "grad_norm": 1.7938734292984009, "learning_rate": 1.9805897786077693e-05, "loss": 0.7145, "step": 7219 }, { "epoch": 1.1786049548998, "grad_norm": 1.8854244947433472, "learning_rate": 1.9805835561544086e-05, "loss": 0.698, "step": 7220 }, { "epoch": 1.1787682135423043, "grad_norm": 1.705085277557373, "learning_rate": 1.9805773327136005e-05, "loss": 0.7466, "step": 7221 }, { "epoch": 1.1789314721848088, "grad_norm": 1.8277993202209473, "learning_rate": 1.9805711082853513e-05, "loss": 0.6877, "step": 7222 }, { "epoch": 1.1790947308273132, "grad_norm": 1.8434219360351562, "learning_rate": 1.9805648828696676e-05, "loss": 0.6875, "step": 7223 }, { "epoch": 1.1792579894698176, "grad_norm": 1.9891518354415894, "learning_rate": 1.980558656466555e-05, "loss": 0.7142, "step": 7224 }, { "epoch": 1.1794212481123219, "grad_norm": 1.7292863130569458, "learning_rate": 1.98055242907602e-05, "loss": 0.6988, "step": 7225 }, { "epoch": 1.1795845067548263, "grad_norm": 1.930353045463562, "learning_rate": 1.9805462006980688e-05, "loss": 0.7645, "step": 7226 }, { "epoch": 1.1797477653973307, "grad_norm": 2.107757091522217, "learning_rate": 1.980539971332708e-05, "loss": 0.6784, "step": 7227 }, { "epoch": 1.1799110240398352, "grad_norm": 1.8817429542541504, "learning_rate": 1.9805337409799442e-05, "loss": 0.7438, "step": 7228 }, { "epoch": 1.1800742826823396, "grad_norm": 2.0765912532806396, "learning_rate": 1.9805275096397828e-05, "loss": 0.7681, "step": 7229 }, { "epoch": 1.1802375413248438, "grad_norm": 1.9435827732086182, "learning_rate": 1.9805212773122303e-05, "loss": 0.619, "step": 7230 }, { "epoch": 1.1804007999673483, "grad_norm": 1.7972595691680908, "learning_rate": 1.9805150439972933e-05, "loss": 0.658, "step": 7231 }, { "epoch": 1.1805640586098527, "grad_norm": 1.7873631715774536, "learning_rate": 1.9805088096949777e-05, "loss": 0.6799, "step": 7232 }, { "epoch": 1.1807273172523571, "grad_norm": 1.9250143766403198, "learning_rate": 1.98050257440529e-05, "loss": 0.7841, "step": 7233 }, { "epoch": 1.1808905758948613, "grad_norm": 1.7598320245742798, "learning_rate": 1.9804963381282367e-05, "loss": 0.6523, "step": 7234 }, { "epoch": 1.1810538345373658, "grad_norm": 2.061849355697632, "learning_rate": 1.980490100863824e-05, "loss": 0.8347, "step": 7235 }, { "epoch": 1.1812170931798702, "grad_norm": 1.8773934841156006, "learning_rate": 1.9804838626120576e-05, "loss": 0.6891, "step": 7236 }, { "epoch": 1.1813803518223747, "grad_norm": 1.5814512968063354, "learning_rate": 1.9804776233729446e-05, "loss": 0.6159, "step": 7237 }, { "epoch": 1.181543610464879, "grad_norm": 1.5928398370742798, "learning_rate": 1.9804713831464908e-05, "loss": 0.5482, "step": 7238 }, { "epoch": 1.1817068691073833, "grad_norm": 1.9759432077407837, "learning_rate": 1.9804651419327025e-05, "loss": 0.8435, "step": 7239 }, { "epoch": 1.1818701277498878, "grad_norm": 1.6878821849822998, "learning_rate": 1.9804588997315858e-05, "loss": 0.6631, "step": 7240 }, { "epoch": 1.1820333863923922, "grad_norm": 1.6341748237609863, "learning_rate": 1.9804526565431478e-05, "loss": 0.5929, "step": 7241 }, { "epoch": 1.1821966450348966, "grad_norm": 1.8746899366378784, "learning_rate": 1.980446412367394e-05, "loss": 0.5766, "step": 7242 }, { "epoch": 1.1823599036774008, "grad_norm": 1.624620795249939, "learning_rate": 1.980440167204331e-05, "loss": 0.5421, "step": 7243 }, { "epoch": 1.1825231623199053, "grad_norm": 1.8365871906280518, "learning_rate": 1.9804339210539644e-05, "loss": 0.6809, "step": 7244 }, { "epoch": 1.1826864209624097, "grad_norm": 1.7437423467636108, "learning_rate": 1.9804276739163017e-05, "loss": 0.761, "step": 7245 }, { "epoch": 1.1828496796049142, "grad_norm": 1.7837105989456177, "learning_rate": 1.9804214257913483e-05, "loss": 0.6267, "step": 7246 }, { "epoch": 1.1830129382474184, "grad_norm": 1.418367624282837, "learning_rate": 1.980415176679111e-05, "loss": 0.5915, "step": 7247 }, { "epoch": 1.1831761968899228, "grad_norm": 2.2641959190368652, "learning_rate": 1.980408926579596e-05, "loss": 0.9123, "step": 7248 }, { "epoch": 1.1833394555324273, "grad_norm": 2.065375804901123, "learning_rate": 1.9804026754928092e-05, "loss": 0.6492, "step": 7249 }, { "epoch": 1.1835027141749317, "grad_norm": 1.9078385829925537, "learning_rate": 1.980396423418757e-05, "loss": 0.7211, "step": 7250 }, { "epoch": 1.183665972817436, "grad_norm": 2.0276901721954346, "learning_rate": 1.9803901703574465e-05, "loss": 0.7703, "step": 7251 }, { "epoch": 1.1838292314599403, "grad_norm": 2.091717004776001, "learning_rate": 1.980383916308883e-05, "loss": 0.8034, "step": 7252 }, { "epoch": 1.1839924901024448, "grad_norm": 1.7219983339309692, "learning_rate": 1.9803776612730728e-05, "loss": 0.5072, "step": 7253 }, { "epoch": 1.1841557487449492, "grad_norm": 1.8698267936706543, "learning_rate": 1.980371405250023e-05, "loss": 0.5958, "step": 7254 }, { "epoch": 1.1843190073874537, "grad_norm": 1.6547167301177979, "learning_rate": 1.9803651482397394e-05, "loss": 0.6008, "step": 7255 }, { "epoch": 1.1844822660299579, "grad_norm": 2.0773849487304688, "learning_rate": 1.9803588902422283e-05, "loss": 0.6719, "step": 7256 }, { "epoch": 1.1846455246724623, "grad_norm": 2.084836721420288, "learning_rate": 1.980352631257496e-05, "loss": 0.7241, "step": 7257 }, { "epoch": 1.1848087833149668, "grad_norm": 1.738813877105713, "learning_rate": 1.980346371285549e-05, "loss": 0.6673, "step": 7258 }, { "epoch": 1.1849720419574712, "grad_norm": 1.6602576971054077, "learning_rate": 1.980340110326393e-05, "loss": 0.6159, "step": 7259 }, { "epoch": 1.1851353005999754, "grad_norm": 2.4889349937438965, "learning_rate": 1.9803338483800355e-05, "loss": 0.7961, "step": 7260 }, { "epoch": 1.1852985592424798, "grad_norm": 1.765291452407837, "learning_rate": 1.9803275854464817e-05, "loss": 0.5899, "step": 7261 }, { "epoch": 1.1854618178849843, "grad_norm": 1.704529047012329, "learning_rate": 1.9803213215257383e-05, "loss": 0.607, "step": 7262 }, { "epoch": 1.1856250765274887, "grad_norm": 1.74045729637146, "learning_rate": 1.9803150566178117e-05, "loss": 0.5892, "step": 7263 }, { "epoch": 1.1857883351699932, "grad_norm": 3.506566047668457, "learning_rate": 1.9803087907227077e-05, "loss": 0.7507, "step": 7264 }, { "epoch": 1.1859515938124974, "grad_norm": 2.0792431831359863, "learning_rate": 1.9803025238404333e-05, "loss": 0.8694, "step": 7265 }, { "epoch": 1.1861148524550018, "grad_norm": 1.960637092590332, "learning_rate": 1.980296255970995e-05, "loss": 0.6969, "step": 7266 }, { "epoch": 1.1862781110975062, "grad_norm": 1.9349961280822754, "learning_rate": 1.9802899871143978e-05, "loss": 0.7018, "step": 7267 }, { "epoch": 1.1864413697400107, "grad_norm": 1.7124922275543213, "learning_rate": 1.980283717270649e-05, "loss": 0.6634, "step": 7268 }, { "epoch": 1.186604628382515, "grad_norm": 2.0709786415100098, "learning_rate": 1.980277446439755e-05, "loss": 0.6693, "step": 7269 }, { "epoch": 1.1867678870250193, "grad_norm": 1.8755772113800049, "learning_rate": 1.9802711746217222e-05, "loss": 0.6189, "step": 7270 }, { "epoch": 1.1869311456675238, "grad_norm": 2.1060848236083984, "learning_rate": 1.980264901816556e-05, "loss": 0.6175, "step": 7271 }, { "epoch": 1.1870944043100282, "grad_norm": 1.7640128135681152, "learning_rate": 1.9802586280242634e-05, "loss": 0.6187, "step": 7272 }, { "epoch": 1.1872576629525327, "grad_norm": 1.568260669708252, "learning_rate": 1.980252353244851e-05, "loss": 0.6559, "step": 7273 }, { "epoch": 1.1874209215950369, "grad_norm": 1.6387568712234497, "learning_rate": 1.9802460774783242e-05, "loss": 0.5489, "step": 7274 }, { "epoch": 1.1875841802375413, "grad_norm": 1.8237985372543335, "learning_rate": 1.9802398007246902e-05, "loss": 0.6579, "step": 7275 }, { "epoch": 1.1877474388800457, "grad_norm": 1.8865231275558472, "learning_rate": 1.9802335229839552e-05, "loss": 0.6619, "step": 7276 }, { "epoch": 1.1879106975225502, "grad_norm": 1.8043243885040283, "learning_rate": 1.980227244256125e-05, "loss": 0.6802, "step": 7277 }, { "epoch": 1.1880739561650544, "grad_norm": 1.956106424331665, "learning_rate": 1.980220964541206e-05, "loss": 0.6292, "step": 7278 }, { "epoch": 1.1882372148075588, "grad_norm": 1.6089187860488892, "learning_rate": 1.980214683839205e-05, "loss": 0.5624, "step": 7279 }, { "epoch": 1.1884004734500633, "grad_norm": 1.9813133478164673, "learning_rate": 1.9802084021501282e-05, "loss": 0.6202, "step": 7280 }, { "epoch": 1.1885637320925677, "grad_norm": 2.019984006881714, "learning_rate": 1.9802021194739815e-05, "loss": 0.7063, "step": 7281 }, { "epoch": 1.1887269907350722, "grad_norm": 1.9593933820724487, "learning_rate": 1.9801958358107718e-05, "loss": 0.625, "step": 7282 }, { "epoch": 1.1888902493775764, "grad_norm": 1.495078206062317, "learning_rate": 1.980189551160505e-05, "loss": 0.5563, "step": 7283 }, { "epoch": 1.1890535080200808, "grad_norm": 1.9710201025009155, "learning_rate": 1.9801832655231876e-05, "loss": 0.671, "step": 7284 }, { "epoch": 1.1892167666625852, "grad_norm": 1.5401666164398193, "learning_rate": 1.980176978898826e-05, "loss": 0.5619, "step": 7285 }, { "epoch": 1.1893800253050897, "grad_norm": 2.0315983295440674, "learning_rate": 1.9801706912874262e-05, "loss": 0.7018, "step": 7286 }, { "epoch": 1.189543283947594, "grad_norm": 1.9702680110931396, "learning_rate": 1.980164402688995e-05, "loss": 0.6616, "step": 7287 }, { "epoch": 1.1897065425900983, "grad_norm": 1.7691479921340942, "learning_rate": 1.9801581131035385e-05, "loss": 0.713, "step": 7288 }, { "epoch": 1.1898698012326028, "grad_norm": 1.7871026992797852, "learning_rate": 1.980151822531063e-05, "loss": 0.6984, "step": 7289 }, { "epoch": 1.1900330598751072, "grad_norm": 1.8511483669281006, "learning_rate": 1.9801455309715748e-05, "loss": 0.6633, "step": 7290 }, { "epoch": 1.1901963185176114, "grad_norm": 2.142162799835205, "learning_rate": 1.9801392384250806e-05, "loss": 0.8227, "step": 7291 }, { "epoch": 1.1903595771601159, "grad_norm": 1.9787348508834839, "learning_rate": 1.9801329448915863e-05, "loss": 0.7722, "step": 7292 }, { "epoch": 1.1905228358026203, "grad_norm": 2.010305166244507, "learning_rate": 1.980126650371098e-05, "loss": 0.7682, "step": 7293 }, { "epoch": 1.1906860944451247, "grad_norm": 1.6822975873947144, "learning_rate": 1.980120354863623e-05, "loss": 0.6293, "step": 7294 }, { "epoch": 1.190849353087629, "grad_norm": 1.685307502746582, "learning_rate": 1.9801140583691666e-05, "loss": 0.6219, "step": 7295 }, { "epoch": 1.1910126117301334, "grad_norm": 1.659957766532898, "learning_rate": 1.980107760887736e-05, "loss": 0.5978, "step": 7296 }, { "epoch": 1.1911758703726378, "grad_norm": 1.998290777206421, "learning_rate": 1.980101462419337e-05, "loss": 0.6747, "step": 7297 }, { "epoch": 1.1913391290151423, "grad_norm": 1.5270265340805054, "learning_rate": 1.9800951629639758e-05, "loss": 0.5715, "step": 7298 }, { "epoch": 1.1915023876576467, "grad_norm": 2.379990577697754, "learning_rate": 1.9800888625216593e-05, "loss": 0.6822, "step": 7299 }, { "epoch": 1.191665646300151, "grad_norm": 1.9211238622665405, "learning_rate": 1.9800825610923937e-05, "loss": 0.7268, "step": 7300 }, { "epoch": 1.1918289049426554, "grad_norm": 2.146958589553833, "learning_rate": 1.9800762586761852e-05, "loss": 0.7994, "step": 7301 }, { "epoch": 1.1919921635851598, "grad_norm": 1.6015897989273071, "learning_rate": 1.9800699552730397e-05, "loss": 0.677, "step": 7302 }, { "epoch": 1.1921554222276642, "grad_norm": 1.5932108163833618, "learning_rate": 1.9800636508829646e-05, "loss": 0.5979, "step": 7303 }, { "epoch": 1.1923186808701685, "grad_norm": 2.099510431289673, "learning_rate": 1.9800573455059653e-05, "loss": 0.8138, "step": 7304 }, { "epoch": 1.192481939512673, "grad_norm": 1.926492691040039, "learning_rate": 1.9800510391420483e-05, "loss": 0.7524, "step": 7305 }, { "epoch": 1.1926451981551773, "grad_norm": 1.57281494140625, "learning_rate": 1.9800447317912207e-05, "loss": 0.571, "step": 7306 }, { "epoch": 1.1928084567976818, "grad_norm": 1.8618874549865723, "learning_rate": 1.9800384234534882e-05, "loss": 0.8058, "step": 7307 }, { "epoch": 1.1929717154401862, "grad_norm": 1.6998305320739746, "learning_rate": 1.9800321141288573e-05, "loss": 0.6863, "step": 7308 }, { "epoch": 1.1931349740826904, "grad_norm": 1.6840764284133911, "learning_rate": 1.980025803817334e-05, "loss": 0.7451, "step": 7309 }, { "epoch": 1.1932982327251949, "grad_norm": 1.5975855588912964, "learning_rate": 1.980019492518925e-05, "loss": 0.6996, "step": 7310 }, { "epoch": 1.1934614913676993, "grad_norm": 2.0803775787353516, "learning_rate": 1.9800131802336372e-05, "loss": 0.6648, "step": 7311 }, { "epoch": 1.1936247500102037, "grad_norm": 1.947798728942871, "learning_rate": 1.9800068669614757e-05, "loss": 0.7646, "step": 7312 }, { "epoch": 1.193788008652708, "grad_norm": 1.6884888410568237, "learning_rate": 1.980000552702448e-05, "loss": 0.7267, "step": 7313 }, { "epoch": 1.1939512672952124, "grad_norm": 1.6648905277252197, "learning_rate": 1.9799942374565597e-05, "loss": 0.6768, "step": 7314 }, { "epoch": 1.1941145259377168, "grad_norm": 1.9926567077636719, "learning_rate": 1.9799879212238175e-05, "loss": 0.8388, "step": 7315 }, { "epoch": 1.1942777845802213, "grad_norm": 1.9007270336151123, "learning_rate": 1.979981604004228e-05, "loss": 0.742, "step": 7316 }, { "epoch": 1.1944410432227257, "grad_norm": 1.920228362083435, "learning_rate": 1.979975285797797e-05, "loss": 0.6888, "step": 7317 }, { "epoch": 1.19460430186523, "grad_norm": 1.8172974586486816, "learning_rate": 1.9799689666045313e-05, "loss": 0.8582, "step": 7318 }, { "epoch": 1.1947675605077344, "grad_norm": 1.8705157041549683, "learning_rate": 1.979962646424437e-05, "loss": 0.7773, "step": 7319 }, { "epoch": 1.1949308191502388, "grad_norm": 1.8717389106750488, "learning_rate": 1.9799563252575208e-05, "loss": 0.6498, "step": 7320 }, { "epoch": 1.1950940777927432, "grad_norm": 1.9353580474853516, "learning_rate": 1.9799500031037885e-05, "loss": 0.6992, "step": 7321 }, { "epoch": 1.1952573364352475, "grad_norm": 2.026383876800537, "learning_rate": 1.979943679963247e-05, "loss": 0.7626, "step": 7322 }, { "epoch": 1.195420595077752, "grad_norm": 1.6690312623977661, "learning_rate": 1.9799373558359025e-05, "loss": 0.6714, "step": 7323 }, { "epoch": 1.1955838537202563, "grad_norm": 1.6994627714157104, "learning_rate": 1.9799310307217613e-05, "loss": 0.6626, "step": 7324 }, { "epoch": 1.1957471123627608, "grad_norm": 2.0497868061065674, "learning_rate": 1.9799247046208297e-05, "loss": 0.5524, "step": 7325 }, { "epoch": 1.1959103710052652, "grad_norm": 1.9385045766830444, "learning_rate": 1.9799183775331143e-05, "loss": 0.8281, "step": 7326 }, { "epoch": 1.1960736296477694, "grad_norm": 1.5171760320663452, "learning_rate": 1.9799120494586214e-05, "loss": 0.6417, "step": 7327 }, { "epoch": 1.1962368882902739, "grad_norm": 2.231236457824707, "learning_rate": 1.9799057203973574e-05, "loss": 0.7207, "step": 7328 }, { "epoch": 1.1964001469327783, "grad_norm": 1.6312599182128906, "learning_rate": 1.9798993903493282e-05, "loss": 0.7419, "step": 7329 }, { "epoch": 1.1965634055752827, "grad_norm": 1.8975082635879517, "learning_rate": 1.9798930593145412e-05, "loss": 0.7752, "step": 7330 }, { "epoch": 1.196726664217787, "grad_norm": 1.7579262256622314, "learning_rate": 1.9798867272930015e-05, "loss": 0.695, "step": 7331 }, { "epoch": 1.1968899228602914, "grad_norm": 1.8946683406829834, "learning_rate": 1.9798803942847165e-05, "loss": 0.8183, "step": 7332 }, { "epoch": 1.1970531815027958, "grad_norm": 1.6872467994689941, "learning_rate": 1.9798740602896924e-05, "loss": 0.5919, "step": 7333 }, { "epoch": 1.1972164401453003, "grad_norm": 1.8607194423675537, "learning_rate": 1.9798677253079348e-05, "loss": 0.742, "step": 7334 }, { "epoch": 1.1973796987878047, "grad_norm": 1.941464900970459, "learning_rate": 1.9798613893394512e-05, "loss": 0.8724, "step": 7335 }, { "epoch": 1.197542957430309, "grad_norm": 1.8272013664245605, "learning_rate": 1.979855052384247e-05, "loss": 0.6446, "step": 7336 }, { "epoch": 1.1977062160728134, "grad_norm": 1.8992843627929688, "learning_rate": 1.9798487144423293e-05, "loss": 0.6438, "step": 7337 }, { "epoch": 1.1978694747153178, "grad_norm": 1.8274142742156982, "learning_rate": 1.9798423755137038e-05, "loss": 0.7506, "step": 7338 }, { "epoch": 1.198032733357822, "grad_norm": 1.6475982666015625, "learning_rate": 1.9798360355983777e-05, "loss": 0.591, "step": 7339 }, { "epoch": 1.1981959920003264, "grad_norm": 1.7964210510253906, "learning_rate": 1.9798296946963572e-05, "loss": 0.6687, "step": 7340 }, { "epoch": 1.1983592506428309, "grad_norm": 1.7517529726028442, "learning_rate": 1.979823352807648e-05, "loss": 0.7453, "step": 7341 }, { "epoch": 1.1985225092853353, "grad_norm": 1.6476244926452637, "learning_rate": 1.9798170099322573e-05, "loss": 0.6486, "step": 7342 }, { "epoch": 1.1986857679278398, "grad_norm": 1.8606644868850708, "learning_rate": 1.9798106660701907e-05, "loss": 0.7362, "step": 7343 }, { "epoch": 1.198849026570344, "grad_norm": 1.2816218137741089, "learning_rate": 1.9798043212214554e-05, "loss": 0.5284, "step": 7344 }, { "epoch": 1.1990122852128484, "grad_norm": 1.9276995658874512, "learning_rate": 1.979797975386057e-05, "loss": 0.7632, "step": 7345 }, { "epoch": 1.1991755438553529, "grad_norm": 1.9830139875411987, "learning_rate": 1.9797916285640028e-05, "loss": 0.6872, "step": 7346 }, { "epoch": 1.1993388024978573, "grad_norm": 2.1543588638305664, "learning_rate": 1.9797852807552983e-05, "loss": 0.7702, "step": 7347 }, { "epoch": 1.1995020611403615, "grad_norm": 1.978318452835083, "learning_rate": 1.9797789319599506e-05, "loss": 0.8007, "step": 7348 }, { "epoch": 1.199665319782866, "grad_norm": 1.8143136501312256, "learning_rate": 1.9797725821779656e-05, "loss": 0.6891, "step": 7349 }, { "epoch": 1.1998285784253704, "grad_norm": 1.9532885551452637, "learning_rate": 1.9797662314093496e-05, "loss": 0.6453, "step": 7350 }, { "epoch": 1.1999918370678748, "grad_norm": 2.064199209213257, "learning_rate": 1.9797598796541096e-05, "loss": 0.8316, "step": 7351 }, { "epoch": 1.2001550957103793, "grad_norm": 1.9671635627746582, "learning_rate": 1.9797535269122517e-05, "loss": 0.8231, "step": 7352 }, { "epoch": 1.2003183543528835, "grad_norm": 1.6491751670837402, "learning_rate": 1.9797471731837824e-05, "loss": 0.5953, "step": 7353 }, { "epoch": 1.200481612995388, "grad_norm": 2.0630359649658203, "learning_rate": 1.9797408184687074e-05, "loss": 0.7362, "step": 7354 }, { "epoch": 1.2006448716378924, "grad_norm": 1.8140848875045776, "learning_rate": 1.9797344627670338e-05, "loss": 0.7408, "step": 7355 }, { "epoch": 1.2008081302803968, "grad_norm": 2.2212843894958496, "learning_rate": 1.9797281060787684e-05, "loss": 0.7596, "step": 7356 }, { "epoch": 1.200971388922901, "grad_norm": 1.7277783155441284, "learning_rate": 1.9797217484039164e-05, "loss": 0.6246, "step": 7357 }, { "epoch": 1.2011346475654054, "grad_norm": 1.7584041357040405, "learning_rate": 1.9797153897424854e-05, "loss": 0.6868, "step": 7358 }, { "epoch": 1.2012979062079099, "grad_norm": 1.5274354219436646, "learning_rate": 1.979709030094481e-05, "loss": 0.5934, "step": 7359 }, { "epoch": 1.2014611648504143, "grad_norm": 2.0911166667938232, "learning_rate": 1.9797026694599097e-05, "loss": 0.7944, "step": 7360 }, { "epoch": 1.2016244234929188, "grad_norm": 1.8187841176986694, "learning_rate": 1.9796963078387782e-05, "loss": 0.5665, "step": 7361 }, { "epoch": 1.201787682135423, "grad_norm": 1.5789079666137695, "learning_rate": 1.979689945231093e-05, "loss": 0.6763, "step": 7362 }, { "epoch": 1.2019509407779274, "grad_norm": 1.6954537630081177, "learning_rate": 1.97968358163686e-05, "loss": 0.6646, "step": 7363 }, { "epoch": 1.2021141994204319, "grad_norm": 1.6038451194763184, "learning_rate": 1.9796772170560858e-05, "loss": 0.5968, "step": 7364 }, { "epoch": 1.2022774580629363, "grad_norm": 1.8748506307601929, "learning_rate": 1.9796708514887773e-05, "loss": 0.7605, "step": 7365 }, { "epoch": 1.2024407167054405, "grad_norm": 2.053863763809204, "learning_rate": 1.9796644849349403e-05, "loss": 0.6618, "step": 7366 }, { "epoch": 1.202603975347945, "grad_norm": 1.8704049587249756, "learning_rate": 1.9796581173945816e-05, "loss": 0.7232, "step": 7367 }, { "epoch": 1.2027672339904494, "grad_norm": 2.0381157398223877, "learning_rate": 1.979651748867707e-05, "loss": 0.6162, "step": 7368 }, { "epoch": 1.2029304926329538, "grad_norm": 1.7795929908752441, "learning_rate": 1.9796453793543237e-05, "loss": 0.7657, "step": 7369 }, { "epoch": 1.2030937512754583, "grad_norm": 2.1925785541534424, "learning_rate": 1.9796390088544377e-05, "loss": 0.8536, "step": 7370 }, { "epoch": 1.2032570099179625, "grad_norm": 2.0009453296661377, "learning_rate": 1.9796326373680555e-05, "loss": 0.8244, "step": 7371 }, { "epoch": 1.203420268560467, "grad_norm": 1.6859406232833862, "learning_rate": 1.9796262648951834e-05, "loss": 0.582, "step": 7372 }, { "epoch": 1.2035835272029713, "grad_norm": 1.9571212530136108, "learning_rate": 1.979619891435828e-05, "loss": 0.7924, "step": 7373 }, { "epoch": 1.2037467858454758, "grad_norm": 1.5581319332122803, "learning_rate": 1.9796135169899956e-05, "loss": 0.669, "step": 7374 }, { "epoch": 1.20391004448798, "grad_norm": 2.0011534690856934, "learning_rate": 1.9796071415576925e-05, "loss": 1.0497, "step": 7375 }, { "epoch": 1.2040733031304844, "grad_norm": 1.766790747642517, "learning_rate": 1.9796007651389255e-05, "loss": 0.6862, "step": 7376 }, { "epoch": 1.2042365617729889, "grad_norm": 2.090240001678467, "learning_rate": 1.9795943877337007e-05, "loss": 0.8743, "step": 7377 }, { "epoch": 1.2043998204154933, "grad_norm": 1.8979412317276, "learning_rate": 1.9795880093420246e-05, "loss": 0.6517, "step": 7378 }, { "epoch": 1.2045630790579978, "grad_norm": 1.5886799097061157, "learning_rate": 1.9795816299639035e-05, "loss": 0.6421, "step": 7379 }, { "epoch": 1.204726337700502, "grad_norm": 2.1308624744415283, "learning_rate": 1.979575249599344e-05, "loss": 0.8061, "step": 7380 }, { "epoch": 1.2048895963430064, "grad_norm": 2.105107307434082, "learning_rate": 1.979568868248353e-05, "loss": 0.7815, "step": 7381 }, { "epoch": 1.2050528549855108, "grad_norm": 1.7962852716445923, "learning_rate": 1.9795624859109357e-05, "loss": 0.6426, "step": 7382 }, { "epoch": 1.205216113628015, "grad_norm": 2.0234248638153076, "learning_rate": 1.9795561025870996e-05, "loss": 0.6454, "step": 7383 }, { "epoch": 1.2053793722705195, "grad_norm": 1.873284935951233, "learning_rate": 1.9795497182768506e-05, "loss": 0.5997, "step": 7384 }, { "epoch": 1.205542630913024, "grad_norm": 1.996983528137207, "learning_rate": 1.9795433329801955e-05, "loss": 0.6538, "step": 7385 }, { "epoch": 1.2057058895555284, "grad_norm": 1.926608920097351, "learning_rate": 1.9795369466971404e-05, "loss": 0.6682, "step": 7386 }, { "epoch": 1.2058691481980328, "grad_norm": 1.704397201538086, "learning_rate": 1.979530559427692e-05, "loss": 0.6599, "step": 7387 }, { "epoch": 1.206032406840537, "grad_norm": 1.5180952548980713, "learning_rate": 1.9795241711718565e-05, "loss": 0.6247, "step": 7388 }, { "epoch": 1.2061956654830415, "grad_norm": 1.8134716749191284, "learning_rate": 1.9795177819296403e-05, "loss": 0.7213, "step": 7389 }, { "epoch": 1.206358924125546, "grad_norm": 1.9658679962158203, "learning_rate": 1.97951139170105e-05, "loss": 0.6354, "step": 7390 }, { "epoch": 1.2065221827680503, "grad_norm": 1.792641282081604, "learning_rate": 1.9795050004860918e-05, "loss": 0.7693, "step": 7391 }, { "epoch": 1.2066854414105546, "grad_norm": 1.9670463800430298, "learning_rate": 1.9794986082847728e-05, "loss": 0.789, "step": 7392 }, { "epoch": 1.206848700053059, "grad_norm": 1.7635657787322998, "learning_rate": 1.9794922150970984e-05, "loss": 0.6938, "step": 7393 }, { "epoch": 1.2070119586955634, "grad_norm": 2.0216426849365234, "learning_rate": 1.979485820923076e-05, "loss": 0.7903, "step": 7394 }, { "epoch": 1.2071752173380679, "grad_norm": 1.8245142698287964, "learning_rate": 1.9794794257627117e-05, "loss": 0.5638, "step": 7395 }, { "epoch": 1.2073384759805723, "grad_norm": 1.6951179504394531, "learning_rate": 1.9794730296160117e-05, "loss": 0.7518, "step": 7396 }, { "epoch": 1.2075017346230765, "grad_norm": 1.9777193069458008, "learning_rate": 1.9794666324829826e-05, "loss": 0.8065, "step": 7397 }, { "epoch": 1.207664993265581, "grad_norm": 2.2356173992156982, "learning_rate": 1.979460234363631e-05, "loss": 0.8131, "step": 7398 }, { "epoch": 1.2078282519080854, "grad_norm": 1.884108066558838, "learning_rate": 1.9794538352579628e-05, "loss": 0.8158, "step": 7399 }, { "epoch": 1.2079915105505898, "grad_norm": 1.6056896448135376, "learning_rate": 1.9794474351659854e-05, "loss": 0.5784, "step": 7400 }, { "epoch": 1.208154769193094, "grad_norm": 1.8474441766738892, "learning_rate": 1.9794410340877045e-05, "loss": 0.6167, "step": 7401 }, { "epoch": 1.2083180278355985, "grad_norm": 1.7950236797332764, "learning_rate": 1.9794346320231265e-05, "loss": 0.6855, "step": 7402 }, { "epoch": 1.208481286478103, "grad_norm": 2.13885235786438, "learning_rate": 1.979428228972258e-05, "loss": 0.8704, "step": 7403 }, { "epoch": 1.2086445451206074, "grad_norm": 1.4476736783981323, "learning_rate": 1.979421824935106e-05, "loss": 0.6461, "step": 7404 }, { "epoch": 1.2088078037631118, "grad_norm": 2.2221169471740723, "learning_rate": 1.9794154199116763e-05, "loss": 0.7934, "step": 7405 }, { "epoch": 1.208971062405616, "grad_norm": 1.5845366716384888, "learning_rate": 1.9794090139019757e-05, "loss": 0.5762, "step": 7406 }, { "epoch": 1.2091343210481205, "grad_norm": 1.6272059679031372, "learning_rate": 1.9794026069060102e-05, "loss": 0.6487, "step": 7407 }, { "epoch": 1.209297579690625, "grad_norm": 1.706217646598816, "learning_rate": 1.9793961989237867e-05, "loss": 0.7021, "step": 7408 }, { "epoch": 1.2094608383331293, "grad_norm": 1.7049869298934937, "learning_rate": 1.9793897899553116e-05, "loss": 0.6993, "step": 7409 }, { "epoch": 1.2096240969756336, "grad_norm": 1.7050567865371704, "learning_rate": 1.979383380000591e-05, "loss": 0.701, "step": 7410 }, { "epoch": 1.209787355618138, "grad_norm": 1.7534161806106567, "learning_rate": 1.9793769690596315e-05, "loss": 0.8432, "step": 7411 }, { "epoch": 1.2099506142606424, "grad_norm": 2.0166826248168945, "learning_rate": 1.9793705571324397e-05, "loss": 0.7088, "step": 7412 }, { "epoch": 1.2101138729031469, "grad_norm": 1.508067011833191, "learning_rate": 1.979364144219022e-05, "loss": 0.6877, "step": 7413 }, { "epoch": 1.2102771315456513, "grad_norm": 1.8355813026428223, "learning_rate": 1.979357730319385e-05, "loss": 0.7764, "step": 7414 }, { "epoch": 1.2104403901881555, "grad_norm": 1.6093283891677856, "learning_rate": 1.9793513154335354e-05, "loss": 0.5639, "step": 7415 }, { "epoch": 1.21060364883066, "grad_norm": 1.8645378351211548, "learning_rate": 1.9793448995614785e-05, "loss": 0.7806, "step": 7416 }, { "epoch": 1.2107669074731644, "grad_norm": 2.003026008605957, "learning_rate": 1.979338482703222e-05, "loss": 0.7819, "step": 7417 }, { "epoch": 1.2109301661156688, "grad_norm": 1.5802714824676514, "learning_rate": 1.979332064858772e-05, "loss": 0.6399, "step": 7418 }, { "epoch": 1.211093424758173, "grad_norm": 2.0080184936523438, "learning_rate": 1.9793256460281348e-05, "loss": 0.74, "step": 7419 }, { "epoch": 1.2112566834006775, "grad_norm": 1.827246904373169, "learning_rate": 1.9793192262113167e-05, "loss": 0.7025, "step": 7420 }, { "epoch": 1.211419942043182, "grad_norm": 2.1109707355499268, "learning_rate": 1.9793128054083245e-05, "loss": 0.8903, "step": 7421 }, { "epoch": 1.2115832006856864, "grad_norm": 1.726699948310852, "learning_rate": 1.9793063836191648e-05, "loss": 0.7622, "step": 7422 }, { "epoch": 1.2117464593281908, "grad_norm": 1.724717617034912, "learning_rate": 1.9792999608438436e-05, "loss": 0.6586, "step": 7423 }, { "epoch": 1.211909717970695, "grad_norm": 1.565402626991272, "learning_rate": 1.9792935370823676e-05, "loss": 0.5615, "step": 7424 }, { "epoch": 1.2120729766131995, "grad_norm": 1.5589874982833862, "learning_rate": 1.9792871123347434e-05, "loss": 0.6316, "step": 7425 }, { "epoch": 1.212236235255704, "grad_norm": 1.5304598808288574, "learning_rate": 1.9792806866009773e-05, "loss": 0.5517, "step": 7426 }, { "epoch": 1.2123994938982081, "grad_norm": 1.8662205934524536, "learning_rate": 1.9792742598810758e-05, "loss": 0.7624, "step": 7427 }, { "epoch": 1.2125627525407126, "grad_norm": 1.615124225616455, "learning_rate": 1.979267832175045e-05, "loss": 0.6313, "step": 7428 }, { "epoch": 1.212726011183217, "grad_norm": 2.4157090187072754, "learning_rate": 1.9792614034828923e-05, "loss": 0.8412, "step": 7429 }, { "epoch": 1.2128892698257214, "grad_norm": 1.7291311025619507, "learning_rate": 1.9792549738046232e-05, "loss": 0.6257, "step": 7430 }, { "epoch": 1.2130525284682259, "grad_norm": 2.1258742809295654, "learning_rate": 1.979248543140245e-05, "loss": 0.8124, "step": 7431 }, { "epoch": 1.21321578711073, "grad_norm": 1.614032506942749, "learning_rate": 1.9792421114897635e-05, "loss": 0.5492, "step": 7432 }, { "epoch": 1.2133790457532345, "grad_norm": 1.8756171464920044, "learning_rate": 1.9792356788531854e-05, "loss": 0.8174, "step": 7433 }, { "epoch": 1.213542304395739, "grad_norm": 1.996145248413086, "learning_rate": 1.9792292452305174e-05, "loss": 0.7093, "step": 7434 }, { "epoch": 1.2137055630382434, "grad_norm": 1.7242380380630493, "learning_rate": 1.979222810621766e-05, "loss": 0.7007, "step": 7435 }, { "epoch": 1.2138688216807476, "grad_norm": 1.8769707679748535, "learning_rate": 1.9792163750269373e-05, "loss": 0.6683, "step": 7436 }, { "epoch": 1.214032080323252, "grad_norm": 1.8688710927963257, "learning_rate": 1.9792099384460378e-05, "loss": 0.7083, "step": 7437 }, { "epoch": 1.2141953389657565, "grad_norm": 2.06347393989563, "learning_rate": 1.9792035008790744e-05, "loss": 0.8888, "step": 7438 }, { "epoch": 1.214358597608261, "grad_norm": 1.9763275384902954, "learning_rate": 1.979197062326053e-05, "loss": 0.8097, "step": 7439 }, { "epoch": 1.2145218562507654, "grad_norm": 1.916993260383606, "learning_rate": 1.979190622786981e-05, "loss": 0.6881, "step": 7440 }, { "epoch": 1.2146851148932696, "grad_norm": 1.4702719449996948, "learning_rate": 1.9791841822618637e-05, "loss": 0.7119, "step": 7441 }, { "epoch": 1.214848373535774, "grad_norm": 1.6330474615097046, "learning_rate": 1.9791777407507083e-05, "loss": 0.6813, "step": 7442 }, { "epoch": 1.2150116321782785, "grad_norm": 1.896609902381897, "learning_rate": 1.9791712982535215e-05, "loss": 0.7386, "step": 7443 }, { "epoch": 1.215174890820783, "grad_norm": 1.6126973628997803, "learning_rate": 1.9791648547703095e-05, "loss": 0.637, "step": 7444 }, { "epoch": 1.215338149463287, "grad_norm": 1.545535683631897, "learning_rate": 1.9791584103010785e-05, "loss": 0.5901, "step": 7445 }, { "epoch": 1.2155014081057915, "grad_norm": 1.709138035774231, "learning_rate": 1.9791519648458352e-05, "loss": 0.6097, "step": 7446 }, { "epoch": 1.215664666748296, "grad_norm": 1.778785228729248, "learning_rate": 1.979145518404586e-05, "loss": 0.6195, "step": 7447 }, { "epoch": 1.2158279253908004, "grad_norm": 1.553452968597412, "learning_rate": 1.979139070977338e-05, "loss": 0.7041, "step": 7448 }, { "epoch": 1.2159911840333049, "grad_norm": 1.8251323699951172, "learning_rate": 1.9791326225640967e-05, "loss": 0.7159, "step": 7449 }, { "epoch": 1.216154442675809, "grad_norm": 1.6588391065597534, "learning_rate": 1.9791261731648694e-05, "loss": 0.7381, "step": 7450 }, { "epoch": 1.2163177013183135, "grad_norm": 2.0273489952087402, "learning_rate": 1.9791197227796622e-05, "loss": 0.8044, "step": 7451 }, { "epoch": 1.216480959960818, "grad_norm": 1.787227988243103, "learning_rate": 1.979113271408482e-05, "loss": 0.6639, "step": 7452 }, { "epoch": 1.2166442186033224, "grad_norm": 1.8415346145629883, "learning_rate": 1.9791068190513346e-05, "loss": 0.7877, "step": 7453 }, { "epoch": 1.2168074772458266, "grad_norm": 1.6401402950286865, "learning_rate": 1.979100365708227e-05, "loss": 0.6705, "step": 7454 }, { "epoch": 1.216970735888331, "grad_norm": 1.6079798936843872, "learning_rate": 1.9790939113791657e-05, "loss": 0.5733, "step": 7455 }, { "epoch": 1.2171339945308355, "grad_norm": 2.3078153133392334, "learning_rate": 1.979087456064157e-05, "loss": 0.8324, "step": 7456 }, { "epoch": 1.21729725317334, "grad_norm": 1.5739309787750244, "learning_rate": 1.9790809997632076e-05, "loss": 0.6777, "step": 7457 }, { "epoch": 1.2174605118158444, "grad_norm": 1.692357063293457, "learning_rate": 1.9790745424763238e-05, "loss": 0.6101, "step": 7458 }, { "epoch": 1.2176237704583486, "grad_norm": 1.6537089347839355, "learning_rate": 1.9790680842035123e-05, "loss": 0.6955, "step": 7459 }, { "epoch": 1.217787029100853, "grad_norm": 1.7182763814926147, "learning_rate": 1.9790616249447795e-05, "loss": 0.6761, "step": 7460 }, { "epoch": 1.2179502877433575, "grad_norm": 2.062626838684082, "learning_rate": 1.979055164700132e-05, "loss": 0.7244, "step": 7461 }, { "epoch": 1.218113546385862, "grad_norm": 2.0550293922424316, "learning_rate": 1.9790487034695754e-05, "loss": 1.1498, "step": 7462 }, { "epoch": 1.218276805028366, "grad_norm": 1.920966625213623, "learning_rate": 1.9790422412531178e-05, "loss": 0.7786, "step": 7463 }, { "epoch": 1.2184400636708705, "grad_norm": 1.987795114517212, "learning_rate": 1.979035778050765e-05, "loss": 0.657, "step": 7464 }, { "epoch": 1.218603322313375, "grad_norm": 1.7071714401245117, "learning_rate": 1.979029313862523e-05, "loss": 0.657, "step": 7465 }, { "epoch": 1.2187665809558794, "grad_norm": 2.6193277835845947, "learning_rate": 1.979022848688399e-05, "loss": 0.9032, "step": 7466 }, { "epoch": 1.2189298395983839, "grad_norm": 1.8673352003097534, "learning_rate": 1.979016382528399e-05, "loss": 0.7123, "step": 7467 }, { "epoch": 1.219093098240888, "grad_norm": 1.7711907625198364, "learning_rate": 1.97900991538253e-05, "loss": 0.6199, "step": 7468 }, { "epoch": 1.2192563568833925, "grad_norm": 1.7074365615844727, "learning_rate": 1.9790034472507983e-05, "loss": 0.567, "step": 7469 }, { "epoch": 1.219419615525897, "grad_norm": 2.19486927986145, "learning_rate": 1.9789969781332102e-05, "loss": 0.682, "step": 7470 }, { "epoch": 1.2195828741684012, "grad_norm": 2.1546709537506104, "learning_rate": 1.9789905080297726e-05, "loss": 0.8092, "step": 7471 }, { "epoch": 1.2197461328109056, "grad_norm": 2.123701810836792, "learning_rate": 1.9789840369404917e-05, "loss": 0.6304, "step": 7472 }, { "epoch": 1.21990939145341, "grad_norm": 1.953593373298645, "learning_rate": 1.978977564865374e-05, "loss": 0.7079, "step": 7473 }, { "epoch": 1.2200726500959145, "grad_norm": 1.8833160400390625, "learning_rate": 1.9789710918044265e-05, "loss": 0.6876, "step": 7474 }, { "epoch": 1.220235908738419, "grad_norm": 1.8395326137542725, "learning_rate": 1.9789646177576548e-05, "loss": 0.7133, "step": 7475 }, { "epoch": 1.2203991673809231, "grad_norm": 1.5282162427902222, "learning_rate": 1.9789581427250665e-05, "loss": 0.5327, "step": 7476 }, { "epoch": 1.2205624260234276, "grad_norm": 1.685650110244751, "learning_rate": 1.9789516667066677e-05, "loss": 0.7168, "step": 7477 }, { "epoch": 1.220725684665932, "grad_norm": 1.9821810722351074, "learning_rate": 1.9789451897024646e-05, "loss": 0.7603, "step": 7478 }, { "epoch": 1.2208889433084364, "grad_norm": 2.0764896869659424, "learning_rate": 1.9789387117124638e-05, "loss": 0.7399, "step": 7479 }, { "epoch": 1.2210522019509407, "grad_norm": 1.5705757141113281, "learning_rate": 1.9789322327366722e-05, "loss": 0.6538, "step": 7480 }, { "epoch": 1.221215460593445, "grad_norm": 1.7783465385437012, "learning_rate": 1.9789257527750962e-05, "loss": 0.7886, "step": 7481 }, { "epoch": 1.2213787192359495, "grad_norm": 1.8706525564193726, "learning_rate": 1.978919271827742e-05, "loss": 0.8065, "step": 7482 }, { "epoch": 1.221541977878454, "grad_norm": 1.6382510662078857, "learning_rate": 1.9789127898946164e-05, "loss": 0.6214, "step": 7483 }, { "epoch": 1.2217052365209584, "grad_norm": 1.5115511417388916, "learning_rate": 1.9789063069757258e-05, "loss": 0.5285, "step": 7484 }, { "epoch": 1.2218684951634626, "grad_norm": 1.820772409439087, "learning_rate": 1.978899823071077e-05, "loss": 0.7185, "step": 7485 }, { "epoch": 1.222031753805967, "grad_norm": 2.119948387145996, "learning_rate": 1.9788933381806762e-05, "loss": 0.7139, "step": 7486 }, { "epoch": 1.2221950124484715, "grad_norm": 1.9118001461029053, "learning_rate": 1.97888685230453e-05, "loss": 0.733, "step": 7487 }, { "epoch": 1.222358271090976, "grad_norm": 1.7695504426956177, "learning_rate": 1.9788803654426455e-05, "loss": 0.6999, "step": 7488 }, { "epoch": 1.2225215297334802, "grad_norm": 1.9525483846664429, "learning_rate": 1.978873877595028e-05, "loss": 0.6499, "step": 7489 }, { "epoch": 1.2226847883759846, "grad_norm": 2.044201612472534, "learning_rate": 1.9788673887616852e-05, "loss": 0.7619, "step": 7490 }, { "epoch": 1.222848047018489, "grad_norm": 1.5164257287979126, "learning_rate": 1.9788608989426234e-05, "loss": 0.6218, "step": 7491 }, { "epoch": 1.2230113056609935, "grad_norm": 1.8486992120742798, "learning_rate": 1.9788544081378483e-05, "loss": 0.6926, "step": 7492 }, { "epoch": 1.223174564303498, "grad_norm": 2.0172231197357178, "learning_rate": 1.9788479163473674e-05, "loss": 0.7996, "step": 7493 }, { "epoch": 1.2233378229460021, "grad_norm": 1.7185602188110352, "learning_rate": 1.978841423571187e-05, "loss": 0.6985, "step": 7494 }, { "epoch": 1.2235010815885066, "grad_norm": 2.0612454414367676, "learning_rate": 1.9788349298093136e-05, "loss": 0.7728, "step": 7495 }, { "epoch": 1.223664340231011, "grad_norm": 1.6618387699127197, "learning_rate": 1.9788284350617536e-05, "loss": 0.577, "step": 7496 }, { "epoch": 1.2238275988735154, "grad_norm": 1.7117677927017212, "learning_rate": 1.9788219393285133e-05, "loss": 0.7177, "step": 7497 }, { "epoch": 1.2239908575160197, "grad_norm": 1.8012889623641968, "learning_rate": 1.9788154426096e-05, "loss": 0.7537, "step": 7498 }, { "epoch": 1.224154116158524, "grad_norm": 1.5860058069229126, "learning_rate": 1.9788089449050195e-05, "loss": 0.5982, "step": 7499 }, { "epoch": 1.2243173748010285, "grad_norm": 2.0279064178466797, "learning_rate": 1.978802446214779e-05, "loss": 0.7333, "step": 7500 }, { "epoch": 1.224480633443533, "grad_norm": 1.8839024305343628, "learning_rate": 1.9787959465388845e-05, "loss": 0.7533, "step": 7501 }, { "epoch": 1.2246438920860374, "grad_norm": 1.6589711904525757, "learning_rate": 1.9787894458773428e-05, "loss": 0.7363, "step": 7502 }, { "epoch": 1.2248071507285416, "grad_norm": 1.5441715717315674, "learning_rate": 1.9787829442301604e-05, "loss": 0.6431, "step": 7503 }, { "epoch": 1.224970409371046, "grad_norm": 1.8326237201690674, "learning_rate": 1.9787764415973437e-05, "loss": 0.6777, "step": 7504 }, { "epoch": 1.2251336680135505, "grad_norm": 1.619685173034668, "learning_rate": 1.9787699379788995e-05, "loss": 0.617, "step": 7505 }, { "epoch": 1.225296926656055, "grad_norm": 1.4395554065704346, "learning_rate": 1.9787634333748342e-05, "loss": 0.5516, "step": 7506 }, { "epoch": 1.2254601852985592, "grad_norm": 1.9953265190124512, "learning_rate": 1.9787569277851542e-05, "loss": 0.6325, "step": 7507 }, { "epoch": 1.2256234439410636, "grad_norm": 1.965248703956604, "learning_rate": 1.9787504212098664e-05, "loss": 0.7321, "step": 7508 }, { "epoch": 1.225786702583568, "grad_norm": 1.9028277397155762, "learning_rate": 1.978743913648977e-05, "loss": 0.8295, "step": 7509 }, { "epoch": 1.2259499612260725, "grad_norm": 1.8230926990509033, "learning_rate": 1.978737405102493e-05, "loss": 0.6857, "step": 7510 }, { "epoch": 1.226113219868577, "grad_norm": 1.6447460651397705, "learning_rate": 1.9787308955704206e-05, "loss": 0.6885, "step": 7511 }, { "epoch": 1.2262764785110811, "grad_norm": 1.6718796491622925, "learning_rate": 1.9787243850527663e-05, "loss": 0.7368, "step": 7512 }, { "epoch": 1.2264397371535856, "grad_norm": 2.4728660583496094, "learning_rate": 1.978717873549537e-05, "loss": 0.6239, "step": 7513 }, { "epoch": 1.22660299579609, "grad_norm": 1.6918766498565674, "learning_rate": 1.9787113610607394e-05, "loss": 0.5896, "step": 7514 }, { "epoch": 1.2267662544385944, "grad_norm": 1.5402162075042725, "learning_rate": 1.978704847586379e-05, "loss": 0.6032, "step": 7515 }, { "epoch": 1.2269295130810987, "grad_norm": 1.9853535890579224, "learning_rate": 1.9786983331264634e-05, "loss": 0.813, "step": 7516 }, { "epoch": 1.227092771723603, "grad_norm": 1.6917046308517456, "learning_rate": 1.978691817680999e-05, "loss": 0.6085, "step": 7517 }, { "epoch": 1.2272560303661075, "grad_norm": 1.9412026405334473, "learning_rate": 1.9786853012499923e-05, "loss": 0.7982, "step": 7518 }, { "epoch": 1.227419289008612, "grad_norm": 1.9164992570877075, "learning_rate": 1.9786787838334494e-05, "loss": 0.8812, "step": 7519 }, { "epoch": 1.2275825476511162, "grad_norm": 1.9447320699691772, "learning_rate": 1.9786722654313773e-05, "loss": 0.7351, "step": 7520 }, { "epoch": 1.2277458062936206, "grad_norm": 1.603618860244751, "learning_rate": 1.9786657460437824e-05, "loss": 0.5899, "step": 7521 }, { "epoch": 1.227909064936125, "grad_norm": 1.685020089149475, "learning_rate": 1.9786592256706717e-05, "loss": 0.6576, "step": 7522 }, { "epoch": 1.2280723235786295, "grad_norm": 1.6966872215270996, "learning_rate": 1.978652704312051e-05, "loss": 0.6758, "step": 7523 }, { "epoch": 1.2282355822211337, "grad_norm": 2.2998721599578857, "learning_rate": 1.9786461819679276e-05, "loss": 0.8076, "step": 7524 }, { "epoch": 1.2283988408636382, "grad_norm": 1.8802968263626099, "learning_rate": 1.9786396586383078e-05, "loss": 0.9473, "step": 7525 }, { "epoch": 1.2285620995061426, "grad_norm": 2.023282289505005, "learning_rate": 1.978633134323198e-05, "loss": 0.765, "step": 7526 }, { "epoch": 1.228725358148647, "grad_norm": 1.6884429454803467, "learning_rate": 1.978626609022605e-05, "loss": 0.6562, "step": 7527 }, { "epoch": 1.2288886167911515, "grad_norm": 1.9121553897857666, "learning_rate": 1.978620082736535e-05, "loss": 0.784, "step": 7528 }, { "epoch": 1.2290518754336557, "grad_norm": 1.818278431892395, "learning_rate": 1.9786135554649946e-05, "loss": 0.7256, "step": 7529 }, { "epoch": 1.2292151340761601, "grad_norm": 1.631551742553711, "learning_rate": 1.978607027207991e-05, "loss": 0.6141, "step": 7530 }, { "epoch": 1.2293783927186646, "grad_norm": 1.8147847652435303, "learning_rate": 1.9786004979655306e-05, "loss": 0.6736, "step": 7531 }, { "epoch": 1.229541651361169, "grad_norm": 1.8590754270553589, "learning_rate": 1.9785939677376195e-05, "loss": 0.6144, "step": 7532 }, { "epoch": 1.2297049100036732, "grad_norm": 1.6786022186279297, "learning_rate": 1.9785874365242645e-05, "loss": 0.757, "step": 7533 }, { "epoch": 1.2298681686461777, "grad_norm": 2.0117757320404053, "learning_rate": 1.978580904325472e-05, "loss": 0.7214, "step": 7534 }, { "epoch": 1.230031427288682, "grad_norm": 1.4406912326812744, "learning_rate": 1.9785743711412493e-05, "loss": 0.6053, "step": 7535 }, { "epoch": 1.2301946859311865, "grad_norm": 1.8663067817687988, "learning_rate": 1.9785678369716023e-05, "loss": 0.7099, "step": 7536 }, { "epoch": 1.230357944573691, "grad_norm": 1.7292609214782715, "learning_rate": 1.978561301816538e-05, "loss": 0.6191, "step": 7537 }, { "epoch": 1.2305212032161952, "grad_norm": 1.6086658239364624, "learning_rate": 1.978554765676062e-05, "loss": 0.6372, "step": 7538 }, { "epoch": 1.2306844618586996, "grad_norm": 2.157968521118164, "learning_rate": 1.978548228550182e-05, "loss": 0.857, "step": 7539 }, { "epoch": 1.230847720501204, "grad_norm": 1.5795437097549438, "learning_rate": 1.9785416904389044e-05, "loss": 0.661, "step": 7540 }, { "epoch": 1.2310109791437085, "grad_norm": 1.3310855627059937, "learning_rate": 1.9785351513422354e-05, "loss": 0.5867, "step": 7541 }, { "epoch": 1.2311742377862127, "grad_norm": 1.9253145456314087, "learning_rate": 1.9785286112601816e-05, "loss": 0.7932, "step": 7542 }, { "epoch": 1.2313374964287171, "grad_norm": 1.7828696966171265, "learning_rate": 1.97852207019275e-05, "loss": 0.7077, "step": 7543 }, { "epoch": 1.2315007550712216, "grad_norm": 1.7212601900100708, "learning_rate": 1.9785155281399465e-05, "loss": 0.6967, "step": 7544 }, { "epoch": 1.231664013713726, "grad_norm": 1.7045612335205078, "learning_rate": 1.9785089851017788e-05, "loss": 0.7531, "step": 7545 }, { "epoch": 1.2318272723562305, "grad_norm": 1.7298681735992432, "learning_rate": 1.978502441078252e-05, "loss": 0.6396, "step": 7546 }, { "epoch": 1.2319905309987347, "grad_norm": 1.7949471473693848, "learning_rate": 1.978495896069374e-05, "loss": 0.785, "step": 7547 }, { "epoch": 1.2321537896412391, "grad_norm": 1.9116441011428833, "learning_rate": 1.978489350075151e-05, "loss": 0.7124, "step": 7548 }, { "epoch": 1.2323170482837436, "grad_norm": 2.066927909851074, "learning_rate": 1.9784828030955894e-05, "loss": 0.8599, "step": 7549 }, { "epoch": 1.232480306926248, "grad_norm": 2.042468309402466, "learning_rate": 1.978476255130696e-05, "loss": 0.8423, "step": 7550 }, { "epoch": 1.2326435655687522, "grad_norm": 1.9192731380462646, "learning_rate": 1.978469706180477e-05, "loss": 0.8324, "step": 7551 }, { "epoch": 1.2328068242112566, "grad_norm": 1.5339199304580688, "learning_rate": 1.978463156244939e-05, "loss": 0.6332, "step": 7552 }, { "epoch": 1.232970082853761, "grad_norm": 1.9158146381378174, "learning_rate": 1.9784566053240893e-05, "loss": 0.7756, "step": 7553 }, { "epoch": 1.2331333414962655, "grad_norm": 1.5751270055770874, "learning_rate": 1.978450053417934e-05, "loss": 0.6795, "step": 7554 }, { "epoch": 1.23329660013877, "grad_norm": 1.721064567565918, "learning_rate": 1.97844350052648e-05, "loss": 0.8753, "step": 7555 }, { "epoch": 1.2334598587812742, "grad_norm": 1.827374815940857, "learning_rate": 1.9784369466497333e-05, "loss": 0.6286, "step": 7556 }, { "epoch": 1.2336231174237786, "grad_norm": 1.7738226652145386, "learning_rate": 1.978430391787701e-05, "loss": 0.7312, "step": 7557 }, { "epoch": 1.233786376066283, "grad_norm": 1.9537365436553955, "learning_rate": 1.9784238359403893e-05, "loss": 0.7627, "step": 7558 }, { "epoch": 1.2339496347087875, "grad_norm": 1.6540791988372803, "learning_rate": 1.9784172791078052e-05, "loss": 0.6382, "step": 7559 }, { "epoch": 1.2341128933512917, "grad_norm": 1.8772302865982056, "learning_rate": 1.978410721289955e-05, "loss": 0.7602, "step": 7560 }, { "epoch": 1.2342761519937961, "grad_norm": 1.7176569700241089, "learning_rate": 1.9784041624868458e-05, "loss": 0.6428, "step": 7561 }, { "epoch": 1.2344394106363006, "grad_norm": 1.8083763122558594, "learning_rate": 1.978397602698484e-05, "loss": 0.6789, "step": 7562 }, { "epoch": 1.234602669278805, "grad_norm": 1.871168851852417, "learning_rate": 1.9783910419248755e-05, "loss": 0.7171, "step": 7563 }, { "epoch": 1.2347659279213095, "grad_norm": 1.5781874656677246, "learning_rate": 1.9783844801660278e-05, "loss": 0.548, "step": 7564 }, { "epoch": 1.2349291865638137, "grad_norm": 1.7105425596237183, "learning_rate": 1.9783779174219472e-05, "loss": 0.6713, "step": 7565 }, { "epoch": 1.2350924452063181, "grad_norm": 1.7158540487289429, "learning_rate": 1.9783713536926403e-05, "loss": 0.7682, "step": 7566 }, { "epoch": 1.2352557038488226, "grad_norm": 1.7054301500320435, "learning_rate": 1.9783647889781138e-05, "loss": 0.8025, "step": 7567 }, { "epoch": 1.2354189624913268, "grad_norm": 2.0687429904937744, "learning_rate": 1.978358223278374e-05, "loss": 0.7486, "step": 7568 }, { "epoch": 1.2355822211338312, "grad_norm": 1.774264931678772, "learning_rate": 1.9783516565934278e-05, "loss": 0.7578, "step": 7569 }, { "epoch": 1.2357454797763356, "grad_norm": 2.1112060546875, "learning_rate": 1.9783450889232818e-05, "loss": 1.2941, "step": 7570 }, { "epoch": 1.23590873841884, "grad_norm": 2.18243670463562, "learning_rate": 1.9783385202679426e-05, "loss": 0.9275, "step": 7571 }, { "epoch": 1.2360719970613445, "grad_norm": 1.6945784091949463, "learning_rate": 1.9783319506274167e-05, "loss": 0.6898, "step": 7572 }, { "epoch": 1.2362352557038487, "grad_norm": 2.231377363204956, "learning_rate": 1.9783253800017105e-05, "loss": 0.8277, "step": 7573 }, { "epoch": 1.2363985143463532, "grad_norm": 1.7557791471481323, "learning_rate": 1.9783188083908315e-05, "loss": 0.6462, "step": 7574 }, { "epoch": 1.2365617729888576, "grad_norm": 1.9476746320724487, "learning_rate": 1.9783122357947854e-05, "loss": 0.7874, "step": 7575 }, { "epoch": 1.236725031631362, "grad_norm": 1.7728602886199951, "learning_rate": 1.978305662213579e-05, "loss": 0.7225, "step": 7576 }, { "epoch": 1.2368882902738663, "grad_norm": 1.6741236448287964, "learning_rate": 1.9782990876472193e-05, "loss": 0.6549, "step": 7577 }, { "epoch": 1.2370515489163707, "grad_norm": 1.8071931600570679, "learning_rate": 1.9782925120957123e-05, "loss": 0.6933, "step": 7578 }, { "epoch": 1.2372148075588751, "grad_norm": 1.6948342323303223, "learning_rate": 1.9782859355590656e-05, "loss": 0.6599, "step": 7579 }, { "epoch": 1.2373780662013796, "grad_norm": 1.8294676542282104, "learning_rate": 1.9782793580372848e-05, "loss": 0.7019, "step": 7580 }, { "epoch": 1.237541324843884, "grad_norm": 1.938857078552246, "learning_rate": 1.9782727795303768e-05, "loss": 0.7807, "step": 7581 }, { "epoch": 1.2377045834863882, "grad_norm": 1.955514669418335, "learning_rate": 1.9782662000383488e-05, "loss": 0.7311, "step": 7582 }, { "epoch": 1.2378678421288927, "grad_norm": 1.8646504878997803, "learning_rate": 1.978259619561207e-05, "loss": 0.787, "step": 7583 }, { "epoch": 1.238031100771397, "grad_norm": 1.7462517023086548, "learning_rate": 1.9782530380989576e-05, "loss": 0.7283, "step": 7584 }, { "epoch": 1.2381943594139015, "grad_norm": 1.7047559022903442, "learning_rate": 1.978246455651608e-05, "loss": 0.6147, "step": 7585 }, { "epoch": 1.2383576180564058, "grad_norm": 2.0425400733947754, "learning_rate": 1.978239872219164e-05, "loss": 0.7465, "step": 7586 }, { "epoch": 1.2385208766989102, "grad_norm": 1.8751040697097778, "learning_rate": 1.978233287801633e-05, "loss": 0.772, "step": 7587 }, { "epoch": 1.2386841353414146, "grad_norm": 1.9482773542404175, "learning_rate": 1.9782267023990214e-05, "loss": 0.7237, "step": 7588 }, { "epoch": 1.238847393983919, "grad_norm": 1.8579981327056885, "learning_rate": 1.9782201160113362e-05, "loss": 0.7704, "step": 7589 }, { "epoch": 1.2390106526264235, "grad_norm": 1.8677568435668945, "learning_rate": 1.978213528638583e-05, "loss": 0.714, "step": 7590 }, { "epoch": 1.2391739112689277, "grad_norm": 1.916264295578003, "learning_rate": 1.978206940280769e-05, "loss": 0.6573, "step": 7591 }, { "epoch": 1.2393371699114322, "grad_norm": 1.900738000869751, "learning_rate": 1.9782003509379014e-05, "loss": 0.9007, "step": 7592 }, { "epoch": 1.2395004285539366, "grad_norm": 1.8706860542297363, "learning_rate": 1.978193760609986e-05, "loss": 0.9908, "step": 7593 }, { "epoch": 1.239663687196441, "grad_norm": 1.7971549034118652, "learning_rate": 1.9781871692970297e-05, "loss": 0.7879, "step": 7594 }, { "epoch": 1.2398269458389453, "grad_norm": 1.6671955585479736, "learning_rate": 1.9781805769990393e-05, "loss": 0.6745, "step": 7595 }, { "epoch": 1.2399902044814497, "grad_norm": 1.849905014038086, "learning_rate": 1.9781739837160213e-05, "loss": 0.7276, "step": 7596 }, { "epoch": 1.2401534631239541, "grad_norm": 1.7919561862945557, "learning_rate": 1.978167389447982e-05, "loss": 0.7663, "step": 7597 }, { "epoch": 1.2403167217664586, "grad_norm": 1.9800949096679688, "learning_rate": 1.9781607941949287e-05, "loss": 0.848, "step": 7598 }, { "epoch": 1.240479980408963, "grad_norm": 1.8648529052734375, "learning_rate": 1.978154197956868e-05, "loss": 0.6196, "step": 7599 }, { "epoch": 1.2406432390514672, "grad_norm": 1.9672514200210571, "learning_rate": 1.9781476007338058e-05, "loss": 0.7438, "step": 7600 }, { "epoch": 1.2408064976939717, "grad_norm": 1.766627311706543, "learning_rate": 1.9781410025257493e-05, "loss": 0.6668, "step": 7601 }, { "epoch": 1.240969756336476, "grad_norm": 1.8047432899475098, "learning_rate": 1.9781344033327054e-05, "loss": 0.679, "step": 7602 }, { "epoch": 1.2411330149789805, "grad_norm": 1.577041506767273, "learning_rate": 1.97812780315468e-05, "loss": 0.7485, "step": 7603 }, { "epoch": 1.2412962736214848, "grad_norm": 1.663478136062622, "learning_rate": 1.9781212019916807e-05, "loss": 0.7359, "step": 7604 }, { "epoch": 1.2414595322639892, "grad_norm": 2.0024514198303223, "learning_rate": 1.978114599843713e-05, "loss": 0.8007, "step": 7605 }, { "epoch": 1.2416227909064936, "grad_norm": 1.5555124282836914, "learning_rate": 1.9781079967107845e-05, "loss": 0.7269, "step": 7606 }, { "epoch": 1.241786049548998, "grad_norm": 1.819848656654358, "learning_rate": 1.9781013925929016e-05, "loss": 0.7114, "step": 7607 }, { "epoch": 1.2419493081915025, "grad_norm": 1.5645266771316528, "learning_rate": 1.9780947874900708e-05, "loss": 0.6312, "step": 7608 }, { "epoch": 1.2421125668340067, "grad_norm": 2.1099443435668945, "learning_rate": 1.9780881814022986e-05, "loss": 0.8755, "step": 7609 }, { "epoch": 1.2422758254765112, "grad_norm": 1.6440852880477905, "learning_rate": 1.978081574329592e-05, "loss": 0.6424, "step": 7610 }, { "epoch": 1.2424390841190156, "grad_norm": 1.9759521484375, "learning_rate": 1.9780749662719573e-05, "loss": 0.7619, "step": 7611 }, { "epoch": 1.2426023427615198, "grad_norm": 1.7985447645187378, "learning_rate": 1.9780683572294018e-05, "loss": 0.6113, "step": 7612 }, { "epoch": 1.2427656014040243, "grad_norm": 1.6091721057891846, "learning_rate": 1.9780617472019313e-05, "loss": 0.7038, "step": 7613 }, { "epoch": 1.2429288600465287, "grad_norm": 1.8612099885940552, "learning_rate": 1.978055136189553e-05, "loss": 0.7097, "step": 7614 }, { "epoch": 1.2430921186890331, "grad_norm": 1.870557188987732, "learning_rate": 1.9780485241922734e-05, "loss": 0.7068, "step": 7615 }, { "epoch": 1.2432553773315376, "grad_norm": 1.7763323783874512, "learning_rate": 1.9780419112100995e-05, "loss": 0.7164, "step": 7616 }, { "epoch": 1.2434186359740418, "grad_norm": 1.7473191022872925, "learning_rate": 1.9780352972430375e-05, "loss": 0.6909, "step": 7617 }, { "epoch": 1.2435818946165462, "grad_norm": 1.9092015027999878, "learning_rate": 1.9780286822910942e-05, "loss": 0.7368, "step": 7618 }, { "epoch": 1.2437451532590507, "grad_norm": 1.9576643705368042, "learning_rate": 1.9780220663542764e-05, "loss": 0.6332, "step": 7619 }, { "epoch": 1.243908411901555, "grad_norm": 1.6331299543380737, "learning_rate": 1.9780154494325902e-05, "loss": 0.6185, "step": 7620 }, { "epoch": 1.2440716705440593, "grad_norm": 1.696237325668335, "learning_rate": 1.978008831526043e-05, "loss": 0.6288, "step": 7621 }, { "epoch": 1.2442349291865638, "grad_norm": 1.8367983102798462, "learning_rate": 1.9780022126346413e-05, "loss": 0.7056, "step": 7622 }, { "epoch": 1.2443981878290682, "grad_norm": 1.749759554862976, "learning_rate": 1.9779955927583914e-05, "loss": 0.773, "step": 7623 }, { "epoch": 1.2445614464715726, "grad_norm": 1.8033175468444824, "learning_rate": 1.9779889718973004e-05, "loss": 0.798, "step": 7624 }, { "epoch": 1.244724705114077, "grad_norm": 1.7719976902008057, "learning_rate": 1.9779823500513747e-05, "loss": 0.5895, "step": 7625 }, { "epoch": 1.2448879637565813, "grad_norm": 1.870887041091919, "learning_rate": 1.9779757272206207e-05, "loss": 0.724, "step": 7626 }, { "epoch": 1.2450512223990857, "grad_norm": 1.4038174152374268, "learning_rate": 1.977969103405046e-05, "loss": 0.5393, "step": 7627 }, { "epoch": 1.2452144810415902, "grad_norm": 1.7500932216644287, "learning_rate": 1.977962478604656e-05, "loss": 0.6522, "step": 7628 }, { "epoch": 1.2453777396840946, "grad_norm": 2.207958459854126, "learning_rate": 1.9779558528194585e-05, "loss": 0.6067, "step": 7629 }, { "epoch": 1.2455409983265988, "grad_norm": 1.92695152759552, "learning_rate": 1.9779492260494596e-05, "loss": 0.6675, "step": 7630 }, { "epoch": 1.2457042569691033, "grad_norm": 1.93916916847229, "learning_rate": 1.9779425982946662e-05, "loss": 0.6948, "step": 7631 }, { "epoch": 1.2458675156116077, "grad_norm": 1.7454262971878052, "learning_rate": 1.977935969555085e-05, "loss": 0.7128, "step": 7632 }, { "epoch": 1.2460307742541121, "grad_norm": 1.8135579824447632, "learning_rate": 1.977929339830722e-05, "loss": 0.7945, "step": 7633 }, { "epoch": 1.2461940328966166, "grad_norm": 1.382270097732544, "learning_rate": 1.9779227091215846e-05, "loss": 0.6059, "step": 7634 }, { "epoch": 1.2463572915391208, "grad_norm": 2.013002634048462, "learning_rate": 1.9779160774276794e-05, "loss": 0.7905, "step": 7635 }, { "epoch": 1.2465205501816252, "grad_norm": 1.7730835676193237, "learning_rate": 1.977909444749013e-05, "loss": 0.6421, "step": 7636 }, { "epoch": 1.2466838088241297, "grad_norm": 1.6572157144546509, "learning_rate": 1.977902811085592e-05, "loss": 0.7893, "step": 7637 }, { "epoch": 1.246847067466634, "grad_norm": 1.3499289751052856, "learning_rate": 1.9778961764374232e-05, "loss": 0.5738, "step": 7638 }, { "epoch": 1.2470103261091383, "grad_norm": 1.7579472064971924, "learning_rate": 1.977889540804513e-05, "loss": 0.6667, "step": 7639 }, { "epoch": 1.2471735847516427, "grad_norm": 1.6760621070861816, "learning_rate": 1.9778829041868687e-05, "loss": 0.6109, "step": 7640 }, { "epoch": 1.2473368433941472, "grad_norm": 1.7900716066360474, "learning_rate": 1.977876266584496e-05, "loss": 0.6302, "step": 7641 }, { "epoch": 1.2475001020366516, "grad_norm": 1.535232663154602, "learning_rate": 1.977869627997403e-05, "loss": 0.6592, "step": 7642 }, { "epoch": 1.247663360679156, "grad_norm": 2.1644678115844727, "learning_rate": 1.977862988425595e-05, "loss": 0.7755, "step": 7643 }, { "epoch": 1.2478266193216603, "grad_norm": 2.142556667327881, "learning_rate": 1.977856347869079e-05, "loss": 0.7794, "step": 7644 }, { "epoch": 1.2479898779641647, "grad_norm": 1.6852294206619263, "learning_rate": 1.9778497063278622e-05, "loss": 0.7434, "step": 7645 }, { "epoch": 1.2481531366066692, "grad_norm": 2.489095687866211, "learning_rate": 1.977843063801951e-05, "loss": 1.0666, "step": 7646 }, { "epoch": 1.2483163952491736, "grad_norm": 1.9005029201507568, "learning_rate": 1.977836420291352e-05, "loss": 0.7712, "step": 7647 }, { "epoch": 1.2484796538916778, "grad_norm": 1.8809047937393188, "learning_rate": 1.9778297757960723e-05, "loss": 0.6823, "step": 7648 }, { "epoch": 1.2486429125341822, "grad_norm": 2.559624433517456, "learning_rate": 1.977823130316118e-05, "loss": 0.861, "step": 7649 }, { "epoch": 1.2488061711766867, "grad_norm": 2.0007712841033936, "learning_rate": 1.977816483851496e-05, "loss": 0.8188, "step": 7650 }, { "epoch": 1.2489694298191911, "grad_norm": 1.69324791431427, "learning_rate": 1.977809836402213e-05, "loss": 0.6908, "step": 7651 }, { "epoch": 1.2491326884616956, "grad_norm": 2.074950695037842, "learning_rate": 1.977803187968276e-05, "loss": 0.8386, "step": 7652 }, { "epoch": 1.2492959471041998, "grad_norm": 1.6235787868499756, "learning_rate": 1.9777965385496912e-05, "loss": 0.6974, "step": 7653 }, { "epoch": 1.2494592057467042, "grad_norm": 1.6162400245666504, "learning_rate": 1.9777898881464657e-05, "loss": 0.6999, "step": 7654 }, { "epoch": 1.2496224643892087, "grad_norm": 1.6110872030258179, "learning_rate": 1.977783236758606e-05, "loss": 0.6352, "step": 7655 }, { "epoch": 1.2497857230317129, "grad_norm": 2.1014626026153564, "learning_rate": 1.977776584386119e-05, "loss": 0.8632, "step": 7656 }, { "epoch": 1.2499489816742173, "grad_norm": 1.6196662187576294, "learning_rate": 1.9777699310290113e-05, "loss": 0.7205, "step": 7657 }, { "epoch": 1.2501122403167217, "grad_norm": 1.626107931137085, "learning_rate": 1.9777632766872893e-05, "loss": 0.6002, "step": 7658 }, { "epoch": 1.2502754989592262, "grad_norm": 1.876182198524475, "learning_rate": 1.97775662136096e-05, "loss": 0.7505, "step": 7659 }, { "epoch": 1.2504387576017306, "grad_norm": 1.9309813976287842, "learning_rate": 1.9777499650500303e-05, "loss": 0.6913, "step": 7660 }, { "epoch": 1.250602016244235, "grad_norm": 1.8079001903533936, "learning_rate": 1.977743307754506e-05, "loss": 0.7033, "step": 7661 }, { "epoch": 1.2507652748867393, "grad_norm": 1.7961138486862183, "learning_rate": 1.977736649474395e-05, "loss": 0.8187, "step": 7662 }, { "epoch": 1.2509285335292437, "grad_norm": 1.737817645072937, "learning_rate": 1.9777299902097033e-05, "loss": 0.7598, "step": 7663 }, { "epoch": 1.2510917921717482, "grad_norm": 1.5884069204330444, "learning_rate": 1.977723329960438e-05, "loss": 0.6067, "step": 7664 }, { "epoch": 1.2512550508142524, "grad_norm": 1.7477344274520874, "learning_rate": 1.9777166687266055e-05, "loss": 0.6746, "step": 7665 }, { "epoch": 1.2514183094567568, "grad_norm": 1.7770012617111206, "learning_rate": 1.977710006508212e-05, "loss": 0.6714, "step": 7666 }, { "epoch": 1.2515815680992612, "grad_norm": 1.717362403869629, "learning_rate": 1.9777033433052653e-05, "loss": 0.6306, "step": 7667 }, { "epoch": 1.2517448267417657, "grad_norm": 2.1102142333984375, "learning_rate": 1.9776966791177715e-05, "loss": 0.8492, "step": 7668 }, { "epoch": 1.2519080853842701, "grad_norm": 1.5703054666519165, "learning_rate": 1.9776900139457375e-05, "loss": 0.6426, "step": 7669 }, { "epoch": 1.2520713440267743, "grad_norm": 1.5975521802902222, "learning_rate": 1.9776833477891696e-05, "loss": 0.5865, "step": 7670 }, { "epoch": 1.2522346026692788, "grad_norm": 1.959734559059143, "learning_rate": 1.977676680648075e-05, "loss": 0.7569, "step": 7671 }, { "epoch": 1.2523978613117832, "grad_norm": 1.758931279182434, "learning_rate": 1.9776700125224605e-05, "loss": 0.7334, "step": 7672 }, { "epoch": 1.2525611199542876, "grad_norm": 1.7981336116790771, "learning_rate": 1.977663343412332e-05, "loss": 0.7665, "step": 7673 }, { "epoch": 1.2527243785967919, "grad_norm": 2.122328281402588, "learning_rate": 1.9776566733176974e-05, "loss": 0.8494, "step": 7674 }, { "epoch": 1.2528876372392963, "grad_norm": 1.827448844909668, "learning_rate": 1.9776500022385623e-05, "loss": 0.7345, "step": 7675 }, { "epoch": 1.2530508958818007, "grad_norm": 1.9876526594161987, "learning_rate": 1.9776433301749344e-05, "loss": 0.8205, "step": 7676 }, { "epoch": 1.2532141545243052, "grad_norm": 1.6124964952468872, "learning_rate": 1.9776366571268194e-05, "loss": 0.6994, "step": 7677 }, { "epoch": 1.2533774131668096, "grad_norm": 2.0584192276000977, "learning_rate": 1.9776299830942248e-05, "loss": 0.8039, "step": 7678 }, { "epoch": 1.2535406718093138, "grad_norm": 1.963331699371338, "learning_rate": 1.977623308077157e-05, "loss": 0.6681, "step": 7679 }, { "epoch": 1.2537039304518183, "grad_norm": 1.6164655685424805, "learning_rate": 1.9776166320756228e-05, "loss": 0.6163, "step": 7680 }, { "epoch": 1.2538671890943227, "grad_norm": 2.323378801345825, "learning_rate": 1.977609955089629e-05, "loss": 0.6766, "step": 7681 }, { "epoch": 1.2540304477368271, "grad_norm": 1.8354038000106812, "learning_rate": 1.977603277119182e-05, "loss": 0.7172, "step": 7682 }, { "epoch": 1.2541937063793314, "grad_norm": 2.1104421615600586, "learning_rate": 1.977596598164289e-05, "loss": 0.8365, "step": 7683 }, { "epoch": 1.2543569650218358, "grad_norm": 1.6812859773635864, "learning_rate": 1.9775899182249563e-05, "loss": 0.6215, "step": 7684 }, { "epoch": 1.2545202236643402, "grad_norm": 1.4956614971160889, "learning_rate": 1.977583237301191e-05, "loss": 0.6295, "step": 7685 }, { "epoch": 1.2546834823068447, "grad_norm": 1.9258769750595093, "learning_rate": 1.9775765553929995e-05, "loss": 0.7462, "step": 7686 }, { "epoch": 1.2548467409493491, "grad_norm": 2.1310577392578125, "learning_rate": 1.9775698725003888e-05, "loss": 0.888, "step": 7687 }, { "epoch": 1.2550099995918533, "grad_norm": 1.5908682346343994, "learning_rate": 1.9775631886233655e-05, "loss": 0.6188, "step": 7688 }, { "epoch": 1.2551732582343578, "grad_norm": 1.9881330728530884, "learning_rate": 1.977556503761936e-05, "loss": 0.7757, "step": 7689 }, { "epoch": 1.2553365168768622, "grad_norm": 1.6291533708572388, "learning_rate": 1.9775498179161077e-05, "loss": 0.6383, "step": 7690 }, { "epoch": 1.2554997755193664, "grad_norm": 1.8524706363677979, "learning_rate": 1.977543131085887e-05, "loss": 0.6524, "step": 7691 }, { "epoch": 1.2556630341618709, "grad_norm": 1.8678884506225586, "learning_rate": 1.9775364432712804e-05, "loss": 0.6793, "step": 7692 }, { "epoch": 1.2558262928043753, "grad_norm": 1.7547338008880615, "learning_rate": 1.977529754472295e-05, "loss": 0.7139, "step": 7693 }, { "epoch": 1.2559895514468797, "grad_norm": 1.5473077297210693, "learning_rate": 1.9775230646889374e-05, "loss": 0.6014, "step": 7694 }, { "epoch": 1.2561528100893842, "grad_norm": 1.750540018081665, "learning_rate": 1.9775163739212143e-05, "loss": 0.6479, "step": 7695 }, { "epoch": 1.2563160687318886, "grad_norm": 1.7480374574661255, "learning_rate": 1.9775096821691323e-05, "loss": 0.651, "step": 7696 }, { "epoch": 1.2564793273743928, "grad_norm": 1.787429928779602, "learning_rate": 1.9775029894326987e-05, "loss": 0.7655, "step": 7697 }, { "epoch": 1.2566425860168973, "grad_norm": 1.5788383483886719, "learning_rate": 1.9774962957119196e-05, "loss": 0.8434, "step": 7698 }, { "epoch": 1.2568058446594017, "grad_norm": 1.7003456354141235, "learning_rate": 1.9774896010068022e-05, "loss": 0.6534, "step": 7699 }, { "epoch": 1.256969103301906, "grad_norm": 1.9412543773651123, "learning_rate": 1.977482905317353e-05, "loss": 0.8285, "step": 7700 }, { "epoch": 1.2571323619444104, "grad_norm": 1.6494460105895996, "learning_rate": 1.9774762086435784e-05, "loss": 0.6975, "step": 7701 }, { "epoch": 1.2572956205869148, "grad_norm": 1.571478009223938, "learning_rate": 1.977469510985486e-05, "loss": 0.6001, "step": 7702 }, { "epoch": 1.2574588792294192, "grad_norm": 1.7906489372253418, "learning_rate": 1.9774628123430815e-05, "loss": 0.7018, "step": 7703 }, { "epoch": 1.2576221378719237, "grad_norm": 1.9880855083465576, "learning_rate": 1.9774561127163723e-05, "loss": 0.6201, "step": 7704 }, { "epoch": 1.2577853965144281, "grad_norm": 1.9657167196273804, "learning_rate": 1.9774494121053655e-05, "loss": 0.7326, "step": 7705 }, { "epoch": 1.2579486551569323, "grad_norm": 1.5162639617919922, "learning_rate": 1.9774427105100674e-05, "loss": 0.5814, "step": 7706 }, { "epoch": 1.2581119137994368, "grad_norm": 1.9122223854064941, "learning_rate": 1.9774360079304844e-05, "loss": 0.7358, "step": 7707 }, { "epoch": 1.2582751724419412, "grad_norm": 1.7837613821029663, "learning_rate": 1.9774293043666238e-05, "loss": 0.811, "step": 7708 }, { "epoch": 1.2584384310844454, "grad_norm": 1.8560820817947388, "learning_rate": 1.9774225998184923e-05, "loss": 0.7148, "step": 7709 }, { "epoch": 1.2586016897269499, "grad_norm": 1.8998948335647583, "learning_rate": 1.9774158942860962e-05, "loss": 0.6309, "step": 7710 }, { "epoch": 1.2587649483694543, "grad_norm": 1.9607232809066772, "learning_rate": 1.9774091877694425e-05, "loss": 0.6746, "step": 7711 }, { "epoch": 1.2589282070119587, "grad_norm": 1.683692455291748, "learning_rate": 1.977402480268538e-05, "loss": 0.5997, "step": 7712 }, { "epoch": 1.2590914656544632, "grad_norm": 1.9616204500198364, "learning_rate": 1.9773957717833897e-05, "loss": 0.8368, "step": 7713 }, { "epoch": 1.2592547242969676, "grad_norm": 1.7093327045440674, "learning_rate": 1.977389062314004e-05, "loss": 0.6858, "step": 7714 }, { "epoch": 1.2594179829394718, "grad_norm": 1.8307019472122192, "learning_rate": 1.9773823518603877e-05, "loss": 0.7845, "step": 7715 }, { "epoch": 1.2595812415819763, "grad_norm": 1.880604863166809, "learning_rate": 1.9773756404225478e-05, "loss": 0.6558, "step": 7716 }, { "epoch": 1.2597445002244807, "grad_norm": 1.7975512742996216, "learning_rate": 1.9773689280004908e-05, "loss": 0.6869, "step": 7717 }, { "epoch": 1.259907758866985, "grad_norm": 1.8914908170700073, "learning_rate": 1.977362214594224e-05, "loss": 0.7408, "step": 7718 }, { "epoch": 1.2600710175094894, "grad_norm": 2.219393014907837, "learning_rate": 1.977355500203753e-05, "loss": 0.7213, "step": 7719 }, { "epoch": 1.2602342761519938, "grad_norm": 1.5083045959472656, "learning_rate": 1.9773487848290856e-05, "loss": 0.6135, "step": 7720 }, { "epoch": 1.2603975347944982, "grad_norm": 1.557945728302002, "learning_rate": 1.977342068470228e-05, "loss": 0.5062, "step": 7721 }, { "epoch": 1.2605607934370027, "grad_norm": 1.8384490013122559, "learning_rate": 1.9773353511271876e-05, "loss": 0.6928, "step": 7722 }, { "epoch": 1.2607240520795069, "grad_norm": 1.7242950201034546, "learning_rate": 1.9773286327999703e-05, "loss": 0.675, "step": 7723 }, { "epoch": 1.2608873107220113, "grad_norm": 1.8644347190856934, "learning_rate": 1.9773219134885834e-05, "loss": 0.6784, "step": 7724 }, { "epoch": 1.2610505693645158, "grad_norm": 1.5499472618103027, "learning_rate": 1.977315193193034e-05, "loss": 0.5699, "step": 7725 }, { "epoch": 1.2612138280070202, "grad_norm": 1.7247308492660522, "learning_rate": 1.977308471913328e-05, "loss": 0.6725, "step": 7726 }, { "epoch": 1.2613770866495244, "grad_norm": 1.889413833618164, "learning_rate": 1.977301749649473e-05, "loss": 0.8358, "step": 7727 }, { "epoch": 1.2615403452920289, "grad_norm": 1.627528190612793, "learning_rate": 1.977295026401475e-05, "loss": 0.6034, "step": 7728 }, { "epoch": 1.2617036039345333, "grad_norm": 1.7325314283370972, "learning_rate": 1.9772883021693417e-05, "loss": 0.7557, "step": 7729 }, { "epoch": 1.2618668625770377, "grad_norm": 1.6630433797836304, "learning_rate": 1.977281576953079e-05, "loss": 0.667, "step": 7730 }, { "epoch": 1.2620301212195422, "grad_norm": 1.6435582637786865, "learning_rate": 1.9772748507526942e-05, "loss": 0.6536, "step": 7731 }, { "epoch": 1.2621933798620464, "grad_norm": 1.7910091876983643, "learning_rate": 1.9772681235681936e-05, "loss": 0.7353, "step": 7732 }, { "epoch": 1.2623566385045508, "grad_norm": 1.982434868812561, "learning_rate": 1.9772613953995844e-05, "loss": 0.7628, "step": 7733 }, { "epoch": 1.2625198971470553, "grad_norm": 1.97652268409729, "learning_rate": 1.9772546662468734e-05, "loss": 0.6555, "step": 7734 }, { "epoch": 1.2626831557895595, "grad_norm": 1.81621515750885, "learning_rate": 1.977247936110067e-05, "loss": 0.7882, "step": 7735 }, { "epoch": 1.262846414432064, "grad_norm": 1.7060892581939697, "learning_rate": 1.977241204989172e-05, "loss": 0.635, "step": 7736 }, { "epoch": 1.2630096730745684, "grad_norm": 1.807970643043518, "learning_rate": 1.977234472884196e-05, "loss": 0.7383, "step": 7737 }, { "epoch": 1.2631729317170728, "grad_norm": 1.8335809707641602, "learning_rate": 1.9772277397951445e-05, "loss": 0.7604, "step": 7738 }, { "epoch": 1.2633361903595772, "grad_norm": 1.91744065284729, "learning_rate": 1.977221005722025e-05, "loss": 0.7054, "step": 7739 }, { "epoch": 1.2634994490020817, "grad_norm": 1.6507023572921753, "learning_rate": 1.9772142706648446e-05, "loss": 0.744, "step": 7740 }, { "epoch": 1.2636627076445859, "grad_norm": 2.0793471336364746, "learning_rate": 1.9772075346236096e-05, "loss": 0.9387, "step": 7741 }, { "epoch": 1.2638259662870903, "grad_norm": 1.916503667831421, "learning_rate": 1.9772007975983263e-05, "loss": 0.781, "step": 7742 }, { "epoch": 1.2639892249295948, "grad_norm": 1.8557668924331665, "learning_rate": 1.9771940595890025e-05, "loss": 0.6865, "step": 7743 }, { "epoch": 1.264152483572099, "grad_norm": 1.9744746685028076, "learning_rate": 1.9771873205956446e-05, "loss": 1.1702, "step": 7744 }, { "epoch": 1.2643157422146034, "grad_norm": 1.8330345153808594, "learning_rate": 1.9771805806182594e-05, "loss": 0.7574, "step": 7745 }, { "epoch": 1.2644790008571078, "grad_norm": 1.5432018041610718, "learning_rate": 1.9771738396568537e-05, "loss": 0.5835, "step": 7746 }, { "epoch": 1.2646422594996123, "grad_norm": 1.8032466173171997, "learning_rate": 1.977167097711434e-05, "loss": 0.7626, "step": 7747 }, { "epoch": 1.2648055181421167, "grad_norm": 2.006655693054199, "learning_rate": 1.977160354782007e-05, "loss": 0.7264, "step": 7748 }, { "epoch": 1.2649687767846212, "grad_norm": 2.0377323627471924, "learning_rate": 1.97715361086858e-05, "loss": 0.8226, "step": 7749 }, { "epoch": 1.2651320354271254, "grad_norm": 1.6945147514343262, "learning_rate": 1.9771468659711595e-05, "loss": 0.5963, "step": 7750 }, { "epoch": 1.2652952940696298, "grad_norm": 1.6919796466827393, "learning_rate": 1.9771401200897527e-05, "loss": 0.7075, "step": 7751 }, { "epoch": 1.2654585527121343, "grad_norm": 1.703816533088684, "learning_rate": 1.9771333732243657e-05, "loss": 0.5682, "step": 7752 }, { "epoch": 1.2656218113546385, "grad_norm": 1.3699593544006348, "learning_rate": 1.977126625375006e-05, "loss": 0.5468, "step": 7753 }, { "epoch": 1.265785069997143, "grad_norm": 1.692429780960083, "learning_rate": 1.97711987654168e-05, "loss": 0.698, "step": 7754 }, { "epoch": 1.2659483286396473, "grad_norm": 2.1237411499023438, "learning_rate": 1.9771131267243942e-05, "loss": 0.7937, "step": 7755 }, { "epoch": 1.2661115872821518, "grad_norm": 1.583988904953003, "learning_rate": 1.977106375923156e-05, "loss": 0.6226, "step": 7756 }, { "epoch": 1.2662748459246562, "grad_norm": 1.7571914196014404, "learning_rate": 1.9770996241379718e-05, "loss": 0.7396, "step": 7757 }, { "epoch": 1.2664381045671607, "grad_norm": 1.635484218597412, "learning_rate": 1.9770928713688488e-05, "loss": 0.7162, "step": 7758 }, { "epoch": 1.2666013632096649, "grad_norm": 1.8203418254852295, "learning_rate": 1.977086117615793e-05, "loss": 0.6953, "step": 7759 }, { "epoch": 1.2667646218521693, "grad_norm": 1.6656782627105713, "learning_rate": 1.9770793628788123e-05, "loss": 0.6281, "step": 7760 }, { "epoch": 1.2669278804946738, "grad_norm": 2.008305788040161, "learning_rate": 1.9770726071579127e-05, "loss": 1.1971, "step": 7761 }, { "epoch": 1.267091139137178, "grad_norm": 1.805480718612671, "learning_rate": 1.977065850453101e-05, "loss": 1.2614, "step": 7762 }, { "epoch": 1.2672543977796824, "grad_norm": 2.207421064376831, "learning_rate": 1.9770590927643846e-05, "loss": 0.88, "step": 7763 }, { "epoch": 1.2674176564221868, "grad_norm": 1.8903216123580933, "learning_rate": 1.97705233409177e-05, "loss": 0.7529, "step": 7764 }, { "epoch": 1.2675809150646913, "grad_norm": 2.075789451599121, "learning_rate": 1.977045574435264e-05, "loss": 0.8089, "step": 7765 }, { "epoch": 1.2677441737071957, "grad_norm": 1.9584882259368896, "learning_rate": 1.9770388137948728e-05, "loss": 0.8219, "step": 7766 }, { "epoch": 1.2679074323497, "grad_norm": 1.5612034797668457, "learning_rate": 1.9770320521706045e-05, "loss": 0.5108, "step": 7767 }, { "epoch": 1.2680706909922044, "grad_norm": 1.9009487628936768, "learning_rate": 1.9770252895624647e-05, "loss": 0.6953, "step": 7768 }, { "epoch": 1.2682339496347088, "grad_norm": 1.9827795028686523, "learning_rate": 1.977018525970461e-05, "loss": 0.7968, "step": 7769 }, { "epoch": 1.2683972082772133, "grad_norm": 1.7919560670852661, "learning_rate": 1.9770117613945996e-05, "loss": 0.647, "step": 7770 }, { "epoch": 1.2685604669197175, "grad_norm": 1.7263643741607666, "learning_rate": 1.9770049958348875e-05, "loss": 0.6824, "step": 7771 }, { "epoch": 1.268723725562222, "grad_norm": 1.5347492694854736, "learning_rate": 1.976998229291332e-05, "loss": 0.5474, "step": 7772 }, { "epoch": 1.2688869842047263, "grad_norm": 2.062748432159424, "learning_rate": 1.9769914617639397e-05, "loss": 0.8293, "step": 7773 }, { "epoch": 1.2690502428472308, "grad_norm": 1.599362850189209, "learning_rate": 1.976984693252717e-05, "loss": 0.6123, "step": 7774 }, { "epoch": 1.2692135014897352, "grad_norm": 1.8141447305679321, "learning_rate": 1.9769779237576707e-05, "loss": 0.73, "step": 7775 }, { "epoch": 1.2693767601322394, "grad_norm": 1.947662353515625, "learning_rate": 1.9769711532788083e-05, "loss": 0.7038, "step": 7776 }, { "epoch": 1.2695400187747439, "grad_norm": 2.233693838119507, "learning_rate": 1.976964381816136e-05, "loss": 0.5868, "step": 7777 }, { "epoch": 1.2697032774172483, "grad_norm": 1.6217938661575317, "learning_rate": 1.976957609369661e-05, "loss": 0.5973, "step": 7778 }, { "epoch": 1.2698665360597525, "grad_norm": 1.966148853302002, "learning_rate": 1.9769508359393897e-05, "loss": 0.773, "step": 7779 }, { "epoch": 1.270029794702257, "grad_norm": 1.6554374694824219, "learning_rate": 1.9769440615253295e-05, "loss": 0.7291, "step": 7780 }, { "epoch": 1.2701930533447614, "grad_norm": 1.6029794216156006, "learning_rate": 1.9769372861274865e-05, "loss": 0.5961, "step": 7781 }, { "epoch": 1.2703563119872658, "grad_norm": 1.7516472339630127, "learning_rate": 1.9769305097458684e-05, "loss": 0.6267, "step": 7782 }, { "epoch": 1.2705195706297703, "grad_norm": 1.6798949241638184, "learning_rate": 1.976923732380481e-05, "loss": 0.5832, "step": 7783 }, { "epoch": 1.2706828292722747, "grad_norm": 2.195237874984741, "learning_rate": 1.976916954031332e-05, "loss": 0.5794, "step": 7784 }, { "epoch": 1.270846087914779, "grad_norm": 2.1128997802734375, "learning_rate": 1.9769101746984275e-05, "loss": 0.6734, "step": 7785 }, { "epoch": 1.2710093465572834, "grad_norm": 2.115288257598877, "learning_rate": 1.976903394381775e-05, "loss": 0.8272, "step": 7786 }, { "epoch": 1.2711726051997878, "grad_norm": 1.61924147605896, "learning_rate": 1.976896613081381e-05, "loss": 0.612, "step": 7787 }, { "epoch": 1.271335863842292, "grad_norm": 1.5435549020767212, "learning_rate": 1.9768898307972527e-05, "loss": 0.5269, "step": 7788 }, { "epoch": 1.2714991224847965, "grad_norm": 1.6413521766662598, "learning_rate": 1.976883047529396e-05, "loss": 0.6241, "step": 7789 }, { "epoch": 1.271662381127301, "grad_norm": 1.605469822883606, "learning_rate": 1.9768762632778188e-05, "loss": 0.6143, "step": 7790 }, { "epoch": 1.2718256397698053, "grad_norm": 1.7283122539520264, "learning_rate": 1.9768694780425274e-05, "loss": 0.7243, "step": 7791 }, { "epoch": 1.2719888984123098, "grad_norm": 1.983344316482544, "learning_rate": 1.9768626918235286e-05, "loss": 0.7424, "step": 7792 }, { "epoch": 1.2721521570548142, "grad_norm": 1.8348256349563599, "learning_rate": 1.976855904620829e-05, "loss": 0.7129, "step": 7793 }, { "epoch": 1.2723154156973184, "grad_norm": 1.776906132698059, "learning_rate": 1.9768491164344362e-05, "loss": 0.6994, "step": 7794 }, { "epoch": 1.2724786743398229, "grad_norm": 1.7686011791229248, "learning_rate": 1.9768423272643566e-05, "loss": 0.638, "step": 7795 }, { "epoch": 1.2726419329823273, "grad_norm": 1.5960432291030884, "learning_rate": 1.976835537110597e-05, "loss": 0.601, "step": 7796 }, { "epoch": 1.2728051916248315, "grad_norm": 1.6386282444000244, "learning_rate": 1.976828745973164e-05, "loss": 0.6379, "step": 7797 }, { "epoch": 1.272968450267336, "grad_norm": 1.861646294593811, "learning_rate": 1.976821953852065e-05, "loss": 0.6203, "step": 7798 }, { "epoch": 1.2731317089098404, "grad_norm": 1.7679351568222046, "learning_rate": 1.9768151607473064e-05, "loss": 0.7021, "step": 7799 }, { "epoch": 1.2732949675523448, "grad_norm": 1.7943164110183716, "learning_rate": 1.9768083666588954e-05, "loss": 0.6469, "step": 7800 }, { "epoch": 1.2734582261948493, "grad_norm": 1.6696391105651855, "learning_rate": 1.9768015715868386e-05, "loss": 0.6664, "step": 7801 }, { "epoch": 1.2736214848373537, "grad_norm": 1.4941529035568237, "learning_rate": 1.9767947755311425e-05, "loss": 0.6303, "step": 7802 }, { "epoch": 1.273784743479858, "grad_norm": 2.3338375091552734, "learning_rate": 1.9767879784918148e-05, "loss": 0.9233, "step": 7803 }, { "epoch": 1.2739480021223624, "grad_norm": 1.883003830909729, "learning_rate": 1.9767811804688616e-05, "loss": 0.6893, "step": 7804 }, { "epoch": 1.2741112607648668, "grad_norm": 1.6260789632797241, "learning_rate": 1.97677438146229e-05, "loss": 0.5862, "step": 7805 }, { "epoch": 1.274274519407371, "grad_norm": 1.9480656385421753, "learning_rate": 1.976767581472107e-05, "loss": 0.6986, "step": 7806 }, { "epoch": 1.2744377780498755, "grad_norm": 1.780089259147644, "learning_rate": 1.9767607804983192e-05, "loss": 0.5556, "step": 7807 }, { "epoch": 1.27460103669238, "grad_norm": 1.6911287307739258, "learning_rate": 1.9767539785409332e-05, "loss": 0.6794, "step": 7808 }, { "epoch": 1.2747642953348843, "grad_norm": 1.6296827793121338, "learning_rate": 1.976747175599957e-05, "loss": 0.5866, "step": 7809 }, { "epoch": 1.2749275539773888, "grad_norm": 1.5960873365402222, "learning_rate": 1.976740371675396e-05, "loss": 0.6731, "step": 7810 }, { "epoch": 1.275090812619893, "grad_norm": 1.8045580387115479, "learning_rate": 1.976733566767258e-05, "loss": 0.7185, "step": 7811 }, { "epoch": 1.2752540712623974, "grad_norm": 1.6713072061538696, "learning_rate": 1.9767267608755497e-05, "loss": 0.7675, "step": 7812 }, { "epoch": 1.2754173299049019, "grad_norm": 1.6557163000106812, "learning_rate": 1.9767199540002772e-05, "loss": 0.6629, "step": 7813 }, { "epoch": 1.2755805885474063, "grad_norm": 1.3956446647644043, "learning_rate": 1.9767131461414487e-05, "loss": 0.6063, "step": 7814 }, { "epoch": 1.2757438471899105, "grad_norm": 1.9779384136199951, "learning_rate": 1.97670633729907e-05, "loss": 0.7719, "step": 7815 }, { "epoch": 1.275907105832415, "grad_norm": 1.9786607027053833, "learning_rate": 1.976699527473148e-05, "loss": 0.827, "step": 7816 }, { "epoch": 1.2760703644749194, "grad_norm": 1.7793630361557007, "learning_rate": 1.9766927166636903e-05, "loss": 0.6342, "step": 7817 }, { "epoch": 1.2762336231174238, "grad_norm": 1.824869990348816, "learning_rate": 1.976685904870703e-05, "loss": 0.6645, "step": 7818 }, { "epoch": 1.2763968817599283, "grad_norm": 1.8695714473724365, "learning_rate": 1.9766790920941933e-05, "loss": 0.7893, "step": 7819 }, { "epoch": 1.2765601404024325, "grad_norm": 1.6905813217163086, "learning_rate": 1.9766722783341682e-05, "loss": 0.6688, "step": 7820 }, { "epoch": 1.276723399044937, "grad_norm": 1.794819951057434, "learning_rate": 1.9766654635906342e-05, "loss": 0.6051, "step": 7821 }, { "epoch": 1.2768866576874414, "grad_norm": 2.206143856048584, "learning_rate": 1.9766586478635984e-05, "loss": 0.627, "step": 7822 }, { "epoch": 1.2770499163299456, "grad_norm": 1.6245406866073608, "learning_rate": 1.9766518311530675e-05, "loss": 0.6305, "step": 7823 }, { "epoch": 1.27721317497245, "grad_norm": 1.692724347114563, "learning_rate": 1.9766450134590484e-05, "loss": 0.6676, "step": 7824 }, { "epoch": 1.2773764336149545, "grad_norm": 1.8956235647201538, "learning_rate": 1.9766381947815484e-05, "loss": 0.7416, "step": 7825 }, { "epoch": 1.277539692257459, "grad_norm": 1.786946415901184, "learning_rate": 1.9766313751205738e-05, "loss": 0.6977, "step": 7826 }, { "epoch": 1.2777029508999633, "grad_norm": 1.944962501525879, "learning_rate": 1.9766245544761316e-05, "loss": 0.8914, "step": 7827 }, { "epoch": 1.2778662095424678, "grad_norm": 2.5685558319091797, "learning_rate": 1.9766177328482285e-05, "loss": 1.2777, "step": 7828 }, { "epoch": 1.278029468184972, "grad_norm": 1.7290880680084229, "learning_rate": 1.9766109102368717e-05, "loss": 0.6887, "step": 7829 }, { "epoch": 1.2781927268274764, "grad_norm": 1.6054240465164185, "learning_rate": 1.9766040866420684e-05, "loss": 0.6621, "step": 7830 }, { "epoch": 1.2783559854699809, "grad_norm": 2.233534574508667, "learning_rate": 1.976597262063825e-05, "loss": 0.7421, "step": 7831 }, { "epoch": 1.278519244112485, "grad_norm": 2.093970537185669, "learning_rate": 1.976590436502148e-05, "loss": 0.6147, "step": 7832 }, { "epoch": 1.2786825027549895, "grad_norm": 1.656832218170166, "learning_rate": 1.976583609957045e-05, "loss": 0.6692, "step": 7833 }, { "epoch": 1.278845761397494, "grad_norm": 1.8849139213562012, "learning_rate": 1.9765767824285223e-05, "loss": 0.7561, "step": 7834 }, { "epoch": 1.2790090200399984, "grad_norm": 1.7517282962799072, "learning_rate": 1.9765699539165873e-05, "loss": 0.7164, "step": 7835 }, { "epoch": 1.2791722786825028, "grad_norm": 1.7697194814682007, "learning_rate": 1.9765631244212464e-05, "loss": 0.6427, "step": 7836 }, { "epoch": 1.2793355373250073, "grad_norm": 1.7115105390548706, "learning_rate": 1.9765562939425067e-05, "loss": 0.569, "step": 7837 }, { "epoch": 1.2794987959675115, "grad_norm": 1.9331912994384766, "learning_rate": 1.9765494624803753e-05, "loss": 0.5535, "step": 7838 }, { "epoch": 1.279662054610016, "grad_norm": 1.931525468826294, "learning_rate": 1.9765426300348586e-05, "loss": 0.8501, "step": 7839 }, { "epoch": 1.2798253132525204, "grad_norm": 1.9250407218933105, "learning_rate": 1.9765357966059638e-05, "loss": 0.7133, "step": 7840 }, { "epoch": 1.2799885718950246, "grad_norm": 1.63577401638031, "learning_rate": 1.976528962193698e-05, "loss": 0.7053, "step": 7841 }, { "epoch": 1.280151830537529, "grad_norm": 2.1839020252227783, "learning_rate": 1.9765221267980675e-05, "loss": 0.824, "step": 7842 }, { "epoch": 1.2803150891800334, "grad_norm": 2.152672052383423, "learning_rate": 1.9765152904190795e-05, "loss": 0.7613, "step": 7843 }, { "epoch": 1.2804783478225379, "grad_norm": 2.0152671337127686, "learning_rate": 1.9765084530567406e-05, "loss": 0.7149, "step": 7844 }, { "epoch": 1.2806416064650423, "grad_norm": 1.336719274520874, "learning_rate": 1.9765016147110583e-05, "loss": 0.5689, "step": 7845 }, { "epoch": 1.2808048651075468, "grad_norm": 1.9520338773727417, "learning_rate": 1.9764947753820393e-05, "loss": 0.8217, "step": 7846 }, { "epoch": 1.280968123750051, "grad_norm": 1.8902567625045776, "learning_rate": 1.97648793506969e-05, "loss": 0.6859, "step": 7847 }, { "epoch": 1.2811313823925554, "grad_norm": 1.731766700744629, "learning_rate": 1.976481093774018e-05, "loss": 0.6289, "step": 7848 }, { "epoch": 1.2812946410350599, "grad_norm": 1.7768175601959229, "learning_rate": 1.976474251495029e-05, "loss": 0.6862, "step": 7849 }, { "epoch": 1.281457899677564, "grad_norm": 1.7624109983444214, "learning_rate": 1.9764674082327313e-05, "loss": 0.6904, "step": 7850 }, { "epoch": 1.2816211583200685, "grad_norm": 1.9158507585525513, "learning_rate": 1.9764605639871312e-05, "loss": 0.7408, "step": 7851 }, { "epoch": 1.281784416962573, "grad_norm": 1.5964126586914062, "learning_rate": 1.9764537187582353e-05, "loss": 0.6257, "step": 7852 }, { "epoch": 1.2819476756050774, "grad_norm": 1.9964649677276611, "learning_rate": 1.976446872546051e-05, "loss": 0.8109, "step": 7853 }, { "epoch": 1.2821109342475818, "grad_norm": 2.534752368927002, "learning_rate": 1.9764400253505848e-05, "loss": 0.5853, "step": 7854 }, { "epoch": 1.282274192890086, "grad_norm": 2.2597615718841553, "learning_rate": 1.9764331771718438e-05, "loss": 0.7133, "step": 7855 }, { "epoch": 1.2824374515325905, "grad_norm": 1.4301605224609375, "learning_rate": 1.976426328009835e-05, "loss": 0.6284, "step": 7856 }, { "epoch": 1.282600710175095, "grad_norm": 1.9297692775726318, "learning_rate": 1.9764194778645648e-05, "loss": 0.6202, "step": 7857 }, { "epoch": 1.2827639688175994, "grad_norm": 1.8295924663543701, "learning_rate": 1.9764126267360407e-05, "loss": 0.7669, "step": 7858 }, { "epoch": 1.2829272274601036, "grad_norm": 1.9317713975906372, "learning_rate": 1.9764057746242693e-05, "loss": 0.7333, "step": 7859 }, { "epoch": 1.283090486102608, "grad_norm": 1.668118953704834, "learning_rate": 1.9763989215292576e-05, "loss": 0.5821, "step": 7860 }, { "epoch": 1.2832537447451124, "grad_norm": 2.004638910293579, "learning_rate": 1.9763920674510124e-05, "loss": 0.7335, "step": 7861 }, { "epoch": 1.2834170033876169, "grad_norm": 1.9131665229797363, "learning_rate": 1.9763852123895405e-05, "loss": 0.7544, "step": 7862 }, { "epoch": 1.2835802620301213, "grad_norm": 1.7500256299972534, "learning_rate": 1.9763783563448494e-05, "loss": 0.6237, "step": 7863 }, { "epoch": 1.2837435206726255, "grad_norm": 1.7953238487243652, "learning_rate": 1.976371499316945e-05, "loss": 0.6667, "step": 7864 }, { "epoch": 1.28390677931513, "grad_norm": 1.908550500869751, "learning_rate": 1.9763646413058352e-05, "loss": 0.7049, "step": 7865 }, { "epoch": 1.2840700379576344, "grad_norm": 1.4671032428741455, "learning_rate": 1.976357782311526e-05, "loss": 0.4892, "step": 7866 }, { "epoch": 1.2842332966001389, "grad_norm": 1.6461607217788696, "learning_rate": 1.9763509223340253e-05, "loss": 0.6934, "step": 7867 }, { "epoch": 1.284396555242643, "grad_norm": 1.7850788831710815, "learning_rate": 1.9763440613733393e-05, "loss": 0.6971, "step": 7868 }, { "epoch": 1.2845598138851475, "grad_norm": 1.7590973377227783, "learning_rate": 1.976337199429475e-05, "loss": 0.6887, "step": 7869 }, { "epoch": 1.284723072527652, "grad_norm": 1.560594081878662, "learning_rate": 1.9763303365024392e-05, "loss": 0.6946, "step": 7870 }, { "epoch": 1.2848863311701564, "grad_norm": 1.957930564880371, "learning_rate": 1.976323472592239e-05, "loss": 0.7353, "step": 7871 }, { "epoch": 1.2850495898126608, "grad_norm": 2.104898691177368, "learning_rate": 1.9763166076988818e-05, "loss": 0.8932, "step": 7872 }, { "epoch": 1.285212848455165, "grad_norm": 1.911778450012207, "learning_rate": 1.9763097418223736e-05, "loss": 0.7654, "step": 7873 }, { "epoch": 1.2853761070976695, "grad_norm": 2.049375057220459, "learning_rate": 1.976302874962722e-05, "loss": 0.7244, "step": 7874 }, { "epoch": 1.285539365740174, "grad_norm": 1.8312249183654785, "learning_rate": 1.9762960071199334e-05, "loss": 0.7212, "step": 7875 }, { "epoch": 1.2857026243826781, "grad_norm": 1.8828734159469604, "learning_rate": 1.9762891382940152e-05, "loss": 0.6881, "step": 7876 }, { "epoch": 1.2858658830251826, "grad_norm": 1.6369315385818481, "learning_rate": 1.9762822684849743e-05, "loss": 0.6802, "step": 7877 }, { "epoch": 1.286029141667687, "grad_norm": 1.9573595523834229, "learning_rate": 1.9762753976928172e-05, "loss": 0.8298, "step": 7878 }, { "epoch": 1.2861924003101914, "grad_norm": 1.815604329109192, "learning_rate": 1.9762685259175506e-05, "loss": 0.6406, "step": 7879 }, { "epoch": 1.2863556589526959, "grad_norm": 1.72400963306427, "learning_rate": 1.9762616531591826e-05, "loss": 0.7195, "step": 7880 }, { "epoch": 1.2865189175952003, "grad_norm": 1.8507131338119507, "learning_rate": 1.9762547794177188e-05, "loss": 0.7279, "step": 7881 }, { "epoch": 1.2866821762377045, "grad_norm": 1.7912498712539673, "learning_rate": 1.976247904693167e-05, "loss": 0.7648, "step": 7882 }, { "epoch": 1.286845434880209, "grad_norm": 1.6017032861709595, "learning_rate": 1.976241028985534e-05, "loss": 0.6642, "step": 7883 }, { "epoch": 1.2870086935227134, "grad_norm": 1.5813000202178955, "learning_rate": 1.976234152294826e-05, "loss": 0.7493, "step": 7884 }, { "epoch": 1.2871719521652176, "grad_norm": 1.9897912740707397, "learning_rate": 1.9762272746210506e-05, "loss": 0.6715, "step": 7885 }, { "epoch": 1.287335210807722, "grad_norm": 2.0365378856658936, "learning_rate": 1.976220395964215e-05, "loss": 0.8664, "step": 7886 }, { "epoch": 1.2874984694502265, "grad_norm": 1.9992727041244507, "learning_rate": 1.9762135163243253e-05, "loss": 0.6703, "step": 7887 }, { "epoch": 1.287661728092731, "grad_norm": 1.9783709049224854, "learning_rate": 1.9762066357013893e-05, "loss": 0.7249, "step": 7888 }, { "epoch": 1.2878249867352354, "grad_norm": 1.850899338722229, "learning_rate": 1.9761997540954132e-05, "loss": 0.7121, "step": 7889 }, { "epoch": 1.2879882453777398, "grad_norm": 2.0041005611419678, "learning_rate": 1.976192871506404e-05, "loss": 0.8162, "step": 7890 }, { "epoch": 1.288151504020244, "grad_norm": 1.9776133298873901, "learning_rate": 1.9761859879343692e-05, "loss": 0.6506, "step": 7891 }, { "epoch": 1.2883147626627485, "grad_norm": 1.7234914302825928, "learning_rate": 1.9761791033793152e-05, "loss": 0.6508, "step": 7892 }, { "epoch": 1.288478021305253, "grad_norm": 1.662414312362671, "learning_rate": 1.976172217841249e-05, "loss": 0.7318, "step": 7893 }, { "epoch": 1.2886412799477571, "grad_norm": 1.699636459350586, "learning_rate": 1.9761653313201775e-05, "loss": 0.5838, "step": 7894 }, { "epoch": 1.2888045385902616, "grad_norm": 1.5659583806991577, "learning_rate": 1.9761584438161084e-05, "loss": 0.6985, "step": 7895 }, { "epoch": 1.288967797232766, "grad_norm": 1.8006304502487183, "learning_rate": 1.9761515553290474e-05, "loss": 0.6567, "step": 7896 }, { "epoch": 1.2891310558752704, "grad_norm": 1.6377241611480713, "learning_rate": 1.9761446658590024e-05, "loss": 0.6389, "step": 7897 }, { "epoch": 1.2892943145177749, "grad_norm": 1.77823007106781, "learning_rate": 1.97613777540598e-05, "loss": 0.7368, "step": 7898 }, { "epoch": 1.289457573160279, "grad_norm": 1.7207746505737305, "learning_rate": 1.9761308839699866e-05, "loss": 0.7228, "step": 7899 }, { "epoch": 1.2896208318027835, "grad_norm": 1.7510266304016113, "learning_rate": 1.9761239915510302e-05, "loss": 0.7386, "step": 7900 }, { "epoch": 1.289784090445288, "grad_norm": 1.668355107307434, "learning_rate": 1.976117098149117e-05, "loss": 0.6338, "step": 7901 }, { "epoch": 1.2899473490877924, "grad_norm": 1.5757437944412231, "learning_rate": 1.9761102037642542e-05, "loss": 0.5099, "step": 7902 }, { "epoch": 1.2901106077302966, "grad_norm": 1.8097087144851685, "learning_rate": 1.976103308396449e-05, "loss": 0.6304, "step": 7903 }, { "epoch": 1.290273866372801, "grad_norm": 1.902798056602478, "learning_rate": 1.9760964120457075e-05, "loss": 0.773, "step": 7904 }, { "epoch": 1.2904371250153055, "grad_norm": 1.7141273021697998, "learning_rate": 1.9760895147120372e-05, "loss": 0.5703, "step": 7905 }, { "epoch": 1.29060038365781, "grad_norm": 2.1649088859558105, "learning_rate": 1.9760826163954452e-05, "loss": 0.892, "step": 7906 }, { "epoch": 1.2907636423003144, "grad_norm": 2.2144346237182617, "learning_rate": 1.9760757170959382e-05, "loss": 0.7149, "step": 7907 }, { "epoch": 1.2909269009428186, "grad_norm": 1.6204133033752441, "learning_rate": 1.9760688168135233e-05, "loss": 0.5849, "step": 7908 }, { "epoch": 1.291090159585323, "grad_norm": 1.9654207229614258, "learning_rate": 1.9760619155482073e-05, "loss": 0.6406, "step": 7909 }, { "epoch": 1.2912534182278275, "grad_norm": 1.6770851612091064, "learning_rate": 1.976055013299997e-05, "loss": 0.6645, "step": 7910 }, { "epoch": 1.291416676870332, "grad_norm": 1.4437510967254639, "learning_rate": 1.9760481100688998e-05, "loss": 0.5364, "step": 7911 }, { "epoch": 1.2915799355128361, "grad_norm": 1.6092826128005981, "learning_rate": 1.9760412058549226e-05, "loss": 0.5882, "step": 7912 }, { "epoch": 1.2917431941553406, "grad_norm": 1.610540747642517, "learning_rate": 1.9760343006580716e-05, "loss": 0.6371, "step": 7913 }, { "epoch": 1.291906452797845, "grad_norm": 1.7379995584487915, "learning_rate": 1.976027394478355e-05, "loss": 0.6655, "step": 7914 }, { "epoch": 1.2920697114403494, "grad_norm": 2.1518821716308594, "learning_rate": 1.9760204873157786e-05, "loss": 1.3302, "step": 7915 }, { "epoch": 1.2922329700828539, "grad_norm": 2.120119094848633, "learning_rate": 1.97601357917035e-05, "loss": 0.7598, "step": 7916 }, { "epoch": 1.292396228725358, "grad_norm": 2.0103819370269775, "learning_rate": 1.9760066700420758e-05, "loss": 0.7538, "step": 7917 }, { "epoch": 1.2925594873678625, "grad_norm": 1.6739648580551147, "learning_rate": 1.9759997599309636e-05, "loss": 0.6042, "step": 7918 }, { "epoch": 1.292722746010367, "grad_norm": 1.6913206577301025, "learning_rate": 1.9759928488370195e-05, "loss": 0.6321, "step": 7919 }, { "epoch": 1.2928860046528712, "grad_norm": 1.6233775615692139, "learning_rate": 1.975985936760251e-05, "loss": 0.6157, "step": 7920 }, { "epoch": 1.2930492632953756, "grad_norm": 1.6490920782089233, "learning_rate": 1.975979023700665e-05, "loss": 0.5568, "step": 7921 }, { "epoch": 1.29321252193788, "grad_norm": 1.834374189376831, "learning_rate": 1.9759721096582682e-05, "loss": 0.787, "step": 7922 }, { "epoch": 1.2933757805803845, "grad_norm": 1.9529129266738892, "learning_rate": 1.9759651946330676e-05, "loss": 0.762, "step": 7923 }, { "epoch": 1.293539039222889, "grad_norm": 2.000338554382324, "learning_rate": 1.9759582786250707e-05, "loss": 0.7009, "step": 7924 }, { "epoch": 1.2937022978653934, "grad_norm": 3.424417734146118, "learning_rate": 1.9759513616342838e-05, "loss": 0.7751, "step": 7925 }, { "epoch": 1.2938655565078976, "grad_norm": 1.6400361061096191, "learning_rate": 1.9759444436607144e-05, "loss": 0.6112, "step": 7926 }, { "epoch": 1.294028815150402, "grad_norm": 2.056544303894043, "learning_rate": 1.975937524704369e-05, "loss": 0.6866, "step": 7927 }, { "epoch": 1.2941920737929065, "grad_norm": 1.823322057723999, "learning_rate": 1.9759306047652544e-05, "loss": 0.5563, "step": 7928 }, { "epoch": 1.2943553324354107, "grad_norm": 1.6962625980377197, "learning_rate": 1.9759236838433785e-05, "loss": 0.6942, "step": 7929 }, { "epoch": 1.2945185910779151, "grad_norm": 1.8685322999954224, "learning_rate": 1.9759167619387474e-05, "loss": 0.9046, "step": 7930 }, { "epoch": 1.2946818497204196, "grad_norm": 1.8325632810592651, "learning_rate": 1.9759098390513688e-05, "loss": 0.7726, "step": 7931 }, { "epoch": 1.294845108362924, "grad_norm": 1.7968077659606934, "learning_rate": 1.9759029151812486e-05, "loss": 0.6975, "step": 7932 }, { "epoch": 1.2950083670054284, "grad_norm": 2.505927562713623, "learning_rate": 1.975895990328395e-05, "loss": 0.7527, "step": 7933 }, { "epoch": 1.2951716256479329, "grad_norm": 1.9175455570220947, "learning_rate": 1.9758890644928142e-05, "loss": 0.7382, "step": 7934 }, { "epoch": 1.295334884290437, "grad_norm": 1.942936897277832, "learning_rate": 1.9758821376745136e-05, "loss": 0.7598, "step": 7935 }, { "epoch": 1.2954981429329415, "grad_norm": 2.1784725189208984, "learning_rate": 1.9758752098734995e-05, "loss": 0.9112, "step": 7936 }, { "epoch": 1.295661401575446, "grad_norm": 1.7469836473464966, "learning_rate": 1.9758682810897795e-05, "loss": 0.6725, "step": 7937 }, { "epoch": 1.2958246602179502, "grad_norm": 1.668153166770935, "learning_rate": 1.9758613513233603e-05, "loss": 0.5533, "step": 7938 }, { "epoch": 1.2959879188604546, "grad_norm": 1.8061392307281494, "learning_rate": 1.9758544205742495e-05, "loss": 0.6103, "step": 7939 }, { "epoch": 1.296151177502959, "grad_norm": 1.781225323677063, "learning_rate": 1.975847488842453e-05, "loss": 0.5765, "step": 7940 }, { "epoch": 1.2963144361454635, "grad_norm": 1.6806485652923584, "learning_rate": 1.9758405561279787e-05, "loss": 0.6042, "step": 7941 }, { "epoch": 1.296477694787968, "grad_norm": 1.7812618017196655, "learning_rate": 1.9758336224308326e-05, "loss": 0.6573, "step": 7942 }, { "epoch": 1.2966409534304721, "grad_norm": 2.053067207336426, "learning_rate": 1.975826687751023e-05, "loss": 0.8159, "step": 7943 }, { "epoch": 1.2968042120729766, "grad_norm": 2.0060856342315674, "learning_rate": 1.975819752088556e-05, "loss": 0.8457, "step": 7944 }, { "epoch": 1.296967470715481, "grad_norm": 1.792073369026184, "learning_rate": 1.9758128154434387e-05, "loss": 0.6568, "step": 7945 }, { "epoch": 1.2971307293579855, "grad_norm": 2.4094059467315674, "learning_rate": 1.975805877815678e-05, "loss": 0.7995, "step": 7946 }, { "epoch": 1.2972939880004897, "grad_norm": 1.7473502159118652, "learning_rate": 1.975798939205281e-05, "loss": 0.5127, "step": 7947 }, { "epoch": 1.297457246642994, "grad_norm": 1.8392223119735718, "learning_rate": 1.9757919996122548e-05, "loss": 0.6454, "step": 7948 }, { "epoch": 1.2976205052854985, "grad_norm": 1.753745436668396, "learning_rate": 1.9757850590366066e-05, "loss": 0.6928, "step": 7949 }, { "epoch": 1.297783763928003, "grad_norm": 1.9866712093353271, "learning_rate": 1.975778117478343e-05, "loss": 0.73, "step": 7950 }, { "epoch": 1.2979470225705074, "grad_norm": 1.5264848470687866, "learning_rate": 1.975771174937471e-05, "loss": 0.5966, "step": 7951 }, { "epoch": 1.2981102812130116, "grad_norm": 1.7728151082992554, "learning_rate": 1.9757642314139977e-05, "loss": 0.6844, "step": 7952 }, { "epoch": 1.298273539855516, "grad_norm": 1.826828122138977, "learning_rate": 1.9757572869079303e-05, "loss": 0.7398, "step": 7953 }, { "epoch": 1.2984367984980205, "grad_norm": 1.8208987712860107, "learning_rate": 1.9757503414192752e-05, "loss": 0.6533, "step": 7954 }, { "epoch": 1.298600057140525, "grad_norm": 1.841678261756897, "learning_rate": 1.9757433949480398e-05, "loss": 0.56, "step": 7955 }, { "epoch": 1.2987633157830292, "grad_norm": 2.5227131843566895, "learning_rate": 1.975736447494231e-05, "loss": 0.8545, "step": 7956 }, { "epoch": 1.2989265744255336, "grad_norm": 1.783561110496521, "learning_rate": 1.9757294990578565e-05, "loss": 0.7527, "step": 7957 }, { "epoch": 1.299089833068038, "grad_norm": 1.6390873193740845, "learning_rate": 1.9757225496389218e-05, "loss": 0.6454, "step": 7958 }, { "epoch": 1.2992530917105425, "grad_norm": 1.9630640745162964, "learning_rate": 1.9757155992374353e-05, "loss": 0.6226, "step": 7959 }, { "epoch": 1.299416350353047, "grad_norm": 1.6627589464187622, "learning_rate": 1.975708647853403e-05, "loss": 0.6783, "step": 7960 }, { "epoch": 1.2995796089955511, "grad_norm": 1.7812631130218506, "learning_rate": 1.975701695486833e-05, "loss": 0.5985, "step": 7961 }, { "epoch": 1.2997428676380556, "grad_norm": 1.687026858329773, "learning_rate": 1.975694742137731e-05, "loss": 0.7377, "step": 7962 }, { "epoch": 1.29990612628056, "grad_norm": 1.7239652872085571, "learning_rate": 1.9756877878061053e-05, "loss": 0.7842, "step": 7963 }, { "epoch": 1.3000693849230642, "grad_norm": 2.248943567276001, "learning_rate": 1.9756808324919618e-05, "loss": 0.8301, "step": 7964 }, { "epoch": 1.3002326435655687, "grad_norm": 1.955113172531128, "learning_rate": 1.975673876195308e-05, "loss": 0.7522, "step": 7965 }, { "epoch": 1.300395902208073, "grad_norm": 1.861124038696289, "learning_rate": 1.9756669189161507e-05, "loss": 0.7477, "step": 7966 }, { "epoch": 1.3005591608505775, "grad_norm": 1.788654088973999, "learning_rate": 1.9756599606544976e-05, "loss": 0.6239, "step": 7967 }, { "epoch": 1.300722419493082, "grad_norm": 1.594048023223877, "learning_rate": 1.9756530014103548e-05, "loss": 0.6346, "step": 7968 }, { "epoch": 1.3008856781355864, "grad_norm": 1.4570657014846802, "learning_rate": 1.9756460411837296e-05, "loss": 0.7204, "step": 7969 }, { "epoch": 1.3010489367780906, "grad_norm": 1.7533776760101318, "learning_rate": 1.9756390799746295e-05, "loss": 0.6831, "step": 7970 }, { "epoch": 1.301212195420595, "grad_norm": 1.9141697883605957, "learning_rate": 1.975632117783061e-05, "loss": 0.8091, "step": 7971 }, { "epoch": 1.3013754540630995, "grad_norm": 1.6265720129013062, "learning_rate": 1.975625154609031e-05, "loss": 0.6001, "step": 7972 }, { "epoch": 1.3015387127056037, "grad_norm": 1.7112325429916382, "learning_rate": 1.9756181904525468e-05, "loss": 0.6261, "step": 7973 }, { "epoch": 1.3017019713481082, "grad_norm": 1.7691562175750732, "learning_rate": 1.9756112253136154e-05, "loss": 0.6276, "step": 7974 }, { "epoch": 1.3018652299906126, "grad_norm": 1.780705213546753, "learning_rate": 1.9756042591922436e-05, "loss": 0.6415, "step": 7975 }, { "epoch": 1.302028488633117, "grad_norm": 1.6425621509552002, "learning_rate": 1.9755972920884387e-05, "loss": 0.6327, "step": 7976 }, { "epoch": 1.3021917472756215, "grad_norm": 1.8404335975646973, "learning_rate": 1.9755903240022073e-05, "loss": 0.6882, "step": 7977 }, { "epoch": 1.302355005918126, "grad_norm": 1.753991723060608, "learning_rate": 1.975583354933557e-05, "loss": 0.6555, "step": 7978 }, { "epoch": 1.3025182645606301, "grad_norm": 1.7840451002120972, "learning_rate": 1.9755763848824944e-05, "loss": 0.6578, "step": 7979 }, { "epoch": 1.3026815232031346, "grad_norm": 2.0250473022460938, "learning_rate": 1.9755694138490268e-05, "loss": 0.821, "step": 7980 }, { "epoch": 1.302844781845639, "grad_norm": 1.6208056211471558, "learning_rate": 1.975562441833161e-05, "loss": 0.6431, "step": 7981 }, { "epoch": 1.3030080404881432, "grad_norm": 1.5214015245437622, "learning_rate": 1.975555468834904e-05, "loss": 0.5933, "step": 7982 }, { "epoch": 1.3031712991306477, "grad_norm": 1.5170669555664062, "learning_rate": 1.975548494854263e-05, "loss": 0.6946, "step": 7983 }, { "epoch": 1.303334557773152, "grad_norm": 2.0292656421661377, "learning_rate": 1.975541519891245e-05, "loss": 0.8115, "step": 7984 }, { "epoch": 1.3034978164156565, "grad_norm": 2.1278674602508545, "learning_rate": 1.9755345439458566e-05, "loss": 0.7302, "step": 7985 }, { "epoch": 1.303661075058161, "grad_norm": 2.096353769302368, "learning_rate": 1.9755275670181055e-05, "loss": 0.6779, "step": 7986 }, { "epoch": 1.3038243337006654, "grad_norm": 1.8634445667266846, "learning_rate": 1.9755205891079986e-05, "loss": 0.7434, "step": 7987 }, { "epoch": 1.3039875923431696, "grad_norm": 2.119852066040039, "learning_rate": 1.975513610215542e-05, "loss": 0.7847, "step": 7988 }, { "epoch": 1.304150850985674, "grad_norm": 1.674684762954712, "learning_rate": 1.975506630340744e-05, "loss": 0.6287, "step": 7989 }, { "epoch": 1.3043141096281785, "grad_norm": 1.7489315271377563, "learning_rate": 1.975499649483611e-05, "loss": 0.7182, "step": 7990 }, { "epoch": 1.3044773682706827, "grad_norm": 1.8974401950836182, "learning_rate": 1.97549266764415e-05, "loss": 0.7091, "step": 7991 }, { "epoch": 1.3046406269131872, "grad_norm": 1.9123034477233887, "learning_rate": 1.975485684822368e-05, "loss": 0.7825, "step": 7992 }, { "epoch": 1.3048038855556916, "grad_norm": 1.835289478302002, "learning_rate": 1.975478701018273e-05, "loss": 0.67, "step": 7993 }, { "epoch": 1.304967144198196, "grad_norm": 1.5446045398712158, "learning_rate": 1.97547171623187e-05, "loss": 0.6607, "step": 7994 }, { "epoch": 1.3051304028407005, "grad_norm": 1.5740768909454346, "learning_rate": 1.975464730463168e-05, "loss": 0.6209, "step": 7995 }, { "epoch": 1.3052936614832047, "grad_norm": 1.8842756748199463, "learning_rate": 1.9754577437121733e-05, "loss": 0.6936, "step": 7996 }, { "epoch": 1.3054569201257091, "grad_norm": 1.884804129600525, "learning_rate": 1.9754507559788928e-05, "loss": 0.6859, "step": 7997 }, { "epoch": 1.3056201787682136, "grad_norm": 1.6712723970413208, "learning_rate": 1.9754437672633332e-05, "loss": 0.7219, "step": 7998 }, { "epoch": 1.305783437410718, "grad_norm": 1.7728461027145386, "learning_rate": 1.9754367775655024e-05, "loss": 0.6836, "step": 7999 }, { "epoch": 1.3059466960532222, "grad_norm": 1.7760695219039917, "learning_rate": 1.9754297868854075e-05, "loss": 0.6534, "step": 8000 }, { "epoch": 1.3061099546957267, "grad_norm": 1.550175666809082, "learning_rate": 1.975422795223054e-05, "loss": 0.5462, "step": 8001 }, { "epoch": 1.306273213338231, "grad_norm": 1.7566498517990112, "learning_rate": 1.975415802578451e-05, "loss": 0.7351, "step": 8002 }, { "epoch": 1.3064364719807355, "grad_norm": 1.7652151584625244, "learning_rate": 1.975408808951604e-05, "loss": 0.5762, "step": 8003 }, { "epoch": 1.30659973062324, "grad_norm": 1.7197532653808594, "learning_rate": 1.9754018143425206e-05, "loss": 0.6578, "step": 8004 }, { "epoch": 1.3067629892657442, "grad_norm": 1.623373031616211, "learning_rate": 1.975394818751208e-05, "loss": 0.7892, "step": 8005 }, { "epoch": 1.3069262479082486, "grad_norm": 1.8877060413360596, "learning_rate": 1.9753878221776726e-05, "loss": 0.7061, "step": 8006 }, { "epoch": 1.307089506550753, "grad_norm": 1.9480184316635132, "learning_rate": 1.9753808246219226e-05, "loss": 0.6747, "step": 8007 }, { "epoch": 1.3072527651932573, "grad_norm": 1.763015866279602, "learning_rate": 1.975373826083964e-05, "loss": 0.5604, "step": 8008 }, { "epoch": 1.3074160238357617, "grad_norm": 1.736615777015686, "learning_rate": 1.9753668265638043e-05, "loss": 0.7349, "step": 8009 }, { "epoch": 1.3075792824782662, "grad_norm": 1.5803017616271973, "learning_rate": 1.9753598260614506e-05, "loss": 0.5913, "step": 8010 }, { "epoch": 1.3077425411207706, "grad_norm": 1.8399187326431274, "learning_rate": 1.9753528245769096e-05, "loss": 0.6181, "step": 8011 }, { "epoch": 1.307905799763275, "grad_norm": 1.6089118719100952, "learning_rate": 1.9753458221101886e-05, "loss": 0.6655, "step": 8012 }, { "epoch": 1.3080690584057795, "grad_norm": 2.0973799228668213, "learning_rate": 1.9753388186612946e-05, "loss": 0.6441, "step": 8013 }, { "epoch": 1.3082323170482837, "grad_norm": 1.724628210067749, "learning_rate": 1.9753318142302347e-05, "loss": 0.7289, "step": 8014 }, { "epoch": 1.3083955756907881, "grad_norm": 1.7998989820480347, "learning_rate": 1.975324808817016e-05, "loss": 0.7638, "step": 8015 }, { "epoch": 1.3085588343332926, "grad_norm": 1.7368721961975098, "learning_rate": 1.9753178024216454e-05, "loss": 0.6393, "step": 8016 }, { "epoch": 1.3087220929757968, "grad_norm": 1.6947457790374756, "learning_rate": 1.9753107950441303e-05, "loss": 0.5748, "step": 8017 }, { "epoch": 1.3088853516183012, "grad_norm": 1.7658355236053467, "learning_rate": 1.975303786684477e-05, "loss": 0.6786, "step": 8018 }, { "epoch": 1.3090486102608057, "grad_norm": 1.8872051239013672, "learning_rate": 1.975296777342693e-05, "loss": 0.7267, "step": 8019 }, { "epoch": 1.30921186890331, "grad_norm": 1.6568111181259155, "learning_rate": 1.975289767018786e-05, "loss": 0.6146, "step": 8020 }, { "epoch": 1.3093751275458145, "grad_norm": 1.6706973314285278, "learning_rate": 1.9752827557127622e-05, "loss": 0.6413, "step": 8021 }, { "epoch": 1.309538386188319, "grad_norm": 1.4978121519088745, "learning_rate": 1.975275743424629e-05, "loss": 0.5876, "step": 8022 }, { "epoch": 1.3097016448308232, "grad_norm": 1.4152112007141113, "learning_rate": 1.9752687301543932e-05, "loss": 0.5421, "step": 8023 }, { "epoch": 1.3098649034733276, "grad_norm": 1.8553547859191895, "learning_rate": 1.9752617159020618e-05, "loss": 0.8561, "step": 8024 }, { "epoch": 1.310028162115832, "grad_norm": 1.5379799604415894, "learning_rate": 1.975254700667643e-05, "loss": 0.6795, "step": 8025 }, { "epoch": 1.3101914207583363, "grad_norm": 1.6830320358276367, "learning_rate": 1.975247684451142e-05, "loss": 0.8293, "step": 8026 }, { "epoch": 1.3103546794008407, "grad_norm": 1.6142410039901733, "learning_rate": 1.9752406672525675e-05, "loss": 0.5814, "step": 8027 }, { "epoch": 1.3105179380433452, "grad_norm": 1.6834763288497925, "learning_rate": 1.9752336490719254e-05, "loss": 0.5859, "step": 8028 }, { "epoch": 1.3106811966858496, "grad_norm": 1.9352947473526, "learning_rate": 1.9752266299092234e-05, "loss": 0.7103, "step": 8029 }, { "epoch": 1.310844455328354, "grad_norm": 2.2926385402679443, "learning_rate": 1.9752196097644687e-05, "loss": 0.8156, "step": 8030 }, { "epoch": 1.3110077139708585, "grad_norm": 2.0337791442871094, "learning_rate": 1.975212588637668e-05, "loss": 0.6592, "step": 8031 }, { "epoch": 1.3111709726133627, "grad_norm": 2.0646533966064453, "learning_rate": 1.9752055665288283e-05, "loss": 0.743, "step": 8032 }, { "epoch": 1.3113342312558671, "grad_norm": 1.8210393190383911, "learning_rate": 1.9751985434379572e-05, "loss": 0.7813, "step": 8033 }, { "epoch": 1.3114974898983716, "grad_norm": 1.658936619758606, "learning_rate": 1.975191519365061e-05, "loss": 0.5223, "step": 8034 }, { "epoch": 1.3116607485408758, "grad_norm": 1.6187835931777954, "learning_rate": 1.9751844943101476e-05, "loss": 0.6131, "step": 8035 }, { "epoch": 1.3118240071833802, "grad_norm": 2.1905157566070557, "learning_rate": 1.9751774682732234e-05, "loss": 0.7696, "step": 8036 }, { "epoch": 1.3119872658258847, "grad_norm": 1.9017592668533325, "learning_rate": 1.975170441254296e-05, "loss": 0.7994, "step": 8037 }, { "epoch": 1.312150524468389, "grad_norm": 1.7407249212265015, "learning_rate": 1.975163413253372e-05, "loss": 0.7763, "step": 8038 }, { "epoch": 1.3123137831108935, "grad_norm": 1.7648754119873047, "learning_rate": 1.975156384270459e-05, "loss": 0.7295, "step": 8039 }, { "epoch": 1.3124770417533977, "grad_norm": 1.6347990036010742, "learning_rate": 1.9751493543055634e-05, "loss": 0.6003, "step": 8040 }, { "epoch": 1.3126403003959022, "grad_norm": 1.660689115524292, "learning_rate": 1.9751423233586928e-05, "loss": 0.705, "step": 8041 }, { "epoch": 1.3128035590384066, "grad_norm": 1.5299161672592163, "learning_rate": 1.975135291429854e-05, "loss": 0.5818, "step": 8042 }, { "epoch": 1.312966817680911, "grad_norm": 1.6677756309509277, "learning_rate": 1.9751282585190546e-05, "loss": 0.765, "step": 8043 }, { "epoch": 1.3131300763234153, "grad_norm": 1.5151625871658325, "learning_rate": 1.975121224626301e-05, "loss": 0.7289, "step": 8044 }, { "epoch": 1.3132933349659197, "grad_norm": 1.716366171836853, "learning_rate": 1.9751141897516006e-05, "loss": 0.7082, "step": 8045 }, { "epoch": 1.3134565936084241, "grad_norm": 1.7935971021652222, "learning_rate": 1.9751071538949607e-05, "loss": 0.748, "step": 8046 }, { "epoch": 1.3136198522509286, "grad_norm": 1.9383065700531006, "learning_rate": 1.975100117056388e-05, "loss": 0.61, "step": 8047 }, { "epoch": 1.313783110893433, "grad_norm": 2.078526496887207, "learning_rate": 1.9750930792358898e-05, "loss": 0.8679, "step": 8048 }, { "epoch": 1.3139463695359372, "grad_norm": 1.8518353700637817, "learning_rate": 1.975086040433473e-05, "loss": 0.6961, "step": 8049 }, { "epoch": 1.3141096281784417, "grad_norm": 1.9666190147399902, "learning_rate": 1.9750790006491447e-05, "loss": 0.6792, "step": 8050 }, { "epoch": 1.3142728868209461, "grad_norm": 1.8078874349594116, "learning_rate": 1.975071959882912e-05, "loss": 0.6854, "step": 8051 }, { "epoch": 1.3144361454634503, "grad_norm": 1.619911551475525, "learning_rate": 1.9750649181347827e-05, "loss": 0.6506, "step": 8052 }, { "epoch": 1.3145994041059548, "grad_norm": 1.752761721611023, "learning_rate": 1.975057875404763e-05, "loss": 0.6942, "step": 8053 }, { "epoch": 1.3147626627484592, "grad_norm": 1.930680751800537, "learning_rate": 1.97505083169286e-05, "loss": 0.7038, "step": 8054 }, { "epoch": 1.3149259213909636, "grad_norm": 1.8137420415878296, "learning_rate": 1.9750437869990814e-05, "loss": 0.7447, "step": 8055 }, { "epoch": 1.315089180033468, "grad_norm": 1.6947797536849976, "learning_rate": 1.975036741323434e-05, "loss": 0.6758, "step": 8056 }, { "epoch": 1.3152524386759725, "grad_norm": 1.9128025770187378, "learning_rate": 1.9750296946659247e-05, "loss": 0.7756, "step": 8057 }, { "epoch": 1.3154156973184767, "grad_norm": 1.7921639680862427, "learning_rate": 1.9750226470265608e-05, "loss": 0.6835, "step": 8058 }, { "epoch": 1.3155789559609812, "grad_norm": 1.5529574155807495, "learning_rate": 1.9750155984053492e-05, "loss": 0.6173, "step": 8059 }, { "epoch": 1.3157422146034856, "grad_norm": 1.6517568826675415, "learning_rate": 1.9750085488022974e-05, "loss": 0.6938, "step": 8060 }, { "epoch": 1.3159054732459898, "grad_norm": 1.534742832183838, "learning_rate": 1.975001498217412e-05, "loss": 0.5277, "step": 8061 }, { "epoch": 1.3160687318884943, "grad_norm": 2.2611887454986572, "learning_rate": 1.9749944466507007e-05, "loss": 0.7019, "step": 8062 }, { "epoch": 1.3162319905309987, "grad_norm": 1.742044448852539, "learning_rate": 1.97498739410217e-05, "loss": 0.6104, "step": 8063 }, { "epoch": 1.3163952491735031, "grad_norm": 1.5628321170806885, "learning_rate": 1.9749803405718272e-05, "loss": 0.6045, "step": 8064 }, { "epoch": 1.3165585078160076, "grad_norm": 1.9653853178024292, "learning_rate": 1.9749732860596797e-05, "loss": 0.7479, "step": 8065 }, { "epoch": 1.316721766458512, "grad_norm": 1.7425636053085327, "learning_rate": 1.9749662305657342e-05, "loss": 0.715, "step": 8066 }, { "epoch": 1.3168850251010162, "grad_norm": 1.893271565437317, "learning_rate": 1.9749591740899977e-05, "loss": 0.6489, "step": 8067 }, { "epoch": 1.3170482837435207, "grad_norm": 1.837673544883728, "learning_rate": 1.974952116632478e-05, "loss": 0.7078, "step": 8068 }, { "epoch": 1.3172115423860251, "grad_norm": 1.7138010263442993, "learning_rate": 1.9749450581931816e-05, "loss": 0.6381, "step": 8069 }, { "epoch": 1.3173748010285293, "grad_norm": 1.743113398551941, "learning_rate": 1.974937998772116e-05, "loss": 0.7032, "step": 8070 }, { "epoch": 1.3175380596710338, "grad_norm": 1.8289601802825928, "learning_rate": 1.974930938369288e-05, "loss": 0.7865, "step": 8071 }, { "epoch": 1.3177013183135382, "grad_norm": 1.6350865364074707, "learning_rate": 1.9749238769847044e-05, "loss": 0.535, "step": 8072 }, { "epoch": 1.3178645769560426, "grad_norm": 1.6765069961547852, "learning_rate": 1.9749168146183734e-05, "loss": 0.623, "step": 8073 }, { "epoch": 1.318027835598547, "grad_norm": 1.5076298713684082, "learning_rate": 1.9749097512703008e-05, "loss": 0.6332, "step": 8074 }, { "epoch": 1.3181910942410515, "grad_norm": 1.5401374101638794, "learning_rate": 1.9749026869404945e-05, "loss": 0.5836, "step": 8075 }, { "epoch": 1.3183543528835557, "grad_norm": 1.817402958869934, "learning_rate": 1.9748956216289616e-05, "loss": 0.6298, "step": 8076 }, { "epoch": 1.3185176115260602, "grad_norm": 1.966994047164917, "learning_rate": 1.9748885553357093e-05, "loss": 0.822, "step": 8077 }, { "epoch": 1.3186808701685646, "grad_norm": 1.9398101568222046, "learning_rate": 1.9748814880607437e-05, "loss": 0.7489, "step": 8078 }, { "epoch": 1.3188441288110688, "grad_norm": 1.5898211002349854, "learning_rate": 1.974874419804073e-05, "loss": 0.6641, "step": 8079 }, { "epoch": 1.3190073874535733, "grad_norm": 2.002545118331909, "learning_rate": 1.9748673505657045e-05, "loss": 0.6466, "step": 8080 }, { "epoch": 1.3191706460960777, "grad_norm": 1.5677013397216797, "learning_rate": 1.9748602803456448e-05, "loss": 0.6405, "step": 8081 }, { "epoch": 1.3193339047385821, "grad_norm": 1.4841282367706299, "learning_rate": 1.9748532091439006e-05, "loss": 0.5822, "step": 8082 }, { "epoch": 1.3194971633810866, "grad_norm": 1.5994740724563599, "learning_rate": 1.9748461369604797e-05, "loss": 0.6568, "step": 8083 }, { "epoch": 1.3196604220235908, "grad_norm": 2.0916876792907715, "learning_rate": 1.974839063795389e-05, "loss": 0.8002, "step": 8084 }, { "epoch": 1.3198236806660952, "grad_norm": 1.5649064779281616, "learning_rate": 1.9748319896486357e-05, "loss": 0.6308, "step": 8085 }, { "epoch": 1.3199869393085997, "grad_norm": 1.684648036956787, "learning_rate": 1.9748249145202266e-05, "loss": 0.5968, "step": 8086 }, { "epoch": 1.320150197951104, "grad_norm": 1.6424453258514404, "learning_rate": 1.9748178384101694e-05, "loss": 0.6824, "step": 8087 }, { "epoch": 1.3203134565936083, "grad_norm": 2.351764678955078, "learning_rate": 1.9748107613184705e-05, "loss": 0.7244, "step": 8088 }, { "epoch": 1.3204767152361128, "grad_norm": 1.6778290271759033, "learning_rate": 1.9748036832451377e-05, "loss": 0.6595, "step": 8089 }, { "epoch": 1.3206399738786172, "grad_norm": 1.7173930406570435, "learning_rate": 1.9747966041901776e-05, "loss": 0.7211, "step": 8090 }, { "epoch": 1.3208032325211216, "grad_norm": 1.709312915802002, "learning_rate": 1.974789524153598e-05, "loss": 0.6519, "step": 8091 }, { "epoch": 1.320966491163626, "grad_norm": 1.7854859828948975, "learning_rate": 1.9747824431354052e-05, "loss": 0.7465, "step": 8092 }, { "epoch": 1.3211297498061303, "grad_norm": 2.0232982635498047, "learning_rate": 1.974775361135607e-05, "loss": 0.7568, "step": 8093 }, { "epoch": 1.3212930084486347, "grad_norm": 1.856859564781189, "learning_rate": 1.9747682781542103e-05, "loss": 0.8868, "step": 8094 }, { "epoch": 1.3214562670911392, "grad_norm": 1.8469316959381104, "learning_rate": 1.974761194191222e-05, "loss": 0.743, "step": 8095 }, { "epoch": 1.3216195257336436, "grad_norm": 1.8788880109786987, "learning_rate": 1.9747541092466496e-05, "loss": 0.7537, "step": 8096 }, { "epoch": 1.3217827843761478, "grad_norm": 1.8057591915130615, "learning_rate": 1.9747470233204997e-05, "loss": 0.7791, "step": 8097 }, { "epoch": 1.3219460430186523, "grad_norm": 1.8077384233474731, "learning_rate": 1.9747399364127803e-05, "loss": 0.6298, "step": 8098 }, { "epoch": 1.3221093016611567, "grad_norm": 2.701310873031616, "learning_rate": 1.9747328485234977e-05, "loss": 0.7212, "step": 8099 }, { "epoch": 1.3222725603036611, "grad_norm": 2.286334753036499, "learning_rate": 1.9747257596526594e-05, "loss": 0.7267, "step": 8100 }, { "epoch": 1.3224358189461656, "grad_norm": 1.9304906129837036, "learning_rate": 1.974718669800273e-05, "loss": 0.713, "step": 8101 }, { "epoch": 1.3225990775886698, "grad_norm": 1.848392367362976, "learning_rate": 1.9747115789663443e-05, "loss": 0.7195, "step": 8102 }, { "epoch": 1.3227623362311742, "grad_norm": 1.5515904426574707, "learning_rate": 1.974704487150882e-05, "loss": 0.6979, "step": 8103 }, { "epoch": 1.3229255948736787, "grad_norm": 1.6959209442138672, "learning_rate": 1.9746973943538923e-05, "loss": 0.6765, "step": 8104 }, { "epoch": 1.3230888535161829, "grad_norm": 1.8557239770889282, "learning_rate": 1.9746903005753828e-05, "loss": 0.6677, "step": 8105 }, { "epoch": 1.3232521121586873, "grad_norm": 1.8728312253952026, "learning_rate": 1.9746832058153602e-05, "loss": 0.6587, "step": 8106 }, { "epoch": 1.3234153708011918, "grad_norm": 1.5390833616256714, "learning_rate": 1.9746761100738318e-05, "loss": 0.5208, "step": 8107 }, { "epoch": 1.3235786294436962, "grad_norm": 2.1384029388427734, "learning_rate": 1.974669013350805e-05, "loss": 0.7188, "step": 8108 }, { "epoch": 1.3237418880862006, "grad_norm": 2.177511692047119, "learning_rate": 1.9746619156462866e-05, "loss": 0.7011, "step": 8109 }, { "epoch": 1.323905146728705, "grad_norm": 1.6290374994277954, "learning_rate": 1.9746548169602843e-05, "loss": 0.5815, "step": 8110 }, { "epoch": 1.3240684053712093, "grad_norm": 2.136305093765259, "learning_rate": 1.9746477172928043e-05, "loss": 1.0092, "step": 8111 }, { "epoch": 1.3242316640137137, "grad_norm": 1.8016566038131714, "learning_rate": 1.9746406166438546e-05, "loss": 0.7505, "step": 8112 }, { "epoch": 1.3243949226562182, "grad_norm": 1.9030386209487915, "learning_rate": 1.9746335150134418e-05, "loss": 0.7526, "step": 8113 }, { "epoch": 1.3245581812987224, "grad_norm": 1.9278345108032227, "learning_rate": 1.9746264124015736e-05, "loss": 0.6744, "step": 8114 }, { "epoch": 1.3247214399412268, "grad_norm": 1.8157521486282349, "learning_rate": 1.9746193088082568e-05, "loss": 0.7071, "step": 8115 }, { "epoch": 1.3248846985837313, "grad_norm": 1.6955139636993408, "learning_rate": 1.974612204233499e-05, "loss": 0.641, "step": 8116 }, { "epoch": 1.3250479572262357, "grad_norm": 1.770561933517456, "learning_rate": 1.9746050986773062e-05, "loss": 0.5339, "step": 8117 }, { "epoch": 1.3252112158687401, "grad_norm": 1.8109534978866577, "learning_rate": 1.9745979921396866e-05, "loss": 0.7265, "step": 8118 }, { "epoch": 1.3253744745112446, "grad_norm": 2.2096080780029297, "learning_rate": 1.9745908846206472e-05, "loss": 0.7263, "step": 8119 }, { "epoch": 1.3255377331537488, "grad_norm": 1.6850926876068115, "learning_rate": 1.9745837761201948e-05, "loss": 0.6671, "step": 8120 }, { "epoch": 1.3257009917962532, "grad_norm": 1.6506586074829102, "learning_rate": 1.974576666638337e-05, "loss": 0.747, "step": 8121 }, { "epoch": 1.3258642504387577, "grad_norm": 1.7427412271499634, "learning_rate": 1.974569556175081e-05, "loss": 0.7494, "step": 8122 }, { "epoch": 1.3260275090812619, "grad_norm": 2.103715181350708, "learning_rate": 1.9745624447304335e-05, "loss": 0.7641, "step": 8123 }, { "epoch": 1.3261907677237663, "grad_norm": 1.9754912853240967, "learning_rate": 1.9745553323044018e-05, "loss": 0.7744, "step": 8124 }, { "epoch": 1.3263540263662708, "grad_norm": 1.8413755893707275, "learning_rate": 1.974548218896993e-05, "loss": 0.825, "step": 8125 }, { "epoch": 1.3265172850087752, "grad_norm": 1.590610146522522, "learning_rate": 1.974541104508215e-05, "loss": 0.6029, "step": 8126 }, { "epoch": 1.3266805436512796, "grad_norm": 1.9644966125488281, "learning_rate": 1.974533989138074e-05, "loss": 0.7711, "step": 8127 }, { "epoch": 1.3268438022937838, "grad_norm": 1.9963268041610718, "learning_rate": 1.9745268727865774e-05, "loss": 0.7562, "step": 8128 }, { "epoch": 1.3270070609362883, "grad_norm": 1.7514228820800781, "learning_rate": 1.9745197554537326e-05, "loss": 0.712, "step": 8129 }, { "epoch": 1.3271703195787927, "grad_norm": 1.4804366827011108, "learning_rate": 1.9745126371395463e-05, "loss": 0.5842, "step": 8130 }, { "epoch": 1.3273335782212972, "grad_norm": 1.6377873420715332, "learning_rate": 1.9745055178440266e-05, "loss": 0.7235, "step": 8131 }, { "epoch": 1.3274968368638014, "grad_norm": 1.6488251686096191, "learning_rate": 1.97449839756718e-05, "loss": 0.7401, "step": 8132 }, { "epoch": 1.3276600955063058, "grad_norm": 1.6622512340545654, "learning_rate": 1.9744912763090136e-05, "loss": 0.6575, "step": 8133 }, { "epoch": 1.3278233541488103, "grad_norm": 1.5592079162597656, "learning_rate": 1.9744841540695347e-05, "loss": 0.6029, "step": 8134 }, { "epoch": 1.3279866127913147, "grad_norm": 1.8984745740890503, "learning_rate": 1.9744770308487508e-05, "loss": 0.7554, "step": 8135 }, { "epoch": 1.3281498714338191, "grad_norm": 1.8811886310577393, "learning_rate": 1.9744699066466687e-05, "loss": 0.6936, "step": 8136 }, { "epoch": 1.3283131300763233, "grad_norm": 1.97040855884552, "learning_rate": 1.9744627814632956e-05, "loss": 0.6338, "step": 8137 }, { "epoch": 1.3284763887188278, "grad_norm": 1.8400938510894775, "learning_rate": 1.9744556552986385e-05, "loss": 0.6148, "step": 8138 }, { "epoch": 1.3286396473613322, "grad_norm": 1.9490940570831299, "learning_rate": 1.974448528152705e-05, "loss": 0.8123, "step": 8139 }, { "epoch": 1.3288029060038367, "grad_norm": 1.7277472019195557, "learning_rate": 1.974441400025502e-05, "loss": 0.7226, "step": 8140 }, { "epoch": 1.3289661646463409, "grad_norm": 1.7936655282974243, "learning_rate": 1.974434270917037e-05, "loss": 0.7749, "step": 8141 }, { "epoch": 1.3291294232888453, "grad_norm": 1.713377833366394, "learning_rate": 1.9744271408273168e-05, "loss": 0.7318, "step": 8142 }, { "epoch": 1.3292926819313498, "grad_norm": 2.0047049522399902, "learning_rate": 1.9744200097563487e-05, "loss": 0.8208, "step": 8143 }, { "epoch": 1.3294559405738542, "grad_norm": 1.46015465259552, "learning_rate": 1.97441287770414e-05, "loss": 0.6884, "step": 8144 }, { "epoch": 1.3296191992163586, "grad_norm": 1.7572280168533325, "learning_rate": 1.9744057446706977e-05, "loss": 0.6906, "step": 8145 }, { "epoch": 1.3297824578588628, "grad_norm": 1.5216182470321655, "learning_rate": 1.9743986106560293e-05, "loss": 0.5991, "step": 8146 }, { "epoch": 1.3299457165013673, "grad_norm": 2.378322124481201, "learning_rate": 1.9743914756601413e-05, "loss": 0.7505, "step": 8147 }, { "epoch": 1.3301089751438717, "grad_norm": 1.7232651710510254, "learning_rate": 1.974384339683042e-05, "loss": 0.6279, "step": 8148 }, { "epoch": 1.330272233786376, "grad_norm": 1.5912830829620361, "learning_rate": 1.9743772027247375e-05, "loss": 0.6606, "step": 8149 }, { "epoch": 1.3304354924288804, "grad_norm": 1.8480104207992554, "learning_rate": 1.9743700647852356e-05, "loss": 0.8552, "step": 8150 }, { "epoch": 1.3305987510713848, "grad_norm": 1.7613649368286133, "learning_rate": 1.9743629258645428e-05, "loss": 0.6728, "step": 8151 }, { "epoch": 1.3307620097138892, "grad_norm": 1.889197587966919, "learning_rate": 1.974355785962667e-05, "loss": 0.6592, "step": 8152 }, { "epoch": 1.3309252683563937, "grad_norm": 2.1506028175354004, "learning_rate": 1.9743486450796156e-05, "loss": 0.598, "step": 8153 }, { "epoch": 1.3310885269988981, "grad_norm": 1.7290527820587158, "learning_rate": 1.9743415032153953e-05, "loss": 0.7441, "step": 8154 }, { "epoch": 1.3312517856414023, "grad_norm": 1.5732158422470093, "learning_rate": 1.974334360370013e-05, "loss": 0.7231, "step": 8155 }, { "epoch": 1.3314150442839068, "grad_norm": 1.9649945497512817, "learning_rate": 1.9743272165434765e-05, "loss": 0.7538, "step": 8156 }, { "epoch": 1.3315783029264112, "grad_norm": 1.9967423677444458, "learning_rate": 1.9743200717357927e-05, "loss": 0.6029, "step": 8157 }, { "epoch": 1.3317415615689154, "grad_norm": 1.8611247539520264, "learning_rate": 1.974312925946969e-05, "loss": 0.7021, "step": 8158 }, { "epoch": 1.3319048202114199, "grad_norm": 1.3805376291275024, "learning_rate": 1.9743057791770122e-05, "loss": 0.5725, "step": 8159 }, { "epoch": 1.3320680788539243, "grad_norm": 2.0026237964630127, "learning_rate": 1.97429863142593e-05, "loss": 0.7716, "step": 8160 }, { "epoch": 1.3322313374964287, "grad_norm": 1.6132175922393799, "learning_rate": 1.974291482693729e-05, "loss": 0.7247, "step": 8161 }, { "epoch": 1.3323945961389332, "grad_norm": 1.7901742458343506, "learning_rate": 1.9742843329804168e-05, "loss": 0.6654, "step": 8162 }, { "epoch": 1.3325578547814376, "grad_norm": 1.7907439470291138, "learning_rate": 1.9742771822860007e-05, "loss": 0.6095, "step": 8163 }, { "epoch": 1.3327211134239418, "grad_norm": 1.5700527429580688, "learning_rate": 1.9742700306104876e-05, "loss": 0.5795, "step": 8164 }, { "epoch": 1.3328843720664463, "grad_norm": 1.961586356163025, "learning_rate": 1.9742628779538848e-05, "loss": 0.7602, "step": 8165 }, { "epoch": 1.3330476307089507, "grad_norm": 1.613202691078186, "learning_rate": 1.9742557243162e-05, "loss": 0.6464, "step": 8166 }, { "epoch": 1.333210889351455, "grad_norm": 1.7981504201889038, "learning_rate": 1.9742485696974395e-05, "loss": 0.7119, "step": 8167 }, { "epoch": 1.3333741479939594, "grad_norm": 2.0982704162597656, "learning_rate": 1.974241414097611e-05, "loss": 0.5979, "step": 8168 }, { "epoch": 1.3335374066364638, "grad_norm": 1.7805267572402954, "learning_rate": 1.9742342575167213e-05, "loss": 0.6941, "step": 8169 }, { "epoch": 1.3337006652789682, "grad_norm": 1.9152238368988037, "learning_rate": 1.974227099954779e-05, "loss": 0.7014, "step": 8170 }, { "epoch": 1.3338639239214727, "grad_norm": 1.9404221773147583, "learning_rate": 1.9742199414117894e-05, "loss": 0.7491, "step": 8171 }, { "epoch": 1.334027182563977, "grad_norm": 1.9786393642425537, "learning_rate": 1.9742127818877605e-05, "loss": 0.6749, "step": 8172 }, { "epoch": 1.3341904412064813, "grad_norm": 1.8398462533950806, "learning_rate": 1.9742056213827e-05, "loss": 0.6962, "step": 8173 }, { "epoch": 1.3343536998489858, "grad_norm": 1.7491601705551147, "learning_rate": 1.9741984598966146e-05, "loss": 0.7358, "step": 8174 }, { "epoch": 1.3345169584914902, "grad_norm": 1.5813140869140625, "learning_rate": 1.9741912974295115e-05, "loss": 0.6308, "step": 8175 }, { "epoch": 1.3346802171339944, "grad_norm": 1.6845985651016235, "learning_rate": 1.9741841339813982e-05, "loss": 0.6448, "step": 8176 }, { "epoch": 1.3348434757764989, "grad_norm": 2.0108141899108887, "learning_rate": 1.9741769695522815e-05, "loss": 0.8839, "step": 8177 }, { "epoch": 1.3350067344190033, "grad_norm": 1.986931324005127, "learning_rate": 1.9741698041421692e-05, "loss": 0.7152, "step": 8178 }, { "epoch": 1.3351699930615077, "grad_norm": 1.5547858476638794, "learning_rate": 1.9741626377510677e-05, "loss": 0.6183, "step": 8179 }, { "epoch": 1.3353332517040122, "grad_norm": 1.5541472434997559, "learning_rate": 1.9741554703789852e-05, "loss": 0.6218, "step": 8180 }, { "epoch": 1.3354965103465164, "grad_norm": 2.258358955383301, "learning_rate": 1.974148302025928e-05, "loss": 0.6975, "step": 8181 }, { "epoch": 1.3356597689890208, "grad_norm": 1.9497052431106567, "learning_rate": 1.974141132691904e-05, "loss": 0.63, "step": 8182 }, { "epoch": 1.3358230276315253, "grad_norm": 1.7772135734558105, "learning_rate": 1.97413396237692e-05, "loss": 0.6188, "step": 8183 }, { "epoch": 1.3359862862740297, "grad_norm": 1.7281135320663452, "learning_rate": 1.9741267910809832e-05, "loss": 0.7195, "step": 8184 }, { "epoch": 1.336149544916534, "grad_norm": 1.7641513347625732, "learning_rate": 1.9741196188041015e-05, "loss": 0.7744, "step": 8185 }, { "epoch": 1.3363128035590384, "grad_norm": 1.8051087856292725, "learning_rate": 1.974112445546281e-05, "loss": 0.8182, "step": 8186 }, { "epoch": 1.3364760622015428, "grad_norm": 1.5499836206436157, "learning_rate": 1.97410527130753e-05, "loss": 0.5419, "step": 8187 }, { "epoch": 1.3366393208440472, "grad_norm": 1.8288648128509521, "learning_rate": 1.9740980960878548e-05, "loss": 0.6964, "step": 8188 }, { "epoch": 1.3368025794865517, "grad_norm": 1.9618353843688965, "learning_rate": 1.9740909198872634e-05, "loss": 0.7829, "step": 8189 }, { "epoch": 1.336965838129056, "grad_norm": 1.8466296195983887, "learning_rate": 1.9740837427057627e-05, "loss": 0.657, "step": 8190 }, { "epoch": 1.3371290967715603, "grad_norm": 1.6712092161178589, "learning_rate": 1.9740765645433597e-05, "loss": 0.6126, "step": 8191 }, { "epoch": 1.3372923554140648, "grad_norm": 1.8561662435531616, "learning_rate": 1.974069385400062e-05, "loss": 0.7387, "step": 8192 }, { "epoch": 1.337455614056569, "grad_norm": 1.8573241233825684, "learning_rate": 1.9740622052758767e-05, "loss": 0.7766, "step": 8193 }, { "epoch": 1.3376188726990734, "grad_norm": 1.5488899946212769, "learning_rate": 1.974055024170811e-05, "loss": 0.6702, "step": 8194 }, { "epoch": 1.3377821313415779, "grad_norm": 1.6390408277511597, "learning_rate": 1.974047842084872e-05, "loss": 0.6084, "step": 8195 }, { "epoch": 1.3379453899840823, "grad_norm": 1.8268978595733643, "learning_rate": 1.9740406590180673e-05, "loss": 0.6841, "step": 8196 }, { "epoch": 1.3381086486265867, "grad_norm": 1.70668625831604, "learning_rate": 1.974033474970404e-05, "loss": 0.6748, "step": 8197 }, { "epoch": 1.3382719072690912, "grad_norm": 1.8198738098144531, "learning_rate": 1.974026289941889e-05, "loss": 0.6791, "step": 8198 }, { "epoch": 1.3384351659115954, "grad_norm": 1.6487101316452026, "learning_rate": 1.9740191039325297e-05, "loss": 0.6012, "step": 8199 }, { "epoch": 1.3385984245540998, "grad_norm": 1.8011170625686646, "learning_rate": 1.9740119169423337e-05, "loss": 0.895, "step": 8200 }, { "epoch": 1.3387616831966043, "grad_norm": 1.9904978275299072, "learning_rate": 1.9740047289713077e-05, "loss": 0.7848, "step": 8201 }, { "epoch": 1.3389249418391085, "grad_norm": 2.1504199504852295, "learning_rate": 1.9739975400194593e-05, "loss": 0.7995, "step": 8202 }, { "epoch": 1.339088200481613, "grad_norm": 1.6668440103530884, "learning_rate": 1.9739903500867958e-05, "loss": 0.6498, "step": 8203 }, { "epoch": 1.3392514591241174, "grad_norm": 1.6868622303009033, "learning_rate": 1.973983159173324e-05, "loss": 0.6898, "step": 8204 }, { "epoch": 1.3394147177666218, "grad_norm": 1.7088543176651, "learning_rate": 1.973975967279052e-05, "loss": 0.6725, "step": 8205 }, { "epoch": 1.3395779764091262, "grad_norm": 2.2173802852630615, "learning_rate": 1.9739687744039858e-05, "loss": 0.8342, "step": 8206 }, { "epoch": 1.3397412350516307, "grad_norm": 1.6543817520141602, "learning_rate": 1.9739615805481337e-05, "loss": 0.6462, "step": 8207 }, { "epoch": 1.339904493694135, "grad_norm": 1.6901558637619019, "learning_rate": 1.9739543857115022e-05, "loss": 0.6842, "step": 8208 }, { "epoch": 1.3400677523366393, "grad_norm": 1.7962465286254883, "learning_rate": 1.973947189894099e-05, "loss": 0.7414, "step": 8209 }, { "epoch": 1.3402310109791438, "grad_norm": 2.0246405601501465, "learning_rate": 1.9739399930959316e-05, "loss": 0.7772, "step": 8210 }, { "epoch": 1.340394269621648, "grad_norm": 1.8361958265304565, "learning_rate": 1.9739327953170065e-05, "loss": 0.7662, "step": 8211 }, { "epoch": 1.3405575282641524, "grad_norm": 1.6218116283416748, "learning_rate": 1.973925596557331e-05, "loss": 0.6298, "step": 8212 }, { "epoch": 1.3407207869066569, "grad_norm": 1.82340407371521, "learning_rate": 1.9739183968169134e-05, "loss": 0.8959, "step": 8213 }, { "epoch": 1.3408840455491613, "grad_norm": 1.682847261428833, "learning_rate": 1.97391119609576e-05, "loss": 0.5834, "step": 8214 }, { "epoch": 1.3410473041916657, "grad_norm": 1.565724492073059, "learning_rate": 1.9739039943938784e-05, "loss": 0.5932, "step": 8215 }, { "epoch": 1.3412105628341702, "grad_norm": 1.6493486166000366, "learning_rate": 1.9738967917112752e-05, "loss": 0.5719, "step": 8216 }, { "epoch": 1.3413738214766744, "grad_norm": 1.4760262966156006, "learning_rate": 1.9738895880479586e-05, "loss": 0.5388, "step": 8217 }, { "epoch": 1.3415370801191788, "grad_norm": 1.8590553998947144, "learning_rate": 1.9738823834039354e-05, "loss": 0.7256, "step": 8218 }, { "epoch": 1.3417003387616833, "grad_norm": 1.8090965747833252, "learning_rate": 1.9738751777792128e-05, "loss": 0.7672, "step": 8219 }, { "epoch": 1.3418635974041875, "grad_norm": 1.9499576091766357, "learning_rate": 1.9738679711737984e-05, "loss": 0.7934, "step": 8220 }, { "epoch": 1.342026856046692, "grad_norm": 1.5775538682937622, "learning_rate": 1.9738607635876992e-05, "loss": 0.7127, "step": 8221 }, { "epoch": 1.3421901146891964, "grad_norm": 1.8453025817871094, "learning_rate": 1.9738535550209224e-05, "loss": 0.713, "step": 8222 }, { "epoch": 1.3423533733317008, "grad_norm": 1.6190437078475952, "learning_rate": 1.973846345473475e-05, "loss": 0.6475, "step": 8223 }, { "epoch": 1.3425166319742052, "grad_norm": 1.601019024848938, "learning_rate": 1.973839134945365e-05, "loss": 0.6121, "step": 8224 }, { "epoch": 1.3426798906167094, "grad_norm": 1.6232457160949707, "learning_rate": 1.973831923436599e-05, "loss": 0.5755, "step": 8225 }, { "epoch": 1.3428431492592139, "grad_norm": 1.4971954822540283, "learning_rate": 1.9738247109471848e-05, "loss": 0.5409, "step": 8226 }, { "epoch": 1.3430064079017183, "grad_norm": 1.6471303701400757, "learning_rate": 1.9738174974771288e-05, "loss": 0.6231, "step": 8227 }, { "epoch": 1.3431696665442228, "grad_norm": 1.7278141975402832, "learning_rate": 1.9738102830264397e-05, "loss": 0.6721, "step": 8228 }, { "epoch": 1.343332925186727, "grad_norm": 2.037161350250244, "learning_rate": 1.9738030675951233e-05, "loss": 0.6543, "step": 8229 }, { "epoch": 1.3434961838292314, "grad_norm": 1.4915215969085693, "learning_rate": 1.9737958511831878e-05, "loss": 0.5908, "step": 8230 }, { "epoch": 1.3436594424717359, "grad_norm": 1.5434035062789917, "learning_rate": 1.9737886337906394e-05, "loss": 0.5733, "step": 8231 }, { "epoch": 1.3438227011142403, "grad_norm": 1.872965693473816, "learning_rate": 1.973781415417487e-05, "loss": 0.6923, "step": 8232 }, { "epoch": 1.3439859597567447, "grad_norm": 1.7643595933914185, "learning_rate": 1.9737741960637363e-05, "loss": 0.6127, "step": 8233 }, { "epoch": 1.344149218399249, "grad_norm": 1.779660701751709, "learning_rate": 1.9737669757293955e-05, "loss": 0.7227, "step": 8234 }, { "epoch": 1.3443124770417534, "grad_norm": 1.5762369632720947, "learning_rate": 1.9737597544144717e-05, "loss": 0.664, "step": 8235 }, { "epoch": 1.3444757356842578, "grad_norm": 1.7186492681503296, "learning_rate": 1.9737525321189724e-05, "loss": 0.7634, "step": 8236 }, { "epoch": 1.344638994326762, "grad_norm": 2.5254247188568115, "learning_rate": 1.973745308842904e-05, "loss": 0.8074, "step": 8237 }, { "epoch": 1.3448022529692665, "grad_norm": 1.7636942863464355, "learning_rate": 1.9737380845862745e-05, "loss": 0.6222, "step": 8238 }, { "epoch": 1.344965511611771, "grad_norm": 1.6847233772277832, "learning_rate": 1.973730859349091e-05, "loss": 0.7141, "step": 8239 }, { "epoch": 1.3451287702542754, "grad_norm": 1.8828792572021484, "learning_rate": 1.973723633131361e-05, "loss": 0.7007, "step": 8240 }, { "epoch": 1.3452920288967798, "grad_norm": 2.0926733016967773, "learning_rate": 1.9737164059330914e-05, "loss": 0.7448, "step": 8241 }, { "epoch": 1.3454552875392842, "grad_norm": 2.036897897720337, "learning_rate": 1.9737091777542896e-05, "loss": 0.7313, "step": 8242 }, { "epoch": 1.3456185461817884, "grad_norm": 1.5905730724334717, "learning_rate": 1.973701948594963e-05, "loss": 0.6581, "step": 8243 }, { "epoch": 1.3457818048242929, "grad_norm": 1.699825644493103, "learning_rate": 1.973694718455119e-05, "loss": 0.6663, "step": 8244 }, { "epoch": 1.3459450634667973, "grad_norm": 1.9438644647598267, "learning_rate": 1.973687487334764e-05, "loss": 0.7834, "step": 8245 }, { "epoch": 1.3461083221093015, "grad_norm": 1.8610684871673584, "learning_rate": 1.9736802552339064e-05, "loss": 0.5558, "step": 8246 }, { "epoch": 1.346271580751806, "grad_norm": 1.677097201347351, "learning_rate": 1.9736730221525532e-05, "loss": 0.5716, "step": 8247 }, { "epoch": 1.3464348393943104, "grad_norm": 1.4830352067947388, "learning_rate": 1.9736657880907112e-05, "loss": 0.6042, "step": 8248 }, { "epoch": 1.3465980980368149, "grad_norm": 1.85309636592865, "learning_rate": 1.973658553048388e-05, "loss": 0.7583, "step": 8249 }, { "epoch": 1.3467613566793193, "grad_norm": 1.703464388847351, "learning_rate": 1.973651317025591e-05, "loss": 0.6163, "step": 8250 }, { "epoch": 1.3469246153218237, "grad_norm": 1.9636685848236084, "learning_rate": 1.9736440800223276e-05, "loss": 0.9122, "step": 8251 }, { "epoch": 1.347087873964328, "grad_norm": 1.645098328590393, "learning_rate": 1.973636842038605e-05, "loss": 0.6377, "step": 8252 }, { "epoch": 1.3472511326068324, "grad_norm": 1.6568094491958618, "learning_rate": 1.9736296030744296e-05, "loss": 0.6104, "step": 8253 }, { "epoch": 1.3474143912493368, "grad_norm": 1.670772910118103, "learning_rate": 1.9736223631298103e-05, "loss": 0.574, "step": 8254 }, { "epoch": 1.347577649891841, "grad_norm": 2.1538875102996826, "learning_rate": 1.9736151222047527e-05, "loss": 0.7407, "step": 8255 }, { "epoch": 1.3477409085343455, "grad_norm": 1.7135804891586304, "learning_rate": 1.9736078802992654e-05, "loss": 0.7331, "step": 8256 }, { "epoch": 1.34790416717685, "grad_norm": 1.590151309967041, "learning_rate": 1.9736006374133553e-05, "loss": 0.6461, "step": 8257 }, { "epoch": 1.3480674258193543, "grad_norm": 1.4683552980422974, "learning_rate": 1.9735933935470294e-05, "loss": 0.5678, "step": 8258 }, { "epoch": 1.3482306844618588, "grad_norm": 1.7317265272140503, "learning_rate": 1.9735861487002954e-05, "loss": 0.6819, "step": 8259 }, { "epoch": 1.3483939431043632, "grad_norm": 1.912006139755249, "learning_rate": 1.9735789028731603e-05, "loss": 0.8791, "step": 8260 }, { "epoch": 1.3485572017468674, "grad_norm": 1.7017078399658203, "learning_rate": 1.9735716560656314e-05, "loss": 0.559, "step": 8261 }, { "epoch": 1.3487204603893719, "grad_norm": 1.7753666639328003, "learning_rate": 1.9735644082777164e-05, "loss": 0.7439, "step": 8262 }, { "epoch": 1.3488837190318763, "grad_norm": 1.7905590534210205, "learning_rate": 1.9735571595094224e-05, "loss": 0.8084, "step": 8263 }, { "epoch": 1.3490469776743805, "grad_norm": 2.5920846462249756, "learning_rate": 1.973549909760756e-05, "loss": 0.709, "step": 8264 }, { "epoch": 1.349210236316885, "grad_norm": 1.9217218160629272, "learning_rate": 1.9735426590317256e-05, "loss": 0.7488, "step": 8265 }, { "epoch": 1.3493734949593894, "grad_norm": 1.7934809923171997, "learning_rate": 1.9735354073223378e-05, "loss": 0.6592, "step": 8266 }, { "epoch": 1.3495367536018938, "grad_norm": 1.7547911405563354, "learning_rate": 1.9735281546326e-05, "loss": 0.718, "step": 8267 }, { "epoch": 1.3497000122443983, "grad_norm": 1.705729603767395, "learning_rate": 1.97352090096252e-05, "loss": 0.7656, "step": 8268 }, { "epoch": 1.3498632708869025, "grad_norm": 1.8302091360092163, "learning_rate": 1.9735136463121043e-05, "loss": 0.7812, "step": 8269 }, { "epoch": 1.350026529529407, "grad_norm": 1.8392120599746704, "learning_rate": 1.9735063906813608e-05, "loss": 0.8434, "step": 8270 }, { "epoch": 1.3501897881719114, "grad_norm": 2.2930102348327637, "learning_rate": 1.9734991340702966e-05, "loss": 1.2712, "step": 8271 }, { "epoch": 1.3503530468144158, "grad_norm": 1.7212575674057007, "learning_rate": 1.973491876478919e-05, "loss": 0.7042, "step": 8272 }, { "epoch": 1.35051630545692, "grad_norm": 1.8384109735488892, "learning_rate": 1.9734846179072352e-05, "loss": 0.8172, "step": 8273 }, { "epoch": 1.3506795640994245, "grad_norm": 1.7737782001495361, "learning_rate": 1.973477358355253e-05, "loss": 0.7011, "step": 8274 }, { "epoch": 1.350842822741929, "grad_norm": 1.3445665836334229, "learning_rate": 1.973470097822979e-05, "loss": 0.5368, "step": 8275 }, { "epoch": 1.3510060813844333, "grad_norm": 1.594993233680725, "learning_rate": 1.9734628363104213e-05, "loss": 0.6287, "step": 8276 }, { "epoch": 1.3511693400269378, "grad_norm": 1.858632206916809, "learning_rate": 1.9734555738175864e-05, "loss": 0.8417, "step": 8277 }, { "epoch": 1.351332598669442, "grad_norm": 1.6728861331939697, "learning_rate": 1.9734483103444823e-05, "loss": 0.6752, "step": 8278 }, { "epoch": 1.3514958573119464, "grad_norm": 1.7547121047973633, "learning_rate": 1.9734410458911158e-05, "loss": 0.7308, "step": 8279 }, { "epoch": 1.3516591159544509, "grad_norm": 1.7454992532730103, "learning_rate": 1.9734337804574943e-05, "loss": 0.687, "step": 8280 }, { "epoch": 1.351822374596955, "grad_norm": 1.7882388830184937, "learning_rate": 1.9734265140436256e-05, "loss": 0.6725, "step": 8281 }, { "epoch": 1.3519856332394595, "grad_norm": 2.1416709423065186, "learning_rate": 1.9734192466495162e-05, "loss": 0.7862, "step": 8282 }, { "epoch": 1.352148891881964, "grad_norm": 1.6497728824615479, "learning_rate": 1.9734119782751742e-05, "loss": 0.7424, "step": 8283 }, { "epoch": 1.3523121505244684, "grad_norm": 1.6605168581008911, "learning_rate": 1.9734047089206064e-05, "loss": 0.7964, "step": 8284 }, { "epoch": 1.3524754091669728, "grad_norm": 1.5457677841186523, "learning_rate": 1.9733974385858205e-05, "loss": 0.657, "step": 8285 }, { "epoch": 1.3526386678094773, "grad_norm": 1.933210015296936, "learning_rate": 1.9733901672708234e-05, "loss": 0.79, "step": 8286 }, { "epoch": 1.3528019264519815, "grad_norm": 1.7117021083831787, "learning_rate": 1.973382894975623e-05, "loss": 0.5921, "step": 8287 }, { "epoch": 1.352965185094486, "grad_norm": 1.8295398950576782, "learning_rate": 1.973375621700226e-05, "loss": 0.7953, "step": 8288 }, { "epoch": 1.3531284437369904, "grad_norm": 1.7313681840896606, "learning_rate": 1.97336834744464e-05, "loss": 0.7181, "step": 8289 }, { "epoch": 1.3532917023794946, "grad_norm": 1.9124257564544678, "learning_rate": 1.9733610722088724e-05, "loss": 0.7201, "step": 8290 }, { "epoch": 1.353454961021999, "grad_norm": 1.8623815774917603, "learning_rate": 1.9733537959929308e-05, "loss": 0.7329, "step": 8291 }, { "epoch": 1.3536182196645035, "grad_norm": 1.6922436952590942, "learning_rate": 1.973346518796822e-05, "loss": 0.5764, "step": 8292 }, { "epoch": 1.353781478307008, "grad_norm": 1.5830777883529663, "learning_rate": 1.973339240620553e-05, "loss": 0.7261, "step": 8293 }, { "epoch": 1.3539447369495123, "grad_norm": 1.9336048364639282, "learning_rate": 1.9733319614641325e-05, "loss": 0.8334, "step": 8294 }, { "epoch": 1.3541079955920168, "grad_norm": 1.6308181285858154, "learning_rate": 1.9733246813275664e-05, "loss": 0.7336, "step": 8295 }, { "epoch": 1.354271254234521, "grad_norm": 1.9394677877426147, "learning_rate": 1.9733174002108626e-05, "loss": 0.7514, "step": 8296 }, { "epoch": 1.3544345128770254, "grad_norm": 1.7411705255508423, "learning_rate": 1.9733101181140285e-05, "loss": 0.6415, "step": 8297 }, { "epoch": 1.3545977715195299, "grad_norm": 1.7820934057235718, "learning_rate": 1.9733028350370715e-05, "loss": 0.7054, "step": 8298 }, { "epoch": 1.354761030162034, "grad_norm": 1.9797648191452026, "learning_rate": 1.973295550979999e-05, "loss": 0.8253, "step": 8299 }, { "epoch": 1.3549242888045385, "grad_norm": 1.7091258764266968, "learning_rate": 1.973288265942818e-05, "loss": 0.7096, "step": 8300 }, { "epoch": 1.355087547447043, "grad_norm": 2.037309169769287, "learning_rate": 1.9732809799255356e-05, "loss": 0.6262, "step": 8301 }, { "epoch": 1.3552508060895474, "grad_norm": 1.6845673322677612, "learning_rate": 1.9732736929281598e-05, "loss": 0.6478, "step": 8302 }, { "epoch": 1.3554140647320518, "grad_norm": 1.7483186721801758, "learning_rate": 1.9732664049506975e-05, "loss": 0.6602, "step": 8303 }, { "epoch": 1.3555773233745563, "grad_norm": 1.6480823755264282, "learning_rate": 1.9732591159931564e-05, "loss": 0.7562, "step": 8304 }, { "epoch": 1.3557405820170605, "grad_norm": 2.142760753631592, "learning_rate": 1.9732518260555436e-05, "loss": 0.7359, "step": 8305 }, { "epoch": 1.355903840659565, "grad_norm": 1.6228832006454468, "learning_rate": 1.9732445351378663e-05, "loss": 0.6438, "step": 8306 }, { "epoch": 1.3560670993020694, "grad_norm": 2.149221420288086, "learning_rate": 1.9732372432401322e-05, "loss": 0.901, "step": 8307 }, { "epoch": 1.3562303579445736, "grad_norm": 2.020387649536133, "learning_rate": 1.9732299503623484e-05, "loss": 0.6961, "step": 8308 }, { "epoch": 1.356393616587078, "grad_norm": 1.8501379489898682, "learning_rate": 1.973222656504522e-05, "loss": 0.8253, "step": 8309 }, { "epoch": 1.3565568752295825, "grad_norm": 1.7977896928787231, "learning_rate": 1.9732153616666608e-05, "loss": 0.7699, "step": 8310 }, { "epoch": 1.356720133872087, "grad_norm": 1.849785327911377, "learning_rate": 1.9732080658487725e-05, "loss": 0.699, "step": 8311 }, { "epoch": 1.3568833925145913, "grad_norm": 1.7371591329574585, "learning_rate": 1.9732007690508634e-05, "loss": 0.6855, "step": 8312 }, { "epoch": 1.3570466511570956, "grad_norm": 1.8989543914794922, "learning_rate": 1.9731934712729415e-05, "loss": 0.7579, "step": 8313 }, { "epoch": 1.3572099097996, "grad_norm": 1.6754093170166016, "learning_rate": 1.973186172515014e-05, "loss": 0.8034, "step": 8314 }, { "epoch": 1.3573731684421044, "grad_norm": 1.8184170722961426, "learning_rate": 1.9731788727770885e-05, "loss": 0.7807, "step": 8315 }, { "epoch": 1.3575364270846089, "grad_norm": 1.8979014158248901, "learning_rate": 1.973171572059172e-05, "loss": 0.7901, "step": 8316 }, { "epoch": 1.357699685727113, "grad_norm": 1.8058786392211914, "learning_rate": 1.973164270361272e-05, "loss": 0.6959, "step": 8317 }, { "epoch": 1.3578629443696175, "grad_norm": 1.9051337242126465, "learning_rate": 1.9731569676833956e-05, "loss": 0.682, "step": 8318 }, { "epoch": 1.358026203012122, "grad_norm": 1.8176357746124268, "learning_rate": 1.9731496640255506e-05, "loss": 0.724, "step": 8319 }, { "epoch": 1.3581894616546264, "grad_norm": 1.7155450582504272, "learning_rate": 1.973142359387744e-05, "loss": 0.7476, "step": 8320 }, { "epoch": 1.3583527202971308, "grad_norm": 2.5844178199768066, "learning_rate": 1.9731350537699834e-05, "loss": 0.723, "step": 8321 }, { "epoch": 1.358515978939635, "grad_norm": 1.807054042816162, "learning_rate": 1.973127747172276e-05, "loss": 0.8197, "step": 8322 }, { "epoch": 1.3586792375821395, "grad_norm": 1.4998122453689575, "learning_rate": 1.9731204395946296e-05, "loss": 0.7217, "step": 8323 }, { "epoch": 1.358842496224644, "grad_norm": 1.4987891912460327, "learning_rate": 1.9731131310370507e-05, "loss": 0.6774, "step": 8324 }, { "epoch": 1.3590057548671481, "grad_norm": 2.301417589187622, "learning_rate": 1.9731058214995475e-05, "loss": 0.7901, "step": 8325 }, { "epoch": 1.3591690135096526, "grad_norm": 1.8290350437164307, "learning_rate": 1.9730985109821268e-05, "loss": 0.8052, "step": 8326 }, { "epoch": 1.359332272152157, "grad_norm": 1.6052716970443726, "learning_rate": 1.973091199484796e-05, "loss": 0.6198, "step": 8327 }, { "epoch": 1.3594955307946615, "grad_norm": 1.7550108432769775, "learning_rate": 1.973083887007563e-05, "loss": 0.7344, "step": 8328 }, { "epoch": 1.359658789437166, "grad_norm": 1.6170223951339722, "learning_rate": 1.9730765735504345e-05, "loss": 0.5764, "step": 8329 }, { "epoch": 1.3598220480796703, "grad_norm": 1.6686460971832275, "learning_rate": 1.9730692591134182e-05, "loss": 0.6056, "step": 8330 }, { "epoch": 1.3599853067221745, "grad_norm": 1.6460363864898682, "learning_rate": 1.9730619436965215e-05, "loss": 0.6323, "step": 8331 }, { "epoch": 1.360148565364679, "grad_norm": 1.4896211624145508, "learning_rate": 1.9730546272997516e-05, "loss": 0.5703, "step": 8332 }, { "epoch": 1.3603118240071834, "grad_norm": 1.9263339042663574, "learning_rate": 1.973047309923116e-05, "loss": 0.6436, "step": 8333 }, { "epoch": 1.3604750826496876, "grad_norm": 1.7282179594039917, "learning_rate": 1.973039991566622e-05, "loss": 0.6072, "step": 8334 }, { "epoch": 1.360638341292192, "grad_norm": 1.7295589447021484, "learning_rate": 1.973032672230277e-05, "loss": 0.6472, "step": 8335 }, { "epoch": 1.3608015999346965, "grad_norm": 1.7596914768218994, "learning_rate": 1.973025351914088e-05, "loss": 0.7705, "step": 8336 }, { "epoch": 1.360964858577201, "grad_norm": 2.0013060569763184, "learning_rate": 1.973018030618063e-05, "loss": 0.6773, "step": 8337 }, { "epoch": 1.3611281172197054, "grad_norm": 1.6858500242233276, "learning_rate": 1.9730107083422095e-05, "loss": 0.7056, "step": 8338 }, { "epoch": 1.3612913758622098, "grad_norm": 1.834686517715454, "learning_rate": 1.973003385086534e-05, "loss": 0.6982, "step": 8339 }, { "epoch": 1.361454634504714, "grad_norm": 1.5939444303512573, "learning_rate": 1.9729960608510444e-05, "loss": 0.6232, "step": 8340 }, { "epoch": 1.3616178931472185, "grad_norm": 2.04644775390625, "learning_rate": 1.972988735635748e-05, "loss": 0.6972, "step": 8341 }, { "epoch": 1.361781151789723, "grad_norm": 1.950292944908142, "learning_rate": 1.9729814094406525e-05, "loss": 0.6302, "step": 8342 }, { "epoch": 1.3619444104322271, "grad_norm": 1.7416141033172607, "learning_rate": 1.9729740822657648e-05, "loss": 0.5998, "step": 8343 }, { "epoch": 1.3621076690747316, "grad_norm": 1.8044193983078003, "learning_rate": 1.9729667541110923e-05, "loss": 0.6572, "step": 8344 }, { "epoch": 1.362270927717236, "grad_norm": 2.0061612129211426, "learning_rate": 1.972959424976643e-05, "loss": 0.678, "step": 8345 }, { "epoch": 1.3624341863597405, "grad_norm": 2.8036489486694336, "learning_rate": 1.9729520948624232e-05, "loss": 0.7473, "step": 8346 }, { "epoch": 1.362597445002245, "grad_norm": 2.05035138130188, "learning_rate": 1.972944763768441e-05, "loss": 0.7647, "step": 8347 }, { "epoch": 1.3627607036447493, "grad_norm": 1.7214281558990479, "learning_rate": 1.972937431694704e-05, "loss": 0.6989, "step": 8348 }, { "epoch": 1.3629239622872535, "grad_norm": 1.5661144256591797, "learning_rate": 1.9729300986412193e-05, "loss": 0.5184, "step": 8349 }, { "epoch": 1.363087220929758, "grad_norm": 1.9768506288528442, "learning_rate": 1.9729227646079942e-05, "loss": 0.7588, "step": 8350 }, { "epoch": 1.3632504795722624, "grad_norm": 2.1065573692321777, "learning_rate": 1.972915429595036e-05, "loss": 0.7968, "step": 8351 }, { "epoch": 1.3634137382147666, "grad_norm": 1.6749401092529297, "learning_rate": 1.9729080936023522e-05, "loss": 0.6017, "step": 8352 }, { "epoch": 1.363576996857271, "grad_norm": 1.9006191492080688, "learning_rate": 1.97290075662995e-05, "loss": 0.7278, "step": 8353 }, { "epoch": 1.3637402554997755, "grad_norm": 1.978261947631836, "learning_rate": 1.9728934186778374e-05, "loss": 0.7953, "step": 8354 }, { "epoch": 1.36390351414228, "grad_norm": 1.6980211734771729, "learning_rate": 1.9728860797460213e-05, "loss": 0.6293, "step": 8355 }, { "epoch": 1.3640667727847844, "grad_norm": 1.7803338766098022, "learning_rate": 1.972878739834509e-05, "loss": 0.7124, "step": 8356 }, { "epoch": 1.3642300314272886, "grad_norm": 2.0836610794067383, "learning_rate": 1.9728713989433082e-05, "loss": 0.7916, "step": 8357 }, { "epoch": 1.364393290069793, "grad_norm": 2.059305429458618, "learning_rate": 1.972864057072426e-05, "loss": 0.7813, "step": 8358 }, { "epoch": 1.3645565487122975, "grad_norm": 1.6354259252548218, "learning_rate": 1.9728567142218705e-05, "loss": 0.6936, "step": 8359 }, { "epoch": 1.364719807354802, "grad_norm": 1.8395733833312988, "learning_rate": 1.972849370391648e-05, "loss": 0.6993, "step": 8360 }, { "epoch": 1.3648830659973061, "grad_norm": 1.5453143119812012, "learning_rate": 1.9728420255817666e-05, "loss": 0.5523, "step": 8361 }, { "epoch": 1.3650463246398106, "grad_norm": 1.8116971254348755, "learning_rate": 1.9728346797922334e-05, "loss": 0.5959, "step": 8362 }, { "epoch": 1.365209583282315, "grad_norm": 1.4897252321243286, "learning_rate": 1.972827333023056e-05, "loss": 0.5545, "step": 8363 }, { "epoch": 1.3653728419248194, "grad_norm": 2.1675009727478027, "learning_rate": 1.972819985274242e-05, "loss": 0.709, "step": 8364 }, { "epoch": 1.3655361005673239, "grad_norm": 1.7929800748825073, "learning_rate": 1.9728126365457982e-05, "loss": 0.7585, "step": 8365 }, { "epoch": 1.365699359209828, "grad_norm": 1.8542165756225586, "learning_rate": 1.9728052868377324e-05, "loss": 0.6729, "step": 8366 }, { "epoch": 1.3658626178523325, "grad_norm": 1.7181657552719116, "learning_rate": 1.972797936150052e-05, "loss": 0.6551, "step": 8367 }, { "epoch": 1.366025876494837, "grad_norm": 1.775887131690979, "learning_rate": 1.9727905844827644e-05, "loss": 0.7949, "step": 8368 }, { "epoch": 1.3661891351373414, "grad_norm": 1.7418924570083618, "learning_rate": 1.9727832318358765e-05, "loss": 0.5483, "step": 8369 }, { "epoch": 1.3663523937798456, "grad_norm": 1.5054987668991089, "learning_rate": 1.972775878209397e-05, "loss": 0.6097, "step": 8370 }, { "epoch": 1.36651565242235, "grad_norm": 2.1408700942993164, "learning_rate": 1.9727685236033317e-05, "loss": 0.8409, "step": 8371 }, { "epoch": 1.3666789110648545, "grad_norm": 1.7996951341629028, "learning_rate": 1.972761168017689e-05, "loss": 0.6961, "step": 8372 }, { "epoch": 1.366842169707359, "grad_norm": 1.6046819686889648, "learning_rate": 1.972753811452476e-05, "loss": 0.589, "step": 8373 }, { "epoch": 1.3670054283498634, "grad_norm": 1.8360761404037476, "learning_rate": 1.9727464539077004e-05, "loss": 0.6544, "step": 8374 }, { "epoch": 1.3671686869923676, "grad_norm": 1.8914958238601685, "learning_rate": 1.972739095383369e-05, "loss": 0.6764, "step": 8375 }, { "epoch": 1.367331945634872, "grad_norm": 1.4578981399536133, "learning_rate": 1.97273173587949e-05, "loss": 0.5745, "step": 8376 }, { "epoch": 1.3674952042773765, "grad_norm": 1.8077691793441772, "learning_rate": 1.97272437539607e-05, "loss": 0.6334, "step": 8377 }, { "epoch": 1.3676584629198807, "grad_norm": 1.7290151119232178, "learning_rate": 1.972717013933117e-05, "loss": 0.6115, "step": 8378 }, { "epoch": 1.3678217215623851, "grad_norm": 1.860940933227539, "learning_rate": 1.9727096514906384e-05, "loss": 0.7351, "step": 8379 }, { "epoch": 1.3679849802048896, "grad_norm": 1.5859614610671997, "learning_rate": 1.9727022880686413e-05, "loss": 0.6391, "step": 8380 }, { "epoch": 1.368148238847394, "grad_norm": 1.8746851682662964, "learning_rate": 1.9726949236671332e-05, "loss": 0.8745, "step": 8381 }, { "epoch": 1.3683114974898984, "grad_norm": 1.9152168035507202, "learning_rate": 1.9726875582861217e-05, "loss": 0.7353, "step": 8382 }, { "epoch": 1.3684747561324029, "grad_norm": 1.7264350652694702, "learning_rate": 1.9726801919256138e-05, "loss": 0.633, "step": 8383 }, { "epoch": 1.368638014774907, "grad_norm": 1.8237354755401611, "learning_rate": 1.9726728245856174e-05, "loss": 0.8483, "step": 8384 }, { "epoch": 1.3688012734174115, "grad_norm": 1.8498300313949585, "learning_rate": 1.9726654562661398e-05, "loss": 0.6789, "step": 8385 }, { "epoch": 1.368964532059916, "grad_norm": 1.6092272996902466, "learning_rate": 1.9726580869671882e-05, "loss": 0.6186, "step": 8386 }, { "epoch": 1.3691277907024202, "grad_norm": 1.704261302947998, "learning_rate": 1.9726507166887704e-05, "loss": 0.5943, "step": 8387 }, { "epoch": 1.3692910493449246, "grad_norm": 1.932511806488037, "learning_rate": 1.9726433454308932e-05, "loss": 0.7102, "step": 8388 }, { "epoch": 1.369454307987429, "grad_norm": 1.6476396322250366, "learning_rate": 1.9726359731935646e-05, "loss": 0.6825, "step": 8389 }, { "epoch": 1.3696175666299335, "grad_norm": 1.7646197080612183, "learning_rate": 1.972628599976792e-05, "loss": 0.6763, "step": 8390 }, { "epoch": 1.369780825272438, "grad_norm": 1.9146653413772583, "learning_rate": 1.9726212257805825e-05, "loss": 0.6274, "step": 8391 }, { "epoch": 1.3699440839149424, "grad_norm": 2.177828311920166, "learning_rate": 1.9726138506049438e-05, "loss": 0.8401, "step": 8392 }, { "epoch": 1.3701073425574466, "grad_norm": 1.65445077419281, "learning_rate": 1.972606474449883e-05, "loss": 0.7258, "step": 8393 }, { "epoch": 1.370270601199951, "grad_norm": 1.7009174823760986, "learning_rate": 1.972599097315408e-05, "loss": 0.63, "step": 8394 }, { "epoch": 1.3704338598424555, "grad_norm": 1.6280282735824585, "learning_rate": 1.9725917192015254e-05, "loss": 0.6838, "step": 8395 }, { "epoch": 1.3705971184849597, "grad_norm": 1.4994230270385742, "learning_rate": 1.9725843401082438e-05, "loss": 0.6918, "step": 8396 }, { "epoch": 1.3707603771274641, "grad_norm": 1.7638756036758423, "learning_rate": 1.97257696003557e-05, "loss": 0.6169, "step": 8397 }, { "epoch": 1.3709236357699686, "grad_norm": 1.8542600870132446, "learning_rate": 1.9725695789835112e-05, "loss": 0.6576, "step": 8398 }, { "epoch": 1.371086894412473, "grad_norm": 1.7300984859466553, "learning_rate": 1.9725621969520753e-05, "loss": 0.598, "step": 8399 }, { "epoch": 1.3712501530549774, "grad_norm": 1.796614170074463, "learning_rate": 1.9725548139412693e-05, "loss": 0.5613, "step": 8400 }, { "epoch": 1.3714134116974817, "grad_norm": 1.8692671060562134, "learning_rate": 1.9725474299511008e-05, "loss": 0.6775, "step": 8401 }, { "epoch": 1.371576670339986, "grad_norm": 1.8673346042633057, "learning_rate": 1.972540044981578e-05, "loss": 0.6933, "step": 8402 }, { "epoch": 1.3717399289824905, "grad_norm": 1.405704379081726, "learning_rate": 1.9725326590327066e-05, "loss": 0.538, "step": 8403 }, { "epoch": 1.371903187624995, "grad_norm": 1.8175907135009766, "learning_rate": 1.9725252721044956e-05, "loss": 0.6977, "step": 8404 }, { "epoch": 1.3720664462674992, "grad_norm": 1.5783928632736206, "learning_rate": 1.9725178841969522e-05, "loss": 0.7136, "step": 8405 }, { "epoch": 1.3722297049100036, "grad_norm": 1.7211568355560303, "learning_rate": 1.972510495310083e-05, "loss": 0.6778, "step": 8406 }, { "epoch": 1.372392963552508, "grad_norm": 1.9487515687942505, "learning_rate": 1.972503105443896e-05, "loss": 0.7074, "step": 8407 }, { "epoch": 1.3725562221950125, "grad_norm": 2.003492593765259, "learning_rate": 1.972495714598399e-05, "loss": 1.4301, "step": 8408 }, { "epoch": 1.372719480837517, "grad_norm": 1.524784803390503, "learning_rate": 1.972488322773599e-05, "loss": 0.592, "step": 8409 }, { "epoch": 1.3728827394800212, "grad_norm": 2.12522554397583, "learning_rate": 1.9724809299695035e-05, "loss": 0.718, "step": 8410 }, { "epoch": 1.3730459981225256, "grad_norm": 1.7248750925064087, "learning_rate": 1.97247353618612e-05, "loss": 0.6978, "step": 8411 }, { "epoch": 1.37320925676503, "grad_norm": 1.4759026765823364, "learning_rate": 1.9724661414234556e-05, "loss": 0.5639, "step": 8412 }, { "epoch": 1.3733725154075345, "grad_norm": 1.5456266403198242, "learning_rate": 1.9724587456815184e-05, "loss": 0.5608, "step": 8413 }, { "epoch": 1.3735357740500387, "grad_norm": 1.664540410041809, "learning_rate": 1.9724513489603153e-05, "loss": 0.6107, "step": 8414 }, { "epoch": 1.3736990326925431, "grad_norm": 1.495896816253662, "learning_rate": 1.972443951259854e-05, "loss": 0.5233, "step": 8415 }, { "epoch": 1.3738622913350476, "grad_norm": 1.6689540147781372, "learning_rate": 1.972436552580142e-05, "loss": 0.7058, "step": 8416 }, { "epoch": 1.374025549977552, "grad_norm": 1.6329103708267212, "learning_rate": 1.9724291529211866e-05, "loss": 0.6715, "step": 8417 }, { "epoch": 1.3741888086200564, "grad_norm": 1.9518340826034546, "learning_rate": 1.972421752282995e-05, "loss": 0.6714, "step": 8418 }, { "epoch": 1.3743520672625607, "grad_norm": 2.0537843704223633, "learning_rate": 1.9724143506655757e-05, "loss": 0.8565, "step": 8419 }, { "epoch": 1.374515325905065, "grad_norm": 1.83000910282135, "learning_rate": 1.9724069480689346e-05, "loss": 0.6048, "step": 8420 }, { "epoch": 1.3746785845475695, "grad_norm": 2.023005962371826, "learning_rate": 1.9723995444930804e-05, "loss": 0.7768, "step": 8421 }, { "epoch": 1.3748418431900737, "grad_norm": 1.9521901607513428, "learning_rate": 1.97239213993802e-05, "loss": 0.7615, "step": 8422 }, { "epoch": 1.3750051018325782, "grad_norm": 2.0240862369537354, "learning_rate": 1.972384734403761e-05, "loss": 0.84, "step": 8423 }, { "epoch": 1.3751683604750826, "grad_norm": 1.9210774898529053, "learning_rate": 1.9723773278903106e-05, "loss": 0.7791, "step": 8424 }, { "epoch": 1.375331619117587, "grad_norm": 1.6599315404891968, "learning_rate": 1.9723699203976768e-05, "loss": 0.6974, "step": 8425 }, { "epoch": 1.3754948777600915, "grad_norm": 1.7277590036392212, "learning_rate": 1.9723625119258668e-05, "loss": 0.6386, "step": 8426 }, { "epoch": 1.375658136402596, "grad_norm": 1.7206592559814453, "learning_rate": 1.9723551024748878e-05, "loss": 0.6766, "step": 8427 }, { "epoch": 1.3758213950451001, "grad_norm": 1.845370888710022, "learning_rate": 1.9723476920447474e-05, "loss": 0.6321, "step": 8428 }, { "epoch": 1.3759846536876046, "grad_norm": 1.9309136867523193, "learning_rate": 1.9723402806354533e-05, "loss": 0.796, "step": 8429 }, { "epoch": 1.376147912330109, "grad_norm": 1.4519853591918945, "learning_rate": 1.9723328682470127e-05, "loss": 0.5607, "step": 8430 }, { "epoch": 1.3763111709726132, "grad_norm": 1.7722735404968262, "learning_rate": 1.972325454879433e-05, "loss": 0.7211, "step": 8431 }, { "epoch": 1.3764744296151177, "grad_norm": 1.5729968547821045, "learning_rate": 1.9723180405327224e-05, "loss": 0.596, "step": 8432 }, { "epoch": 1.3766376882576221, "grad_norm": 1.8054226636886597, "learning_rate": 1.972310625206887e-05, "loss": 0.675, "step": 8433 }, { "epoch": 1.3768009469001266, "grad_norm": 1.6092917919158936, "learning_rate": 1.9723032089019356e-05, "loss": 0.6061, "step": 8434 }, { "epoch": 1.376964205542631, "grad_norm": 1.646209478378296, "learning_rate": 1.972295791617875e-05, "loss": 0.7091, "step": 8435 }, { "epoch": 1.3771274641851354, "grad_norm": 1.661869764328003, "learning_rate": 1.9722883733547128e-05, "loss": 0.6502, "step": 8436 }, { "epoch": 1.3772907228276396, "grad_norm": 2.031036853790283, "learning_rate": 1.9722809541124567e-05, "loss": 0.6234, "step": 8437 }, { "epoch": 1.377453981470144, "grad_norm": 1.404412865638733, "learning_rate": 1.9722735338911136e-05, "loss": 0.5742, "step": 8438 }, { "epoch": 1.3776172401126485, "grad_norm": 1.6166009902954102, "learning_rate": 1.9722661126906915e-05, "loss": 0.6311, "step": 8439 }, { "epoch": 1.3777804987551527, "grad_norm": 1.8461928367614746, "learning_rate": 1.9722586905111975e-05, "loss": 0.7471, "step": 8440 }, { "epoch": 1.3779437573976572, "grad_norm": 1.308459758758545, "learning_rate": 1.9722512673526392e-05, "loss": 0.5337, "step": 8441 }, { "epoch": 1.3781070160401616, "grad_norm": 1.8069099187850952, "learning_rate": 1.9722438432150243e-05, "loss": 0.6939, "step": 8442 }, { "epoch": 1.378270274682666, "grad_norm": 1.8021799325942993, "learning_rate": 1.9722364180983603e-05, "loss": 0.636, "step": 8443 }, { "epoch": 1.3784335333251705, "grad_norm": 1.7545087337493896, "learning_rate": 1.9722289920026543e-05, "loss": 0.606, "step": 8444 }, { "epoch": 1.3785967919676747, "grad_norm": 1.8423954248428345, "learning_rate": 1.9722215649279136e-05, "loss": 0.8203, "step": 8445 }, { "epoch": 1.3787600506101791, "grad_norm": 1.6208566427230835, "learning_rate": 1.9722141368741466e-05, "loss": 0.6257, "step": 8446 }, { "epoch": 1.3789233092526836, "grad_norm": 1.9168658256530762, "learning_rate": 1.97220670784136e-05, "loss": 0.7611, "step": 8447 }, { "epoch": 1.379086567895188, "grad_norm": 1.3439618349075317, "learning_rate": 1.9721992778295617e-05, "loss": 0.427, "step": 8448 }, { "epoch": 1.3792498265376922, "grad_norm": 1.9709808826446533, "learning_rate": 1.9721918468387588e-05, "loss": 0.7794, "step": 8449 }, { "epoch": 1.3794130851801967, "grad_norm": 2.0591931343078613, "learning_rate": 1.972184414868959e-05, "loss": 0.7943, "step": 8450 }, { "epoch": 1.3795763438227011, "grad_norm": 2.0306127071380615, "learning_rate": 1.9721769819201694e-05, "loss": 0.7248, "step": 8451 }, { "epoch": 1.3797396024652056, "grad_norm": 2.016033887863159, "learning_rate": 1.9721695479923983e-05, "loss": 0.7622, "step": 8452 }, { "epoch": 1.37990286110771, "grad_norm": 1.3038848638534546, "learning_rate": 1.9721621130856526e-05, "loss": 0.5002, "step": 8453 }, { "epoch": 1.3800661197502142, "grad_norm": 1.81847083568573, "learning_rate": 1.9721546771999398e-05, "loss": 0.7565, "step": 8454 }, { "epoch": 1.3802293783927186, "grad_norm": 1.5064761638641357, "learning_rate": 1.9721472403352676e-05, "loss": 0.6592, "step": 8455 }, { "epoch": 1.380392637035223, "grad_norm": 1.9900223016738892, "learning_rate": 1.9721398024916435e-05, "loss": 0.8305, "step": 8456 }, { "epoch": 1.3805558956777275, "grad_norm": 2.13301420211792, "learning_rate": 1.9721323636690746e-05, "loss": 0.8289, "step": 8457 }, { "epoch": 1.3807191543202317, "grad_norm": 2.0816996097564697, "learning_rate": 1.9721249238675688e-05, "loss": 0.8236, "step": 8458 }, { "epoch": 1.3808824129627362, "grad_norm": 1.7025365829467773, "learning_rate": 1.9721174830871334e-05, "loss": 0.7274, "step": 8459 }, { "epoch": 1.3810456716052406, "grad_norm": 1.9764474630355835, "learning_rate": 1.9721100413277763e-05, "loss": 0.6989, "step": 8460 }, { "epoch": 1.381208930247745, "grad_norm": 2.0128912925720215, "learning_rate": 1.9721025985895042e-05, "loss": 0.7921, "step": 8461 }, { "epoch": 1.3813721888902495, "grad_norm": 1.8539080619812012, "learning_rate": 1.9720951548723253e-05, "loss": 0.8018, "step": 8462 }, { "epoch": 1.3815354475327537, "grad_norm": 1.7069453001022339, "learning_rate": 1.9720877101762467e-05, "loss": 0.6945, "step": 8463 }, { "epoch": 1.3816987061752581, "grad_norm": 1.773798942565918, "learning_rate": 1.9720802645012762e-05, "loss": 0.7168, "step": 8464 }, { "epoch": 1.3818619648177626, "grad_norm": 1.5558085441589355, "learning_rate": 1.9720728178474208e-05, "loss": 0.6448, "step": 8465 }, { "epoch": 1.3820252234602668, "grad_norm": 1.868607997894287, "learning_rate": 1.9720653702146885e-05, "loss": 0.742, "step": 8466 }, { "epoch": 1.3821884821027712, "grad_norm": 1.3872383832931519, "learning_rate": 1.972057921603087e-05, "loss": 0.524, "step": 8467 }, { "epoch": 1.3823517407452757, "grad_norm": 1.7063668966293335, "learning_rate": 1.972050472012623e-05, "loss": 0.7065, "step": 8468 }, { "epoch": 1.38251499938778, "grad_norm": 1.5860522985458374, "learning_rate": 1.9720430214433045e-05, "loss": 0.6801, "step": 8469 }, { "epoch": 1.3826782580302845, "grad_norm": 1.6875216960906982, "learning_rate": 1.9720355698951387e-05, "loss": 0.6728, "step": 8470 }, { "epoch": 1.382841516672789, "grad_norm": 1.8280620574951172, "learning_rate": 1.972028117368134e-05, "loss": 0.7737, "step": 8471 }, { "epoch": 1.3830047753152932, "grad_norm": 1.9733465909957886, "learning_rate": 1.972020663862297e-05, "loss": 0.7799, "step": 8472 }, { "epoch": 1.3831680339577976, "grad_norm": 1.6831047534942627, "learning_rate": 1.972013209377635e-05, "loss": 0.6566, "step": 8473 }, { "epoch": 1.383331292600302, "grad_norm": 2.041114330291748, "learning_rate": 1.9720057539141563e-05, "loss": 0.8495, "step": 8474 }, { "epoch": 1.3834945512428063, "grad_norm": 1.895912528038025, "learning_rate": 1.971998297471868e-05, "loss": 0.7798, "step": 8475 }, { "epoch": 1.3836578098853107, "grad_norm": 1.6067018508911133, "learning_rate": 1.9719908400507778e-05, "loss": 0.6908, "step": 8476 }, { "epoch": 1.3838210685278152, "grad_norm": 1.7075824737548828, "learning_rate": 1.971983381650893e-05, "loss": 0.6489, "step": 8477 }, { "epoch": 1.3839843271703196, "grad_norm": 1.7513080835342407, "learning_rate": 1.971975922272221e-05, "loss": 0.8004, "step": 8478 }, { "epoch": 1.384147585812824, "grad_norm": 2.122152805328369, "learning_rate": 1.9719684619147698e-05, "loss": 0.7697, "step": 8479 }, { "epoch": 1.3843108444553285, "grad_norm": 1.645156979560852, "learning_rate": 1.9719610005785466e-05, "loss": 0.6356, "step": 8480 }, { "epoch": 1.3844741030978327, "grad_norm": 1.6937166452407837, "learning_rate": 1.9719535382635586e-05, "loss": 0.8289, "step": 8481 }, { "epoch": 1.3846373617403371, "grad_norm": 1.561585545539856, "learning_rate": 1.9719460749698142e-05, "loss": 0.5439, "step": 8482 }, { "epoch": 1.3848006203828416, "grad_norm": 1.5185095071792603, "learning_rate": 1.97193861069732e-05, "loss": 0.5596, "step": 8483 }, { "epoch": 1.3849638790253458, "grad_norm": 1.5849449634552002, "learning_rate": 1.9719311454460836e-05, "loss": 0.5882, "step": 8484 }, { "epoch": 1.3851271376678502, "grad_norm": 1.8550829887390137, "learning_rate": 1.971923679216113e-05, "loss": 0.663, "step": 8485 }, { "epoch": 1.3852903963103547, "grad_norm": 1.7792022228240967, "learning_rate": 1.9719162120074156e-05, "loss": 0.8061, "step": 8486 }, { "epoch": 1.385453654952859, "grad_norm": 1.7230370044708252, "learning_rate": 1.971908743819999e-05, "loss": 0.5245, "step": 8487 }, { "epoch": 1.3856169135953635, "grad_norm": 1.7696950435638428, "learning_rate": 1.9719012746538704e-05, "loss": 0.7117, "step": 8488 }, { "epoch": 1.385780172237868, "grad_norm": 1.7883312702178955, "learning_rate": 1.9718938045090375e-05, "loss": 0.7331, "step": 8489 }, { "epoch": 1.3859434308803722, "grad_norm": 1.7699166536331177, "learning_rate": 1.971886333385508e-05, "loss": 0.6431, "step": 8490 }, { "epoch": 1.3861066895228766, "grad_norm": 1.5653668642044067, "learning_rate": 1.9718788612832886e-05, "loss": 0.5702, "step": 8491 }, { "epoch": 1.386269948165381, "grad_norm": 1.8735970258712769, "learning_rate": 1.971871388202388e-05, "loss": 0.803, "step": 8492 }, { "epoch": 1.3864332068078853, "grad_norm": 1.7645715475082397, "learning_rate": 1.971863914142813e-05, "loss": 0.6647, "step": 8493 }, { "epoch": 1.3865964654503897, "grad_norm": 1.8286306858062744, "learning_rate": 1.9718564391045712e-05, "loss": 0.7621, "step": 8494 }, { "epoch": 1.3867597240928942, "grad_norm": 1.7723857164382935, "learning_rate": 1.9718489630876703e-05, "loss": 0.7225, "step": 8495 }, { "epoch": 1.3869229827353986, "grad_norm": 1.737130880355835, "learning_rate": 1.9718414860921176e-05, "loss": 0.7053, "step": 8496 }, { "epoch": 1.387086241377903, "grad_norm": 2.0651021003723145, "learning_rate": 1.971834008117921e-05, "loss": 0.8018, "step": 8497 }, { "epoch": 1.3872495000204073, "grad_norm": 2.4117748737335205, "learning_rate": 1.971826529165088e-05, "loss": 0.7719, "step": 8498 }, { "epoch": 1.3874127586629117, "grad_norm": 1.6619681119918823, "learning_rate": 1.9718190492336257e-05, "loss": 0.6618, "step": 8499 }, { "epoch": 1.3875760173054161, "grad_norm": 1.7703887224197388, "learning_rate": 1.9718115683235418e-05, "loss": 0.6855, "step": 8500 }, { "epoch": 1.3877392759479206, "grad_norm": 1.65807044506073, "learning_rate": 1.971804086434844e-05, "loss": 0.5962, "step": 8501 }, { "epoch": 1.3879025345904248, "grad_norm": 1.7825350761413574, "learning_rate": 1.97179660356754e-05, "loss": 0.6879, "step": 8502 }, { "epoch": 1.3880657932329292, "grad_norm": 1.7502659559249878, "learning_rate": 1.9717891197216367e-05, "loss": 0.5989, "step": 8503 }, { "epoch": 1.3882290518754337, "grad_norm": 1.7276153564453125, "learning_rate": 1.971781634897142e-05, "loss": 0.6905, "step": 8504 }, { "epoch": 1.388392310517938, "grad_norm": 1.6576237678527832, "learning_rate": 1.9717741490940637e-05, "loss": 0.6274, "step": 8505 }, { "epoch": 1.3885555691604425, "grad_norm": 1.527543306350708, "learning_rate": 1.9717666623124087e-05, "loss": 0.5538, "step": 8506 }, { "epoch": 1.3887188278029468, "grad_norm": 1.7954412698745728, "learning_rate": 1.9717591745521854e-05, "loss": 0.6967, "step": 8507 }, { "epoch": 1.3888820864454512, "grad_norm": 1.86507248878479, "learning_rate": 1.9717516858134008e-05, "loss": 0.9216, "step": 8508 }, { "epoch": 1.3890453450879556, "grad_norm": 2.1200945377349854, "learning_rate": 1.9717441960960624e-05, "loss": 0.7155, "step": 8509 }, { "epoch": 1.3892086037304598, "grad_norm": 1.9099801778793335, "learning_rate": 1.9717367054001774e-05, "loss": 0.8699, "step": 8510 }, { "epoch": 1.3893718623729643, "grad_norm": 1.8651418685913086, "learning_rate": 1.9717292137257545e-05, "loss": 0.7518, "step": 8511 }, { "epoch": 1.3895351210154687, "grad_norm": 1.9073890447616577, "learning_rate": 1.9717217210728002e-05, "loss": 0.7477, "step": 8512 }, { "epoch": 1.3896983796579732, "grad_norm": 1.4458518028259277, "learning_rate": 1.9717142274413223e-05, "loss": 0.5297, "step": 8513 }, { "epoch": 1.3898616383004776, "grad_norm": 2.2982072830200195, "learning_rate": 1.971706732831329e-05, "loss": 0.8076, "step": 8514 }, { "epoch": 1.390024896942982, "grad_norm": 1.7847076654434204, "learning_rate": 1.9716992372428267e-05, "loss": 0.8157, "step": 8515 }, { "epoch": 1.3901881555854863, "grad_norm": 1.7081732749938965, "learning_rate": 1.9716917406758236e-05, "loss": 0.657, "step": 8516 }, { "epoch": 1.3903514142279907, "grad_norm": 1.6708954572677612, "learning_rate": 1.9716842431303273e-05, "loss": 0.6302, "step": 8517 }, { "epoch": 1.3905146728704951, "grad_norm": 1.8395391702651978, "learning_rate": 1.971676744606345e-05, "loss": 0.6845, "step": 8518 }, { "epoch": 1.3906779315129993, "grad_norm": 1.5903441905975342, "learning_rate": 1.971669245103885e-05, "loss": 0.5573, "step": 8519 }, { "epoch": 1.3908411901555038, "grad_norm": 1.6655635833740234, "learning_rate": 1.9716617446229537e-05, "loss": 0.6297, "step": 8520 }, { "epoch": 1.3910044487980082, "grad_norm": 1.5163934230804443, "learning_rate": 1.9716542431635598e-05, "loss": 0.6018, "step": 8521 }, { "epoch": 1.3911677074405127, "grad_norm": 1.642490029335022, "learning_rate": 1.97164674072571e-05, "loss": 0.7182, "step": 8522 }, { "epoch": 1.391330966083017, "grad_norm": 1.523086428642273, "learning_rate": 1.971639237309412e-05, "loss": 0.681, "step": 8523 }, { "epoch": 1.3914942247255215, "grad_norm": 2.526933431625366, "learning_rate": 1.971631732914674e-05, "loss": 0.7705, "step": 8524 }, { "epoch": 1.3916574833680257, "grad_norm": 1.7300961017608643, "learning_rate": 1.971624227541503e-05, "loss": 0.7853, "step": 8525 }, { "epoch": 1.3918207420105302, "grad_norm": 1.736971139907837, "learning_rate": 1.9716167211899067e-05, "loss": 0.6571, "step": 8526 }, { "epoch": 1.3919840006530346, "grad_norm": 1.771987795829773, "learning_rate": 1.9716092138598924e-05, "loss": 0.6783, "step": 8527 }, { "epoch": 1.3921472592955388, "grad_norm": 1.593761920928955, "learning_rate": 1.971601705551468e-05, "loss": 0.7499, "step": 8528 }, { "epoch": 1.3923105179380433, "grad_norm": 1.7637159824371338, "learning_rate": 1.971594196264641e-05, "loss": 0.7221, "step": 8529 }, { "epoch": 1.3924737765805477, "grad_norm": 1.7757338285446167, "learning_rate": 1.9715866859994187e-05, "loss": 0.7513, "step": 8530 }, { "epoch": 1.3926370352230522, "grad_norm": 1.885926365852356, "learning_rate": 1.9715791747558093e-05, "loss": 0.7363, "step": 8531 }, { "epoch": 1.3928002938655566, "grad_norm": 1.9472426176071167, "learning_rate": 1.9715716625338196e-05, "loss": 0.7103, "step": 8532 }, { "epoch": 1.392963552508061, "grad_norm": 1.7686352729797363, "learning_rate": 1.9715641493334574e-05, "loss": 0.6079, "step": 8533 }, { "epoch": 1.3931268111505652, "grad_norm": 1.8642363548278809, "learning_rate": 1.9715566351547305e-05, "loss": 0.8424, "step": 8534 }, { "epoch": 1.3932900697930697, "grad_norm": 1.9722398519515991, "learning_rate": 1.9715491199976462e-05, "loss": 0.8831, "step": 8535 }, { "epoch": 1.3934533284355741, "grad_norm": 1.8228189945220947, "learning_rate": 1.9715416038622125e-05, "loss": 0.5906, "step": 8536 }, { "epoch": 1.3936165870780783, "grad_norm": 1.7515661716461182, "learning_rate": 1.971534086748436e-05, "loss": 0.6965, "step": 8537 }, { "epoch": 1.3937798457205828, "grad_norm": 1.684076189994812, "learning_rate": 1.971526568656326e-05, "loss": 0.6883, "step": 8538 }, { "epoch": 1.3939431043630872, "grad_norm": 1.8726963996887207, "learning_rate": 1.971519049585888e-05, "loss": 0.757, "step": 8539 }, { "epoch": 1.3941063630055917, "grad_norm": 1.4726639986038208, "learning_rate": 1.9715115295371313e-05, "loss": 0.6563, "step": 8540 }, { "epoch": 1.394269621648096, "grad_norm": 1.849305510520935, "learning_rate": 1.9715040085100624e-05, "loss": 0.6945, "step": 8541 }, { "epoch": 1.3944328802906003, "grad_norm": 1.706517219543457, "learning_rate": 1.9714964865046893e-05, "loss": 0.6534, "step": 8542 }, { "epoch": 1.3945961389331047, "grad_norm": 1.6469526290893555, "learning_rate": 1.9714889635210193e-05, "loss": 0.6445, "step": 8543 }, { "epoch": 1.3947593975756092, "grad_norm": 1.6730015277862549, "learning_rate": 1.97148143955906e-05, "loss": 0.6667, "step": 8544 }, { "epoch": 1.3949226562181136, "grad_norm": 2.0204241275787354, "learning_rate": 1.9714739146188196e-05, "loss": 0.7039, "step": 8545 }, { "epoch": 1.3950859148606178, "grad_norm": 1.676442265510559, "learning_rate": 1.9714663887003055e-05, "loss": 0.6555, "step": 8546 }, { "epoch": 1.3952491735031223, "grad_norm": 1.7082538604736328, "learning_rate": 1.9714588618035245e-05, "loss": 0.6539, "step": 8547 }, { "epoch": 1.3954124321456267, "grad_norm": 2.2412357330322266, "learning_rate": 1.9714513339284845e-05, "loss": 0.8125, "step": 8548 }, { "epoch": 1.3955756907881312, "grad_norm": 1.7997382879257202, "learning_rate": 1.9714438050751937e-05, "loss": 0.5491, "step": 8549 }, { "epoch": 1.3957389494306356, "grad_norm": 1.772769570350647, "learning_rate": 1.971436275243659e-05, "loss": 0.6555, "step": 8550 }, { "epoch": 1.3959022080731398, "grad_norm": 1.7535885572433472, "learning_rate": 1.9714287444338884e-05, "loss": 0.7208, "step": 8551 }, { "epoch": 1.3960654667156442, "grad_norm": 2.0372729301452637, "learning_rate": 1.9714212126458893e-05, "loss": 0.6892, "step": 8552 }, { "epoch": 1.3962287253581487, "grad_norm": 1.8749943971633911, "learning_rate": 1.971413679879669e-05, "loss": 0.7605, "step": 8553 }, { "epoch": 1.396391984000653, "grad_norm": 1.5726737976074219, "learning_rate": 1.971406146135236e-05, "loss": 0.6779, "step": 8554 }, { "epoch": 1.3965552426431573, "grad_norm": 1.4064544439315796, "learning_rate": 1.9713986114125966e-05, "loss": 0.5512, "step": 8555 }, { "epoch": 1.3967185012856618, "grad_norm": 1.8914309740066528, "learning_rate": 1.9713910757117593e-05, "loss": 0.7273, "step": 8556 }, { "epoch": 1.3968817599281662, "grad_norm": 1.82962167263031, "learning_rate": 1.9713835390327317e-05, "loss": 0.7273, "step": 8557 }, { "epoch": 1.3970450185706706, "grad_norm": 1.9358083009719849, "learning_rate": 1.9713760013755207e-05, "loss": 0.6718, "step": 8558 }, { "epoch": 1.397208277213175, "grad_norm": 1.5062962770462036, "learning_rate": 1.9713684627401346e-05, "loss": 0.5279, "step": 8559 }, { "epoch": 1.3973715358556793, "grad_norm": 2.1053473949432373, "learning_rate": 1.9713609231265807e-05, "loss": 0.9349, "step": 8560 }, { "epoch": 1.3975347944981837, "grad_norm": 1.8815748691558838, "learning_rate": 1.9713533825348664e-05, "loss": 0.7169, "step": 8561 }, { "epoch": 1.3976980531406882, "grad_norm": 2.067535877227783, "learning_rate": 1.971345840965e-05, "loss": 0.7811, "step": 8562 }, { "epoch": 1.3978613117831924, "grad_norm": 1.6335721015930176, "learning_rate": 1.971338298416988e-05, "loss": 0.7191, "step": 8563 }, { "epoch": 1.3980245704256968, "grad_norm": 1.6634080410003662, "learning_rate": 1.971330754890839e-05, "loss": 0.6422, "step": 8564 }, { "epoch": 1.3981878290682013, "grad_norm": 1.6815085411071777, "learning_rate": 1.97132321038656e-05, "loss": 0.7019, "step": 8565 }, { "epoch": 1.3983510877107057, "grad_norm": 1.504040002822876, "learning_rate": 1.9713156649041587e-05, "loss": 0.5265, "step": 8566 }, { "epoch": 1.3985143463532101, "grad_norm": 1.8774583339691162, "learning_rate": 1.971308118443643e-05, "loss": 0.7659, "step": 8567 }, { "epoch": 1.3986776049957146, "grad_norm": 1.7899527549743652, "learning_rate": 1.9713005710050203e-05, "loss": 0.7163, "step": 8568 }, { "epoch": 1.3988408636382188, "grad_norm": 1.8520280122756958, "learning_rate": 1.9712930225882976e-05, "loss": 0.6883, "step": 8569 }, { "epoch": 1.3990041222807232, "grad_norm": 1.8585915565490723, "learning_rate": 1.9712854731934837e-05, "loss": 0.7027, "step": 8570 }, { "epoch": 1.3991673809232277, "grad_norm": 1.9673842191696167, "learning_rate": 1.9712779228205856e-05, "loss": 0.8631, "step": 8571 }, { "epoch": 1.399330639565732, "grad_norm": 1.672910213470459, "learning_rate": 1.9712703714696108e-05, "loss": 0.7067, "step": 8572 }, { "epoch": 1.3994938982082363, "grad_norm": 1.9864866733551025, "learning_rate": 1.971262819140567e-05, "loss": 0.9096, "step": 8573 }, { "epoch": 1.3996571568507408, "grad_norm": 1.9118305444717407, "learning_rate": 1.9712552658334617e-05, "loss": 0.7571, "step": 8574 }, { "epoch": 1.3998204154932452, "grad_norm": 1.7030729055404663, "learning_rate": 1.9712477115483027e-05, "loss": 0.6483, "step": 8575 }, { "epoch": 1.3999836741357496, "grad_norm": 1.8887096643447876, "learning_rate": 1.9712401562850975e-05, "loss": 0.9116, "step": 8576 }, { "epoch": 1.400146932778254, "grad_norm": 1.522375464439392, "learning_rate": 1.9712326000438535e-05, "loss": 0.6523, "step": 8577 }, { "epoch": 1.4003101914207583, "grad_norm": 1.520180583000183, "learning_rate": 1.9712250428245788e-05, "loss": 0.6304, "step": 8578 }, { "epoch": 1.4004734500632627, "grad_norm": 1.7145904302597046, "learning_rate": 1.9712174846272806e-05, "loss": 0.6621, "step": 8579 }, { "epoch": 1.4006367087057672, "grad_norm": 1.7909493446350098, "learning_rate": 1.9712099254519666e-05, "loss": 0.6608, "step": 8580 }, { "epoch": 1.4007999673482714, "grad_norm": 1.867161750793457, "learning_rate": 1.9712023652986444e-05, "loss": 0.9778, "step": 8581 }, { "epoch": 1.4009632259907758, "grad_norm": 1.8501540422439575, "learning_rate": 1.971194804167322e-05, "loss": 0.7388, "step": 8582 }, { "epoch": 1.4011264846332803, "grad_norm": 1.9510643482208252, "learning_rate": 1.971187242058006e-05, "loss": 0.5734, "step": 8583 }, { "epoch": 1.4012897432757847, "grad_norm": 1.752379059791565, "learning_rate": 1.9711796789707057e-05, "loss": 0.6218, "step": 8584 }, { "epoch": 1.4014530019182891, "grad_norm": 2.176736354827881, "learning_rate": 1.9711721149054272e-05, "loss": 0.7749, "step": 8585 }, { "epoch": 1.4016162605607934, "grad_norm": 1.680879831314087, "learning_rate": 1.9711645498621787e-05, "loss": 0.7899, "step": 8586 }, { "epoch": 1.4017795192032978, "grad_norm": 1.8759444952011108, "learning_rate": 1.9711569838409675e-05, "loss": 0.8015, "step": 8587 }, { "epoch": 1.4019427778458022, "grad_norm": 1.7387847900390625, "learning_rate": 1.9711494168418017e-05, "loss": 0.6439, "step": 8588 }, { "epoch": 1.4021060364883067, "grad_norm": 2.000535011291504, "learning_rate": 1.971141848864689e-05, "loss": 0.8285, "step": 8589 }, { "epoch": 1.4022692951308109, "grad_norm": 1.91960871219635, "learning_rate": 1.971134279909636e-05, "loss": 0.6128, "step": 8590 }, { "epoch": 1.4024325537733153, "grad_norm": 1.8916155099868774, "learning_rate": 1.9711267099766517e-05, "loss": 0.7893, "step": 8591 }, { "epoch": 1.4025958124158198, "grad_norm": 1.5142968893051147, "learning_rate": 1.971119139065743e-05, "loss": 0.639, "step": 8592 }, { "epoch": 1.4027590710583242, "grad_norm": 2.034125566482544, "learning_rate": 1.9711115671769172e-05, "loss": 0.7668, "step": 8593 }, { "epoch": 1.4029223297008286, "grad_norm": 1.9606164693832397, "learning_rate": 1.9711039943101826e-05, "loss": 0.8288, "step": 8594 }, { "epoch": 1.4030855883433329, "grad_norm": 1.683655023574829, "learning_rate": 1.9710964204655462e-05, "loss": 0.7308, "step": 8595 }, { "epoch": 1.4032488469858373, "grad_norm": 2.032135248184204, "learning_rate": 1.9710888456430163e-05, "loss": 0.7335, "step": 8596 }, { "epoch": 1.4034121056283417, "grad_norm": 1.6199026107788086, "learning_rate": 1.9710812698426e-05, "loss": 0.5885, "step": 8597 }, { "epoch": 1.4035753642708462, "grad_norm": 1.8474699258804321, "learning_rate": 1.9710736930643054e-05, "loss": 0.7112, "step": 8598 }, { "epoch": 1.4037386229133504, "grad_norm": 1.4029401540756226, "learning_rate": 1.9710661153081396e-05, "loss": 0.5002, "step": 8599 }, { "epoch": 1.4039018815558548, "grad_norm": 1.5530167818069458, "learning_rate": 1.9710585365741105e-05, "loss": 0.6494, "step": 8600 }, { "epoch": 1.4040651401983593, "grad_norm": 1.735854983329773, "learning_rate": 1.971050956862226e-05, "loss": 0.648, "step": 8601 }, { "epoch": 1.4042283988408637, "grad_norm": 1.6918120384216309, "learning_rate": 1.971043376172493e-05, "loss": 0.6145, "step": 8602 }, { "epoch": 1.4043916574833681, "grad_norm": 1.797217607498169, "learning_rate": 1.97103579450492e-05, "loss": 0.5966, "step": 8603 }, { "epoch": 1.4045549161258724, "grad_norm": 1.838832974433899, "learning_rate": 1.9710282118595137e-05, "loss": 0.6548, "step": 8604 }, { "epoch": 1.4047181747683768, "grad_norm": 1.9912137985229492, "learning_rate": 1.9710206282362823e-05, "loss": 0.719, "step": 8605 }, { "epoch": 1.4048814334108812, "grad_norm": 1.36310613155365, "learning_rate": 1.9710130436352338e-05, "loss": 0.6004, "step": 8606 }, { "epoch": 1.4050446920533854, "grad_norm": 1.7034072875976562, "learning_rate": 1.971005458056375e-05, "loss": 0.609, "step": 8607 }, { "epoch": 1.4052079506958899, "grad_norm": 2.064579963684082, "learning_rate": 1.9709978714997146e-05, "loss": 0.7673, "step": 8608 }, { "epoch": 1.4053712093383943, "grad_norm": 2.082993507385254, "learning_rate": 1.970990283965259e-05, "loss": 1.4007, "step": 8609 }, { "epoch": 1.4055344679808988, "grad_norm": 1.8789527416229248, "learning_rate": 1.970982695453017e-05, "loss": 0.6507, "step": 8610 }, { "epoch": 1.4056977266234032, "grad_norm": 1.537828803062439, "learning_rate": 1.9709751059629952e-05, "loss": 0.639, "step": 8611 }, { "epoch": 1.4058609852659076, "grad_norm": 1.7417701482772827, "learning_rate": 1.9709675154952017e-05, "loss": 0.6875, "step": 8612 }, { "epoch": 1.4060242439084119, "grad_norm": 1.5917898416519165, "learning_rate": 1.970959924049644e-05, "loss": 0.5525, "step": 8613 }, { "epoch": 1.4061875025509163, "grad_norm": 1.656715989112854, "learning_rate": 1.9709523316263306e-05, "loss": 0.6078, "step": 8614 }, { "epoch": 1.4063507611934207, "grad_norm": 1.5196971893310547, "learning_rate": 1.970944738225268e-05, "loss": 0.6065, "step": 8615 }, { "epoch": 1.406514019835925, "grad_norm": 1.885778546333313, "learning_rate": 1.9709371438464646e-05, "loss": 0.8296, "step": 8616 }, { "epoch": 1.4066772784784294, "grad_norm": 1.5454318523406982, "learning_rate": 1.9709295484899275e-05, "loss": 0.6074, "step": 8617 }, { "epoch": 1.4068405371209338, "grad_norm": 1.496180772781372, "learning_rate": 1.9709219521556647e-05, "loss": 0.5919, "step": 8618 }, { "epoch": 1.4070037957634383, "grad_norm": 1.583449363708496, "learning_rate": 1.9709143548436837e-05, "loss": 0.6574, "step": 8619 }, { "epoch": 1.4071670544059427, "grad_norm": 2.218541383743286, "learning_rate": 1.970906756553992e-05, "loss": 0.6347, "step": 8620 }, { "epoch": 1.4073303130484471, "grad_norm": 1.5744132995605469, "learning_rate": 1.970899157286598e-05, "loss": 0.6046, "step": 8621 }, { "epoch": 1.4074935716909514, "grad_norm": 1.9045554399490356, "learning_rate": 1.970891557041508e-05, "loss": 0.6964, "step": 8622 }, { "epoch": 1.4076568303334558, "grad_norm": 1.8541924953460693, "learning_rate": 1.9708839558187313e-05, "loss": 0.7256, "step": 8623 }, { "epoch": 1.4078200889759602, "grad_norm": 1.7579020261764526, "learning_rate": 1.9708763536182744e-05, "loss": 0.6599, "step": 8624 }, { "epoch": 1.4079833476184644, "grad_norm": 1.97645902633667, "learning_rate": 1.970868750440145e-05, "loss": 0.7083, "step": 8625 }, { "epoch": 1.4081466062609689, "grad_norm": 1.5480568408966064, "learning_rate": 1.9708611462843512e-05, "loss": 0.5439, "step": 8626 }, { "epoch": 1.4083098649034733, "grad_norm": 1.7147431373596191, "learning_rate": 1.9708535411509008e-05, "loss": 0.7496, "step": 8627 }, { "epoch": 1.4084731235459778, "grad_norm": 1.6770806312561035, "learning_rate": 1.9708459350398005e-05, "loss": 0.6306, "step": 8628 }, { "epoch": 1.4086363821884822, "grad_norm": 1.5218342542648315, "learning_rate": 1.970838327951059e-05, "loss": 0.6411, "step": 8629 }, { "epoch": 1.4087996408309864, "grad_norm": 1.7869845628738403, "learning_rate": 1.9708307198846837e-05, "loss": 0.6951, "step": 8630 }, { "epoch": 1.4089628994734908, "grad_norm": 1.572424292564392, "learning_rate": 1.9708231108406817e-05, "loss": 0.6699, "step": 8631 }, { "epoch": 1.4091261581159953, "grad_norm": 1.7874997854232788, "learning_rate": 1.9708155008190614e-05, "loss": 0.6934, "step": 8632 }, { "epoch": 1.4092894167584997, "grad_norm": 1.6763380765914917, "learning_rate": 1.9708078898198303e-05, "loss": 0.5726, "step": 8633 }, { "epoch": 1.409452675401004, "grad_norm": 1.6334975957870483, "learning_rate": 1.9708002778429957e-05, "loss": 0.5428, "step": 8634 }, { "epoch": 1.4096159340435084, "grad_norm": 2.127756357192993, "learning_rate": 1.9707926648885655e-05, "loss": 0.7091, "step": 8635 }, { "epoch": 1.4097791926860128, "grad_norm": 1.5964088439941406, "learning_rate": 1.970785050956547e-05, "loss": 0.5284, "step": 8636 }, { "epoch": 1.4099424513285173, "grad_norm": 1.9498860836029053, "learning_rate": 1.9707774360469487e-05, "loss": 0.7433, "step": 8637 }, { "epoch": 1.4101057099710217, "grad_norm": 1.544713020324707, "learning_rate": 1.9707698201597777e-05, "loss": 0.6993, "step": 8638 }, { "epoch": 1.410268968613526, "grad_norm": 1.9762260913848877, "learning_rate": 1.9707622032950416e-05, "loss": 0.6025, "step": 8639 }, { "epoch": 1.4104322272560303, "grad_norm": 1.6134401559829712, "learning_rate": 1.970754585452748e-05, "loss": 0.6924, "step": 8640 }, { "epoch": 1.4105954858985348, "grad_norm": 1.6906917095184326, "learning_rate": 1.970746966632905e-05, "loss": 0.5938, "step": 8641 }, { "epoch": 1.4107587445410392, "grad_norm": 2.017824172973633, "learning_rate": 1.9707393468355204e-05, "loss": 0.7295, "step": 8642 }, { "epoch": 1.4109220031835434, "grad_norm": 1.6652110815048218, "learning_rate": 1.9707317260606014e-05, "loss": 0.5972, "step": 8643 }, { "epoch": 1.4110852618260479, "grad_norm": 1.6441612243652344, "learning_rate": 1.9707241043081555e-05, "loss": 0.6796, "step": 8644 }, { "epoch": 1.4112485204685523, "grad_norm": 1.569763422012329, "learning_rate": 1.970716481578191e-05, "loss": 0.5633, "step": 8645 }, { "epoch": 1.4114117791110568, "grad_norm": 1.7102195024490356, "learning_rate": 1.9707088578707154e-05, "loss": 0.6562, "step": 8646 }, { "epoch": 1.4115750377535612, "grad_norm": 1.9142568111419678, "learning_rate": 1.9707012331857357e-05, "loss": 0.7614, "step": 8647 }, { "epoch": 1.4117382963960654, "grad_norm": 2.4415085315704346, "learning_rate": 1.97069360752326e-05, "loss": 0.7942, "step": 8648 }, { "epoch": 1.4119015550385698, "grad_norm": 1.930181860923767, "learning_rate": 1.9706859808832968e-05, "loss": 0.7382, "step": 8649 }, { "epoch": 1.4120648136810743, "grad_norm": 1.903906226158142, "learning_rate": 1.9706783532658528e-05, "loss": 0.724, "step": 8650 }, { "epoch": 1.4122280723235785, "grad_norm": 1.726954698562622, "learning_rate": 1.970670724670936e-05, "loss": 0.7078, "step": 8651 }, { "epoch": 1.412391330966083, "grad_norm": 1.8145008087158203, "learning_rate": 1.9706630950985537e-05, "loss": 0.7004, "step": 8652 }, { "epoch": 1.4125545896085874, "grad_norm": 2.449061632156372, "learning_rate": 1.9706554645487142e-05, "loss": 0.7962, "step": 8653 }, { "epoch": 1.4127178482510918, "grad_norm": 2.031891345977783, "learning_rate": 1.9706478330214248e-05, "loss": 0.8449, "step": 8654 }, { "epoch": 1.4128811068935963, "grad_norm": 1.7556610107421875, "learning_rate": 1.9706402005166935e-05, "loss": 0.6719, "step": 8655 }, { "epoch": 1.4130443655361007, "grad_norm": 1.8780179023742676, "learning_rate": 1.9706325670345276e-05, "loss": 0.7667, "step": 8656 }, { "epoch": 1.413207624178605, "grad_norm": 1.9974851608276367, "learning_rate": 1.9706249325749348e-05, "loss": 1.1717, "step": 8657 }, { "epoch": 1.4133708828211093, "grad_norm": 1.8004084825515747, "learning_rate": 1.970617297137923e-05, "loss": 0.6947, "step": 8658 }, { "epoch": 1.4135341414636138, "grad_norm": 1.635007381439209, "learning_rate": 1.9706096607235003e-05, "loss": 0.7377, "step": 8659 }, { "epoch": 1.413697400106118, "grad_norm": 1.5896035432815552, "learning_rate": 1.9706020233316735e-05, "loss": 0.6808, "step": 8660 }, { "epoch": 1.4138606587486224, "grad_norm": 1.8063644170761108, "learning_rate": 1.9705943849624507e-05, "loss": 0.8254, "step": 8661 }, { "epoch": 1.4140239173911269, "grad_norm": 1.8654104471206665, "learning_rate": 1.97058674561584e-05, "loss": 0.9091, "step": 8662 }, { "epoch": 1.4141871760336313, "grad_norm": 2.1827971935272217, "learning_rate": 1.970579105291848e-05, "loss": 0.8624, "step": 8663 }, { "epoch": 1.4143504346761357, "grad_norm": 1.82045316696167, "learning_rate": 1.9705714639904838e-05, "loss": 0.8159, "step": 8664 }, { "epoch": 1.4145136933186402, "grad_norm": 2.075427532196045, "learning_rate": 1.9705638217117543e-05, "loss": 0.7499, "step": 8665 }, { "epoch": 1.4146769519611444, "grad_norm": 1.4834234714508057, "learning_rate": 1.970556178455667e-05, "loss": 0.6925, "step": 8666 }, { "epoch": 1.4148402106036488, "grad_norm": 1.8697466850280762, "learning_rate": 1.9705485342222302e-05, "loss": 0.4816, "step": 8667 }, { "epoch": 1.4150034692461533, "grad_norm": 1.6457347869873047, "learning_rate": 1.970540889011451e-05, "loss": 0.7459, "step": 8668 }, { "epoch": 1.4151667278886575, "grad_norm": 1.6625922918319702, "learning_rate": 1.9705332428233373e-05, "loss": 0.6969, "step": 8669 }, { "epoch": 1.415329986531162, "grad_norm": 1.704240322113037, "learning_rate": 1.9705255956578972e-05, "loss": 0.7391, "step": 8670 }, { "epoch": 1.4154932451736664, "grad_norm": 1.9477009773254395, "learning_rate": 1.9705179475151377e-05, "loss": 0.705, "step": 8671 }, { "epoch": 1.4156565038161708, "grad_norm": 1.6242307424545288, "learning_rate": 1.9705102983950674e-05, "loss": 0.6855, "step": 8672 }, { "epoch": 1.4158197624586752, "grad_norm": 1.8269885778427124, "learning_rate": 1.9705026482976933e-05, "loss": 0.7559, "step": 8673 }, { "epoch": 1.4159830211011795, "grad_norm": 1.6294444799423218, "learning_rate": 1.970494997223023e-05, "loss": 0.645, "step": 8674 }, { "epoch": 1.416146279743684, "grad_norm": 1.6249995231628418, "learning_rate": 1.9704873451710647e-05, "loss": 0.6799, "step": 8675 }, { "epoch": 1.4163095383861883, "grad_norm": 1.5880489349365234, "learning_rate": 1.970479692141826e-05, "loss": 0.6422, "step": 8676 }, { "epoch": 1.4164727970286928, "grad_norm": 1.3066085577011108, "learning_rate": 1.970472038135314e-05, "loss": 0.5293, "step": 8677 }, { "epoch": 1.416636055671197, "grad_norm": 1.8472591638565063, "learning_rate": 1.9704643831515377e-05, "loss": 0.7655, "step": 8678 }, { "epoch": 1.4167993143137014, "grad_norm": 1.6922271251678467, "learning_rate": 1.9704567271905034e-05, "loss": 0.6675, "step": 8679 }, { "epoch": 1.4169625729562059, "grad_norm": 1.7736334800720215, "learning_rate": 1.9704490702522198e-05, "loss": 0.6286, "step": 8680 }, { "epoch": 1.4171258315987103, "grad_norm": 1.7819796800613403, "learning_rate": 1.970441412336694e-05, "loss": 0.6429, "step": 8681 }, { "epoch": 1.4172890902412147, "grad_norm": 1.796210765838623, "learning_rate": 1.9704337534439343e-05, "loss": 0.74, "step": 8682 }, { "epoch": 1.417452348883719, "grad_norm": 1.7743102312088013, "learning_rate": 1.9704260935739477e-05, "loss": 0.5991, "step": 8683 }, { "epoch": 1.4176156075262234, "grad_norm": 1.7765942811965942, "learning_rate": 1.9704184327267425e-05, "loss": 0.7021, "step": 8684 }, { "epoch": 1.4177788661687278, "grad_norm": 2.0916383266448975, "learning_rate": 1.970410770902326e-05, "loss": 0.9195, "step": 8685 }, { "epoch": 1.4179421248112323, "grad_norm": 1.8041877746582031, "learning_rate": 1.970403108100706e-05, "loss": 0.7846, "step": 8686 }, { "epoch": 1.4181053834537365, "grad_norm": 1.8553102016448975, "learning_rate": 1.9703954443218908e-05, "loss": 0.7172, "step": 8687 }, { "epoch": 1.418268642096241, "grad_norm": 1.6210575103759766, "learning_rate": 1.9703877795658874e-05, "loss": 0.6126, "step": 8688 }, { "epoch": 1.4184319007387454, "grad_norm": 1.5161592960357666, "learning_rate": 1.970380113832704e-05, "loss": 0.6379, "step": 8689 }, { "epoch": 1.4185951593812498, "grad_norm": 1.5917373895645142, "learning_rate": 1.9703724471223475e-05, "loss": 0.6061, "step": 8690 }, { "epoch": 1.4187584180237542, "grad_norm": 1.9403901100158691, "learning_rate": 1.9703647794348268e-05, "loss": 0.7945, "step": 8691 }, { "epoch": 1.4189216766662585, "grad_norm": 2.016359329223633, "learning_rate": 1.9703571107701486e-05, "loss": 0.8547, "step": 8692 }, { "epoch": 1.419084935308763, "grad_norm": 1.7833921909332275, "learning_rate": 1.9703494411283213e-05, "loss": 0.6531, "step": 8693 }, { "epoch": 1.4192481939512673, "grad_norm": 1.3497540950775146, "learning_rate": 1.9703417705093524e-05, "loss": 0.5188, "step": 8694 }, { "epoch": 1.4194114525937715, "grad_norm": 1.8821314573287964, "learning_rate": 1.9703340989132493e-05, "loss": 0.6633, "step": 8695 }, { "epoch": 1.419574711236276, "grad_norm": 2.2048559188842773, "learning_rate": 1.9703264263400202e-05, "loss": 0.5689, "step": 8696 }, { "epoch": 1.4197379698787804, "grad_norm": 1.8296291828155518, "learning_rate": 1.970318752789673e-05, "loss": 0.7196, "step": 8697 }, { "epoch": 1.4199012285212849, "grad_norm": 1.7738217115402222, "learning_rate": 1.9703110782622145e-05, "loss": 0.7041, "step": 8698 }, { "epoch": 1.4200644871637893, "grad_norm": 1.960978388786316, "learning_rate": 1.970303402757653e-05, "loss": 0.5878, "step": 8699 }, { "epoch": 1.4202277458062937, "grad_norm": 1.8107755184173584, "learning_rate": 1.9702957262759964e-05, "loss": 0.6232, "step": 8700 }, { "epoch": 1.420391004448798, "grad_norm": 1.9930211305618286, "learning_rate": 1.9702880488172527e-05, "loss": 0.8347, "step": 8701 }, { "epoch": 1.4205542630913024, "grad_norm": 1.8488643169403076, "learning_rate": 1.9702803703814288e-05, "loss": 0.7217, "step": 8702 }, { "epoch": 1.4207175217338068, "grad_norm": 1.8284389972686768, "learning_rate": 1.970272690968533e-05, "loss": 0.592, "step": 8703 }, { "epoch": 1.420880780376311, "grad_norm": 1.5800256729125977, "learning_rate": 1.9702650105785725e-05, "loss": 0.6384, "step": 8704 }, { "epoch": 1.4210440390188155, "grad_norm": 1.4261492490768433, "learning_rate": 1.9702573292115554e-05, "loss": 0.5645, "step": 8705 }, { "epoch": 1.42120729766132, "grad_norm": 1.4399464130401611, "learning_rate": 1.9702496468674896e-05, "loss": 0.5822, "step": 8706 }, { "epoch": 1.4213705563038244, "grad_norm": 1.874807357788086, "learning_rate": 1.9702419635463827e-05, "loss": 0.6434, "step": 8707 }, { "epoch": 1.4215338149463288, "grad_norm": 1.5424790382385254, "learning_rate": 1.9702342792482424e-05, "loss": 0.5829, "step": 8708 }, { "epoch": 1.4216970735888332, "grad_norm": 1.8187928199768066, "learning_rate": 1.9702265939730766e-05, "loss": 0.777, "step": 8709 }, { "epoch": 1.4218603322313375, "grad_norm": 2.059610366821289, "learning_rate": 1.9702189077208922e-05, "loss": 0.711, "step": 8710 }, { "epoch": 1.422023590873842, "grad_norm": 1.433377981185913, "learning_rate": 1.9702112204916984e-05, "loss": 0.6031, "step": 8711 }, { "epoch": 1.4221868495163463, "grad_norm": 1.5410363674163818, "learning_rate": 1.970203532285502e-05, "loss": 0.6234, "step": 8712 }, { "epoch": 1.4223501081588505, "grad_norm": 1.857173204421997, "learning_rate": 1.9701958431023107e-05, "loss": 0.7091, "step": 8713 }, { "epoch": 1.422513366801355, "grad_norm": 1.596121072769165, "learning_rate": 1.9701881529421325e-05, "loss": 0.575, "step": 8714 }, { "epoch": 1.4226766254438594, "grad_norm": 2.114319086074829, "learning_rate": 1.9701804618049753e-05, "loss": 0.8154, "step": 8715 }, { "epoch": 1.4228398840863639, "grad_norm": 1.5954052209854126, "learning_rate": 1.9701727696908462e-05, "loss": 0.7106, "step": 8716 }, { "epoch": 1.4230031427288683, "grad_norm": 1.8025619983673096, "learning_rate": 1.9701650765997537e-05, "loss": 0.7304, "step": 8717 }, { "epoch": 1.4231664013713727, "grad_norm": 1.7387498617172241, "learning_rate": 1.970157382531705e-05, "loss": 0.6477, "step": 8718 }, { "epoch": 1.423329660013877, "grad_norm": 1.6344341039657593, "learning_rate": 1.9701496874867084e-05, "loss": 0.6726, "step": 8719 }, { "epoch": 1.4234929186563814, "grad_norm": 1.7721304893493652, "learning_rate": 1.970141991464771e-05, "loss": 0.6554, "step": 8720 }, { "epoch": 1.4236561772988858, "grad_norm": 1.947679042816162, "learning_rate": 1.970134294465901e-05, "loss": 0.6028, "step": 8721 }, { "epoch": 1.42381943594139, "grad_norm": 1.831961989402771, "learning_rate": 1.970126596490106e-05, "loss": 0.6697, "step": 8722 }, { "epoch": 1.4239826945838945, "grad_norm": 2.8235700130462646, "learning_rate": 1.9701188975373937e-05, "loss": 0.7072, "step": 8723 }, { "epoch": 1.424145953226399, "grad_norm": 1.8825876712799072, "learning_rate": 1.9701111976077722e-05, "loss": 0.7553, "step": 8724 }, { "epoch": 1.4243092118689034, "grad_norm": 1.498405933380127, "learning_rate": 1.9701034967012487e-05, "loss": 0.5215, "step": 8725 }, { "epoch": 1.4244724705114078, "grad_norm": 2.080427646636963, "learning_rate": 1.9700957948178313e-05, "loss": 0.7098, "step": 8726 }, { "epoch": 1.424635729153912, "grad_norm": 1.6905525922775269, "learning_rate": 1.970088091957528e-05, "loss": 0.6493, "step": 8727 }, { "epoch": 1.4247989877964164, "grad_norm": 1.7911847829818726, "learning_rate": 1.9700803881203457e-05, "loss": 0.7161, "step": 8728 }, { "epoch": 1.4249622464389209, "grad_norm": 1.8361754417419434, "learning_rate": 1.970072683306293e-05, "loss": 0.622, "step": 8729 }, { "epoch": 1.4251255050814253, "grad_norm": 1.5975465774536133, "learning_rate": 1.9700649775153775e-05, "loss": 0.6475, "step": 8730 }, { "epoch": 1.4252887637239295, "grad_norm": 1.692894697189331, "learning_rate": 1.9700572707476066e-05, "loss": 0.6282, "step": 8731 }, { "epoch": 1.425452022366434, "grad_norm": 1.785949468612671, "learning_rate": 1.9700495630029884e-05, "loss": 0.7739, "step": 8732 }, { "epoch": 1.4256152810089384, "grad_norm": 1.609365701675415, "learning_rate": 1.9700418542815306e-05, "loss": 0.6576, "step": 8733 }, { "epoch": 1.4257785396514429, "grad_norm": 2.2793776988983154, "learning_rate": 1.9700341445832408e-05, "loss": 0.8449, "step": 8734 }, { "epoch": 1.4259417982939473, "grad_norm": 1.681693196296692, "learning_rate": 1.9700264339081268e-05, "loss": 0.6439, "step": 8735 }, { "epoch": 1.4261050569364515, "grad_norm": 1.8200008869171143, "learning_rate": 1.9700187222561965e-05, "loss": 0.5901, "step": 8736 }, { "epoch": 1.426268315578956, "grad_norm": 2.0122389793395996, "learning_rate": 1.9700110096274577e-05, "loss": 0.7284, "step": 8737 }, { "epoch": 1.4264315742214604, "grad_norm": 1.8310739994049072, "learning_rate": 1.970003296021918e-05, "loss": 0.6716, "step": 8738 }, { "epoch": 1.4265948328639646, "grad_norm": 1.7809780836105347, "learning_rate": 1.969995581439585e-05, "loss": 0.6034, "step": 8739 }, { "epoch": 1.426758091506469, "grad_norm": 1.7563358545303345, "learning_rate": 1.9699878658804673e-05, "loss": 0.6303, "step": 8740 }, { "epoch": 1.4269213501489735, "grad_norm": 2.5034730434417725, "learning_rate": 1.9699801493445715e-05, "loss": 0.7325, "step": 8741 }, { "epoch": 1.427084608791478, "grad_norm": 1.8000407218933105, "learning_rate": 1.9699724318319064e-05, "loss": 0.7488, "step": 8742 }, { "epoch": 1.4272478674339824, "grad_norm": 1.9862347841262817, "learning_rate": 1.969964713342479e-05, "loss": 0.812, "step": 8743 }, { "epoch": 1.4274111260764868, "grad_norm": 2.2829973697662354, "learning_rate": 1.9699569938762975e-05, "loss": 0.7595, "step": 8744 }, { "epoch": 1.427574384718991, "grad_norm": 1.8009029626846313, "learning_rate": 1.9699492734333697e-05, "loss": 0.6809, "step": 8745 }, { "epoch": 1.4277376433614954, "grad_norm": 1.894785761833191, "learning_rate": 1.9699415520137028e-05, "loss": 0.6845, "step": 8746 }, { "epoch": 1.4279009020039999, "grad_norm": 1.7852375507354736, "learning_rate": 1.9699338296173053e-05, "loss": 0.6888, "step": 8747 }, { "epoch": 1.428064160646504, "grad_norm": 1.8087503910064697, "learning_rate": 1.969926106244185e-05, "loss": 0.6605, "step": 8748 }, { "epoch": 1.4282274192890085, "grad_norm": 1.8518214225769043, "learning_rate": 1.969918381894349e-05, "loss": 0.7444, "step": 8749 }, { "epoch": 1.428390677931513, "grad_norm": 1.5850977897644043, "learning_rate": 1.969910656567805e-05, "loss": 0.6481, "step": 8750 }, { "epoch": 1.4285539365740174, "grad_norm": 2.013476610183716, "learning_rate": 1.9699029302645622e-05, "loss": 0.7409, "step": 8751 }, { "epoch": 1.4287171952165219, "grad_norm": 1.9487580060958862, "learning_rate": 1.9698952029846267e-05, "loss": 0.7502, "step": 8752 }, { "epoch": 1.4288804538590263, "grad_norm": 1.5989595651626587, "learning_rate": 1.9698874747280074e-05, "loss": 0.5553, "step": 8753 }, { "epoch": 1.4290437125015305, "grad_norm": 1.3954507112503052, "learning_rate": 1.9698797454947115e-05, "loss": 0.542, "step": 8754 }, { "epoch": 1.429206971144035, "grad_norm": 1.8733289241790771, "learning_rate": 1.969872015284747e-05, "loss": 0.7858, "step": 8755 }, { "epoch": 1.4293702297865394, "grad_norm": 1.565057635307312, "learning_rate": 1.9698642840981215e-05, "loss": 0.5471, "step": 8756 }, { "epoch": 1.4295334884290436, "grad_norm": 1.8549431562423706, "learning_rate": 1.969856551934843e-05, "loss": 0.6572, "step": 8757 }, { "epoch": 1.429696747071548, "grad_norm": 2.008464813232422, "learning_rate": 1.9698488187949193e-05, "loss": 0.6773, "step": 8758 }, { "epoch": 1.4298600057140525, "grad_norm": 1.900803804397583, "learning_rate": 1.969841084678358e-05, "loss": 0.6809, "step": 8759 }, { "epoch": 1.430023264356557, "grad_norm": 1.7062910795211792, "learning_rate": 1.9698333495851672e-05, "loss": 0.6733, "step": 8760 }, { "epoch": 1.4301865229990613, "grad_norm": 1.7559092044830322, "learning_rate": 1.969825613515354e-05, "loss": 0.7575, "step": 8761 }, { "epoch": 1.4303497816415658, "grad_norm": 2.0171620845794678, "learning_rate": 1.969817876468927e-05, "loss": 0.7365, "step": 8762 }, { "epoch": 1.43051304028407, "grad_norm": 1.7397462129592896, "learning_rate": 1.9698101384458937e-05, "loss": 0.6802, "step": 8763 }, { "epoch": 1.4306762989265744, "grad_norm": 1.8439674377441406, "learning_rate": 1.9698023994462616e-05, "loss": 0.8354, "step": 8764 }, { "epoch": 1.4308395575690789, "grad_norm": 1.7651145458221436, "learning_rate": 1.969794659470039e-05, "loss": 0.6704, "step": 8765 }, { "epoch": 1.431002816211583, "grad_norm": 1.5611993074417114, "learning_rate": 1.969786918517233e-05, "loss": 0.5461, "step": 8766 }, { "epoch": 1.4311660748540875, "grad_norm": 1.6152845621109009, "learning_rate": 1.9697791765878526e-05, "loss": 0.663, "step": 8767 }, { "epoch": 1.431329333496592, "grad_norm": 1.5751186609268188, "learning_rate": 1.9697714336819044e-05, "loss": 0.7085, "step": 8768 }, { "epoch": 1.4314925921390964, "grad_norm": 1.459073781967163, "learning_rate": 1.9697636897993964e-05, "loss": 0.5974, "step": 8769 }, { "epoch": 1.4316558507816008, "grad_norm": 1.4595152139663696, "learning_rate": 1.969755944940337e-05, "loss": 0.6075, "step": 8770 }, { "epoch": 1.431819109424105, "grad_norm": 1.6487301588058472, "learning_rate": 1.9697481991047332e-05, "loss": 0.6795, "step": 8771 }, { "epoch": 1.4319823680666095, "grad_norm": 1.5374324321746826, "learning_rate": 1.9697404522925937e-05, "loss": 0.6009, "step": 8772 }, { "epoch": 1.432145626709114, "grad_norm": 1.6228915452957153, "learning_rate": 1.9697327045039255e-05, "loss": 0.678, "step": 8773 }, { "epoch": 1.4323088853516184, "grad_norm": 2.013782262802124, "learning_rate": 1.9697249557387367e-05, "loss": 0.6317, "step": 8774 }, { "epoch": 1.4324721439941226, "grad_norm": 2.1244795322418213, "learning_rate": 1.9697172059970355e-05, "loss": 0.8958, "step": 8775 }, { "epoch": 1.432635402636627, "grad_norm": 2.0930838584899902, "learning_rate": 1.969709455278829e-05, "loss": 0.7222, "step": 8776 }, { "epoch": 1.4327986612791315, "grad_norm": 1.6331270933151245, "learning_rate": 1.969701703584125e-05, "loss": 0.5573, "step": 8777 }, { "epoch": 1.432961919921636, "grad_norm": 1.7399390935897827, "learning_rate": 1.9696939509129324e-05, "loss": 0.709, "step": 8778 }, { "epoch": 1.4331251785641403, "grad_norm": 1.8957643508911133, "learning_rate": 1.969686197265258e-05, "loss": 0.7133, "step": 8779 }, { "epoch": 1.4332884372066446, "grad_norm": 2.001325845718384, "learning_rate": 1.9696784426411097e-05, "loss": 0.6728, "step": 8780 }, { "epoch": 1.433451695849149, "grad_norm": 2.9992613792419434, "learning_rate": 1.9696706870404955e-05, "loss": 0.8508, "step": 8781 }, { "epoch": 1.4336149544916534, "grad_norm": 1.7546895742416382, "learning_rate": 1.969662930463423e-05, "loss": 0.632, "step": 8782 }, { "epoch": 1.4337782131341577, "grad_norm": 1.5804767608642578, "learning_rate": 1.9696551729099005e-05, "loss": 0.6293, "step": 8783 }, { "epoch": 1.433941471776662, "grad_norm": 1.536778211593628, "learning_rate": 1.9696474143799353e-05, "loss": 0.5573, "step": 8784 }, { "epoch": 1.4341047304191665, "grad_norm": 1.7767776250839233, "learning_rate": 1.9696396548735356e-05, "loss": 0.7113, "step": 8785 }, { "epoch": 1.434267989061671, "grad_norm": 1.680504560470581, "learning_rate": 1.9696318943907088e-05, "loss": 0.6455, "step": 8786 }, { "epoch": 1.4344312477041754, "grad_norm": 1.624477744102478, "learning_rate": 1.969624132931463e-05, "loss": 0.6571, "step": 8787 }, { "epoch": 1.4345945063466798, "grad_norm": 1.6071616411209106, "learning_rate": 1.969616370495806e-05, "loss": 0.6614, "step": 8788 }, { "epoch": 1.434757764989184, "grad_norm": 1.9092326164245605, "learning_rate": 1.9696086070837458e-05, "loss": 0.6732, "step": 8789 }, { "epoch": 1.4349210236316885, "grad_norm": 1.5513759851455688, "learning_rate": 1.9696008426952897e-05, "loss": 0.6378, "step": 8790 }, { "epoch": 1.435084282274193, "grad_norm": 1.7605129480361938, "learning_rate": 1.969593077330446e-05, "loss": 0.7416, "step": 8791 }, { "epoch": 1.4352475409166972, "grad_norm": 1.7378876209259033, "learning_rate": 1.969585310989222e-05, "loss": 0.6029, "step": 8792 }, { "epoch": 1.4354107995592016, "grad_norm": 1.768326759338379, "learning_rate": 1.9695775436716263e-05, "loss": 0.7506, "step": 8793 }, { "epoch": 1.435574058201706, "grad_norm": 1.810838222503662, "learning_rate": 1.969569775377666e-05, "loss": 0.6507, "step": 8794 }, { "epoch": 1.4357373168442105, "grad_norm": 2.145047664642334, "learning_rate": 1.969562006107349e-05, "loss": 0.8034, "step": 8795 }, { "epoch": 1.435900575486715, "grad_norm": 1.648671269416809, "learning_rate": 1.9695542358606838e-05, "loss": 0.61, "step": 8796 }, { "epoch": 1.4360638341292193, "grad_norm": 1.6534838676452637, "learning_rate": 1.969546464637677e-05, "loss": 0.7447, "step": 8797 }, { "epoch": 1.4362270927717236, "grad_norm": 1.6590546369552612, "learning_rate": 1.969538692438338e-05, "loss": 0.6742, "step": 8798 }, { "epoch": 1.436390351414228, "grad_norm": 1.8258951902389526, "learning_rate": 1.9695309192626736e-05, "loss": 0.6259, "step": 8799 }, { "epoch": 1.4365536100567324, "grad_norm": 1.8605329990386963, "learning_rate": 1.9695231451106914e-05, "loss": 0.7122, "step": 8800 }, { "epoch": 1.4367168686992366, "grad_norm": 2.0588760375976562, "learning_rate": 1.9695153699824e-05, "loss": 0.7439, "step": 8801 }, { "epoch": 1.436880127341741, "grad_norm": 1.859056830406189, "learning_rate": 1.9695075938778066e-05, "loss": 0.7721, "step": 8802 }, { "epoch": 1.4370433859842455, "grad_norm": 1.6266734600067139, "learning_rate": 1.9694998167969196e-05, "loss": 0.6049, "step": 8803 }, { "epoch": 1.43720664462675, "grad_norm": 2.1042253971099854, "learning_rate": 1.9694920387397466e-05, "loss": 0.708, "step": 8804 }, { "epoch": 1.4373699032692544, "grad_norm": 1.7716577053070068, "learning_rate": 1.969484259706295e-05, "loss": 0.7349, "step": 8805 }, { "epoch": 1.4375331619117588, "grad_norm": 1.8162206411361694, "learning_rate": 1.9694764796965736e-05, "loss": 0.7807, "step": 8806 }, { "epoch": 1.437696420554263, "grad_norm": 1.6888149976730347, "learning_rate": 1.9694686987105893e-05, "loss": 0.7046, "step": 8807 }, { "epoch": 1.4378596791967675, "grad_norm": 1.7741550207138062, "learning_rate": 1.96946091674835e-05, "loss": 0.6955, "step": 8808 }, { "epoch": 1.438022937839272, "grad_norm": 1.968450665473938, "learning_rate": 1.969453133809864e-05, "loss": 0.847, "step": 8809 }, { "epoch": 1.4381861964817761, "grad_norm": 1.718488097190857, "learning_rate": 1.9694453498951392e-05, "loss": 0.7291, "step": 8810 }, { "epoch": 1.4383494551242806, "grad_norm": 1.6668022871017456, "learning_rate": 1.969437565004183e-05, "loss": 0.7082, "step": 8811 }, { "epoch": 1.438512713766785, "grad_norm": 1.5092474222183228, "learning_rate": 1.9694297791370035e-05, "loss": 0.5709, "step": 8812 }, { "epoch": 1.4386759724092895, "grad_norm": 1.7654494047164917, "learning_rate": 1.9694219922936082e-05, "loss": 0.6133, "step": 8813 }, { "epoch": 1.438839231051794, "grad_norm": 2.369175434112549, "learning_rate": 1.9694142044740053e-05, "loss": 0.77, "step": 8814 }, { "epoch": 1.4390024896942981, "grad_norm": 1.8999091386795044, "learning_rate": 1.9694064156782027e-05, "loss": 0.7538, "step": 8815 }, { "epoch": 1.4391657483368026, "grad_norm": 1.6721547842025757, "learning_rate": 1.9693986259062082e-05, "loss": 0.6238, "step": 8816 }, { "epoch": 1.439329006979307, "grad_norm": 1.805945634841919, "learning_rate": 1.9693908351580293e-05, "loss": 0.5796, "step": 8817 }, { "epoch": 1.4394922656218114, "grad_norm": 1.6975070238113403, "learning_rate": 1.9693830434336743e-05, "loss": 0.7425, "step": 8818 }, { "epoch": 1.4396555242643156, "grad_norm": 1.7397018671035767, "learning_rate": 1.969375250733151e-05, "loss": 0.6267, "step": 8819 }, { "epoch": 1.43981878290682, "grad_norm": 2.073922634124756, "learning_rate": 1.9693674570564663e-05, "loss": 0.6892, "step": 8820 }, { "epoch": 1.4399820415493245, "grad_norm": 1.7947229146957397, "learning_rate": 1.9693596624036294e-05, "loss": 0.669, "step": 8821 }, { "epoch": 1.440145300191829, "grad_norm": 1.7729836702346802, "learning_rate": 1.9693518667746474e-05, "loss": 0.7091, "step": 8822 }, { "epoch": 1.4403085588343334, "grad_norm": 1.8461151123046875, "learning_rate": 1.9693440701695287e-05, "loss": 0.7911, "step": 8823 }, { "epoch": 1.4404718174768376, "grad_norm": 1.8674719333648682, "learning_rate": 1.9693362725882804e-05, "loss": 0.6728, "step": 8824 }, { "epoch": 1.440635076119342, "grad_norm": 1.5845552682876587, "learning_rate": 1.969328474030911e-05, "loss": 0.662, "step": 8825 }, { "epoch": 1.4407983347618465, "grad_norm": 1.7984018325805664, "learning_rate": 1.9693206744974276e-05, "loss": 0.6898, "step": 8826 }, { "epoch": 1.4409615934043507, "grad_norm": 1.4857940673828125, "learning_rate": 1.969312873987839e-05, "loss": 0.5708, "step": 8827 }, { "epoch": 1.4411248520468551, "grad_norm": 1.6860756874084473, "learning_rate": 1.9693050725021523e-05, "loss": 0.634, "step": 8828 }, { "epoch": 1.4412881106893596, "grad_norm": 1.8787457942962646, "learning_rate": 1.969297270040376e-05, "loss": 0.6958, "step": 8829 }, { "epoch": 1.441451369331864, "grad_norm": 1.9820045232772827, "learning_rate": 1.9692894666025174e-05, "loss": 0.6795, "step": 8830 }, { "epoch": 1.4416146279743685, "grad_norm": 1.8316231966018677, "learning_rate": 1.9692816621885846e-05, "loss": 0.5626, "step": 8831 }, { "epoch": 1.441777886616873, "grad_norm": 1.7729562520980835, "learning_rate": 1.9692738567985853e-05, "loss": 0.7078, "step": 8832 }, { "epoch": 1.441941145259377, "grad_norm": 1.785528302192688, "learning_rate": 1.9692660504325276e-05, "loss": 0.7462, "step": 8833 }, { "epoch": 1.4421044039018815, "grad_norm": 1.8229944705963135, "learning_rate": 1.9692582430904193e-05, "loss": 0.6522, "step": 8834 }, { "epoch": 1.442267662544386, "grad_norm": 1.9123971462249756, "learning_rate": 1.969250434772268e-05, "loss": 0.7042, "step": 8835 }, { "epoch": 1.4424309211868902, "grad_norm": 1.9770201444625854, "learning_rate": 1.9692426254780818e-05, "loss": 0.6078, "step": 8836 }, { "epoch": 1.4425941798293946, "grad_norm": 2.1088626384735107, "learning_rate": 1.969234815207869e-05, "loss": 0.7078, "step": 8837 }, { "epoch": 1.442757438471899, "grad_norm": 1.9816428422927856, "learning_rate": 1.9692270039616367e-05, "loss": 0.69, "step": 8838 }, { "epoch": 1.4429206971144035, "grad_norm": 1.7847563028335571, "learning_rate": 1.9692191917393927e-05, "loss": 0.6528, "step": 8839 }, { "epoch": 1.443083955756908, "grad_norm": 1.604605793952942, "learning_rate": 1.9692113785411456e-05, "loss": 0.6549, "step": 8840 }, { "epoch": 1.4432472143994124, "grad_norm": 2.4138731956481934, "learning_rate": 1.969203564366903e-05, "loss": 0.7697, "step": 8841 }, { "epoch": 1.4434104730419166, "grad_norm": 1.5967594385147095, "learning_rate": 1.9691957492166725e-05, "loss": 0.5896, "step": 8842 }, { "epoch": 1.443573731684421, "grad_norm": 1.6789475679397583, "learning_rate": 1.9691879330904618e-05, "loss": 0.6122, "step": 8843 }, { "epoch": 1.4437369903269255, "grad_norm": 1.994183897972107, "learning_rate": 1.9691801159882798e-05, "loss": 0.6192, "step": 8844 }, { "epoch": 1.4439002489694297, "grad_norm": 1.6905208826065063, "learning_rate": 1.9691722979101332e-05, "loss": 0.5792, "step": 8845 }, { "epoch": 1.4440635076119341, "grad_norm": 1.8205161094665527, "learning_rate": 1.96916447885603e-05, "loss": 0.7432, "step": 8846 }, { "epoch": 1.4442267662544386, "grad_norm": 2.0391979217529297, "learning_rate": 1.969156658825979e-05, "loss": 0.829, "step": 8847 }, { "epoch": 1.444390024896943, "grad_norm": 1.4860278367996216, "learning_rate": 1.9691488378199875e-05, "loss": 0.5302, "step": 8848 }, { "epoch": 1.4445532835394475, "grad_norm": 2.521899461746216, "learning_rate": 1.9691410158380636e-05, "loss": 0.7345, "step": 8849 }, { "epoch": 1.444716542181952, "grad_norm": 1.5531392097473145, "learning_rate": 1.9691331928802144e-05, "loss": 0.5653, "step": 8850 }, { "epoch": 1.444879800824456, "grad_norm": 1.8882185220718384, "learning_rate": 1.9691253689464487e-05, "loss": 0.6665, "step": 8851 }, { "epoch": 1.4450430594669605, "grad_norm": 1.9289053678512573, "learning_rate": 1.9691175440367737e-05, "loss": 0.8581, "step": 8852 }, { "epoch": 1.445206318109465, "grad_norm": 1.838510513305664, "learning_rate": 1.9691097181511978e-05, "loss": 0.6403, "step": 8853 }, { "epoch": 1.4453695767519692, "grad_norm": 1.6136966943740845, "learning_rate": 1.9691018912897285e-05, "loss": 0.5826, "step": 8854 }, { "epoch": 1.4455328353944736, "grad_norm": 1.6708736419677734, "learning_rate": 1.9690940634523742e-05, "loss": 0.618, "step": 8855 }, { "epoch": 1.445696094036978, "grad_norm": 1.8291469812393188, "learning_rate": 1.9690862346391424e-05, "loss": 0.5951, "step": 8856 }, { "epoch": 1.4458593526794825, "grad_norm": 1.8768844604492188, "learning_rate": 1.9690784048500406e-05, "loss": 0.6985, "step": 8857 }, { "epoch": 1.446022611321987, "grad_norm": 1.7540936470031738, "learning_rate": 1.9690705740850772e-05, "loss": 0.7089, "step": 8858 }, { "epoch": 1.4461858699644912, "grad_norm": 1.8675518035888672, "learning_rate": 1.9690627423442607e-05, "loss": 0.7893, "step": 8859 }, { "epoch": 1.4463491286069956, "grad_norm": 1.8266639709472656, "learning_rate": 1.9690549096275972e-05, "loss": 0.6541, "step": 8860 }, { "epoch": 1.4465123872495, "grad_norm": 1.8189997673034668, "learning_rate": 1.9690470759350965e-05, "loss": 0.8061, "step": 8861 }, { "epoch": 1.4466756458920045, "grad_norm": 1.9061384201049805, "learning_rate": 1.969039241266765e-05, "loss": 0.658, "step": 8862 }, { "epoch": 1.4468389045345087, "grad_norm": 1.792631983757019, "learning_rate": 1.969031405622612e-05, "loss": 0.664, "step": 8863 }, { "epoch": 1.4470021631770131, "grad_norm": 1.602497935295105, "learning_rate": 1.9690235690026438e-05, "loss": 0.6939, "step": 8864 }, { "epoch": 1.4471654218195176, "grad_norm": 1.7190130949020386, "learning_rate": 1.9690157314068696e-05, "loss": 0.627, "step": 8865 }, { "epoch": 1.447328680462022, "grad_norm": 1.664627194404602, "learning_rate": 1.969007892835297e-05, "loss": 0.6644, "step": 8866 }, { "epoch": 1.4474919391045264, "grad_norm": 1.7824649810791016, "learning_rate": 1.9690000532879333e-05, "loss": 0.7233, "step": 8867 }, { "epoch": 1.4476551977470307, "grad_norm": 1.507237434387207, "learning_rate": 1.9689922127647868e-05, "loss": 0.5911, "step": 8868 }, { "epoch": 1.447818456389535, "grad_norm": 1.8902795314788818, "learning_rate": 1.9689843712658655e-05, "loss": 0.702, "step": 8869 }, { "epoch": 1.4479817150320395, "grad_norm": 3.1822617053985596, "learning_rate": 1.9689765287911774e-05, "loss": 0.7296, "step": 8870 }, { "epoch": 1.448144973674544, "grad_norm": 2.177860736846924, "learning_rate": 1.9689686853407298e-05, "loss": 0.8308, "step": 8871 }, { "epoch": 1.4483082323170482, "grad_norm": 1.6397078037261963, "learning_rate": 1.9689608409145317e-05, "loss": 0.6511, "step": 8872 }, { "epoch": 1.4484714909595526, "grad_norm": 1.585407018661499, "learning_rate": 1.9689529955125896e-05, "loss": 0.5585, "step": 8873 }, { "epoch": 1.448634749602057, "grad_norm": 1.9478856325149536, "learning_rate": 1.9689451491349123e-05, "loss": 0.5887, "step": 8874 }, { "epoch": 1.4487980082445615, "grad_norm": 1.236572504043579, "learning_rate": 1.9689373017815076e-05, "loss": 0.4629, "step": 8875 }, { "epoch": 1.448961266887066, "grad_norm": 1.6886515617370605, "learning_rate": 1.968929453452383e-05, "loss": 0.6839, "step": 8876 }, { "epoch": 1.4491245255295702, "grad_norm": 1.6924022436141968, "learning_rate": 1.968921604147547e-05, "loss": 0.6596, "step": 8877 }, { "epoch": 1.4492877841720746, "grad_norm": 1.924888253211975, "learning_rate": 1.968913753867007e-05, "loss": 0.8504, "step": 8878 }, { "epoch": 1.449451042814579, "grad_norm": 1.2581098079681396, "learning_rate": 1.9689059026107712e-05, "loss": 0.5499, "step": 8879 }, { "epoch": 1.4496143014570833, "grad_norm": 1.4864521026611328, "learning_rate": 1.9688980503788474e-05, "loss": 0.6178, "step": 8880 }, { "epoch": 1.4497775600995877, "grad_norm": 1.8477166891098022, "learning_rate": 1.9688901971712436e-05, "loss": 0.749, "step": 8881 }, { "epoch": 1.4499408187420921, "grad_norm": 1.677072286605835, "learning_rate": 1.9688823429879676e-05, "loss": 0.647, "step": 8882 }, { "epoch": 1.4501040773845966, "grad_norm": 1.9211666584014893, "learning_rate": 1.9688744878290273e-05, "loss": 0.7423, "step": 8883 }, { "epoch": 1.450267336027101, "grad_norm": 2.0923755168914795, "learning_rate": 1.9688666316944306e-05, "loss": 0.7062, "step": 8884 }, { "epoch": 1.4504305946696054, "grad_norm": 1.7697687149047852, "learning_rate": 1.9688587745841856e-05, "loss": 0.7711, "step": 8885 }, { "epoch": 1.4505938533121097, "grad_norm": 1.7329542636871338, "learning_rate": 1.9688509164982998e-05, "loss": 0.7016, "step": 8886 }, { "epoch": 1.450757111954614, "grad_norm": 1.7890706062316895, "learning_rate": 1.968843057436782e-05, "loss": 0.6942, "step": 8887 }, { "epoch": 1.4509203705971185, "grad_norm": 1.8924055099487305, "learning_rate": 1.9688351973996388e-05, "loss": 0.7323, "step": 8888 }, { "epoch": 1.4510836292396228, "grad_norm": 2.0330381393432617, "learning_rate": 1.968827336386879e-05, "loss": 0.7495, "step": 8889 }, { "epoch": 1.4512468878821272, "grad_norm": 1.9958441257476807, "learning_rate": 1.9688194743985103e-05, "loss": 0.8014, "step": 8890 }, { "epoch": 1.4514101465246316, "grad_norm": 1.8537036180496216, "learning_rate": 1.968811611434541e-05, "loss": 0.5945, "step": 8891 }, { "epoch": 1.451573405167136, "grad_norm": 1.607219934463501, "learning_rate": 1.9688037474949784e-05, "loss": 0.6038, "step": 8892 }, { "epoch": 1.4517366638096405, "grad_norm": 1.8624411821365356, "learning_rate": 1.9687958825798306e-05, "loss": 0.747, "step": 8893 }, { "epoch": 1.451899922452145, "grad_norm": 1.9149376153945923, "learning_rate": 1.9687880166891058e-05, "loss": 0.9008, "step": 8894 }, { "epoch": 1.4520631810946492, "grad_norm": 1.603529453277588, "learning_rate": 1.9687801498228114e-05, "loss": 0.6193, "step": 8895 }, { "epoch": 1.4522264397371536, "grad_norm": 1.8671441078186035, "learning_rate": 1.968772281980956e-05, "loss": 0.597, "step": 8896 }, { "epoch": 1.452389698379658, "grad_norm": 1.612332820892334, "learning_rate": 1.9687644131635467e-05, "loss": 0.6899, "step": 8897 }, { "epoch": 1.4525529570221622, "grad_norm": 1.4945591688156128, "learning_rate": 1.9687565433705926e-05, "loss": 0.5442, "step": 8898 }, { "epoch": 1.4527162156646667, "grad_norm": 1.6702260971069336, "learning_rate": 1.9687486726021005e-05, "loss": 0.6935, "step": 8899 }, { "epoch": 1.4528794743071711, "grad_norm": 1.7897847890853882, "learning_rate": 1.9687408008580785e-05, "loss": 0.6721, "step": 8900 }, { "epoch": 1.4530427329496756, "grad_norm": 1.9352015256881714, "learning_rate": 1.968732928138535e-05, "loss": 0.696, "step": 8901 }, { "epoch": 1.45320599159218, "grad_norm": 1.6360608339309692, "learning_rate": 1.968725054443478e-05, "loss": 0.6205, "step": 8902 }, { "epoch": 1.4533692502346842, "grad_norm": 1.6993275880813599, "learning_rate": 1.968717179772915e-05, "loss": 0.6969, "step": 8903 }, { "epoch": 1.4535325088771887, "grad_norm": 1.7121833562850952, "learning_rate": 1.968709304126854e-05, "loss": 0.7229, "step": 8904 }, { "epoch": 1.453695767519693, "grad_norm": 1.625565528869629, "learning_rate": 1.9687014275053026e-05, "loss": 0.7491, "step": 8905 }, { "epoch": 1.4538590261621975, "grad_norm": 1.7381452322006226, "learning_rate": 1.9686935499082697e-05, "loss": 0.7959, "step": 8906 }, { "epoch": 1.4540222848047017, "grad_norm": 1.6734864711761475, "learning_rate": 1.9686856713357625e-05, "loss": 0.6764, "step": 8907 }, { "epoch": 1.4541855434472062, "grad_norm": 1.7063758373260498, "learning_rate": 1.9686777917877888e-05, "loss": 0.6148, "step": 8908 }, { "epoch": 1.4543488020897106, "grad_norm": 1.736620306968689, "learning_rate": 1.9686699112643574e-05, "loss": 0.7172, "step": 8909 }, { "epoch": 1.454512060732215, "grad_norm": 1.738698959350586, "learning_rate": 1.9686620297654747e-05, "loss": 0.6342, "step": 8910 }, { "epoch": 1.4546753193747195, "grad_norm": 1.5827399492263794, "learning_rate": 1.9686541472911506e-05, "loss": 0.6034, "step": 8911 }, { "epoch": 1.4548385780172237, "grad_norm": 1.6460212469100952, "learning_rate": 1.9686462638413914e-05, "loss": 0.7573, "step": 8912 }, { "epoch": 1.4550018366597282, "grad_norm": 1.6471400260925293, "learning_rate": 1.9686383794162057e-05, "loss": 0.6466, "step": 8913 }, { "epoch": 1.4551650953022326, "grad_norm": 1.5103318691253662, "learning_rate": 1.968630494015602e-05, "loss": 0.5164, "step": 8914 }, { "epoch": 1.455328353944737, "grad_norm": 1.4050352573394775, "learning_rate": 1.9686226076395873e-05, "loss": 0.6073, "step": 8915 }, { "epoch": 1.4554916125872412, "grad_norm": 1.7616324424743652, "learning_rate": 1.9686147202881694e-05, "loss": 0.6873, "step": 8916 }, { "epoch": 1.4556548712297457, "grad_norm": 1.7206569910049438, "learning_rate": 1.968606831961357e-05, "loss": 0.6892, "step": 8917 }, { "epoch": 1.4558181298722501, "grad_norm": 1.9095330238342285, "learning_rate": 1.968598942659158e-05, "loss": 0.8287, "step": 8918 }, { "epoch": 1.4559813885147546, "grad_norm": 2.0181291103363037, "learning_rate": 1.96859105238158e-05, "loss": 0.9043, "step": 8919 }, { "epoch": 1.456144647157259, "grad_norm": 1.5471086502075195, "learning_rate": 1.9685831611286312e-05, "loss": 0.5298, "step": 8920 }, { "epoch": 1.4563079057997632, "grad_norm": 2.033290386199951, "learning_rate": 1.9685752689003195e-05, "loss": 0.8341, "step": 8921 }, { "epoch": 1.4564711644422677, "grad_norm": 1.501089334487915, "learning_rate": 1.9685673756966524e-05, "loss": 0.6193, "step": 8922 }, { "epoch": 1.456634423084772, "grad_norm": 1.7674059867858887, "learning_rate": 1.9685594815176384e-05, "loss": 0.6865, "step": 8923 }, { "epoch": 1.4567976817272763, "grad_norm": 1.8313947916030884, "learning_rate": 1.9685515863632852e-05, "loss": 0.6695, "step": 8924 }, { "epoch": 1.4569609403697807, "grad_norm": 1.558915376663208, "learning_rate": 1.968543690233601e-05, "loss": 0.6673, "step": 8925 }, { "epoch": 1.4571241990122852, "grad_norm": 1.7462880611419678, "learning_rate": 1.9685357931285934e-05, "loss": 0.6312, "step": 8926 }, { "epoch": 1.4572874576547896, "grad_norm": 1.8824939727783203, "learning_rate": 1.9685278950482707e-05, "loss": 0.7016, "step": 8927 }, { "epoch": 1.457450716297294, "grad_norm": 2.0164718627929688, "learning_rate": 1.9685199959926403e-05, "loss": 0.846, "step": 8928 }, { "epoch": 1.4576139749397985, "grad_norm": 1.7564702033996582, "learning_rate": 1.9685120959617108e-05, "loss": 0.7853, "step": 8929 }, { "epoch": 1.4577772335823027, "grad_norm": 1.609257698059082, "learning_rate": 1.9685041949554896e-05, "loss": 0.6617, "step": 8930 }, { "epoch": 1.4579404922248071, "grad_norm": 2.1343986988067627, "learning_rate": 1.968496292973985e-05, "loss": 0.7274, "step": 8931 }, { "epoch": 1.4581037508673116, "grad_norm": 1.4686297178268433, "learning_rate": 1.9684883900172053e-05, "loss": 0.5256, "step": 8932 }, { "epoch": 1.4582670095098158, "grad_norm": 1.8336305618286133, "learning_rate": 1.9684804860851578e-05, "loss": 0.7734, "step": 8933 }, { "epoch": 1.4584302681523202, "grad_norm": 1.5332152843475342, "learning_rate": 1.9684725811778507e-05, "loss": 0.6916, "step": 8934 }, { "epoch": 1.4585935267948247, "grad_norm": 1.8223613500595093, "learning_rate": 1.9684646752952917e-05, "loss": 0.627, "step": 8935 }, { "epoch": 1.4587567854373291, "grad_norm": 1.650575041770935, "learning_rate": 1.9684567684374897e-05, "loss": 0.6455, "step": 8936 }, { "epoch": 1.4589200440798336, "grad_norm": 1.8065630197525024, "learning_rate": 1.9684488606044513e-05, "loss": 0.708, "step": 8937 }, { "epoch": 1.459083302722338, "grad_norm": 1.6888772249221802, "learning_rate": 1.9684409517961852e-05, "loss": 0.6138, "step": 8938 }, { "epoch": 1.4592465613648422, "grad_norm": 1.49433434009552, "learning_rate": 1.9684330420127e-05, "loss": 0.5789, "step": 8939 }, { "epoch": 1.4594098200073466, "grad_norm": 1.7103060483932495, "learning_rate": 1.9684251312540023e-05, "loss": 0.5977, "step": 8940 }, { "epoch": 1.459573078649851, "grad_norm": 1.4001315832138062, "learning_rate": 1.968417219520101e-05, "loss": 0.5701, "step": 8941 }, { "epoch": 1.4597363372923553, "grad_norm": 1.755624771118164, "learning_rate": 1.968409306811004e-05, "loss": 0.7509, "step": 8942 }, { "epoch": 1.4598995959348597, "grad_norm": 1.7058089971542358, "learning_rate": 1.9684013931267184e-05, "loss": 0.6316, "step": 8943 }, { "epoch": 1.4600628545773642, "grad_norm": 1.8235746622085571, "learning_rate": 1.9683934784672535e-05, "loss": 0.7068, "step": 8944 }, { "epoch": 1.4602261132198686, "grad_norm": 2.3905768394470215, "learning_rate": 1.9683855628326164e-05, "loss": 0.7308, "step": 8945 }, { "epoch": 1.460389371862373, "grad_norm": 1.8120334148406982, "learning_rate": 1.9683776462228153e-05, "loss": 0.6979, "step": 8946 }, { "epoch": 1.4605526305048773, "grad_norm": 1.9066009521484375, "learning_rate": 1.968369728637858e-05, "loss": 0.6391, "step": 8947 }, { "epoch": 1.4607158891473817, "grad_norm": 2.0647151470184326, "learning_rate": 1.9683618100777532e-05, "loss": 0.6905, "step": 8948 }, { "epoch": 1.4608791477898861, "grad_norm": 1.4807260036468506, "learning_rate": 1.968353890542508e-05, "loss": 0.6011, "step": 8949 }, { "epoch": 1.4610424064323906, "grad_norm": 1.5642013549804688, "learning_rate": 1.9683459700321305e-05, "loss": 0.5416, "step": 8950 }, { "epoch": 1.4612056650748948, "grad_norm": 2.0606138706207275, "learning_rate": 1.9683380485466292e-05, "loss": 0.8551, "step": 8951 }, { "epoch": 1.4613689237173992, "grad_norm": 1.5649231672286987, "learning_rate": 1.9683301260860115e-05, "loss": 0.552, "step": 8952 }, { "epoch": 1.4615321823599037, "grad_norm": 1.970979928970337, "learning_rate": 1.9683222026502856e-05, "loss": 0.7977, "step": 8953 }, { "epoch": 1.4616954410024081, "grad_norm": 1.6149612665176392, "learning_rate": 1.96831427823946e-05, "loss": 0.5893, "step": 8954 }, { "epoch": 1.4618586996449126, "grad_norm": 2.6173040866851807, "learning_rate": 1.9683063528535417e-05, "loss": 0.7842, "step": 8955 }, { "epoch": 1.4620219582874168, "grad_norm": 1.8031708002090454, "learning_rate": 1.968298426492539e-05, "loss": 0.7307, "step": 8956 }, { "epoch": 1.4621852169299212, "grad_norm": 1.7610505819320679, "learning_rate": 1.9682904991564603e-05, "loss": 0.7092, "step": 8957 }, { "epoch": 1.4623484755724256, "grad_norm": 1.911726951599121, "learning_rate": 1.968282570845313e-05, "loss": 0.7592, "step": 8958 }, { "epoch": 1.46251173421493, "grad_norm": 1.5143975019454956, "learning_rate": 1.968274641559106e-05, "loss": 0.5559, "step": 8959 }, { "epoch": 1.4626749928574343, "grad_norm": 1.7001502513885498, "learning_rate": 1.9682667112978464e-05, "loss": 0.6005, "step": 8960 }, { "epoch": 1.4628382514999387, "grad_norm": 1.7902227640151978, "learning_rate": 1.9682587800615425e-05, "loss": 0.715, "step": 8961 }, { "epoch": 1.4630015101424432, "grad_norm": 1.785680890083313, "learning_rate": 1.968250847850202e-05, "loss": 0.684, "step": 8962 }, { "epoch": 1.4631647687849476, "grad_norm": 1.8269716501235962, "learning_rate": 1.9682429146638336e-05, "loss": 0.6348, "step": 8963 }, { "epoch": 1.463328027427452, "grad_norm": 1.5608235597610474, "learning_rate": 1.9682349805024447e-05, "loss": 0.5834, "step": 8964 }, { "epoch": 1.4634912860699563, "grad_norm": 2.2523598670959473, "learning_rate": 1.9682270453660432e-05, "loss": 0.787, "step": 8965 }, { "epoch": 1.4636545447124607, "grad_norm": 1.7545371055603027, "learning_rate": 1.9682191092546374e-05, "loss": 0.6705, "step": 8966 }, { "epoch": 1.4638178033549651, "grad_norm": 2.055321455001831, "learning_rate": 1.9682111721682353e-05, "loss": 0.6296, "step": 8967 }, { "epoch": 1.4639810619974694, "grad_norm": 1.9529483318328857, "learning_rate": 1.9682032341068448e-05, "loss": 0.6944, "step": 8968 }, { "epoch": 1.4641443206399738, "grad_norm": 1.7448019981384277, "learning_rate": 1.9681952950704743e-05, "loss": 0.6622, "step": 8969 }, { "epoch": 1.4643075792824782, "grad_norm": 1.8738882541656494, "learning_rate": 1.9681873550591306e-05, "loss": 0.7063, "step": 8970 }, { "epoch": 1.4644708379249827, "grad_norm": 1.9551479816436768, "learning_rate": 1.968179414072823e-05, "loss": 0.6939, "step": 8971 }, { "epoch": 1.464634096567487, "grad_norm": 1.868450403213501, "learning_rate": 1.9681714721115587e-05, "loss": 0.7406, "step": 8972 }, { "epoch": 1.4647973552099915, "grad_norm": 1.7002311944961548, "learning_rate": 1.9681635291753462e-05, "loss": 0.7327, "step": 8973 }, { "epoch": 1.4649606138524958, "grad_norm": 1.853320598602295, "learning_rate": 1.9681555852641933e-05, "loss": 0.6743, "step": 8974 }, { "epoch": 1.4651238724950002, "grad_norm": 1.9320247173309326, "learning_rate": 1.968147640378108e-05, "loss": 0.7754, "step": 8975 }, { "epoch": 1.4652871311375046, "grad_norm": 1.8944302797317505, "learning_rate": 1.968139694517098e-05, "loss": 0.7493, "step": 8976 }, { "epoch": 1.4654503897800089, "grad_norm": 1.8367654085159302, "learning_rate": 1.968131747681172e-05, "loss": 0.6369, "step": 8977 }, { "epoch": 1.4656136484225133, "grad_norm": 1.6192930936813354, "learning_rate": 1.9681237998703373e-05, "loss": 0.6345, "step": 8978 }, { "epoch": 1.4657769070650177, "grad_norm": 1.45469069480896, "learning_rate": 1.968115851084602e-05, "loss": 0.6031, "step": 8979 }, { "epoch": 1.4659401657075222, "grad_norm": 1.8003584146499634, "learning_rate": 1.9681079013239748e-05, "loss": 0.6345, "step": 8980 }, { "epoch": 1.4661034243500266, "grad_norm": 1.8090649843215942, "learning_rate": 1.968099950588463e-05, "loss": 0.6823, "step": 8981 }, { "epoch": 1.466266682992531, "grad_norm": 1.8707369565963745, "learning_rate": 1.968091998878075e-05, "loss": 0.6424, "step": 8982 }, { "epoch": 1.4664299416350353, "grad_norm": 1.453446388244629, "learning_rate": 1.968084046192818e-05, "loss": 0.5033, "step": 8983 }, { "epoch": 1.4665932002775397, "grad_norm": 1.6681972742080688, "learning_rate": 1.968076092532701e-05, "loss": 0.7276, "step": 8984 }, { "epoch": 1.4667564589200441, "grad_norm": 1.7357639074325562, "learning_rate": 1.9680681378977317e-05, "loss": 0.6636, "step": 8985 }, { "epoch": 1.4669197175625484, "grad_norm": 1.9360885620117188, "learning_rate": 1.968060182287918e-05, "loss": 0.8142, "step": 8986 }, { "epoch": 1.4670829762050528, "grad_norm": 1.8844066858291626, "learning_rate": 1.968052225703268e-05, "loss": 0.6701, "step": 8987 }, { "epoch": 1.4672462348475572, "grad_norm": 1.8245185613632202, "learning_rate": 1.9680442681437895e-05, "loss": 0.7221, "step": 8988 }, { "epoch": 1.4674094934900617, "grad_norm": 1.888819932937622, "learning_rate": 1.9680363096094906e-05, "loss": 0.7516, "step": 8989 }, { "epoch": 1.467572752132566, "grad_norm": 2.142040967941284, "learning_rate": 1.96802835010038e-05, "loss": 0.7106, "step": 8990 }, { "epoch": 1.4677360107750705, "grad_norm": 1.72548508644104, "learning_rate": 1.9680203896164646e-05, "loss": 0.6289, "step": 8991 }, { "epoch": 1.4678992694175748, "grad_norm": 1.670089602470398, "learning_rate": 1.968012428157753e-05, "loss": 0.6565, "step": 8992 }, { "epoch": 1.4680625280600792, "grad_norm": 1.9842661619186401, "learning_rate": 1.9680044657242532e-05, "loss": 0.6647, "step": 8993 }, { "epoch": 1.4682257867025836, "grad_norm": 1.5729340314865112, "learning_rate": 1.967996502315973e-05, "loss": 0.6357, "step": 8994 }, { "epoch": 1.4683890453450879, "grad_norm": 1.904778242111206, "learning_rate": 1.9679885379329208e-05, "loss": 0.815, "step": 8995 }, { "epoch": 1.4685523039875923, "grad_norm": 1.6344839334487915, "learning_rate": 1.967980572575104e-05, "loss": 0.6655, "step": 8996 }, { "epoch": 1.4687155626300967, "grad_norm": 1.3686703443527222, "learning_rate": 1.9679726062425314e-05, "loss": 0.5549, "step": 8997 }, { "epoch": 1.4688788212726012, "grad_norm": 2.151660442352295, "learning_rate": 1.9679646389352104e-05, "loss": 0.7694, "step": 8998 }, { "epoch": 1.4690420799151056, "grad_norm": 1.7980393171310425, "learning_rate": 1.9679566706531497e-05, "loss": 0.7108, "step": 8999 }, { "epoch": 1.4692053385576098, "grad_norm": 1.281118392944336, "learning_rate": 1.9679487013963566e-05, "loss": 0.5591, "step": 9000 }, { "epoch": 1.4693685972001143, "grad_norm": 1.8228079080581665, "learning_rate": 1.9679407311648394e-05, "loss": 0.8789, "step": 9001 }, { "epoch": 1.4695318558426187, "grad_norm": 1.6603171825408936, "learning_rate": 1.967932759958606e-05, "loss": 0.6465, "step": 9002 }, { "epoch": 1.4696951144851231, "grad_norm": 1.6893362998962402, "learning_rate": 1.9679247877776647e-05, "loss": 0.753, "step": 9003 }, { "epoch": 1.4698583731276273, "grad_norm": 1.5887150764465332, "learning_rate": 1.9679168146220237e-05, "loss": 0.6982, "step": 9004 }, { "epoch": 1.4700216317701318, "grad_norm": 2.0297529697418213, "learning_rate": 1.96790884049169e-05, "loss": 0.9495, "step": 9005 }, { "epoch": 1.4701848904126362, "grad_norm": 1.7772586345672607, "learning_rate": 1.967900865386673e-05, "loss": 0.5847, "step": 9006 }, { "epoch": 1.4703481490551407, "grad_norm": 1.844789981842041, "learning_rate": 1.9678928893069797e-05, "loss": 0.6561, "step": 9007 }, { "epoch": 1.470511407697645, "grad_norm": 1.8459062576293945, "learning_rate": 1.967884912252619e-05, "loss": 0.6643, "step": 9008 }, { "epoch": 1.4706746663401493, "grad_norm": 1.3451393842697144, "learning_rate": 1.967876934223598e-05, "loss": 0.5516, "step": 9009 }, { "epoch": 1.4708379249826538, "grad_norm": 1.5642448663711548, "learning_rate": 1.9678689552199252e-05, "loss": 0.6731, "step": 9010 }, { "epoch": 1.4710011836251582, "grad_norm": 1.9455457925796509, "learning_rate": 1.967860975241609e-05, "loss": 0.8433, "step": 9011 }, { "epoch": 1.4711644422676624, "grad_norm": 1.8119231462478638, "learning_rate": 1.9678529942886567e-05, "loss": 0.7301, "step": 9012 }, { "epoch": 1.4713277009101668, "grad_norm": 1.4559024572372437, "learning_rate": 1.967845012361077e-05, "loss": 0.6607, "step": 9013 }, { "epoch": 1.4714909595526713, "grad_norm": 1.8885624408721924, "learning_rate": 1.967837029458877e-05, "loss": 0.6219, "step": 9014 }, { "epoch": 1.4716542181951757, "grad_norm": 1.8942476511001587, "learning_rate": 1.967829045582066e-05, "loss": 0.8689, "step": 9015 }, { "epoch": 1.4718174768376802, "grad_norm": 1.6232682466506958, "learning_rate": 1.967821060730651e-05, "loss": 0.7082, "step": 9016 }, { "epoch": 1.4719807354801846, "grad_norm": 1.6811983585357666, "learning_rate": 1.967813074904641e-05, "loss": 0.7271, "step": 9017 }, { "epoch": 1.4721439941226888, "grad_norm": 1.6239910125732422, "learning_rate": 1.967805088104043e-05, "loss": 0.6812, "step": 9018 }, { "epoch": 1.4723072527651933, "grad_norm": 1.9905056953430176, "learning_rate": 1.9677971003288657e-05, "loss": 0.7455, "step": 9019 }, { "epoch": 1.4724705114076977, "grad_norm": 1.909723162651062, "learning_rate": 1.967789111579117e-05, "loss": 0.8209, "step": 9020 }, { "epoch": 1.472633770050202, "grad_norm": 1.8239691257476807, "learning_rate": 1.9677811218548046e-05, "loss": 0.6477, "step": 9021 }, { "epoch": 1.4727970286927063, "grad_norm": 1.5077400207519531, "learning_rate": 1.9677731311559373e-05, "loss": 0.5737, "step": 9022 }, { "epoch": 1.4729602873352108, "grad_norm": 1.6240965127944946, "learning_rate": 1.9677651394825227e-05, "loss": 0.6272, "step": 9023 }, { "epoch": 1.4731235459777152, "grad_norm": 1.628836989402771, "learning_rate": 1.9677571468345686e-05, "loss": 0.6922, "step": 9024 }, { "epoch": 1.4732868046202197, "grad_norm": 1.8529151678085327, "learning_rate": 1.9677491532120834e-05, "loss": 0.6906, "step": 9025 }, { "epoch": 1.473450063262724, "grad_norm": 1.7926055192947388, "learning_rate": 1.9677411586150753e-05, "loss": 0.787, "step": 9026 }, { "epoch": 1.4736133219052283, "grad_norm": 1.7347469329833984, "learning_rate": 1.9677331630435517e-05, "loss": 0.6541, "step": 9027 }, { "epoch": 1.4737765805477328, "grad_norm": 1.986503005027771, "learning_rate": 1.967725166497521e-05, "loss": 0.7756, "step": 9028 }, { "epoch": 1.4739398391902372, "grad_norm": 2.0473058223724365, "learning_rate": 1.9677171689769916e-05, "loss": 0.7631, "step": 9029 }, { "epoch": 1.4741030978327414, "grad_norm": 1.802970290184021, "learning_rate": 1.9677091704819714e-05, "loss": 0.6057, "step": 9030 }, { "epoch": 1.4742663564752458, "grad_norm": 2.1840810775756836, "learning_rate": 1.9677011710124683e-05, "loss": 0.7156, "step": 9031 }, { "epoch": 1.4744296151177503, "grad_norm": 1.8086096048355103, "learning_rate": 1.96769317056849e-05, "loss": 0.6159, "step": 9032 }, { "epoch": 1.4745928737602547, "grad_norm": 1.9140915870666504, "learning_rate": 1.9676851691500453e-05, "loss": 0.78, "step": 9033 }, { "epoch": 1.4747561324027592, "grad_norm": 1.899588704109192, "learning_rate": 1.9676771667571418e-05, "loss": 0.7481, "step": 9034 }, { "epoch": 1.4749193910452636, "grad_norm": 1.7222570180892944, "learning_rate": 1.9676691633897875e-05, "loss": 0.6163, "step": 9035 }, { "epoch": 1.4750826496877678, "grad_norm": 1.9422483444213867, "learning_rate": 1.9676611590479906e-05, "loss": 0.7814, "step": 9036 }, { "epoch": 1.4752459083302722, "grad_norm": 1.7540614604949951, "learning_rate": 1.9676531537317595e-05, "loss": 0.8064, "step": 9037 }, { "epoch": 1.4754091669727767, "grad_norm": 1.5926730632781982, "learning_rate": 1.9676451474411017e-05, "loss": 0.7934, "step": 9038 }, { "epoch": 1.475572425615281, "grad_norm": 1.6391246318817139, "learning_rate": 1.9676371401760254e-05, "loss": 0.6613, "step": 9039 }, { "epoch": 1.4757356842577853, "grad_norm": 2.1777079105377197, "learning_rate": 1.967629131936539e-05, "loss": 0.8736, "step": 9040 }, { "epoch": 1.4758989429002898, "grad_norm": 1.7378536462783813, "learning_rate": 1.96762112272265e-05, "loss": 0.7168, "step": 9041 }, { "epoch": 1.4760622015427942, "grad_norm": 1.8915256261825562, "learning_rate": 1.967613112534367e-05, "loss": 0.6616, "step": 9042 }, { "epoch": 1.4762254601852987, "grad_norm": 1.9580936431884766, "learning_rate": 1.967605101371698e-05, "loss": 0.82, "step": 9043 }, { "epoch": 1.4763887188278029, "grad_norm": 1.555055022239685, "learning_rate": 1.9675970892346507e-05, "loss": 0.6338, "step": 9044 }, { "epoch": 1.4765519774703073, "grad_norm": 1.7066384553909302, "learning_rate": 1.9675890761232333e-05, "loss": 0.7131, "step": 9045 }, { "epoch": 1.4767152361128117, "grad_norm": 2.2390172481536865, "learning_rate": 1.967581062037454e-05, "loss": 0.7875, "step": 9046 }, { "epoch": 1.4768784947553162, "grad_norm": 1.7735713720321655, "learning_rate": 1.967573046977321e-05, "loss": 0.7758, "step": 9047 }, { "epoch": 1.4770417533978204, "grad_norm": 1.4973689317703247, "learning_rate": 1.9675650309428422e-05, "loss": 0.5297, "step": 9048 }, { "epoch": 1.4772050120403248, "grad_norm": 2.062238931655884, "learning_rate": 1.9675570139340253e-05, "loss": 0.6909, "step": 9049 }, { "epoch": 1.4773682706828293, "grad_norm": 1.9142225980758667, "learning_rate": 1.9675489959508794e-05, "loss": 0.7513, "step": 9050 }, { "epoch": 1.4775315293253337, "grad_norm": 1.7617467641830444, "learning_rate": 1.9675409769934114e-05, "loss": 0.7313, "step": 9051 }, { "epoch": 1.4776947879678382, "grad_norm": 1.784393072128296, "learning_rate": 1.96753295706163e-05, "loss": 0.817, "step": 9052 }, { "epoch": 1.4778580466103424, "grad_norm": 1.799633502960205, "learning_rate": 1.9675249361555432e-05, "loss": 0.7463, "step": 9053 }, { "epoch": 1.4780213052528468, "grad_norm": 1.5760775804519653, "learning_rate": 1.967516914275159e-05, "loss": 0.5882, "step": 9054 }, { "epoch": 1.4781845638953512, "grad_norm": 1.7817902565002441, "learning_rate": 1.9675088914204857e-05, "loss": 0.7736, "step": 9055 }, { "epoch": 1.4783478225378555, "grad_norm": 1.8306775093078613, "learning_rate": 1.967500867591531e-05, "loss": 0.6716, "step": 9056 }, { "epoch": 1.47851108118036, "grad_norm": 1.504346251487732, "learning_rate": 1.9674928427883034e-05, "loss": 0.6421, "step": 9057 }, { "epoch": 1.4786743398228643, "grad_norm": 1.5553910732269287, "learning_rate": 1.9674848170108104e-05, "loss": 0.6441, "step": 9058 }, { "epoch": 1.4788375984653688, "grad_norm": 1.7634400129318237, "learning_rate": 1.967476790259061e-05, "loss": 0.6661, "step": 9059 }, { "epoch": 1.4790008571078732, "grad_norm": 1.7333580255508423, "learning_rate": 1.9674687625330623e-05, "loss": 0.6357, "step": 9060 }, { "epoch": 1.4791641157503777, "grad_norm": 1.7884796857833862, "learning_rate": 1.9674607338328228e-05, "loss": 0.5948, "step": 9061 }, { "epoch": 1.4793273743928819, "grad_norm": 1.8649100065231323, "learning_rate": 1.967452704158351e-05, "loss": 0.78, "step": 9062 }, { "epoch": 1.4794906330353863, "grad_norm": 2.2516536712646484, "learning_rate": 1.9674446735096542e-05, "loss": 0.7381, "step": 9063 }, { "epoch": 1.4796538916778907, "grad_norm": 1.592813491821289, "learning_rate": 1.967436641886741e-05, "loss": 0.5817, "step": 9064 }, { "epoch": 1.479817150320395, "grad_norm": 1.4915244579315186, "learning_rate": 1.9674286092896195e-05, "loss": 0.4999, "step": 9065 }, { "epoch": 1.4799804089628994, "grad_norm": 1.7625669240951538, "learning_rate": 1.9674205757182974e-05, "loss": 0.7207, "step": 9066 }, { "epoch": 1.4801436676054038, "grad_norm": 1.804884433746338, "learning_rate": 1.967412541172783e-05, "loss": 0.6145, "step": 9067 }, { "epoch": 1.4803069262479083, "grad_norm": 1.881123661994934, "learning_rate": 1.9674045056530845e-05, "loss": 0.7023, "step": 9068 }, { "epoch": 1.4804701848904127, "grad_norm": 1.9595675468444824, "learning_rate": 1.9673964691592098e-05, "loss": 0.8453, "step": 9069 }, { "epoch": 1.4806334435329171, "grad_norm": 1.7436041831970215, "learning_rate": 1.9673884316911673e-05, "loss": 0.7787, "step": 9070 }, { "epoch": 1.4807967021754214, "grad_norm": 2.031121253967285, "learning_rate": 1.967380393248965e-05, "loss": 0.7452, "step": 9071 }, { "epoch": 1.4809599608179258, "grad_norm": 1.909403920173645, "learning_rate": 1.9673723538326105e-05, "loss": 0.7403, "step": 9072 }, { "epoch": 1.4811232194604302, "grad_norm": 1.9976520538330078, "learning_rate": 1.967364313442113e-05, "loss": 0.9728, "step": 9073 }, { "epoch": 1.4812864781029345, "grad_norm": 1.8859496116638184, "learning_rate": 1.9673562720774792e-05, "loss": 0.7759, "step": 9074 }, { "epoch": 1.481449736745439, "grad_norm": 2.164538621902466, "learning_rate": 1.9673482297387184e-05, "loss": 0.7303, "step": 9075 }, { "epoch": 1.4816129953879433, "grad_norm": 1.7385215759277344, "learning_rate": 1.967340186425838e-05, "loss": 0.6137, "step": 9076 }, { "epoch": 1.4817762540304478, "grad_norm": 2.0848400592803955, "learning_rate": 1.967332142138846e-05, "loss": 0.7787, "step": 9077 }, { "epoch": 1.4819395126729522, "grad_norm": 1.7590702772140503, "learning_rate": 1.967324096877751e-05, "loss": 0.7461, "step": 9078 }, { "epoch": 1.4821027713154566, "grad_norm": 1.5054785013198853, "learning_rate": 1.9673160506425607e-05, "loss": 0.5668, "step": 9079 }, { "epoch": 1.4822660299579609, "grad_norm": 1.7041674852371216, "learning_rate": 1.967308003433284e-05, "loss": 0.6293, "step": 9080 }, { "epoch": 1.4824292886004653, "grad_norm": 1.8820064067840576, "learning_rate": 1.967299955249928e-05, "loss": 0.6842, "step": 9081 }, { "epoch": 1.4825925472429697, "grad_norm": 2.2066659927368164, "learning_rate": 1.967291906092501e-05, "loss": 0.6683, "step": 9082 }, { "epoch": 1.482755805885474, "grad_norm": 2.0514180660247803, "learning_rate": 1.9672838559610118e-05, "loss": 0.6462, "step": 9083 }, { "epoch": 1.4829190645279784, "grad_norm": 2.01760196685791, "learning_rate": 1.9672758048554677e-05, "loss": 0.7397, "step": 9084 }, { "epoch": 1.4830823231704828, "grad_norm": 2.738631010055542, "learning_rate": 1.967267752775877e-05, "loss": 0.5749, "step": 9085 }, { "epoch": 1.4832455818129873, "grad_norm": 1.6711211204528809, "learning_rate": 1.967259699722248e-05, "loss": 0.6433, "step": 9086 }, { "epoch": 1.4834088404554917, "grad_norm": 1.727579951286316, "learning_rate": 1.9672516456945888e-05, "loss": 0.6293, "step": 9087 }, { "epoch": 1.483572099097996, "grad_norm": 1.698111891746521, "learning_rate": 1.9672435906929074e-05, "loss": 0.8248, "step": 9088 }, { "epoch": 1.4837353577405004, "grad_norm": 1.9113200902938843, "learning_rate": 1.9672355347172122e-05, "loss": 0.9747, "step": 9089 }, { "epoch": 1.4838986163830048, "grad_norm": 1.4510388374328613, "learning_rate": 1.9672274777675108e-05, "loss": 0.598, "step": 9090 }, { "epoch": 1.4840618750255092, "grad_norm": 1.572234034538269, "learning_rate": 1.9672194198438117e-05, "loss": 0.6069, "step": 9091 }, { "epoch": 1.4842251336680135, "grad_norm": 1.679648995399475, "learning_rate": 1.967211360946123e-05, "loss": 0.6325, "step": 9092 }, { "epoch": 1.484388392310518, "grad_norm": 1.8022860288619995, "learning_rate": 1.9672033010744526e-05, "loss": 0.6567, "step": 9093 }, { "epoch": 1.4845516509530223, "grad_norm": 1.748140811920166, "learning_rate": 1.967195240228809e-05, "loss": 0.7216, "step": 9094 }, { "epoch": 1.4847149095955268, "grad_norm": 2.0105020999908447, "learning_rate": 1.9671871784091997e-05, "loss": 0.8218, "step": 9095 }, { "epoch": 1.4848781682380312, "grad_norm": 1.7798386812210083, "learning_rate": 1.967179115615633e-05, "loss": 0.6534, "step": 9096 }, { "epoch": 1.4850414268805354, "grad_norm": 1.692250370979309, "learning_rate": 1.9671710518481177e-05, "loss": 0.5302, "step": 9097 }, { "epoch": 1.4852046855230399, "grad_norm": 1.853210210800171, "learning_rate": 1.967162987106661e-05, "loss": 0.6747, "step": 9098 }, { "epoch": 1.4853679441655443, "grad_norm": 2.030076265335083, "learning_rate": 1.9671549213912716e-05, "loss": 0.8537, "step": 9099 }, { "epoch": 1.4855312028080487, "grad_norm": 1.8264412879943848, "learning_rate": 1.9671468547019575e-05, "loss": 0.7458, "step": 9100 }, { "epoch": 1.485694461450553, "grad_norm": 1.701270580291748, "learning_rate": 1.9671387870387266e-05, "loss": 0.733, "step": 9101 }, { "epoch": 1.4858577200930574, "grad_norm": 1.9856560230255127, "learning_rate": 1.9671307184015873e-05, "loss": 0.6115, "step": 9102 }, { "epoch": 1.4860209787355618, "grad_norm": 1.9717539548873901, "learning_rate": 1.9671226487905476e-05, "loss": 0.8944, "step": 9103 }, { "epoch": 1.4861842373780663, "grad_norm": 1.9335845708847046, "learning_rate": 1.9671145782056157e-05, "loss": 0.667, "step": 9104 }, { "epoch": 1.4863474960205707, "grad_norm": 1.6994874477386475, "learning_rate": 1.9671065066467996e-05, "loss": 0.5546, "step": 9105 }, { "epoch": 1.486510754663075, "grad_norm": 1.7942761182785034, "learning_rate": 1.9670984341141074e-05, "loss": 0.6887, "step": 9106 }, { "epoch": 1.4866740133055794, "grad_norm": 2.0976054668426514, "learning_rate": 1.9670903606075475e-05, "loss": 0.7733, "step": 9107 }, { "epoch": 1.4868372719480838, "grad_norm": 1.624464988708496, "learning_rate": 1.9670822861271278e-05, "loss": 0.6162, "step": 9108 }, { "epoch": 1.487000530590588, "grad_norm": 1.6011568307876587, "learning_rate": 1.9670742106728567e-05, "loss": 0.6705, "step": 9109 }, { "epoch": 1.4871637892330924, "grad_norm": 1.576406717300415, "learning_rate": 1.967066134244742e-05, "loss": 0.6255, "step": 9110 }, { "epoch": 1.4873270478755969, "grad_norm": 1.797203779220581, "learning_rate": 1.9670580568427917e-05, "loss": 0.6619, "step": 9111 }, { "epoch": 1.4874903065181013, "grad_norm": 1.3939777612686157, "learning_rate": 1.9670499784670145e-05, "loss": 0.55, "step": 9112 }, { "epoch": 1.4876535651606058, "grad_norm": 1.9019756317138672, "learning_rate": 1.9670418991174184e-05, "loss": 0.7272, "step": 9113 }, { "epoch": 1.4878168238031102, "grad_norm": 1.8233121633529663, "learning_rate": 1.967033818794011e-05, "loss": 0.6993, "step": 9114 }, { "epoch": 1.4879800824456144, "grad_norm": 1.8720684051513672, "learning_rate": 1.967025737496801e-05, "loss": 0.7981, "step": 9115 }, { "epoch": 1.4881433410881189, "grad_norm": 1.3731341361999512, "learning_rate": 1.967017655225796e-05, "loss": 0.5393, "step": 9116 }, { "epoch": 1.4883065997306233, "grad_norm": 1.834725260734558, "learning_rate": 1.9670095719810048e-05, "loss": 0.7205, "step": 9117 }, { "epoch": 1.4884698583731275, "grad_norm": 2.1068835258483887, "learning_rate": 1.9670014877624353e-05, "loss": 0.9391, "step": 9118 }, { "epoch": 1.488633117015632, "grad_norm": 1.5917508602142334, "learning_rate": 1.966993402570095e-05, "loss": 0.6302, "step": 9119 }, { "epoch": 1.4887963756581364, "grad_norm": 1.9223756790161133, "learning_rate": 1.9669853164039935e-05, "loss": 0.8191, "step": 9120 }, { "epoch": 1.4889596343006408, "grad_norm": 1.9295414686203003, "learning_rate": 1.9669772292641375e-05, "loss": 0.811, "step": 9121 }, { "epoch": 1.4891228929431453, "grad_norm": 1.7273468971252441, "learning_rate": 1.9669691411505354e-05, "loss": 0.6129, "step": 9122 }, { "epoch": 1.4892861515856497, "grad_norm": 1.4255917072296143, "learning_rate": 1.966961052063196e-05, "loss": 0.566, "step": 9123 }, { "epoch": 1.489449410228154, "grad_norm": 1.725005030632019, "learning_rate": 1.966952962002127e-05, "loss": 0.7371, "step": 9124 }, { "epoch": 1.4896126688706584, "grad_norm": 1.9894649982452393, "learning_rate": 1.9669448709673368e-05, "loss": 0.8302, "step": 9125 }, { "epoch": 1.4897759275131628, "grad_norm": 1.9463542699813843, "learning_rate": 1.9669367789588333e-05, "loss": 0.9191, "step": 9126 }, { "epoch": 1.489939186155667, "grad_norm": 1.8729273080825806, "learning_rate": 1.9669286859766248e-05, "loss": 0.6541, "step": 9127 }, { "epoch": 1.4901024447981714, "grad_norm": 1.723410964012146, "learning_rate": 1.9669205920207194e-05, "loss": 0.8172, "step": 9128 }, { "epoch": 1.4902657034406759, "grad_norm": 1.819735050201416, "learning_rate": 1.9669124970911245e-05, "loss": 0.6752, "step": 9129 }, { "epoch": 1.4904289620831803, "grad_norm": 1.7257792949676514, "learning_rate": 1.9669044011878497e-05, "loss": 0.5587, "step": 9130 }, { "epoch": 1.4905922207256848, "grad_norm": 1.8726648092269897, "learning_rate": 1.9668963043109023e-05, "loss": 0.8271, "step": 9131 }, { "epoch": 1.490755479368189, "grad_norm": 1.4942326545715332, "learning_rate": 1.9668882064602906e-05, "loss": 0.5533, "step": 9132 }, { "epoch": 1.4909187380106934, "grad_norm": 1.7653864622116089, "learning_rate": 1.9668801076360227e-05, "loss": 0.7308, "step": 9133 }, { "epoch": 1.4910819966531978, "grad_norm": 1.714220643043518, "learning_rate": 1.9668720078381066e-05, "loss": 0.6165, "step": 9134 }, { "epoch": 1.4912452552957023, "grad_norm": 1.9725171327590942, "learning_rate": 1.966863907066551e-05, "loss": 0.8641, "step": 9135 }, { "epoch": 1.4914085139382065, "grad_norm": 1.8221533298492432, "learning_rate": 1.9668558053213634e-05, "loss": 0.7162, "step": 9136 }, { "epoch": 1.491571772580711, "grad_norm": 1.8465925455093384, "learning_rate": 1.9668477026025525e-05, "loss": 0.8089, "step": 9137 }, { "epoch": 1.4917350312232154, "grad_norm": 2.0709924697875977, "learning_rate": 1.966839598910126e-05, "loss": 0.7927, "step": 9138 }, { "epoch": 1.4918982898657198, "grad_norm": 1.331532597541809, "learning_rate": 1.9668314942440923e-05, "loss": 0.5162, "step": 9139 }, { "epoch": 1.4920615485082243, "grad_norm": 1.3391073942184448, "learning_rate": 1.9668233886044597e-05, "loss": 0.5695, "step": 9140 }, { "epoch": 1.4922248071507285, "grad_norm": 1.4688925743103027, "learning_rate": 1.966815281991236e-05, "loss": 0.5507, "step": 9141 }, { "epoch": 1.492388065793233, "grad_norm": 1.8267731666564941, "learning_rate": 1.9668071744044295e-05, "loss": 0.7267, "step": 9142 }, { "epoch": 1.4925513244357373, "grad_norm": 2.8579459190368652, "learning_rate": 1.9667990658440487e-05, "loss": 0.5176, "step": 9143 }, { "epoch": 1.4927145830782418, "grad_norm": 1.6282614469528198, "learning_rate": 1.9667909563101015e-05, "loss": 0.6303, "step": 9144 }, { "epoch": 1.492877841720746, "grad_norm": 2.0728390216827393, "learning_rate": 1.966782845802596e-05, "loss": 0.8314, "step": 9145 }, { "epoch": 1.4930411003632504, "grad_norm": 1.6684895753860474, "learning_rate": 1.9667747343215402e-05, "loss": 0.5929, "step": 9146 }, { "epoch": 1.4932043590057549, "grad_norm": 1.7978848218917847, "learning_rate": 1.9667666218669428e-05, "loss": 0.6771, "step": 9147 }, { "epoch": 1.4933676176482593, "grad_norm": 1.8945505619049072, "learning_rate": 1.9667585084388117e-05, "loss": 0.8041, "step": 9148 }, { "epoch": 1.4935308762907638, "grad_norm": 1.6281328201293945, "learning_rate": 1.966750394037155e-05, "loss": 0.6738, "step": 9149 }, { "epoch": 1.493694134933268, "grad_norm": 1.9344099760055542, "learning_rate": 1.9667422786619804e-05, "loss": 0.7119, "step": 9150 }, { "epoch": 1.4938573935757724, "grad_norm": 1.782348871231079, "learning_rate": 1.966734162313297e-05, "loss": 0.6851, "step": 9151 }, { "epoch": 1.4940206522182768, "grad_norm": 2.119680643081665, "learning_rate": 1.9667260449911126e-05, "loss": 0.6951, "step": 9152 }, { "epoch": 1.494183910860781, "grad_norm": 1.817963719367981, "learning_rate": 1.9667179266954352e-05, "loss": 0.7162, "step": 9153 }, { "epoch": 1.4943471695032855, "grad_norm": 1.91780424118042, "learning_rate": 1.9667098074262734e-05, "loss": 0.6791, "step": 9154 }, { "epoch": 1.49451042814579, "grad_norm": 1.9460184574127197, "learning_rate": 1.9667016871836346e-05, "loss": 0.7164, "step": 9155 }, { "epoch": 1.4946736867882944, "grad_norm": 1.706470012664795, "learning_rate": 1.966693565967528e-05, "loss": 0.5374, "step": 9156 }, { "epoch": 1.4948369454307988, "grad_norm": 1.5911709070205688, "learning_rate": 1.9666854437779608e-05, "loss": 0.6614, "step": 9157 }, { "epoch": 1.4950002040733033, "grad_norm": 1.8055311441421509, "learning_rate": 1.9666773206149417e-05, "loss": 0.763, "step": 9158 }, { "epoch": 1.4951634627158075, "grad_norm": 1.8154795169830322, "learning_rate": 1.966669196478479e-05, "loss": 0.7054, "step": 9159 }, { "epoch": 1.495326721358312, "grad_norm": 1.8464452028274536, "learning_rate": 1.9666610713685804e-05, "loss": 0.807, "step": 9160 }, { "epoch": 1.4954899800008163, "grad_norm": 1.5572665929794312, "learning_rate": 1.9666529452852546e-05, "loss": 0.6909, "step": 9161 }, { "epoch": 1.4956532386433206, "grad_norm": 1.8463075160980225, "learning_rate": 1.9666448182285095e-05, "loss": 0.7549, "step": 9162 }, { "epoch": 1.495816497285825, "grad_norm": 1.896497368812561, "learning_rate": 1.966636690198353e-05, "loss": 0.6934, "step": 9163 }, { "epoch": 1.4959797559283294, "grad_norm": 2.0354511737823486, "learning_rate": 1.966628561194794e-05, "loss": 0.6209, "step": 9164 }, { "epoch": 1.4961430145708339, "grad_norm": 1.7633668184280396, "learning_rate": 1.96662043121784e-05, "loss": 0.6536, "step": 9165 }, { "epoch": 1.4963062732133383, "grad_norm": 1.7485493421554565, "learning_rate": 1.9666123002675e-05, "loss": 0.7377, "step": 9166 }, { "epoch": 1.4964695318558427, "grad_norm": 1.7833659648895264, "learning_rate": 1.966604168343781e-05, "loss": 0.6842, "step": 9167 }, { "epoch": 1.496632790498347, "grad_norm": 1.9445688724517822, "learning_rate": 1.9665960354466925e-05, "loss": 0.7661, "step": 9168 }, { "epoch": 1.4967960491408514, "grad_norm": 1.648514986038208, "learning_rate": 1.9665879015762416e-05, "loss": 0.6517, "step": 9169 }, { "epoch": 1.4969593077833558, "grad_norm": 1.8444446325302124, "learning_rate": 1.966579766732437e-05, "loss": 0.6036, "step": 9170 }, { "epoch": 1.49712256642586, "grad_norm": 1.5765466690063477, "learning_rate": 1.9665716309152868e-05, "loss": 0.7006, "step": 9171 }, { "epoch": 1.4972858250683645, "grad_norm": 1.851354718208313, "learning_rate": 1.966563494124799e-05, "loss": 0.8096, "step": 9172 }, { "epoch": 1.497449083710869, "grad_norm": 1.39980947971344, "learning_rate": 1.9665553563609826e-05, "loss": 0.5258, "step": 9173 }, { "epoch": 1.4976123423533734, "grad_norm": 1.563685655593872, "learning_rate": 1.9665472176238452e-05, "loss": 0.6536, "step": 9174 }, { "epoch": 1.4977756009958778, "grad_norm": 1.8312667608261108, "learning_rate": 1.9665390779133945e-05, "loss": 0.751, "step": 9175 }, { "epoch": 1.497938859638382, "grad_norm": 1.9688165187835693, "learning_rate": 1.9665309372296396e-05, "loss": 0.7066, "step": 9176 }, { "epoch": 1.4981021182808865, "grad_norm": 1.7810673713684082, "learning_rate": 1.9665227955725882e-05, "loss": 0.6123, "step": 9177 }, { "epoch": 1.498265376923391, "grad_norm": 2.097825527191162, "learning_rate": 1.9665146529422485e-05, "loss": 0.6828, "step": 9178 }, { "epoch": 1.4984286355658953, "grad_norm": 1.5779398679733276, "learning_rate": 1.9665065093386287e-05, "loss": 0.6153, "step": 9179 }, { "epoch": 1.4985918942083996, "grad_norm": 1.778352975845337, "learning_rate": 1.9664983647617375e-05, "loss": 0.6283, "step": 9180 }, { "epoch": 1.498755152850904, "grad_norm": 1.5818698406219482, "learning_rate": 1.9664902192115825e-05, "loss": 0.6047, "step": 9181 }, { "epoch": 1.4989184114934084, "grad_norm": 1.9181369543075562, "learning_rate": 1.966482072688172e-05, "loss": 0.8305, "step": 9182 }, { "epoch": 1.4990816701359129, "grad_norm": 1.7858409881591797, "learning_rate": 1.9664739251915142e-05, "loss": 0.64, "step": 9183 }, { "epoch": 1.4992449287784173, "grad_norm": 1.523867130279541, "learning_rate": 1.9664657767216176e-05, "loss": 0.6484, "step": 9184 }, { "epoch": 1.4994081874209215, "grad_norm": 1.4505800008773804, "learning_rate": 1.9664576272784903e-05, "loss": 0.5926, "step": 9185 }, { "epoch": 1.499571446063426, "grad_norm": 1.8463571071624756, "learning_rate": 1.9664494768621405e-05, "loss": 0.7281, "step": 9186 }, { "epoch": 1.4997347047059304, "grad_norm": 1.5847265720367432, "learning_rate": 1.9664413254725762e-05, "loss": 0.5892, "step": 9187 }, { "epoch": 1.4998979633484348, "grad_norm": 2.2423532009124756, "learning_rate": 1.9664331731098056e-05, "loss": 0.7726, "step": 9188 }, { "epoch": 1.500061221990939, "grad_norm": 2.0647356510162354, "learning_rate": 1.9664250197738372e-05, "loss": 0.617, "step": 9189 }, { "epoch": 1.5002244806334435, "grad_norm": 1.7320938110351562, "learning_rate": 1.966416865464679e-05, "loss": 0.6483, "step": 9190 }, { "epoch": 1.500387739275948, "grad_norm": 1.760252833366394, "learning_rate": 1.9664087101823394e-05, "loss": 0.6737, "step": 9191 }, { "epoch": 1.5005509979184524, "grad_norm": 1.7635743618011475, "learning_rate": 1.9664005539268263e-05, "loss": 0.7009, "step": 9192 }, { "epoch": 1.5007142565609568, "grad_norm": 1.603127360343933, "learning_rate": 1.9663923966981482e-05, "loss": 0.6958, "step": 9193 }, { "epoch": 1.5008775152034612, "grad_norm": 1.9792087078094482, "learning_rate": 1.9663842384963133e-05, "loss": 0.8413, "step": 9194 }, { "epoch": 1.5010407738459655, "grad_norm": 1.7429194450378418, "learning_rate": 1.9663760793213297e-05, "loss": 0.7146, "step": 9195 }, { "epoch": 1.50120403248847, "grad_norm": 1.8340983390808105, "learning_rate": 1.9663679191732052e-05, "loss": 0.6614, "step": 9196 }, { "epoch": 1.5013672911309741, "grad_norm": 1.8444374799728394, "learning_rate": 1.966359758051949e-05, "loss": 0.7719, "step": 9197 }, { "epoch": 1.5015305497734786, "grad_norm": 1.5356831550598145, "learning_rate": 1.9663515959575687e-05, "loss": 0.5604, "step": 9198 }, { "epoch": 1.501693808415983, "grad_norm": 1.9556879997253418, "learning_rate": 1.9663434328900727e-05, "loss": 0.8634, "step": 9199 }, { "epoch": 1.5018570670584874, "grad_norm": 1.7234108448028564, "learning_rate": 1.9663352688494686e-05, "loss": 0.6602, "step": 9200 }, { "epoch": 1.5020203257009919, "grad_norm": 1.6151106357574463, "learning_rate": 1.9663271038357656e-05, "loss": 0.669, "step": 9201 }, { "epoch": 1.5021835843434963, "grad_norm": 1.6345739364624023, "learning_rate": 1.966318937848971e-05, "loss": 0.6766, "step": 9202 }, { "epoch": 1.5023468429860005, "grad_norm": 1.4169303178787231, "learning_rate": 1.966310770889094e-05, "loss": 0.5183, "step": 9203 }, { "epoch": 1.502510101628505, "grad_norm": 1.6124773025512695, "learning_rate": 1.9663026029561422e-05, "loss": 0.5786, "step": 9204 }, { "epoch": 1.5026733602710094, "grad_norm": 2.07743501663208, "learning_rate": 1.966294434050124e-05, "loss": 0.7735, "step": 9205 }, { "epoch": 1.5028366189135136, "grad_norm": 1.560802936553955, "learning_rate": 1.966286264171047e-05, "loss": 0.5835, "step": 9206 }, { "epoch": 1.502999877556018, "grad_norm": 1.7170870304107666, "learning_rate": 1.9662780933189208e-05, "loss": 0.615, "step": 9207 }, { "epoch": 1.5031631361985225, "grad_norm": 1.8181757926940918, "learning_rate": 1.9662699214937525e-05, "loss": 0.7619, "step": 9208 }, { "epoch": 1.503326394841027, "grad_norm": 1.8135143518447876, "learning_rate": 1.9662617486955505e-05, "loss": 0.6889, "step": 9209 }, { "epoch": 1.5034896534835314, "grad_norm": 1.8922208547592163, "learning_rate": 1.9662535749243233e-05, "loss": 0.699, "step": 9210 }, { "epoch": 1.5036529121260358, "grad_norm": 1.7311550378799438, "learning_rate": 1.966245400180079e-05, "loss": 0.7802, "step": 9211 }, { "epoch": 1.50381617076854, "grad_norm": 1.93391752243042, "learning_rate": 1.9662372244628255e-05, "loss": 0.6381, "step": 9212 }, { "epoch": 1.5039794294110445, "grad_norm": 1.8411240577697754, "learning_rate": 1.9662290477725717e-05, "loss": 0.7454, "step": 9213 }, { "epoch": 1.5041426880535487, "grad_norm": 1.8079935312271118, "learning_rate": 1.9662208701093255e-05, "loss": 0.6248, "step": 9214 }, { "epoch": 1.504305946696053, "grad_norm": 1.8441911935806274, "learning_rate": 1.966212691473095e-05, "loss": 0.7209, "step": 9215 }, { "epoch": 1.5044692053385575, "grad_norm": 1.901990532875061, "learning_rate": 1.966204511863889e-05, "loss": 0.8821, "step": 9216 }, { "epoch": 1.504632463981062, "grad_norm": 1.8280116319656372, "learning_rate": 1.966196331281715e-05, "loss": 0.7127, "step": 9217 }, { "epoch": 1.5047957226235664, "grad_norm": 1.7730761766433716, "learning_rate": 1.9661881497265813e-05, "loss": 0.8725, "step": 9218 }, { "epoch": 1.5049589812660709, "grad_norm": 1.665906310081482, "learning_rate": 1.966179967198497e-05, "loss": 0.6916, "step": 9219 }, { "epoch": 1.5051222399085753, "grad_norm": 2.053025960922241, "learning_rate": 1.9661717836974694e-05, "loss": 0.8238, "step": 9220 }, { "epoch": 1.5052854985510795, "grad_norm": 1.7003999948501587, "learning_rate": 1.966163599223507e-05, "loss": 0.8462, "step": 9221 }, { "epoch": 1.505448757193584, "grad_norm": 1.9179718494415283, "learning_rate": 1.9661554137766178e-05, "loss": 0.6394, "step": 9222 }, { "epoch": 1.5056120158360882, "grad_norm": 1.6990323066711426, "learning_rate": 1.9661472273568106e-05, "loss": 0.6942, "step": 9223 }, { "epoch": 1.5057752744785926, "grad_norm": 1.8510026931762695, "learning_rate": 1.9661390399640936e-05, "loss": 0.7517, "step": 9224 }, { "epoch": 1.505938533121097, "grad_norm": 1.67653226852417, "learning_rate": 1.966130851598475e-05, "loss": 0.6773, "step": 9225 }, { "epoch": 1.5061017917636015, "grad_norm": 2.137173891067505, "learning_rate": 1.9661226622599627e-05, "loss": 0.7611, "step": 9226 }, { "epoch": 1.506265050406106, "grad_norm": 1.999756932258606, "learning_rate": 1.966114471948565e-05, "loss": 0.7817, "step": 9227 }, { "epoch": 1.5064283090486104, "grad_norm": 1.7564342021942139, "learning_rate": 1.9661062806642903e-05, "loss": 0.6394, "step": 9228 }, { "epoch": 1.5065915676911148, "grad_norm": 1.843449592590332, "learning_rate": 1.9660980884071468e-05, "loss": 0.7945, "step": 9229 }, { "epoch": 1.506754826333619, "grad_norm": 1.749974250793457, "learning_rate": 1.966089895177143e-05, "loss": 0.8012, "step": 9230 }, { "epoch": 1.5069180849761235, "grad_norm": 1.9351595640182495, "learning_rate": 1.9660817009742867e-05, "loss": 0.7638, "step": 9231 }, { "epoch": 1.5070813436186277, "grad_norm": 1.7174971103668213, "learning_rate": 1.9660735057985865e-05, "loss": 0.7143, "step": 9232 }, { "epoch": 1.507244602261132, "grad_norm": 1.7320640087127686, "learning_rate": 1.9660653096500506e-05, "loss": 0.6668, "step": 9233 }, { "epoch": 1.5074078609036365, "grad_norm": 1.7025355100631714, "learning_rate": 1.9660571125286873e-05, "loss": 0.6427, "step": 9234 }, { "epoch": 1.507571119546141, "grad_norm": 1.5663793087005615, "learning_rate": 1.9660489144345042e-05, "loss": 0.6787, "step": 9235 }, { "epoch": 1.5077343781886454, "grad_norm": 1.5784380435943604, "learning_rate": 1.966040715367511e-05, "loss": 0.5122, "step": 9236 }, { "epoch": 1.5078976368311499, "grad_norm": 1.7486788034439087, "learning_rate": 1.966032515327714e-05, "loss": 0.7114, "step": 9237 }, { "epoch": 1.5080608954736543, "grad_norm": 1.7123584747314453, "learning_rate": 1.966024314315123e-05, "loss": 0.6606, "step": 9238 }, { "epoch": 1.5082241541161585, "grad_norm": 1.9746376276016235, "learning_rate": 1.966016112329746e-05, "loss": 0.8135, "step": 9239 }, { "epoch": 1.508387412758663, "grad_norm": 1.6528640985488892, "learning_rate": 1.9660079093715906e-05, "loss": 0.679, "step": 9240 }, { "epoch": 1.5085506714011672, "grad_norm": 2.140596628189087, "learning_rate": 1.9659997054406657e-05, "loss": 0.8388, "step": 9241 }, { "epoch": 1.5087139300436716, "grad_norm": 1.8062299489974976, "learning_rate": 1.9659915005369795e-05, "loss": 0.8103, "step": 9242 }, { "epoch": 1.508877188686176, "grad_norm": 1.6751171350479126, "learning_rate": 1.96598329466054e-05, "loss": 0.6851, "step": 9243 }, { "epoch": 1.5090404473286805, "grad_norm": 1.5374935865402222, "learning_rate": 1.9659750878113555e-05, "loss": 0.6603, "step": 9244 }, { "epoch": 1.509203705971185, "grad_norm": 1.698721170425415, "learning_rate": 1.9659668799894344e-05, "loss": 0.5855, "step": 9245 }, { "epoch": 1.5093669646136894, "grad_norm": 1.7462859153747559, "learning_rate": 1.965958671194785e-05, "loss": 0.7167, "step": 9246 }, { "epoch": 1.5095302232561936, "grad_norm": 1.5849785804748535, "learning_rate": 1.9659504614274153e-05, "loss": 0.5612, "step": 9247 }, { "epoch": 1.509693481898698, "grad_norm": 1.6543867588043213, "learning_rate": 1.965942250687334e-05, "loss": 0.7275, "step": 9248 }, { "epoch": 1.5098567405412024, "grad_norm": 1.5935299396514893, "learning_rate": 1.965934038974549e-05, "loss": 0.6857, "step": 9249 }, { "epoch": 1.5100199991837067, "grad_norm": 1.6700538396835327, "learning_rate": 1.9659258262890683e-05, "loss": 0.6465, "step": 9250 }, { "epoch": 1.510183257826211, "grad_norm": 1.8055061101913452, "learning_rate": 1.965917612630901e-05, "loss": 0.7505, "step": 9251 }, { "epoch": 1.5103465164687155, "grad_norm": 2.0924155712127686, "learning_rate": 1.965909398000055e-05, "loss": 0.7012, "step": 9252 }, { "epoch": 1.51050977511122, "grad_norm": 1.4190868139266968, "learning_rate": 1.965901182396538e-05, "loss": 0.5304, "step": 9253 }, { "epoch": 1.5106730337537244, "grad_norm": 1.8384699821472168, "learning_rate": 1.9658929658203593e-05, "loss": 0.7084, "step": 9254 }, { "epoch": 1.5108362923962289, "grad_norm": 1.6622133255004883, "learning_rate": 1.965884748271526e-05, "loss": 0.6267, "step": 9255 }, { "epoch": 1.510999551038733, "grad_norm": 1.6499580144882202, "learning_rate": 1.9658765297500478e-05, "loss": 0.6374, "step": 9256 }, { "epoch": 1.5111628096812375, "grad_norm": 2.0668561458587646, "learning_rate": 1.9658683102559317e-05, "loss": 0.7817, "step": 9257 }, { "epoch": 1.5113260683237417, "grad_norm": 1.9733960628509521, "learning_rate": 1.9658600897891867e-05, "loss": 0.7533, "step": 9258 }, { "epoch": 1.5114893269662462, "grad_norm": 1.5487282276153564, "learning_rate": 1.9658518683498204e-05, "loss": 0.6505, "step": 9259 }, { "epoch": 1.5116525856087506, "grad_norm": 1.4914352893829346, "learning_rate": 1.9658436459378422e-05, "loss": 0.6095, "step": 9260 }, { "epoch": 1.511815844251255, "grad_norm": 1.7780154943466187, "learning_rate": 1.965835422553259e-05, "loss": 0.7749, "step": 9261 }, { "epoch": 1.5119791028937595, "grad_norm": 1.8552535772323608, "learning_rate": 1.96582719819608e-05, "loss": 0.7629, "step": 9262 }, { "epoch": 1.512142361536264, "grad_norm": 1.5163869857788086, "learning_rate": 1.9658189728663136e-05, "loss": 0.6201, "step": 9263 }, { "epoch": 1.5123056201787684, "grad_norm": 1.7470884323120117, "learning_rate": 1.9658107465639676e-05, "loss": 0.7801, "step": 9264 }, { "epoch": 1.5124688788212726, "grad_norm": 1.929487705230713, "learning_rate": 1.9658025192890502e-05, "loss": 0.6101, "step": 9265 }, { "epoch": 1.512632137463777, "grad_norm": 1.523269772529602, "learning_rate": 1.9657942910415703e-05, "loss": 0.6186, "step": 9266 }, { "epoch": 1.5127953961062812, "grad_norm": 1.6428195238113403, "learning_rate": 1.9657860618215353e-05, "loss": 0.725, "step": 9267 }, { "epoch": 1.5129586547487857, "grad_norm": 1.737205147743225, "learning_rate": 1.9657778316289543e-05, "loss": 0.6692, "step": 9268 }, { "epoch": 1.51312191339129, "grad_norm": 1.7569655179977417, "learning_rate": 1.9657696004638355e-05, "loss": 0.6871, "step": 9269 }, { "epoch": 1.5132851720337945, "grad_norm": 1.7467747926712036, "learning_rate": 1.9657613683261866e-05, "loss": 0.6881, "step": 9270 }, { "epoch": 1.513448430676299, "grad_norm": 1.6222825050354004, "learning_rate": 1.9657531352160163e-05, "loss": 0.6495, "step": 9271 }, { "epoch": 1.5136116893188034, "grad_norm": 1.6093761920928955, "learning_rate": 1.9657449011333328e-05, "loss": 0.62, "step": 9272 }, { "epoch": 1.5137749479613078, "grad_norm": 1.7211860418319702, "learning_rate": 1.965736666078145e-05, "loss": 0.6791, "step": 9273 }, { "epoch": 1.513938206603812, "grad_norm": 1.9281294345855713, "learning_rate": 1.96572843005046e-05, "loss": 0.8709, "step": 9274 }, { "epoch": 1.5141014652463165, "grad_norm": 1.6260515451431274, "learning_rate": 1.9657201930502872e-05, "loss": 0.6259, "step": 9275 }, { "epoch": 1.5142647238888207, "grad_norm": 1.791630744934082, "learning_rate": 1.965711955077634e-05, "loss": 0.7608, "step": 9276 }, { "epoch": 1.5144279825313252, "grad_norm": 1.6905677318572998, "learning_rate": 1.9657037161325095e-05, "loss": 0.8017, "step": 9277 }, { "epoch": 1.5145912411738296, "grad_norm": 1.9632296562194824, "learning_rate": 1.9656954762149213e-05, "loss": 0.7707, "step": 9278 }, { "epoch": 1.514754499816334, "grad_norm": 1.8477476835250854, "learning_rate": 1.965687235324878e-05, "loss": 0.5879, "step": 9279 }, { "epoch": 1.5149177584588385, "grad_norm": 1.8311630487442017, "learning_rate": 1.965678993462388e-05, "loss": 0.7546, "step": 9280 }, { "epoch": 1.515081017101343, "grad_norm": 1.6614177227020264, "learning_rate": 1.9656707506274595e-05, "loss": 0.6978, "step": 9281 }, { "epoch": 1.5152442757438473, "grad_norm": 1.8868060111999512, "learning_rate": 1.965662506820101e-05, "loss": 0.8386, "step": 9282 }, { "epoch": 1.5154075343863516, "grad_norm": 1.8684314489364624, "learning_rate": 1.9656542620403203e-05, "loss": 0.6841, "step": 9283 }, { "epoch": 1.515570793028856, "grad_norm": 1.99854576587677, "learning_rate": 1.9656460162881262e-05, "loss": 0.7589, "step": 9284 }, { "epoch": 1.5157340516713602, "grad_norm": 1.7998199462890625, "learning_rate": 1.965637769563527e-05, "loss": 0.6682, "step": 9285 }, { "epoch": 1.5158973103138647, "grad_norm": 1.6971325874328613, "learning_rate": 1.9656295218665306e-05, "loss": 0.7699, "step": 9286 }, { "epoch": 1.516060568956369, "grad_norm": 2.063441753387451, "learning_rate": 1.9656212731971452e-05, "loss": 0.8038, "step": 9287 }, { "epoch": 1.5162238275988735, "grad_norm": 1.5106374025344849, "learning_rate": 1.96561302355538e-05, "loss": 0.6174, "step": 9288 }, { "epoch": 1.516387086241378, "grad_norm": 2.0775322914123535, "learning_rate": 1.9656047729412426e-05, "loss": 0.7731, "step": 9289 }, { "epoch": 1.5165503448838824, "grad_norm": 1.535444974899292, "learning_rate": 1.965596521354741e-05, "loss": 0.6611, "step": 9290 }, { "epoch": 1.5167136035263868, "grad_norm": 1.8570891618728638, "learning_rate": 1.9655882687958845e-05, "loss": 0.6318, "step": 9291 }, { "epoch": 1.516876862168891, "grad_norm": 1.9117178916931152, "learning_rate": 1.965580015264681e-05, "loss": 0.7406, "step": 9292 }, { "epoch": 1.5170401208113955, "grad_norm": 2.491406202316284, "learning_rate": 1.965571760761138e-05, "loss": 0.8373, "step": 9293 }, { "epoch": 1.5172033794538997, "grad_norm": 1.6160461902618408, "learning_rate": 1.9655635052852648e-05, "loss": 0.6843, "step": 9294 }, { "epoch": 1.5173666380964042, "grad_norm": 1.84751558303833, "learning_rate": 1.9655552488370694e-05, "loss": 0.6786, "step": 9295 }, { "epoch": 1.5175298967389086, "grad_norm": 1.9708728790283203, "learning_rate": 1.9655469914165604e-05, "loss": 0.6661, "step": 9296 }, { "epoch": 1.517693155381413, "grad_norm": 1.6271531581878662, "learning_rate": 1.9655387330237454e-05, "loss": 0.6595, "step": 9297 }, { "epoch": 1.5178564140239175, "grad_norm": 1.6992237567901611, "learning_rate": 1.9655304736586335e-05, "loss": 0.6635, "step": 9298 }, { "epoch": 1.518019672666422, "grad_norm": 1.8905822038650513, "learning_rate": 1.9655222133212327e-05, "loss": 0.7588, "step": 9299 }, { "epoch": 1.5181829313089261, "grad_norm": 1.8169599771499634, "learning_rate": 1.965513952011551e-05, "loss": 0.706, "step": 9300 }, { "epoch": 1.5183461899514306, "grad_norm": 1.8131130933761597, "learning_rate": 1.965505689729597e-05, "loss": 0.6845, "step": 9301 }, { "epoch": 1.518509448593935, "grad_norm": 1.4808218479156494, "learning_rate": 1.965497426475379e-05, "loss": 0.6094, "step": 9302 }, { "epoch": 1.5186727072364392, "grad_norm": 1.6372148990631104, "learning_rate": 1.965489162248906e-05, "loss": 0.5549, "step": 9303 }, { "epoch": 1.5188359658789437, "grad_norm": 1.836823582649231, "learning_rate": 1.9654808970501852e-05, "loss": 0.6963, "step": 9304 }, { "epoch": 1.518999224521448, "grad_norm": 1.5312870740890503, "learning_rate": 1.9654726308792252e-05, "loss": 0.6237, "step": 9305 }, { "epoch": 1.5191624831639525, "grad_norm": 1.572156548500061, "learning_rate": 1.9654643637360347e-05, "loss": 0.5897, "step": 9306 }, { "epoch": 1.519325741806457, "grad_norm": 1.3847283124923706, "learning_rate": 1.9654560956206218e-05, "loss": 0.5061, "step": 9307 }, { "epoch": 1.5194890004489614, "grad_norm": 1.8717001676559448, "learning_rate": 1.965447826532995e-05, "loss": 0.7329, "step": 9308 }, { "epoch": 1.5196522590914656, "grad_norm": 1.6560218334197998, "learning_rate": 1.9654395564731624e-05, "loss": 0.5534, "step": 9309 }, { "epoch": 1.51981551773397, "grad_norm": 1.86985445022583, "learning_rate": 1.9654312854411325e-05, "loss": 0.7143, "step": 9310 }, { "epoch": 1.5199787763764743, "grad_norm": 1.5928958654403687, "learning_rate": 1.9654230134369134e-05, "loss": 0.7016, "step": 9311 }, { "epoch": 1.5201420350189787, "grad_norm": 1.5898302793502808, "learning_rate": 1.9654147404605136e-05, "loss": 0.6971, "step": 9312 }, { "epoch": 1.5203052936614831, "grad_norm": 1.5732944011688232, "learning_rate": 1.9654064665119415e-05, "loss": 0.6445, "step": 9313 }, { "epoch": 1.5204685523039876, "grad_norm": 1.8308943510055542, "learning_rate": 1.9653981915912054e-05, "loss": 0.6964, "step": 9314 }, { "epoch": 1.520631810946492, "grad_norm": 1.5813237428665161, "learning_rate": 1.9653899156983132e-05, "loss": 0.7283, "step": 9315 }, { "epoch": 1.5207950695889965, "grad_norm": 1.772891640663147, "learning_rate": 1.965381638833274e-05, "loss": 0.86, "step": 9316 }, { "epoch": 1.520958328231501, "grad_norm": 1.5530004501342773, "learning_rate": 1.9653733609960956e-05, "loss": 0.6495, "step": 9317 }, { "epoch": 1.5211215868740051, "grad_norm": 1.4502285718917847, "learning_rate": 1.9653650821867867e-05, "loss": 0.5297, "step": 9318 }, { "epoch": 1.5212848455165096, "grad_norm": 1.927639365196228, "learning_rate": 1.965356802405355e-05, "loss": 0.8034, "step": 9319 }, { "epoch": 1.5214481041590138, "grad_norm": 1.9937869310379028, "learning_rate": 1.9653485216518094e-05, "loss": 0.7011, "step": 9320 }, { "epoch": 1.5216113628015182, "grad_norm": 1.6955549716949463, "learning_rate": 1.9653402399261586e-05, "loss": 0.65, "step": 9321 }, { "epoch": 1.5217746214440226, "grad_norm": 2.036925792694092, "learning_rate": 1.9653319572284098e-05, "loss": 0.6836, "step": 9322 }, { "epoch": 1.521937880086527, "grad_norm": 1.7867528200149536, "learning_rate": 1.9653236735585724e-05, "loss": 0.7357, "step": 9323 }, { "epoch": 1.5221011387290315, "grad_norm": 1.7286356687545776, "learning_rate": 1.965315388916654e-05, "loss": 0.7656, "step": 9324 }, { "epoch": 1.522264397371536, "grad_norm": 1.7868379354476929, "learning_rate": 1.9653071033026635e-05, "loss": 0.7108, "step": 9325 }, { "epoch": 1.5224276560140404, "grad_norm": 1.4771361351013184, "learning_rate": 1.9652988167166086e-05, "loss": 0.5685, "step": 9326 }, { "epoch": 1.5225909146565446, "grad_norm": 1.7919398546218872, "learning_rate": 1.9652905291584987e-05, "loss": 0.7258, "step": 9327 }, { "epoch": 1.522754173299049, "grad_norm": 1.9175596237182617, "learning_rate": 1.9652822406283408e-05, "loss": 0.7582, "step": 9328 }, { "epoch": 1.5229174319415533, "grad_norm": 1.8960909843444824, "learning_rate": 1.9652739511261446e-05, "loss": 0.7412, "step": 9329 }, { "epoch": 1.5230806905840577, "grad_norm": 1.8308054208755493, "learning_rate": 1.9652656606519174e-05, "loss": 0.7085, "step": 9330 }, { "epoch": 1.5232439492265621, "grad_norm": 1.6823025941848755, "learning_rate": 1.9652573692056677e-05, "loss": 0.6263, "step": 9331 }, { "epoch": 1.5234072078690666, "grad_norm": 1.8177499771118164, "learning_rate": 1.9652490767874047e-05, "loss": 0.7205, "step": 9332 }, { "epoch": 1.523570466511571, "grad_norm": 1.3807194232940674, "learning_rate": 1.9652407833971358e-05, "loss": 0.546, "step": 9333 }, { "epoch": 1.5237337251540755, "grad_norm": 1.6263338327407837, "learning_rate": 1.9652324890348695e-05, "loss": 0.6379, "step": 9334 }, { "epoch": 1.52389698379658, "grad_norm": 1.8905985355377197, "learning_rate": 1.9652241937006146e-05, "loss": 0.7782, "step": 9335 }, { "epoch": 1.5240602424390841, "grad_norm": 1.5670658349990845, "learning_rate": 1.9652158973943792e-05, "loss": 0.7025, "step": 9336 }, { "epoch": 1.5242235010815885, "grad_norm": 1.6258268356323242, "learning_rate": 1.9652076001161715e-05, "loss": 0.6802, "step": 9337 }, { "epoch": 1.5243867597240928, "grad_norm": 2.497962236404419, "learning_rate": 1.9651993018660002e-05, "loss": 0.9689, "step": 9338 }, { "epoch": 1.5245500183665972, "grad_norm": 2.0998306274414062, "learning_rate": 1.9651910026438732e-05, "loss": 0.8589, "step": 9339 }, { "epoch": 1.5247132770091016, "grad_norm": 1.6235616207122803, "learning_rate": 1.9651827024497993e-05, "loss": 0.597, "step": 9340 }, { "epoch": 1.524876535651606, "grad_norm": 1.7903447151184082, "learning_rate": 1.9651744012837866e-05, "loss": 0.7067, "step": 9341 }, { "epoch": 1.5250397942941105, "grad_norm": 1.436903715133667, "learning_rate": 1.9651660991458435e-05, "loss": 0.6438, "step": 9342 }, { "epoch": 1.525203052936615, "grad_norm": 1.8167574405670166, "learning_rate": 1.9651577960359783e-05, "loss": 0.7403, "step": 9343 }, { "epoch": 1.5253663115791192, "grad_norm": 1.548518419265747, "learning_rate": 1.9651494919541997e-05, "loss": 0.6544, "step": 9344 }, { "epoch": 1.5255295702216236, "grad_norm": 1.7820303440093994, "learning_rate": 1.9651411869005158e-05, "loss": 0.5357, "step": 9345 }, { "epoch": 1.525692828864128, "grad_norm": 1.4347519874572754, "learning_rate": 1.965132880874935e-05, "loss": 0.575, "step": 9346 }, { "epoch": 1.5258560875066323, "grad_norm": 2.0128748416900635, "learning_rate": 1.9651245738774655e-05, "loss": 0.7017, "step": 9347 }, { "epoch": 1.5260193461491367, "grad_norm": 1.557662010192871, "learning_rate": 1.9651162659081156e-05, "loss": 0.6049, "step": 9348 }, { "epoch": 1.5261826047916411, "grad_norm": 1.7654147148132324, "learning_rate": 1.9651079569668944e-05, "loss": 0.7448, "step": 9349 }, { "epoch": 1.5263458634341456, "grad_norm": 1.6691595315933228, "learning_rate": 1.9650996470538093e-05, "loss": 0.5857, "step": 9350 }, { "epoch": 1.52650912207665, "grad_norm": 1.4767142534255981, "learning_rate": 1.9650913361688694e-05, "loss": 0.5637, "step": 9351 }, { "epoch": 1.5266723807191545, "grad_norm": 1.6665476560592651, "learning_rate": 1.9650830243120828e-05, "loss": 0.7047, "step": 9352 }, { "epoch": 1.5268356393616587, "grad_norm": 2.0503389835357666, "learning_rate": 1.9650747114834578e-05, "loss": 0.7518, "step": 9353 }, { "epoch": 1.526998898004163, "grad_norm": 1.7322858572006226, "learning_rate": 1.9650663976830023e-05, "loss": 0.6056, "step": 9354 }, { "epoch": 1.5271621566466673, "grad_norm": 1.836872935295105, "learning_rate": 1.965058082910726e-05, "loss": 0.8339, "step": 9355 }, { "epoch": 1.5273254152891718, "grad_norm": 2.110888957977295, "learning_rate": 1.965049767166636e-05, "loss": 0.6538, "step": 9356 }, { "epoch": 1.5274886739316762, "grad_norm": 1.788400411605835, "learning_rate": 1.9650414504507412e-05, "loss": 0.7375, "step": 9357 }, { "epoch": 1.5276519325741806, "grad_norm": 1.6644761562347412, "learning_rate": 1.96503313276305e-05, "loss": 0.7288, "step": 9358 }, { "epoch": 1.527815191216685, "grad_norm": 1.7994214296340942, "learning_rate": 1.9650248141035707e-05, "loss": 0.6819, "step": 9359 }, { "epoch": 1.5279784498591895, "grad_norm": 1.8320527076721191, "learning_rate": 1.9650164944723116e-05, "loss": 0.7593, "step": 9360 }, { "epoch": 1.528141708501694, "grad_norm": 1.9133325815200806, "learning_rate": 1.9650081738692813e-05, "loss": 0.7984, "step": 9361 }, { "epoch": 1.5283049671441982, "grad_norm": 1.5597805976867676, "learning_rate": 1.9649998522944878e-05, "loss": 0.6208, "step": 9362 }, { "epoch": 1.5284682257867026, "grad_norm": 2.3474831581115723, "learning_rate": 1.9649915297479398e-05, "loss": 0.7769, "step": 9363 }, { "epoch": 1.5286314844292068, "grad_norm": 1.6871731281280518, "learning_rate": 1.9649832062296458e-05, "loss": 0.6851, "step": 9364 }, { "epoch": 1.5287947430717113, "grad_norm": 2.04590106010437, "learning_rate": 1.9649748817396136e-05, "loss": 0.7258, "step": 9365 }, { "epoch": 1.5289580017142157, "grad_norm": 1.6969974040985107, "learning_rate": 1.964966556277852e-05, "loss": 0.7712, "step": 9366 }, { "epoch": 1.5291212603567201, "grad_norm": 1.7721267938613892, "learning_rate": 1.9649582298443693e-05, "loss": 0.7927, "step": 9367 }, { "epoch": 1.5292845189992246, "grad_norm": 1.5970799922943115, "learning_rate": 1.964949902439174e-05, "loss": 0.6102, "step": 9368 }, { "epoch": 1.529447777641729, "grad_norm": 1.5986828804016113, "learning_rate": 1.964941574062275e-05, "loss": 0.7066, "step": 9369 }, { "epoch": 1.5296110362842334, "grad_norm": 1.8751593828201294, "learning_rate": 1.964933244713679e-05, "loss": 0.6897, "step": 9370 }, { "epoch": 1.5297742949267377, "grad_norm": 1.4945485591888428, "learning_rate": 1.9649249143933963e-05, "loss": 0.562, "step": 9371 }, { "epoch": 1.529937553569242, "grad_norm": 1.695502519607544, "learning_rate": 1.964916583101434e-05, "loss": 0.6652, "step": 9372 }, { "epoch": 1.5301008122117463, "grad_norm": 1.6750316619873047, "learning_rate": 1.9649082508378013e-05, "loss": 0.693, "step": 9373 }, { "epoch": 1.5302640708542508, "grad_norm": 1.7775930166244507, "learning_rate": 1.964899917602506e-05, "loss": 0.6717, "step": 9374 }, { "epoch": 1.5304273294967552, "grad_norm": 1.643917202949524, "learning_rate": 1.964891583395557e-05, "loss": 0.6035, "step": 9375 }, { "epoch": 1.5305905881392596, "grad_norm": 1.6897881031036377, "learning_rate": 1.964883248216962e-05, "loss": 0.6478, "step": 9376 }, { "epoch": 1.530753846781764, "grad_norm": 1.842919945716858, "learning_rate": 1.9648749120667302e-05, "loss": 0.766, "step": 9377 }, { "epoch": 1.5309171054242685, "grad_norm": 1.752646565437317, "learning_rate": 1.9648665749448695e-05, "loss": 0.6809, "step": 9378 }, { "epoch": 1.531080364066773, "grad_norm": 1.7405864000320435, "learning_rate": 1.9648582368513885e-05, "loss": 0.7546, "step": 9379 }, { "epoch": 1.5312436227092772, "grad_norm": 1.726799488067627, "learning_rate": 1.9648498977862954e-05, "loss": 0.6971, "step": 9380 }, { "epoch": 1.5314068813517816, "grad_norm": 1.6051521301269531, "learning_rate": 1.964841557749599e-05, "loss": 0.6073, "step": 9381 }, { "epoch": 1.5315701399942858, "grad_norm": 2.2582545280456543, "learning_rate": 1.9648332167413067e-05, "loss": 0.6857, "step": 9382 }, { "epoch": 1.5317333986367903, "grad_norm": 2.403881072998047, "learning_rate": 1.9648248747614285e-05, "loss": 0.7608, "step": 9383 }, { "epoch": 1.5318966572792947, "grad_norm": 1.3908957242965698, "learning_rate": 1.9648165318099714e-05, "loss": 0.5469, "step": 9384 }, { "epoch": 1.5320599159217991, "grad_norm": 1.831740140914917, "learning_rate": 1.9648081878869443e-05, "loss": 0.7482, "step": 9385 }, { "epoch": 1.5322231745643036, "grad_norm": 1.8593417406082153, "learning_rate": 1.964799842992356e-05, "loss": 0.6722, "step": 9386 }, { "epoch": 1.532386433206808, "grad_norm": 1.6878613233566284, "learning_rate": 1.9647914971262137e-05, "loss": 0.5754, "step": 9387 }, { "epoch": 1.5325496918493122, "grad_norm": 1.3713314533233643, "learning_rate": 1.9647831502885273e-05, "loss": 0.6178, "step": 9388 }, { "epoch": 1.5327129504918167, "grad_norm": 1.7864274978637695, "learning_rate": 1.9647748024793044e-05, "loss": 0.6046, "step": 9389 }, { "epoch": 1.532876209134321, "grad_norm": 2.0263280868530273, "learning_rate": 1.9647664536985536e-05, "loss": 0.6397, "step": 9390 }, { "epoch": 1.5330394677768253, "grad_norm": 2.020458459854126, "learning_rate": 1.964758103946283e-05, "loss": 0.5877, "step": 9391 }, { "epoch": 1.5332027264193298, "grad_norm": 2.217482328414917, "learning_rate": 1.9647497532225014e-05, "loss": 0.7866, "step": 9392 }, { "epoch": 1.5333659850618342, "grad_norm": 1.3647398948669434, "learning_rate": 1.964741401527217e-05, "loss": 0.594, "step": 9393 }, { "epoch": 1.5335292437043386, "grad_norm": 1.7567780017852783, "learning_rate": 1.9647330488604382e-05, "loss": 0.6781, "step": 9394 }, { "epoch": 1.533692502346843, "grad_norm": 1.8904190063476562, "learning_rate": 1.9647246952221734e-05, "loss": 0.7468, "step": 9395 }, { "epoch": 1.5338557609893475, "grad_norm": 1.7654463052749634, "learning_rate": 1.9647163406124315e-05, "loss": 0.6503, "step": 9396 }, { "epoch": 1.5340190196318517, "grad_norm": 1.9758763313293457, "learning_rate": 1.96470798503122e-05, "loss": 0.8037, "step": 9397 }, { "epoch": 1.5341822782743562, "grad_norm": 1.7731866836547852, "learning_rate": 1.964699628478548e-05, "loss": 0.6418, "step": 9398 }, { "epoch": 1.5343455369168604, "grad_norm": 1.700462818145752, "learning_rate": 1.964691270954424e-05, "loss": 0.6968, "step": 9399 }, { "epoch": 1.5345087955593648, "grad_norm": 1.5746897459030151, "learning_rate": 1.964682912458856e-05, "loss": 0.6294, "step": 9400 }, { "epoch": 1.5346720542018693, "grad_norm": 1.8645474910736084, "learning_rate": 1.9646745529918526e-05, "loss": 0.6567, "step": 9401 }, { "epoch": 1.5348353128443737, "grad_norm": 1.8743840456008911, "learning_rate": 1.964666192553422e-05, "loss": 0.7049, "step": 9402 }, { "epoch": 1.5349985714868781, "grad_norm": 1.6272512674331665, "learning_rate": 1.9646578311435728e-05, "loss": 0.6325, "step": 9403 }, { "epoch": 1.5351618301293826, "grad_norm": 1.9324036836624146, "learning_rate": 1.9646494687623135e-05, "loss": 0.6345, "step": 9404 }, { "epoch": 1.535325088771887, "grad_norm": 1.6970335245132446, "learning_rate": 1.9646411054096524e-05, "loss": 0.7013, "step": 9405 }, { "epoch": 1.5354883474143912, "grad_norm": 1.935442328453064, "learning_rate": 1.964632741085598e-05, "loss": 0.6793, "step": 9406 }, { "epoch": 1.5356516060568957, "grad_norm": 1.86101233959198, "learning_rate": 1.9646243757901587e-05, "loss": 0.623, "step": 9407 }, { "epoch": 1.5358148646993999, "grad_norm": 1.621456503868103, "learning_rate": 1.9646160095233428e-05, "loss": 0.561, "step": 9408 }, { "epoch": 1.5359781233419043, "grad_norm": 2.1596784591674805, "learning_rate": 1.964607642285159e-05, "loss": 0.6999, "step": 9409 }, { "epoch": 1.5361413819844087, "grad_norm": 1.689790964126587, "learning_rate": 1.9645992740756153e-05, "loss": 0.6402, "step": 9410 }, { "epoch": 1.5363046406269132, "grad_norm": 1.6741058826446533, "learning_rate": 1.9645909048947207e-05, "loss": 0.6824, "step": 9411 }, { "epoch": 1.5364678992694176, "grad_norm": 1.7208133935928345, "learning_rate": 1.9645825347424833e-05, "loss": 0.6777, "step": 9412 }, { "epoch": 1.536631157911922, "grad_norm": 1.6265747547149658, "learning_rate": 1.9645741636189112e-05, "loss": 0.704, "step": 9413 }, { "epoch": 1.5367944165544265, "grad_norm": 1.759395956993103, "learning_rate": 1.9645657915240136e-05, "loss": 0.4509, "step": 9414 }, { "epoch": 1.5369576751969307, "grad_norm": 1.398733139038086, "learning_rate": 1.9645574184577982e-05, "loss": 0.6261, "step": 9415 }, { "epoch": 1.5371209338394352, "grad_norm": 2.0852603912353516, "learning_rate": 1.964549044420274e-05, "loss": 0.8101, "step": 9416 }, { "epoch": 1.5372841924819394, "grad_norm": 1.7341099977493286, "learning_rate": 1.964540669411449e-05, "loss": 0.5394, "step": 9417 }, { "epoch": 1.5374474511244438, "grad_norm": 1.7706143856048584, "learning_rate": 1.964532293431332e-05, "loss": 0.7079, "step": 9418 }, { "epoch": 1.5376107097669482, "grad_norm": 2.027695894241333, "learning_rate": 1.964523916479931e-05, "loss": 0.8463, "step": 9419 }, { "epoch": 1.5377739684094527, "grad_norm": 1.492868185043335, "learning_rate": 1.9645155385572545e-05, "loss": 0.6326, "step": 9420 }, { "epoch": 1.5379372270519571, "grad_norm": 1.6397600173950195, "learning_rate": 1.9645071596633115e-05, "loss": 0.6157, "step": 9421 }, { "epoch": 1.5381004856944616, "grad_norm": 1.9635564088821411, "learning_rate": 1.9644987797981097e-05, "loss": 0.7325, "step": 9422 }, { "epoch": 1.538263744336966, "grad_norm": 2.169646739959717, "learning_rate": 1.9644903989616582e-05, "loss": 0.8427, "step": 9423 }, { "epoch": 1.5384270029794702, "grad_norm": 1.6734992265701294, "learning_rate": 1.964482017153965e-05, "loss": 0.6713, "step": 9424 }, { "epoch": 1.5385902616219747, "grad_norm": 1.5758618116378784, "learning_rate": 1.9644736343750385e-05, "loss": 0.6104, "step": 9425 }, { "epoch": 1.5387535202644789, "grad_norm": 1.783199429512024, "learning_rate": 1.9644652506248872e-05, "loss": 0.6291, "step": 9426 }, { "epoch": 1.5389167789069833, "grad_norm": 1.4725489616394043, "learning_rate": 1.96445686590352e-05, "loss": 0.6347, "step": 9427 }, { "epoch": 1.5390800375494877, "grad_norm": 2.1908011436462402, "learning_rate": 1.964448480210945e-05, "loss": 0.6561, "step": 9428 }, { "epoch": 1.5392432961919922, "grad_norm": 1.6816900968551636, "learning_rate": 1.9644400935471706e-05, "loss": 0.5446, "step": 9429 }, { "epoch": 1.5394065548344966, "grad_norm": 2.009796142578125, "learning_rate": 1.964431705912205e-05, "loss": 0.6878, "step": 9430 }, { "epoch": 1.539569813477001, "grad_norm": 1.8893064260482788, "learning_rate": 1.9644233173060575e-05, "loss": 0.716, "step": 9431 }, { "epoch": 1.5397330721195053, "grad_norm": 1.784517765045166, "learning_rate": 1.9644149277287353e-05, "loss": 0.6896, "step": 9432 }, { "epoch": 1.5398963307620097, "grad_norm": 1.4236937761306763, "learning_rate": 1.9644065371802478e-05, "loss": 0.5781, "step": 9433 }, { "epoch": 1.5400595894045142, "grad_norm": 1.6893795728683472, "learning_rate": 1.9643981456606034e-05, "loss": 0.6674, "step": 9434 }, { "epoch": 1.5402228480470184, "grad_norm": 1.608532190322876, "learning_rate": 1.96438975316981e-05, "loss": 0.5973, "step": 9435 }, { "epoch": 1.5403861066895228, "grad_norm": 1.5652847290039062, "learning_rate": 1.9643813597078768e-05, "loss": 0.6186, "step": 9436 }, { "epoch": 1.5405493653320272, "grad_norm": 1.8109371662139893, "learning_rate": 1.9643729652748115e-05, "loss": 0.6145, "step": 9437 }, { "epoch": 1.5407126239745317, "grad_norm": 1.8994207382202148, "learning_rate": 1.964364569870623e-05, "loss": 0.8121, "step": 9438 }, { "epoch": 1.5408758826170361, "grad_norm": 2.2794837951660156, "learning_rate": 1.9643561734953195e-05, "loss": 0.7427, "step": 9439 }, { "epoch": 1.5410391412595406, "grad_norm": 1.7784830331802368, "learning_rate": 1.9643477761489097e-05, "loss": 0.747, "step": 9440 }, { "epoch": 1.5412023999020448, "grad_norm": 1.7229548692703247, "learning_rate": 1.9643393778314018e-05, "loss": 0.698, "step": 9441 }, { "epoch": 1.5413656585445492, "grad_norm": 1.675310730934143, "learning_rate": 1.9643309785428045e-05, "loss": 0.6494, "step": 9442 }, { "epoch": 1.5415289171870534, "grad_norm": 1.901889681816101, "learning_rate": 1.9643225782831262e-05, "loss": 0.6561, "step": 9443 }, { "epoch": 1.5416921758295579, "grad_norm": 1.9107081890106201, "learning_rate": 1.964314177052375e-05, "loss": 0.6224, "step": 9444 }, { "epoch": 1.5418554344720623, "grad_norm": 1.5033913850784302, "learning_rate": 1.96430577485056e-05, "loss": 0.5995, "step": 9445 }, { "epoch": 1.5420186931145667, "grad_norm": 1.827280044555664, "learning_rate": 1.9642973716776892e-05, "loss": 0.6724, "step": 9446 }, { "epoch": 1.5421819517570712, "grad_norm": 1.7390449047088623, "learning_rate": 1.9642889675337717e-05, "loss": 0.6183, "step": 9447 }, { "epoch": 1.5423452103995756, "grad_norm": 1.7406712770462036, "learning_rate": 1.964280562418815e-05, "loss": 0.6435, "step": 9448 }, { "epoch": 1.54250846904208, "grad_norm": 1.9621742963790894, "learning_rate": 1.964272156332828e-05, "loss": 0.7979, "step": 9449 }, { "epoch": 1.5426717276845843, "grad_norm": 2.3159193992614746, "learning_rate": 1.9642637492758193e-05, "loss": 0.8412, "step": 9450 }, { "epoch": 1.5428349863270887, "grad_norm": 1.9173122644424438, "learning_rate": 1.9642553412477973e-05, "loss": 0.6118, "step": 9451 }, { "epoch": 1.542998244969593, "grad_norm": 1.4431688785552979, "learning_rate": 1.9642469322487702e-05, "loss": 0.5739, "step": 9452 }, { "epoch": 1.5431615036120974, "grad_norm": 1.9666340351104736, "learning_rate": 1.964238522278747e-05, "loss": 0.6618, "step": 9453 }, { "epoch": 1.5433247622546018, "grad_norm": 1.8873834609985352, "learning_rate": 1.9642301113377355e-05, "loss": 0.8051, "step": 9454 }, { "epoch": 1.5434880208971062, "grad_norm": 1.6692343950271606, "learning_rate": 1.9642216994257448e-05, "loss": 0.6213, "step": 9455 }, { "epoch": 1.5436512795396107, "grad_norm": 1.4786773920059204, "learning_rate": 1.964213286542783e-05, "loss": 0.6224, "step": 9456 }, { "epoch": 1.5438145381821151, "grad_norm": 1.3987064361572266, "learning_rate": 1.964204872688859e-05, "loss": 0.4941, "step": 9457 }, { "epoch": 1.5439777968246196, "grad_norm": 1.7440259456634521, "learning_rate": 1.9641964578639805e-05, "loss": 0.7177, "step": 9458 }, { "epoch": 1.5441410554671238, "grad_norm": 1.8765745162963867, "learning_rate": 1.9641880420681567e-05, "loss": 0.7719, "step": 9459 }, { "epoch": 1.5443043141096282, "grad_norm": 1.6687086820602417, "learning_rate": 1.9641796253013957e-05, "loss": 0.695, "step": 9460 }, { "epoch": 1.5444675727521324, "grad_norm": 2.0355796813964844, "learning_rate": 1.9641712075637062e-05, "loss": 0.6932, "step": 9461 }, { "epoch": 1.5446308313946369, "grad_norm": 1.624402403831482, "learning_rate": 1.9641627888550964e-05, "loss": 0.7277, "step": 9462 }, { "epoch": 1.5447940900371413, "grad_norm": 1.74776291847229, "learning_rate": 1.9641543691755747e-05, "loss": 0.672, "step": 9463 }, { "epoch": 1.5449573486796457, "grad_norm": 1.9137241840362549, "learning_rate": 1.9641459485251504e-05, "loss": 0.6882, "step": 9464 }, { "epoch": 1.5451206073221502, "grad_norm": 1.885491132736206, "learning_rate": 1.964137526903831e-05, "loss": 0.8219, "step": 9465 }, { "epoch": 1.5452838659646546, "grad_norm": 1.469007134437561, "learning_rate": 1.9641291043116254e-05, "loss": 0.6622, "step": 9466 }, { "epoch": 1.545447124607159, "grad_norm": 1.6938213109970093, "learning_rate": 1.964120680748542e-05, "loss": 0.6371, "step": 9467 }, { "epoch": 1.5456103832496633, "grad_norm": 1.6654342412948608, "learning_rate": 1.9641122562145895e-05, "loss": 0.6855, "step": 9468 }, { "epoch": 1.5457736418921677, "grad_norm": 2.3407859802246094, "learning_rate": 1.964103830709776e-05, "loss": 0.8867, "step": 9469 }, { "epoch": 1.545936900534672, "grad_norm": 1.5600014925003052, "learning_rate": 1.96409540423411e-05, "loss": 0.6878, "step": 9470 }, { "epoch": 1.5461001591771764, "grad_norm": 1.5103298425674438, "learning_rate": 1.9640869767876006e-05, "loss": 0.4963, "step": 9471 }, { "epoch": 1.5462634178196808, "grad_norm": 1.6041033267974854, "learning_rate": 1.9640785483702558e-05, "loss": 0.6611, "step": 9472 }, { "epoch": 1.5464266764621852, "grad_norm": 1.6403136253356934, "learning_rate": 1.964070118982084e-05, "loss": 0.6864, "step": 9473 }, { "epoch": 1.5465899351046897, "grad_norm": 1.4710911512374878, "learning_rate": 1.9640616886230942e-05, "loss": 0.5796, "step": 9474 }, { "epoch": 1.546753193747194, "grad_norm": 1.810124397277832, "learning_rate": 1.9640532572932944e-05, "loss": 0.7551, "step": 9475 }, { "epoch": 1.5469164523896983, "grad_norm": 1.5248197317123413, "learning_rate": 1.9640448249926928e-05, "loss": 0.5875, "step": 9476 }, { "epoch": 1.5470797110322028, "grad_norm": 1.9090111255645752, "learning_rate": 1.964036391721299e-05, "loss": 0.7733, "step": 9477 }, { "epoch": 1.5472429696747072, "grad_norm": 1.6833593845367432, "learning_rate": 1.9640279574791203e-05, "loss": 0.7213, "step": 9478 }, { "epoch": 1.5474062283172114, "grad_norm": 1.626120686531067, "learning_rate": 1.964019522266166e-05, "loss": 0.5786, "step": 9479 }, { "epoch": 1.5475694869597159, "grad_norm": 1.9080774784088135, "learning_rate": 1.9640110860824442e-05, "loss": 0.8009, "step": 9480 }, { "epoch": 1.5477327456022203, "grad_norm": 1.9155495166778564, "learning_rate": 1.9640026489279633e-05, "loss": 0.7603, "step": 9481 }, { "epoch": 1.5478960042447247, "grad_norm": 1.9354203939437866, "learning_rate": 1.9639942108027322e-05, "loss": 0.8244, "step": 9482 }, { "epoch": 1.5480592628872292, "grad_norm": 1.695570707321167, "learning_rate": 1.963985771706759e-05, "loss": 0.7406, "step": 9483 }, { "epoch": 1.5482225215297336, "grad_norm": 1.7488254308700562, "learning_rate": 1.9639773316400525e-05, "loss": 0.6846, "step": 9484 }, { "epoch": 1.5483857801722378, "grad_norm": 1.9334919452667236, "learning_rate": 1.9639688906026212e-05, "loss": 0.8951, "step": 9485 }, { "epoch": 1.5485490388147423, "grad_norm": 2.0990407466888428, "learning_rate": 1.9639604485944735e-05, "loss": 0.6024, "step": 9486 }, { "epoch": 1.5487122974572465, "grad_norm": 1.7125242948532104, "learning_rate": 1.9639520056156176e-05, "loss": 0.6593, "step": 9487 }, { "epoch": 1.548875556099751, "grad_norm": 1.9527026414871216, "learning_rate": 1.9639435616660622e-05, "loss": 0.7042, "step": 9488 }, { "epoch": 1.5490388147422554, "grad_norm": 1.6159279346466064, "learning_rate": 1.9639351167458163e-05, "loss": 0.632, "step": 9489 }, { "epoch": 1.5492020733847598, "grad_norm": 1.655697226524353, "learning_rate": 1.9639266708548878e-05, "loss": 0.6587, "step": 9490 }, { "epoch": 1.5493653320272642, "grad_norm": 1.8595929145812988, "learning_rate": 1.9639182239932854e-05, "loss": 0.6823, "step": 9491 }, { "epoch": 1.5495285906697687, "grad_norm": 1.7073559761047363, "learning_rate": 1.9639097761610174e-05, "loss": 0.6504, "step": 9492 }, { "epoch": 1.549691849312273, "grad_norm": 1.7612725496292114, "learning_rate": 1.963901327358093e-05, "loss": 0.5967, "step": 9493 }, { "epoch": 1.5498551079547773, "grad_norm": 1.9051953554153442, "learning_rate": 1.9638928775845197e-05, "loss": 0.7358, "step": 9494 }, { "epoch": 1.5500183665972818, "grad_norm": 1.7650326490402222, "learning_rate": 1.963884426840307e-05, "loss": 0.6547, "step": 9495 }, { "epoch": 1.550181625239786, "grad_norm": 1.5050806999206543, "learning_rate": 1.963875975125463e-05, "loss": 0.6222, "step": 9496 }, { "epoch": 1.5503448838822904, "grad_norm": 1.9541840553283691, "learning_rate": 1.9638675224399957e-05, "loss": 0.7282, "step": 9497 }, { "epoch": 1.5505081425247949, "grad_norm": 1.9103825092315674, "learning_rate": 1.963859068783914e-05, "loss": 0.7999, "step": 9498 }, { "epoch": 1.5506714011672993, "grad_norm": 1.9060308933258057, "learning_rate": 1.963850614157227e-05, "loss": 0.6585, "step": 9499 }, { "epoch": 1.5508346598098037, "grad_norm": 1.8326544761657715, "learning_rate": 1.9638421585599422e-05, "loss": 0.74, "step": 9500 }, { "epoch": 1.5509979184523082, "grad_norm": 2.0242741107940674, "learning_rate": 1.963833701992069e-05, "loss": 0.7776, "step": 9501 }, { "epoch": 1.5511611770948126, "grad_norm": 1.8957571983337402, "learning_rate": 1.9638252444536152e-05, "loss": 0.7789, "step": 9502 }, { "epoch": 1.5513244357373168, "grad_norm": 1.5746911764144897, "learning_rate": 1.9638167859445894e-05, "loss": 0.6695, "step": 9503 }, { "epoch": 1.5514876943798213, "grad_norm": 1.419299840927124, "learning_rate": 1.963808326465001e-05, "loss": 0.6859, "step": 9504 }, { "epoch": 1.5516509530223255, "grad_norm": 1.813405156135559, "learning_rate": 1.9637998660148577e-05, "loss": 0.6895, "step": 9505 }, { "epoch": 1.55181421166483, "grad_norm": 1.8447346687316895, "learning_rate": 1.9637914045941677e-05, "loss": 0.7008, "step": 9506 }, { "epoch": 1.5519774703073344, "grad_norm": 1.4773714542388916, "learning_rate": 1.9637829422029405e-05, "loss": 0.6266, "step": 9507 }, { "epoch": 1.5521407289498388, "grad_norm": 2.018190622329712, "learning_rate": 1.963774478841184e-05, "loss": 0.7602, "step": 9508 }, { "epoch": 1.5523039875923432, "grad_norm": 1.513196587562561, "learning_rate": 1.9637660145089067e-05, "loss": 0.6334, "step": 9509 }, { "epoch": 1.5524672462348477, "grad_norm": 2.016235828399658, "learning_rate": 1.9637575492061176e-05, "loss": 0.7618, "step": 9510 }, { "epoch": 1.552630504877352, "grad_norm": 1.7155921459197998, "learning_rate": 1.9637490829328247e-05, "loss": 0.7685, "step": 9511 }, { "epoch": 1.5527937635198563, "grad_norm": 1.4299681186676025, "learning_rate": 1.963740615689037e-05, "loss": 0.5304, "step": 9512 }, { "epoch": 1.5529570221623608, "grad_norm": 1.7057603597640991, "learning_rate": 1.9637321474747625e-05, "loss": 0.6019, "step": 9513 }, { "epoch": 1.553120280804865, "grad_norm": 1.5450531244277954, "learning_rate": 1.96372367829001e-05, "loss": 0.5715, "step": 9514 }, { "epoch": 1.5532835394473694, "grad_norm": 1.659255027770996, "learning_rate": 1.963715208134788e-05, "loss": 0.7389, "step": 9515 }, { "epoch": 1.5534467980898738, "grad_norm": 1.8471099138259888, "learning_rate": 1.9637067370091048e-05, "loss": 0.7451, "step": 9516 }, { "epoch": 1.5536100567323783, "grad_norm": 1.7966442108154297, "learning_rate": 1.9636982649129695e-05, "loss": 0.7506, "step": 9517 }, { "epoch": 1.5537733153748827, "grad_norm": 1.3493247032165527, "learning_rate": 1.9636897918463903e-05, "loss": 0.5638, "step": 9518 }, { "epoch": 1.5539365740173872, "grad_norm": 1.52828049659729, "learning_rate": 1.9636813178093756e-05, "loss": 0.6087, "step": 9519 }, { "epoch": 1.5540998326598914, "grad_norm": 1.4387402534484863, "learning_rate": 1.963672842801934e-05, "loss": 0.555, "step": 9520 }, { "epoch": 1.5542630913023958, "grad_norm": 1.6602486371994019, "learning_rate": 1.9636643668240743e-05, "loss": 0.6016, "step": 9521 }, { "epoch": 1.5544263499449003, "grad_norm": 1.9072169065475464, "learning_rate": 1.963655889875805e-05, "loss": 0.7039, "step": 9522 }, { "epoch": 1.5545896085874045, "grad_norm": 1.6349225044250488, "learning_rate": 1.963647411957134e-05, "loss": 0.5079, "step": 9523 }, { "epoch": 1.554752867229909, "grad_norm": 1.445678949356079, "learning_rate": 1.9636389330680708e-05, "loss": 0.6049, "step": 9524 }, { "epoch": 1.5549161258724133, "grad_norm": 1.4620128870010376, "learning_rate": 1.963630453208623e-05, "loss": 0.5696, "step": 9525 }, { "epoch": 1.5550793845149178, "grad_norm": 1.8855654001235962, "learning_rate": 1.9636219723788e-05, "loss": 0.689, "step": 9526 }, { "epoch": 1.5552426431574222, "grad_norm": 2.1212708950042725, "learning_rate": 1.9636134905786096e-05, "loss": 0.8069, "step": 9527 }, { "epoch": 1.5554059017999267, "grad_norm": 2.0869486331939697, "learning_rate": 1.9636050078080608e-05, "loss": 0.8693, "step": 9528 }, { "epoch": 1.5555691604424309, "grad_norm": 2.024595260620117, "learning_rate": 1.9635965240671622e-05, "loss": 0.7426, "step": 9529 }, { "epoch": 1.5557324190849353, "grad_norm": 1.9686275720596313, "learning_rate": 1.963588039355922e-05, "loss": 0.7862, "step": 9530 }, { "epoch": 1.5558956777274395, "grad_norm": 1.6909815073013306, "learning_rate": 1.9635795536743487e-05, "loss": 0.7284, "step": 9531 }, { "epoch": 1.556058936369944, "grad_norm": 2.011925458908081, "learning_rate": 1.9635710670224513e-05, "loss": 0.8127, "step": 9532 }, { "epoch": 1.5562221950124484, "grad_norm": 1.8116381168365479, "learning_rate": 1.963562579400238e-05, "loss": 0.7137, "step": 9533 }, { "epoch": 1.5563854536549528, "grad_norm": 1.5640140771865845, "learning_rate": 1.9635540908077173e-05, "loss": 0.5296, "step": 9534 }, { "epoch": 1.5565487122974573, "grad_norm": 1.7783217430114746, "learning_rate": 1.963545601244898e-05, "loss": 0.6481, "step": 9535 }, { "epoch": 1.5567119709399617, "grad_norm": 1.8671517372131348, "learning_rate": 1.963537110711789e-05, "loss": 0.7734, "step": 9536 }, { "epoch": 1.5568752295824662, "grad_norm": 1.9079513549804688, "learning_rate": 1.963528619208398e-05, "loss": 0.7461, "step": 9537 }, { "epoch": 1.5570384882249704, "grad_norm": 1.9567031860351562, "learning_rate": 1.9635201267347336e-05, "loss": 0.7161, "step": 9538 }, { "epoch": 1.5572017468674748, "grad_norm": 1.6965001821517944, "learning_rate": 1.963511633290805e-05, "loss": 0.7697, "step": 9539 }, { "epoch": 1.557365005509979, "grad_norm": 1.786491870880127, "learning_rate": 1.9635031388766204e-05, "loss": 0.6188, "step": 9540 }, { "epoch": 1.5575282641524835, "grad_norm": 1.981308937072754, "learning_rate": 1.9634946434921884e-05, "loss": 0.9482, "step": 9541 }, { "epoch": 1.557691522794988, "grad_norm": 2.067786455154419, "learning_rate": 1.9634861471375174e-05, "loss": 0.8016, "step": 9542 }, { "epoch": 1.5578547814374923, "grad_norm": 1.5726114511489868, "learning_rate": 1.9634776498126166e-05, "loss": 0.6652, "step": 9543 }, { "epoch": 1.5580180400799968, "grad_norm": 1.5051180124282837, "learning_rate": 1.9634691515174934e-05, "loss": 0.5873, "step": 9544 }, { "epoch": 1.5581812987225012, "grad_norm": 1.5987205505371094, "learning_rate": 1.9634606522521574e-05, "loss": 0.5854, "step": 9545 }, { "epoch": 1.5583445573650057, "grad_norm": 1.6799548864364624, "learning_rate": 1.963452152016617e-05, "loss": 0.6978, "step": 9546 }, { "epoch": 1.5585078160075099, "grad_norm": 1.5968040227890015, "learning_rate": 1.96344365081088e-05, "loss": 0.6686, "step": 9547 }, { "epoch": 1.5586710746500143, "grad_norm": 2.0574891567230225, "learning_rate": 1.9634351486349556e-05, "loss": 0.6583, "step": 9548 }, { "epoch": 1.5588343332925185, "grad_norm": 1.7352831363677979, "learning_rate": 1.9634266454888527e-05, "loss": 0.6494, "step": 9549 }, { "epoch": 1.558997591935023, "grad_norm": 1.7633482217788696, "learning_rate": 1.963418141372579e-05, "loss": 0.714, "step": 9550 }, { "epoch": 1.5591608505775274, "grad_norm": 1.8283002376556396, "learning_rate": 1.963409636286144e-05, "loss": 0.7036, "step": 9551 }, { "epoch": 1.5593241092200318, "grad_norm": 1.66374933719635, "learning_rate": 1.963401130229555e-05, "loss": 0.6001, "step": 9552 }, { "epoch": 1.5594873678625363, "grad_norm": 1.5216139554977417, "learning_rate": 1.9633926232028216e-05, "loss": 0.5998, "step": 9553 }, { "epoch": 1.5596506265050407, "grad_norm": 1.7688652276992798, "learning_rate": 1.9633841152059525e-05, "loss": 0.6438, "step": 9554 }, { "epoch": 1.5598138851475452, "grad_norm": 1.620160698890686, "learning_rate": 1.963375606238955e-05, "loss": 0.6136, "step": 9555 }, { "epoch": 1.5599771437900494, "grad_norm": 1.5937694311141968, "learning_rate": 1.9633670963018395e-05, "loss": 0.6295, "step": 9556 }, { "epoch": 1.5601404024325538, "grad_norm": 1.5217456817626953, "learning_rate": 1.9633585853946132e-05, "loss": 0.6039, "step": 9557 }, { "epoch": 1.560303661075058, "grad_norm": 1.7440733909606934, "learning_rate": 1.963350073517285e-05, "loss": 0.5994, "step": 9558 }, { "epoch": 1.5604669197175625, "grad_norm": 1.9913594722747803, "learning_rate": 1.9633415606698633e-05, "loss": 0.8227, "step": 9559 }, { "epoch": 1.560630178360067, "grad_norm": 1.7856299877166748, "learning_rate": 1.9633330468523572e-05, "loss": 0.7041, "step": 9560 }, { "epoch": 1.5607934370025713, "grad_norm": 1.4807101488113403, "learning_rate": 1.963324532064775e-05, "loss": 0.5649, "step": 9561 }, { "epoch": 1.5609566956450758, "grad_norm": 1.810655951499939, "learning_rate": 1.9633160163071255e-05, "loss": 0.6781, "step": 9562 }, { "epoch": 1.5611199542875802, "grad_norm": 1.9491256475448608, "learning_rate": 1.9633074995794165e-05, "loss": 0.7119, "step": 9563 }, { "epoch": 1.5612832129300847, "grad_norm": 1.8171676397323608, "learning_rate": 1.9632989818816572e-05, "loss": 0.6558, "step": 9564 }, { "epoch": 1.5614464715725889, "grad_norm": 1.7695295810699463, "learning_rate": 1.963290463213856e-05, "loss": 0.8233, "step": 9565 }, { "epoch": 1.5616097302150933, "grad_norm": 1.6034908294677734, "learning_rate": 1.963281943576022e-05, "loss": 0.6301, "step": 9566 }, { "epoch": 1.5617729888575975, "grad_norm": 1.45742666721344, "learning_rate": 1.963273422968163e-05, "loss": 0.645, "step": 9567 }, { "epoch": 1.561936247500102, "grad_norm": 1.860463261604309, "learning_rate": 1.963264901390288e-05, "loss": 0.6393, "step": 9568 }, { "epoch": 1.5620995061426064, "grad_norm": 1.9907405376434326, "learning_rate": 1.9632563788424055e-05, "loss": 0.837, "step": 9569 }, { "epoch": 1.5622627647851108, "grad_norm": 1.9828431606292725, "learning_rate": 1.9632478553245243e-05, "loss": 0.7032, "step": 9570 }, { "epoch": 1.5624260234276153, "grad_norm": 1.9811490774154663, "learning_rate": 1.9632393308366525e-05, "loss": 0.7237, "step": 9571 }, { "epoch": 1.5625892820701197, "grad_norm": 1.7508249282836914, "learning_rate": 1.963230805378799e-05, "loss": 0.6036, "step": 9572 }, { "epoch": 1.562752540712624, "grad_norm": 1.7123987674713135, "learning_rate": 1.9632222789509722e-05, "loss": 0.6487, "step": 9573 }, { "epoch": 1.5629157993551284, "grad_norm": 1.7815738916397095, "learning_rate": 1.963213751553181e-05, "loss": 0.665, "step": 9574 }, { "epoch": 1.5630790579976328, "grad_norm": 1.7878392934799194, "learning_rate": 1.9632052231854337e-05, "loss": 0.6659, "step": 9575 }, { "epoch": 1.563242316640137, "grad_norm": 2.019481897354126, "learning_rate": 1.9631966938477392e-05, "loss": 0.803, "step": 9576 }, { "epoch": 1.5634055752826415, "grad_norm": 1.6701409816741943, "learning_rate": 1.9631881635401056e-05, "loss": 0.629, "step": 9577 }, { "epoch": 1.563568833925146, "grad_norm": 1.3975422382354736, "learning_rate": 1.963179632262542e-05, "loss": 0.5311, "step": 9578 }, { "epoch": 1.5637320925676503, "grad_norm": 1.4472918510437012, "learning_rate": 1.963171100015057e-05, "loss": 0.4911, "step": 9579 }, { "epoch": 1.5638953512101548, "grad_norm": 1.8500815629959106, "learning_rate": 1.9631625667976584e-05, "loss": 0.638, "step": 9580 }, { "epoch": 1.5640586098526592, "grad_norm": 1.8440124988555908, "learning_rate": 1.9631540326103554e-05, "loss": 0.7963, "step": 9581 }, { "epoch": 1.5642218684951634, "grad_norm": 1.9842220544815063, "learning_rate": 1.963145497453157e-05, "loss": 0.727, "step": 9582 }, { "epoch": 1.5643851271376679, "grad_norm": 1.47636079788208, "learning_rate": 1.963136961326071e-05, "loss": 0.5364, "step": 9583 }, { "epoch": 1.564548385780172, "grad_norm": 1.92975652217865, "learning_rate": 1.9631284242291063e-05, "loss": 0.6864, "step": 9584 }, { "epoch": 1.5647116444226765, "grad_norm": 1.669007658958435, "learning_rate": 1.9631198861622714e-05, "loss": 0.7495, "step": 9585 }, { "epoch": 1.564874903065181, "grad_norm": 1.7122869491577148, "learning_rate": 1.9631113471255757e-05, "loss": 0.7092, "step": 9586 }, { "epoch": 1.5650381617076854, "grad_norm": 1.8365459442138672, "learning_rate": 1.9631028071190265e-05, "loss": 0.6647, "step": 9587 }, { "epoch": 1.5652014203501898, "grad_norm": 1.5710625648498535, "learning_rate": 1.9630942661426335e-05, "loss": 0.676, "step": 9588 }, { "epoch": 1.5653646789926943, "grad_norm": 1.5955301523208618, "learning_rate": 1.9630857241964043e-05, "loss": 0.6991, "step": 9589 }, { "epoch": 1.5655279376351987, "grad_norm": 1.9265618324279785, "learning_rate": 1.9630771812803484e-05, "loss": 0.8455, "step": 9590 }, { "epoch": 1.565691196277703, "grad_norm": 1.7142196893692017, "learning_rate": 1.9630686373944738e-05, "loss": 0.6454, "step": 9591 }, { "epoch": 1.5658544549202074, "grad_norm": 1.727154016494751, "learning_rate": 1.9630600925387894e-05, "loss": 0.7186, "step": 9592 }, { "epoch": 1.5660177135627116, "grad_norm": 1.8517849445343018, "learning_rate": 1.9630515467133038e-05, "loss": 0.6316, "step": 9593 }, { "epoch": 1.566180972205216, "grad_norm": 1.6774290800094604, "learning_rate": 1.9630429999180255e-05, "loss": 0.6176, "step": 9594 }, { "epoch": 1.5663442308477205, "grad_norm": 1.7105761766433716, "learning_rate": 1.9630344521529635e-05, "loss": 0.5854, "step": 9595 }, { "epoch": 1.566507489490225, "grad_norm": 1.564675211906433, "learning_rate": 1.9630259034181258e-05, "loss": 0.5798, "step": 9596 }, { "epoch": 1.5666707481327293, "grad_norm": 1.781473994255066, "learning_rate": 1.963017353713521e-05, "loss": 0.6623, "step": 9597 }, { "epoch": 1.5668340067752338, "grad_norm": 1.386099100112915, "learning_rate": 1.9630088030391584e-05, "loss": 0.453, "step": 9598 }, { "epoch": 1.5669972654177382, "grad_norm": 1.7640938758850098, "learning_rate": 1.963000251395046e-05, "loss": 0.6131, "step": 9599 }, { "epoch": 1.5671605240602424, "grad_norm": 1.4326421022415161, "learning_rate": 1.9629916987811924e-05, "loss": 0.6078, "step": 9600 }, { "epoch": 1.5673237827027469, "grad_norm": 1.5772504806518555, "learning_rate": 1.962983145197607e-05, "loss": 0.6182, "step": 9601 }, { "epoch": 1.567487041345251, "grad_norm": 1.5269213914871216, "learning_rate": 1.9629745906442973e-05, "loss": 0.6665, "step": 9602 }, { "epoch": 1.5676502999877555, "grad_norm": 1.963954210281372, "learning_rate": 1.9629660351212725e-05, "loss": 0.6247, "step": 9603 }, { "epoch": 1.56781355863026, "grad_norm": 1.3504971265792847, "learning_rate": 1.9629574786285413e-05, "loss": 0.5662, "step": 9604 }, { "epoch": 1.5679768172727644, "grad_norm": 1.725431203842163, "learning_rate": 1.9629489211661122e-05, "loss": 0.6482, "step": 9605 }, { "epoch": 1.5681400759152688, "grad_norm": 1.8823821544647217, "learning_rate": 1.9629403627339937e-05, "loss": 0.5387, "step": 9606 }, { "epoch": 1.5683033345577733, "grad_norm": 1.6015703678131104, "learning_rate": 1.9629318033321945e-05, "loss": 0.5905, "step": 9607 }, { "epoch": 1.5684665932002777, "grad_norm": 1.7509411573410034, "learning_rate": 1.9629232429607233e-05, "loss": 0.6801, "step": 9608 }, { "epoch": 1.568629851842782, "grad_norm": 1.9154456853866577, "learning_rate": 1.9629146816195887e-05, "loss": 0.6379, "step": 9609 }, { "epoch": 1.5687931104852864, "grad_norm": 1.942124366760254, "learning_rate": 1.962906119308799e-05, "loss": 0.8034, "step": 9610 }, { "epoch": 1.5689563691277906, "grad_norm": 1.6453465223312378, "learning_rate": 1.9628975560283634e-05, "loss": 0.5165, "step": 9611 }, { "epoch": 1.569119627770295, "grad_norm": 1.6520541906356812, "learning_rate": 1.96288899177829e-05, "loss": 0.6191, "step": 9612 }, { "epoch": 1.5692828864127994, "grad_norm": 2.0133602619171143, "learning_rate": 1.9628804265585878e-05, "loss": 0.6422, "step": 9613 }, { "epoch": 1.5694461450553039, "grad_norm": 1.7317301034927368, "learning_rate": 1.962871860369265e-05, "loss": 0.598, "step": 9614 }, { "epoch": 1.5696094036978083, "grad_norm": 1.7891112565994263, "learning_rate": 1.962863293210331e-05, "loss": 0.6364, "step": 9615 }, { "epoch": 1.5697726623403128, "grad_norm": 2.103178024291992, "learning_rate": 1.9628547250817937e-05, "loss": 0.7649, "step": 9616 }, { "epoch": 1.569935920982817, "grad_norm": 1.7231590747833252, "learning_rate": 1.9628461559836615e-05, "loss": 0.6916, "step": 9617 }, { "epoch": 1.5700991796253214, "grad_norm": 1.6548823118209839, "learning_rate": 1.962837585915944e-05, "loss": 0.6397, "step": 9618 }, { "epoch": 1.5702624382678259, "grad_norm": 1.6805055141448975, "learning_rate": 1.962829014878649e-05, "loss": 0.6595, "step": 9619 }, { "epoch": 1.57042569691033, "grad_norm": 1.789267659187317, "learning_rate": 1.9628204428717856e-05, "loss": 0.6939, "step": 9620 }, { "epoch": 1.5705889555528345, "grad_norm": 1.7846240997314453, "learning_rate": 1.9628118698953623e-05, "loss": 0.7044, "step": 9621 }, { "epoch": 1.570752214195339, "grad_norm": 2.000779628753662, "learning_rate": 1.9628032959493878e-05, "loss": 0.7943, "step": 9622 }, { "epoch": 1.5709154728378434, "grad_norm": 2.066596746444702, "learning_rate": 1.9627947210338702e-05, "loss": 0.6147, "step": 9623 }, { "epoch": 1.5710787314803478, "grad_norm": 1.521427869796753, "learning_rate": 1.962786145148819e-05, "loss": 0.5556, "step": 9624 }, { "epoch": 1.5712419901228523, "grad_norm": 1.427954912185669, "learning_rate": 1.962777568294242e-05, "loss": 0.5564, "step": 9625 }, { "epoch": 1.5714052487653565, "grad_norm": 1.943752408027649, "learning_rate": 1.9627689904701486e-05, "loss": 0.8123, "step": 9626 }, { "epoch": 1.571568507407861, "grad_norm": 1.705159306526184, "learning_rate": 1.962760411676547e-05, "loss": 0.6693, "step": 9627 }, { "epoch": 1.5717317660503651, "grad_norm": 1.8894529342651367, "learning_rate": 1.9627518319134463e-05, "loss": 0.8278, "step": 9628 }, { "epoch": 1.5718950246928696, "grad_norm": 1.8755106925964355, "learning_rate": 1.962743251180854e-05, "loss": 0.696, "step": 9629 }, { "epoch": 1.572058283335374, "grad_norm": 2.0930750370025635, "learning_rate": 1.9627346694787798e-05, "loss": 0.7219, "step": 9630 }, { "epoch": 1.5722215419778784, "grad_norm": 1.7161297798156738, "learning_rate": 1.9627260868072322e-05, "loss": 0.7818, "step": 9631 }, { "epoch": 1.5723848006203829, "grad_norm": 1.7828248739242554, "learning_rate": 1.96271750316622e-05, "loss": 0.6071, "step": 9632 }, { "epoch": 1.5725480592628873, "grad_norm": 1.4134396314620972, "learning_rate": 1.962708918555751e-05, "loss": 0.5648, "step": 9633 }, { "epoch": 1.5727113179053918, "grad_norm": 2.270962715148926, "learning_rate": 1.9627003329758344e-05, "loss": 0.788, "step": 9634 }, { "epoch": 1.572874576547896, "grad_norm": 1.729169249534607, "learning_rate": 1.962691746426479e-05, "loss": 0.6177, "step": 9635 }, { "epoch": 1.5730378351904004, "grad_norm": 1.5509408712387085, "learning_rate": 1.9626831589076932e-05, "loss": 0.611, "step": 9636 }, { "epoch": 1.5732010938329046, "grad_norm": 1.3985943794250488, "learning_rate": 1.9626745704194857e-05, "loss": 0.56, "step": 9637 }, { "epoch": 1.573364352475409, "grad_norm": 1.8105063438415527, "learning_rate": 1.9626659809618652e-05, "loss": 0.7991, "step": 9638 }, { "epoch": 1.5735276111179135, "grad_norm": 2.213312864303589, "learning_rate": 1.9626573905348403e-05, "loss": 0.8037, "step": 9639 }, { "epoch": 1.573690869760418, "grad_norm": 1.7250983715057373, "learning_rate": 1.9626487991384194e-05, "loss": 0.7364, "step": 9640 }, { "epoch": 1.5738541284029224, "grad_norm": 1.6180000305175781, "learning_rate": 1.962640206772612e-05, "loss": 0.6066, "step": 9641 }, { "epoch": 1.5740173870454268, "grad_norm": 1.8795654773712158, "learning_rate": 1.9626316134374255e-05, "loss": 0.9205, "step": 9642 }, { "epoch": 1.5741806456879313, "grad_norm": 1.9067094326019287, "learning_rate": 1.9626230191328697e-05, "loss": 0.6631, "step": 9643 }, { "epoch": 1.5743439043304355, "grad_norm": 1.6158788204193115, "learning_rate": 1.9626144238589525e-05, "loss": 0.6429, "step": 9644 }, { "epoch": 1.57450716297294, "grad_norm": 1.6274019479751587, "learning_rate": 1.962605827615683e-05, "loss": 0.6413, "step": 9645 }, { "epoch": 1.5746704216154441, "grad_norm": 1.8255980014801025, "learning_rate": 1.9625972304030697e-05, "loss": 0.7866, "step": 9646 }, { "epoch": 1.5748336802579486, "grad_norm": 1.7922167778015137, "learning_rate": 1.962588632221121e-05, "loss": 0.6896, "step": 9647 }, { "epoch": 1.574996938900453, "grad_norm": 1.7598341703414917, "learning_rate": 1.9625800330698462e-05, "loss": 0.7013, "step": 9648 }, { "epoch": 1.5751601975429574, "grad_norm": 1.2833775281906128, "learning_rate": 1.9625714329492532e-05, "loss": 0.4882, "step": 9649 }, { "epoch": 1.5753234561854619, "grad_norm": 1.2973188161849976, "learning_rate": 1.9625628318593514e-05, "loss": 0.4906, "step": 9650 }, { "epoch": 1.5754867148279663, "grad_norm": 1.550643801689148, "learning_rate": 1.9625542298001487e-05, "loss": 0.5901, "step": 9651 }, { "epoch": 1.5756499734704708, "grad_norm": 1.6381577253341675, "learning_rate": 1.9625456267716544e-05, "loss": 0.6771, "step": 9652 }, { "epoch": 1.575813232112975, "grad_norm": 1.581642508506775, "learning_rate": 1.962537022773877e-05, "loss": 0.5938, "step": 9653 }, { "epoch": 1.5759764907554794, "grad_norm": 1.5784766674041748, "learning_rate": 1.9625284178068246e-05, "loss": 0.6929, "step": 9654 }, { "epoch": 1.5761397493979836, "grad_norm": 1.7521753311157227, "learning_rate": 1.9625198118705065e-05, "loss": 0.7275, "step": 9655 }, { "epoch": 1.576303008040488, "grad_norm": 1.6148757934570312, "learning_rate": 1.9625112049649316e-05, "loss": 0.5798, "step": 9656 }, { "epoch": 1.5764662666829925, "grad_norm": 1.9123215675354004, "learning_rate": 1.9625025970901078e-05, "loss": 0.7116, "step": 9657 }, { "epoch": 1.576629525325497, "grad_norm": 1.487979531288147, "learning_rate": 1.962493988246044e-05, "loss": 0.6228, "step": 9658 }, { "epoch": 1.5767927839680014, "grad_norm": 1.454133152961731, "learning_rate": 1.9624853784327495e-05, "loss": 0.5909, "step": 9659 }, { "epoch": 1.5769560426105058, "grad_norm": 1.2248520851135254, "learning_rate": 1.962476767650232e-05, "loss": 0.5306, "step": 9660 }, { "epoch": 1.57711930125301, "grad_norm": 1.439718246459961, "learning_rate": 1.962468155898501e-05, "loss": 0.6166, "step": 9661 }, { "epoch": 1.5772825598955145, "grad_norm": 1.550396203994751, "learning_rate": 1.962459543177565e-05, "loss": 0.6085, "step": 9662 }, { "epoch": 1.577445818538019, "grad_norm": 1.7822028398513794, "learning_rate": 1.962450929487432e-05, "loss": 0.609, "step": 9663 }, { "epoch": 1.5776090771805231, "grad_norm": 1.931081771850586, "learning_rate": 1.9624423148281114e-05, "loss": 0.691, "step": 9664 }, { "epoch": 1.5777723358230276, "grad_norm": 1.650681495666504, "learning_rate": 1.9624336991996117e-05, "loss": 0.5518, "step": 9665 }, { "epoch": 1.577935594465532, "grad_norm": 1.5022908449172974, "learning_rate": 1.9624250826019413e-05, "loss": 0.6965, "step": 9666 }, { "epoch": 1.5780988531080364, "grad_norm": 1.8011267185211182, "learning_rate": 1.9624164650351093e-05, "loss": 0.6192, "step": 9667 }, { "epoch": 1.5782621117505409, "grad_norm": 2.0723159313201904, "learning_rate": 1.962407846499124e-05, "loss": 0.7164, "step": 9668 }, { "epoch": 1.5784253703930453, "grad_norm": 2.1561760902404785, "learning_rate": 1.9623992269939946e-05, "loss": 0.9268, "step": 9669 }, { "epoch": 1.5785886290355495, "grad_norm": 1.8231765031814575, "learning_rate": 1.9623906065197288e-05, "loss": 0.745, "step": 9670 }, { "epoch": 1.578751887678054, "grad_norm": 1.982771396636963, "learning_rate": 1.9623819850763364e-05, "loss": 0.8288, "step": 9671 }, { "epoch": 1.5789151463205582, "grad_norm": 1.9587383270263672, "learning_rate": 1.9623733626638258e-05, "loss": 0.755, "step": 9672 }, { "epoch": 1.5790784049630626, "grad_norm": 1.9257392883300781, "learning_rate": 1.962364739282205e-05, "loss": 0.6861, "step": 9673 }, { "epoch": 1.579241663605567, "grad_norm": 1.9680873155593872, "learning_rate": 1.9623561149314832e-05, "loss": 0.687, "step": 9674 }, { "epoch": 1.5794049222480715, "grad_norm": 1.7040352821350098, "learning_rate": 1.9623474896116696e-05, "loss": 0.641, "step": 9675 }, { "epoch": 1.579568180890576, "grad_norm": 2.0922768115997314, "learning_rate": 1.9623388633227716e-05, "loss": 0.6539, "step": 9676 }, { "epoch": 1.5797314395330804, "grad_norm": 1.504915714263916, "learning_rate": 1.9623302360647992e-05, "loss": 0.6375, "step": 9677 }, { "epoch": 1.5798946981755848, "grad_norm": 2.211939573287964, "learning_rate": 1.96232160783776e-05, "loss": 0.8916, "step": 9678 }, { "epoch": 1.580057956818089, "grad_norm": 1.6597124338150024, "learning_rate": 1.9623129786416635e-05, "loss": 0.7033, "step": 9679 }, { "epoch": 1.5802212154605935, "grad_norm": 1.8660601377487183, "learning_rate": 1.962304348476518e-05, "loss": 0.7886, "step": 9680 }, { "epoch": 1.5803844741030977, "grad_norm": 2.0038650035858154, "learning_rate": 1.9622957173423325e-05, "loss": 0.751, "step": 9681 }, { "epoch": 1.5805477327456021, "grad_norm": 1.450705885887146, "learning_rate": 1.962287085239115e-05, "loss": 0.5394, "step": 9682 }, { "epoch": 1.5807109913881066, "grad_norm": 1.3762441873550415, "learning_rate": 1.962278452166875e-05, "loss": 0.5902, "step": 9683 }, { "epoch": 1.580874250030611, "grad_norm": 1.8528920412063599, "learning_rate": 1.9622698181256207e-05, "loss": 0.7836, "step": 9684 }, { "epoch": 1.5810375086731154, "grad_norm": 1.8280096054077148, "learning_rate": 1.962261183115361e-05, "loss": 0.7014, "step": 9685 }, { "epoch": 1.5812007673156199, "grad_norm": 1.8464151620864868, "learning_rate": 1.962252547136105e-05, "loss": 0.6491, "step": 9686 }, { "epoch": 1.5813640259581243, "grad_norm": 1.6274847984313965, "learning_rate": 1.9622439101878603e-05, "loss": 0.7113, "step": 9687 }, { "epoch": 1.5815272846006285, "grad_norm": 1.425610899925232, "learning_rate": 1.9622352722706365e-05, "loss": 0.5823, "step": 9688 }, { "epoch": 1.581690543243133, "grad_norm": 1.72265625, "learning_rate": 1.962226633384442e-05, "loss": 0.6272, "step": 9689 }, { "epoch": 1.5818538018856372, "grad_norm": 1.851369023323059, "learning_rate": 1.9622179935292855e-05, "loss": 0.6878, "step": 9690 }, { "epoch": 1.5820170605281416, "grad_norm": 1.6060316562652588, "learning_rate": 1.9622093527051758e-05, "loss": 0.7342, "step": 9691 }, { "epoch": 1.582180319170646, "grad_norm": 1.7634220123291016, "learning_rate": 1.9622007109121214e-05, "loss": 0.6128, "step": 9692 }, { "epoch": 1.5823435778131505, "grad_norm": 1.73552405834198, "learning_rate": 1.9621920681501314e-05, "loss": 0.6906, "step": 9693 }, { "epoch": 1.582506836455655, "grad_norm": 1.418906807899475, "learning_rate": 1.962183424419214e-05, "loss": 0.5313, "step": 9694 }, { "epoch": 1.5826700950981594, "grad_norm": 1.5142323970794678, "learning_rate": 1.9621747797193784e-05, "loss": 0.5362, "step": 9695 }, { "epoch": 1.5828333537406638, "grad_norm": 1.5299216508865356, "learning_rate": 1.962166134050633e-05, "loss": 0.6117, "step": 9696 }, { "epoch": 1.582996612383168, "grad_norm": 1.7742419242858887, "learning_rate": 1.962157487412986e-05, "loss": 0.7099, "step": 9697 }, { "epoch": 1.5831598710256725, "grad_norm": 2.0223584175109863, "learning_rate": 1.962148839806447e-05, "loss": 0.8548, "step": 9698 }, { "epoch": 1.5833231296681767, "grad_norm": 1.4292356967926025, "learning_rate": 1.9621401912310247e-05, "loss": 0.5669, "step": 9699 }, { "epoch": 1.5834863883106811, "grad_norm": 1.6820064783096313, "learning_rate": 1.9621315416867274e-05, "loss": 0.6238, "step": 9700 }, { "epoch": 1.5836496469531856, "grad_norm": 1.8175023794174194, "learning_rate": 1.9621228911735637e-05, "loss": 0.6982, "step": 9701 }, { "epoch": 1.58381290559569, "grad_norm": 1.7978641986846924, "learning_rate": 1.9621142396915423e-05, "loss": 0.7152, "step": 9702 }, { "epoch": 1.5839761642381944, "grad_norm": 1.5713573694229126, "learning_rate": 1.962105587240673e-05, "loss": 0.6519, "step": 9703 }, { "epoch": 1.5841394228806989, "grad_norm": 1.7591252326965332, "learning_rate": 1.9620969338209626e-05, "loss": 0.7534, "step": 9704 }, { "epoch": 1.584302681523203, "grad_norm": 1.8434993028640747, "learning_rate": 1.9620882794324213e-05, "loss": 0.6487, "step": 9705 }, { "epoch": 1.5844659401657075, "grad_norm": 1.859194278717041, "learning_rate": 1.962079624075057e-05, "loss": 0.7473, "step": 9706 }, { "epoch": 1.584629198808212, "grad_norm": 1.8070935010910034, "learning_rate": 1.9620709677488794e-05, "loss": 0.701, "step": 9707 }, { "epoch": 1.5847924574507162, "grad_norm": 1.6189990043640137, "learning_rate": 1.9620623104538963e-05, "loss": 0.7371, "step": 9708 }, { "epoch": 1.5849557160932206, "grad_norm": 1.8244355916976929, "learning_rate": 1.9620536521901168e-05, "loss": 0.6861, "step": 9709 }, { "epoch": 1.585118974735725, "grad_norm": 2.8467419147491455, "learning_rate": 1.9620449929575495e-05, "loss": 0.8209, "step": 9710 }, { "epoch": 1.5852822333782295, "grad_norm": 1.955719232559204, "learning_rate": 1.9620363327562028e-05, "loss": 0.6263, "step": 9711 }, { "epoch": 1.585445492020734, "grad_norm": 2.0077857971191406, "learning_rate": 1.962027671586086e-05, "loss": 0.6842, "step": 9712 }, { "epoch": 1.5856087506632384, "grad_norm": 1.3232954740524292, "learning_rate": 1.9620190094472077e-05, "loss": 0.5559, "step": 9713 }, { "epoch": 1.5857720093057426, "grad_norm": 1.67738938331604, "learning_rate": 1.9620103463395764e-05, "loss": 0.7409, "step": 9714 }, { "epoch": 1.585935267948247, "grad_norm": 1.9344784021377563, "learning_rate": 1.962001682263201e-05, "loss": 0.8101, "step": 9715 }, { "epoch": 1.5860985265907512, "grad_norm": 1.799301266670227, "learning_rate": 1.96199301721809e-05, "loss": 0.6269, "step": 9716 }, { "epoch": 1.5862617852332557, "grad_norm": 1.8337783813476562, "learning_rate": 1.9619843512042525e-05, "loss": 0.7435, "step": 9717 }, { "epoch": 1.58642504387576, "grad_norm": 1.773411750793457, "learning_rate": 1.961975684221697e-05, "loss": 0.825, "step": 9718 }, { "epoch": 1.5865883025182645, "grad_norm": 1.6811023950576782, "learning_rate": 1.9619670162704322e-05, "loss": 0.6637, "step": 9719 }, { "epoch": 1.586751561160769, "grad_norm": 2.137704610824585, "learning_rate": 1.961958347350467e-05, "loss": 0.6936, "step": 9720 }, { "epoch": 1.5869148198032734, "grad_norm": 1.6904270648956299, "learning_rate": 1.9619496774618098e-05, "loss": 0.6042, "step": 9721 }, { "epoch": 1.5870780784457779, "grad_norm": 1.9231176376342773, "learning_rate": 1.961941006604469e-05, "loss": 0.7972, "step": 9722 }, { "epoch": 1.587241337088282, "grad_norm": 1.6723405122756958, "learning_rate": 1.961932334778455e-05, "loss": 0.6414, "step": 9723 }, { "epoch": 1.5874045957307865, "grad_norm": 1.6982735395431519, "learning_rate": 1.9619236619837747e-05, "loss": 0.6742, "step": 9724 }, { "epoch": 1.5875678543732907, "grad_norm": 1.5371806621551514, "learning_rate": 1.9619149882204375e-05, "loss": 0.6047, "step": 9725 }, { "epoch": 1.5877311130157952, "grad_norm": 1.7299354076385498, "learning_rate": 1.961906313488452e-05, "loss": 0.66, "step": 9726 }, { "epoch": 1.5878943716582996, "grad_norm": 1.7391077280044556, "learning_rate": 1.9618976377878275e-05, "loss": 0.669, "step": 9727 }, { "epoch": 1.588057630300804, "grad_norm": 1.5976592302322388, "learning_rate": 1.9618889611185726e-05, "loss": 0.661, "step": 9728 }, { "epoch": 1.5882208889433085, "grad_norm": 1.6944645643234253, "learning_rate": 1.9618802834806953e-05, "loss": 0.7033, "step": 9729 }, { "epoch": 1.588384147585813, "grad_norm": 1.7903512716293335, "learning_rate": 1.961871604874205e-05, "loss": 0.7511, "step": 9730 }, { "epoch": 1.5885474062283174, "grad_norm": 1.5805833339691162, "learning_rate": 1.96186292529911e-05, "loss": 0.6207, "step": 9731 }, { "epoch": 1.5887106648708216, "grad_norm": 2.1036124229431152, "learning_rate": 1.9618542447554195e-05, "loss": 0.918, "step": 9732 }, { "epoch": 1.588873923513326, "grad_norm": 1.789934515953064, "learning_rate": 1.961845563243142e-05, "loss": 0.6784, "step": 9733 }, { "epoch": 1.5890371821558302, "grad_norm": 1.4626398086547852, "learning_rate": 1.9618368807622863e-05, "loss": 0.7582, "step": 9734 }, { "epoch": 1.5892004407983347, "grad_norm": 2.245030164718628, "learning_rate": 1.961828197312861e-05, "loss": 0.7274, "step": 9735 }, { "epoch": 1.589363699440839, "grad_norm": 1.7031570672988892, "learning_rate": 1.9618195128948753e-05, "loss": 0.668, "step": 9736 }, { "epoch": 1.5895269580833435, "grad_norm": 1.381311297416687, "learning_rate": 1.961810827508337e-05, "loss": 0.6, "step": 9737 }, { "epoch": 1.589690216725848, "grad_norm": 1.9340081214904785, "learning_rate": 1.9618021411532558e-05, "loss": 0.8211, "step": 9738 }, { "epoch": 1.5898534753683524, "grad_norm": 1.357742428779602, "learning_rate": 1.9617934538296404e-05, "loss": 0.6042, "step": 9739 }, { "epoch": 1.5900167340108569, "grad_norm": 1.8038944005966187, "learning_rate": 1.9617847655374988e-05, "loss": 0.7426, "step": 9740 }, { "epoch": 1.590179992653361, "grad_norm": 1.5917192697525024, "learning_rate": 1.9617760762768406e-05, "loss": 0.7127, "step": 9741 }, { "epoch": 1.5903432512958655, "grad_norm": 1.8327029943466187, "learning_rate": 1.9617673860476735e-05, "loss": 0.6998, "step": 9742 }, { "epoch": 1.5905065099383697, "grad_norm": 1.6016114950180054, "learning_rate": 1.9617586948500076e-05, "loss": 0.7127, "step": 9743 }, { "epoch": 1.5906697685808742, "grad_norm": 1.760743498802185, "learning_rate": 1.9617500026838506e-05, "loss": 0.8217, "step": 9744 }, { "epoch": 1.5908330272233786, "grad_norm": 1.8219060897827148, "learning_rate": 1.9617413095492114e-05, "loss": 0.7175, "step": 9745 }, { "epoch": 1.590996285865883, "grad_norm": 1.7804641723632812, "learning_rate": 1.9617326154460992e-05, "loss": 0.5986, "step": 9746 }, { "epoch": 1.5911595445083875, "grad_norm": 1.873049020767212, "learning_rate": 1.9617239203745226e-05, "loss": 0.7798, "step": 9747 }, { "epoch": 1.591322803150892, "grad_norm": 1.6832165718078613, "learning_rate": 1.9617152243344903e-05, "loss": 0.6572, "step": 9748 }, { "epoch": 1.5914860617933961, "grad_norm": 1.6698039770126343, "learning_rate": 1.9617065273260106e-05, "loss": 0.6375, "step": 9749 }, { "epoch": 1.5916493204359006, "grad_norm": 1.550872564315796, "learning_rate": 1.961697829349093e-05, "loss": 0.5906, "step": 9750 }, { "epoch": 1.591812579078405, "grad_norm": 1.819465160369873, "learning_rate": 1.961689130403746e-05, "loss": 0.6161, "step": 9751 }, { "epoch": 1.5919758377209092, "grad_norm": 1.714302659034729, "learning_rate": 1.9616804304899785e-05, "loss": 0.6474, "step": 9752 }, { "epoch": 1.5921390963634137, "grad_norm": 1.7182247638702393, "learning_rate": 1.9616717296077986e-05, "loss": 0.7266, "step": 9753 }, { "epoch": 1.592302355005918, "grad_norm": 1.7214372158050537, "learning_rate": 1.9616630277572158e-05, "loss": 0.6537, "step": 9754 }, { "epoch": 1.5924656136484225, "grad_norm": 2.0256571769714355, "learning_rate": 1.9616543249382385e-05, "loss": 0.7473, "step": 9755 }, { "epoch": 1.592628872290927, "grad_norm": 1.7589939832687378, "learning_rate": 1.9616456211508756e-05, "loss": 0.7651, "step": 9756 }, { "epoch": 1.5927921309334314, "grad_norm": 1.6143070459365845, "learning_rate": 1.9616369163951354e-05, "loss": 0.5442, "step": 9757 }, { "epoch": 1.5929553895759356, "grad_norm": 2.2633056640625, "learning_rate": 1.9616282106710276e-05, "loss": 0.8147, "step": 9758 }, { "epoch": 1.59311864821844, "grad_norm": 1.8863481283187866, "learning_rate": 1.96161950397856e-05, "loss": 0.5512, "step": 9759 }, { "epoch": 1.5932819068609443, "grad_norm": 1.6044455766677856, "learning_rate": 1.9616107963177423e-05, "loss": 0.6513, "step": 9760 }, { "epoch": 1.5934451655034487, "grad_norm": 1.6253830194473267, "learning_rate": 1.9616020876885825e-05, "loss": 0.6259, "step": 9761 }, { "epoch": 1.5936084241459532, "grad_norm": 1.7241250276565552, "learning_rate": 1.96159337809109e-05, "loss": 0.5976, "step": 9762 }, { "epoch": 1.5937716827884576, "grad_norm": 1.7269858121871948, "learning_rate": 1.9615846675252726e-05, "loss": 0.7293, "step": 9763 }, { "epoch": 1.593934941430962, "grad_norm": 1.744154453277588, "learning_rate": 1.96157595599114e-05, "loss": 0.6749, "step": 9764 }, { "epoch": 1.5940982000734665, "grad_norm": 1.6006718873977661, "learning_rate": 1.961567243488701e-05, "loss": 0.6577, "step": 9765 }, { "epoch": 1.594261458715971, "grad_norm": 1.3282138109207153, "learning_rate": 1.9615585300179638e-05, "loss": 0.5595, "step": 9766 }, { "epoch": 1.5944247173584751, "grad_norm": 1.617884874343872, "learning_rate": 1.9615498155789373e-05, "loss": 0.6207, "step": 9767 }, { "epoch": 1.5945879760009796, "grad_norm": 1.6365516185760498, "learning_rate": 1.9615411001716308e-05, "loss": 0.6657, "step": 9768 }, { "epoch": 1.5947512346434838, "grad_norm": 1.6910046339035034, "learning_rate": 1.961532383796052e-05, "loss": 0.6715, "step": 9769 }, { "epoch": 1.5949144932859882, "grad_norm": 1.6335252523422241, "learning_rate": 1.9615236664522108e-05, "loss": 0.5335, "step": 9770 }, { "epoch": 1.5950777519284927, "grad_norm": 1.5872143507003784, "learning_rate": 1.9615149481401152e-05, "loss": 0.6462, "step": 9771 }, { "epoch": 1.595241010570997, "grad_norm": 1.7322975397109985, "learning_rate": 1.9615062288597746e-05, "loss": 0.6994, "step": 9772 }, { "epoch": 1.5954042692135015, "grad_norm": 1.4623985290527344, "learning_rate": 1.9614975086111974e-05, "loss": 0.6346, "step": 9773 }, { "epoch": 1.595567527856006, "grad_norm": 1.7291159629821777, "learning_rate": 1.9614887873943928e-05, "loss": 0.662, "step": 9774 }, { "epoch": 1.5957307864985104, "grad_norm": 1.6546647548675537, "learning_rate": 1.9614800652093685e-05, "loss": 0.6841, "step": 9775 }, { "epoch": 1.5958940451410146, "grad_norm": 2.449777603149414, "learning_rate": 1.9614713420561348e-05, "loss": 1.1406, "step": 9776 }, { "epoch": 1.596057303783519, "grad_norm": 1.750852108001709, "learning_rate": 1.961462617934699e-05, "loss": 0.6832, "step": 9777 }, { "epoch": 1.5962205624260233, "grad_norm": 1.6234248876571655, "learning_rate": 1.961453892845071e-05, "loss": 0.5867, "step": 9778 }, { "epoch": 1.5963838210685277, "grad_norm": 2.2385153770446777, "learning_rate": 1.9614451667872593e-05, "loss": 0.6472, "step": 9779 }, { "epoch": 1.5965470797110322, "grad_norm": 1.9185349941253662, "learning_rate": 1.9614364397612723e-05, "loss": 0.765, "step": 9780 }, { "epoch": 1.5967103383535366, "grad_norm": 2.019627571105957, "learning_rate": 1.9614277117671193e-05, "loss": 0.7736, "step": 9781 }, { "epoch": 1.596873596996041, "grad_norm": 1.8009591102600098, "learning_rate": 1.9614189828048085e-05, "loss": 0.6769, "step": 9782 }, { "epoch": 1.5970368556385455, "grad_norm": 1.7499608993530273, "learning_rate": 1.961410252874349e-05, "loss": 0.6106, "step": 9783 }, { "epoch": 1.59720011428105, "grad_norm": 1.6766297817230225, "learning_rate": 1.96140152197575e-05, "loss": 0.6041, "step": 9784 }, { "epoch": 1.5973633729235541, "grad_norm": 1.8669971227645874, "learning_rate": 1.9613927901090196e-05, "loss": 0.8988, "step": 9785 }, { "epoch": 1.5975266315660586, "grad_norm": 1.4861773252487183, "learning_rate": 1.9613840572741674e-05, "loss": 0.5519, "step": 9786 }, { "epoch": 1.5976898902085628, "grad_norm": 1.947023868560791, "learning_rate": 1.9613753234712013e-05, "loss": 0.8491, "step": 9787 }, { "epoch": 1.5978531488510672, "grad_norm": 1.5674654245376587, "learning_rate": 1.9613665887001307e-05, "loss": 0.6673, "step": 9788 }, { "epoch": 1.5980164074935717, "grad_norm": 1.5393977165222168, "learning_rate": 1.9613578529609642e-05, "loss": 0.5741, "step": 9789 }, { "epoch": 1.598179666136076, "grad_norm": 1.576027512550354, "learning_rate": 1.9613491162537105e-05, "loss": 0.7173, "step": 9790 }, { "epoch": 1.5983429247785805, "grad_norm": 1.6827441453933716, "learning_rate": 1.9613403785783784e-05, "loss": 0.7371, "step": 9791 }, { "epoch": 1.598506183421085, "grad_norm": 1.2762179374694824, "learning_rate": 1.961331639934977e-05, "loss": 0.4867, "step": 9792 }, { "epoch": 1.5986694420635894, "grad_norm": 1.607686996459961, "learning_rate": 1.9613229003235147e-05, "loss": 0.6327, "step": 9793 }, { "epoch": 1.5988327007060936, "grad_norm": 1.6687633991241455, "learning_rate": 1.9613141597440008e-05, "loss": 0.6449, "step": 9794 }, { "epoch": 1.598995959348598, "grad_norm": 1.7764214277267456, "learning_rate": 1.9613054181964433e-05, "loss": 0.7078, "step": 9795 }, { "epoch": 1.5991592179911023, "grad_norm": 1.3742303848266602, "learning_rate": 1.961296675680852e-05, "loss": 0.6207, "step": 9796 }, { "epoch": 1.5993224766336067, "grad_norm": 2.047309398651123, "learning_rate": 1.961287932197235e-05, "loss": 0.7094, "step": 9797 }, { "epoch": 1.5994857352761112, "grad_norm": 1.6702216863632202, "learning_rate": 1.961279187745601e-05, "loss": 0.6823, "step": 9798 }, { "epoch": 1.5996489939186156, "grad_norm": 1.7494348287582397, "learning_rate": 1.9612704423259596e-05, "loss": 0.7207, "step": 9799 }, { "epoch": 1.59981225256112, "grad_norm": 1.8203837871551514, "learning_rate": 1.961261695938319e-05, "loss": 0.6547, "step": 9800 }, { "epoch": 1.5999755112036245, "grad_norm": 1.4315557479858398, "learning_rate": 1.9612529485826882e-05, "loss": 0.5449, "step": 9801 }, { "epoch": 1.6001387698461287, "grad_norm": 1.6292190551757812, "learning_rate": 1.9612442002590756e-05, "loss": 0.6805, "step": 9802 }, { "epoch": 1.6003020284886331, "grad_norm": 1.9793721437454224, "learning_rate": 1.9612354509674906e-05, "loss": 0.908, "step": 9803 }, { "epoch": 1.6004652871311376, "grad_norm": 1.7853200435638428, "learning_rate": 1.961226700707942e-05, "loss": 0.723, "step": 9804 }, { "epoch": 1.6006285457736418, "grad_norm": 1.6329513788223267, "learning_rate": 1.9612179494804377e-05, "loss": 0.6071, "step": 9805 }, { "epoch": 1.6007918044161462, "grad_norm": 1.4837390184402466, "learning_rate": 1.9612091972849876e-05, "loss": 0.6296, "step": 9806 }, { "epoch": 1.6009550630586507, "grad_norm": 1.8138179779052734, "learning_rate": 1.9612004441216e-05, "loss": 0.5592, "step": 9807 }, { "epoch": 1.601118321701155, "grad_norm": 1.7431972026824951, "learning_rate": 1.961191689990284e-05, "loss": 0.6708, "step": 9808 }, { "epoch": 1.6012815803436595, "grad_norm": 1.8263843059539795, "learning_rate": 1.961182934891048e-05, "loss": 0.7488, "step": 9809 }, { "epoch": 1.601444838986164, "grad_norm": 1.8995561599731445, "learning_rate": 1.961174178823901e-05, "loss": 0.713, "step": 9810 }, { "epoch": 1.6016080976286682, "grad_norm": 1.5727001428604126, "learning_rate": 1.961165421788852e-05, "loss": 0.5515, "step": 9811 }, { "epoch": 1.6017713562711726, "grad_norm": 1.6766053438186646, "learning_rate": 1.96115666378591e-05, "loss": 0.5497, "step": 9812 }, { "epoch": 1.6019346149136768, "grad_norm": 1.4809424877166748, "learning_rate": 1.961147904815083e-05, "loss": 0.548, "step": 9813 }, { "epoch": 1.6020978735561813, "grad_norm": 1.7999751567840576, "learning_rate": 1.9611391448763804e-05, "loss": 0.6371, "step": 9814 }, { "epoch": 1.6022611321986857, "grad_norm": 1.8068703413009644, "learning_rate": 1.961130383969811e-05, "loss": 0.7006, "step": 9815 }, { "epoch": 1.6024243908411901, "grad_norm": 1.6693416833877563, "learning_rate": 1.9611216220953833e-05, "loss": 0.6286, "step": 9816 }, { "epoch": 1.6025876494836946, "grad_norm": 2.175062417984009, "learning_rate": 1.961112859253107e-05, "loss": 0.7422, "step": 9817 }, { "epoch": 1.602750908126199, "grad_norm": 2.143648862838745, "learning_rate": 1.96110409544299e-05, "loss": 0.7869, "step": 9818 }, { "epoch": 1.6029141667687035, "grad_norm": 1.748962640762329, "learning_rate": 1.961095330665041e-05, "loss": 0.6511, "step": 9819 }, { "epoch": 1.6030774254112077, "grad_norm": 1.6824527978897095, "learning_rate": 1.9610865649192695e-05, "loss": 0.5978, "step": 9820 }, { "epoch": 1.6032406840537121, "grad_norm": 1.792402744293213, "learning_rate": 1.9610777982056842e-05, "loss": 0.7499, "step": 9821 }, { "epoch": 1.6034039426962163, "grad_norm": 1.67743718624115, "learning_rate": 1.961069030524294e-05, "loss": 0.6277, "step": 9822 }, { "epoch": 1.6035672013387208, "grad_norm": 2.0172066688537598, "learning_rate": 1.9610602618751073e-05, "loss": 0.6842, "step": 9823 }, { "epoch": 1.6037304599812252, "grad_norm": 2.0673320293426514, "learning_rate": 1.9610514922581333e-05, "loss": 0.7876, "step": 9824 }, { "epoch": 1.6038937186237296, "grad_norm": 1.6833585500717163, "learning_rate": 1.9610427216733808e-05, "loss": 0.6936, "step": 9825 }, { "epoch": 1.604056977266234, "grad_norm": 1.5806291103363037, "learning_rate": 1.9610339501208583e-05, "loss": 0.5307, "step": 9826 }, { "epoch": 1.6042202359087385, "grad_norm": 1.6642920970916748, "learning_rate": 1.961025177600575e-05, "loss": 0.6086, "step": 9827 }, { "epoch": 1.604383494551243, "grad_norm": 2.024425983428955, "learning_rate": 1.9610164041125393e-05, "loss": 0.7676, "step": 9828 }, { "epoch": 1.6045467531937472, "grad_norm": 1.5578705072402954, "learning_rate": 1.9610076296567605e-05, "loss": 0.6732, "step": 9829 }, { "epoch": 1.6047100118362516, "grad_norm": 1.6982609033584595, "learning_rate": 1.9609988542332473e-05, "loss": 0.6038, "step": 9830 }, { "epoch": 1.6048732704787558, "grad_norm": 1.5754255056381226, "learning_rate": 1.9609900778420087e-05, "loss": 0.6385, "step": 9831 }, { "epoch": 1.6050365291212603, "grad_norm": 1.5281360149383545, "learning_rate": 1.9609813004830533e-05, "loss": 0.7216, "step": 9832 }, { "epoch": 1.6051997877637647, "grad_norm": 1.5441087484359741, "learning_rate": 1.9609725221563898e-05, "loss": 0.5731, "step": 9833 }, { "epoch": 1.6053630464062691, "grad_norm": 1.8557145595550537, "learning_rate": 1.960963742862027e-05, "loss": 0.6719, "step": 9834 }, { "epoch": 1.6055263050487736, "grad_norm": 1.631670355796814, "learning_rate": 1.9609549625999747e-05, "loss": 0.6293, "step": 9835 }, { "epoch": 1.605689563691278, "grad_norm": 1.6434465646743774, "learning_rate": 1.9609461813702407e-05, "loss": 0.5957, "step": 9836 }, { "epoch": 1.6058528223337825, "grad_norm": 1.421085000038147, "learning_rate": 1.9609373991728338e-05, "loss": 0.5742, "step": 9837 }, { "epoch": 1.6060160809762867, "grad_norm": 1.6619635820388794, "learning_rate": 1.9609286160077633e-05, "loss": 0.7523, "step": 9838 }, { "epoch": 1.6061793396187911, "grad_norm": 1.789896011352539, "learning_rate": 1.9609198318750383e-05, "loss": 0.7153, "step": 9839 }, { "epoch": 1.6063425982612953, "grad_norm": 1.8744020462036133, "learning_rate": 1.960911046774667e-05, "loss": 0.7169, "step": 9840 }, { "epoch": 1.6065058569037998, "grad_norm": 1.7132737636566162, "learning_rate": 1.9609022607066587e-05, "loss": 0.6895, "step": 9841 }, { "epoch": 1.6066691155463042, "grad_norm": 1.7173854112625122, "learning_rate": 1.9608934736710217e-05, "loss": 0.7473, "step": 9842 }, { "epoch": 1.6068323741888086, "grad_norm": 1.2728602886199951, "learning_rate": 1.9608846856677654e-05, "loss": 0.4593, "step": 9843 }, { "epoch": 1.606995632831313, "grad_norm": 1.7314422130584717, "learning_rate": 1.9608758966968987e-05, "loss": 0.6712, "step": 9844 }, { "epoch": 1.6071588914738175, "grad_norm": 1.923888087272644, "learning_rate": 1.9608671067584303e-05, "loss": 0.8787, "step": 9845 }, { "epoch": 1.6073221501163217, "grad_norm": 2.0452096462249756, "learning_rate": 1.9608583158523687e-05, "loss": 0.6715, "step": 9846 }, { "epoch": 1.6074854087588262, "grad_norm": 1.6187763214111328, "learning_rate": 1.9608495239787228e-05, "loss": 0.6473, "step": 9847 }, { "epoch": 1.6076486674013306, "grad_norm": 1.8139688968658447, "learning_rate": 1.9608407311375023e-05, "loss": 0.776, "step": 9848 }, { "epoch": 1.6078119260438348, "grad_norm": 1.7220070362091064, "learning_rate": 1.960831937328715e-05, "loss": 0.654, "step": 9849 }, { "epoch": 1.6079751846863393, "grad_norm": 1.7548117637634277, "learning_rate": 1.9608231425523702e-05, "loss": 0.6715, "step": 9850 }, { "epoch": 1.6081384433288437, "grad_norm": 1.5513639450073242, "learning_rate": 1.960814346808477e-05, "loss": 0.6412, "step": 9851 }, { "epoch": 1.6083017019713481, "grad_norm": 1.6692925691604614, "learning_rate": 1.9608055500970437e-05, "loss": 0.6454, "step": 9852 }, { "epoch": 1.6084649606138526, "grad_norm": 1.9834835529327393, "learning_rate": 1.96079675241808e-05, "loss": 0.8211, "step": 9853 }, { "epoch": 1.608628219256357, "grad_norm": 1.7180918455123901, "learning_rate": 1.9607879537715937e-05, "loss": 0.7711, "step": 9854 }, { "epoch": 1.6087914778988612, "grad_norm": 1.7019182443618774, "learning_rate": 1.9607791541575944e-05, "loss": 0.7384, "step": 9855 }, { "epoch": 1.6089547365413657, "grad_norm": 1.4341639280319214, "learning_rate": 1.9607703535760906e-05, "loss": 0.6709, "step": 9856 }, { "epoch": 1.6091179951838699, "grad_norm": 1.759806513786316, "learning_rate": 1.9607615520270916e-05, "loss": 0.6351, "step": 9857 }, { "epoch": 1.6092812538263743, "grad_norm": 1.6097996234893799, "learning_rate": 1.9607527495106057e-05, "loss": 0.6205, "step": 9858 }, { "epoch": 1.6094445124688788, "grad_norm": 1.626193881034851, "learning_rate": 1.960743946026642e-05, "loss": 0.7315, "step": 9859 }, { "epoch": 1.6096077711113832, "grad_norm": 1.7317359447479248, "learning_rate": 1.9607351415752096e-05, "loss": 0.7009, "step": 9860 }, { "epoch": 1.6097710297538876, "grad_norm": 1.506295084953308, "learning_rate": 1.960726336156317e-05, "loss": 0.5231, "step": 9861 }, { "epoch": 1.609934288396392, "grad_norm": 2.4288222789764404, "learning_rate": 1.9607175297699734e-05, "loss": 0.882, "step": 9862 }, { "epoch": 1.6100975470388965, "grad_norm": 1.2939797639846802, "learning_rate": 1.9607087224161874e-05, "loss": 0.5435, "step": 9863 }, { "epoch": 1.6102608056814007, "grad_norm": 1.8660036325454712, "learning_rate": 1.960699914094968e-05, "loss": 0.8991, "step": 9864 }, { "epoch": 1.6104240643239052, "grad_norm": 1.5615251064300537, "learning_rate": 1.960691104806324e-05, "loss": 0.6336, "step": 9865 }, { "epoch": 1.6105873229664094, "grad_norm": 1.701054334640503, "learning_rate": 1.9606822945502642e-05, "loss": 0.8322, "step": 9866 }, { "epoch": 1.6107505816089138, "grad_norm": 1.5319828987121582, "learning_rate": 1.9606734833267977e-05, "loss": 0.7, "step": 9867 }, { "epoch": 1.6109138402514183, "grad_norm": 1.693289875984192, "learning_rate": 1.9606646711359335e-05, "loss": 0.5494, "step": 9868 }, { "epoch": 1.6110770988939227, "grad_norm": 1.6100633144378662, "learning_rate": 1.9606558579776798e-05, "loss": 0.6064, "step": 9869 }, { "epoch": 1.6112403575364271, "grad_norm": 1.537405014038086, "learning_rate": 1.960647043852046e-05, "loss": 0.5941, "step": 9870 }, { "epoch": 1.6114036161789316, "grad_norm": 1.6685229539871216, "learning_rate": 1.960638228759041e-05, "loss": 0.6318, "step": 9871 }, { "epoch": 1.611566874821436, "grad_norm": 1.8390123844146729, "learning_rate": 1.9606294126986738e-05, "loss": 0.7122, "step": 9872 }, { "epoch": 1.6117301334639402, "grad_norm": 1.894135594367981, "learning_rate": 1.9606205956709527e-05, "loss": 0.7576, "step": 9873 }, { "epoch": 1.6118933921064447, "grad_norm": 1.9738019704818726, "learning_rate": 1.960611777675887e-05, "loss": 0.7139, "step": 9874 }, { "epoch": 1.6120566507489489, "grad_norm": 1.9650458097457886, "learning_rate": 1.9606029587134858e-05, "loss": 0.7973, "step": 9875 }, { "epoch": 1.6122199093914533, "grad_norm": 1.879150152206421, "learning_rate": 1.960594138783757e-05, "loss": 0.7883, "step": 9876 }, { "epoch": 1.6123831680339578, "grad_norm": 1.5775115489959717, "learning_rate": 1.9605853178867107e-05, "loss": 0.7187, "step": 9877 }, { "epoch": 1.6125464266764622, "grad_norm": 1.7599941492080688, "learning_rate": 1.960576496022355e-05, "loss": 0.7485, "step": 9878 }, { "epoch": 1.6127096853189666, "grad_norm": 1.7559595108032227, "learning_rate": 1.960567673190699e-05, "loss": 0.7859, "step": 9879 }, { "epoch": 1.612872943961471, "grad_norm": 1.7789117097854614, "learning_rate": 1.960558849391752e-05, "loss": 0.7202, "step": 9880 }, { "epoch": 1.6130362026039755, "grad_norm": 1.8371061086654663, "learning_rate": 1.960550024625522e-05, "loss": 0.6554, "step": 9881 }, { "epoch": 1.6131994612464797, "grad_norm": 1.47001314163208, "learning_rate": 1.9605411988920185e-05, "loss": 0.6252, "step": 9882 }, { "epoch": 1.6133627198889842, "grad_norm": 1.7572476863861084, "learning_rate": 1.9605323721912506e-05, "loss": 0.6784, "step": 9883 }, { "epoch": 1.6135259785314884, "grad_norm": 1.819886565208435, "learning_rate": 1.9605235445232266e-05, "loss": 0.71, "step": 9884 }, { "epoch": 1.6136892371739928, "grad_norm": 1.5366079807281494, "learning_rate": 1.9605147158879557e-05, "loss": 0.617, "step": 9885 }, { "epoch": 1.6138524958164973, "grad_norm": 1.7918781042099, "learning_rate": 1.9605058862854464e-05, "loss": 0.6851, "step": 9886 }, { "epoch": 1.6140157544590017, "grad_norm": 1.583812952041626, "learning_rate": 1.9604970557157084e-05, "loss": 0.5696, "step": 9887 }, { "epoch": 1.6141790131015061, "grad_norm": 1.5877928733825684, "learning_rate": 1.96048822417875e-05, "loss": 0.6823, "step": 9888 }, { "epoch": 1.6143422717440106, "grad_norm": 2.041011095046997, "learning_rate": 1.96047939167458e-05, "loss": 0.9112, "step": 9889 }, { "epoch": 1.6145055303865148, "grad_norm": 2.0541915893554688, "learning_rate": 1.960470558203208e-05, "loss": 0.7779, "step": 9890 }, { "epoch": 1.6146687890290192, "grad_norm": 1.7293528318405151, "learning_rate": 1.960461723764642e-05, "loss": 0.7389, "step": 9891 }, { "epoch": 1.6148320476715237, "grad_norm": 1.5841768980026245, "learning_rate": 1.9604528883588912e-05, "loss": 0.6533, "step": 9892 }, { "epoch": 1.6149953063140279, "grad_norm": 1.6819813251495361, "learning_rate": 1.9604440519859645e-05, "loss": 0.6759, "step": 9893 }, { "epoch": 1.6151585649565323, "grad_norm": 1.4191339015960693, "learning_rate": 1.9604352146458715e-05, "loss": 0.5056, "step": 9894 }, { "epoch": 1.6153218235990368, "grad_norm": 1.487356424331665, "learning_rate": 1.96042637633862e-05, "loss": 0.5664, "step": 9895 }, { "epoch": 1.6154850822415412, "grad_norm": 1.6558493375778198, "learning_rate": 1.9604175370642196e-05, "loss": 0.6178, "step": 9896 }, { "epoch": 1.6156483408840456, "grad_norm": 1.8192882537841797, "learning_rate": 1.960408696822679e-05, "loss": 0.7783, "step": 9897 }, { "epoch": 1.61581159952655, "grad_norm": 1.5951824188232422, "learning_rate": 1.9603998556140066e-05, "loss": 0.6511, "step": 9898 }, { "epoch": 1.6159748581690543, "grad_norm": 1.6060268878936768, "learning_rate": 1.9603910134382124e-05, "loss": 0.6585, "step": 9899 }, { "epoch": 1.6161381168115587, "grad_norm": 2.108264207839966, "learning_rate": 1.9603821702953047e-05, "loss": 0.7558, "step": 9900 }, { "epoch": 1.616301375454063, "grad_norm": 1.8553142547607422, "learning_rate": 1.960373326185292e-05, "loss": 0.7651, "step": 9901 }, { "epoch": 1.6164646340965674, "grad_norm": 1.7204899787902832, "learning_rate": 1.960364481108184e-05, "loss": 0.769, "step": 9902 }, { "epoch": 1.6166278927390718, "grad_norm": 1.4396463632583618, "learning_rate": 1.960355635063989e-05, "loss": 0.573, "step": 9903 }, { "epoch": 1.6167911513815763, "grad_norm": 1.8877251148223877, "learning_rate": 1.9603467880527164e-05, "loss": 0.7424, "step": 9904 }, { "epoch": 1.6169544100240807, "grad_norm": 1.5495846271514893, "learning_rate": 1.9603379400743744e-05, "loss": 0.7612, "step": 9905 }, { "epoch": 1.6171176686665851, "grad_norm": 1.8033593893051147, "learning_rate": 1.960329091128973e-05, "loss": 0.7705, "step": 9906 }, { "epoch": 1.6172809273090896, "grad_norm": 1.632195234298706, "learning_rate": 1.9603202412165196e-05, "loss": 0.5575, "step": 9907 }, { "epoch": 1.6174441859515938, "grad_norm": 1.864630103111267, "learning_rate": 1.9603113903370245e-05, "loss": 0.772, "step": 9908 }, { "epoch": 1.6176074445940982, "grad_norm": 1.2784122228622437, "learning_rate": 1.960302538490496e-05, "loss": 0.6011, "step": 9909 }, { "epoch": 1.6177707032366024, "grad_norm": 1.7583242654800415, "learning_rate": 1.9602936856769432e-05, "loss": 0.6197, "step": 9910 }, { "epoch": 1.6179339618791069, "grad_norm": 1.7336595058441162, "learning_rate": 1.9602848318963747e-05, "loss": 0.6143, "step": 9911 }, { "epoch": 1.6180972205216113, "grad_norm": 1.9464582204818726, "learning_rate": 1.9602759771488e-05, "loss": 0.8194, "step": 9912 }, { "epoch": 1.6182604791641158, "grad_norm": 1.3688303232192993, "learning_rate": 1.9602671214342272e-05, "loss": 0.571, "step": 9913 }, { "epoch": 1.6184237378066202, "grad_norm": 1.538170576095581, "learning_rate": 1.960258264752666e-05, "loss": 0.6398, "step": 9914 }, { "epoch": 1.6185869964491246, "grad_norm": 1.807430624961853, "learning_rate": 1.960249407104125e-05, "loss": 0.7037, "step": 9915 }, { "epoch": 1.618750255091629, "grad_norm": 1.639836072921753, "learning_rate": 1.9602405484886126e-05, "loss": 0.6233, "step": 9916 }, { "epoch": 1.6189135137341333, "grad_norm": 1.4518277645111084, "learning_rate": 1.9602316889061388e-05, "loss": 0.5711, "step": 9917 }, { "epoch": 1.6190767723766377, "grad_norm": 1.6341288089752197, "learning_rate": 1.960222828356712e-05, "loss": 0.6874, "step": 9918 }, { "epoch": 1.619240031019142, "grad_norm": 1.7196002006530762, "learning_rate": 1.9602139668403402e-05, "loss": 0.664, "step": 9919 }, { "epoch": 1.6194032896616464, "grad_norm": 1.426687240600586, "learning_rate": 1.960205104357034e-05, "loss": 0.5229, "step": 9920 }, { "epoch": 1.6195665483041508, "grad_norm": 1.812554121017456, "learning_rate": 1.960196240906801e-05, "loss": 0.654, "step": 9921 }, { "epoch": 1.6197298069466552, "grad_norm": 2.414975643157959, "learning_rate": 1.960187376489651e-05, "loss": 0.8779, "step": 9922 }, { "epoch": 1.6198930655891597, "grad_norm": 1.5978323221206665, "learning_rate": 1.9601785111055928e-05, "loss": 0.5948, "step": 9923 }, { "epoch": 1.6200563242316641, "grad_norm": 1.3697410821914673, "learning_rate": 1.960169644754635e-05, "loss": 0.5699, "step": 9924 }, { "epoch": 1.6202195828741686, "grad_norm": 1.5069769620895386, "learning_rate": 1.9601607774367862e-05, "loss": 0.7036, "step": 9925 }, { "epoch": 1.6203828415166728, "grad_norm": 1.8763434886932373, "learning_rate": 1.960151909152056e-05, "loss": 0.7439, "step": 9926 }, { "epoch": 1.6205461001591772, "grad_norm": 1.546112298965454, "learning_rate": 1.960143039900453e-05, "loss": 0.6093, "step": 9927 }, { "epoch": 1.6207093588016814, "grad_norm": 1.5846492052078247, "learning_rate": 1.960134169681986e-05, "loss": 0.5848, "step": 9928 }, { "epoch": 1.6208726174441859, "grad_norm": 1.5905203819274902, "learning_rate": 1.9601252984966645e-05, "loss": 0.6502, "step": 9929 }, { "epoch": 1.6210358760866903, "grad_norm": 1.6415377855300903, "learning_rate": 1.960116426344497e-05, "loss": 0.6603, "step": 9930 }, { "epoch": 1.6211991347291947, "grad_norm": 1.7289812564849854, "learning_rate": 1.9601075532254924e-05, "loss": 0.7701, "step": 9931 }, { "epoch": 1.6213623933716992, "grad_norm": 1.66036057472229, "learning_rate": 1.96009867913966e-05, "loss": 0.5878, "step": 9932 }, { "epoch": 1.6215256520142036, "grad_norm": 1.8340846300125122, "learning_rate": 1.9600898040870084e-05, "loss": 0.8278, "step": 9933 }, { "epoch": 1.6216889106567078, "grad_norm": 1.6465085744857788, "learning_rate": 1.9600809280675465e-05, "loss": 0.708, "step": 9934 }, { "epoch": 1.6218521692992123, "grad_norm": 1.9307135343551636, "learning_rate": 1.9600720510812833e-05, "loss": 0.6504, "step": 9935 }, { "epoch": 1.6220154279417167, "grad_norm": 1.862109661102295, "learning_rate": 1.9600631731282278e-05, "loss": 0.8577, "step": 9936 }, { "epoch": 1.622178686584221, "grad_norm": 1.3749257326126099, "learning_rate": 1.9600542942083893e-05, "loss": 0.6912, "step": 9937 }, { "epoch": 1.6223419452267254, "grad_norm": 1.867097020149231, "learning_rate": 1.960045414321776e-05, "loss": 0.6886, "step": 9938 }, { "epoch": 1.6225052038692298, "grad_norm": 1.4429829120635986, "learning_rate": 1.9600365334683972e-05, "loss": 0.5363, "step": 9939 }, { "epoch": 1.6226684625117342, "grad_norm": 1.7537142038345337, "learning_rate": 1.9600276516482623e-05, "loss": 0.6658, "step": 9940 }, { "epoch": 1.6228317211542387, "grad_norm": 1.5109978914260864, "learning_rate": 1.9600187688613795e-05, "loss": 0.5686, "step": 9941 }, { "epoch": 1.6229949797967431, "grad_norm": 1.927838683128357, "learning_rate": 1.960009885107758e-05, "loss": 0.7586, "step": 9942 }, { "epoch": 1.6231582384392473, "grad_norm": 1.647126317024231, "learning_rate": 1.9600010003874067e-05, "loss": 0.6356, "step": 9943 }, { "epoch": 1.6233214970817518, "grad_norm": 1.43631112575531, "learning_rate": 1.959992114700335e-05, "loss": 0.5849, "step": 9944 }, { "epoch": 1.623484755724256, "grad_norm": 1.5692118406295776, "learning_rate": 1.9599832280465513e-05, "loss": 0.6029, "step": 9945 }, { "epoch": 1.6236480143667604, "grad_norm": 1.751539945602417, "learning_rate": 1.9599743404260646e-05, "loss": 0.7169, "step": 9946 }, { "epoch": 1.6238112730092649, "grad_norm": 1.729548692703247, "learning_rate": 1.959965451838884e-05, "loss": 0.5295, "step": 9947 }, { "epoch": 1.6239745316517693, "grad_norm": 1.495545506477356, "learning_rate": 1.959956562285019e-05, "loss": 0.5669, "step": 9948 }, { "epoch": 1.6241377902942737, "grad_norm": 1.667177438735962, "learning_rate": 1.9599476717644777e-05, "loss": 0.7324, "step": 9949 }, { "epoch": 1.6243010489367782, "grad_norm": 1.6816600561141968, "learning_rate": 1.9599387802772693e-05, "loss": 0.6178, "step": 9950 }, { "epoch": 1.6244643075792826, "grad_norm": 1.9057495594024658, "learning_rate": 1.9599298878234024e-05, "loss": 0.9183, "step": 9951 }, { "epoch": 1.6246275662217868, "grad_norm": 2.120357036590576, "learning_rate": 1.9599209944028867e-05, "loss": 0.9988, "step": 9952 }, { "epoch": 1.6247908248642913, "grad_norm": 1.665749430656433, "learning_rate": 1.9599121000157312e-05, "loss": 0.7956, "step": 9953 }, { "epoch": 1.6249540835067955, "grad_norm": 1.842893123626709, "learning_rate": 1.9599032046619437e-05, "loss": 0.9737, "step": 9954 }, { "epoch": 1.6251173421493, "grad_norm": 1.9717347621917725, "learning_rate": 1.9598943083415345e-05, "loss": 0.6459, "step": 9955 }, { "epoch": 1.6252806007918044, "grad_norm": 1.6530388593673706, "learning_rate": 1.959885411054512e-05, "loss": 0.6793, "step": 9956 }, { "epoch": 1.6254438594343088, "grad_norm": 1.346403956413269, "learning_rate": 1.9598765128008847e-05, "loss": 0.5108, "step": 9957 }, { "epoch": 1.6256071180768132, "grad_norm": 1.682734489440918, "learning_rate": 1.9598676135806622e-05, "loss": 0.6677, "step": 9958 }, { "epoch": 1.6257703767193177, "grad_norm": 1.8322635889053345, "learning_rate": 1.9598587133938535e-05, "loss": 0.8418, "step": 9959 }, { "epoch": 1.6259336353618221, "grad_norm": 1.9128732681274414, "learning_rate": 1.9598498122404674e-05, "loss": 0.9465, "step": 9960 }, { "epoch": 1.6260968940043263, "grad_norm": 1.4682658910751343, "learning_rate": 1.9598409101205123e-05, "loss": 0.6055, "step": 9961 }, { "epoch": 1.6262601526468308, "grad_norm": 1.4844465255737305, "learning_rate": 1.9598320070339977e-05, "loss": 0.5966, "step": 9962 }, { "epoch": 1.626423411289335, "grad_norm": 1.6079275608062744, "learning_rate": 1.959823102980933e-05, "loss": 0.6999, "step": 9963 }, { "epoch": 1.6265866699318394, "grad_norm": 1.523175597190857, "learning_rate": 1.9598141979613265e-05, "loss": 0.5925, "step": 9964 }, { "epoch": 1.6267499285743439, "grad_norm": 1.7228418588638306, "learning_rate": 1.959805291975187e-05, "loss": 0.7186, "step": 9965 }, { "epoch": 1.6269131872168483, "grad_norm": 1.6734815835952759, "learning_rate": 1.959796385022524e-05, "loss": 0.5445, "step": 9966 }, { "epoch": 1.6270764458593527, "grad_norm": 1.660291075706482, "learning_rate": 1.9597874771033468e-05, "loss": 0.7424, "step": 9967 }, { "epoch": 1.6272397045018572, "grad_norm": 1.5311706066131592, "learning_rate": 1.9597785682176632e-05, "loss": 0.6522, "step": 9968 }, { "epoch": 1.6274029631443616, "grad_norm": 1.8155827522277832, "learning_rate": 1.959769658365483e-05, "loss": 0.7351, "step": 9969 }, { "epoch": 1.6275662217868658, "grad_norm": 1.6888411045074463, "learning_rate": 1.959760747546815e-05, "loss": 0.6635, "step": 9970 }, { "epoch": 1.6277294804293703, "grad_norm": 1.5978652238845825, "learning_rate": 1.9597518357616686e-05, "loss": 0.6719, "step": 9971 }, { "epoch": 1.6278927390718745, "grad_norm": 1.7928999662399292, "learning_rate": 1.959742923010052e-05, "loss": 0.7563, "step": 9972 }, { "epoch": 1.628055997714379, "grad_norm": 1.623977541923523, "learning_rate": 1.9597340092919747e-05, "loss": 0.69, "step": 9973 }, { "epoch": 1.6282192563568834, "grad_norm": 1.878576397895813, "learning_rate": 1.9597250946074453e-05, "loss": 1.0453, "step": 9974 }, { "epoch": 1.6283825149993878, "grad_norm": 1.8737051486968994, "learning_rate": 1.9597161789564732e-05, "loss": 0.7073, "step": 9975 }, { "epoch": 1.6285457736418922, "grad_norm": 1.7054731845855713, "learning_rate": 1.9597072623390668e-05, "loss": 0.7787, "step": 9976 }, { "epoch": 1.6287090322843967, "grad_norm": 1.9507187604904175, "learning_rate": 1.9596983447552357e-05, "loss": 0.8347, "step": 9977 }, { "epoch": 1.628872290926901, "grad_norm": 1.6655948162078857, "learning_rate": 1.9596894262049885e-05, "loss": 0.6526, "step": 9978 }, { "epoch": 1.6290355495694053, "grad_norm": 1.2738479375839233, "learning_rate": 1.9596805066883347e-05, "loss": 0.4985, "step": 9979 }, { "epoch": 1.6291988082119098, "grad_norm": 1.6888433694839478, "learning_rate": 1.9596715862052823e-05, "loss": 0.6368, "step": 9980 }, { "epoch": 1.629362066854414, "grad_norm": 1.5171151161193848, "learning_rate": 1.9596626647558412e-05, "loss": 0.6109, "step": 9981 }, { "epoch": 1.6295253254969184, "grad_norm": 1.7136192321777344, "learning_rate": 1.9596537423400202e-05, "loss": 0.7133, "step": 9982 }, { "epoch": 1.6296885841394229, "grad_norm": 1.7613364458084106, "learning_rate": 1.9596448189578278e-05, "loss": 0.8167, "step": 9983 }, { "epoch": 1.6298518427819273, "grad_norm": 1.7794079780578613, "learning_rate": 1.9596358946092735e-05, "loss": 0.6288, "step": 9984 }, { "epoch": 1.6300151014244317, "grad_norm": 1.5747590065002441, "learning_rate": 1.959626969294366e-05, "loss": 0.6126, "step": 9985 }, { "epoch": 1.6301783600669362, "grad_norm": 1.7081397771835327, "learning_rate": 1.9596180430131143e-05, "loss": 0.6379, "step": 9986 }, { "epoch": 1.6303416187094404, "grad_norm": 1.5682002305984497, "learning_rate": 1.959609115765528e-05, "loss": 0.6423, "step": 9987 }, { "epoch": 1.6305048773519448, "grad_norm": 1.8774763345718384, "learning_rate": 1.959600187551615e-05, "loss": 0.8596, "step": 9988 }, { "epoch": 1.630668135994449, "grad_norm": 1.760628342628479, "learning_rate": 1.959591258371385e-05, "loss": 0.6936, "step": 9989 }, { "epoch": 1.6308313946369535, "grad_norm": 1.5259106159210205, "learning_rate": 1.959582328224847e-05, "loss": 0.5439, "step": 9990 }, { "epoch": 1.630994653279458, "grad_norm": 1.7732175588607788, "learning_rate": 1.95957339711201e-05, "loss": 0.6592, "step": 9991 }, { "epoch": 1.6311579119219624, "grad_norm": 1.2790030241012573, "learning_rate": 1.9595644650328823e-05, "loss": 0.4824, "step": 9992 }, { "epoch": 1.6313211705644668, "grad_norm": 1.744803786277771, "learning_rate": 1.9595555319874738e-05, "loss": 0.6015, "step": 9993 }, { "epoch": 1.6314844292069712, "grad_norm": 1.7231745719909668, "learning_rate": 1.959546597975793e-05, "loss": 0.6848, "step": 9994 }, { "epoch": 1.6316476878494757, "grad_norm": 1.746822714805603, "learning_rate": 1.9595376629978494e-05, "loss": 0.7217, "step": 9995 }, { "epoch": 1.6318109464919799, "grad_norm": 1.5989131927490234, "learning_rate": 1.9595287270536512e-05, "loss": 0.6715, "step": 9996 }, { "epoch": 1.6319742051344843, "grad_norm": 1.746046781539917, "learning_rate": 1.959519790143208e-05, "loss": 0.6402, "step": 9997 }, { "epoch": 1.6321374637769885, "grad_norm": 1.799950361251831, "learning_rate": 1.959510852266529e-05, "loss": 0.6902, "step": 9998 }, { "epoch": 1.632300722419493, "grad_norm": 1.9233204126358032, "learning_rate": 1.9595019134236223e-05, "loss": 0.7194, "step": 9999 }, { "epoch": 1.6324639810619974, "grad_norm": 1.7570171356201172, "learning_rate": 1.9594929736144978e-05, "loss": 0.5658, "step": 10000 }, { "epoch": 1.6326272397045019, "grad_norm": 1.7283004522323608, "learning_rate": 1.9594840328391638e-05, "loss": 0.7391, "step": 10001 }, { "epoch": 1.6327904983470063, "grad_norm": 1.602363109588623, "learning_rate": 1.9594750910976295e-05, "loss": 0.6407, "step": 10002 }, { "epoch": 1.6329537569895107, "grad_norm": 1.53365957736969, "learning_rate": 1.9594661483899044e-05, "loss": 0.6107, "step": 10003 }, { "epoch": 1.6331170156320152, "grad_norm": 1.4159650802612305, "learning_rate": 1.959457204715997e-05, "loss": 0.5889, "step": 10004 }, { "epoch": 1.6332802742745194, "grad_norm": 1.5799587965011597, "learning_rate": 1.9594482600759166e-05, "loss": 0.537, "step": 10005 }, { "epoch": 1.6334435329170238, "grad_norm": 1.404411792755127, "learning_rate": 1.9594393144696718e-05, "loss": 0.4909, "step": 10006 }, { "epoch": 1.633606791559528, "grad_norm": 1.931803822517395, "learning_rate": 1.959430367897272e-05, "loss": 0.7136, "step": 10007 }, { "epoch": 1.6337700502020325, "grad_norm": 1.7706828117370605, "learning_rate": 1.959421420358726e-05, "loss": 0.6407, "step": 10008 }, { "epoch": 1.633933308844537, "grad_norm": 2.162289619445801, "learning_rate": 1.959412471854043e-05, "loss": 0.759, "step": 10009 }, { "epoch": 1.6340965674870414, "grad_norm": 1.8142954111099243, "learning_rate": 1.959403522383232e-05, "loss": 0.7213, "step": 10010 }, { "epoch": 1.6342598261295458, "grad_norm": 1.6594425439834595, "learning_rate": 1.9593945719463018e-05, "loss": 0.7162, "step": 10011 }, { "epoch": 1.6344230847720502, "grad_norm": 1.7348077297210693, "learning_rate": 1.9593856205432614e-05, "loss": 0.6746, "step": 10012 }, { "epoch": 1.6345863434145547, "grad_norm": 1.941354513168335, "learning_rate": 1.95937666817412e-05, "loss": 0.8183, "step": 10013 }, { "epoch": 1.6347496020570589, "grad_norm": 1.7943456172943115, "learning_rate": 1.959367714838886e-05, "loss": 0.7826, "step": 10014 }, { "epoch": 1.6349128606995633, "grad_norm": 1.5109128952026367, "learning_rate": 1.95935876053757e-05, "loss": 0.4138, "step": 10015 }, { "epoch": 1.6350761193420675, "grad_norm": 1.621902585029602, "learning_rate": 1.9593498052701796e-05, "loss": 0.7606, "step": 10016 }, { "epoch": 1.635239377984572, "grad_norm": 1.5043554306030273, "learning_rate": 1.9593408490367237e-05, "loss": 0.6461, "step": 10017 }, { "epoch": 1.6354026366270764, "grad_norm": 2.1163289546966553, "learning_rate": 1.9593318918372126e-05, "loss": 0.616, "step": 10018 }, { "epoch": 1.6355658952695808, "grad_norm": 1.6585100889205933, "learning_rate": 1.9593229336716542e-05, "loss": 0.5696, "step": 10019 }, { "epoch": 1.6357291539120853, "grad_norm": 1.8742451667785645, "learning_rate": 1.9593139745400575e-05, "loss": 0.787, "step": 10020 }, { "epoch": 1.6358924125545897, "grad_norm": 1.6877708435058594, "learning_rate": 1.9593050144424322e-05, "loss": 0.7967, "step": 10021 }, { "epoch": 1.636055671197094, "grad_norm": 1.4806536436080933, "learning_rate": 1.9592960533787872e-05, "loss": 0.597, "step": 10022 }, { "epoch": 1.6362189298395984, "grad_norm": 1.715296745300293, "learning_rate": 1.959287091349131e-05, "loss": 0.6379, "step": 10023 }, { "epoch": 1.6363821884821028, "grad_norm": 1.9717532396316528, "learning_rate": 1.9592781283534733e-05, "loss": 0.7833, "step": 10024 }, { "epoch": 1.636545447124607, "grad_norm": 1.6474546194076538, "learning_rate": 1.9592691643918226e-05, "loss": 0.6992, "step": 10025 }, { "epoch": 1.6367087057671115, "grad_norm": 1.6564373970031738, "learning_rate": 1.959260199464188e-05, "loss": 0.6613, "step": 10026 }, { "epoch": 1.636871964409616, "grad_norm": 1.668402075767517, "learning_rate": 1.9592512335705786e-05, "loss": 0.6779, "step": 10027 }, { "epoch": 1.6370352230521203, "grad_norm": 1.4628677368164062, "learning_rate": 1.959242266711004e-05, "loss": 0.6286, "step": 10028 }, { "epoch": 1.6371984816946248, "grad_norm": 1.64640474319458, "learning_rate": 1.959233298885472e-05, "loss": 0.653, "step": 10029 }, { "epoch": 1.6373617403371292, "grad_norm": 1.6356184482574463, "learning_rate": 1.9592243300939926e-05, "loss": 0.7245, "step": 10030 }, { "epoch": 1.6375249989796334, "grad_norm": 1.652172565460205, "learning_rate": 1.9592153603365746e-05, "loss": 0.5934, "step": 10031 }, { "epoch": 1.6376882576221379, "grad_norm": 1.905864953994751, "learning_rate": 1.9592063896132266e-05, "loss": 0.5969, "step": 10032 }, { "epoch": 1.637851516264642, "grad_norm": 1.9108155965805054, "learning_rate": 1.9591974179239585e-05, "loss": 0.8251, "step": 10033 }, { "epoch": 1.6380147749071465, "grad_norm": 1.6361916065216064, "learning_rate": 1.9591884452687788e-05, "loss": 0.6828, "step": 10034 }, { "epoch": 1.638178033549651, "grad_norm": 1.8573397397994995, "learning_rate": 1.9591794716476965e-05, "loss": 0.7239, "step": 10035 }, { "epoch": 1.6383412921921554, "grad_norm": 1.657787799835205, "learning_rate": 1.9591704970607206e-05, "loss": 0.6302, "step": 10036 }, { "epoch": 1.6385045508346598, "grad_norm": 1.922446370124817, "learning_rate": 1.9591615215078604e-05, "loss": 0.6878, "step": 10037 }, { "epoch": 1.6386678094771643, "grad_norm": 1.6157526969909668, "learning_rate": 1.959152544989125e-05, "loss": 0.6133, "step": 10038 }, { "epoch": 1.6388310681196687, "grad_norm": 1.8786157369613647, "learning_rate": 1.9591435675045227e-05, "loss": 0.6246, "step": 10039 }, { "epoch": 1.638994326762173, "grad_norm": 1.5269620418548584, "learning_rate": 1.9591345890540635e-05, "loss": 0.5995, "step": 10040 }, { "epoch": 1.6391575854046774, "grad_norm": 1.5697052478790283, "learning_rate": 1.959125609637756e-05, "loss": 0.6434, "step": 10041 }, { "epoch": 1.6393208440471816, "grad_norm": 1.6759674549102783, "learning_rate": 1.9591166292556093e-05, "loss": 0.7986, "step": 10042 }, { "epoch": 1.639484102689686, "grad_norm": 1.3773276805877686, "learning_rate": 1.959107647907632e-05, "loss": 0.5152, "step": 10043 }, { "epoch": 1.6396473613321905, "grad_norm": 1.9850319623947144, "learning_rate": 1.959098665593834e-05, "loss": 0.788, "step": 10044 }, { "epoch": 1.639810619974695, "grad_norm": 1.3423821926116943, "learning_rate": 1.959089682314224e-05, "loss": 0.5296, "step": 10045 }, { "epoch": 1.6399738786171993, "grad_norm": 2.013733148574829, "learning_rate": 1.9590806980688108e-05, "loss": 0.6571, "step": 10046 }, { "epoch": 1.6401371372597038, "grad_norm": 1.755739688873291, "learning_rate": 1.9590717128576032e-05, "loss": 0.7032, "step": 10047 }, { "epoch": 1.6403003959022082, "grad_norm": 1.750331997871399, "learning_rate": 1.959062726680611e-05, "loss": 0.6705, "step": 10048 }, { "epoch": 1.6404636545447124, "grad_norm": 1.7550911903381348, "learning_rate": 1.9590537395378428e-05, "loss": 0.709, "step": 10049 }, { "epoch": 1.6406269131872169, "grad_norm": 2.080109119415283, "learning_rate": 1.959044751429308e-05, "loss": 0.83, "step": 10050 }, { "epoch": 1.640790171829721, "grad_norm": 1.7505443096160889, "learning_rate": 1.959035762355015e-05, "loss": 0.688, "step": 10051 }, { "epoch": 1.6409534304722255, "grad_norm": 1.67646324634552, "learning_rate": 1.959026772314973e-05, "loss": 0.6329, "step": 10052 }, { "epoch": 1.64111668911473, "grad_norm": 1.65740168094635, "learning_rate": 1.9590177813091918e-05, "loss": 0.6158, "step": 10053 }, { "epoch": 1.6412799477572344, "grad_norm": 1.6394296884536743, "learning_rate": 1.95900878933768e-05, "loss": 0.5268, "step": 10054 }, { "epoch": 1.6414432063997388, "grad_norm": 1.5952215194702148, "learning_rate": 1.9589997964004466e-05, "loss": 0.6528, "step": 10055 }, { "epoch": 1.6416064650422433, "grad_norm": 1.866600513458252, "learning_rate": 1.9589908024975002e-05, "loss": 0.7535, "step": 10056 }, { "epoch": 1.6417697236847477, "grad_norm": 2.013523578643799, "learning_rate": 1.9589818076288506e-05, "loss": 0.6565, "step": 10057 }, { "epoch": 1.641932982327252, "grad_norm": 1.9465855360031128, "learning_rate": 1.958972811794507e-05, "loss": 0.7267, "step": 10058 }, { "epoch": 1.6420962409697564, "grad_norm": 2.1008381843566895, "learning_rate": 1.9589638149944774e-05, "loss": 0.6813, "step": 10059 }, { "epoch": 1.6422594996122606, "grad_norm": 1.7441473007202148, "learning_rate": 1.958954817228772e-05, "loss": 0.6865, "step": 10060 }, { "epoch": 1.642422758254765, "grad_norm": 1.651950478553772, "learning_rate": 1.958945818497399e-05, "loss": 0.6578, "step": 10061 }, { "epoch": 1.6425860168972695, "grad_norm": 1.8425498008728027, "learning_rate": 1.9589368188003677e-05, "loss": 0.7691, "step": 10062 }, { "epoch": 1.642749275539774, "grad_norm": 1.5721261501312256, "learning_rate": 1.9589278181376875e-05, "loss": 0.5981, "step": 10063 }, { "epoch": 1.6429125341822783, "grad_norm": 1.6836590766906738, "learning_rate": 1.958918816509367e-05, "loss": 0.7757, "step": 10064 }, { "epoch": 1.6430757928247828, "grad_norm": 2.3528804779052734, "learning_rate": 1.958909813915416e-05, "loss": 0.7057, "step": 10065 }, { "epoch": 1.6432390514672872, "grad_norm": 1.9285943508148193, "learning_rate": 1.9589008103558428e-05, "loss": 0.7917, "step": 10066 }, { "epoch": 1.6434023101097914, "grad_norm": 1.6935430765151978, "learning_rate": 1.9588918058306564e-05, "loss": 0.653, "step": 10067 }, { "epoch": 1.6435655687522959, "grad_norm": 1.6889852285385132, "learning_rate": 1.9588828003398667e-05, "loss": 0.7663, "step": 10068 }, { "epoch": 1.6437288273948, "grad_norm": 1.6337755918502808, "learning_rate": 1.958873793883482e-05, "loss": 0.6401, "step": 10069 }, { "epoch": 1.6438920860373045, "grad_norm": 1.941815733909607, "learning_rate": 1.9588647864615118e-05, "loss": 0.7321, "step": 10070 }, { "epoch": 1.644055344679809, "grad_norm": 1.7820794582366943, "learning_rate": 1.958855778073965e-05, "loss": 0.7407, "step": 10071 }, { "epoch": 1.6442186033223134, "grad_norm": 2.0601255893707275, "learning_rate": 1.9588467687208506e-05, "loss": 0.7374, "step": 10072 }, { "epoch": 1.6443818619648178, "grad_norm": 1.642119288444519, "learning_rate": 1.9588377584021778e-05, "loss": 0.6998, "step": 10073 }, { "epoch": 1.6445451206073223, "grad_norm": 1.5206775665283203, "learning_rate": 1.9588287471179558e-05, "loss": 0.5039, "step": 10074 }, { "epoch": 1.6447083792498265, "grad_norm": 1.826811671257019, "learning_rate": 1.958819734868193e-05, "loss": 0.7671, "step": 10075 }, { "epoch": 1.644871637892331, "grad_norm": 1.8885239362716675, "learning_rate": 1.9588107216528996e-05, "loss": 0.743, "step": 10076 }, { "epoch": 1.6450348965348354, "grad_norm": 1.4056293964385986, "learning_rate": 1.9588017074720838e-05, "loss": 0.5183, "step": 10077 }, { "epoch": 1.6451981551773396, "grad_norm": 1.5689443349838257, "learning_rate": 1.958792692325755e-05, "loss": 0.6731, "step": 10078 }, { "epoch": 1.645361413819844, "grad_norm": 1.500549077987671, "learning_rate": 1.958783676213922e-05, "loss": 0.6045, "step": 10079 }, { "epoch": 1.6455246724623485, "grad_norm": 1.8358031511306763, "learning_rate": 1.958774659136594e-05, "loss": 0.7516, "step": 10080 }, { "epoch": 1.645687931104853, "grad_norm": 1.5842143297195435, "learning_rate": 1.9587656410937806e-05, "loss": 0.6669, "step": 10081 }, { "epoch": 1.6458511897473573, "grad_norm": 1.6282466650009155, "learning_rate": 1.9587566220854902e-05, "loss": 0.7071, "step": 10082 }, { "epoch": 1.6460144483898618, "grad_norm": 1.4204384088516235, "learning_rate": 1.9587476021117324e-05, "loss": 0.5283, "step": 10083 }, { "epoch": 1.646177707032366, "grad_norm": 1.7629834413528442, "learning_rate": 1.9587385811725155e-05, "loss": 0.7256, "step": 10084 }, { "epoch": 1.6463409656748704, "grad_norm": 1.685285210609436, "learning_rate": 1.9587295592678495e-05, "loss": 0.6713, "step": 10085 }, { "epoch": 1.6465042243173746, "grad_norm": 1.574859380722046, "learning_rate": 1.9587205363977428e-05, "loss": 0.5617, "step": 10086 }, { "epoch": 1.646667482959879, "grad_norm": 1.6142386198043823, "learning_rate": 1.9587115125622052e-05, "loss": 0.6672, "step": 10087 }, { "epoch": 1.6468307416023835, "grad_norm": 1.6259280443191528, "learning_rate": 1.958702487761245e-05, "loss": 0.6546, "step": 10088 }, { "epoch": 1.646994000244888, "grad_norm": 2.188521385192871, "learning_rate": 1.958693461994872e-05, "loss": 0.7794, "step": 10089 }, { "epoch": 1.6471572588873924, "grad_norm": 2.423936367034912, "learning_rate": 1.9586844352630943e-05, "loss": 0.8402, "step": 10090 }, { "epoch": 1.6473205175298968, "grad_norm": 1.6714799404144287, "learning_rate": 1.9586754075659223e-05, "loss": 0.5497, "step": 10091 }, { "epoch": 1.6474837761724013, "grad_norm": 1.7275614738464355, "learning_rate": 1.9586663789033642e-05, "loss": 0.7274, "step": 10092 }, { "epoch": 1.6476470348149055, "grad_norm": 1.4821561574935913, "learning_rate": 1.958657349275429e-05, "loss": 0.5739, "step": 10093 }, { "epoch": 1.64781029345741, "grad_norm": 1.535975694656372, "learning_rate": 1.9586483186821265e-05, "loss": 0.6603, "step": 10094 }, { "epoch": 1.6479735520999141, "grad_norm": 1.6246373653411865, "learning_rate": 1.9586392871234655e-05, "loss": 0.6254, "step": 10095 }, { "epoch": 1.6481368107424186, "grad_norm": 1.7950057983398438, "learning_rate": 1.9586302545994546e-05, "loss": 0.7346, "step": 10096 }, { "epoch": 1.648300069384923, "grad_norm": 1.9512600898742676, "learning_rate": 1.9586212211101036e-05, "loss": 0.6961, "step": 10097 }, { "epoch": 1.6484633280274275, "grad_norm": 1.7083897590637207, "learning_rate": 1.958612186655421e-05, "loss": 0.6507, "step": 10098 }, { "epoch": 1.648626586669932, "grad_norm": 1.8619893789291382, "learning_rate": 1.9586031512354163e-05, "loss": 0.7431, "step": 10099 }, { "epoch": 1.6487898453124363, "grad_norm": 2.0211124420166016, "learning_rate": 1.9585941148500987e-05, "loss": 0.9511, "step": 10100 }, { "epoch": 1.6489531039549408, "grad_norm": 1.8720773458480835, "learning_rate": 1.958585077499477e-05, "loss": 0.7469, "step": 10101 }, { "epoch": 1.649116362597445, "grad_norm": 1.8143525123596191, "learning_rate": 1.95857603918356e-05, "loss": 0.7227, "step": 10102 }, { "epoch": 1.6492796212399494, "grad_norm": 1.8428373336791992, "learning_rate": 1.9585669999023573e-05, "loss": 0.6781, "step": 10103 }, { "epoch": 1.6494428798824536, "grad_norm": 1.5892603397369385, "learning_rate": 1.9585579596558783e-05, "loss": 0.4987, "step": 10104 }, { "epoch": 1.649606138524958, "grad_norm": 1.902796745300293, "learning_rate": 1.9585489184441313e-05, "loss": 0.7587, "step": 10105 }, { "epoch": 1.6497693971674625, "grad_norm": 2.0730555057525635, "learning_rate": 1.958539876267126e-05, "loss": 0.8772, "step": 10106 }, { "epoch": 1.649932655809967, "grad_norm": 1.7063666582107544, "learning_rate": 1.9585308331248713e-05, "loss": 0.6221, "step": 10107 }, { "epoch": 1.6500959144524714, "grad_norm": 1.4406131505966187, "learning_rate": 1.958521789017376e-05, "loss": 0.5969, "step": 10108 }, { "epoch": 1.6502591730949758, "grad_norm": 1.9638994932174683, "learning_rate": 1.9585127439446497e-05, "loss": 0.7371, "step": 10109 }, { "epoch": 1.6504224317374803, "grad_norm": 1.8706518411636353, "learning_rate": 1.9585036979067015e-05, "loss": 0.8596, "step": 10110 }, { "epoch": 1.6505856903799845, "grad_norm": 1.835471272468567, "learning_rate": 1.9584946509035402e-05, "loss": 0.597, "step": 10111 }, { "epoch": 1.650748949022489, "grad_norm": 1.612286925315857, "learning_rate": 1.9584856029351747e-05, "loss": 0.6687, "step": 10112 }, { "epoch": 1.6509122076649931, "grad_norm": 1.4977374076843262, "learning_rate": 1.9584765540016152e-05, "loss": 0.6834, "step": 10113 }, { "epoch": 1.6510754663074976, "grad_norm": 1.689755916595459, "learning_rate": 1.9584675041028694e-05, "loss": 0.7033, "step": 10114 }, { "epoch": 1.651238724950002, "grad_norm": 1.6142290830612183, "learning_rate": 1.9584584532389472e-05, "loss": 0.6833, "step": 10115 }, { "epoch": 1.6514019835925065, "grad_norm": 1.7026444673538208, "learning_rate": 1.9584494014098578e-05, "loss": 0.6793, "step": 10116 }, { "epoch": 1.6515652422350109, "grad_norm": 1.5553830862045288, "learning_rate": 1.95844034861561e-05, "loss": 0.6086, "step": 10117 }, { "epoch": 1.6517285008775153, "grad_norm": 1.4408942461013794, "learning_rate": 1.958431294856213e-05, "loss": 0.6797, "step": 10118 }, { "epoch": 1.6518917595200195, "grad_norm": 1.7629644870758057, "learning_rate": 1.958422240131676e-05, "loss": 0.7397, "step": 10119 }, { "epoch": 1.652055018162524, "grad_norm": 1.6951879262924194, "learning_rate": 1.9584131844420084e-05, "loss": 0.6441, "step": 10120 }, { "epoch": 1.6522182768050284, "grad_norm": 1.603844165802002, "learning_rate": 1.9584041277872184e-05, "loss": 0.6324, "step": 10121 }, { "epoch": 1.6523815354475326, "grad_norm": 1.7437760829925537, "learning_rate": 1.958395070167316e-05, "loss": 0.6812, "step": 10122 }, { "epoch": 1.652544794090037, "grad_norm": 1.8784271478652954, "learning_rate": 1.95838601158231e-05, "loss": 0.8148, "step": 10123 }, { "epoch": 1.6527080527325415, "grad_norm": 1.9167051315307617, "learning_rate": 1.9583769520322093e-05, "loss": 0.7725, "step": 10124 }, { "epoch": 1.652871311375046, "grad_norm": 1.5413975715637207, "learning_rate": 1.9583678915170236e-05, "loss": 0.6852, "step": 10125 }, { "epoch": 1.6530345700175504, "grad_norm": 1.6568725109100342, "learning_rate": 1.9583588300367614e-05, "loss": 0.8522, "step": 10126 }, { "epoch": 1.6531978286600548, "grad_norm": 1.7261161804199219, "learning_rate": 1.958349767591432e-05, "loss": 0.6722, "step": 10127 }, { "epoch": 1.653361087302559, "grad_norm": 1.7336678504943848, "learning_rate": 1.958340704181045e-05, "loss": 0.7354, "step": 10128 }, { "epoch": 1.6535243459450635, "grad_norm": 1.8548883199691772, "learning_rate": 1.958331639805609e-05, "loss": 0.6778, "step": 10129 }, { "epoch": 1.6536876045875677, "grad_norm": 1.915725827217102, "learning_rate": 1.9583225744651334e-05, "loss": 0.8282, "step": 10130 }, { "epoch": 1.6538508632300721, "grad_norm": 1.5114574432373047, "learning_rate": 1.958313508159627e-05, "loss": 0.5429, "step": 10131 }, { "epoch": 1.6540141218725766, "grad_norm": 1.8226715326309204, "learning_rate": 1.9583044408890995e-05, "loss": 0.7102, "step": 10132 }, { "epoch": 1.654177380515081, "grad_norm": 1.694242238998413, "learning_rate": 1.9582953726535595e-05, "loss": 0.7684, "step": 10133 }, { "epoch": 1.6543406391575854, "grad_norm": 1.9058992862701416, "learning_rate": 1.9582863034530163e-05, "loss": 0.7588, "step": 10134 }, { "epoch": 1.6545038978000899, "grad_norm": 1.7672829627990723, "learning_rate": 1.9582772332874792e-05, "loss": 0.849, "step": 10135 }, { "epoch": 1.6546671564425943, "grad_norm": 1.5302428007125854, "learning_rate": 1.9582681621569568e-05, "loss": 0.5718, "step": 10136 }, { "epoch": 1.6548304150850985, "grad_norm": 1.6432819366455078, "learning_rate": 1.958259090061459e-05, "loss": 0.6586, "step": 10137 }, { "epoch": 1.654993673727603, "grad_norm": 2.004427433013916, "learning_rate": 1.9582500170009942e-05, "loss": 0.6784, "step": 10138 }, { "epoch": 1.6551569323701072, "grad_norm": 1.5160046815872192, "learning_rate": 1.958240942975572e-05, "loss": 0.5158, "step": 10139 }, { "epoch": 1.6553201910126116, "grad_norm": 1.744916558265686, "learning_rate": 1.9582318679852018e-05, "loss": 0.6407, "step": 10140 }, { "epoch": 1.655483449655116, "grad_norm": 1.7982934713363647, "learning_rate": 1.9582227920298916e-05, "loss": 0.8292, "step": 10141 }, { "epoch": 1.6556467082976205, "grad_norm": 1.6873327493667603, "learning_rate": 1.958213715109652e-05, "loss": 0.6604, "step": 10142 }, { "epoch": 1.655809966940125, "grad_norm": 1.8037564754486084, "learning_rate": 1.9582046372244914e-05, "loss": 0.7041, "step": 10143 }, { "epoch": 1.6559732255826294, "grad_norm": 1.7531410455703735, "learning_rate": 1.9581955583744187e-05, "loss": 0.6746, "step": 10144 }, { "epoch": 1.6561364842251338, "grad_norm": 1.8789091110229492, "learning_rate": 1.9581864785594433e-05, "loss": 0.6639, "step": 10145 }, { "epoch": 1.656299742867638, "grad_norm": 1.7338184118270874, "learning_rate": 1.9581773977795744e-05, "loss": 0.742, "step": 10146 }, { "epoch": 1.6564630015101425, "grad_norm": 1.6008660793304443, "learning_rate": 1.9581683160348212e-05, "loss": 0.6257, "step": 10147 }, { "epoch": 1.6566262601526467, "grad_norm": 1.777541160583496, "learning_rate": 1.9581592333251927e-05, "loss": 0.6574, "step": 10148 }, { "epoch": 1.6567895187951511, "grad_norm": 1.7196379899978638, "learning_rate": 1.9581501496506985e-05, "loss": 0.58, "step": 10149 }, { "epoch": 1.6569527774376556, "grad_norm": 1.6762124300003052, "learning_rate": 1.958141065011347e-05, "loss": 0.731, "step": 10150 }, { "epoch": 1.65711603608016, "grad_norm": 1.7658483982086182, "learning_rate": 1.9581319794071477e-05, "loss": 0.6609, "step": 10151 }, { "epoch": 1.6572792947226644, "grad_norm": 1.8976913690567017, "learning_rate": 1.95812289283811e-05, "loss": 0.6555, "step": 10152 }, { "epoch": 1.6574425533651689, "grad_norm": 1.6834828853607178, "learning_rate": 1.9581138053042425e-05, "loss": 0.6291, "step": 10153 }, { "epoch": 1.6576058120076733, "grad_norm": 2.1503491401672363, "learning_rate": 1.9581047168055548e-05, "loss": 0.7018, "step": 10154 }, { "epoch": 1.6577690706501775, "grad_norm": 1.9483917951583862, "learning_rate": 1.9580956273420556e-05, "loss": 0.6285, "step": 10155 }, { "epoch": 1.657932329292682, "grad_norm": 1.728080153465271, "learning_rate": 1.9580865369137546e-05, "loss": 0.703, "step": 10156 }, { "epoch": 1.6580955879351862, "grad_norm": 1.6704884767532349, "learning_rate": 1.9580774455206608e-05, "loss": 0.6044, "step": 10157 }, { "epoch": 1.6582588465776906, "grad_norm": 1.386238694190979, "learning_rate": 1.958068353162783e-05, "loss": 0.6133, "step": 10158 }, { "epoch": 1.658422105220195, "grad_norm": 1.697726845741272, "learning_rate": 1.9580592598401308e-05, "loss": 0.6337, "step": 10159 }, { "epoch": 1.6585853638626995, "grad_norm": 1.5584254264831543, "learning_rate": 1.9580501655527132e-05, "loss": 0.6383, "step": 10160 }, { "epoch": 1.658748622505204, "grad_norm": 1.5844731330871582, "learning_rate": 1.9580410703005393e-05, "loss": 0.6442, "step": 10161 }, { "epoch": 1.6589118811477084, "grad_norm": 1.3618371486663818, "learning_rate": 1.9580319740836183e-05, "loss": 0.5364, "step": 10162 }, { "epoch": 1.6590751397902126, "grad_norm": 1.4608584642410278, "learning_rate": 1.9580228769019593e-05, "loss": 0.4875, "step": 10163 }, { "epoch": 1.659238398432717, "grad_norm": 1.603968858718872, "learning_rate": 1.9580137787555717e-05, "loss": 0.5694, "step": 10164 }, { "epoch": 1.6594016570752215, "grad_norm": 1.8860054016113281, "learning_rate": 1.958004679644464e-05, "loss": 0.707, "step": 10165 }, { "epoch": 1.6595649157177257, "grad_norm": 1.8118892908096313, "learning_rate": 1.9579955795686466e-05, "loss": 0.7506, "step": 10166 }, { "epoch": 1.6597281743602301, "grad_norm": 1.7888940572738647, "learning_rate": 1.9579864785281274e-05, "loss": 0.747, "step": 10167 }, { "epoch": 1.6598914330027346, "grad_norm": 2.2828190326690674, "learning_rate": 1.9579773765229163e-05, "loss": 0.9094, "step": 10168 }, { "epoch": 1.660054691645239, "grad_norm": 1.816757082939148, "learning_rate": 1.957968273553022e-05, "loss": 0.7806, "step": 10169 }, { "epoch": 1.6602179502877434, "grad_norm": 2.0269222259521484, "learning_rate": 1.957959169618454e-05, "loss": 0.6969, "step": 10170 }, { "epoch": 1.6603812089302479, "grad_norm": 1.7957701683044434, "learning_rate": 1.9579500647192214e-05, "loss": 0.6597, "step": 10171 }, { "epoch": 1.660544467572752, "grad_norm": 1.608731985092163, "learning_rate": 1.9579409588553334e-05, "loss": 0.5678, "step": 10172 }, { "epoch": 1.6607077262152565, "grad_norm": 1.8682831525802612, "learning_rate": 1.9579318520267992e-05, "loss": 0.6903, "step": 10173 }, { "epoch": 1.6608709848577607, "grad_norm": 2.083582639694214, "learning_rate": 1.9579227442336276e-05, "loss": 0.6915, "step": 10174 }, { "epoch": 1.6610342435002652, "grad_norm": 1.598814606666565, "learning_rate": 1.957913635475828e-05, "loss": 0.6622, "step": 10175 }, { "epoch": 1.6611975021427696, "grad_norm": 2.0166127681732178, "learning_rate": 1.95790452575341e-05, "loss": 0.7541, "step": 10176 }, { "epoch": 1.661360760785274, "grad_norm": 1.907181978225708, "learning_rate": 1.957895415066382e-05, "loss": 0.7164, "step": 10177 }, { "epoch": 1.6615240194277785, "grad_norm": 1.5327273607254028, "learning_rate": 1.957886303414754e-05, "loss": 0.591, "step": 10178 }, { "epoch": 1.661687278070283, "grad_norm": 1.5933223962783813, "learning_rate": 1.9578771907985344e-05, "loss": 0.7314, "step": 10179 }, { "epoch": 1.6618505367127874, "grad_norm": 1.9094136953353882, "learning_rate": 1.9578680772177327e-05, "loss": 0.7447, "step": 10180 }, { "epoch": 1.6620137953552916, "grad_norm": 1.566475749015808, "learning_rate": 1.9578589626723583e-05, "loss": 0.6369, "step": 10181 }, { "epoch": 1.662177053997796, "grad_norm": 1.7418193817138672, "learning_rate": 1.9578498471624205e-05, "loss": 0.6863, "step": 10182 }, { "epoch": 1.6623403126403002, "grad_norm": 1.7187318801879883, "learning_rate": 1.9578407306879276e-05, "loss": 0.5922, "step": 10183 }, { "epoch": 1.6625035712828047, "grad_norm": 1.514847755432129, "learning_rate": 1.957831613248889e-05, "loss": 0.5694, "step": 10184 }, { "epoch": 1.6626668299253091, "grad_norm": 1.9945544004440308, "learning_rate": 1.957822494845315e-05, "loss": 0.7401, "step": 10185 }, { "epoch": 1.6628300885678136, "grad_norm": 1.730692982673645, "learning_rate": 1.957813375477214e-05, "loss": 0.5652, "step": 10186 }, { "epoch": 1.662993347210318, "grad_norm": 1.4212172031402588, "learning_rate": 1.957804255144595e-05, "loss": 0.6398, "step": 10187 }, { "epoch": 1.6631566058528224, "grad_norm": 1.857919454574585, "learning_rate": 1.957795133847467e-05, "loss": 0.7642, "step": 10188 }, { "epoch": 1.6633198644953269, "grad_norm": 1.8148958683013916, "learning_rate": 1.9577860115858398e-05, "loss": 0.6605, "step": 10189 }, { "epoch": 1.663483123137831, "grad_norm": 1.7965545654296875, "learning_rate": 1.9577768883597225e-05, "loss": 0.7725, "step": 10190 }, { "epoch": 1.6636463817803355, "grad_norm": 1.9494998455047607, "learning_rate": 1.9577677641691237e-05, "loss": 0.6359, "step": 10191 }, { "epoch": 1.6638096404228397, "grad_norm": 1.5641560554504395, "learning_rate": 1.9577586390140535e-05, "loss": 0.703, "step": 10192 }, { "epoch": 1.6639728990653442, "grad_norm": 1.679521083831787, "learning_rate": 1.9577495128945204e-05, "loss": 0.7099, "step": 10193 }, { "epoch": 1.6641361577078486, "grad_norm": 1.698891282081604, "learning_rate": 1.9577403858105336e-05, "loss": 0.6898, "step": 10194 }, { "epoch": 1.664299416350353, "grad_norm": 1.6322013139724731, "learning_rate": 1.957731257762103e-05, "loss": 0.7258, "step": 10195 }, { "epoch": 1.6644626749928575, "grad_norm": 1.7092620134353638, "learning_rate": 1.9577221287492368e-05, "loss": 0.7265, "step": 10196 }, { "epoch": 1.664625933635362, "grad_norm": 1.7884491682052612, "learning_rate": 1.957712998771945e-05, "loss": 0.6462, "step": 10197 }, { "epoch": 1.6647891922778664, "grad_norm": 1.5271003246307373, "learning_rate": 1.957703867830236e-05, "loss": 0.6393, "step": 10198 }, { "epoch": 1.6649524509203706, "grad_norm": 1.7022309303283691, "learning_rate": 1.9576947359241202e-05, "loss": 0.695, "step": 10199 }, { "epoch": 1.665115709562875, "grad_norm": 1.8166682720184326, "learning_rate": 1.9576856030536055e-05, "loss": 0.7662, "step": 10200 }, { "epoch": 1.6652789682053792, "grad_norm": 1.8851487636566162, "learning_rate": 1.9576764692187017e-05, "loss": 0.8524, "step": 10201 }, { "epoch": 1.6654422268478837, "grad_norm": 1.9430642127990723, "learning_rate": 1.957667334419418e-05, "loss": 0.7425, "step": 10202 }, { "epoch": 1.6656054854903881, "grad_norm": 2.2020504474639893, "learning_rate": 1.9576581986557634e-05, "loss": 0.8968, "step": 10203 }, { "epoch": 1.6657687441328926, "grad_norm": 1.8822375535964966, "learning_rate": 1.9576490619277474e-05, "loss": 0.778, "step": 10204 }, { "epoch": 1.665932002775397, "grad_norm": 1.7037522792816162, "learning_rate": 1.9576399242353794e-05, "loss": 0.6555, "step": 10205 }, { "epoch": 1.6660952614179014, "grad_norm": 1.496363639831543, "learning_rate": 1.9576307855786677e-05, "loss": 0.5289, "step": 10206 }, { "epoch": 1.6662585200604056, "grad_norm": 1.4819103479385376, "learning_rate": 1.9576216459576222e-05, "loss": 0.6322, "step": 10207 }, { "epoch": 1.66642177870291, "grad_norm": 1.5589786767959595, "learning_rate": 1.9576125053722525e-05, "loss": 0.663, "step": 10208 }, { "epoch": 1.6665850373454145, "grad_norm": 1.579542636871338, "learning_rate": 1.9576033638225665e-05, "loss": 0.568, "step": 10209 }, { "epoch": 1.6667482959879187, "grad_norm": 1.6150691509246826, "learning_rate": 1.9575942213085746e-05, "loss": 0.6987, "step": 10210 }, { "epoch": 1.6669115546304232, "grad_norm": 2.034017562866211, "learning_rate": 1.9575850778302854e-05, "loss": 0.8668, "step": 10211 }, { "epoch": 1.6670748132729276, "grad_norm": 1.6262333393096924, "learning_rate": 1.9575759333877082e-05, "loss": 0.6307, "step": 10212 }, { "epoch": 1.667238071915432, "grad_norm": 1.530486822128296, "learning_rate": 1.9575667879808524e-05, "loss": 0.6651, "step": 10213 }, { "epoch": 1.6674013305579365, "grad_norm": 1.7640174627304077, "learning_rate": 1.9575576416097272e-05, "loss": 0.7037, "step": 10214 }, { "epoch": 1.667564589200441, "grad_norm": 1.4648715257644653, "learning_rate": 1.9575484942743417e-05, "loss": 0.4878, "step": 10215 }, { "epoch": 1.6677278478429451, "grad_norm": 1.3118517398834229, "learning_rate": 1.9575393459747047e-05, "loss": 0.4846, "step": 10216 }, { "epoch": 1.6678911064854496, "grad_norm": 2.0088562965393066, "learning_rate": 1.957530196710826e-05, "loss": 0.7943, "step": 10217 }, { "epoch": 1.6680543651279538, "grad_norm": 2.0152411460876465, "learning_rate": 1.957521046482715e-05, "loss": 0.6612, "step": 10218 }, { "epoch": 1.6682176237704582, "grad_norm": 1.4945473670959473, "learning_rate": 1.9575118952903803e-05, "loss": 0.5284, "step": 10219 }, { "epoch": 1.6683808824129627, "grad_norm": 1.4183968305587769, "learning_rate": 1.9575027431338317e-05, "loss": 0.5313, "step": 10220 }, { "epoch": 1.6685441410554671, "grad_norm": 1.8439114093780518, "learning_rate": 1.9574935900130777e-05, "loss": 0.7539, "step": 10221 }, { "epoch": 1.6687073996979715, "grad_norm": 1.5393112897872925, "learning_rate": 1.957484435928128e-05, "loss": 0.6298, "step": 10222 }, { "epoch": 1.668870658340476, "grad_norm": 1.5791175365447998, "learning_rate": 1.9574752808789918e-05, "loss": 0.6354, "step": 10223 }, { "epoch": 1.6690339169829804, "grad_norm": 1.7705105543136597, "learning_rate": 1.9574661248656782e-05, "loss": 0.7537, "step": 10224 }, { "epoch": 1.6691971756254846, "grad_norm": 1.8599151372909546, "learning_rate": 1.9574569678881965e-05, "loss": 0.7374, "step": 10225 }, { "epoch": 1.669360434267989, "grad_norm": 1.5086878538131714, "learning_rate": 1.9574478099465558e-05, "loss": 0.6066, "step": 10226 }, { "epoch": 1.6695236929104933, "grad_norm": 1.995839238166809, "learning_rate": 1.9574386510407656e-05, "loss": 0.7075, "step": 10227 }, { "epoch": 1.6696869515529977, "grad_norm": 1.6829890012741089, "learning_rate": 1.9574294911708348e-05, "loss": 0.7006, "step": 10228 }, { "epoch": 1.6698502101955022, "grad_norm": 1.7326596975326538, "learning_rate": 1.9574203303367728e-05, "loss": 0.7382, "step": 10229 }, { "epoch": 1.6700134688380066, "grad_norm": 1.5273820161819458, "learning_rate": 1.9574111685385887e-05, "loss": 0.5658, "step": 10230 }, { "epoch": 1.670176727480511, "grad_norm": 1.5078397989273071, "learning_rate": 1.9574020057762918e-05, "loss": 0.6716, "step": 10231 }, { "epoch": 1.6703399861230155, "grad_norm": 1.477341651916504, "learning_rate": 1.9573928420498914e-05, "loss": 0.531, "step": 10232 }, { "epoch": 1.67050324476552, "grad_norm": 1.7649548053741455, "learning_rate": 1.9573836773593968e-05, "loss": 0.7227, "step": 10233 }, { "epoch": 1.6706665034080241, "grad_norm": 1.749860405921936, "learning_rate": 1.957374511704817e-05, "loss": 0.5455, "step": 10234 }, { "epoch": 1.6708297620505286, "grad_norm": 1.804117202758789, "learning_rate": 1.957365345086161e-05, "loss": 0.6986, "step": 10235 }, { "epoch": 1.6709930206930328, "grad_norm": 1.6919574737548828, "learning_rate": 1.9573561775034386e-05, "loss": 0.7731, "step": 10236 }, { "epoch": 1.6711562793355372, "grad_norm": 1.8538540601730347, "learning_rate": 1.957347008956659e-05, "loss": 0.6698, "step": 10237 }, { "epoch": 1.6713195379780417, "grad_norm": 1.742448329925537, "learning_rate": 1.957337839445831e-05, "loss": 0.6145, "step": 10238 }, { "epoch": 1.671482796620546, "grad_norm": 1.5862600803375244, "learning_rate": 1.957328668970964e-05, "loss": 0.5309, "step": 10239 }, { "epoch": 1.6716460552630505, "grad_norm": 1.4562309980392456, "learning_rate": 1.9573194975320672e-05, "loss": 0.5538, "step": 10240 }, { "epoch": 1.671809313905555, "grad_norm": 1.452170968055725, "learning_rate": 1.9573103251291503e-05, "loss": 0.6983, "step": 10241 }, { "epoch": 1.6719725725480594, "grad_norm": 1.381913185119629, "learning_rate": 1.9573011517622217e-05, "loss": 0.5774, "step": 10242 }, { "epoch": 1.6721358311905636, "grad_norm": 1.5298691987991333, "learning_rate": 1.9572919774312914e-05, "loss": 0.5737, "step": 10243 }, { "epoch": 1.672299089833068, "grad_norm": 1.8400554656982422, "learning_rate": 1.9572828021363682e-05, "loss": 0.6692, "step": 10244 }, { "epoch": 1.6724623484755723, "grad_norm": 1.9534602165222168, "learning_rate": 1.9572736258774618e-05, "loss": 0.7591, "step": 10245 }, { "epoch": 1.6726256071180767, "grad_norm": 1.6134785413742065, "learning_rate": 1.9572644486545808e-05, "loss": 0.7493, "step": 10246 }, { "epoch": 1.6727888657605812, "grad_norm": 1.6433796882629395, "learning_rate": 1.957255270467735e-05, "loss": 0.7084, "step": 10247 }, { "epoch": 1.6729521244030856, "grad_norm": 2.2395172119140625, "learning_rate": 1.9572460913169327e-05, "loss": 0.7314, "step": 10248 }, { "epoch": 1.67311538304559, "grad_norm": 1.334981918334961, "learning_rate": 1.9572369112021846e-05, "loss": 0.5017, "step": 10249 }, { "epoch": 1.6732786416880945, "grad_norm": 1.6467465162277222, "learning_rate": 1.957227730123499e-05, "loss": 0.6632, "step": 10250 }, { "epoch": 1.6734419003305987, "grad_norm": 1.725786805152893, "learning_rate": 1.9572185480808848e-05, "loss": 0.6755, "step": 10251 }, { "epoch": 1.6736051589731031, "grad_norm": 1.988476276397705, "learning_rate": 1.9572093650743524e-05, "loss": 0.6922, "step": 10252 }, { "epoch": 1.6737684176156076, "grad_norm": 1.3541333675384521, "learning_rate": 1.9572001811039107e-05, "loss": 0.455, "step": 10253 }, { "epoch": 1.6739316762581118, "grad_norm": 1.5863642692565918, "learning_rate": 1.9571909961695678e-05, "loss": 0.5986, "step": 10254 }, { "epoch": 1.6740949349006162, "grad_norm": 1.620029330253601, "learning_rate": 1.9571818102713343e-05, "loss": 0.5714, "step": 10255 }, { "epoch": 1.6742581935431207, "grad_norm": 2.104813575744629, "learning_rate": 1.957172623409219e-05, "loss": 0.8655, "step": 10256 }, { "epoch": 1.674421452185625, "grad_norm": 1.7959791421890259, "learning_rate": 1.957163435583231e-05, "loss": 0.6586, "step": 10257 }, { "epoch": 1.6745847108281295, "grad_norm": 1.5776945352554321, "learning_rate": 1.9571542467933796e-05, "loss": 0.5792, "step": 10258 }, { "epoch": 1.674747969470634, "grad_norm": 1.9580739736557007, "learning_rate": 1.957145057039674e-05, "loss": 0.7608, "step": 10259 }, { "epoch": 1.6749112281131382, "grad_norm": 1.6778337955474854, "learning_rate": 1.957135866322124e-05, "loss": 0.6908, "step": 10260 }, { "epoch": 1.6750744867556426, "grad_norm": 1.6201846599578857, "learning_rate": 1.9571266746407382e-05, "loss": 0.6089, "step": 10261 }, { "epoch": 1.6752377453981468, "grad_norm": 1.748227596282959, "learning_rate": 1.9571174819955264e-05, "loss": 0.6888, "step": 10262 }, { "epoch": 1.6754010040406513, "grad_norm": 1.6993547677993774, "learning_rate": 1.9571082883864973e-05, "loss": 0.655, "step": 10263 }, { "epoch": 1.6755642626831557, "grad_norm": 1.7010806798934937, "learning_rate": 1.95709909381366e-05, "loss": 0.6351, "step": 10264 }, { "epoch": 1.6757275213256602, "grad_norm": 1.4872894287109375, "learning_rate": 1.957089898277025e-05, "loss": 0.6166, "step": 10265 }, { "epoch": 1.6758907799681646, "grad_norm": 1.5588740110397339, "learning_rate": 1.9570807017766e-05, "loss": 0.5526, "step": 10266 }, { "epoch": 1.676054038610669, "grad_norm": 1.8015319108963013, "learning_rate": 1.9570715043123955e-05, "loss": 0.7538, "step": 10267 }, { "epoch": 1.6762172972531735, "grad_norm": 1.6484651565551758, "learning_rate": 1.9570623058844197e-05, "loss": 0.6181, "step": 10268 }, { "epoch": 1.6763805558956777, "grad_norm": 1.5493019819259644, "learning_rate": 1.957053106492683e-05, "loss": 0.524, "step": 10269 }, { "epoch": 1.6765438145381821, "grad_norm": 1.588729739189148, "learning_rate": 1.9570439061371936e-05, "loss": 0.5845, "step": 10270 }, { "epoch": 1.6767070731806863, "grad_norm": 1.5978519916534424, "learning_rate": 1.9570347048179617e-05, "loss": 0.6038, "step": 10271 }, { "epoch": 1.6768703318231908, "grad_norm": 1.2826318740844727, "learning_rate": 1.957025502534996e-05, "loss": 0.5497, "step": 10272 }, { "epoch": 1.6770335904656952, "grad_norm": 1.5943729877471924, "learning_rate": 1.9570162992883056e-05, "loss": 0.6067, "step": 10273 }, { "epoch": 1.6771968491081997, "grad_norm": 1.4985885620117188, "learning_rate": 1.9570070950779002e-05, "loss": 0.5986, "step": 10274 }, { "epoch": 1.677360107750704, "grad_norm": 1.361028790473938, "learning_rate": 1.9569978899037887e-05, "loss": 0.6321, "step": 10275 }, { "epoch": 1.6775233663932085, "grad_norm": 1.8935763835906982, "learning_rate": 1.9569886837659808e-05, "loss": 0.7333, "step": 10276 }, { "epoch": 1.677686625035713, "grad_norm": 1.4201124906539917, "learning_rate": 1.9569794766644856e-05, "loss": 0.5616, "step": 10277 }, { "epoch": 1.6778498836782172, "grad_norm": 1.9005646705627441, "learning_rate": 1.956970268599312e-05, "loss": 0.6629, "step": 10278 }, { "epoch": 1.6780131423207216, "grad_norm": 1.4880372285842896, "learning_rate": 1.95696105957047e-05, "loss": 0.5651, "step": 10279 }, { "epoch": 1.6781764009632258, "grad_norm": 1.7914921045303345, "learning_rate": 1.9569518495779682e-05, "loss": 0.613, "step": 10280 }, { "epoch": 1.6783396596057303, "grad_norm": 1.5072354078292847, "learning_rate": 1.956942638621816e-05, "loss": 0.5642, "step": 10281 }, { "epoch": 1.6785029182482347, "grad_norm": 1.6398664712905884, "learning_rate": 1.9569334267020234e-05, "loss": 0.6774, "step": 10282 }, { "epoch": 1.6786661768907392, "grad_norm": 1.95987069606781, "learning_rate": 1.9569242138185986e-05, "loss": 0.7528, "step": 10283 }, { "epoch": 1.6788294355332436, "grad_norm": 1.4311730861663818, "learning_rate": 1.9569149999715514e-05, "loss": 0.5813, "step": 10284 }, { "epoch": 1.678992694175748, "grad_norm": 1.842061996459961, "learning_rate": 1.9569057851608915e-05, "loss": 0.7237, "step": 10285 }, { "epoch": 1.6791559528182525, "grad_norm": 1.4468880891799927, "learning_rate": 1.9568965693866273e-05, "loss": 0.608, "step": 10286 }, { "epoch": 1.6793192114607567, "grad_norm": 1.6308139562606812, "learning_rate": 1.9568873526487685e-05, "loss": 0.7146, "step": 10287 }, { "epoch": 1.6794824701032611, "grad_norm": 1.9013844728469849, "learning_rate": 1.9568781349473244e-05, "loss": 0.7412, "step": 10288 }, { "epoch": 1.6796457287457653, "grad_norm": 1.6285382509231567, "learning_rate": 1.9568689162823044e-05, "loss": 0.6613, "step": 10289 }, { "epoch": 1.6798089873882698, "grad_norm": 1.9121285676956177, "learning_rate": 1.9568596966537177e-05, "loss": 0.6193, "step": 10290 }, { "epoch": 1.6799722460307742, "grad_norm": 1.769327998161316, "learning_rate": 1.9568504760615734e-05, "loss": 0.7526, "step": 10291 }, { "epoch": 1.6801355046732787, "grad_norm": 1.6257365942001343, "learning_rate": 1.956841254505881e-05, "loss": 0.7587, "step": 10292 }, { "epoch": 1.680298763315783, "grad_norm": 1.6507139205932617, "learning_rate": 1.9568320319866497e-05, "loss": 0.7255, "step": 10293 }, { "epoch": 1.6804620219582875, "grad_norm": 1.782328486442566, "learning_rate": 1.9568228085038886e-05, "loss": 0.6294, "step": 10294 }, { "epoch": 1.680625280600792, "grad_norm": 1.5725504159927368, "learning_rate": 1.956813584057608e-05, "loss": 0.6373, "step": 10295 }, { "epoch": 1.6807885392432962, "grad_norm": 1.7458202838897705, "learning_rate": 1.9568043586478153e-05, "loss": 0.6937, "step": 10296 }, { "epoch": 1.6809517978858006, "grad_norm": 1.6937462091445923, "learning_rate": 1.9567951322745214e-05, "loss": 0.6359, "step": 10297 }, { "epoch": 1.6811150565283048, "grad_norm": 1.3553416728973389, "learning_rate": 1.956785904937735e-05, "loss": 0.5682, "step": 10298 }, { "epoch": 1.6812783151708093, "grad_norm": 1.9182075262069702, "learning_rate": 1.9567766766374655e-05, "loss": 0.7347, "step": 10299 }, { "epoch": 1.6814415738133137, "grad_norm": 1.767186164855957, "learning_rate": 1.956767447373722e-05, "loss": 0.6102, "step": 10300 }, { "epoch": 1.6816048324558182, "grad_norm": 1.6177268028259277, "learning_rate": 1.9567582171465137e-05, "loss": 0.7695, "step": 10301 }, { "epoch": 1.6817680910983226, "grad_norm": 1.9765204191207886, "learning_rate": 1.9567489859558506e-05, "loss": 0.7826, "step": 10302 }, { "epoch": 1.681931349740827, "grad_norm": 1.5368883609771729, "learning_rate": 1.9567397538017415e-05, "loss": 0.6195, "step": 10303 }, { "epoch": 1.6820946083833312, "grad_norm": 1.9082727432250977, "learning_rate": 1.9567305206841954e-05, "loss": 0.7628, "step": 10304 }, { "epoch": 1.6822578670258357, "grad_norm": 1.620835542678833, "learning_rate": 1.9567212866032222e-05, "loss": 0.6095, "step": 10305 }, { "epoch": 1.6824211256683401, "grad_norm": 1.4895868301391602, "learning_rate": 1.9567120515588307e-05, "loss": 0.6356, "step": 10306 }, { "epoch": 1.6825843843108443, "grad_norm": 1.3408678770065308, "learning_rate": 1.9567028155510303e-05, "loss": 0.5468, "step": 10307 }, { "epoch": 1.6827476429533488, "grad_norm": 1.746203899383545, "learning_rate": 1.956693578579831e-05, "loss": 0.8664, "step": 10308 }, { "epoch": 1.6829109015958532, "grad_norm": 1.7717413902282715, "learning_rate": 1.956684340645241e-05, "loss": 0.7478, "step": 10309 }, { "epoch": 1.6830741602383577, "grad_norm": 1.9842815399169922, "learning_rate": 1.9566751017472704e-05, "loss": 0.5489, "step": 10310 }, { "epoch": 1.683237418880862, "grad_norm": 2.006906270980835, "learning_rate": 1.956665861885928e-05, "loss": 0.6401, "step": 10311 }, { "epoch": 1.6834006775233665, "grad_norm": 2.0050320625305176, "learning_rate": 1.9566566210612232e-05, "loss": 0.7387, "step": 10312 }, { "epoch": 1.6835639361658707, "grad_norm": 1.6769111156463623, "learning_rate": 1.9566473792731656e-05, "loss": 0.7487, "step": 10313 }, { "epoch": 1.6837271948083752, "grad_norm": 1.5057857036590576, "learning_rate": 1.9566381365217646e-05, "loss": 0.689, "step": 10314 }, { "epoch": 1.6838904534508794, "grad_norm": 1.684083342552185, "learning_rate": 1.956628892807029e-05, "loss": 0.7788, "step": 10315 }, { "epoch": 1.6840537120933838, "grad_norm": 1.783150553703308, "learning_rate": 1.9566196481289685e-05, "loss": 0.8543, "step": 10316 }, { "epoch": 1.6842169707358883, "grad_norm": 1.849509596824646, "learning_rate": 1.9566104024875924e-05, "loss": 0.7419, "step": 10317 }, { "epoch": 1.6843802293783927, "grad_norm": 1.7741681337356567, "learning_rate": 1.9566011558829095e-05, "loss": 0.7793, "step": 10318 }, { "epoch": 1.6845434880208972, "grad_norm": 1.7577685117721558, "learning_rate": 1.9565919083149295e-05, "loss": 0.8548, "step": 10319 }, { "epoch": 1.6847067466634016, "grad_norm": 1.7868727445602417, "learning_rate": 1.9565826597836623e-05, "loss": 0.7788, "step": 10320 }, { "epoch": 1.684870005305906, "grad_norm": 1.92340087890625, "learning_rate": 1.956573410289116e-05, "loss": 0.692, "step": 10321 }, { "epoch": 1.6850332639484102, "grad_norm": 1.3995797634124756, "learning_rate": 1.9565641598313005e-05, "loss": 0.5757, "step": 10322 }, { "epoch": 1.6851965225909147, "grad_norm": 1.6776593923568726, "learning_rate": 1.9565549084102255e-05, "loss": 0.6972, "step": 10323 }, { "epoch": 1.685359781233419, "grad_norm": 1.6626075506210327, "learning_rate": 1.9565456560258997e-05, "loss": 0.6976, "step": 10324 }, { "epoch": 1.6855230398759233, "grad_norm": 1.7676337957382202, "learning_rate": 1.956536402678333e-05, "loss": 0.7629, "step": 10325 }, { "epoch": 1.6856862985184278, "grad_norm": 1.4399057626724243, "learning_rate": 1.956527148367534e-05, "loss": 0.5663, "step": 10326 }, { "epoch": 1.6858495571609322, "grad_norm": 1.4094305038452148, "learning_rate": 1.956517893093513e-05, "loss": 0.6158, "step": 10327 }, { "epoch": 1.6860128158034366, "grad_norm": 1.7661116123199463, "learning_rate": 1.956508636856278e-05, "loss": 0.6303, "step": 10328 }, { "epoch": 1.686176074445941, "grad_norm": 1.622406244277954, "learning_rate": 1.9564993796558394e-05, "loss": 0.6957, "step": 10329 }, { "epoch": 1.6863393330884455, "grad_norm": 1.8783899545669556, "learning_rate": 1.9564901214922063e-05, "loss": 0.6477, "step": 10330 }, { "epoch": 1.6865025917309497, "grad_norm": 1.7842068672180176, "learning_rate": 1.9564808623653877e-05, "loss": 0.7592, "step": 10331 }, { "epoch": 1.6866658503734542, "grad_norm": 1.5500167608261108, "learning_rate": 1.9564716022753934e-05, "loss": 0.6556, "step": 10332 }, { "epoch": 1.6868291090159584, "grad_norm": 1.8271013498306274, "learning_rate": 1.956462341222232e-05, "loss": 0.8031, "step": 10333 }, { "epoch": 1.6869923676584628, "grad_norm": 1.6053193807601929, "learning_rate": 1.9564530792059134e-05, "loss": 0.5971, "step": 10334 }, { "epoch": 1.6871556263009673, "grad_norm": 1.6521666049957275, "learning_rate": 1.956443816226447e-05, "loss": 0.6919, "step": 10335 }, { "epoch": 1.6873188849434717, "grad_norm": 2.3303864002227783, "learning_rate": 1.956434552283842e-05, "loss": 0.7317, "step": 10336 }, { "epoch": 1.6874821435859761, "grad_norm": 1.5630825757980347, "learning_rate": 1.9564252873781076e-05, "loss": 0.557, "step": 10337 }, { "epoch": 1.6876454022284806, "grad_norm": 1.9298499822616577, "learning_rate": 1.956416021509253e-05, "loss": 0.7512, "step": 10338 }, { "epoch": 1.687808660870985, "grad_norm": 1.9832113981246948, "learning_rate": 1.9564067546772877e-05, "loss": 0.6703, "step": 10339 }, { "epoch": 1.6879719195134892, "grad_norm": 1.7148081064224243, "learning_rate": 1.9563974868822212e-05, "loss": 0.6572, "step": 10340 }, { "epoch": 1.6881351781559937, "grad_norm": 1.9668340682983398, "learning_rate": 1.9563882181240627e-05, "loss": 0.734, "step": 10341 }, { "epoch": 1.688298436798498, "grad_norm": 1.9375584125518799, "learning_rate": 1.9563789484028217e-05, "loss": 0.8926, "step": 10342 }, { "epoch": 1.6884616954410023, "grad_norm": 1.7779438495635986, "learning_rate": 1.956369677718507e-05, "loss": 0.762, "step": 10343 }, { "epoch": 1.6886249540835068, "grad_norm": 1.7154210805892944, "learning_rate": 1.9563604060711284e-05, "loss": 0.6738, "step": 10344 }, { "epoch": 1.6887882127260112, "grad_norm": 1.7179269790649414, "learning_rate": 1.9563511334606952e-05, "loss": 0.7045, "step": 10345 }, { "epoch": 1.6889514713685156, "grad_norm": 1.8920576572418213, "learning_rate": 1.9563418598872167e-05, "loss": 0.6675, "step": 10346 }, { "epoch": 1.68911473001102, "grad_norm": 1.863937258720398, "learning_rate": 1.956332585350702e-05, "loss": 0.6899, "step": 10347 }, { "epoch": 1.6892779886535243, "grad_norm": 1.4510084390640259, "learning_rate": 1.956323309851161e-05, "loss": 0.6185, "step": 10348 }, { "epoch": 1.6894412472960287, "grad_norm": 2.0105741024017334, "learning_rate": 1.956314033388602e-05, "loss": 0.7947, "step": 10349 }, { "epoch": 1.6896045059385332, "grad_norm": 1.918426752090454, "learning_rate": 1.9563047559630356e-05, "loss": 0.7547, "step": 10350 }, { "epoch": 1.6897677645810374, "grad_norm": 1.740210771560669, "learning_rate": 1.9562954775744706e-05, "loss": 0.6539, "step": 10351 }, { "epoch": 1.6899310232235418, "grad_norm": 1.9839599132537842, "learning_rate": 1.956286198222916e-05, "loss": 0.6686, "step": 10352 }, { "epoch": 1.6900942818660463, "grad_norm": 1.5641770362854004, "learning_rate": 1.9562769179083816e-05, "loss": 0.6648, "step": 10353 }, { "epoch": 1.6902575405085507, "grad_norm": 1.8603949546813965, "learning_rate": 1.9562676366308765e-05, "loss": 0.753, "step": 10354 }, { "epoch": 1.6904207991510551, "grad_norm": 1.5097898244857788, "learning_rate": 1.9562583543904102e-05, "loss": 0.5592, "step": 10355 }, { "epoch": 1.6905840577935596, "grad_norm": 1.6280597448349, "learning_rate": 1.956249071186992e-05, "loss": 0.6519, "step": 10356 }, { "epoch": 1.6907473164360638, "grad_norm": 1.6530081033706665, "learning_rate": 1.9562397870206315e-05, "loss": 0.6407, "step": 10357 }, { "epoch": 1.6909105750785682, "grad_norm": 1.8442081212997437, "learning_rate": 1.9562305018913373e-05, "loss": 0.6626, "step": 10358 }, { "epoch": 1.6910738337210725, "grad_norm": 1.640952706336975, "learning_rate": 1.9562212157991194e-05, "loss": 0.6632, "step": 10359 }, { "epoch": 1.6912370923635769, "grad_norm": 1.7141512632369995, "learning_rate": 1.9562119287439874e-05, "loss": 0.6672, "step": 10360 }, { "epoch": 1.6914003510060813, "grad_norm": 1.871482491493225, "learning_rate": 1.9562026407259497e-05, "loss": 0.7884, "step": 10361 }, { "epoch": 1.6915636096485858, "grad_norm": 1.4997402429580688, "learning_rate": 1.9561933517450164e-05, "loss": 0.6638, "step": 10362 }, { "epoch": 1.6917268682910902, "grad_norm": 1.7773383855819702, "learning_rate": 1.9561840618011968e-05, "loss": 0.6184, "step": 10363 }, { "epoch": 1.6918901269335946, "grad_norm": 1.4365946054458618, "learning_rate": 1.9561747708945e-05, "loss": 0.5647, "step": 10364 }, { "epoch": 1.692053385576099, "grad_norm": 1.459729790687561, "learning_rate": 1.9561654790249353e-05, "loss": 0.66, "step": 10365 }, { "epoch": 1.6922166442186033, "grad_norm": 1.6671079397201538, "learning_rate": 1.9561561861925124e-05, "loss": 0.6473, "step": 10366 }, { "epoch": 1.6923799028611077, "grad_norm": 1.923784613609314, "learning_rate": 1.95614689239724e-05, "loss": 0.7695, "step": 10367 }, { "epoch": 1.692543161503612, "grad_norm": 1.9499075412750244, "learning_rate": 1.9561375976391287e-05, "loss": 0.7813, "step": 10368 }, { "epoch": 1.6927064201461164, "grad_norm": 1.6839816570281982, "learning_rate": 1.9561283019181866e-05, "loss": 0.6053, "step": 10369 }, { "epoch": 1.6928696787886208, "grad_norm": 1.5903276205062866, "learning_rate": 1.956119005234424e-05, "loss": 0.6747, "step": 10370 }, { "epoch": 1.6930329374311253, "grad_norm": 1.5954084396362305, "learning_rate": 1.9561097075878492e-05, "loss": 0.6271, "step": 10371 }, { "epoch": 1.6931961960736297, "grad_norm": 1.581481695175171, "learning_rate": 1.9561004089784726e-05, "loss": 0.606, "step": 10372 }, { "epoch": 1.6933594547161341, "grad_norm": 1.6933077573776245, "learning_rate": 1.956091109406303e-05, "loss": 0.7371, "step": 10373 }, { "epoch": 1.6935227133586386, "grad_norm": 1.7279263734817505, "learning_rate": 1.9560818088713498e-05, "loss": 0.638, "step": 10374 }, { "epoch": 1.6936859720011428, "grad_norm": 1.3933202028274536, "learning_rate": 1.9560725073736226e-05, "loss": 0.5544, "step": 10375 }, { "epoch": 1.6938492306436472, "grad_norm": 2.2993035316467285, "learning_rate": 1.9560632049131307e-05, "loss": 1.0912, "step": 10376 }, { "epoch": 1.6940124892861514, "grad_norm": 1.5983117818832397, "learning_rate": 1.9560539014898832e-05, "loss": 0.6224, "step": 10377 }, { "epoch": 1.6941757479286559, "grad_norm": 1.974234938621521, "learning_rate": 1.95604459710389e-05, "loss": 0.8273, "step": 10378 }, { "epoch": 1.6943390065711603, "grad_norm": 1.6951218843460083, "learning_rate": 1.95603529175516e-05, "loss": 0.6049, "step": 10379 }, { "epoch": 1.6945022652136648, "grad_norm": 1.792216181755066, "learning_rate": 1.9560259854437026e-05, "loss": 0.7101, "step": 10380 }, { "epoch": 1.6946655238561692, "grad_norm": 1.470623254776001, "learning_rate": 1.9560166781695272e-05, "loss": 0.7339, "step": 10381 }, { "epoch": 1.6948287824986736, "grad_norm": 1.7016723155975342, "learning_rate": 1.9560073699326433e-05, "loss": 0.6647, "step": 10382 }, { "epoch": 1.694992041141178, "grad_norm": 1.8739635944366455, "learning_rate": 1.9559980607330607e-05, "loss": 0.7532, "step": 10383 }, { "epoch": 1.6951552997836823, "grad_norm": 1.57029128074646, "learning_rate": 1.955988750570788e-05, "loss": 0.7869, "step": 10384 }, { "epoch": 1.6953185584261867, "grad_norm": 1.65780770778656, "learning_rate": 1.9559794394458347e-05, "loss": 0.7125, "step": 10385 }, { "epoch": 1.695481817068691, "grad_norm": 1.6460646390914917, "learning_rate": 1.9559701273582106e-05, "loss": 0.6449, "step": 10386 }, { "epoch": 1.6956450757111954, "grad_norm": 1.6348038911819458, "learning_rate": 1.9559608143079244e-05, "loss": 0.6261, "step": 10387 }, { "epoch": 1.6958083343536998, "grad_norm": 1.7019981145858765, "learning_rate": 1.9559515002949866e-05, "loss": 0.6643, "step": 10388 }, { "epoch": 1.6959715929962043, "grad_norm": 1.745544195175171, "learning_rate": 1.9559421853194057e-05, "loss": 0.6021, "step": 10389 }, { "epoch": 1.6961348516387087, "grad_norm": 1.295472264289856, "learning_rate": 1.955932869381191e-05, "loss": 0.5443, "step": 10390 }, { "epoch": 1.6962981102812131, "grad_norm": 2.1479787826538086, "learning_rate": 1.955923552480352e-05, "loss": 0.6557, "step": 10391 }, { "epoch": 1.6964613689237173, "grad_norm": 1.8739631175994873, "learning_rate": 1.9559142346168988e-05, "loss": 0.7394, "step": 10392 }, { "epoch": 1.6966246275662218, "grad_norm": 1.8318933248519897, "learning_rate": 1.9559049157908396e-05, "loss": 0.6462, "step": 10393 }, { "epoch": 1.6967878862087262, "grad_norm": 1.789330244064331, "learning_rate": 1.9558955960021847e-05, "loss": 0.6907, "step": 10394 }, { "epoch": 1.6969511448512304, "grad_norm": 1.499477505683899, "learning_rate": 1.9558862752509433e-05, "loss": 0.6474, "step": 10395 }, { "epoch": 1.6971144034937349, "grad_norm": 1.9187700748443604, "learning_rate": 1.955876953537125e-05, "loss": 0.7723, "step": 10396 }, { "epoch": 1.6972776621362393, "grad_norm": 1.9851876497268677, "learning_rate": 1.955867630860738e-05, "loss": 0.7118, "step": 10397 }, { "epoch": 1.6974409207787438, "grad_norm": 1.7708414793014526, "learning_rate": 1.955858307221793e-05, "loss": 0.6746, "step": 10398 }, { "epoch": 1.6976041794212482, "grad_norm": 1.5312581062316895, "learning_rate": 1.955848982620299e-05, "loss": 0.6438, "step": 10399 }, { "epoch": 1.6977674380637526, "grad_norm": 1.7737969160079956, "learning_rate": 1.955839657056265e-05, "loss": 0.7262, "step": 10400 }, { "epoch": 1.6979306967062568, "grad_norm": 1.7462016344070435, "learning_rate": 1.9558303305297014e-05, "loss": 0.781, "step": 10401 }, { "epoch": 1.6980939553487613, "grad_norm": 1.778375506401062, "learning_rate": 1.9558210030406165e-05, "loss": 0.7123, "step": 10402 }, { "epoch": 1.6982572139912655, "grad_norm": 1.536126971244812, "learning_rate": 1.9558116745890202e-05, "loss": 0.6312, "step": 10403 }, { "epoch": 1.69842047263377, "grad_norm": 1.692869782447815, "learning_rate": 1.9558023451749214e-05, "loss": 0.7088, "step": 10404 }, { "epoch": 1.6985837312762744, "grad_norm": 1.5854151248931885, "learning_rate": 1.9557930147983303e-05, "loss": 0.6595, "step": 10405 }, { "epoch": 1.6987469899187788, "grad_norm": 1.4214800596237183, "learning_rate": 1.9557836834592557e-05, "loss": 0.5733, "step": 10406 }, { "epoch": 1.6989102485612833, "grad_norm": 1.7989214658737183, "learning_rate": 1.9557743511577073e-05, "loss": 0.618, "step": 10407 }, { "epoch": 1.6990735072037877, "grad_norm": 1.518678903579712, "learning_rate": 1.955765017893694e-05, "loss": 0.6208, "step": 10408 }, { "epoch": 1.6992367658462921, "grad_norm": 2.008467674255371, "learning_rate": 1.9557556836672264e-05, "loss": 0.773, "step": 10409 }, { "epoch": 1.6994000244887963, "grad_norm": 1.423970341682434, "learning_rate": 1.9557463484783125e-05, "loss": 0.5142, "step": 10410 }, { "epoch": 1.6995632831313008, "grad_norm": 1.3258861303329468, "learning_rate": 1.9557370123269624e-05, "loss": 0.5938, "step": 10411 }, { "epoch": 1.699726541773805, "grad_norm": 1.4250050783157349, "learning_rate": 1.9557276752131855e-05, "loss": 0.6355, "step": 10412 }, { "epoch": 1.6998898004163094, "grad_norm": 1.88783860206604, "learning_rate": 1.9557183371369907e-05, "loss": 0.7328, "step": 10413 }, { "epoch": 1.7000530590588139, "grad_norm": 1.9888169765472412, "learning_rate": 1.9557089980983882e-05, "loss": 0.7534, "step": 10414 }, { "epoch": 1.7002163177013183, "grad_norm": 2.227163314819336, "learning_rate": 1.9556996580973866e-05, "loss": 0.6676, "step": 10415 }, { "epoch": 1.7003795763438228, "grad_norm": 1.3720204830169678, "learning_rate": 1.9556903171339963e-05, "loss": 0.4934, "step": 10416 }, { "epoch": 1.7005428349863272, "grad_norm": 1.652794361114502, "learning_rate": 1.9556809752082255e-05, "loss": 0.6418, "step": 10417 }, { "epoch": 1.7007060936288316, "grad_norm": 1.6741639375686646, "learning_rate": 1.9556716323200846e-05, "loss": 0.6612, "step": 10418 }, { "epoch": 1.7008693522713358, "grad_norm": 2.026855230331421, "learning_rate": 1.9556622884695825e-05, "loss": 0.7411, "step": 10419 }, { "epoch": 1.7010326109138403, "grad_norm": 1.985580325126648, "learning_rate": 1.9556529436567287e-05, "loss": 0.6397, "step": 10420 }, { "epoch": 1.7011958695563445, "grad_norm": 1.7391304969787598, "learning_rate": 1.9556435978815326e-05, "loss": 0.6854, "step": 10421 }, { "epoch": 1.701359128198849, "grad_norm": 2.043095111846924, "learning_rate": 1.9556342511440034e-05, "loss": 0.8242, "step": 10422 }, { "epoch": 1.7015223868413534, "grad_norm": 2.246267318725586, "learning_rate": 1.955624903444151e-05, "loss": 0.8087, "step": 10423 }, { "epoch": 1.7016856454838578, "grad_norm": 1.598379135131836, "learning_rate": 1.9556155547819847e-05, "loss": 0.6944, "step": 10424 }, { "epoch": 1.7018489041263622, "grad_norm": 1.617615818977356, "learning_rate": 1.9556062051575138e-05, "loss": 0.5413, "step": 10425 }, { "epoch": 1.7020121627688667, "grad_norm": 1.6692211627960205, "learning_rate": 1.9555968545707474e-05, "loss": 0.6898, "step": 10426 }, { "epoch": 1.7021754214113711, "grad_norm": 1.9588664770126343, "learning_rate": 1.9555875030216957e-05, "loss": 0.7431, "step": 10427 }, { "epoch": 1.7023386800538753, "grad_norm": 1.6091630458831787, "learning_rate": 1.9555781505103674e-05, "loss": 0.6907, "step": 10428 }, { "epoch": 1.7025019386963798, "grad_norm": 1.8552803993225098, "learning_rate": 1.955568797036772e-05, "loss": 0.6197, "step": 10429 }, { "epoch": 1.702665197338884, "grad_norm": 1.7571922540664673, "learning_rate": 1.9555594426009193e-05, "loss": 0.7279, "step": 10430 }, { "epoch": 1.7028284559813884, "grad_norm": 1.8002344369888306, "learning_rate": 1.9555500872028184e-05, "loss": 0.742, "step": 10431 }, { "epoch": 1.7029917146238929, "grad_norm": 1.7845065593719482, "learning_rate": 1.9555407308424786e-05, "loss": 0.6341, "step": 10432 }, { "epoch": 1.7031549732663973, "grad_norm": 1.9372565746307373, "learning_rate": 1.95553137351991e-05, "loss": 0.7339, "step": 10433 }, { "epoch": 1.7033182319089017, "grad_norm": 1.5389457941055298, "learning_rate": 1.9555220152351212e-05, "loss": 0.6166, "step": 10434 }, { "epoch": 1.7034814905514062, "grad_norm": 1.7296741008758545, "learning_rate": 1.9555126559881222e-05, "loss": 0.6789, "step": 10435 }, { "epoch": 1.7036447491939104, "grad_norm": 1.4303754568099976, "learning_rate": 1.9555032957789223e-05, "loss": 0.5658, "step": 10436 }, { "epoch": 1.7038080078364148, "grad_norm": 1.2684929370880127, "learning_rate": 1.9554939346075302e-05, "loss": 0.4861, "step": 10437 }, { "epoch": 1.7039712664789193, "grad_norm": 1.8279582262039185, "learning_rate": 1.9554845724739565e-05, "loss": 0.7368, "step": 10438 }, { "epoch": 1.7041345251214235, "grad_norm": 1.4415745735168457, "learning_rate": 1.9554752093782102e-05, "loss": 0.5763, "step": 10439 }, { "epoch": 1.704297783763928, "grad_norm": 1.8117859363555908, "learning_rate": 1.9554658453203003e-05, "loss": 0.6321, "step": 10440 }, { "epoch": 1.7044610424064324, "grad_norm": 1.9942576885223389, "learning_rate": 1.9554564803002364e-05, "loss": 0.6893, "step": 10441 }, { "epoch": 1.7046243010489368, "grad_norm": 2.0419869422912598, "learning_rate": 1.9554471143180286e-05, "loss": 0.909, "step": 10442 }, { "epoch": 1.7047875596914412, "grad_norm": 1.8236314058303833, "learning_rate": 1.9554377473736858e-05, "loss": 0.8843, "step": 10443 }, { "epoch": 1.7049508183339457, "grad_norm": 1.4909166097640991, "learning_rate": 1.955428379467217e-05, "loss": 0.6325, "step": 10444 }, { "epoch": 1.70511407697645, "grad_norm": 1.6972407102584839, "learning_rate": 1.955419010598632e-05, "loss": 0.628, "step": 10445 }, { "epoch": 1.7052773356189543, "grad_norm": 1.5303136110305786, "learning_rate": 1.9554096407679406e-05, "loss": 0.6242, "step": 10446 }, { "epoch": 1.7054405942614586, "grad_norm": 1.540179967880249, "learning_rate": 1.955400269975152e-05, "loss": 0.545, "step": 10447 }, { "epoch": 1.705603852903963, "grad_norm": 1.9699978828430176, "learning_rate": 1.9553908982202758e-05, "loss": 0.7026, "step": 10448 }, { "epoch": 1.7057671115464674, "grad_norm": 1.5259641408920288, "learning_rate": 1.9553815255033208e-05, "loss": 0.4893, "step": 10449 }, { "epoch": 1.7059303701889719, "grad_norm": 1.7998427152633667, "learning_rate": 1.955372151824297e-05, "loss": 0.6406, "step": 10450 }, { "epoch": 1.7060936288314763, "grad_norm": 1.4915443658828735, "learning_rate": 1.9553627771832135e-05, "loss": 0.5489, "step": 10451 }, { "epoch": 1.7062568874739807, "grad_norm": 2.0249814987182617, "learning_rate": 1.95535340158008e-05, "loss": 0.7057, "step": 10452 }, { "epoch": 1.7064201461164852, "grad_norm": 1.745384931564331, "learning_rate": 1.955344025014906e-05, "loss": 0.5929, "step": 10453 }, { "epoch": 1.7065834047589894, "grad_norm": 1.53267240524292, "learning_rate": 1.9553346474877008e-05, "loss": 0.6604, "step": 10454 }, { "epoch": 1.7067466634014938, "grad_norm": 1.744869351387024, "learning_rate": 1.955325268998474e-05, "loss": 0.7361, "step": 10455 }, { "epoch": 1.706909922043998, "grad_norm": 2.003955364227295, "learning_rate": 1.955315889547235e-05, "loss": 0.8103, "step": 10456 }, { "epoch": 1.7070731806865025, "grad_norm": 1.9372624158859253, "learning_rate": 1.9553065091339925e-05, "loss": 0.8346, "step": 10457 }, { "epoch": 1.707236439329007, "grad_norm": 1.4719704389572144, "learning_rate": 1.9552971277587572e-05, "loss": 0.63, "step": 10458 }, { "epoch": 1.7073996979715114, "grad_norm": 1.672316312789917, "learning_rate": 1.9552877454215378e-05, "loss": 0.699, "step": 10459 }, { "epoch": 1.7075629566140158, "grad_norm": 1.620107889175415, "learning_rate": 1.9552783621223437e-05, "loss": 0.6643, "step": 10460 }, { "epoch": 1.7077262152565202, "grad_norm": 1.8448437452316284, "learning_rate": 1.9552689778611848e-05, "loss": 0.6809, "step": 10461 }, { "epoch": 1.7078894738990247, "grad_norm": 1.763595461845398, "learning_rate": 1.9552595926380703e-05, "loss": 0.7254, "step": 10462 }, { "epoch": 1.708052732541529, "grad_norm": 1.423767328262329, "learning_rate": 1.9552502064530096e-05, "loss": 0.6175, "step": 10463 }, { "epoch": 1.7082159911840333, "grad_norm": 1.5384984016418457, "learning_rate": 1.9552408193060118e-05, "loss": 0.628, "step": 10464 }, { "epoch": 1.7083792498265375, "grad_norm": 1.9554121494293213, "learning_rate": 1.9552314311970875e-05, "loss": 0.741, "step": 10465 }, { "epoch": 1.708542508469042, "grad_norm": 1.7623236179351807, "learning_rate": 1.9552220421262448e-05, "loss": 0.8253, "step": 10466 }, { "epoch": 1.7087057671115464, "grad_norm": 1.346286416053772, "learning_rate": 1.955212652093494e-05, "loss": 0.5285, "step": 10467 }, { "epoch": 1.7088690257540509, "grad_norm": 2.1291158199310303, "learning_rate": 1.9552032610988442e-05, "loss": 0.8457, "step": 10468 }, { "epoch": 1.7090322843965553, "grad_norm": 1.638199806213379, "learning_rate": 1.955193869142305e-05, "loss": 0.6124, "step": 10469 }, { "epoch": 1.7091955430390597, "grad_norm": 1.5657198429107666, "learning_rate": 1.955184476223886e-05, "loss": 0.5607, "step": 10470 }, { "epoch": 1.7093588016815642, "grad_norm": 1.9055685997009277, "learning_rate": 1.9551750823435963e-05, "loss": 0.8028, "step": 10471 }, { "epoch": 1.7095220603240684, "grad_norm": 1.5759227275848389, "learning_rate": 1.9551656875014454e-05, "loss": 0.6475, "step": 10472 }, { "epoch": 1.7096853189665728, "grad_norm": 1.6537609100341797, "learning_rate": 1.9551562916974433e-05, "loss": 0.6772, "step": 10473 }, { "epoch": 1.709848577609077, "grad_norm": 1.5753872394561768, "learning_rate": 1.9551468949315987e-05, "loss": 0.5938, "step": 10474 }, { "epoch": 1.7100118362515815, "grad_norm": 1.4278922080993652, "learning_rate": 1.9551374972039218e-05, "loss": 0.6247, "step": 10475 }, { "epoch": 1.710175094894086, "grad_norm": 1.8292039632797241, "learning_rate": 1.9551280985144213e-05, "loss": 0.6646, "step": 10476 }, { "epoch": 1.7103383535365904, "grad_norm": 1.732792615890503, "learning_rate": 1.955118698863107e-05, "loss": 0.6285, "step": 10477 }, { "epoch": 1.7105016121790948, "grad_norm": 1.8926588296890259, "learning_rate": 1.955109298249989e-05, "loss": 0.7023, "step": 10478 }, { "epoch": 1.7106648708215992, "grad_norm": 1.47390878200531, "learning_rate": 1.955099896675076e-05, "loss": 0.5713, "step": 10479 }, { "epoch": 1.7108281294641035, "grad_norm": 1.9202725887298584, "learning_rate": 1.955090494138377e-05, "loss": 0.7725, "step": 10480 }, { "epoch": 1.710991388106608, "grad_norm": 1.5037055015563965, "learning_rate": 1.955081090639903e-05, "loss": 0.599, "step": 10481 }, { "epoch": 1.7111546467491123, "grad_norm": 1.7637490034103394, "learning_rate": 1.9550716861796623e-05, "loss": 0.793, "step": 10482 }, { "epoch": 1.7113179053916165, "grad_norm": 1.5457683801651, "learning_rate": 1.9550622807576647e-05, "loss": 0.6077, "step": 10483 }, { "epoch": 1.711481164034121, "grad_norm": 1.6197036504745483, "learning_rate": 1.9550528743739196e-05, "loss": 0.682, "step": 10484 }, { "epoch": 1.7116444226766254, "grad_norm": 1.9876668453216553, "learning_rate": 1.9550434670284362e-05, "loss": 0.7511, "step": 10485 }, { "epoch": 1.7118076813191299, "grad_norm": 1.875082015991211, "learning_rate": 1.9550340587212246e-05, "loss": 0.8692, "step": 10486 }, { "epoch": 1.7119709399616343, "grad_norm": 1.7174609899520874, "learning_rate": 1.9550246494522938e-05, "loss": 0.6743, "step": 10487 }, { "epoch": 1.7121341986041387, "grad_norm": 1.7721953392028809, "learning_rate": 1.9550152392216536e-05, "loss": 0.6547, "step": 10488 }, { "epoch": 1.712297457246643, "grad_norm": 1.6389391422271729, "learning_rate": 1.9550058280293132e-05, "loss": 0.7924, "step": 10489 }, { "epoch": 1.7124607158891474, "grad_norm": 1.6384506225585938, "learning_rate": 1.9549964158752825e-05, "loss": 0.6512, "step": 10490 }, { "epoch": 1.7126239745316516, "grad_norm": 1.6584464311599731, "learning_rate": 1.9549870027595702e-05, "loss": 0.7006, "step": 10491 }, { "epoch": 1.712787233174156, "grad_norm": 1.862842082977295, "learning_rate": 1.954977588682186e-05, "loss": 0.732, "step": 10492 }, { "epoch": 1.7129504918166605, "grad_norm": 2.126932382583618, "learning_rate": 1.95496817364314e-05, "loss": 0.8006, "step": 10493 }, { "epoch": 1.713113750459165, "grad_norm": 1.738707184791565, "learning_rate": 1.9549587576424418e-05, "loss": 0.6475, "step": 10494 }, { "epoch": 1.7132770091016694, "grad_norm": 1.613210678100586, "learning_rate": 1.9549493406800997e-05, "loss": 0.6921, "step": 10495 }, { "epoch": 1.7134402677441738, "grad_norm": 1.5360653400421143, "learning_rate": 1.9549399227561243e-05, "loss": 0.7315, "step": 10496 }, { "epoch": 1.7136035263866782, "grad_norm": 1.7634958028793335, "learning_rate": 1.954930503870524e-05, "loss": 0.6274, "step": 10497 }, { "epoch": 1.7137667850291824, "grad_norm": 1.334031105041504, "learning_rate": 1.9549210840233095e-05, "loss": 0.5568, "step": 10498 }, { "epoch": 1.7139300436716869, "grad_norm": 1.8970280885696411, "learning_rate": 1.95491166321449e-05, "loss": 0.6877, "step": 10499 }, { "epoch": 1.714093302314191, "grad_norm": 1.5677272081375122, "learning_rate": 1.9549022414440738e-05, "loss": 0.6232, "step": 10500 }, { "epoch": 1.7142565609566955, "grad_norm": 1.9785327911376953, "learning_rate": 1.954892818712072e-05, "loss": 0.7382, "step": 10501 }, { "epoch": 1.7144198195992, "grad_norm": 1.5939688682556152, "learning_rate": 1.9548833950184933e-05, "loss": 0.7328, "step": 10502 }, { "epoch": 1.7145830782417044, "grad_norm": 1.7605940103530884, "learning_rate": 1.9548739703633472e-05, "loss": 0.6365, "step": 10503 }, { "epoch": 1.7147463368842089, "grad_norm": 1.9104679822921753, "learning_rate": 1.9548645447466433e-05, "loss": 0.7344, "step": 10504 }, { "epoch": 1.7149095955267133, "grad_norm": 1.8453248739242554, "learning_rate": 1.954855118168391e-05, "loss": 0.6519, "step": 10505 }, { "epoch": 1.7150728541692177, "grad_norm": 1.4626961946487427, "learning_rate": 1.9548456906285996e-05, "loss": 0.6188, "step": 10506 }, { "epoch": 1.715236112811722, "grad_norm": 1.644094705581665, "learning_rate": 1.954836262127279e-05, "loss": 0.6216, "step": 10507 }, { "epoch": 1.7153993714542264, "grad_norm": 1.6551867723464966, "learning_rate": 1.9548268326644385e-05, "loss": 0.5509, "step": 10508 }, { "epoch": 1.7155626300967306, "grad_norm": 1.7874832153320312, "learning_rate": 1.954817402240088e-05, "loss": 0.5736, "step": 10509 }, { "epoch": 1.715725888739235, "grad_norm": 1.7282418012619019, "learning_rate": 1.9548079708542365e-05, "loss": 0.6743, "step": 10510 }, { "epoch": 1.7158891473817395, "grad_norm": 1.7626570463180542, "learning_rate": 1.9547985385068932e-05, "loss": 0.7101, "step": 10511 }, { "epoch": 1.716052406024244, "grad_norm": 1.702007532119751, "learning_rate": 1.9547891051980686e-05, "loss": 0.6577, "step": 10512 }, { "epoch": 1.7162156646667484, "grad_norm": 1.5831881761550903, "learning_rate": 1.954779670927771e-05, "loss": 0.5977, "step": 10513 }, { "epoch": 1.7163789233092528, "grad_norm": 1.6448665857315063, "learning_rate": 1.9547702356960112e-05, "loss": 0.544, "step": 10514 }, { "epoch": 1.7165421819517572, "grad_norm": 1.9218090772628784, "learning_rate": 1.954760799502798e-05, "loss": 0.7229, "step": 10515 }, { "epoch": 1.7167054405942614, "grad_norm": 1.7794705629348755, "learning_rate": 1.95475136234814e-05, "loss": 0.6561, "step": 10516 }, { "epoch": 1.7168686992367659, "grad_norm": 1.788110375404358, "learning_rate": 1.9547419242320484e-05, "loss": 0.5809, "step": 10517 }, { "epoch": 1.71703195787927, "grad_norm": 2.137939453125, "learning_rate": 1.9547324851545316e-05, "loss": 0.7334, "step": 10518 }, { "epoch": 1.7171952165217745, "grad_norm": 1.6484497785568237, "learning_rate": 1.9547230451156e-05, "loss": 0.6901, "step": 10519 }, { "epoch": 1.717358475164279, "grad_norm": 1.7820580005645752, "learning_rate": 1.954713604115262e-05, "loss": 0.7329, "step": 10520 }, { "epoch": 1.7175217338067834, "grad_norm": 1.6823458671569824, "learning_rate": 1.954704162153528e-05, "loss": 0.5886, "step": 10521 }, { "epoch": 1.7176849924492879, "grad_norm": 1.5869474411010742, "learning_rate": 1.9546947192304068e-05, "loss": 0.638, "step": 10522 }, { "epoch": 1.7178482510917923, "grad_norm": 1.735540509223938, "learning_rate": 1.9546852753459086e-05, "loss": 0.5968, "step": 10523 }, { "epoch": 1.7180115097342965, "grad_norm": 1.676934838294983, "learning_rate": 1.9546758305000422e-05, "loss": 0.697, "step": 10524 }, { "epoch": 1.718174768376801, "grad_norm": 2.215573787689209, "learning_rate": 1.954666384692818e-05, "loss": 0.9233, "step": 10525 }, { "epoch": 1.7183380270193054, "grad_norm": 1.4089696407318115, "learning_rate": 1.9546569379242446e-05, "loss": 0.5532, "step": 10526 }, { "epoch": 1.7185012856618096, "grad_norm": 1.9756587743759155, "learning_rate": 1.954647490194332e-05, "loss": 0.8333, "step": 10527 }, { "epoch": 1.718664544304314, "grad_norm": 1.6064860820770264, "learning_rate": 1.95463804150309e-05, "loss": 0.6479, "step": 10528 }, { "epoch": 1.7188278029468185, "grad_norm": 1.8488706350326538, "learning_rate": 1.9546285918505274e-05, "loss": 0.7614, "step": 10529 }, { "epoch": 1.718991061589323, "grad_norm": 1.8997039794921875, "learning_rate": 1.9546191412366543e-05, "loss": 0.7544, "step": 10530 }, { "epoch": 1.7191543202318273, "grad_norm": 1.8627084493637085, "learning_rate": 1.9546096896614795e-05, "loss": 0.7237, "step": 10531 }, { "epoch": 1.7193175788743318, "grad_norm": 1.663925290107727, "learning_rate": 1.9546002371250134e-05, "loss": 0.5658, "step": 10532 }, { "epoch": 1.719480837516836, "grad_norm": 1.6518642902374268, "learning_rate": 1.954590783627265e-05, "loss": 0.6932, "step": 10533 }, { "epoch": 1.7196440961593404, "grad_norm": 1.8122893571853638, "learning_rate": 1.9545813291682437e-05, "loss": 0.7529, "step": 10534 }, { "epoch": 1.7198073548018447, "grad_norm": 1.7759326696395874, "learning_rate": 1.9545718737479594e-05, "loss": 0.797, "step": 10535 }, { "epoch": 1.719970613444349, "grad_norm": 1.7745518684387207, "learning_rate": 1.9545624173664218e-05, "loss": 0.7905, "step": 10536 }, { "epoch": 1.7201338720868535, "grad_norm": 1.8253284692764282, "learning_rate": 1.95455296002364e-05, "loss": 0.6049, "step": 10537 }, { "epoch": 1.720297130729358, "grad_norm": 1.4455910921096802, "learning_rate": 1.9545435017196233e-05, "loss": 0.6269, "step": 10538 }, { "epoch": 1.7204603893718624, "grad_norm": 1.3832334280014038, "learning_rate": 1.9545340424543816e-05, "loss": 0.5732, "step": 10539 }, { "epoch": 1.7206236480143668, "grad_norm": 1.675455093383789, "learning_rate": 1.9545245822279243e-05, "loss": 0.7821, "step": 10540 }, { "epoch": 1.7207869066568713, "grad_norm": 1.5102587938308716, "learning_rate": 1.9545151210402615e-05, "loss": 0.6382, "step": 10541 }, { "epoch": 1.7209501652993755, "grad_norm": 1.5390368700027466, "learning_rate": 1.954505658891402e-05, "loss": 0.6797, "step": 10542 }, { "epoch": 1.72111342394188, "grad_norm": 1.714815378189087, "learning_rate": 1.9544961957813554e-05, "loss": 0.6286, "step": 10543 }, { "epoch": 1.7212766825843842, "grad_norm": 1.7877800464630127, "learning_rate": 1.9544867317101315e-05, "loss": 0.6626, "step": 10544 }, { "epoch": 1.7214399412268886, "grad_norm": 1.5722830295562744, "learning_rate": 1.9544772666777397e-05, "loss": 0.621, "step": 10545 }, { "epoch": 1.721603199869393, "grad_norm": 1.7013568878173828, "learning_rate": 1.9544678006841894e-05, "loss": 0.6813, "step": 10546 }, { "epoch": 1.7217664585118975, "grad_norm": 1.8173472881317139, "learning_rate": 1.9544583337294902e-05, "loss": 0.6543, "step": 10547 }, { "epoch": 1.721929717154402, "grad_norm": 1.436177134513855, "learning_rate": 1.9544488658136522e-05, "loss": 0.5965, "step": 10548 }, { "epoch": 1.7220929757969063, "grad_norm": 1.8009873628616333, "learning_rate": 1.954439396936684e-05, "loss": 0.7044, "step": 10549 }, { "epoch": 1.7222562344394108, "grad_norm": 1.9202760457992554, "learning_rate": 1.9544299270985958e-05, "loss": 0.6664, "step": 10550 }, { "epoch": 1.722419493081915, "grad_norm": 1.5490577220916748, "learning_rate": 1.954420456299397e-05, "loss": 0.6142, "step": 10551 }, { "epoch": 1.7225827517244194, "grad_norm": 1.6230870485305786, "learning_rate": 1.954410984539097e-05, "loss": 0.8167, "step": 10552 }, { "epoch": 1.7227460103669237, "grad_norm": 1.6663614511489868, "learning_rate": 1.954401511817705e-05, "loss": 0.6875, "step": 10553 }, { "epoch": 1.722909269009428, "grad_norm": 1.8314623832702637, "learning_rate": 1.954392038135231e-05, "loss": 0.765, "step": 10554 }, { "epoch": 1.7230725276519325, "grad_norm": 1.8969863653182983, "learning_rate": 1.954382563491685e-05, "loss": 0.7448, "step": 10555 }, { "epoch": 1.723235786294437, "grad_norm": 1.8221365213394165, "learning_rate": 1.9543730878870757e-05, "loss": 0.7622, "step": 10556 }, { "epoch": 1.7233990449369414, "grad_norm": 1.4239269495010376, "learning_rate": 1.954363611321413e-05, "loss": 0.5685, "step": 10557 }, { "epoch": 1.7235623035794458, "grad_norm": 1.8908969163894653, "learning_rate": 1.9543541337947063e-05, "loss": 0.7043, "step": 10558 }, { "epoch": 1.7237255622219503, "grad_norm": 2.024406909942627, "learning_rate": 1.954344655306965e-05, "loss": 0.7301, "step": 10559 }, { "epoch": 1.7238888208644545, "grad_norm": 1.7487016916275024, "learning_rate": 1.9543351758581995e-05, "loss": 0.7452, "step": 10560 }, { "epoch": 1.724052079506959, "grad_norm": 1.455611228942871, "learning_rate": 1.9543256954484185e-05, "loss": 0.6436, "step": 10561 }, { "epoch": 1.7242153381494632, "grad_norm": 1.5850998163223267, "learning_rate": 1.9543162140776316e-05, "loss": 0.5881, "step": 10562 }, { "epoch": 1.7243785967919676, "grad_norm": 2.248586654663086, "learning_rate": 1.9543067317458485e-05, "loss": 1.486, "step": 10563 }, { "epoch": 1.724541855434472, "grad_norm": 1.5249744653701782, "learning_rate": 1.954297248453079e-05, "loss": 0.5178, "step": 10564 }, { "epoch": 1.7247051140769765, "grad_norm": 1.6350375413894653, "learning_rate": 1.9542877641993324e-05, "loss": 0.6248, "step": 10565 }, { "epoch": 1.724868372719481, "grad_norm": 1.7565058469772339, "learning_rate": 1.954278278984618e-05, "loss": 0.6314, "step": 10566 }, { "epoch": 1.7250316313619853, "grad_norm": 1.6368539333343506, "learning_rate": 1.954268792808946e-05, "loss": 0.6719, "step": 10567 }, { "epoch": 1.7251948900044898, "grad_norm": 1.6005159616470337, "learning_rate": 1.9542593056723254e-05, "loss": 0.6295, "step": 10568 }, { "epoch": 1.725358148646994, "grad_norm": 1.4432224035263062, "learning_rate": 1.9542498175747657e-05, "loss": 0.6748, "step": 10569 }, { "epoch": 1.7255214072894984, "grad_norm": 1.737804889678955, "learning_rate": 1.954240328516277e-05, "loss": 0.6874, "step": 10570 }, { "epoch": 1.7256846659320026, "grad_norm": 1.7625536918640137, "learning_rate": 1.9542308384968685e-05, "loss": 0.7523, "step": 10571 }, { "epoch": 1.725847924574507, "grad_norm": 1.730936884880066, "learning_rate": 1.95422134751655e-05, "loss": 0.7392, "step": 10572 }, { "epoch": 1.7260111832170115, "grad_norm": 1.934177041053772, "learning_rate": 1.9542118555753302e-05, "loss": 0.7962, "step": 10573 }, { "epoch": 1.726174441859516, "grad_norm": 1.6105421781539917, "learning_rate": 1.9542023626732197e-05, "loss": 0.5954, "step": 10574 }, { "epoch": 1.7263377005020204, "grad_norm": 1.7367273569107056, "learning_rate": 1.9541928688102278e-05, "loss": 0.6114, "step": 10575 }, { "epoch": 1.7265009591445248, "grad_norm": 1.6149802207946777, "learning_rate": 1.954183373986364e-05, "loss": 0.6535, "step": 10576 }, { "epoch": 1.726664217787029, "grad_norm": 2.0760560035705566, "learning_rate": 1.954173878201638e-05, "loss": 0.752, "step": 10577 }, { "epoch": 1.7268274764295335, "grad_norm": 1.5624138116836548, "learning_rate": 1.9541643814560584e-05, "loss": 0.6165, "step": 10578 }, { "epoch": 1.726990735072038, "grad_norm": 1.9820643663406372, "learning_rate": 1.954154883749636e-05, "loss": 0.684, "step": 10579 }, { "epoch": 1.7271539937145421, "grad_norm": 1.8963795900344849, "learning_rate": 1.9541453850823796e-05, "loss": 0.7319, "step": 10580 }, { "epoch": 1.7273172523570466, "grad_norm": 1.5788037776947021, "learning_rate": 1.9541358854542993e-05, "loss": 0.735, "step": 10581 }, { "epoch": 1.727480510999551, "grad_norm": 1.7232866287231445, "learning_rate": 1.9541263848654044e-05, "loss": 0.6275, "step": 10582 }, { "epoch": 1.7276437696420555, "grad_norm": 1.9382745027542114, "learning_rate": 1.9541168833157044e-05, "loss": 0.8312, "step": 10583 }, { "epoch": 1.72780702828456, "grad_norm": 1.3694684505462646, "learning_rate": 1.954107380805209e-05, "loss": 0.6456, "step": 10584 }, { "epoch": 1.7279702869270643, "grad_norm": 1.4017574787139893, "learning_rate": 1.954097877333928e-05, "loss": 0.5594, "step": 10585 }, { "epoch": 1.7281335455695686, "grad_norm": 1.7509292364120483, "learning_rate": 1.95408837290187e-05, "loss": 0.5151, "step": 10586 }, { "epoch": 1.728296804212073, "grad_norm": 1.4830998182296753, "learning_rate": 1.9540788675090458e-05, "loss": 0.5924, "step": 10587 }, { "epoch": 1.7284600628545772, "grad_norm": 1.7681158781051636, "learning_rate": 1.9540693611554645e-05, "loss": 0.7085, "step": 10588 }, { "epoch": 1.7286233214970816, "grad_norm": 1.490742564201355, "learning_rate": 1.954059853841135e-05, "loss": 0.5884, "step": 10589 }, { "epoch": 1.728786580139586, "grad_norm": 1.3024935722351074, "learning_rate": 1.954050345566068e-05, "loss": 0.517, "step": 10590 }, { "epoch": 1.7289498387820905, "grad_norm": 1.6590701341629028, "learning_rate": 1.9540408363302726e-05, "loss": 0.6199, "step": 10591 }, { "epoch": 1.729113097424595, "grad_norm": 1.4368504285812378, "learning_rate": 1.954031326133758e-05, "loss": 0.5192, "step": 10592 }, { "epoch": 1.7292763560670994, "grad_norm": 1.7187329530715942, "learning_rate": 1.9540218149765344e-05, "loss": 0.6571, "step": 10593 }, { "epoch": 1.7294396147096038, "grad_norm": 1.534321665763855, "learning_rate": 1.9540123028586107e-05, "loss": 0.6891, "step": 10594 }, { "epoch": 1.729602873352108, "grad_norm": 1.7339712381362915, "learning_rate": 1.9540027897799975e-05, "loss": 0.767, "step": 10595 }, { "epoch": 1.7297661319946125, "grad_norm": 1.9697129726409912, "learning_rate": 1.9539932757407036e-05, "loss": 0.7218, "step": 10596 }, { "epoch": 1.7299293906371167, "grad_norm": 1.7139936685562134, "learning_rate": 1.953983760740738e-05, "loss": 0.6109, "step": 10597 }, { "epoch": 1.7300926492796211, "grad_norm": 1.6778162717819214, "learning_rate": 1.9539742447801115e-05, "loss": 0.716, "step": 10598 }, { "epoch": 1.7302559079221256, "grad_norm": 1.817650318145752, "learning_rate": 1.9539647278588334e-05, "loss": 0.6826, "step": 10599 }, { "epoch": 1.73041916656463, "grad_norm": 1.484668493270874, "learning_rate": 1.9539552099769128e-05, "loss": 0.55, "step": 10600 }, { "epoch": 1.7305824252071345, "grad_norm": 1.3935587406158447, "learning_rate": 1.9539456911343596e-05, "loss": 0.5205, "step": 10601 }, { "epoch": 1.730745683849639, "grad_norm": 1.8158220052719116, "learning_rate": 1.9539361713311833e-05, "loss": 0.7207, "step": 10602 }, { "epoch": 1.7309089424921433, "grad_norm": 1.341210126876831, "learning_rate": 1.9539266505673938e-05, "loss": 0.5224, "step": 10603 }, { "epoch": 1.7310722011346475, "grad_norm": 1.9447590112686157, "learning_rate": 1.953917128843e-05, "loss": 0.6645, "step": 10604 }, { "epoch": 1.731235459777152, "grad_norm": 1.3831266164779663, "learning_rate": 1.9539076061580124e-05, "loss": 0.6196, "step": 10605 }, { "epoch": 1.7313987184196562, "grad_norm": 1.7333645820617676, "learning_rate": 1.9538980825124395e-05, "loss": 0.5062, "step": 10606 }, { "epoch": 1.7315619770621606, "grad_norm": 1.782394528388977, "learning_rate": 1.953888557906292e-05, "loss": 0.6182, "step": 10607 }, { "epoch": 1.731725235704665, "grad_norm": 2.1425974369049072, "learning_rate": 1.9538790323395786e-05, "loss": 0.7764, "step": 10608 }, { "epoch": 1.7318884943471695, "grad_norm": 2.034274101257324, "learning_rate": 1.9538695058123095e-05, "loss": 0.69, "step": 10609 }, { "epoch": 1.732051752989674, "grad_norm": 1.4751893281936646, "learning_rate": 1.953859978324494e-05, "loss": 0.5055, "step": 10610 }, { "epoch": 1.7322150116321784, "grad_norm": 1.6922223567962646, "learning_rate": 1.953850449876142e-05, "loss": 0.7228, "step": 10611 }, { "epoch": 1.7323782702746828, "grad_norm": 1.9303778409957886, "learning_rate": 1.9538409204672624e-05, "loss": 0.7537, "step": 10612 }, { "epoch": 1.732541528917187, "grad_norm": 1.9266541004180908, "learning_rate": 1.9538313900978654e-05, "loss": 0.847, "step": 10613 }, { "epoch": 1.7327047875596915, "grad_norm": 1.6379420757293701, "learning_rate": 1.9538218587679605e-05, "loss": 0.6329, "step": 10614 }, { "epoch": 1.7328680462021957, "grad_norm": 1.579006552696228, "learning_rate": 1.9538123264775572e-05, "loss": 0.6304, "step": 10615 }, { "epoch": 1.7330313048447001, "grad_norm": 1.603655219078064, "learning_rate": 1.9538027932266653e-05, "loss": 0.6743, "step": 10616 }, { "epoch": 1.7331945634872046, "grad_norm": 1.7723690271377563, "learning_rate": 1.953793259015294e-05, "loss": 0.618, "step": 10617 }, { "epoch": 1.733357822129709, "grad_norm": 1.7787483930587769, "learning_rate": 1.9537837238434532e-05, "loss": 0.6411, "step": 10618 }, { "epoch": 1.7335210807722135, "grad_norm": 1.984925627708435, "learning_rate": 1.9537741877111527e-05, "loss": 0.7424, "step": 10619 }, { "epoch": 1.733684339414718, "grad_norm": 1.7774463891983032, "learning_rate": 1.9537646506184016e-05, "loss": 0.7405, "step": 10620 }, { "epoch": 1.733847598057222, "grad_norm": 1.6227116584777832, "learning_rate": 1.9537551125652096e-05, "loss": 0.7284, "step": 10621 }, { "epoch": 1.7340108566997265, "grad_norm": 1.7611488103866577, "learning_rate": 1.953745573551587e-05, "loss": 0.7101, "step": 10622 }, { "epoch": 1.734174115342231, "grad_norm": 1.513990044593811, "learning_rate": 1.9537360335775425e-05, "loss": 0.5484, "step": 10623 }, { "epoch": 1.7343373739847352, "grad_norm": 1.768786072731018, "learning_rate": 1.9537264926430856e-05, "loss": 0.7907, "step": 10624 }, { "epoch": 1.7345006326272396, "grad_norm": 2.017465829849243, "learning_rate": 1.953716950748227e-05, "loss": 0.7448, "step": 10625 }, { "epoch": 1.734663891269744, "grad_norm": 1.4632030725479126, "learning_rate": 1.9537074078929757e-05, "loss": 0.5172, "step": 10626 }, { "epoch": 1.7348271499122485, "grad_norm": 1.745485782623291, "learning_rate": 1.953697864077341e-05, "loss": 0.6497, "step": 10627 }, { "epoch": 1.734990408554753, "grad_norm": 1.7179838418960571, "learning_rate": 1.953688319301333e-05, "loss": 0.5464, "step": 10628 }, { "epoch": 1.7351536671972574, "grad_norm": 1.576981544494629, "learning_rate": 1.9536787735649612e-05, "loss": 0.6319, "step": 10629 }, { "epoch": 1.7353169258397616, "grad_norm": 1.8889379501342773, "learning_rate": 1.9536692268682348e-05, "loss": 0.7445, "step": 10630 }, { "epoch": 1.735480184482266, "grad_norm": 1.4420335292816162, "learning_rate": 1.953659679211164e-05, "loss": 0.5888, "step": 10631 }, { "epoch": 1.7356434431247703, "grad_norm": 1.801537036895752, "learning_rate": 1.9536501305937578e-05, "loss": 0.712, "step": 10632 }, { "epoch": 1.7358067017672747, "grad_norm": 1.8794673681259155, "learning_rate": 1.9536405810160267e-05, "loss": 0.6791, "step": 10633 }, { "epoch": 1.7359699604097791, "grad_norm": 1.5929036140441895, "learning_rate": 1.9536310304779797e-05, "loss": 0.6131, "step": 10634 }, { "epoch": 1.7361332190522836, "grad_norm": 1.7105309963226318, "learning_rate": 1.953621478979626e-05, "loss": 0.8359, "step": 10635 }, { "epoch": 1.736296477694788, "grad_norm": 1.4507215023040771, "learning_rate": 1.9536119265209763e-05, "loss": 0.6477, "step": 10636 }, { "epoch": 1.7364597363372924, "grad_norm": 1.645018458366394, "learning_rate": 1.953602373102039e-05, "loss": 0.7053, "step": 10637 }, { "epoch": 1.7366229949797969, "grad_norm": 1.6468052864074707, "learning_rate": 1.953592818722825e-05, "loss": 0.7371, "step": 10638 }, { "epoch": 1.736786253622301, "grad_norm": 1.5783240795135498, "learning_rate": 1.953583263383343e-05, "loss": 0.6765, "step": 10639 }, { "epoch": 1.7369495122648055, "grad_norm": 1.2791000604629517, "learning_rate": 1.9535737070836028e-05, "loss": 0.5194, "step": 10640 }, { "epoch": 1.7371127709073098, "grad_norm": 1.3839515447616577, "learning_rate": 1.9535641498236145e-05, "loss": 0.5557, "step": 10641 }, { "epoch": 1.7372760295498142, "grad_norm": 1.5100871324539185, "learning_rate": 1.953554591603387e-05, "loss": 0.5942, "step": 10642 }, { "epoch": 1.7374392881923186, "grad_norm": 1.7086377143859863, "learning_rate": 1.9535450324229307e-05, "loss": 0.5811, "step": 10643 }, { "epoch": 1.737602546834823, "grad_norm": 1.68819260597229, "learning_rate": 1.9535354722822545e-05, "loss": 0.5964, "step": 10644 }, { "epoch": 1.7377658054773275, "grad_norm": 1.4723917245864868, "learning_rate": 1.9535259111813682e-05, "loss": 0.6529, "step": 10645 }, { "epoch": 1.737929064119832, "grad_norm": 1.4796161651611328, "learning_rate": 1.9535163491202817e-05, "loss": 0.6473, "step": 10646 }, { "epoch": 1.7380923227623364, "grad_norm": 1.490499496459961, "learning_rate": 1.9535067860990046e-05, "loss": 0.5595, "step": 10647 }, { "epoch": 1.7382555814048406, "grad_norm": 1.803398609161377, "learning_rate": 1.9534972221175463e-05, "loss": 0.6914, "step": 10648 }, { "epoch": 1.738418840047345, "grad_norm": 1.7491391897201538, "learning_rate": 1.9534876571759165e-05, "loss": 0.7305, "step": 10649 }, { "epoch": 1.7385820986898493, "grad_norm": 1.6702085733413696, "learning_rate": 1.953478091274125e-05, "loss": 0.7222, "step": 10650 }, { "epoch": 1.7387453573323537, "grad_norm": 1.575265645980835, "learning_rate": 1.9534685244121814e-05, "loss": 0.5918, "step": 10651 }, { "epoch": 1.7389086159748581, "grad_norm": 1.8249541521072388, "learning_rate": 1.953458956590095e-05, "loss": 0.7912, "step": 10652 }, { "epoch": 1.7390718746173626, "grad_norm": 1.6756296157836914, "learning_rate": 1.9534493878078756e-05, "loss": 0.7391, "step": 10653 }, { "epoch": 1.739235133259867, "grad_norm": 1.5395827293395996, "learning_rate": 1.953439818065533e-05, "loss": 0.6402, "step": 10654 }, { "epoch": 1.7393983919023714, "grad_norm": 1.6690785884857178, "learning_rate": 1.9534302473630774e-05, "loss": 0.6763, "step": 10655 }, { "epoch": 1.7395616505448759, "grad_norm": 2.1039669513702393, "learning_rate": 1.953420675700517e-05, "loss": 0.8104, "step": 10656 }, { "epoch": 1.73972490918738, "grad_norm": 2.0114612579345703, "learning_rate": 1.9534111030778623e-05, "loss": 0.6973, "step": 10657 }, { "epoch": 1.7398881678298845, "grad_norm": 1.9010531902313232, "learning_rate": 1.9534015294951235e-05, "loss": 0.8051, "step": 10658 }, { "epoch": 1.7400514264723888, "grad_norm": 2.1739630699157715, "learning_rate": 1.9533919549523092e-05, "loss": 0.7386, "step": 10659 }, { "epoch": 1.7402146851148932, "grad_norm": 1.7289904356002808, "learning_rate": 1.9533823794494294e-05, "loss": 0.6817, "step": 10660 }, { "epoch": 1.7403779437573976, "grad_norm": 1.670079231262207, "learning_rate": 1.9533728029864937e-05, "loss": 0.6543, "step": 10661 }, { "epoch": 1.740541202399902, "grad_norm": 1.462788701057434, "learning_rate": 1.953363225563512e-05, "loss": 0.593, "step": 10662 }, { "epoch": 1.7407044610424065, "grad_norm": 1.5636025667190552, "learning_rate": 1.9533536471804938e-05, "loss": 0.6495, "step": 10663 }, { "epoch": 1.740867719684911, "grad_norm": 1.6936249732971191, "learning_rate": 1.9533440678374486e-05, "loss": 0.7034, "step": 10664 }, { "epoch": 1.7410309783274152, "grad_norm": 1.4976593255996704, "learning_rate": 1.9533344875343863e-05, "loss": 0.6615, "step": 10665 }, { "epoch": 1.7411942369699196, "grad_norm": 1.439508318901062, "learning_rate": 1.9533249062713163e-05, "loss": 0.5519, "step": 10666 }, { "epoch": 1.741357495612424, "grad_norm": 1.4585577249526978, "learning_rate": 1.953315324048249e-05, "loss": 0.627, "step": 10667 }, { "epoch": 1.7415207542549282, "grad_norm": 1.6699813604354858, "learning_rate": 1.9533057408651926e-05, "loss": 0.6389, "step": 10668 }, { "epoch": 1.7416840128974327, "grad_norm": 1.5431424379348755, "learning_rate": 1.9532961567221577e-05, "loss": 0.551, "step": 10669 }, { "epoch": 1.7418472715399371, "grad_norm": 1.5191174745559692, "learning_rate": 1.953286571619154e-05, "loss": 0.6245, "step": 10670 }, { "epoch": 1.7420105301824416, "grad_norm": 1.5608497858047485, "learning_rate": 1.953276985556191e-05, "loss": 0.6348, "step": 10671 }, { "epoch": 1.742173788824946, "grad_norm": 1.6324998140335083, "learning_rate": 1.9532673985332783e-05, "loss": 0.6509, "step": 10672 }, { "epoch": 1.7423370474674504, "grad_norm": 2.0397937297821045, "learning_rate": 1.9532578105504255e-05, "loss": 0.7965, "step": 10673 }, { "epoch": 1.7425003061099547, "grad_norm": 1.3539122343063354, "learning_rate": 1.9532482216076425e-05, "loss": 0.5818, "step": 10674 }, { "epoch": 1.742663564752459, "grad_norm": 1.609420895576477, "learning_rate": 1.9532386317049387e-05, "loss": 0.6696, "step": 10675 }, { "epoch": 1.7428268233949633, "grad_norm": 1.7696812152862549, "learning_rate": 1.9532290408423236e-05, "loss": 0.6299, "step": 10676 }, { "epoch": 1.7429900820374677, "grad_norm": 2.063328742980957, "learning_rate": 1.9532194490198074e-05, "loss": 0.817, "step": 10677 }, { "epoch": 1.7431533406799722, "grad_norm": 1.7363944053649902, "learning_rate": 1.9532098562373997e-05, "loss": 0.6601, "step": 10678 }, { "epoch": 1.7433165993224766, "grad_norm": 1.9815986156463623, "learning_rate": 1.9532002624951097e-05, "loss": 0.7904, "step": 10679 }, { "epoch": 1.743479857964981, "grad_norm": 1.4014389514923096, "learning_rate": 1.9531906677929472e-05, "loss": 0.534, "step": 10680 }, { "epoch": 1.7436431166074855, "grad_norm": 1.5112221240997314, "learning_rate": 1.953181072130922e-05, "loss": 0.6423, "step": 10681 }, { "epoch": 1.74380637524999, "grad_norm": 1.6072019338607788, "learning_rate": 1.9531714755090438e-05, "loss": 0.6744, "step": 10682 }, { "epoch": 1.7439696338924942, "grad_norm": 1.5799765586853027, "learning_rate": 1.953161877927322e-05, "loss": 0.7014, "step": 10683 }, { "epoch": 1.7441328925349986, "grad_norm": 1.667450189590454, "learning_rate": 1.9531522793857663e-05, "loss": 0.6508, "step": 10684 }, { "epoch": 1.7442961511775028, "grad_norm": 1.6054002046585083, "learning_rate": 1.953142679884387e-05, "loss": 0.613, "step": 10685 }, { "epoch": 1.7444594098200072, "grad_norm": 1.5304906368255615, "learning_rate": 1.9531330794231928e-05, "loss": 0.6034, "step": 10686 }, { "epoch": 1.7446226684625117, "grad_norm": 1.3575116395950317, "learning_rate": 1.953123478002194e-05, "loss": 0.5364, "step": 10687 }, { "epoch": 1.7447859271050161, "grad_norm": 1.4334372282028198, "learning_rate": 1.9531138756214004e-05, "loss": 0.5706, "step": 10688 }, { "epoch": 1.7449491857475206, "grad_norm": 1.613806128501892, "learning_rate": 1.953104272280821e-05, "loss": 0.6063, "step": 10689 }, { "epoch": 1.745112444390025, "grad_norm": 1.4269624948501587, "learning_rate": 1.953094667980466e-05, "loss": 0.5717, "step": 10690 }, { "epoch": 1.7452757030325294, "grad_norm": 1.5211204290390015, "learning_rate": 1.953085062720345e-05, "loss": 0.6624, "step": 10691 }, { "epoch": 1.7454389616750337, "grad_norm": 1.7872917652130127, "learning_rate": 1.9530754565004674e-05, "loss": 0.8588, "step": 10692 }, { "epoch": 1.745602220317538, "grad_norm": 1.6194450855255127, "learning_rate": 1.953065849320843e-05, "loss": 0.6213, "step": 10693 }, { "epoch": 1.7457654789600423, "grad_norm": 1.3746157884597778, "learning_rate": 1.953056241181482e-05, "loss": 0.5409, "step": 10694 }, { "epoch": 1.7459287376025467, "grad_norm": 1.9657628536224365, "learning_rate": 1.9530466320823933e-05, "loss": 0.7654, "step": 10695 }, { "epoch": 1.7460919962450512, "grad_norm": 1.8772079944610596, "learning_rate": 1.953037022023587e-05, "loss": 0.5858, "step": 10696 }, { "epoch": 1.7462552548875556, "grad_norm": 2.121143341064453, "learning_rate": 1.9530274110050726e-05, "loss": 0.7453, "step": 10697 }, { "epoch": 1.74641851353006, "grad_norm": 2.0563087463378906, "learning_rate": 1.95301779902686e-05, "loss": 0.7033, "step": 10698 }, { "epoch": 1.7465817721725645, "grad_norm": 1.6286327838897705, "learning_rate": 1.9530081860889586e-05, "loss": 0.6415, "step": 10699 }, { "epoch": 1.746745030815069, "grad_norm": 1.7814826965332031, "learning_rate": 1.952998572191378e-05, "loss": 0.6404, "step": 10700 }, { "epoch": 1.7469082894575731, "grad_norm": 1.6063657999038696, "learning_rate": 1.952988957334128e-05, "loss": 0.7378, "step": 10701 }, { "epoch": 1.7470715481000776, "grad_norm": 1.5934327840805054, "learning_rate": 1.952979341517219e-05, "loss": 0.615, "step": 10702 }, { "epoch": 1.7472348067425818, "grad_norm": 1.725294828414917, "learning_rate": 1.9529697247406596e-05, "loss": 0.7344, "step": 10703 }, { "epoch": 1.7473980653850862, "grad_norm": 1.5010987520217896, "learning_rate": 1.9529601070044603e-05, "loss": 0.6444, "step": 10704 }, { "epoch": 1.7475613240275907, "grad_norm": 1.8272624015808105, "learning_rate": 1.9529504883086302e-05, "loss": 0.7557, "step": 10705 }, { "epoch": 1.7477245826700951, "grad_norm": 1.7367616891860962, "learning_rate": 1.952940868653179e-05, "loss": 0.7205, "step": 10706 }, { "epoch": 1.7478878413125996, "grad_norm": 1.749585747718811, "learning_rate": 1.952931248038117e-05, "loss": 0.7402, "step": 10707 }, { "epoch": 1.748051099955104, "grad_norm": 1.794198989868164, "learning_rate": 1.9529216264634533e-05, "loss": 0.7114, "step": 10708 }, { "epoch": 1.7482143585976082, "grad_norm": 1.5261139869689941, "learning_rate": 1.9529120039291975e-05, "loss": 0.7004, "step": 10709 }, { "epoch": 1.7483776172401126, "grad_norm": 1.5516608953475952, "learning_rate": 1.9529023804353598e-05, "loss": 0.6309, "step": 10710 }, { "epoch": 1.748540875882617, "grad_norm": 1.5820624828338623, "learning_rate": 1.9528927559819497e-05, "loss": 0.5994, "step": 10711 }, { "epoch": 1.7487041345251213, "grad_norm": 1.7250977754592896, "learning_rate": 1.952883130568977e-05, "loss": 0.7757, "step": 10712 }, { "epoch": 1.7488673931676257, "grad_norm": 1.8915307521820068, "learning_rate": 1.952873504196451e-05, "loss": 0.8007, "step": 10713 }, { "epoch": 1.7490306518101302, "grad_norm": 1.682375192642212, "learning_rate": 1.9528638768643814e-05, "loss": 0.5906, "step": 10714 }, { "epoch": 1.7491939104526346, "grad_norm": 1.6707977056503296, "learning_rate": 1.9528542485727787e-05, "loss": 0.7422, "step": 10715 }, { "epoch": 1.749357169095139, "grad_norm": 1.7416025400161743, "learning_rate": 1.9528446193216516e-05, "loss": 0.7058, "step": 10716 }, { "epoch": 1.7495204277376435, "grad_norm": 1.7810405492782593, "learning_rate": 1.9528349891110104e-05, "loss": 0.7959, "step": 10717 }, { "epoch": 1.7496836863801477, "grad_norm": 1.707619071006775, "learning_rate": 1.9528253579408644e-05, "loss": 0.7189, "step": 10718 }, { "epoch": 1.7498469450226521, "grad_norm": 1.7502764463424683, "learning_rate": 1.952815725811224e-05, "loss": 0.7811, "step": 10719 }, { "epoch": 1.7500102036651564, "grad_norm": 1.6503678560256958, "learning_rate": 1.952806092722098e-05, "loss": 0.6026, "step": 10720 }, { "epoch": 1.7501734623076608, "grad_norm": 1.7462319135665894, "learning_rate": 1.9527964586734967e-05, "loss": 0.5974, "step": 10721 }, { "epoch": 1.7503367209501652, "grad_norm": 1.6383886337280273, "learning_rate": 1.9527868236654296e-05, "loss": 0.7407, "step": 10722 }, { "epoch": 1.7504999795926697, "grad_norm": 1.77699875831604, "learning_rate": 1.9527771876979062e-05, "loss": 0.5341, "step": 10723 }, { "epoch": 1.7506632382351741, "grad_norm": 1.639184832572937, "learning_rate": 1.9527675507709368e-05, "loss": 0.6405, "step": 10724 }, { "epoch": 1.7508264968776786, "grad_norm": 1.7103420495986938, "learning_rate": 1.9527579128845304e-05, "loss": 0.5437, "step": 10725 }, { "epoch": 1.750989755520183, "grad_norm": 1.494810938835144, "learning_rate": 1.9527482740386972e-05, "loss": 0.5921, "step": 10726 }, { "epoch": 1.7511530141626872, "grad_norm": 1.5288208723068237, "learning_rate": 1.9527386342334468e-05, "loss": 0.5907, "step": 10727 }, { "epoch": 1.7513162728051916, "grad_norm": 1.9668656587600708, "learning_rate": 1.9527289934687886e-05, "loss": 0.7954, "step": 10728 }, { "epoch": 1.7514795314476959, "grad_norm": 1.6285535097122192, "learning_rate": 1.9527193517447328e-05, "loss": 0.6791, "step": 10729 }, { "epoch": 1.7516427900902003, "grad_norm": 1.7145187854766846, "learning_rate": 1.9527097090612888e-05, "loss": 0.6132, "step": 10730 }, { "epoch": 1.7518060487327047, "grad_norm": 1.630653977394104, "learning_rate": 1.952700065418466e-05, "loss": 0.6366, "step": 10731 }, { "epoch": 1.7519693073752092, "grad_norm": 1.727787971496582, "learning_rate": 1.952690420816275e-05, "loss": 0.6666, "step": 10732 }, { "epoch": 1.7521325660177136, "grad_norm": 1.9507721662521362, "learning_rate": 1.952680775254725e-05, "loss": 0.6634, "step": 10733 }, { "epoch": 1.752295824660218, "grad_norm": 1.7663540840148926, "learning_rate": 1.9526711287338256e-05, "loss": 0.7122, "step": 10734 }, { "epoch": 1.7524590833027225, "grad_norm": 1.9366360902786255, "learning_rate": 1.9526614812535866e-05, "loss": 0.7152, "step": 10735 }, { "epoch": 1.7526223419452267, "grad_norm": 1.7407790422439575, "learning_rate": 1.9526518328140177e-05, "loss": 0.6915, "step": 10736 }, { "epoch": 1.7527856005877311, "grad_norm": 1.3461657762527466, "learning_rate": 1.9526421834151284e-05, "loss": 0.5267, "step": 10737 }, { "epoch": 1.7529488592302354, "grad_norm": 1.7447257041931152, "learning_rate": 1.952632533056929e-05, "loss": 0.6183, "step": 10738 }, { "epoch": 1.7531121178727398, "grad_norm": 1.6941795349121094, "learning_rate": 1.952622881739429e-05, "loss": 0.6457, "step": 10739 }, { "epoch": 1.7532753765152442, "grad_norm": 1.592056155204773, "learning_rate": 1.9526132294626377e-05, "loss": 0.709, "step": 10740 }, { "epoch": 1.7534386351577487, "grad_norm": 1.7101908922195435, "learning_rate": 1.9526035762265652e-05, "loss": 0.6995, "step": 10741 }, { "epoch": 1.753601893800253, "grad_norm": 1.7220239639282227, "learning_rate": 1.9525939220312215e-05, "loss": 0.5336, "step": 10742 }, { "epoch": 1.7537651524427575, "grad_norm": 2.173704147338867, "learning_rate": 1.9525842668766156e-05, "loss": 0.8378, "step": 10743 }, { "epoch": 1.753928411085262, "grad_norm": 1.9480594396591187, "learning_rate": 1.952574610762758e-05, "loss": 0.6174, "step": 10744 }, { "epoch": 1.7540916697277662, "grad_norm": 1.6421117782592773, "learning_rate": 1.9525649536896573e-05, "loss": 0.6432, "step": 10745 }, { "epoch": 1.7542549283702706, "grad_norm": 1.4242048263549805, "learning_rate": 1.9525552956573244e-05, "loss": 0.5078, "step": 10746 }, { "epoch": 1.7544181870127749, "grad_norm": 1.7420896291732788, "learning_rate": 1.9525456366657684e-05, "loss": 0.6498, "step": 10747 }, { "epoch": 1.7545814456552793, "grad_norm": 1.5830692052841187, "learning_rate": 1.9525359767149994e-05, "loss": 0.5773, "step": 10748 }, { "epoch": 1.7547447042977837, "grad_norm": 1.6917366981506348, "learning_rate": 1.952526315805027e-05, "loss": 0.6092, "step": 10749 }, { "epoch": 1.7549079629402882, "grad_norm": 1.762800931930542, "learning_rate": 1.9525166539358608e-05, "loss": 0.6278, "step": 10750 }, { "epoch": 1.7550712215827926, "grad_norm": 1.2863050699234009, "learning_rate": 1.9525069911075105e-05, "loss": 0.4441, "step": 10751 }, { "epoch": 1.755234480225297, "grad_norm": 1.832234501838684, "learning_rate": 1.9524973273199855e-05, "loss": 0.6494, "step": 10752 }, { "epoch": 1.7553977388678013, "grad_norm": 1.9815207719802856, "learning_rate": 1.9524876625732963e-05, "loss": 0.7388, "step": 10753 }, { "epoch": 1.7555609975103057, "grad_norm": 1.674972653388977, "learning_rate": 1.9524779968674528e-05, "loss": 0.6514, "step": 10754 }, { "epoch": 1.7557242561528101, "grad_norm": 1.6049739122390747, "learning_rate": 1.9524683302024634e-05, "loss": 0.5816, "step": 10755 }, { "epoch": 1.7558875147953144, "grad_norm": 1.5166209936141968, "learning_rate": 1.952458662578339e-05, "loss": 0.5147, "step": 10756 }, { "epoch": 1.7560507734378188, "grad_norm": 2.034099817276001, "learning_rate": 1.9524489939950892e-05, "loss": 0.7691, "step": 10757 }, { "epoch": 1.7562140320803232, "grad_norm": 1.544663667678833, "learning_rate": 1.952439324452723e-05, "loss": 0.6134, "step": 10758 }, { "epoch": 1.7563772907228277, "grad_norm": 1.7768033742904663, "learning_rate": 1.952429653951251e-05, "loss": 0.6369, "step": 10759 }, { "epoch": 1.756540549365332, "grad_norm": 1.825433611869812, "learning_rate": 1.9524199824906826e-05, "loss": 0.7808, "step": 10760 }, { "epoch": 1.7567038080078365, "grad_norm": 2.1548619270324707, "learning_rate": 1.9524103100710276e-05, "loss": 0.5696, "step": 10761 }, { "epoch": 1.7568670666503408, "grad_norm": 1.6438004970550537, "learning_rate": 1.9524006366922954e-05, "loss": 0.5874, "step": 10762 }, { "epoch": 1.7570303252928452, "grad_norm": 1.8138939142227173, "learning_rate": 1.952390962354496e-05, "loss": 0.6316, "step": 10763 }, { "epoch": 1.7571935839353494, "grad_norm": 1.9225834608078003, "learning_rate": 1.9523812870576395e-05, "loss": 0.6014, "step": 10764 }, { "epoch": 1.7573568425778539, "grad_norm": 1.7963004112243652, "learning_rate": 1.952371610801735e-05, "loss": 0.6416, "step": 10765 }, { "epoch": 1.7575201012203583, "grad_norm": 1.8769713640213013, "learning_rate": 1.9523619335867926e-05, "loss": 0.8003, "step": 10766 }, { "epoch": 1.7576833598628627, "grad_norm": 1.5693458318710327, "learning_rate": 1.952352255412822e-05, "loss": 0.5159, "step": 10767 }, { "epoch": 1.7578466185053672, "grad_norm": 1.669358491897583, "learning_rate": 1.9523425762798328e-05, "loss": 0.7201, "step": 10768 }, { "epoch": 1.7580098771478716, "grad_norm": 1.6975306272506714, "learning_rate": 1.9523328961878353e-05, "loss": 0.6903, "step": 10769 }, { "epoch": 1.758173135790376, "grad_norm": 1.60775625705719, "learning_rate": 1.9523232151368383e-05, "loss": 0.6171, "step": 10770 }, { "epoch": 1.7583363944328803, "grad_norm": 1.6108731031417847, "learning_rate": 1.9523135331268523e-05, "loss": 0.665, "step": 10771 }, { "epoch": 1.7584996530753847, "grad_norm": 2.1032893657684326, "learning_rate": 1.952303850157887e-05, "loss": 0.7419, "step": 10772 }, { "epoch": 1.758662911717889, "grad_norm": 1.6460249423980713, "learning_rate": 1.9522941662299518e-05, "loss": 0.7546, "step": 10773 }, { "epoch": 1.7588261703603933, "grad_norm": 1.875888466835022, "learning_rate": 1.9522844813430567e-05, "loss": 0.7806, "step": 10774 }, { "epoch": 1.7589894290028978, "grad_norm": 1.989449143409729, "learning_rate": 1.952274795497211e-05, "loss": 0.7138, "step": 10775 }, { "epoch": 1.7591526876454022, "grad_norm": 1.8132860660552979, "learning_rate": 1.9522651086924254e-05, "loss": 0.8015, "step": 10776 }, { "epoch": 1.7593159462879067, "grad_norm": 1.652529239654541, "learning_rate": 1.952255420928709e-05, "loss": 0.6898, "step": 10777 }, { "epoch": 1.759479204930411, "grad_norm": 1.75728440284729, "learning_rate": 1.9522457322060714e-05, "loss": 0.6754, "step": 10778 }, { "epoch": 1.7596424635729155, "grad_norm": 1.5660400390625, "learning_rate": 1.9522360425245226e-05, "loss": 0.6484, "step": 10779 }, { "epoch": 1.7598057222154198, "grad_norm": 1.6051851511001587, "learning_rate": 1.952226351884072e-05, "loss": 0.7157, "step": 10780 }, { "epoch": 1.7599689808579242, "grad_norm": 2.1345932483673096, "learning_rate": 1.9522166602847305e-05, "loss": 0.6072, "step": 10781 }, { "epoch": 1.7601322395004284, "grad_norm": 1.5944386720657349, "learning_rate": 1.9522069677265067e-05, "loss": 0.5425, "step": 10782 }, { "epoch": 1.7602954981429328, "grad_norm": 1.6194030046463013, "learning_rate": 1.9521972742094107e-05, "loss": 0.6101, "step": 10783 }, { "epoch": 1.7604587567854373, "grad_norm": 1.5638209581375122, "learning_rate": 1.9521875797334524e-05, "loss": 0.6524, "step": 10784 }, { "epoch": 1.7606220154279417, "grad_norm": 1.5616692304611206, "learning_rate": 1.9521778842986413e-05, "loss": 0.694, "step": 10785 }, { "epoch": 1.7607852740704462, "grad_norm": 1.8090053796768188, "learning_rate": 1.9521681879049876e-05, "loss": 0.6927, "step": 10786 }, { "epoch": 1.7609485327129506, "grad_norm": 1.8393393754959106, "learning_rate": 1.952158490552501e-05, "loss": 0.7657, "step": 10787 }, { "epoch": 1.761111791355455, "grad_norm": 1.587812066078186, "learning_rate": 1.9521487922411904e-05, "loss": 0.5942, "step": 10788 }, { "epoch": 1.7612750499979593, "grad_norm": 1.7164409160614014, "learning_rate": 1.9521390929710663e-05, "loss": 0.6661, "step": 10789 }, { "epoch": 1.7614383086404637, "grad_norm": 1.659538984298706, "learning_rate": 1.9521293927421388e-05, "loss": 0.7343, "step": 10790 }, { "epoch": 1.761601567282968, "grad_norm": 1.581421971321106, "learning_rate": 1.952119691554417e-05, "loss": 0.6272, "step": 10791 }, { "epoch": 1.7617648259254723, "grad_norm": 1.690076231956482, "learning_rate": 1.952109989407911e-05, "loss": 0.6768, "step": 10792 }, { "epoch": 1.7619280845679768, "grad_norm": 2.370445489883423, "learning_rate": 1.9521002863026305e-05, "loss": 0.8073, "step": 10793 }, { "epoch": 1.7620913432104812, "grad_norm": 1.941224455833435, "learning_rate": 1.9520905822385852e-05, "loss": 0.7596, "step": 10794 }, { "epoch": 1.7622546018529857, "grad_norm": 1.7497076988220215, "learning_rate": 1.952080877215785e-05, "loss": 0.5879, "step": 10795 }, { "epoch": 1.76241786049549, "grad_norm": 1.739261507987976, "learning_rate": 1.9520711712342394e-05, "loss": 0.6218, "step": 10796 }, { "epoch": 1.7625811191379943, "grad_norm": 1.5725771188735962, "learning_rate": 1.952061464293959e-05, "loss": 0.6383, "step": 10797 }, { "epoch": 1.7627443777804988, "grad_norm": 1.619012713432312, "learning_rate": 1.9520517563949522e-05, "loss": 0.5779, "step": 10798 }, { "epoch": 1.7629076364230032, "grad_norm": 1.9557584524154663, "learning_rate": 1.95204204753723e-05, "loss": 0.8222, "step": 10799 }, { "epoch": 1.7630708950655074, "grad_norm": 1.8133105039596558, "learning_rate": 1.9520323377208017e-05, "loss": 0.6207, "step": 10800 }, { "epoch": 1.7632341537080118, "grad_norm": 1.490153431892395, "learning_rate": 1.9520226269456767e-05, "loss": 0.5551, "step": 10801 }, { "epoch": 1.7633974123505163, "grad_norm": 1.5352156162261963, "learning_rate": 1.9520129152118653e-05, "loss": 0.6902, "step": 10802 }, { "epoch": 1.7635606709930207, "grad_norm": 1.7540897130966187, "learning_rate": 1.9520032025193772e-05, "loss": 0.7109, "step": 10803 }, { "epoch": 1.7637239296355252, "grad_norm": 1.677668809890747, "learning_rate": 1.9519934888682224e-05, "loss": 0.6896, "step": 10804 }, { "epoch": 1.7638871882780296, "grad_norm": 1.6755605936050415, "learning_rate": 1.9519837742584102e-05, "loss": 0.6232, "step": 10805 }, { "epoch": 1.7640504469205338, "grad_norm": 1.5990185737609863, "learning_rate": 1.951974058689951e-05, "loss": 0.6892, "step": 10806 }, { "epoch": 1.7642137055630382, "grad_norm": 1.263320803642273, "learning_rate": 1.9519643421628535e-05, "loss": 0.455, "step": 10807 }, { "epoch": 1.7643769642055427, "grad_norm": 1.7494382858276367, "learning_rate": 1.9519546246771283e-05, "loss": 0.7423, "step": 10808 }, { "epoch": 1.764540222848047, "grad_norm": 1.8148205280303955, "learning_rate": 1.951944906232785e-05, "loss": 0.6589, "step": 10809 }, { "epoch": 1.7647034814905513, "grad_norm": 1.7060492038726807, "learning_rate": 1.9519351868298337e-05, "loss": 0.5839, "step": 10810 }, { "epoch": 1.7648667401330558, "grad_norm": 1.5221731662750244, "learning_rate": 1.951925466468284e-05, "loss": 0.6025, "step": 10811 }, { "epoch": 1.7650299987755602, "grad_norm": 1.5526695251464844, "learning_rate": 1.9519157451481453e-05, "loss": 0.6064, "step": 10812 }, { "epoch": 1.7651932574180647, "grad_norm": 1.7610591650009155, "learning_rate": 1.951906022869428e-05, "loss": 0.8321, "step": 10813 }, { "epoch": 1.765356516060569, "grad_norm": 1.6823147535324097, "learning_rate": 1.9518962996321413e-05, "loss": 0.5976, "step": 10814 }, { "epoch": 1.7655197747030733, "grad_norm": 1.4573984146118164, "learning_rate": 1.9518865754362953e-05, "loss": 0.5635, "step": 10815 }, { "epoch": 1.7656830333455777, "grad_norm": 1.669206142425537, "learning_rate": 1.9518768502819e-05, "loss": 0.6118, "step": 10816 }, { "epoch": 1.765846291988082, "grad_norm": 1.8541523218154907, "learning_rate": 1.9518671241689648e-05, "loss": 0.6255, "step": 10817 }, { "epoch": 1.7660095506305864, "grad_norm": 1.257539987564087, "learning_rate": 1.9518573970975e-05, "loss": 0.5502, "step": 10818 }, { "epoch": 1.7661728092730908, "grad_norm": 1.723966121673584, "learning_rate": 1.9518476690675145e-05, "loss": 0.6388, "step": 10819 }, { "epoch": 1.7663360679155953, "grad_norm": 1.5228670835494995, "learning_rate": 1.9518379400790192e-05, "loss": 0.578, "step": 10820 }, { "epoch": 1.7664993265580997, "grad_norm": 1.5448646545410156, "learning_rate": 1.9518282101320228e-05, "loss": 0.5407, "step": 10821 }, { "epoch": 1.7666625852006042, "grad_norm": 1.7004462480545044, "learning_rate": 1.9518184792265357e-05, "loss": 0.7933, "step": 10822 }, { "epoch": 1.7668258438431086, "grad_norm": 1.948448657989502, "learning_rate": 1.951808747362568e-05, "loss": 0.7763, "step": 10823 }, { "epoch": 1.7669891024856128, "grad_norm": 1.7008239030838013, "learning_rate": 1.951799014540129e-05, "loss": 0.6474, "step": 10824 }, { "epoch": 1.7671523611281172, "grad_norm": 1.5442135334014893, "learning_rate": 1.9517892807592288e-05, "loss": 0.5985, "step": 10825 }, { "epoch": 1.7673156197706215, "grad_norm": 1.7470180988311768, "learning_rate": 1.9517795460198768e-05, "loss": 0.7397, "step": 10826 }, { "epoch": 1.767478878413126, "grad_norm": 1.6956099271774292, "learning_rate": 1.951769810322083e-05, "loss": 0.7092, "step": 10827 }, { "epoch": 1.7676421370556303, "grad_norm": 1.9263503551483154, "learning_rate": 1.9517600736658572e-05, "loss": 0.6956, "step": 10828 }, { "epoch": 1.7678053956981348, "grad_norm": 1.7992496490478516, "learning_rate": 1.9517503360512095e-05, "loss": 0.6904, "step": 10829 }, { "epoch": 1.7679686543406392, "grad_norm": 1.6746838092803955, "learning_rate": 1.9517405974781495e-05, "loss": 0.6545, "step": 10830 }, { "epoch": 1.7681319129831436, "grad_norm": 1.8178722858428955, "learning_rate": 1.9517308579466866e-05, "loss": 0.6159, "step": 10831 }, { "epoch": 1.768295171625648, "grad_norm": 1.7905980348587036, "learning_rate": 1.9517211174568317e-05, "loss": 0.7204, "step": 10832 }, { "epoch": 1.7684584302681523, "grad_norm": 1.4039496183395386, "learning_rate": 1.9517113760085932e-05, "loss": 0.5344, "step": 10833 }, { "epoch": 1.7686216889106567, "grad_norm": 1.5899111032485962, "learning_rate": 1.9517016336019817e-05, "loss": 0.6479, "step": 10834 }, { "epoch": 1.768784947553161, "grad_norm": 1.8183836936950684, "learning_rate": 1.9516918902370073e-05, "loss": 0.6267, "step": 10835 }, { "epoch": 1.7689482061956654, "grad_norm": 1.6399550437927246, "learning_rate": 1.951682145913679e-05, "loss": 0.5889, "step": 10836 }, { "epoch": 1.7691114648381698, "grad_norm": 1.7220840454101562, "learning_rate": 1.951672400632007e-05, "loss": 0.6122, "step": 10837 }, { "epoch": 1.7692747234806743, "grad_norm": 1.6463868618011475, "learning_rate": 1.9516626543920015e-05, "loss": 0.618, "step": 10838 }, { "epoch": 1.7694379821231787, "grad_norm": 1.4979952573776245, "learning_rate": 1.9516529071936717e-05, "loss": 0.5792, "step": 10839 }, { "epoch": 1.7696012407656831, "grad_norm": 1.7675654888153076, "learning_rate": 1.951643159037028e-05, "loss": 0.6128, "step": 10840 }, { "epoch": 1.7697644994081876, "grad_norm": 1.794031023979187, "learning_rate": 1.9516334099220793e-05, "loss": 0.657, "step": 10841 }, { "epoch": 1.7699277580506918, "grad_norm": 1.63181471824646, "learning_rate": 1.9516236598488364e-05, "loss": 0.6537, "step": 10842 }, { "epoch": 1.7700910166931962, "grad_norm": 1.6753329038619995, "learning_rate": 1.951613908817309e-05, "loss": 0.661, "step": 10843 }, { "epoch": 1.7702542753357005, "grad_norm": 1.980138897895813, "learning_rate": 1.951604156827506e-05, "loss": 0.7286, "step": 10844 }, { "epoch": 1.770417533978205, "grad_norm": 1.683947205543518, "learning_rate": 1.9515944038794384e-05, "loss": 0.5825, "step": 10845 }, { "epoch": 1.7705807926207093, "grad_norm": 1.768072485923767, "learning_rate": 1.9515846499731153e-05, "loss": 0.8085, "step": 10846 }, { "epoch": 1.7707440512632138, "grad_norm": 1.9497138261795044, "learning_rate": 1.9515748951085468e-05, "loss": 0.7266, "step": 10847 }, { "epoch": 1.7709073099057182, "grad_norm": 1.6978740692138672, "learning_rate": 1.951565139285742e-05, "loss": 0.7681, "step": 10848 }, { "epoch": 1.7710705685482226, "grad_norm": 1.4797953367233276, "learning_rate": 1.951555382504712e-05, "loss": 0.5991, "step": 10849 }, { "epoch": 1.7712338271907269, "grad_norm": 1.6163244247436523, "learning_rate": 1.951545624765466e-05, "loss": 0.5965, "step": 10850 }, { "epoch": 1.7713970858332313, "grad_norm": 1.559449315071106, "learning_rate": 1.9515358660680137e-05, "loss": 0.7004, "step": 10851 }, { "epoch": 1.7715603444757357, "grad_norm": 1.8776028156280518, "learning_rate": 1.9515261064123653e-05, "loss": 0.6821, "step": 10852 }, { "epoch": 1.77172360311824, "grad_norm": 1.8561935424804688, "learning_rate": 1.9515163457985298e-05, "loss": 0.5908, "step": 10853 }, { "epoch": 1.7718868617607444, "grad_norm": 1.7763408422470093, "learning_rate": 1.9515065842265178e-05, "loss": 0.5461, "step": 10854 }, { "epoch": 1.7720501204032488, "grad_norm": 1.7872830629348755, "learning_rate": 1.951496821696339e-05, "loss": 0.7028, "step": 10855 }, { "epoch": 1.7722133790457533, "grad_norm": 1.627462387084961, "learning_rate": 1.951487058208003e-05, "loss": 0.5856, "step": 10856 }, { "epoch": 1.7723766376882577, "grad_norm": 1.431952714920044, "learning_rate": 1.95147729376152e-05, "loss": 0.6125, "step": 10857 }, { "epoch": 1.7725398963307621, "grad_norm": 1.858451247215271, "learning_rate": 1.9514675283569e-05, "loss": 0.8164, "step": 10858 }, { "epoch": 1.7727031549732664, "grad_norm": 1.650089979171753, "learning_rate": 1.951457761994152e-05, "loss": 0.6906, "step": 10859 }, { "epoch": 1.7728664136157708, "grad_norm": 1.8180104494094849, "learning_rate": 1.951447994673286e-05, "loss": 0.8398, "step": 10860 }, { "epoch": 1.773029672258275, "grad_norm": 1.6162959337234497, "learning_rate": 1.9514382263943125e-05, "loss": 0.6292, "step": 10861 }, { "epoch": 1.7731929309007795, "grad_norm": 1.700246810913086, "learning_rate": 1.951428457157241e-05, "loss": 0.7498, "step": 10862 }, { "epoch": 1.773356189543284, "grad_norm": 2.20633864402771, "learning_rate": 1.951418686962081e-05, "loss": 0.6972, "step": 10863 }, { "epoch": 1.7735194481857883, "grad_norm": 1.798872470855713, "learning_rate": 1.951408915808843e-05, "loss": 0.6647, "step": 10864 }, { "epoch": 1.7736827068282928, "grad_norm": 1.6198927164077759, "learning_rate": 1.951399143697536e-05, "loss": 0.688, "step": 10865 }, { "epoch": 1.7738459654707972, "grad_norm": 1.865727186203003, "learning_rate": 1.9513893706281707e-05, "loss": 0.8842, "step": 10866 }, { "epoch": 1.7740092241133016, "grad_norm": 1.6654359102249146, "learning_rate": 1.9513795966007563e-05, "loss": 0.6891, "step": 10867 }, { "epoch": 1.7741724827558059, "grad_norm": 1.60623037815094, "learning_rate": 1.951369821615303e-05, "loss": 0.7686, "step": 10868 }, { "epoch": 1.7743357413983103, "grad_norm": 1.9587477445602417, "learning_rate": 1.9513600456718206e-05, "loss": 0.8218, "step": 10869 }, { "epoch": 1.7744990000408145, "grad_norm": 1.5646796226501465, "learning_rate": 1.951350268770319e-05, "loss": 0.665, "step": 10870 }, { "epoch": 1.774662258683319, "grad_norm": 1.5532495975494385, "learning_rate": 1.9513404909108078e-05, "loss": 0.6639, "step": 10871 }, { "epoch": 1.7748255173258234, "grad_norm": 1.6693313121795654, "learning_rate": 1.9513307120932968e-05, "loss": 0.7177, "step": 10872 }, { "epoch": 1.7749887759683278, "grad_norm": 1.5305919647216797, "learning_rate": 1.9513209323177963e-05, "loss": 0.6372, "step": 10873 }, { "epoch": 1.7751520346108323, "grad_norm": 1.5721503496170044, "learning_rate": 1.951311151584316e-05, "loss": 0.6643, "step": 10874 }, { "epoch": 1.7753152932533367, "grad_norm": 1.835545539855957, "learning_rate": 1.9513013698928654e-05, "loss": 0.8045, "step": 10875 }, { "epoch": 1.7754785518958411, "grad_norm": 1.7381603717803955, "learning_rate": 1.9512915872434542e-05, "loss": 0.801, "step": 10876 }, { "epoch": 1.7756418105383454, "grad_norm": 1.596463680267334, "learning_rate": 1.9512818036360932e-05, "loss": 0.6944, "step": 10877 }, { "epoch": 1.7758050691808498, "grad_norm": 1.408737063407898, "learning_rate": 1.9512720190707915e-05, "loss": 0.5943, "step": 10878 }, { "epoch": 1.775968327823354, "grad_norm": 1.5177603960037231, "learning_rate": 1.951262233547559e-05, "loss": 0.6052, "step": 10879 }, { "epoch": 1.7761315864658584, "grad_norm": 1.3990031480789185, "learning_rate": 1.9512524470664058e-05, "loss": 0.5602, "step": 10880 }, { "epoch": 1.7762948451083629, "grad_norm": 1.5601325035095215, "learning_rate": 1.9512426596273418e-05, "loss": 0.6958, "step": 10881 }, { "epoch": 1.7764581037508673, "grad_norm": 2.587397336959839, "learning_rate": 1.9512328712303764e-05, "loss": 0.571, "step": 10882 }, { "epoch": 1.7766213623933718, "grad_norm": 1.969022274017334, "learning_rate": 1.95122308187552e-05, "loss": 0.7032, "step": 10883 }, { "epoch": 1.7767846210358762, "grad_norm": 1.7399964332580566, "learning_rate": 1.951213291562782e-05, "loss": 0.692, "step": 10884 }, { "epoch": 1.7769478796783806, "grad_norm": 1.9375702142715454, "learning_rate": 1.9512035002921726e-05, "loss": 0.7376, "step": 10885 }, { "epoch": 1.7771111383208849, "grad_norm": 1.8940571546554565, "learning_rate": 1.9511937080637015e-05, "loss": 0.7081, "step": 10886 }, { "epoch": 1.7772743969633893, "grad_norm": 1.5788077116012573, "learning_rate": 1.9511839148773783e-05, "loss": 0.6081, "step": 10887 }, { "epoch": 1.7774376556058935, "grad_norm": 1.567949891090393, "learning_rate": 1.9511741207332134e-05, "loss": 0.7016, "step": 10888 }, { "epoch": 1.777600914248398, "grad_norm": 1.5725597143173218, "learning_rate": 1.9511643256312165e-05, "loss": 0.7019, "step": 10889 }, { "epoch": 1.7777641728909024, "grad_norm": 1.3835197687149048, "learning_rate": 1.9511545295713975e-05, "loss": 0.5229, "step": 10890 }, { "epoch": 1.7779274315334068, "grad_norm": 1.983066439628601, "learning_rate": 1.9511447325537658e-05, "loss": 0.7461, "step": 10891 }, { "epoch": 1.7780906901759113, "grad_norm": 1.9186986684799194, "learning_rate": 1.9511349345783316e-05, "loss": 0.7311, "step": 10892 }, { "epoch": 1.7782539488184157, "grad_norm": 1.739055871963501, "learning_rate": 1.951125135645105e-05, "loss": 0.7455, "step": 10893 }, { "epoch": 1.77841720746092, "grad_norm": 1.6442842483520508, "learning_rate": 1.9511153357540954e-05, "loss": 0.7079, "step": 10894 }, { "epoch": 1.7785804661034244, "grad_norm": 1.6049189567565918, "learning_rate": 1.951105534905313e-05, "loss": 0.6143, "step": 10895 }, { "epoch": 1.7787437247459288, "grad_norm": 1.2695266008377075, "learning_rate": 1.951095733098768e-05, "loss": 0.4519, "step": 10896 }, { "epoch": 1.778906983388433, "grad_norm": 1.5489606857299805, "learning_rate": 1.9510859303344695e-05, "loss": 0.6004, "step": 10897 }, { "epoch": 1.7790702420309374, "grad_norm": 1.873193383216858, "learning_rate": 1.9510761266124277e-05, "loss": 0.7072, "step": 10898 }, { "epoch": 1.7792335006734419, "grad_norm": 1.5930944681167603, "learning_rate": 1.9510663219326525e-05, "loss": 0.6252, "step": 10899 }, { "epoch": 1.7793967593159463, "grad_norm": 2.268545389175415, "learning_rate": 1.9510565162951538e-05, "loss": 0.8394, "step": 10900 }, { "epoch": 1.7795600179584508, "grad_norm": 1.66386878490448, "learning_rate": 1.9510467096999413e-05, "loss": 0.7654, "step": 10901 }, { "epoch": 1.7797232766009552, "grad_norm": 2.10508394241333, "learning_rate": 1.9510369021470253e-05, "loss": 0.736, "step": 10902 }, { "epoch": 1.7798865352434594, "grad_norm": 1.7712843418121338, "learning_rate": 1.9510270936364152e-05, "loss": 0.6082, "step": 10903 }, { "epoch": 1.7800497938859638, "grad_norm": 1.484865427017212, "learning_rate": 1.951017284168121e-05, "loss": 0.6835, "step": 10904 }, { "epoch": 1.780213052528468, "grad_norm": 1.6732131242752075, "learning_rate": 1.9510074737421528e-05, "loss": 0.7047, "step": 10905 }, { "epoch": 1.7803763111709725, "grad_norm": 1.665887713432312, "learning_rate": 1.9509976623585204e-05, "loss": 0.8443, "step": 10906 }, { "epoch": 1.780539569813477, "grad_norm": 1.6836378574371338, "learning_rate": 1.9509878500172333e-05, "loss": 0.6747, "step": 10907 }, { "epoch": 1.7807028284559814, "grad_norm": 2.0392541885375977, "learning_rate": 1.950978036718302e-05, "loss": 0.7848, "step": 10908 }, { "epoch": 1.7808660870984858, "grad_norm": 1.3736915588378906, "learning_rate": 1.950968222461736e-05, "loss": 0.4559, "step": 10909 }, { "epoch": 1.7810293457409903, "grad_norm": 1.583001732826233, "learning_rate": 1.950958407247545e-05, "loss": 0.6005, "step": 10910 }, { "epoch": 1.7811926043834947, "grad_norm": 1.6234726905822754, "learning_rate": 1.9509485910757393e-05, "loss": 0.6487, "step": 10911 }, { "epoch": 1.781355863025999, "grad_norm": 1.5186219215393066, "learning_rate": 1.9509387739463284e-05, "loss": 0.5275, "step": 10912 }, { "epoch": 1.7815191216685033, "grad_norm": 1.4962098598480225, "learning_rate": 1.9509289558593227e-05, "loss": 0.5547, "step": 10913 }, { "epoch": 1.7816823803110076, "grad_norm": 1.8600186109542847, "learning_rate": 1.950919136814732e-05, "loss": 0.6616, "step": 10914 }, { "epoch": 1.781845638953512, "grad_norm": 1.588076114654541, "learning_rate": 1.9509093168125656e-05, "loss": 0.5861, "step": 10915 }, { "epoch": 1.7820088975960164, "grad_norm": 1.5765563249588013, "learning_rate": 1.9508994958528337e-05, "loss": 0.6691, "step": 10916 }, { "epoch": 1.7821721562385209, "grad_norm": 1.6238212585449219, "learning_rate": 1.9508896739355467e-05, "loss": 0.5909, "step": 10917 }, { "epoch": 1.7823354148810253, "grad_norm": 1.9103422164916992, "learning_rate": 1.9508798510607137e-05, "loss": 0.6421, "step": 10918 }, { "epoch": 1.7824986735235298, "grad_norm": 1.6012688875198364, "learning_rate": 1.9508700272283452e-05, "loss": 0.7118, "step": 10919 }, { "epoch": 1.7826619321660342, "grad_norm": 1.9534409046173096, "learning_rate": 1.9508602024384507e-05, "loss": 1.2235, "step": 10920 }, { "epoch": 1.7828251908085384, "grad_norm": 1.7806932926177979, "learning_rate": 1.95085037669104e-05, "loss": 0.7503, "step": 10921 }, { "epoch": 1.7829884494510428, "grad_norm": 1.7099485397338867, "learning_rate": 1.9508405499861235e-05, "loss": 0.6738, "step": 10922 }, { "epoch": 1.783151708093547, "grad_norm": 1.6799393892288208, "learning_rate": 1.9508307223237105e-05, "loss": 0.6989, "step": 10923 }, { "epoch": 1.7833149667360515, "grad_norm": 1.8940770626068115, "learning_rate": 1.950820893703812e-05, "loss": 0.6656, "step": 10924 }, { "epoch": 1.783478225378556, "grad_norm": 1.5303853750228882, "learning_rate": 1.9508110641264363e-05, "loss": 0.5947, "step": 10925 }, { "epoch": 1.7836414840210604, "grad_norm": 1.6698651313781738, "learning_rate": 1.9508012335915943e-05, "loss": 0.5975, "step": 10926 }, { "epoch": 1.7838047426635648, "grad_norm": 1.6515226364135742, "learning_rate": 1.950791402099296e-05, "loss": 0.6616, "step": 10927 }, { "epoch": 1.7839680013060693, "grad_norm": 1.8534449338912964, "learning_rate": 1.950781569649551e-05, "loss": 0.7861, "step": 10928 }, { "epoch": 1.7841312599485737, "grad_norm": 1.7015877962112427, "learning_rate": 1.9507717362423686e-05, "loss": 0.6197, "step": 10929 }, { "epoch": 1.784294518591078, "grad_norm": 1.470147728919983, "learning_rate": 1.9507619018777597e-05, "loss": 0.5684, "step": 10930 }, { "epoch": 1.7844577772335823, "grad_norm": 1.8318754434585571, "learning_rate": 1.950752066555734e-05, "loss": 0.7034, "step": 10931 }, { "epoch": 1.7846210358760866, "grad_norm": 2.2507457733154297, "learning_rate": 1.9507422302763012e-05, "loss": 0.7711, "step": 10932 }, { "epoch": 1.784784294518591, "grad_norm": 1.7883604764938354, "learning_rate": 1.950732393039471e-05, "loss": 0.5786, "step": 10933 }, { "epoch": 1.7849475531610954, "grad_norm": 1.744693636894226, "learning_rate": 1.9507225548452536e-05, "loss": 0.861, "step": 10934 }, { "epoch": 1.7851108118035999, "grad_norm": 1.6499569416046143, "learning_rate": 1.950712715693659e-05, "loss": 0.6393, "step": 10935 }, { "epoch": 1.7852740704461043, "grad_norm": 1.3422837257385254, "learning_rate": 1.9507028755846965e-05, "loss": 0.4565, "step": 10936 }, { "epoch": 1.7854373290886087, "grad_norm": 1.9206533432006836, "learning_rate": 1.950693034518377e-05, "loss": 0.6765, "step": 10937 }, { "epoch": 1.785600587731113, "grad_norm": 1.8371315002441406, "learning_rate": 1.9506831924947096e-05, "loss": 0.6633, "step": 10938 }, { "epoch": 1.7857638463736174, "grad_norm": 1.7113640308380127, "learning_rate": 1.9506733495137044e-05, "loss": 0.636, "step": 10939 }, { "epoch": 1.7859271050161218, "grad_norm": 1.4425818920135498, "learning_rate": 1.9506635055753714e-05, "loss": 0.6064, "step": 10940 }, { "epoch": 1.786090363658626, "grad_norm": 1.6489992141723633, "learning_rate": 1.9506536606797207e-05, "loss": 0.7039, "step": 10941 }, { "epoch": 1.7862536223011305, "grad_norm": 1.999359369277954, "learning_rate": 1.9506438148267623e-05, "loss": 0.695, "step": 10942 }, { "epoch": 1.786416880943635, "grad_norm": 1.3801072835922241, "learning_rate": 1.950633968016505e-05, "loss": 0.5638, "step": 10943 }, { "epoch": 1.7865801395861394, "grad_norm": 1.8162299394607544, "learning_rate": 1.95062412024896e-05, "loss": 0.6687, "step": 10944 }, { "epoch": 1.7867433982286438, "grad_norm": 1.202967643737793, "learning_rate": 1.950614271524137e-05, "loss": 0.541, "step": 10945 }, { "epoch": 1.7869066568711482, "grad_norm": 1.8636294603347778, "learning_rate": 1.9506044218420452e-05, "loss": 0.7896, "step": 10946 }, { "epoch": 1.7870699155136525, "grad_norm": 1.6534351110458374, "learning_rate": 1.9505945712026953e-05, "loss": 0.6448, "step": 10947 }, { "epoch": 1.787233174156157, "grad_norm": 1.6277782917022705, "learning_rate": 1.950584719606097e-05, "loss": 0.6023, "step": 10948 }, { "epoch": 1.7873964327986611, "grad_norm": 1.7879890203475952, "learning_rate": 1.95057486705226e-05, "loss": 0.7853, "step": 10949 }, { "epoch": 1.7875596914411656, "grad_norm": 1.671383261680603, "learning_rate": 1.950565013541194e-05, "loss": 0.6176, "step": 10950 }, { "epoch": 1.78772295008367, "grad_norm": 2.2748847007751465, "learning_rate": 1.95055515907291e-05, "loss": 0.6725, "step": 10951 }, { "epoch": 1.7878862087261744, "grad_norm": 1.761818528175354, "learning_rate": 1.9505453036474167e-05, "loss": 0.6628, "step": 10952 }, { "epoch": 1.7880494673686789, "grad_norm": 1.8150115013122559, "learning_rate": 1.950535447264725e-05, "loss": 0.7235, "step": 10953 }, { "epoch": 1.7882127260111833, "grad_norm": 1.8136879205703735, "learning_rate": 1.950525589924844e-05, "loss": 0.7979, "step": 10954 }, { "epoch": 1.7883759846536877, "grad_norm": 1.7881759405136108, "learning_rate": 1.950515731627784e-05, "loss": 0.6053, "step": 10955 }, { "epoch": 1.788539243296192, "grad_norm": 1.6208127737045288, "learning_rate": 1.9505058723735547e-05, "loss": 0.719, "step": 10956 }, { "epoch": 1.7887025019386964, "grad_norm": 1.79038405418396, "learning_rate": 1.9504960121621664e-05, "loss": 0.6503, "step": 10957 }, { "epoch": 1.7888657605812006, "grad_norm": 1.914711833000183, "learning_rate": 1.950486150993629e-05, "loss": 0.7484, "step": 10958 }, { "epoch": 1.789029019223705, "grad_norm": 1.4020333290100098, "learning_rate": 1.9504762888679524e-05, "loss": 0.5701, "step": 10959 }, { "epoch": 1.7891922778662095, "grad_norm": 1.9173874855041504, "learning_rate": 1.9504664257851464e-05, "loss": 0.7554, "step": 10960 }, { "epoch": 1.789355536508714, "grad_norm": 1.640149712562561, "learning_rate": 1.9504565617452206e-05, "loss": 0.6913, "step": 10961 }, { "epoch": 1.7895187951512184, "grad_norm": 1.6545718908309937, "learning_rate": 1.9504466967481855e-05, "loss": 0.637, "step": 10962 }, { "epoch": 1.7896820537937228, "grad_norm": 1.9426301717758179, "learning_rate": 1.950436830794051e-05, "loss": 0.6781, "step": 10963 }, { "epoch": 1.7898453124362272, "grad_norm": 1.5893805027008057, "learning_rate": 1.9504269638828265e-05, "loss": 0.6844, "step": 10964 }, { "epoch": 1.7900085710787315, "grad_norm": 1.4105567932128906, "learning_rate": 1.9504170960145226e-05, "loss": 0.598, "step": 10965 }, { "epoch": 1.790171829721236, "grad_norm": 1.6279853582382202, "learning_rate": 1.9504072271891486e-05, "loss": 0.7025, "step": 10966 }, { "epoch": 1.7903350883637401, "grad_norm": 1.7547433376312256, "learning_rate": 1.9503973574067152e-05, "loss": 0.6982, "step": 10967 }, { "epoch": 1.7904983470062446, "grad_norm": 1.6257331371307373, "learning_rate": 1.9503874866672318e-05, "loss": 0.7528, "step": 10968 }, { "epoch": 1.790661605648749, "grad_norm": 1.776758074760437, "learning_rate": 1.9503776149707082e-05, "loss": 0.716, "step": 10969 }, { "epoch": 1.7908248642912534, "grad_norm": 1.7773115634918213, "learning_rate": 1.950367742317155e-05, "loss": 0.6629, "step": 10970 }, { "epoch": 1.7909881229337579, "grad_norm": 1.7956613302230835, "learning_rate": 1.9503578687065816e-05, "loss": 0.5401, "step": 10971 }, { "epoch": 1.7911513815762623, "grad_norm": 1.4034658670425415, "learning_rate": 1.950347994138998e-05, "loss": 0.5993, "step": 10972 }, { "epoch": 1.7913146402187667, "grad_norm": 1.7711936235427856, "learning_rate": 1.9503381186144145e-05, "loss": 0.639, "step": 10973 }, { "epoch": 1.791477898861271, "grad_norm": 1.958225965499878, "learning_rate": 1.9503282421328402e-05, "loss": 0.703, "step": 10974 }, { "epoch": 1.7916411575037754, "grad_norm": 1.7456353902816772, "learning_rate": 1.9503183646942857e-05, "loss": 0.6142, "step": 10975 }, { "epoch": 1.7918044161462796, "grad_norm": 1.5796600580215454, "learning_rate": 1.950308486298761e-05, "loss": 0.5672, "step": 10976 }, { "epoch": 1.791967674788784, "grad_norm": 1.3280037641525269, "learning_rate": 1.950298606946276e-05, "loss": 0.5625, "step": 10977 }, { "epoch": 1.7921309334312885, "grad_norm": 1.709578037261963, "learning_rate": 1.9502887266368406e-05, "loss": 0.7647, "step": 10978 }, { "epoch": 1.792294192073793, "grad_norm": 1.7585728168487549, "learning_rate": 1.950278845370465e-05, "loss": 0.6456, "step": 10979 }, { "epoch": 1.7924574507162974, "grad_norm": 1.3722118139266968, "learning_rate": 1.9502689631471582e-05, "loss": 0.622, "step": 10980 }, { "epoch": 1.7926207093588018, "grad_norm": 1.5017002820968628, "learning_rate": 1.950259079966931e-05, "loss": 0.6302, "step": 10981 }, { "epoch": 1.792783968001306, "grad_norm": 1.9167367219924927, "learning_rate": 1.9502491958297932e-05, "loss": 0.6902, "step": 10982 }, { "epoch": 1.7929472266438105, "grad_norm": 1.8151402473449707, "learning_rate": 1.950239310735755e-05, "loss": 0.6232, "step": 10983 }, { "epoch": 1.793110485286315, "grad_norm": 1.6310298442840576, "learning_rate": 1.950229424684826e-05, "loss": 0.5942, "step": 10984 }, { "epoch": 1.793273743928819, "grad_norm": 1.7940322160720825, "learning_rate": 1.9502195376770156e-05, "loss": 0.7581, "step": 10985 }, { "epoch": 1.7934370025713235, "grad_norm": 1.4893049001693726, "learning_rate": 1.9502096497123352e-05, "loss": 0.5904, "step": 10986 }, { "epoch": 1.793600261213828, "grad_norm": 1.6086452007293701, "learning_rate": 1.9501997607907936e-05, "loss": 0.544, "step": 10987 }, { "epoch": 1.7937635198563324, "grad_norm": 2.5124011039733887, "learning_rate": 1.950189870912401e-05, "loss": 0.6022, "step": 10988 }, { "epoch": 1.7939267784988369, "grad_norm": 1.6374303102493286, "learning_rate": 1.9501799800771674e-05, "loss": 0.6678, "step": 10989 }, { "epoch": 1.7940900371413413, "grad_norm": 1.6227725744247437, "learning_rate": 1.950170088285103e-05, "loss": 0.5933, "step": 10990 }, { "epoch": 1.7942532957838455, "grad_norm": 1.307260513305664, "learning_rate": 1.9501601955362172e-05, "loss": 0.5326, "step": 10991 }, { "epoch": 1.79441655442635, "grad_norm": 1.6664745807647705, "learning_rate": 1.9501503018305206e-05, "loss": 0.5674, "step": 10992 }, { "epoch": 1.7945798130688542, "grad_norm": 1.581840991973877, "learning_rate": 1.950140407168023e-05, "loss": 0.6307, "step": 10993 }, { "epoch": 1.7947430717113586, "grad_norm": 2.0847830772399902, "learning_rate": 1.9501305115487345e-05, "loss": 0.8225, "step": 10994 }, { "epoch": 1.794906330353863, "grad_norm": 1.8122012615203857, "learning_rate": 1.950120614972664e-05, "loss": 0.7781, "step": 10995 }, { "epoch": 1.7950695889963675, "grad_norm": 1.956193447113037, "learning_rate": 1.950110717439823e-05, "loss": 0.7514, "step": 10996 }, { "epoch": 1.795232847638872, "grad_norm": 1.7041512727737427, "learning_rate": 1.9501008189502206e-05, "loss": 0.5448, "step": 10997 }, { "epoch": 1.7953961062813764, "grad_norm": 1.6150808334350586, "learning_rate": 1.950090919503867e-05, "loss": 0.6938, "step": 10998 }, { "epoch": 1.7955593649238808, "grad_norm": 2.1417901515960693, "learning_rate": 1.9500810191007717e-05, "loss": 0.7258, "step": 10999 }, { "epoch": 1.795722623566385, "grad_norm": 2.011456251144409, "learning_rate": 1.9500711177409456e-05, "loss": 1.074, "step": 11000 }, { "epoch": 1.7958858822088895, "grad_norm": 1.5073484182357788, "learning_rate": 1.950061215424398e-05, "loss": 0.6574, "step": 11001 }, { "epoch": 1.7960491408513937, "grad_norm": 1.8210022449493408, "learning_rate": 1.9500513121511386e-05, "loss": 0.6497, "step": 11002 }, { "epoch": 1.796212399493898, "grad_norm": 1.3758474588394165, "learning_rate": 1.9500414079211782e-05, "loss": 0.6458, "step": 11003 }, { "epoch": 1.7963756581364025, "grad_norm": 1.6705750226974487, "learning_rate": 1.9500315027345264e-05, "loss": 0.6876, "step": 11004 }, { "epoch": 1.796538916778907, "grad_norm": 1.51702082157135, "learning_rate": 1.950021596591193e-05, "loss": 0.5106, "step": 11005 }, { "epoch": 1.7967021754214114, "grad_norm": 1.6331568956375122, "learning_rate": 1.9500116894911878e-05, "loss": 0.5728, "step": 11006 }, { "epoch": 1.7968654340639159, "grad_norm": 1.6726586818695068, "learning_rate": 1.9500017814345213e-05, "loss": 0.6824, "step": 11007 }, { "epoch": 1.7970286927064203, "grad_norm": 1.8206206560134888, "learning_rate": 1.9499918724212034e-05, "loss": 0.7046, "step": 11008 }, { "epoch": 1.7971919513489245, "grad_norm": 1.7003638744354248, "learning_rate": 1.949981962451244e-05, "loss": 0.6858, "step": 11009 }, { "epoch": 1.797355209991429, "grad_norm": 1.58940589427948, "learning_rate": 1.9499720515246524e-05, "loss": 0.7, "step": 11010 }, { "epoch": 1.7975184686339332, "grad_norm": 1.6202011108398438, "learning_rate": 1.94996213964144e-05, "loss": 0.6121, "step": 11011 }, { "epoch": 1.7976817272764376, "grad_norm": 1.948164701461792, "learning_rate": 1.9499522268016155e-05, "loss": 0.6405, "step": 11012 }, { "epoch": 1.797844985918942, "grad_norm": 1.833451747894287, "learning_rate": 1.9499423130051895e-05, "loss": 0.6793, "step": 11013 }, { "epoch": 1.7980082445614465, "grad_norm": 1.7831531763076782, "learning_rate": 1.949932398252172e-05, "loss": 0.7406, "step": 11014 }, { "epoch": 1.798171503203951, "grad_norm": 1.360020637512207, "learning_rate": 1.9499224825425727e-05, "loss": 0.5295, "step": 11015 }, { "epoch": 1.7983347618464554, "grad_norm": 1.5655766725540161, "learning_rate": 1.9499125658764014e-05, "loss": 0.5825, "step": 11016 }, { "epoch": 1.7984980204889598, "grad_norm": 1.7183139324188232, "learning_rate": 1.9499026482536688e-05, "loss": 0.7503, "step": 11017 }, { "epoch": 1.798661279131464, "grad_norm": 1.521177887916565, "learning_rate": 1.9498927296743842e-05, "loss": 0.5439, "step": 11018 }, { "epoch": 1.7988245377739684, "grad_norm": 1.4163738489151, "learning_rate": 1.9498828101385582e-05, "loss": 0.4802, "step": 11019 }, { "epoch": 1.7989877964164727, "grad_norm": 1.779533863067627, "learning_rate": 1.9498728896462002e-05, "loss": 0.6561, "step": 11020 }, { "epoch": 1.799151055058977, "grad_norm": 1.7703741788864136, "learning_rate": 1.9498629681973208e-05, "loss": 0.6692, "step": 11021 }, { "epoch": 1.7993143137014815, "grad_norm": 1.3677408695220947, "learning_rate": 1.949853045791929e-05, "loss": 0.5202, "step": 11022 }, { "epoch": 1.799477572343986, "grad_norm": 1.752852201461792, "learning_rate": 1.949843122430036e-05, "loss": 0.6024, "step": 11023 }, { "epoch": 1.7996408309864904, "grad_norm": 1.8039292097091675, "learning_rate": 1.9498331981116512e-05, "loss": 0.6691, "step": 11024 }, { "epoch": 1.7998040896289949, "grad_norm": 1.6552469730377197, "learning_rate": 1.9498232728367843e-05, "loss": 0.5659, "step": 11025 }, { "epoch": 1.799967348271499, "grad_norm": 1.9618523120880127, "learning_rate": 1.9498133466054457e-05, "loss": 0.7139, "step": 11026 }, { "epoch": 1.8001306069140035, "grad_norm": 1.5022468566894531, "learning_rate": 1.9498034194176454e-05, "loss": 0.5399, "step": 11027 }, { "epoch": 1.800293865556508, "grad_norm": 1.4631116390228271, "learning_rate": 1.9497934912733933e-05, "loss": 0.6006, "step": 11028 }, { "epoch": 1.8004571241990122, "grad_norm": 1.6841527223587036, "learning_rate": 1.9497835621726995e-05, "loss": 0.6096, "step": 11029 }, { "epoch": 1.8006203828415166, "grad_norm": 1.5290027856826782, "learning_rate": 1.9497736321155737e-05, "loss": 0.6097, "step": 11030 }, { "epoch": 1.800783641484021, "grad_norm": 1.6787018775939941, "learning_rate": 1.9497637011020264e-05, "loss": 0.6967, "step": 11031 }, { "epoch": 1.8009469001265255, "grad_norm": 1.9323915243148804, "learning_rate": 1.949753769132067e-05, "loss": 0.7356, "step": 11032 }, { "epoch": 1.80111015876903, "grad_norm": 1.8231273889541626, "learning_rate": 1.949743836205706e-05, "loss": 0.608, "step": 11033 }, { "epoch": 1.8012734174115343, "grad_norm": 1.6533604860305786, "learning_rate": 1.949733902322953e-05, "loss": 0.6386, "step": 11034 }, { "epoch": 1.8014366760540386, "grad_norm": 1.9590764045715332, "learning_rate": 1.9497239674838183e-05, "loss": 0.8171, "step": 11035 }, { "epoch": 1.801599934696543, "grad_norm": 1.7612595558166504, "learning_rate": 1.949714031688312e-05, "loss": 0.6796, "step": 11036 }, { "epoch": 1.8017631933390472, "grad_norm": 1.6914145946502686, "learning_rate": 1.949704094936444e-05, "loss": 0.6809, "step": 11037 }, { "epoch": 1.8019264519815517, "grad_norm": 1.405837893486023, "learning_rate": 1.9496941572282237e-05, "loss": 0.6175, "step": 11038 }, { "epoch": 1.802089710624056, "grad_norm": 2.1227335929870605, "learning_rate": 1.949684218563662e-05, "loss": 0.6949, "step": 11039 }, { "epoch": 1.8022529692665605, "grad_norm": 1.6746982336044312, "learning_rate": 1.9496742789427684e-05, "loss": 0.6762, "step": 11040 }, { "epoch": 1.802416227909065, "grad_norm": 1.447788953781128, "learning_rate": 1.9496643383655533e-05, "loss": 0.5899, "step": 11041 }, { "epoch": 1.8025794865515694, "grad_norm": 1.5123610496520996, "learning_rate": 1.9496543968320262e-05, "loss": 0.5845, "step": 11042 }, { "epoch": 1.8027427451940738, "grad_norm": 1.5610320568084717, "learning_rate": 1.9496444543421975e-05, "loss": 0.5345, "step": 11043 }, { "epoch": 1.802906003836578, "grad_norm": 1.5995087623596191, "learning_rate": 1.949634510896077e-05, "loss": 0.7436, "step": 11044 }, { "epoch": 1.8030692624790825, "grad_norm": 1.524634599685669, "learning_rate": 1.9496245664936752e-05, "loss": 0.6011, "step": 11045 }, { "epoch": 1.8032325211215867, "grad_norm": 1.7527985572814941, "learning_rate": 1.9496146211350015e-05, "loss": 0.7144, "step": 11046 }, { "epoch": 1.8033957797640912, "grad_norm": 2.2442309856414795, "learning_rate": 1.9496046748200655e-05, "loss": 0.7303, "step": 11047 }, { "epoch": 1.8035590384065956, "grad_norm": 1.8784981966018677, "learning_rate": 1.9495947275488785e-05, "loss": 0.6966, "step": 11048 }, { "epoch": 1.8037222970491, "grad_norm": 2.1215879917144775, "learning_rate": 1.94958477932145e-05, "loss": 0.8389, "step": 11049 }, { "epoch": 1.8038855556916045, "grad_norm": 1.6216996908187866, "learning_rate": 1.9495748301377895e-05, "loss": 0.6188, "step": 11050 }, { "epoch": 1.804048814334109, "grad_norm": 1.7379426956176758, "learning_rate": 1.9495648799979076e-05, "loss": 0.6953, "step": 11051 }, { "epoch": 1.8042120729766133, "grad_norm": 1.5838842391967773, "learning_rate": 1.949554928901814e-05, "loss": 0.6626, "step": 11052 }, { "epoch": 1.8043753316191176, "grad_norm": 1.4576048851013184, "learning_rate": 1.9495449768495185e-05, "loss": 0.4798, "step": 11053 }, { "epoch": 1.804538590261622, "grad_norm": 1.676416039466858, "learning_rate": 1.949535023841032e-05, "loss": 0.6886, "step": 11054 }, { "epoch": 1.8047018489041262, "grad_norm": 1.896082878112793, "learning_rate": 1.9495250698763637e-05, "loss": 0.7831, "step": 11055 }, { "epoch": 1.8048651075466307, "grad_norm": 2.168466329574585, "learning_rate": 1.9495151149555243e-05, "loss": 0.6503, "step": 11056 }, { "epoch": 1.805028366189135, "grad_norm": 2.002229928970337, "learning_rate": 1.949505159078523e-05, "loss": 0.7434, "step": 11057 }, { "epoch": 1.8051916248316395, "grad_norm": 1.827767252922058, "learning_rate": 1.9494952022453703e-05, "loss": 0.6399, "step": 11058 }, { "epoch": 1.805354883474144, "grad_norm": 1.5005276203155518, "learning_rate": 1.9494852444560764e-05, "loss": 0.6119, "step": 11059 }, { "epoch": 1.8055181421166484, "grad_norm": 1.9106030464172363, "learning_rate": 1.949475285710651e-05, "loss": 0.6963, "step": 11060 }, { "epoch": 1.8056814007591528, "grad_norm": 1.6177887916564941, "learning_rate": 1.949465326009104e-05, "loss": 0.5945, "step": 11061 }, { "epoch": 1.805844659401657, "grad_norm": 2.0745201110839844, "learning_rate": 1.949455365351446e-05, "loss": 0.7515, "step": 11062 }, { "epoch": 1.8060079180441615, "grad_norm": 1.9425119161605835, "learning_rate": 1.9494454037376866e-05, "loss": 0.8745, "step": 11063 }, { "epoch": 1.8061711766866657, "grad_norm": 1.957694411277771, "learning_rate": 1.949435441167836e-05, "loss": 0.7694, "step": 11064 }, { "epoch": 1.8063344353291702, "grad_norm": 1.573350429534912, "learning_rate": 1.949425477641904e-05, "loss": 0.5876, "step": 11065 }, { "epoch": 1.8064976939716746, "grad_norm": 1.619499921798706, "learning_rate": 1.9494155131599007e-05, "loss": 0.5589, "step": 11066 }, { "epoch": 1.806660952614179, "grad_norm": 1.5997307300567627, "learning_rate": 1.9494055477218366e-05, "loss": 0.6226, "step": 11067 }, { "epoch": 1.8068242112566835, "grad_norm": 1.6940546035766602, "learning_rate": 1.949395581327721e-05, "loss": 0.6639, "step": 11068 }, { "epoch": 1.806987469899188, "grad_norm": 2.0067224502563477, "learning_rate": 1.9493856139775645e-05, "loss": 0.7095, "step": 11069 }, { "epoch": 1.8071507285416923, "grad_norm": 1.8388453722000122, "learning_rate": 1.949375645671377e-05, "loss": 0.8511, "step": 11070 }, { "epoch": 1.8073139871841966, "grad_norm": 1.4517158269882202, "learning_rate": 1.9493656764091687e-05, "loss": 0.579, "step": 11071 }, { "epoch": 1.807477245826701, "grad_norm": 1.6384245157241821, "learning_rate": 1.9493557061909487e-05, "loss": 0.6546, "step": 11072 }, { "epoch": 1.8076405044692052, "grad_norm": 1.8635280132293701, "learning_rate": 1.949345735016728e-05, "loss": 0.6664, "step": 11073 }, { "epoch": 1.8078037631117096, "grad_norm": 1.9850223064422607, "learning_rate": 1.949335762886517e-05, "loss": 0.7521, "step": 11074 }, { "epoch": 1.807967021754214, "grad_norm": 1.514289140701294, "learning_rate": 1.9493257898003247e-05, "loss": 0.5888, "step": 11075 }, { "epoch": 1.8081302803967185, "grad_norm": 1.883723497390747, "learning_rate": 1.9493158157581617e-05, "loss": 0.749, "step": 11076 }, { "epoch": 1.808293539039223, "grad_norm": 2.0146236419677734, "learning_rate": 1.9493058407600375e-05, "loss": 0.6451, "step": 11077 }, { "epoch": 1.8084567976817274, "grad_norm": 1.5513850450515747, "learning_rate": 1.949295864805963e-05, "loss": 0.5897, "step": 11078 }, { "epoch": 1.8086200563242316, "grad_norm": 1.7357043027877808, "learning_rate": 1.949285887895948e-05, "loss": 0.6933, "step": 11079 }, { "epoch": 1.808783314966736, "grad_norm": 1.5706382989883423, "learning_rate": 1.949275910030002e-05, "loss": 0.6496, "step": 11080 }, { "epoch": 1.8089465736092405, "grad_norm": 1.4792040586471558, "learning_rate": 1.9492659312081353e-05, "loss": 0.6011, "step": 11081 }, { "epoch": 1.8091098322517447, "grad_norm": 1.7933368682861328, "learning_rate": 1.949255951430358e-05, "loss": 0.8305, "step": 11082 }, { "epoch": 1.8092730908942491, "grad_norm": 1.4616010189056396, "learning_rate": 1.9492459706966805e-05, "loss": 0.6949, "step": 11083 }, { "epoch": 1.8094363495367536, "grad_norm": 1.5476882457733154, "learning_rate": 1.9492359890071126e-05, "loss": 0.6311, "step": 11084 }, { "epoch": 1.809599608179258, "grad_norm": 1.691097378730774, "learning_rate": 1.949226006361664e-05, "loss": 0.5961, "step": 11085 }, { "epoch": 1.8097628668217625, "grad_norm": 1.705325722694397, "learning_rate": 1.9492160227603455e-05, "loss": 0.6277, "step": 11086 }, { "epoch": 1.809926125464267, "grad_norm": 1.5117729902267456, "learning_rate": 1.9492060382031663e-05, "loss": 0.6084, "step": 11087 }, { "epoch": 1.8100893841067711, "grad_norm": 1.4805537462234497, "learning_rate": 1.949196052690137e-05, "loss": 0.7033, "step": 11088 }, { "epoch": 1.8102526427492756, "grad_norm": 1.5191841125488281, "learning_rate": 1.949186066221268e-05, "loss": 0.6451, "step": 11089 }, { "epoch": 1.8104159013917798, "grad_norm": 1.7236820459365845, "learning_rate": 1.949176078796568e-05, "loss": 0.5559, "step": 11090 }, { "epoch": 1.8105791600342842, "grad_norm": 1.9414710998535156, "learning_rate": 1.9491660904160487e-05, "loss": 0.7223, "step": 11091 }, { "epoch": 1.8107424186767886, "grad_norm": 1.4370970726013184, "learning_rate": 1.9491561010797188e-05, "loss": 0.6223, "step": 11092 }, { "epoch": 1.810905677319293, "grad_norm": 1.5884202718734741, "learning_rate": 1.949146110787589e-05, "loss": 0.6707, "step": 11093 }, { "epoch": 1.8110689359617975, "grad_norm": 1.7541905641555786, "learning_rate": 1.9491361195396702e-05, "loss": 0.6518, "step": 11094 }, { "epoch": 1.811232194604302, "grad_norm": 1.494960904121399, "learning_rate": 1.9491261273359708e-05, "loss": 0.6113, "step": 11095 }, { "epoch": 1.8113954532468064, "grad_norm": 1.6405655145645142, "learning_rate": 1.9491161341765018e-05, "loss": 0.6551, "step": 11096 }, { "epoch": 1.8115587118893106, "grad_norm": 1.634151816368103, "learning_rate": 1.949106140061273e-05, "loss": 0.5169, "step": 11097 }, { "epoch": 1.811721970531815, "grad_norm": 1.7516169548034668, "learning_rate": 1.9490961449902946e-05, "loss": 0.6103, "step": 11098 }, { "epoch": 1.8118852291743193, "grad_norm": 2.7211694717407227, "learning_rate": 1.9490861489635767e-05, "loss": 0.7449, "step": 11099 }, { "epoch": 1.8120484878168237, "grad_norm": 1.6961859464645386, "learning_rate": 1.9490761519811295e-05, "loss": 0.683, "step": 11100 }, { "epoch": 1.8122117464593281, "grad_norm": 1.8151664733886719, "learning_rate": 1.949066154042963e-05, "loss": 0.6443, "step": 11101 }, { "epoch": 1.8123750051018326, "grad_norm": 1.6227350234985352, "learning_rate": 1.9490561551490863e-05, "loss": 0.6272, "step": 11102 }, { "epoch": 1.812538263744337, "grad_norm": 1.4189320802688599, "learning_rate": 1.949046155299511e-05, "loss": 0.6091, "step": 11103 }, { "epoch": 1.8127015223868415, "grad_norm": 1.7289100885391235, "learning_rate": 1.949036154494246e-05, "loss": 0.5913, "step": 11104 }, { "epoch": 1.812864781029346, "grad_norm": 1.2770830392837524, "learning_rate": 1.949026152733302e-05, "loss": 0.4446, "step": 11105 }, { "epoch": 1.8130280396718501, "grad_norm": 1.9457275867462158, "learning_rate": 1.9490161500166892e-05, "loss": 0.6833, "step": 11106 }, { "epoch": 1.8131912983143545, "grad_norm": 1.361099362373352, "learning_rate": 1.9490061463444175e-05, "loss": 0.6293, "step": 11107 }, { "epoch": 1.8133545569568588, "grad_norm": 1.961525321006775, "learning_rate": 1.9489961417164968e-05, "loss": 0.8395, "step": 11108 }, { "epoch": 1.8135178155993632, "grad_norm": 1.8126193284988403, "learning_rate": 1.948986136132937e-05, "loss": 0.6789, "step": 11109 }, { "epoch": 1.8136810742418676, "grad_norm": 1.8337323665618896, "learning_rate": 1.9489761295937483e-05, "loss": 0.6231, "step": 11110 }, { "epoch": 1.813844332884372, "grad_norm": 1.478893518447876, "learning_rate": 1.948966122098941e-05, "loss": 0.7619, "step": 11111 }, { "epoch": 1.8140075915268765, "grad_norm": 1.8692429065704346, "learning_rate": 1.9489561136485252e-05, "loss": 0.6684, "step": 11112 }, { "epoch": 1.814170850169381, "grad_norm": 1.9728418588638306, "learning_rate": 1.9489461042425106e-05, "loss": 0.7428, "step": 11113 }, { "epoch": 1.8143341088118854, "grad_norm": 1.6161795854568481, "learning_rate": 1.9489360938809076e-05, "loss": 0.7056, "step": 11114 }, { "epoch": 1.8144973674543896, "grad_norm": 1.6895562410354614, "learning_rate": 1.9489260825637265e-05, "loss": 0.5905, "step": 11115 }, { "epoch": 1.814660626096894, "grad_norm": 1.5149509906768799, "learning_rate": 1.948916070290977e-05, "loss": 0.5592, "step": 11116 }, { "epoch": 1.8148238847393983, "grad_norm": 1.7208253145217896, "learning_rate": 1.948906057062669e-05, "loss": 0.721, "step": 11117 }, { "epoch": 1.8149871433819027, "grad_norm": 2.1671388149261475, "learning_rate": 1.948896042878813e-05, "loss": 0.8171, "step": 11118 }, { "epoch": 1.8151504020244071, "grad_norm": 1.679381251335144, "learning_rate": 1.948886027739419e-05, "loss": 0.6085, "step": 11119 }, { "epoch": 1.8153136606669116, "grad_norm": 1.335627555847168, "learning_rate": 1.9488760116444966e-05, "loss": 0.5721, "step": 11120 }, { "epoch": 1.815476919309416, "grad_norm": 2.0207953453063965, "learning_rate": 1.9488659945940568e-05, "loss": 0.7299, "step": 11121 }, { "epoch": 1.8156401779519205, "grad_norm": 1.7400703430175781, "learning_rate": 1.948855976588109e-05, "loss": 0.7527, "step": 11122 }, { "epoch": 1.8158034365944247, "grad_norm": 1.504014253616333, "learning_rate": 1.9488459576266634e-05, "loss": 0.647, "step": 11123 }, { "epoch": 1.815966695236929, "grad_norm": 1.774139165878296, "learning_rate": 1.9488359377097302e-05, "loss": 0.7673, "step": 11124 }, { "epoch": 1.8161299538794335, "grad_norm": 1.3680108785629272, "learning_rate": 1.9488259168373198e-05, "loss": 0.5301, "step": 11125 }, { "epoch": 1.8162932125219378, "grad_norm": 1.664029598236084, "learning_rate": 1.9488158950094417e-05, "loss": 0.6912, "step": 11126 }, { "epoch": 1.8164564711644422, "grad_norm": 1.821329116821289, "learning_rate": 1.9488058722261063e-05, "loss": 0.7851, "step": 11127 }, { "epoch": 1.8166197298069466, "grad_norm": 1.7535254955291748, "learning_rate": 1.948795848487324e-05, "loss": 0.5771, "step": 11128 }, { "epoch": 1.816782988449451, "grad_norm": 1.8704736232757568, "learning_rate": 1.9487858237931035e-05, "loss": 0.7331, "step": 11129 }, { "epoch": 1.8169462470919555, "grad_norm": 1.6518393754959106, "learning_rate": 1.9487757981434568e-05, "loss": 0.7037, "step": 11130 }, { "epoch": 1.81710950573446, "grad_norm": 1.6832218170166016, "learning_rate": 1.9487657715383928e-05, "loss": 0.6977, "step": 11131 }, { "epoch": 1.8172727643769642, "grad_norm": 1.6304597854614258, "learning_rate": 1.948755743977922e-05, "loss": 0.6028, "step": 11132 }, { "epoch": 1.8174360230194686, "grad_norm": 2.182384490966797, "learning_rate": 1.9487457154620544e-05, "loss": 0.6757, "step": 11133 }, { "epoch": 1.8175992816619728, "grad_norm": 1.6859127283096313, "learning_rate": 1.9487356859908003e-05, "loss": 0.699, "step": 11134 }, { "epoch": 1.8177625403044773, "grad_norm": 1.6341341733932495, "learning_rate": 1.9487256555641692e-05, "loss": 0.6604, "step": 11135 }, { "epoch": 1.8179257989469817, "grad_norm": 1.5923099517822266, "learning_rate": 1.948715624182172e-05, "loss": 0.6565, "step": 11136 }, { "epoch": 1.8180890575894861, "grad_norm": 1.6098967790603638, "learning_rate": 1.9487055918448184e-05, "loss": 0.6846, "step": 11137 }, { "epoch": 1.8182523162319906, "grad_norm": 1.581106185913086, "learning_rate": 1.9486955585521183e-05, "loss": 0.7483, "step": 11138 }, { "epoch": 1.818415574874495, "grad_norm": 1.449518084526062, "learning_rate": 1.9486855243040822e-05, "loss": 0.6561, "step": 11139 }, { "epoch": 1.8185788335169994, "grad_norm": 1.4742976427078247, "learning_rate": 1.9486754891007197e-05, "loss": 0.5969, "step": 11140 }, { "epoch": 1.8187420921595037, "grad_norm": 1.4525151252746582, "learning_rate": 1.9486654529420415e-05, "loss": 0.5824, "step": 11141 }, { "epoch": 1.818905350802008, "grad_norm": 2.0713491439819336, "learning_rate": 1.9486554158280576e-05, "loss": 0.6884, "step": 11142 }, { "epoch": 1.8190686094445123, "grad_norm": 1.5504671335220337, "learning_rate": 1.9486453777587777e-05, "loss": 0.6468, "step": 11143 }, { "epoch": 1.8192318680870168, "grad_norm": 1.671420693397522, "learning_rate": 1.9486353387342124e-05, "loss": 0.6752, "step": 11144 }, { "epoch": 1.8193951267295212, "grad_norm": 1.8407148122787476, "learning_rate": 1.9486252987543715e-05, "loss": 0.7886, "step": 11145 }, { "epoch": 1.8195583853720256, "grad_norm": 1.8456368446350098, "learning_rate": 1.948615257819265e-05, "loss": 0.7491, "step": 11146 }, { "epoch": 1.81972164401453, "grad_norm": 1.5769352912902832, "learning_rate": 1.9486052159289033e-05, "loss": 0.6474, "step": 11147 }, { "epoch": 1.8198849026570345, "grad_norm": 1.8910030126571655, "learning_rate": 1.9485951730832965e-05, "loss": 0.7523, "step": 11148 }, { "epoch": 1.820048161299539, "grad_norm": 2.1184542179107666, "learning_rate": 1.9485851292824543e-05, "loss": 0.8583, "step": 11149 }, { "epoch": 1.8202114199420432, "grad_norm": 1.5855315923690796, "learning_rate": 1.9485750845263874e-05, "loss": 0.598, "step": 11150 }, { "epoch": 1.8203746785845476, "grad_norm": 1.4453893899917603, "learning_rate": 1.9485650388151054e-05, "loss": 0.562, "step": 11151 }, { "epoch": 1.8205379372270518, "grad_norm": 1.5758185386657715, "learning_rate": 1.948554992148619e-05, "loss": 0.6767, "step": 11152 }, { "epoch": 1.8207011958695563, "grad_norm": 1.5604318380355835, "learning_rate": 1.9485449445269376e-05, "loss": 0.7189, "step": 11153 }, { "epoch": 1.8208644545120607, "grad_norm": 1.8015416860580444, "learning_rate": 1.948534895950072e-05, "loss": 0.7494, "step": 11154 }, { "epoch": 1.8210277131545651, "grad_norm": 1.725684404373169, "learning_rate": 1.948524846418032e-05, "loss": 0.6621, "step": 11155 }, { "epoch": 1.8211909717970696, "grad_norm": 1.4514883756637573, "learning_rate": 1.9485147959308275e-05, "loss": 0.5491, "step": 11156 }, { "epoch": 1.821354230439574, "grad_norm": 1.6093614101409912, "learning_rate": 1.9485047444884688e-05, "loss": 0.683, "step": 11157 }, { "epoch": 1.8215174890820784, "grad_norm": 1.4736398458480835, "learning_rate": 1.9484946920909663e-05, "loss": 0.6712, "step": 11158 }, { "epoch": 1.8216807477245827, "grad_norm": 1.6235793828964233, "learning_rate": 1.9484846387383298e-05, "loss": 0.7192, "step": 11159 }, { "epoch": 1.821844006367087, "grad_norm": 1.9765546321868896, "learning_rate": 1.9484745844305694e-05, "loss": 0.7272, "step": 11160 }, { "epoch": 1.8220072650095913, "grad_norm": 1.62570321559906, "learning_rate": 1.9484645291676957e-05, "loss": 0.6545, "step": 11161 }, { "epoch": 1.8221705236520958, "grad_norm": 1.3362847566604614, "learning_rate": 1.948454472949718e-05, "loss": 0.5445, "step": 11162 }, { "epoch": 1.8223337822946002, "grad_norm": 1.806139349937439, "learning_rate": 1.9484444157766473e-05, "loss": 0.6307, "step": 11163 }, { "epoch": 1.8224970409371046, "grad_norm": 1.8467718362808228, "learning_rate": 1.9484343576484935e-05, "loss": 0.6713, "step": 11164 }, { "epoch": 1.822660299579609, "grad_norm": 1.821985125541687, "learning_rate": 1.948424298565266e-05, "loss": 0.6373, "step": 11165 }, { "epoch": 1.8228235582221135, "grad_norm": 1.55324387550354, "learning_rate": 1.9484142385269758e-05, "loss": 0.6142, "step": 11166 }, { "epoch": 1.8229868168646177, "grad_norm": 1.7355291843414307, "learning_rate": 1.9484041775336325e-05, "loss": 0.752, "step": 11167 }, { "epoch": 1.8231500755071222, "grad_norm": 1.7012298107147217, "learning_rate": 1.9483941155852467e-05, "loss": 0.7618, "step": 11168 }, { "epoch": 1.8233133341496266, "grad_norm": 2.0106306076049805, "learning_rate": 1.948384052681828e-05, "loss": 0.899, "step": 11169 }, { "epoch": 1.8234765927921308, "grad_norm": 1.5781197547912598, "learning_rate": 1.948373988823387e-05, "loss": 0.6533, "step": 11170 }, { "epoch": 1.8236398514346353, "grad_norm": 1.6318929195404053, "learning_rate": 1.9483639240099337e-05, "loss": 0.6613, "step": 11171 }, { "epoch": 1.8238031100771397, "grad_norm": 1.4863784313201904, "learning_rate": 1.9483538582414784e-05, "loss": 0.5246, "step": 11172 }, { "epoch": 1.8239663687196441, "grad_norm": 1.7090933322906494, "learning_rate": 1.9483437915180307e-05, "loss": 0.6889, "step": 11173 }, { "epoch": 1.8241296273621486, "grad_norm": 1.8445919752120972, "learning_rate": 1.9483337238396008e-05, "loss": 0.6615, "step": 11174 }, { "epoch": 1.824292886004653, "grad_norm": 1.5850337743759155, "learning_rate": 1.9483236552061996e-05, "loss": 0.6031, "step": 11175 }, { "epoch": 1.8244561446471572, "grad_norm": 2.027003765106201, "learning_rate": 1.9483135856178364e-05, "loss": 0.8073, "step": 11176 }, { "epoch": 1.8246194032896617, "grad_norm": 1.7942676544189453, "learning_rate": 1.948303515074522e-05, "loss": 0.6936, "step": 11177 }, { "epoch": 1.8247826619321659, "grad_norm": 1.6726871728897095, "learning_rate": 1.9482934435762658e-05, "loss": 0.5856, "step": 11178 }, { "epoch": 1.8249459205746703, "grad_norm": 1.5492547750473022, "learning_rate": 1.9482833711230783e-05, "loss": 0.5953, "step": 11179 }, { "epoch": 1.8251091792171747, "grad_norm": 1.7913460731506348, "learning_rate": 1.94827329771497e-05, "loss": 0.7202, "step": 11180 }, { "epoch": 1.8252724378596792, "grad_norm": 2.021027088165283, "learning_rate": 1.9482632233519506e-05, "loss": 0.7561, "step": 11181 }, { "epoch": 1.8254356965021836, "grad_norm": 1.536083459854126, "learning_rate": 1.948253148034031e-05, "loss": 0.5773, "step": 11182 }, { "epoch": 1.825598955144688, "grad_norm": 1.5779736042022705, "learning_rate": 1.94824307176122e-05, "loss": 0.6059, "step": 11183 }, { "epoch": 1.8257622137871925, "grad_norm": 2.1145031452178955, "learning_rate": 1.9482329945335286e-05, "loss": 0.7257, "step": 11184 }, { "epoch": 1.8259254724296967, "grad_norm": 1.800539493560791, "learning_rate": 1.948222916350967e-05, "loss": 0.7636, "step": 11185 }, { "epoch": 1.8260887310722012, "grad_norm": 1.9278708696365356, "learning_rate": 1.9482128372135446e-05, "loss": 0.8386, "step": 11186 }, { "epoch": 1.8262519897147054, "grad_norm": 2.0208096504211426, "learning_rate": 1.9482027571212726e-05, "loss": 0.7656, "step": 11187 }, { "epoch": 1.8264152483572098, "grad_norm": 1.288275957107544, "learning_rate": 1.948192676074161e-05, "loss": 0.4425, "step": 11188 }, { "epoch": 1.8265785069997142, "grad_norm": 1.6276092529296875, "learning_rate": 1.9481825940722193e-05, "loss": 0.6872, "step": 11189 }, { "epoch": 1.8267417656422187, "grad_norm": 1.625482439994812, "learning_rate": 1.9481725111154577e-05, "loss": 0.7175, "step": 11190 }, { "epoch": 1.8269050242847231, "grad_norm": 1.7352010011672974, "learning_rate": 1.948162427203887e-05, "loss": 0.7251, "step": 11191 }, { "epoch": 1.8270682829272276, "grad_norm": 1.900902509689331, "learning_rate": 1.948152342337517e-05, "loss": 0.731, "step": 11192 }, { "epoch": 1.827231541569732, "grad_norm": 1.9057289361953735, "learning_rate": 1.9481422565163577e-05, "loss": 0.7099, "step": 11193 }, { "epoch": 1.8273948002122362, "grad_norm": 1.4270421266555786, "learning_rate": 1.9481321697404194e-05, "loss": 0.6543, "step": 11194 }, { "epoch": 1.8275580588547407, "grad_norm": 1.5865037441253662, "learning_rate": 1.948122082009712e-05, "loss": 0.5415, "step": 11195 }, { "epoch": 1.8277213174972449, "grad_norm": 1.4957873821258545, "learning_rate": 1.9481119933242464e-05, "loss": 0.677, "step": 11196 }, { "epoch": 1.8278845761397493, "grad_norm": 1.8010326623916626, "learning_rate": 1.948101903684032e-05, "loss": 0.5954, "step": 11197 }, { "epoch": 1.8280478347822537, "grad_norm": 1.446526288986206, "learning_rate": 1.9480918130890796e-05, "loss": 0.6009, "step": 11198 }, { "epoch": 1.8282110934247582, "grad_norm": 2.0267856121063232, "learning_rate": 1.9480817215393985e-05, "loss": 0.7043, "step": 11199 }, { "epoch": 1.8283743520672626, "grad_norm": 1.6669411659240723, "learning_rate": 1.9480716290349998e-05, "loss": 0.6874, "step": 11200 }, { "epoch": 1.828537610709767, "grad_norm": 1.96531343460083, "learning_rate": 1.9480615355758926e-05, "loss": 0.8037, "step": 11201 }, { "epoch": 1.8287008693522715, "grad_norm": 1.7916260957717896, "learning_rate": 1.9480514411620884e-05, "loss": 0.6397, "step": 11202 }, { "epoch": 1.8288641279947757, "grad_norm": 1.780898928642273, "learning_rate": 1.9480413457935962e-05, "loss": 0.638, "step": 11203 }, { "epoch": 1.8290273866372802, "grad_norm": 1.5533488988876343, "learning_rate": 1.9480312494704267e-05, "loss": 0.5975, "step": 11204 }, { "epoch": 1.8291906452797844, "grad_norm": 1.6473267078399658, "learning_rate": 1.94802115219259e-05, "loss": 0.5847, "step": 11205 }, { "epoch": 1.8293539039222888, "grad_norm": 1.6156991720199585, "learning_rate": 1.9480110539600964e-05, "loss": 0.7599, "step": 11206 }, { "epoch": 1.8295171625647932, "grad_norm": 1.3906582593917847, "learning_rate": 1.948000954772956e-05, "loss": 0.5807, "step": 11207 }, { "epoch": 1.8296804212072977, "grad_norm": 1.4848023653030396, "learning_rate": 1.9479908546311783e-05, "loss": 0.6078, "step": 11208 }, { "epoch": 1.8298436798498021, "grad_norm": 1.6552269458770752, "learning_rate": 1.9479807535347745e-05, "loss": 0.6589, "step": 11209 }, { "epoch": 1.8300069384923066, "grad_norm": 1.9116599559783936, "learning_rate": 1.9479706514837544e-05, "loss": 0.6627, "step": 11210 }, { "epoch": 1.8301701971348108, "grad_norm": 1.8586033582687378, "learning_rate": 1.9479605484781282e-05, "loss": 0.7771, "step": 11211 }, { "epoch": 1.8303334557773152, "grad_norm": 2.0048351287841797, "learning_rate": 1.9479504445179053e-05, "loss": 0.7115, "step": 11212 }, { "epoch": 1.8304967144198196, "grad_norm": 1.5767651796340942, "learning_rate": 1.9479403396030973e-05, "loss": 0.7223, "step": 11213 }, { "epoch": 1.8306599730623239, "grad_norm": 1.8348174095153809, "learning_rate": 1.9479302337337133e-05, "loss": 0.7359, "step": 11214 }, { "epoch": 1.8308232317048283, "grad_norm": 1.330308437347412, "learning_rate": 1.947920126909764e-05, "loss": 0.5897, "step": 11215 }, { "epoch": 1.8309864903473327, "grad_norm": 1.792144775390625, "learning_rate": 1.947910019131259e-05, "loss": 0.7036, "step": 11216 }, { "epoch": 1.8311497489898372, "grad_norm": 1.7983901500701904, "learning_rate": 1.947899910398209e-05, "loss": 0.6751, "step": 11217 }, { "epoch": 1.8313130076323416, "grad_norm": 1.6563245058059692, "learning_rate": 1.9478898007106243e-05, "loss": 0.6748, "step": 11218 }, { "epoch": 1.831476266274846, "grad_norm": 2.09661602973938, "learning_rate": 1.9478796900685145e-05, "loss": 0.7451, "step": 11219 }, { "epoch": 1.8316395249173503, "grad_norm": 1.7088379859924316, "learning_rate": 1.9478695784718905e-05, "loss": 0.6741, "step": 11220 }, { "epoch": 1.8318027835598547, "grad_norm": 1.6102770566940308, "learning_rate": 1.947859465920762e-05, "loss": 0.6785, "step": 11221 }, { "epoch": 1.831966042202359, "grad_norm": 1.98444664478302, "learning_rate": 1.9478493524151388e-05, "loss": 0.725, "step": 11222 }, { "epoch": 1.8321293008448634, "grad_norm": 1.8480366468429565, "learning_rate": 1.9478392379550318e-05, "loss": 0.7385, "step": 11223 }, { "epoch": 1.8322925594873678, "grad_norm": 2.0223300457000732, "learning_rate": 1.9478291225404512e-05, "loss": 0.7017, "step": 11224 }, { "epoch": 1.8324558181298722, "grad_norm": 1.3953683376312256, "learning_rate": 1.9478190061714067e-05, "loss": 0.66, "step": 11225 }, { "epoch": 1.8326190767723767, "grad_norm": 1.421250343322754, "learning_rate": 1.947808888847909e-05, "loss": 0.568, "step": 11226 }, { "epoch": 1.8327823354148811, "grad_norm": 1.884398341178894, "learning_rate": 1.9477987705699676e-05, "loss": 0.6555, "step": 11227 }, { "epoch": 1.8329455940573856, "grad_norm": 1.7607218027114868, "learning_rate": 1.947788651337593e-05, "loss": 0.7077, "step": 11228 }, { "epoch": 1.8331088526998898, "grad_norm": 1.7825236320495605, "learning_rate": 1.947778531150796e-05, "loss": 0.7313, "step": 11229 }, { "epoch": 1.8332721113423942, "grad_norm": 1.4085471630096436, "learning_rate": 1.947768410009586e-05, "loss": 0.6416, "step": 11230 }, { "epoch": 1.8334353699848984, "grad_norm": 1.522440791130066, "learning_rate": 1.9477582879139733e-05, "loss": 0.5594, "step": 11231 }, { "epoch": 1.8335986286274029, "grad_norm": 1.6018871068954468, "learning_rate": 1.9477481648639683e-05, "loss": 0.6622, "step": 11232 }, { "epoch": 1.8337618872699073, "grad_norm": 1.839788556098938, "learning_rate": 1.9477380408595815e-05, "loss": 0.6462, "step": 11233 }, { "epoch": 1.8339251459124117, "grad_norm": 1.8539906740188599, "learning_rate": 1.9477279159008223e-05, "loss": 0.7035, "step": 11234 }, { "epoch": 1.8340884045549162, "grad_norm": 1.6447192430496216, "learning_rate": 1.9477177899877016e-05, "loss": 0.6764, "step": 11235 }, { "epoch": 1.8342516631974206, "grad_norm": 1.8596315383911133, "learning_rate": 1.9477076631202293e-05, "loss": 0.5726, "step": 11236 }, { "epoch": 1.834414921839925, "grad_norm": 1.9673501253128052, "learning_rate": 1.9476975352984157e-05, "loss": 0.795, "step": 11237 }, { "epoch": 1.8345781804824293, "grad_norm": 1.5047483444213867, "learning_rate": 1.9476874065222708e-05, "loss": 0.5813, "step": 11238 }, { "epoch": 1.8347414391249337, "grad_norm": 1.5316208600997925, "learning_rate": 1.9476772767918047e-05, "loss": 0.6202, "step": 11239 }, { "epoch": 1.834904697767438, "grad_norm": 1.510323166847229, "learning_rate": 1.9476671461070286e-05, "loss": 0.5993, "step": 11240 }, { "epoch": 1.8350679564099424, "grad_norm": 1.815978765487671, "learning_rate": 1.9476570144679513e-05, "loss": 0.7495, "step": 11241 }, { "epoch": 1.8352312150524468, "grad_norm": 1.5339387655258179, "learning_rate": 1.9476468818745836e-05, "loss": 0.6798, "step": 11242 }, { "epoch": 1.8353944736949512, "grad_norm": 1.6140520572662354, "learning_rate": 1.947636748326936e-05, "loss": 0.6674, "step": 11243 }, { "epoch": 1.8355577323374557, "grad_norm": 1.4420990943908691, "learning_rate": 1.9476266138250186e-05, "loss": 0.5735, "step": 11244 }, { "epoch": 1.83572099097996, "grad_norm": 1.9324593544006348, "learning_rate": 1.947616478368841e-05, "loss": 0.6141, "step": 11245 }, { "epoch": 1.8358842496224645, "grad_norm": 1.4615000486373901, "learning_rate": 1.947606341958414e-05, "loss": 0.5616, "step": 11246 }, { "epoch": 1.8360475082649688, "grad_norm": 1.7870616912841797, "learning_rate": 1.9475962045937477e-05, "loss": 0.6902, "step": 11247 }, { "epoch": 1.8362107669074732, "grad_norm": 1.646500825881958, "learning_rate": 1.9475860662748524e-05, "loss": 0.6654, "step": 11248 }, { "epoch": 1.8363740255499774, "grad_norm": 1.6858272552490234, "learning_rate": 1.9475759270017382e-05, "loss": 0.643, "step": 11249 }, { "epoch": 1.8365372841924819, "grad_norm": 2.185067653656006, "learning_rate": 1.9475657867744153e-05, "loss": 0.8663, "step": 11250 }, { "epoch": 1.8367005428349863, "grad_norm": 1.560416579246521, "learning_rate": 1.9475556455928938e-05, "loss": 0.6258, "step": 11251 }, { "epoch": 1.8368638014774907, "grad_norm": 1.3716140985488892, "learning_rate": 1.947545503457184e-05, "loss": 0.5354, "step": 11252 }, { "epoch": 1.8370270601199952, "grad_norm": 1.3749982118606567, "learning_rate": 1.947535360367296e-05, "loss": 0.5336, "step": 11253 }, { "epoch": 1.8371903187624996, "grad_norm": 1.6696398258209229, "learning_rate": 1.9475252163232402e-05, "loss": 0.5648, "step": 11254 }, { "epoch": 1.8373535774050038, "grad_norm": 1.7056740522384644, "learning_rate": 1.947515071325027e-05, "loss": 0.6821, "step": 11255 }, { "epoch": 1.8375168360475083, "grad_norm": 1.3943918943405151, "learning_rate": 1.9475049253726663e-05, "loss": 0.5998, "step": 11256 }, { "epoch": 1.8376800946900127, "grad_norm": 1.6272138357162476, "learning_rate": 1.947494778466168e-05, "loss": 0.5563, "step": 11257 }, { "epoch": 1.837843353332517, "grad_norm": 2.0819168090820312, "learning_rate": 1.947484630605543e-05, "loss": 0.6057, "step": 11258 }, { "epoch": 1.8380066119750214, "grad_norm": 2.0557568073272705, "learning_rate": 1.9474744817908013e-05, "loss": 0.7944, "step": 11259 }, { "epoch": 1.8381698706175258, "grad_norm": 2.0977513790130615, "learning_rate": 1.9474643320219534e-05, "loss": 0.6226, "step": 11260 }, { "epoch": 1.8383331292600302, "grad_norm": 1.7435396909713745, "learning_rate": 1.9474541812990083e-05, "loss": 0.7028, "step": 11261 }, { "epoch": 1.8384963879025347, "grad_norm": 1.5857936143875122, "learning_rate": 1.947444029621978e-05, "loss": 0.602, "step": 11262 }, { "epoch": 1.838659646545039, "grad_norm": 1.8107887506484985, "learning_rate": 1.9474338769908712e-05, "loss": 0.6955, "step": 11263 }, { "epoch": 1.8388229051875433, "grad_norm": 1.413750171661377, "learning_rate": 1.9474237234056988e-05, "loss": 0.53, "step": 11264 }, { "epoch": 1.8389861638300478, "grad_norm": 1.4980103969573975, "learning_rate": 1.947413568866471e-05, "loss": 0.5441, "step": 11265 }, { "epoch": 1.839149422472552, "grad_norm": 1.5717209577560425, "learning_rate": 1.947403413373198e-05, "loss": 0.6761, "step": 11266 }, { "epoch": 1.8393126811150564, "grad_norm": 1.5957504510879517, "learning_rate": 1.94739325692589e-05, "loss": 0.5391, "step": 11267 }, { "epoch": 1.8394759397575609, "grad_norm": 1.6413758993148804, "learning_rate": 1.9473830995245575e-05, "loss": 0.6432, "step": 11268 }, { "epoch": 1.8396391984000653, "grad_norm": 1.969042420387268, "learning_rate": 1.9473729411692103e-05, "loss": 0.6869, "step": 11269 }, { "epoch": 1.8398024570425697, "grad_norm": 1.914711833000183, "learning_rate": 1.947362781859859e-05, "loss": 0.7038, "step": 11270 }, { "epoch": 1.8399657156850742, "grad_norm": 1.6980578899383545, "learning_rate": 1.947352621596513e-05, "loss": 0.6639, "step": 11271 }, { "epoch": 1.8401289743275786, "grad_norm": 2.0990400314331055, "learning_rate": 1.947342460379184e-05, "loss": 0.8484, "step": 11272 }, { "epoch": 1.8402922329700828, "grad_norm": 1.8922178745269775, "learning_rate": 1.9473322982078807e-05, "loss": 0.6568, "step": 11273 }, { "epoch": 1.8404554916125873, "grad_norm": 1.7580965757369995, "learning_rate": 1.9473221350826145e-05, "loss": 0.716, "step": 11274 }, { "epoch": 1.8406187502550915, "grad_norm": 1.3946014642715454, "learning_rate": 1.9473119710033947e-05, "loss": 0.5538, "step": 11275 }, { "epoch": 1.840782008897596, "grad_norm": 1.6833205223083496, "learning_rate": 1.9473018059702325e-05, "loss": 0.6515, "step": 11276 }, { "epoch": 1.8409452675401003, "grad_norm": 1.980355143547058, "learning_rate": 1.947291639983137e-05, "loss": 0.6445, "step": 11277 }, { "epoch": 1.8411085261826048, "grad_norm": 1.5229523181915283, "learning_rate": 1.9472814730421196e-05, "loss": 0.5309, "step": 11278 }, { "epoch": 1.8412717848251092, "grad_norm": 1.792183518409729, "learning_rate": 1.94727130514719e-05, "loss": 0.6402, "step": 11279 }, { "epoch": 1.8414350434676137, "grad_norm": 1.711656928062439, "learning_rate": 1.9472611362983583e-05, "loss": 0.6272, "step": 11280 }, { "epoch": 1.841598302110118, "grad_norm": 1.8281450271606445, "learning_rate": 1.947250966495635e-05, "loss": 0.7348, "step": 11281 }, { "epoch": 1.8417615607526223, "grad_norm": 1.6806683540344238, "learning_rate": 1.94724079573903e-05, "loss": 0.6091, "step": 11282 }, { "epoch": 1.8419248193951268, "grad_norm": 1.8940225839614868, "learning_rate": 1.947230624028554e-05, "loss": 0.6362, "step": 11283 }, { "epoch": 1.842088078037631, "grad_norm": 1.767269492149353, "learning_rate": 1.947220451364217e-05, "loss": 0.5912, "step": 11284 }, { "epoch": 1.8422513366801354, "grad_norm": 1.6992754936218262, "learning_rate": 1.9472102777460292e-05, "loss": 0.7633, "step": 11285 }, { "epoch": 1.8424145953226398, "grad_norm": 1.3356091976165771, "learning_rate": 1.9472001031740007e-05, "loss": 0.5407, "step": 11286 }, { "epoch": 1.8425778539651443, "grad_norm": 1.6204113960266113, "learning_rate": 1.947189927648142e-05, "loss": 0.6633, "step": 11287 }, { "epoch": 1.8427411126076487, "grad_norm": 1.5495291948318481, "learning_rate": 1.9471797511684635e-05, "loss": 0.5875, "step": 11288 }, { "epoch": 1.8429043712501532, "grad_norm": 1.8579989671707153, "learning_rate": 1.9471695737349748e-05, "loss": 0.7206, "step": 11289 }, { "epoch": 1.8430676298926576, "grad_norm": 1.920098900794983, "learning_rate": 1.9471593953476873e-05, "loss": 1.2492, "step": 11290 }, { "epoch": 1.8432308885351618, "grad_norm": 1.461244821548462, "learning_rate": 1.9471492160066103e-05, "loss": 0.5794, "step": 11291 }, { "epoch": 1.8433941471776663, "grad_norm": 1.7477149963378906, "learning_rate": 1.947139035711754e-05, "loss": 0.6681, "step": 11292 }, { "epoch": 1.8435574058201705, "grad_norm": 1.699796199798584, "learning_rate": 1.947128854463129e-05, "loss": 0.7775, "step": 11293 }, { "epoch": 1.843720664462675, "grad_norm": 1.916603684425354, "learning_rate": 1.9471186722607456e-05, "loss": 0.6829, "step": 11294 }, { "epoch": 1.8438839231051793, "grad_norm": 1.749789834022522, "learning_rate": 1.9471084891046138e-05, "loss": 0.713, "step": 11295 }, { "epoch": 1.8440471817476838, "grad_norm": 1.7580369710922241, "learning_rate": 1.9470983049947446e-05, "loss": 0.704, "step": 11296 }, { "epoch": 1.8442104403901882, "grad_norm": 1.5205944776535034, "learning_rate": 1.9470881199311472e-05, "loss": 0.5947, "step": 11297 }, { "epoch": 1.8443736990326927, "grad_norm": 1.9895873069763184, "learning_rate": 1.947077933913832e-05, "loss": 0.7648, "step": 11298 }, { "epoch": 1.8445369576751969, "grad_norm": 1.4842942953109741, "learning_rate": 1.9470677469428102e-05, "loss": 0.5077, "step": 11299 }, { "epoch": 1.8447002163177013, "grad_norm": 2.0025477409362793, "learning_rate": 1.947057559018091e-05, "loss": 0.8958, "step": 11300 }, { "epoch": 1.8448634749602058, "grad_norm": 1.8291630744934082, "learning_rate": 1.9470473701396852e-05, "loss": 0.6888, "step": 11301 }, { "epoch": 1.84502673360271, "grad_norm": 1.8233606815338135, "learning_rate": 1.947037180307603e-05, "loss": 0.7836, "step": 11302 }, { "epoch": 1.8451899922452144, "grad_norm": 1.5006870031356812, "learning_rate": 1.9470269895218545e-05, "loss": 0.7262, "step": 11303 }, { "epoch": 1.8453532508877188, "grad_norm": 1.8186402320861816, "learning_rate": 1.9470167977824502e-05, "loss": 0.6472, "step": 11304 }, { "epoch": 1.8455165095302233, "grad_norm": 1.984135389328003, "learning_rate": 1.9470066050894e-05, "loss": 0.7281, "step": 11305 }, { "epoch": 1.8456797681727277, "grad_norm": 1.3637615442276, "learning_rate": 1.9469964114427148e-05, "loss": 0.555, "step": 11306 }, { "epoch": 1.8458430268152322, "grad_norm": 1.862672209739685, "learning_rate": 1.9469862168424042e-05, "loss": 0.7133, "step": 11307 }, { "epoch": 1.8460062854577364, "grad_norm": 1.6852246522903442, "learning_rate": 1.946976021288479e-05, "loss": 0.6778, "step": 11308 }, { "epoch": 1.8461695441002408, "grad_norm": 1.6572946310043335, "learning_rate": 1.946965824780949e-05, "loss": 0.6531, "step": 11309 }, { "epoch": 1.8463328027427452, "grad_norm": 2.092381000518799, "learning_rate": 1.9469556273198246e-05, "loss": 0.8154, "step": 11310 }, { "epoch": 1.8464960613852495, "grad_norm": 1.7718756198883057, "learning_rate": 1.946945428905116e-05, "loss": 0.703, "step": 11311 }, { "epoch": 1.846659320027754, "grad_norm": 1.5157105922698975, "learning_rate": 1.946935229536834e-05, "loss": 0.5882, "step": 11312 }, { "epoch": 1.8468225786702583, "grad_norm": 1.5778361558914185, "learning_rate": 1.9469250292149883e-05, "loss": 0.6684, "step": 11313 }, { "epoch": 1.8469858373127628, "grad_norm": 1.5157126188278198, "learning_rate": 1.9469148279395892e-05, "loss": 0.5557, "step": 11314 }, { "epoch": 1.8471490959552672, "grad_norm": 1.479537844657898, "learning_rate": 1.9469046257106472e-05, "loss": 0.6536, "step": 11315 }, { "epoch": 1.8473123545977717, "grad_norm": 1.673499584197998, "learning_rate": 1.9468944225281724e-05, "loss": 0.717, "step": 11316 }, { "epoch": 1.8474756132402759, "grad_norm": 1.4209645986557007, "learning_rate": 1.9468842183921753e-05, "loss": 0.5635, "step": 11317 }, { "epoch": 1.8476388718827803, "grad_norm": 1.4930143356323242, "learning_rate": 1.946874013302666e-05, "loss": 0.611, "step": 11318 }, { "epoch": 1.8478021305252845, "grad_norm": 1.8762633800506592, "learning_rate": 1.9468638072596547e-05, "loss": 0.7428, "step": 11319 }, { "epoch": 1.847965389167789, "grad_norm": 1.442726492881775, "learning_rate": 1.946853600263152e-05, "loss": 0.5552, "step": 11320 }, { "epoch": 1.8481286478102934, "grad_norm": 1.7356958389282227, "learning_rate": 1.9468433923131683e-05, "loss": 0.6051, "step": 11321 }, { "epoch": 1.8482919064527978, "grad_norm": 1.5646950006484985, "learning_rate": 1.946833183409713e-05, "loss": 0.6417, "step": 11322 }, { "epoch": 1.8484551650953023, "grad_norm": 1.790474534034729, "learning_rate": 1.9468229735527972e-05, "loss": 0.7646, "step": 11323 }, { "epoch": 1.8486184237378067, "grad_norm": 1.8446646928787231, "learning_rate": 1.9468127627424305e-05, "loss": 0.6614, "step": 11324 }, { "epoch": 1.8487816823803112, "grad_norm": 1.4836266040802002, "learning_rate": 1.9468025509786243e-05, "loss": 0.6952, "step": 11325 }, { "epoch": 1.8489449410228154, "grad_norm": 1.7344393730163574, "learning_rate": 1.946792338261388e-05, "loss": 0.6527, "step": 11326 }, { "epoch": 1.8491081996653198, "grad_norm": 1.6935209035873413, "learning_rate": 1.9467821245907316e-05, "loss": 0.7319, "step": 11327 }, { "epoch": 1.849271458307824, "grad_norm": 1.5915470123291016, "learning_rate": 1.9467719099666662e-05, "loss": 0.6877, "step": 11328 }, { "epoch": 1.8494347169503285, "grad_norm": 1.5453569889068604, "learning_rate": 1.946761694389202e-05, "loss": 0.5632, "step": 11329 }, { "epoch": 1.849597975592833, "grad_norm": 1.5011414289474487, "learning_rate": 1.9467514778583484e-05, "loss": 0.5964, "step": 11330 }, { "epoch": 1.8497612342353373, "grad_norm": 1.698552131652832, "learning_rate": 1.9467412603741163e-05, "loss": 0.6291, "step": 11331 }, { "epoch": 1.8499244928778418, "grad_norm": 1.8297092914581299, "learning_rate": 1.9467310419365165e-05, "loss": 0.7323, "step": 11332 }, { "epoch": 1.8500877515203462, "grad_norm": 1.8055306673049927, "learning_rate": 1.9467208225455587e-05, "loss": 0.7204, "step": 11333 }, { "epoch": 1.8502510101628507, "grad_norm": 1.5056910514831543, "learning_rate": 1.946710602201253e-05, "loss": 0.5595, "step": 11334 }, { "epoch": 1.8504142688053549, "grad_norm": 1.6088930368423462, "learning_rate": 1.9467003809036106e-05, "loss": 0.5525, "step": 11335 }, { "epoch": 1.8505775274478593, "grad_norm": 1.2224750518798828, "learning_rate": 1.9466901586526403e-05, "loss": 0.4546, "step": 11336 }, { "epoch": 1.8507407860903635, "grad_norm": 1.5027700662612915, "learning_rate": 1.9466799354483538e-05, "loss": 0.6142, "step": 11337 }, { "epoch": 1.850904044732868, "grad_norm": 1.7277427911758423, "learning_rate": 1.9466697112907608e-05, "loss": 0.6825, "step": 11338 }, { "epoch": 1.8510673033753724, "grad_norm": 1.7092294692993164, "learning_rate": 1.9466594861798715e-05, "loss": 0.6285, "step": 11339 }, { "epoch": 1.8512305620178768, "grad_norm": 1.2341948747634888, "learning_rate": 1.9466492601156964e-05, "loss": 0.4894, "step": 11340 }, { "epoch": 1.8513938206603813, "grad_norm": 1.4450682401657104, "learning_rate": 1.9466390330982457e-05, "loss": 0.6001, "step": 11341 }, { "epoch": 1.8515570793028857, "grad_norm": 1.8335946798324585, "learning_rate": 1.9466288051275303e-05, "loss": 0.7955, "step": 11342 }, { "epoch": 1.8517203379453901, "grad_norm": 1.6008360385894775, "learning_rate": 1.9466185762035592e-05, "loss": 0.5293, "step": 11343 }, { "epoch": 1.8518835965878944, "grad_norm": 1.6721000671386719, "learning_rate": 1.9466083463263437e-05, "loss": 0.6269, "step": 11344 }, { "epoch": 1.8520468552303988, "grad_norm": 1.7251214981079102, "learning_rate": 1.946598115495894e-05, "loss": 0.7318, "step": 11345 }, { "epoch": 1.852210113872903, "grad_norm": 1.382997989654541, "learning_rate": 1.94658788371222e-05, "loss": 0.5113, "step": 11346 }, { "epoch": 1.8523733725154075, "grad_norm": 1.6176731586456299, "learning_rate": 1.9465776509753325e-05, "loss": 0.5884, "step": 11347 }, { "epoch": 1.852536631157912, "grad_norm": 1.3850488662719727, "learning_rate": 1.9465674172852412e-05, "loss": 0.5591, "step": 11348 }, { "epoch": 1.8526998898004163, "grad_norm": 1.716545820236206, "learning_rate": 1.946557182641957e-05, "loss": 0.5858, "step": 11349 }, { "epoch": 1.8528631484429208, "grad_norm": 1.6958024501800537, "learning_rate": 1.94654694704549e-05, "loss": 0.7041, "step": 11350 }, { "epoch": 1.8530264070854252, "grad_norm": 1.573082685470581, "learning_rate": 1.9465367104958507e-05, "loss": 0.723, "step": 11351 }, { "epoch": 1.8531896657279294, "grad_norm": 1.9408619403839111, "learning_rate": 1.946526472993049e-05, "loss": 0.6485, "step": 11352 }, { "epoch": 1.8533529243704339, "grad_norm": 1.4201043844223022, "learning_rate": 1.9465162345370953e-05, "loss": 0.5132, "step": 11353 }, { "epoch": 1.8535161830129383, "grad_norm": 1.8337892293930054, "learning_rate": 1.9465059951279998e-05, "loss": 0.6698, "step": 11354 }, { "epoch": 1.8536794416554425, "grad_norm": 1.4373219013214111, "learning_rate": 1.9464957547657734e-05, "loss": 0.5897, "step": 11355 }, { "epoch": 1.853842700297947, "grad_norm": 2.0871987342834473, "learning_rate": 1.9464855134504256e-05, "loss": 0.5719, "step": 11356 }, { "epoch": 1.8540059589404514, "grad_norm": 1.658308506011963, "learning_rate": 1.9464752711819674e-05, "loss": 0.6588, "step": 11357 }, { "epoch": 1.8541692175829558, "grad_norm": 2.4047887325286865, "learning_rate": 1.946465027960409e-05, "loss": 0.835, "step": 11358 }, { "epoch": 1.8543324762254603, "grad_norm": 1.6464979648590088, "learning_rate": 1.9464547837857603e-05, "loss": 0.6283, "step": 11359 }, { "epoch": 1.8544957348679647, "grad_norm": 1.4339265823364258, "learning_rate": 1.946444538658032e-05, "loss": 0.5472, "step": 11360 }, { "epoch": 1.854658993510469, "grad_norm": 1.6774463653564453, "learning_rate": 1.9464342925772345e-05, "loss": 0.6762, "step": 11361 }, { "epoch": 1.8548222521529734, "grad_norm": 1.815497875213623, "learning_rate": 1.9464240455433775e-05, "loss": 0.643, "step": 11362 }, { "epoch": 1.8549855107954776, "grad_norm": 2.0059027671813965, "learning_rate": 1.946413797556472e-05, "loss": 0.677, "step": 11363 }, { "epoch": 1.855148769437982, "grad_norm": 1.7404001951217651, "learning_rate": 1.946403548616528e-05, "loss": 0.6993, "step": 11364 }, { "epoch": 1.8553120280804865, "grad_norm": 1.6878925561904907, "learning_rate": 1.9463932987235558e-05, "loss": 0.6627, "step": 11365 }, { "epoch": 1.855475286722991, "grad_norm": 2.3901617527008057, "learning_rate": 1.946383047877566e-05, "loss": 0.8456, "step": 11366 }, { "epoch": 1.8556385453654953, "grad_norm": 1.7893768548965454, "learning_rate": 1.9463727960785686e-05, "loss": 0.6815, "step": 11367 }, { "epoch": 1.8558018040079998, "grad_norm": 1.5406551361083984, "learning_rate": 1.946362543326574e-05, "loss": 0.6584, "step": 11368 }, { "epoch": 1.8559650626505042, "grad_norm": 1.621347188949585, "learning_rate": 1.9463522896215927e-05, "loss": 0.7255, "step": 11369 }, { "epoch": 1.8561283212930084, "grad_norm": 1.5381826162338257, "learning_rate": 1.9463420349636348e-05, "loss": 0.7185, "step": 11370 }, { "epoch": 1.8562915799355129, "grad_norm": 1.6084988117218018, "learning_rate": 1.9463317793527106e-05, "loss": 0.6015, "step": 11371 }, { "epoch": 1.856454838578017, "grad_norm": 1.6313977241516113, "learning_rate": 1.9463215227888307e-05, "loss": 0.6538, "step": 11372 }, { "epoch": 1.8566180972205215, "grad_norm": 1.4991780519485474, "learning_rate": 1.9463112652720055e-05, "loss": 0.7267, "step": 11373 }, { "epoch": 1.856781355863026, "grad_norm": 1.4805326461791992, "learning_rate": 1.946301006802245e-05, "loss": 0.6189, "step": 11374 }, { "epoch": 1.8569446145055304, "grad_norm": 1.5228383541107178, "learning_rate": 1.9462907473795593e-05, "loss": 0.634, "step": 11375 }, { "epoch": 1.8571078731480348, "grad_norm": 1.6474980115890503, "learning_rate": 1.9462804870039592e-05, "loss": 0.7198, "step": 11376 }, { "epoch": 1.8572711317905393, "grad_norm": 1.8675572872161865, "learning_rate": 1.946270225675455e-05, "loss": 0.6364, "step": 11377 }, { "epoch": 1.8574343904330437, "grad_norm": 1.5638259649276733, "learning_rate": 1.9462599633940572e-05, "loss": 0.6255, "step": 11378 }, { "epoch": 1.857597649075548, "grad_norm": 1.4250820875167847, "learning_rate": 1.9462497001597752e-05, "loss": 0.53, "step": 11379 }, { "epoch": 1.8577609077180524, "grad_norm": 1.7427653074264526, "learning_rate": 1.9462394359726208e-05, "loss": 0.7597, "step": 11380 }, { "epoch": 1.8579241663605566, "grad_norm": 1.4496889114379883, "learning_rate": 1.9462291708326027e-05, "loss": 0.5825, "step": 11381 }, { "epoch": 1.858087425003061, "grad_norm": 2.135394334793091, "learning_rate": 1.9462189047397326e-05, "loss": 0.85, "step": 11382 }, { "epoch": 1.8582506836455654, "grad_norm": 1.6018152236938477, "learning_rate": 1.9462086376940205e-05, "loss": 0.7263, "step": 11383 }, { "epoch": 1.8584139422880699, "grad_norm": 1.8383878469467163, "learning_rate": 1.946198369695476e-05, "loss": 0.6741, "step": 11384 }, { "epoch": 1.8585772009305743, "grad_norm": 1.8410464525222778, "learning_rate": 1.9461881007441102e-05, "loss": 0.6622, "step": 11385 }, { "epoch": 1.8587404595730788, "grad_norm": 1.4778528213500977, "learning_rate": 1.9461778308399336e-05, "loss": 0.6273, "step": 11386 }, { "epoch": 1.8589037182155832, "grad_norm": 1.4650120735168457, "learning_rate": 1.9461675599829558e-05, "loss": 0.5434, "step": 11387 }, { "epoch": 1.8590669768580874, "grad_norm": 1.691633939743042, "learning_rate": 1.9461572881731876e-05, "loss": 0.7043, "step": 11388 }, { "epoch": 1.8592302355005919, "grad_norm": 1.9041624069213867, "learning_rate": 1.946147015410639e-05, "loss": 0.6537, "step": 11389 }, { "epoch": 1.859393494143096, "grad_norm": 1.782978892326355, "learning_rate": 1.946136741695321e-05, "loss": 0.7683, "step": 11390 }, { "epoch": 1.8595567527856005, "grad_norm": 1.7229278087615967, "learning_rate": 1.9461264670272432e-05, "loss": 0.6608, "step": 11391 }, { "epoch": 1.859720011428105, "grad_norm": 1.489294171333313, "learning_rate": 1.9461161914064166e-05, "loss": 0.7129, "step": 11392 }, { "epoch": 1.8598832700706094, "grad_norm": 1.7740106582641602, "learning_rate": 1.946105914832851e-05, "loss": 0.6981, "step": 11393 }, { "epoch": 1.8600465287131138, "grad_norm": 1.646751046180725, "learning_rate": 1.9460956373065574e-05, "loss": 0.5959, "step": 11394 }, { "epoch": 1.8602097873556183, "grad_norm": 1.8456655740737915, "learning_rate": 1.9460853588275454e-05, "loss": 0.7989, "step": 11395 }, { "epoch": 1.8603730459981225, "grad_norm": 1.670685052871704, "learning_rate": 1.9460750793958257e-05, "loss": 0.6349, "step": 11396 }, { "epoch": 1.860536304640627, "grad_norm": 1.4241327047348022, "learning_rate": 1.946064799011409e-05, "loss": 0.4604, "step": 11397 }, { "epoch": 1.8606995632831314, "grad_norm": 1.3810228109359741, "learning_rate": 1.946054517674305e-05, "loss": 0.5067, "step": 11398 }, { "epoch": 1.8608628219256356, "grad_norm": 1.9902907609939575, "learning_rate": 1.9460442353845245e-05, "loss": 0.7067, "step": 11399 }, { "epoch": 1.86102608056814, "grad_norm": 1.5193673372268677, "learning_rate": 1.946033952142077e-05, "loss": 0.5881, "step": 11400 }, { "epoch": 1.8611893392106444, "grad_norm": 1.539060115814209, "learning_rate": 1.9460236679469748e-05, "loss": 0.5943, "step": 11401 }, { "epoch": 1.8613525978531489, "grad_norm": 2.008824586868286, "learning_rate": 1.946013382799226e-05, "loss": 0.9271, "step": 11402 }, { "epoch": 1.8615158564956533, "grad_norm": 1.7220029830932617, "learning_rate": 1.9460030966988427e-05, "loss": 0.6266, "step": 11403 }, { "epoch": 1.8616791151381578, "grad_norm": 1.668290376663208, "learning_rate": 1.945992809645834e-05, "loss": 0.6703, "step": 11404 }, { "epoch": 1.861842373780662, "grad_norm": 1.9252721071243286, "learning_rate": 1.945982521640211e-05, "loss": 0.6598, "step": 11405 }, { "epoch": 1.8620056324231664, "grad_norm": 1.6196459531784058, "learning_rate": 1.945972232681984e-05, "loss": 0.6037, "step": 11406 }, { "epoch": 1.8621688910656706, "grad_norm": 1.6449024677276611, "learning_rate": 1.9459619427711626e-05, "loss": 0.5798, "step": 11407 }, { "epoch": 1.862332149708175, "grad_norm": 1.7002391815185547, "learning_rate": 1.9459516519077585e-05, "loss": 0.7592, "step": 11408 }, { "epoch": 1.8624954083506795, "grad_norm": 1.6330289840698242, "learning_rate": 1.9459413600917808e-05, "loss": 0.6487, "step": 11409 }, { "epoch": 1.862658666993184, "grad_norm": 1.6738574504852295, "learning_rate": 1.9459310673232407e-05, "loss": 0.7145, "step": 11410 }, { "epoch": 1.8628219256356884, "grad_norm": 1.447174310684204, "learning_rate": 1.9459207736021484e-05, "loss": 0.5083, "step": 11411 }, { "epoch": 1.8629851842781928, "grad_norm": 1.7610563039779663, "learning_rate": 1.945910478928514e-05, "loss": 0.7341, "step": 11412 }, { "epoch": 1.8631484429206973, "grad_norm": 1.4928058385849, "learning_rate": 1.9459001833023477e-05, "loss": 0.5383, "step": 11413 }, { "epoch": 1.8633117015632015, "grad_norm": 1.521897792816162, "learning_rate": 1.9458898867236603e-05, "loss": 0.5651, "step": 11414 }, { "epoch": 1.863474960205706, "grad_norm": 1.6357287168502808, "learning_rate": 1.9458795891924623e-05, "loss": 0.607, "step": 11415 }, { "epoch": 1.8636382188482101, "grad_norm": 1.4678351879119873, "learning_rate": 1.9458692907087636e-05, "loss": 0.496, "step": 11416 }, { "epoch": 1.8638014774907146, "grad_norm": 1.6538318395614624, "learning_rate": 1.9458589912725746e-05, "loss": 0.5518, "step": 11417 }, { "epoch": 1.863964736133219, "grad_norm": 1.44560706615448, "learning_rate": 1.9458486908839063e-05, "loss": 0.6461, "step": 11418 }, { "epoch": 1.8641279947757234, "grad_norm": 1.5160542726516724, "learning_rate": 1.9458383895427683e-05, "loss": 0.5504, "step": 11419 }, { "epoch": 1.8642912534182279, "grad_norm": 1.6698386669158936, "learning_rate": 1.9458280872491713e-05, "loss": 0.6928, "step": 11420 }, { "epoch": 1.8644545120607323, "grad_norm": 1.6586346626281738, "learning_rate": 1.945817784003126e-05, "loss": 0.6696, "step": 11421 }, { "epoch": 1.8646177707032368, "grad_norm": 1.803773283958435, "learning_rate": 1.945807479804642e-05, "loss": 0.7711, "step": 11422 }, { "epoch": 1.864781029345741, "grad_norm": 1.61565363407135, "learning_rate": 1.9457971746537306e-05, "loss": 0.6441, "step": 11423 }, { "epoch": 1.8649442879882454, "grad_norm": 1.5894356966018677, "learning_rate": 1.945786868550401e-05, "loss": 0.7086, "step": 11424 }, { "epoch": 1.8651075466307496, "grad_norm": 1.698496699333191, "learning_rate": 1.9457765614946648e-05, "loss": 0.6764, "step": 11425 }, { "epoch": 1.865270805273254, "grad_norm": 1.6154731512069702, "learning_rate": 1.945766253486532e-05, "loss": 0.6144, "step": 11426 }, { "epoch": 1.8654340639157585, "grad_norm": 1.530824065208435, "learning_rate": 1.9457559445260124e-05, "loss": 0.5731, "step": 11427 }, { "epoch": 1.865597322558263, "grad_norm": 1.7381176948547363, "learning_rate": 1.945745634613117e-05, "loss": 0.6502, "step": 11428 }, { "epoch": 1.8657605812007674, "grad_norm": 1.4016667604446411, "learning_rate": 1.9457353237478557e-05, "loss": 0.5898, "step": 11429 }, { "epoch": 1.8659238398432718, "grad_norm": 1.791313886642456, "learning_rate": 1.9457250119302396e-05, "loss": 0.7827, "step": 11430 }, { "epoch": 1.8660870984857763, "grad_norm": 1.9328656196594238, "learning_rate": 1.9457146991602785e-05, "loss": 0.7082, "step": 11431 }, { "epoch": 1.8662503571282805, "grad_norm": 1.9362692832946777, "learning_rate": 1.945704385437983e-05, "loss": 0.703, "step": 11432 }, { "epoch": 1.866413615770785, "grad_norm": 1.9214754104614258, "learning_rate": 1.9456940707633634e-05, "loss": 0.6218, "step": 11433 }, { "epoch": 1.8665768744132891, "grad_norm": 1.62966787815094, "learning_rate": 1.9456837551364304e-05, "loss": 0.6984, "step": 11434 }, { "epoch": 1.8667401330557936, "grad_norm": 1.689095377922058, "learning_rate": 1.9456734385571935e-05, "loss": 0.7576, "step": 11435 }, { "epoch": 1.866903391698298, "grad_norm": 1.4133380651474, "learning_rate": 1.945663121025664e-05, "loss": 0.5262, "step": 11436 }, { "epoch": 1.8670666503408024, "grad_norm": 1.6677546501159668, "learning_rate": 1.9456528025418523e-05, "loss": 0.657, "step": 11437 }, { "epoch": 1.8672299089833069, "grad_norm": 1.6481825113296509, "learning_rate": 1.945642483105768e-05, "loss": 0.7048, "step": 11438 }, { "epoch": 1.8673931676258113, "grad_norm": 1.8044381141662598, "learning_rate": 1.9456321627174222e-05, "loss": 0.6084, "step": 11439 }, { "epoch": 1.8675564262683155, "grad_norm": 1.6847569942474365, "learning_rate": 1.945621841376825e-05, "loss": 0.6369, "step": 11440 }, { "epoch": 1.86771968491082, "grad_norm": 1.6886372566223145, "learning_rate": 1.945611519083987e-05, "loss": 0.6501, "step": 11441 }, { "epoch": 1.8678829435533244, "grad_norm": 1.530936360359192, "learning_rate": 1.9456011958389186e-05, "loss": 0.5738, "step": 11442 }, { "epoch": 1.8680462021958286, "grad_norm": 1.961145043373108, "learning_rate": 1.9455908716416296e-05, "loss": 0.7787, "step": 11443 }, { "epoch": 1.868209460838333, "grad_norm": 1.7782002687454224, "learning_rate": 1.945580546492131e-05, "loss": 0.6667, "step": 11444 }, { "epoch": 1.8683727194808375, "grad_norm": 1.4123544692993164, "learning_rate": 1.9455702203904333e-05, "loss": 0.5282, "step": 11445 }, { "epoch": 1.868535978123342, "grad_norm": 2.269317626953125, "learning_rate": 1.945559893336546e-05, "loss": 0.5888, "step": 11446 }, { "epoch": 1.8686992367658464, "grad_norm": 2.1223304271698, "learning_rate": 1.9455495653304806e-05, "loss": 0.5977, "step": 11447 }, { "epoch": 1.8688624954083508, "grad_norm": 1.689969539642334, "learning_rate": 1.9455392363722468e-05, "loss": 0.6713, "step": 11448 }, { "epoch": 1.869025754050855, "grad_norm": 1.640174388885498, "learning_rate": 1.9455289064618558e-05, "loss": 0.6213, "step": 11449 }, { "epoch": 1.8691890126933595, "grad_norm": 2.321896553039551, "learning_rate": 1.945518575599317e-05, "loss": 0.7272, "step": 11450 }, { "epoch": 1.8693522713358637, "grad_norm": 1.5930275917053223, "learning_rate": 1.9455082437846415e-05, "loss": 0.594, "step": 11451 }, { "epoch": 1.8695155299783681, "grad_norm": 1.987666368484497, "learning_rate": 1.9454979110178392e-05, "loss": 0.8378, "step": 11452 }, { "epoch": 1.8696787886208726, "grad_norm": 1.6309988498687744, "learning_rate": 1.9454875772989207e-05, "loss": 0.6281, "step": 11453 }, { "epoch": 1.869842047263377, "grad_norm": 1.577735185623169, "learning_rate": 1.9454772426278965e-05, "loss": 0.6775, "step": 11454 }, { "epoch": 1.8700053059058814, "grad_norm": 1.7238805294036865, "learning_rate": 1.9454669070047772e-05, "loss": 0.6788, "step": 11455 }, { "epoch": 1.8701685645483859, "grad_norm": 1.6100451946258545, "learning_rate": 1.9454565704295728e-05, "loss": 0.5719, "step": 11456 }, { "epoch": 1.8703318231908903, "grad_norm": 2.0530707836151123, "learning_rate": 1.945446232902294e-05, "loss": 0.8713, "step": 11457 }, { "epoch": 1.8704950818333945, "grad_norm": 1.7088276147842407, "learning_rate": 1.9454358944229507e-05, "loss": 0.6137, "step": 11458 }, { "epoch": 1.870658340475899, "grad_norm": 1.5064231157302856, "learning_rate": 1.9454255549915542e-05, "loss": 0.5664, "step": 11459 }, { "epoch": 1.8708215991184032, "grad_norm": 2.5388591289520264, "learning_rate": 1.945415214608114e-05, "loss": 0.6976, "step": 11460 }, { "epoch": 1.8709848577609076, "grad_norm": 1.5529669523239136, "learning_rate": 1.9454048732726415e-05, "loss": 0.6593, "step": 11461 }, { "epoch": 1.871148116403412, "grad_norm": 1.5846813917160034, "learning_rate": 1.9453945309851462e-05, "loss": 0.6595, "step": 11462 }, { "epoch": 1.8713113750459165, "grad_norm": 1.4811205863952637, "learning_rate": 1.9453841877456384e-05, "loss": 0.6129, "step": 11463 }, { "epoch": 1.871474633688421, "grad_norm": 1.5899109840393066, "learning_rate": 1.9453738435541296e-05, "loss": 0.7079, "step": 11464 }, { "epoch": 1.8716378923309254, "grad_norm": 2.0411670207977295, "learning_rate": 1.9453634984106293e-05, "loss": 0.8161, "step": 11465 }, { "epoch": 1.8718011509734298, "grad_norm": 1.5072439908981323, "learning_rate": 1.9453531523151486e-05, "loss": 0.4786, "step": 11466 }, { "epoch": 1.871964409615934, "grad_norm": 1.7958481311798096, "learning_rate": 1.945342805267697e-05, "loss": 0.8287, "step": 11467 }, { "epoch": 1.8721276682584385, "grad_norm": 1.8937709331512451, "learning_rate": 1.9453324572682856e-05, "loss": 0.7355, "step": 11468 }, { "epoch": 1.8722909269009427, "grad_norm": 1.47244131565094, "learning_rate": 1.945322108316925e-05, "loss": 0.6431, "step": 11469 }, { "epoch": 1.8724541855434471, "grad_norm": 2.0092735290527344, "learning_rate": 1.9453117584136253e-05, "loss": 0.6744, "step": 11470 }, { "epoch": 1.8726174441859516, "grad_norm": 1.7591285705566406, "learning_rate": 1.9453014075583962e-05, "loss": 0.6495, "step": 11471 }, { "epoch": 1.872780702828456, "grad_norm": 1.8152241706848145, "learning_rate": 1.9452910557512497e-05, "loss": 0.7554, "step": 11472 }, { "epoch": 1.8729439614709604, "grad_norm": 1.8670258522033691, "learning_rate": 1.9452807029921947e-05, "loss": 0.7125, "step": 11473 }, { "epoch": 1.8731072201134649, "grad_norm": 1.7216867208480835, "learning_rate": 1.9452703492812425e-05, "loss": 0.6457, "step": 11474 }, { "epoch": 1.8732704787559693, "grad_norm": 1.6917376518249512, "learning_rate": 1.9452599946184033e-05, "loss": 0.6457, "step": 11475 }, { "epoch": 1.8734337373984735, "grad_norm": 1.7641962766647339, "learning_rate": 1.9452496390036876e-05, "loss": 0.6971, "step": 11476 }, { "epoch": 1.873596996040978, "grad_norm": 1.8004573583602905, "learning_rate": 1.9452392824371057e-05, "loss": 0.6141, "step": 11477 }, { "epoch": 1.8737602546834822, "grad_norm": 1.8828840255737305, "learning_rate": 1.945228924918668e-05, "loss": 0.7949, "step": 11478 }, { "epoch": 1.8739235133259866, "grad_norm": 1.5240252017974854, "learning_rate": 1.9452185664483854e-05, "loss": 0.5971, "step": 11479 }, { "epoch": 1.874086771968491, "grad_norm": 1.577991008758545, "learning_rate": 1.9452082070262678e-05, "loss": 0.6906, "step": 11480 }, { "epoch": 1.8742500306109955, "grad_norm": 1.663020372390747, "learning_rate": 1.9451978466523256e-05, "loss": 0.6785, "step": 11481 }, { "epoch": 1.8744132892535, "grad_norm": 1.7203855514526367, "learning_rate": 1.9451874853265695e-05, "loss": 0.7135, "step": 11482 }, { "epoch": 1.8745765478960044, "grad_norm": 1.2431669235229492, "learning_rate": 1.94517712304901e-05, "loss": 0.4815, "step": 11483 }, { "epoch": 1.8747398065385086, "grad_norm": 1.6367541551589966, "learning_rate": 1.945166759819657e-05, "loss": 0.671, "step": 11484 }, { "epoch": 1.874903065181013, "grad_norm": 1.5486866235733032, "learning_rate": 1.9451563956385217e-05, "loss": 0.5137, "step": 11485 }, { "epoch": 1.8750663238235175, "grad_norm": 1.619064450263977, "learning_rate": 1.945146030505614e-05, "loss": 0.6845, "step": 11486 }, { "epoch": 1.8752295824660217, "grad_norm": 1.8517924547195435, "learning_rate": 1.9451356644209445e-05, "loss": 0.6046, "step": 11487 }, { "epoch": 1.875392841108526, "grad_norm": 1.6384507417678833, "learning_rate": 1.9451252973845238e-05, "loss": 0.6861, "step": 11488 }, { "epoch": 1.8755560997510305, "grad_norm": 1.7289226055145264, "learning_rate": 1.945114929396362e-05, "loss": 0.7351, "step": 11489 }, { "epoch": 1.875719358393535, "grad_norm": 1.6299560070037842, "learning_rate": 1.94510456045647e-05, "loss": 0.6677, "step": 11490 }, { "epoch": 1.8758826170360394, "grad_norm": 1.5504000186920166, "learning_rate": 1.9450941905648575e-05, "loss": 0.6804, "step": 11491 }, { "epoch": 1.8760458756785439, "grad_norm": 1.9602934122085571, "learning_rate": 1.945083819721536e-05, "loss": 0.7653, "step": 11492 }, { "epoch": 1.876209134321048, "grad_norm": 1.7588677406311035, "learning_rate": 1.9450734479265146e-05, "loss": 0.7337, "step": 11493 }, { "epoch": 1.8763723929635525, "grad_norm": 1.8978039026260376, "learning_rate": 1.945063075179805e-05, "loss": 0.6901, "step": 11494 }, { "epoch": 1.8765356516060567, "grad_norm": 1.5178502798080444, "learning_rate": 1.9450527014814173e-05, "loss": 0.5586, "step": 11495 }, { "epoch": 1.8766989102485612, "grad_norm": 1.5865373611450195, "learning_rate": 1.9450423268313613e-05, "loss": 0.6402, "step": 11496 }, { "epoch": 1.8768621688910656, "grad_norm": 1.5291099548339844, "learning_rate": 1.9450319512296478e-05, "loss": 0.6351, "step": 11497 }, { "epoch": 1.87702542753357, "grad_norm": 1.416226863861084, "learning_rate": 1.9450215746762878e-05, "loss": 0.6322, "step": 11498 }, { "epoch": 1.8771886861760745, "grad_norm": 1.5664267539978027, "learning_rate": 1.9450111971712913e-05, "loss": 0.7232, "step": 11499 }, { "epoch": 1.877351944818579, "grad_norm": 1.60057532787323, "learning_rate": 1.9450008187146685e-05, "loss": 0.6844, "step": 11500 }, { "epoch": 1.8775152034610834, "grad_norm": 1.940841794013977, "learning_rate": 1.9449904393064303e-05, "loss": 0.7875, "step": 11501 }, { "epoch": 1.8776784621035876, "grad_norm": 1.4768779277801514, "learning_rate": 1.944980058946587e-05, "loss": 0.5877, "step": 11502 }, { "epoch": 1.877841720746092, "grad_norm": 1.4608166217803955, "learning_rate": 1.944969677635149e-05, "loss": 0.5693, "step": 11503 }, { "epoch": 1.8780049793885962, "grad_norm": 1.460681676864624, "learning_rate": 1.944959295372127e-05, "loss": 0.5551, "step": 11504 }, { "epoch": 1.8781682380311007, "grad_norm": 1.990973949432373, "learning_rate": 1.944948912157531e-05, "loss": 0.7081, "step": 11505 }, { "epoch": 1.878331496673605, "grad_norm": 1.2026201486587524, "learning_rate": 1.9449385279913716e-05, "loss": 0.4524, "step": 11506 }, { "epoch": 1.8784947553161095, "grad_norm": 1.856552004814148, "learning_rate": 1.9449281428736597e-05, "loss": 0.7422, "step": 11507 }, { "epoch": 1.878658013958614, "grad_norm": 1.6628409624099731, "learning_rate": 1.9449177568044052e-05, "loss": 0.7235, "step": 11508 }, { "epoch": 1.8788212726011184, "grad_norm": 1.6752853393554688, "learning_rate": 1.9449073697836187e-05, "loss": 0.6086, "step": 11509 }, { "epoch": 1.8789845312436229, "grad_norm": 1.7337779998779297, "learning_rate": 1.944896981811311e-05, "loss": 0.596, "step": 11510 }, { "epoch": 1.879147789886127, "grad_norm": 1.7454215288162231, "learning_rate": 1.944886592887492e-05, "loss": 0.6668, "step": 11511 }, { "epoch": 1.8793110485286315, "grad_norm": 1.6474817991256714, "learning_rate": 1.9448762030121723e-05, "loss": 0.6368, "step": 11512 }, { "epoch": 1.8794743071711357, "grad_norm": 1.966313123703003, "learning_rate": 1.944865812185363e-05, "loss": 0.6665, "step": 11513 }, { "epoch": 1.8796375658136402, "grad_norm": 1.5946966409683228, "learning_rate": 1.9448554204070738e-05, "loss": 0.6616, "step": 11514 }, { "epoch": 1.8798008244561446, "grad_norm": 1.8290305137634277, "learning_rate": 1.9448450276773154e-05, "loss": 0.7034, "step": 11515 }, { "epoch": 1.879964083098649, "grad_norm": 1.5924561023712158, "learning_rate": 1.9448346339960984e-05, "loss": 0.5389, "step": 11516 }, { "epoch": 1.8801273417411535, "grad_norm": 1.5353035926818848, "learning_rate": 1.944824239363433e-05, "loss": 0.6068, "step": 11517 }, { "epoch": 1.880290600383658, "grad_norm": 1.996812343597412, "learning_rate": 1.94481384377933e-05, "loss": 0.8247, "step": 11518 }, { "epoch": 1.8804538590261624, "grad_norm": 1.9649841785430908, "learning_rate": 1.9448034472437997e-05, "loss": 0.7063, "step": 11519 }, { "epoch": 1.8806171176686666, "grad_norm": 1.616782307624817, "learning_rate": 1.9447930497568528e-05, "loss": 0.6559, "step": 11520 }, { "epoch": 1.880780376311171, "grad_norm": 1.795293927192688, "learning_rate": 1.9447826513184992e-05, "loss": 0.688, "step": 11521 }, { "epoch": 1.8809436349536752, "grad_norm": 1.559537649154663, "learning_rate": 1.94477225192875e-05, "loss": 0.6232, "step": 11522 }, { "epoch": 1.8811068935961797, "grad_norm": 1.8149657249450684, "learning_rate": 1.9447618515876153e-05, "loss": 0.8542, "step": 11523 }, { "epoch": 1.881270152238684, "grad_norm": 1.6024699211120605, "learning_rate": 1.9447514502951055e-05, "loss": 0.7056, "step": 11524 }, { "epoch": 1.8814334108811885, "grad_norm": 1.786564826965332, "learning_rate": 1.9447410480512315e-05, "loss": 0.6538, "step": 11525 }, { "epoch": 1.881596669523693, "grad_norm": 1.922845721244812, "learning_rate": 1.9447306448560032e-05, "loss": 0.7443, "step": 11526 }, { "epoch": 1.8817599281661974, "grad_norm": 1.6194251775741577, "learning_rate": 1.9447202407094315e-05, "loss": 0.6882, "step": 11527 }, { "epoch": 1.8819231868087016, "grad_norm": 1.949500560760498, "learning_rate": 1.9447098356115267e-05, "loss": 0.7363, "step": 11528 }, { "epoch": 1.882086445451206, "grad_norm": 1.908187747001648, "learning_rate": 1.9446994295622995e-05, "loss": 0.6803, "step": 11529 }, { "epoch": 1.8822497040937105, "grad_norm": 1.6194300651550293, "learning_rate": 1.9446890225617604e-05, "loss": 0.6803, "step": 11530 }, { "epoch": 1.8824129627362147, "grad_norm": 1.683687448501587, "learning_rate": 1.9446786146099197e-05, "loss": 0.6183, "step": 11531 }, { "epoch": 1.8825762213787192, "grad_norm": 1.522481083869934, "learning_rate": 1.9446682057067875e-05, "loss": 0.5375, "step": 11532 }, { "epoch": 1.8827394800212236, "grad_norm": 1.6315289735794067, "learning_rate": 1.944657795852375e-05, "loss": 0.7022, "step": 11533 }, { "epoch": 1.882902738663728, "grad_norm": 1.6781351566314697, "learning_rate": 1.9446473850466924e-05, "loss": 0.6486, "step": 11534 }, { "epoch": 1.8830659973062325, "grad_norm": 1.5806336402893066, "learning_rate": 1.9446369732897496e-05, "loss": 0.6879, "step": 11535 }, { "epoch": 1.883229255948737, "grad_norm": 1.7709660530090332, "learning_rate": 1.944626560581558e-05, "loss": 0.6956, "step": 11536 }, { "epoch": 1.8833925145912411, "grad_norm": 1.4701541662216187, "learning_rate": 1.9446161469221277e-05, "loss": 0.6622, "step": 11537 }, { "epoch": 1.8835557732337456, "grad_norm": 1.5637449026107788, "learning_rate": 1.944605732311469e-05, "loss": 0.6178, "step": 11538 }, { "epoch": 1.8837190318762498, "grad_norm": 1.6318939924240112, "learning_rate": 1.9445953167495932e-05, "loss": 0.6781, "step": 11539 }, { "epoch": 1.8838822905187542, "grad_norm": 1.536285638809204, "learning_rate": 1.9445849002365092e-05, "loss": 0.654, "step": 11540 }, { "epoch": 1.8840455491612587, "grad_norm": 1.452775001525879, "learning_rate": 1.9445744827722293e-05, "loss": 0.6759, "step": 11541 }, { "epoch": 1.884208807803763, "grad_norm": 1.6871135234832764, "learning_rate": 1.9445640643567626e-05, "loss": 0.6061, "step": 11542 }, { "epoch": 1.8843720664462675, "grad_norm": 1.548416256904602, "learning_rate": 1.9445536449901205e-05, "loss": 0.6378, "step": 11543 }, { "epoch": 1.884535325088772, "grad_norm": 1.4469001293182373, "learning_rate": 1.944543224672313e-05, "loss": 0.6106, "step": 11544 }, { "epoch": 1.8846985837312764, "grad_norm": 1.4708023071289062, "learning_rate": 1.9445328034033508e-05, "loss": 0.5439, "step": 11545 }, { "epoch": 1.8848618423737806, "grad_norm": 1.399923324584961, "learning_rate": 1.9445223811832438e-05, "loss": 0.6261, "step": 11546 }, { "epoch": 1.885025101016285, "grad_norm": 1.474892497062683, "learning_rate": 1.9445119580120035e-05, "loss": 0.5223, "step": 11547 }, { "epoch": 1.8851883596587893, "grad_norm": 1.8021674156188965, "learning_rate": 1.94450153388964e-05, "loss": 0.5803, "step": 11548 }, { "epoch": 1.8853516183012937, "grad_norm": 1.28997004032135, "learning_rate": 1.9444911088161636e-05, "loss": 0.4958, "step": 11549 }, { "epoch": 1.8855148769437982, "grad_norm": 1.5397969484329224, "learning_rate": 1.9444806827915848e-05, "loss": 0.6634, "step": 11550 }, { "epoch": 1.8856781355863026, "grad_norm": 1.5749480724334717, "learning_rate": 1.9444702558159143e-05, "loss": 0.6273, "step": 11551 }, { "epoch": 1.885841394228807, "grad_norm": 1.5941251516342163, "learning_rate": 1.9444598278891623e-05, "loss": 0.7622, "step": 11552 }, { "epoch": 1.8860046528713115, "grad_norm": 2.005549907684326, "learning_rate": 1.9444493990113398e-05, "loss": 0.8363, "step": 11553 }, { "epoch": 1.886167911513816, "grad_norm": 1.4441014528274536, "learning_rate": 1.9444389691824568e-05, "loss": 0.5922, "step": 11554 }, { "epoch": 1.8863311701563201, "grad_norm": 1.627755880355835, "learning_rate": 1.944428538402524e-05, "loss": 0.6353, "step": 11555 }, { "epoch": 1.8864944287988246, "grad_norm": 1.711790919303894, "learning_rate": 1.944418106671552e-05, "loss": 0.7931, "step": 11556 }, { "epoch": 1.8866576874413288, "grad_norm": 1.3947298526763916, "learning_rate": 1.944407673989551e-05, "loss": 0.6314, "step": 11557 }, { "epoch": 1.8868209460838332, "grad_norm": 1.5337296724319458, "learning_rate": 1.944397240356532e-05, "loss": 0.6828, "step": 11558 }, { "epoch": 1.8869842047263377, "grad_norm": 1.3683524131774902, "learning_rate": 1.9443868057725056e-05, "loss": 0.5719, "step": 11559 }, { "epoch": 1.887147463368842, "grad_norm": 1.47541344165802, "learning_rate": 1.944376370237481e-05, "loss": 0.604, "step": 11560 }, { "epoch": 1.8873107220113465, "grad_norm": 1.732155680656433, "learning_rate": 1.9443659337514704e-05, "loss": 0.6959, "step": 11561 }, { "epoch": 1.887473980653851, "grad_norm": 1.7050541639328003, "learning_rate": 1.9443554963144832e-05, "loss": 0.6815, "step": 11562 }, { "epoch": 1.8876372392963554, "grad_norm": 1.4815789461135864, "learning_rate": 1.9443450579265305e-05, "loss": 0.6102, "step": 11563 }, { "epoch": 1.8878004979388596, "grad_norm": 1.3421001434326172, "learning_rate": 1.9443346185876223e-05, "loss": 0.5227, "step": 11564 }, { "epoch": 1.887963756581364, "grad_norm": 1.4962694644927979, "learning_rate": 1.9443241782977696e-05, "loss": 0.5994, "step": 11565 }, { "epoch": 1.8881270152238683, "grad_norm": 1.3127574920654297, "learning_rate": 1.9443137370569825e-05, "loss": 0.4911, "step": 11566 }, { "epoch": 1.8882902738663727, "grad_norm": 1.929032325744629, "learning_rate": 1.944303294865272e-05, "loss": 0.5909, "step": 11567 }, { "epoch": 1.8884535325088772, "grad_norm": 1.7947477102279663, "learning_rate": 1.944292851722648e-05, "loss": 0.8034, "step": 11568 }, { "epoch": 1.8886167911513816, "grad_norm": 1.4883267879486084, "learning_rate": 1.9442824076291216e-05, "loss": 0.6011, "step": 11569 }, { "epoch": 1.888780049793886, "grad_norm": 1.64246666431427, "learning_rate": 1.9442719625847032e-05, "loss": 0.5931, "step": 11570 }, { "epoch": 1.8889433084363905, "grad_norm": 1.6180378198623657, "learning_rate": 1.944261516589403e-05, "loss": 0.5594, "step": 11571 }, { "epoch": 1.889106567078895, "grad_norm": 1.7483789920806885, "learning_rate": 1.9442510696432315e-05, "loss": 0.6597, "step": 11572 }, { "epoch": 1.8892698257213991, "grad_norm": 1.9996604919433594, "learning_rate": 1.9442406217461996e-05, "loss": 0.7814, "step": 11573 }, { "epoch": 1.8894330843639036, "grad_norm": 1.5139063596725464, "learning_rate": 1.9442301728983176e-05, "loss": 0.6141, "step": 11574 }, { "epoch": 1.8895963430064078, "grad_norm": 1.480607032775879, "learning_rate": 1.944219723099596e-05, "loss": 0.5782, "step": 11575 }, { "epoch": 1.8897596016489122, "grad_norm": 1.8566299676895142, "learning_rate": 1.9442092723500456e-05, "loss": 0.7321, "step": 11576 }, { "epoch": 1.8899228602914167, "grad_norm": 1.8643804788589478, "learning_rate": 1.9441988206496768e-05, "loss": 0.6995, "step": 11577 }, { "epoch": 1.890086118933921, "grad_norm": 1.9485164880752563, "learning_rate": 1.9441883679985e-05, "loss": 0.6794, "step": 11578 }, { "epoch": 1.8902493775764255, "grad_norm": 1.7295310497283936, "learning_rate": 1.9441779143965254e-05, "loss": 0.7492, "step": 11579 }, { "epoch": 1.89041263621893, "grad_norm": 1.6741697788238525, "learning_rate": 1.944167459843764e-05, "loss": 0.6818, "step": 11580 }, { "epoch": 1.8905758948614342, "grad_norm": 1.6923526525497437, "learning_rate": 1.944157004340226e-05, "loss": 0.5761, "step": 11581 }, { "epoch": 1.8907391535039386, "grad_norm": 1.593882441520691, "learning_rate": 1.944146547885923e-05, "loss": 0.7182, "step": 11582 }, { "epoch": 1.890902412146443, "grad_norm": 1.4845552444458008, "learning_rate": 1.9441360904808638e-05, "loss": 0.6187, "step": 11583 }, { "epoch": 1.8910656707889473, "grad_norm": 1.5105583667755127, "learning_rate": 1.9441256321250604e-05, "loss": 0.5492, "step": 11584 }, { "epoch": 1.8912289294314517, "grad_norm": 1.5043299198150635, "learning_rate": 1.9441151728185225e-05, "loss": 0.6411, "step": 11585 }, { "epoch": 1.8913921880739561, "grad_norm": 1.834070086479187, "learning_rate": 1.9441047125612605e-05, "loss": 0.7349, "step": 11586 }, { "epoch": 1.8915554467164606, "grad_norm": 1.4392379522323608, "learning_rate": 1.944094251353286e-05, "loss": 0.6161, "step": 11587 }, { "epoch": 1.891718705358965, "grad_norm": 1.7833870649337769, "learning_rate": 1.9440837891946086e-05, "loss": 0.6482, "step": 11588 }, { "epoch": 1.8918819640014695, "grad_norm": 1.638486385345459, "learning_rate": 1.944073326085239e-05, "loss": 0.6438, "step": 11589 }, { "epoch": 1.8920452226439737, "grad_norm": 1.870792031288147, "learning_rate": 1.9440628620251874e-05, "loss": 0.8086, "step": 11590 }, { "epoch": 1.8922084812864781, "grad_norm": 1.9518942832946777, "learning_rate": 1.9440523970144654e-05, "loss": 0.7676, "step": 11591 }, { "epoch": 1.8923717399289823, "grad_norm": 1.6262729167938232, "learning_rate": 1.9440419310530826e-05, "loss": 0.7074, "step": 11592 }, { "epoch": 1.8925349985714868, "grad_norm": 1.8143446445465088, "learning_rate": 1.94403146414105e-05, "loss": 0.7346, "step": 11593 }, { "epoch": 1.8926982572139912, "grad_norm": 1.6360920667648315, "learning_rate": 1.944020996278378e-05, "loss": 0.7078, "step": 11594 }, { "epoch": 1.8928615158564956, "grad_norm": 1.5741231441497803, "learning_rate": 1.9440105274650766e-05, "loss": 0.6758, "step": 11595 }, { "epoch": 1.893024774499, "grad_norm": 1.2854944467544556, "learning_rate": 1.9440000577011573e-05, "loss": 0.5697, "step": 11596 }, { "epoch": 1.8931880331415045, "grad_norm": 1.6311179399490356, "learning_rate": 1.94398958698663e-05, "loss": 0.8062, "step": 11597 }, { "epoch": 1.893351291784009, "grad_norm": 1.479883074760437, "learning_rate": 1.9439791153215055e-05, "loss": 0.7036, "step": 11598 }, { "epoch": 1.8935145504265132, "grad_norm": 1.4560647010803223, "learning_rate": 1.9439686427057943e-05, "loss": 0.5759, "step": 11599 }, { "epoch": 1.8936778090690176, "grad_norm": 1.7028098106384277, "learning_rate": 1.943958169139507e-05, "loss": 0.737, "step": 11600 }, { "epoch": 1.8938410677115218, "grad_norm": 1.5455909967422485, "learning_rate": 1.943947694622654e-05, "loss": 0.5382, "step": 11601 }, { "epoch": 1.8940043263540263, "grad_norm": 1.5311299562454224, "learning_rate": 1.9439372191552458e-05, "loss": 0.7234, "step": 11602 }, { "epoch": 1.8941675849965307, "grad_norm": 1.8698904514312744, "learning_rate": 1.9439267427372932e-05, "loss": 0.7324, "step": 11603 }, { "epoch": 1.8943308436390351, "grad_norm": 1.7710331678390503, "learning_rate": 1.9439162653688066e-05, "loss": 0.6969, "step": 11604 }, { "epoch": 1.8944941022815396, "grad_norm": 1.4616144895553589, "learning_rate": 1.9439057870497966e-05, "loss": 0.6117, "step": 11605 }, { "epoch": 1.894657360924044, "grad_norm": 1.641790509223938, "learning_rate": 1.9438953077802737e-05, "loss": 0.6662, "step": 11606 }, { "epoch": 1.8948206195665485, "grad_norm": 1.6729071140289307, "learning_rate": 1.9438848275602484e-05, "loss": 0.744, "step": 11607 }, { "epoch": 1.8949838782090527, "grad_norm": 1.5238224267959595, "learning_rate": 1.9438743463897314e-05, "loss": 0.6175, "step": 11608 }, { "epoch": 1.8951471368515571, "grad_norm": 1.5480231046676636, "learning_rate": 1.943863864268733e-05, "loss": 0.714, "step": 11609 }, { "epoch": 1.8953103954940613, "grad_norm": 1.777389645576477, "learning_rate": 1.9438533811972645e-05, "loss": 0.7388, "step": 11610 }, { "epoch": 1.8954736541365658, "grad_norm": 1.6796879768371582, "learning_rate": 1.9438428971753355e-05, "loss": 0.7166, "step": 11611 }, { "epoch": 1.8956369127790702, "grad_norm": 1.7598503828048706, "learning_rate": 1.943832412202957e-05, "loss": 0.7949, "step": 11612 }, { "epoch": 1.8958001714215746, "grad_norm": 1.5808384418487549, "learning_rate": 1.9438219262801393e-05, "loss": 0.6187, "step": 11613 }, { "epoch": 1.895963430064079, "grad_norm": 1.736824631690979, "learning_rate": 1.9438114394068934e-05, "loss": 0.7397, "step": 11614 }, { "epoch": 1.8961266887065835, "grad_norm": 1.594459056854248, "learning_rate": 1.9438009515832298e-05, "loss": 0.6311, "step": 11615 }, { "epoch": 1.896289947349088, "grad_norm": 1.4716886281967163, "learning_rate": 1.9437904628091586e-05, "loss": 0.5776, "step": 11616 }, { "epoch": 1.8964532059915922, "grad_norm": 1.8191249370574951, "learning_rate": 1.9437799730846904e-05, "loss": 0.6652, "step": 11617 }, { "epoch": 1.8966164646340966, "grad_norm": 1.7769027948379517, "learning_rate": 1.9437694824098367e-05, "loss": 0.8172, "step": 11618 }, { "epoch": 1.8967797232766008, "grad_norm": 1.6638751029968262, "learning_rate": 1.943758990784607e-05, "loss": 0.677, "step": 11619 }, { "epoch": 1.8969429819191053, "grad_norm": 1.6761482954025269, "learning_rate": 1.9437484982090122e-05, "loss": 0.7367, "step": 11620 }, { "epoch": 1.8971062405616097, "grad_norm": 1.449048399925232, "learning_rate": 1.943738004683063e-05, "loss": 0.6001, "step": 11621 }, { "epoch": 1.8972694992041141, "grad_norm": 1.8349754810333252, "learning_rate": 1.94372751020677e-05, "loss": 0.9094, "step": 11622 }, { "epoch": 1.8974327578466186, "grad_norm": 1.5482133626937866, "learning_rate": 1.9437170147801434e-05, "loss": 0.5745, "step": 11623 }, { "epoch": 1.897596016489123, "grad_norm": 1.5425126552581787, "learning_rate": 1.943706518403194e-05, "loss": 0.5705, "step": 11624 }, { "epoch": 1.8977592751316272, "grad_norm": 1.6129480600357056, "learning_rate": 1.9436960210759325e-05, "loss": 0.7195, "step": 11625 }, { "epoch": 1.8979225337741317, "grad_norm": 1.5146262645721436, "learning_rate": 1.9436855227983695e-05, "loss": 0.6008, "step": 11626 }, { "epoch": 1.898085792416636, "grad_norm": 1.348144292831421, "learning_rate": 1.9436750235705152e-05, "loss": 0.5853, "step": 11627 }, { "epoch": 1.8982490510591403, "grad_norm": 1.6056288480758667, "learning_rate": 1.9436645233923804e-05, "loss": 0.628, "step": 11628 }, { "epoch": 1.8984123097016448, "grad_norm": 1.5268828868865967, "learning_rate": 1.943654022263976e-05, "loss": 0.5902, "step": 11629 }, { "epoch": 1.8985755683441492, "grad_norm": 1.5219417810440063, "learning_rate": 1.943643520185312e-05, "loss": 0.653, "step": 11630 }, { "epoch": 1.8987388269866536, "grad_norm": 1.4454269409179688, "learning_rate": 1.9436330171563994e-05, "loss": 0.6454, "step": 11631 }, { "epoch": 1.898902085629158, "grad_norm": 1.7486697435379028, "learning_rate": 1.9436225131772482e-05, "loss": 0.7218, "step": 11632 }, { "epoch": 1.8990653442716625, "grad_norm": 1.4002004861831665, "learning_rate": 1.9436120082478698e-05, "loss": 0.5311, "step": 11633 }, { "epoch": 1.8992286029141667, "grad_norm": 1.637635350227356, "learning_rate": 1.9436015023682742e-05, "loss": 0.6077, "step": 11634 }, { "epoch": 1.8993918615566712, "grad_norm": 1.6334716081619263, "learning_rate": 1.9435909955384724e-05, "loss": 0.6824, "step": 11635 }, { "epoch": 1.8995551201991754, "grad_norm": 1.6521251201629639, "learning_rate": 1.943580487758474e-05, "loss": 0.6652, "step": 11636 }, { "epoch": 1.8997183788416798, "grad_norm": 1.570641279220581, "learning_rate": 1.943569979028291e-05, "loss": 0.6456, "step": 11637 }, { "epoch": 1.8998816374841843, "grad_norm": 1.7872321605682373, "learning_rate": 1.943559469347933e-05, "loss": 0.6423, "step": 11638 }, { "epoch": 1.9000448961266887, "grad_norm": 1.7164885997772217, "learning_rate": 1.943548958717411e-05, "loss": 0.6656, "step": 11639 }, { "epoch": 1.9002081547691931, "grad_norm": 1.7776987552642822, "learning_rate": 1.943538447136735e-05, "loss": 0.7419, "step": 11640 }, { "epoch": 1.9003714134116976, "grad_norm": 1.6292767524719238, "learning_rate": 1.9435279346059166e-05, "loss": 0.6836, "step": 11641 }, { "epoch": 1.900534672054202, "grad_norm": 1.8345587253570557, "learning_rate": 1.9435174211249657e-05, "loss": 0.6446, "step": 11642 }, { "epoch": 1.9006979306967062, "grad_norm": 1.5271564722061157, "learning_rate": 1.9435069066938928e-05, "loss": 0.6153, "step": 11643 }, { "epoch": 1.9008611893392107, "grad_norm": 1.6989418268203735, "learning_rate": 1.943496391312709e-05, "loss": 0.6341, "step": 11644 }, { "epoch": 1.9010244479817149, "grad_norm": 1.8898526430130005, "learning_rate": 1.9434858749814244e-05, "loss": 0.7082, "step": 11645 }, { "epoch": 1.9011877066242193, "grad_norm": 1.5955911874771118, "learning_rate": 1.9434753577000494e-05, "loss": 0.6325, "step": 11646 }, { "epoch": 1.9013509652667238, "grad_norm": 1.9128661155700684, "learning_rate": 1.9434648394685952e-05, "loss": 0.6834, "step": 11647 }, { "epoch": 1.9015142239092282, "grad_norm": 1.6466622352600098, "learning_rate": 1.9434543202870726e-05, "loss": 0.6598, "step": 11648 }, { "epoch": 1.9016774825517326, "grad_norm": 1.4612789154052734, "learning_rate": 1.9434438001554914e-05, "loss": 0.5859, "step": 11649 }, { "epoch": 1.901840741194237, "grad_norm": 1.63814115524292, "learning_rate": 1.9434332790738625e-05, "loss": 0.6301, "step": 11650 }, { "epoch": 1.9020039998367415, "grad_norm": 1.784630298614502, "learning_rate": 1.9434227570421966e-05, "loss": 0.6766, "step": 11651 }, { "epoch": 1.9021672584792457, "grad_norm": 1.4790159463882446, "learning_rate": 1.9434122340605044e-05, "loss": 0.5161, "step": 11652 }, { "epoch": 1.9023305171217502, "grad_norm": 1.7346559762954712, "learning_rate": 1.943401710128796e-05, "loss": 0.6128, "step": 11653 }, { "epoch": 1.9024937757642544, "grad_norm": 1.7448608875274658, "learning_rate": 1.9433911852470825e-05, "loss": 0.6304, "step": 11654 }, { "epoch": 1.9026570344067588, "grad_norm": 1.5681493282318115, "learning_rate": 1.9433806594153744e-05, "loss": 0.5843, "step": 11655 }, { "epoch": 1.9028202930492633, "grad_norm": 2.0009074211120605, "learning_rate": 1.9433701326336823e-05, "loss": 0.8535, "step": 11656 }, { "epoch": 1.9029835516917677, "grad_norm": 1.7074265480041504, "learning_rate": 1.9433596049020164e-05, "loss": 0.6463, "step": 11657 }, { "epoch": 1.9031468103342721, "grad_norm": 1.8673193454742432, "learning_rate": 1.943349076220388e-05, "loss": 0.7439, "step": 11658 }, { "epoch": 1.9033100689767766, "grad_norm": 1.4835978746414185, "learning_rate": 1.9433385465888072e-05, "loss": 0.5772, "step": 11659 }, { "epoch": 1.903473327619281, "grad_norm": 1.829437017440796, "learning_rate": 1.9433280160072846e-05, "loss": 0.6159, "step": 11660 }, { "epoch": 1.9036365862617852, "grad_norm": 1.6510778665542603, "learning_rate": 1.9433174844758313e-05, "loss": 0.7643, "step": 11661 }, { "epoch": 1.9037998449042897, "grad_norm": 1.6902453899383545, "learning_rate": 1.943306951994457e-05, "loss": 0.6069, "step": 11662 }, { "epoch": 1.9039631035467939, "grad_norm": 1.6211744546890259, "learning_rate": 1.943296418563173e-05, "loss": 0.7689, "step": 11663 }, { "epoch": 1.9041263621892983, "grad_norm": 1.6205840110778809, "learning_rate": 1.94328588418199e-05, "loss": 0.6317, "step": 11664 }, { "epoch": 1.9042896208318028, "grad_norm": 1.683172345161438, "learning_rate": 1.9432753488509182e-05, "loss": 0.6475, "step": 11665 }, { "epoch": 1.9044528794743072, "grad_norm": 1.7775675058364868, "learning_rate": 1.9432648125699685e-05, "loss": 0.6487, "step": 11666 }, { "epoch": 1.9046161381168116, "grad_norm": 2.018979787826538, "learning_rate": 1.9432542753391512e-05, "loss": 0.796, "step": 11667 }, { "epoch": 1.904779396759316, "grad_norm": 1.7016969919204712, "learning_rate": 1.943243737158477e-05, "loss": 0.6707, "step": 11668 }, { "epoch": 1.9049426554018203, "grad_norm": 1.751571536064148, "learning_rate": 1.943233198027957e-05, "loss": 0.5765, "step": 11669 }, { "epoch": 1.9051059140443247, "grad_norm": 1.912782907485962, "learning_rate": 1.943222657947601e-05, "loss": 0.6805, "step": 11670 }, { "epoch": 1.9052691726868292, "grad_norm": 1.5132701396942139, "learning_rate": 1.94321211691742e-05, "loss": 0.6466, "step": 11671 }, { "epoch": 1.9054324313293334, "grad_norm": 2.2362430095672607, "learning_rate": 1.9432015749374254e-05, "loss": 0.712, "step": 11672 }, { "epoch": 1.9055956899718378, "grad_norm": 1.6058048009872437, "learning_rate": 1.9431910320076265e-05, "loss": 0.6173, "step": 11673 }, { "epoch": 1.9057589486143423, "grad_norm": 1.3507256507873535, "learning_rate": 1.9431804881280346e-05, "loss": 0.5629, "step": 11674 }, { "epoch": 1.9059222072568467, "grad_norm": 2.0930521488189697, "learning_rate": 1.94316994329866e-05, "loss": 0.8736, "step": 11675 }, { "epoch": 1.9060854658993511, "grad_norm": 1.5851383209228516, "learning_rate": 1.9431593975195134e-05, "loss": 0.6051, "step": 11676 }, { "epoch": 1.9062487245418556, "grad_norm": 2.222877264022827, "learning_rate": 1.943148850790606e-05, "loss": 0.7083, "step": 11677 }, { "epoch": 1.9064119831843598, "grad_norm": 1.633547067642212, "learning_rate": 1.943138303111948e-05, "loss": 0.7152, "step": 11678 }, { "epoch": 1.9065752418268642, "grad_norm": 1.6204349994659424, "learning_rate": 1.9431277544835497e-05, "loss": 0.6645, "step": 11679 }, { "epoch": 1.9067385004693684, "grad_norm": 1.8937236070632935, "learning_rate": 1.943117204905422e-05, "loss": 0.7346, "step": 11680 }, { "epoch": 1.9069017591118729, "grad_norm": 1.3777647018432617, "learning_rate": 1.9431066543775753e-05, "loss": 0.6646, "step": 11681 }, { "epoch": 1.9070650177543773, "grad_norm": 1.5879884958267212, "learning_rate": 1.9430961029000207e-05, "loss": 0.6131, "step": 11682 }, { "epoch": 1.9072282763968817, "grad_norm": 1.517434000968933, "learning_rate": 1.943085550472769e-05, "loss": 0.6133, "step": 11683 }, { "epoch": 1.9073915350393862, "grad_norm": 1.3848624229431152, "learning_rate": 1.9430749970958297e-05, "loss": 0.5924, "step": 11684 }, { "epoch": 1.9075547936818906, "grad_norm": 1.649513840675354, "learning_rate": 1.9430644427692143e-05, "loss": 0.7212, "step": 11685 }, { "epoch": 1.907718052324395, "grad_norm": 1.3362343311309814, "learning_rate": 1.9430538874929334e-05, "loss": 0.6522, "step": 11686 }, { "epoch": 1.9078813109668993, "grad_norm": 1.3183152675628662, "learning_rate": 1.9430433312669974e-05, "loss": 0.5194, "step": 11687 }, { "epoch": 1.9080445696094037, "grad_norm": 1.4779022932052612, "learning_rate": 1.9430327740914173e-05, "loss": 0.5552, "step": 11688 }, { "epoch": 1.908207828251908, "grad_norm": 1.6023788452148438, "learning_rate": 1.943022215966203e-05, "loss": 0.6728, "step": 11689 }, { "epoch": 1.9083710868944124, "grad_norm": 2.2967560291290283, "learning_rate": 1.9430116568913656e-05, "loss": 0.7415, "step": 11690 }, { "epoch": 1.9085343455369168, "grad_norm": 1.6845508813858032, "learning_rate": 1.943001096866916e-05, "loss": 0.605, "step": 11691 }, { "epoch": 1.9086976041794212, "grad_norm": 1.5847089290618896, "learning_rate": 1.9429905358928648e-05, "loss": 0.596, "step": 11692 }, { "epoch": 1.9088608628219257, "grad_norm": 1.711670994758606, "learning_rate": 1.9429799739692218e-05, "loss": 0.6932, "step": 11693 }, { "epoch": 1.9090241214644301, "grad_norm": 1.7254462242126465, "learning_rate": 1.9429694110959986e-05, "loss": 0.7109, "step": 11694 }, { "epoch": 1.9091873801069346, "grad_norm": 1.537634015083313, "learning_rate": 1.942958847273205e-05, "loss": 0.6431, "step": 11695 }, { "epoch": 1.9093506387494388, "grad_norm": 1.6208806037902832, "learning_rate": 1.9429482825008527e-05, "loss": 0.7688, "step": 11696 }, { "epoch": 1.9095138973919432, "grad_norm": 1.7726614475250244, "learning_rate": 1.9429377167789513e-05, "loss": 0.7927, "step": 11697 }, { "epoch": 1.9096771560344474, "grad_norm": 1.5612629652023315, "learning_rate": 1.942927150107512e-05, "loss": 0.6202, "step": 11698 }, { "epoch": 1.9098404146769519, "grad_norm": 1.593611240386963, "learning_rate": 1.9429165824865452e-05, "loss": 0.6984, "step": 11699 }, { "epoch": 1.9100036733194563, "grad_norm": 1.4913840293884277, "learning_rate": 1.942906013916062e-05, "loss": 0.7002, "step": 11700 }, { "epoch": 1.9101669319619607, "grad_norm": 1.7763315439224243, "learning_rate": 1.9428954443960722e-05, "loss": 0.6482, "step": 11701 }, { "epoch": 1.9103301906044652, "grad_norm": 1.5564844608306885, "learning_rate": 1.9428848739265874e-05, "loss": 0.6101, "step": 11702 }, { "epoch": 1.9104934492469696, "grad_norm": 1.5636448860168457, "learning_rate": 1.9428743025076177e-05, "loss": 0.581, "step": 11703 }, { "epoch": 1.910656707889474, "grad_norm": 1.5162380933761597, "learning_rate": 1.9428637301391734e-05, "loss": 0.539, "step": 11704 }, { "epoch": 1.9108199665319783, "grad_norm": 1.760331153869629, "learning_rate": 1.942853156821266e-05, "loss": 0.6884, "step": 11705 }, { "epoch": 1.9109832251744827, "grad_norm": 1.3705326318740845, "learning_rate": 1.9428425825539056e-05, "loss": 0.5517, "step": 11706 }, { "epoch": 1.911146483816987, "grad_norm": 1.3694878816604614, "learning_rate": 1.9428320073371027e-05, "loss": 0.576, "step": 11707 }, { "epoch": 1.9113097424594914, "grad_norm": 1.7955211400985718, "learning_rate": 1.9428214311708687e-05, "loss": 0.8505, "step": 11708 }, { "epoch": 1.9114730011019958, "grad_norm": 1.8304907083511353, "learning_rate": 1.9428108540552134e-05, "loss": 0.7603, "step": 11709 }, { "epoch": 1.9116362597445002, "grad_norm": 1.327850580215454, "learning_rate": 1.942800275990148e-05, "loss": 0.5691, "step": 11710 }, { "epoch": 1.9117995183870047, "grad_norm": 1.3578276634216309, "learning_rate": 1.942789696975683e-05, "loss": 0.5837, "step": 11711 }, { "epoch": 1.9119627770295091, "grad_norm": 1.7404872179031372, "learning_rate": 1.942779117011829e-05, "loss": 0.8727, "step": 11712 }, { "epoch": 1.9121260356720133, "grad_norm": 1.5426862239837646, "learning_rate": 1.9427685360985963e-05, "loss": 0.6383, "step": 11713 }, { "epoch": 1.9122892943145178, "grad_norm": 1.4933158159255981, "learning_rate": 1.9427579542359966e-05, "loss": 0.5457, "step": 11714 }, { "epoch": 1.9124525529570222, "grad_norm": 1.4081796407699585, "learning_rate": 1.9427473714240393e-05, "loss": 0.615, "step": 11715 }, { "epoch": 1.9126158115995264, "grad_norm": 1.9119536876678467, "learning_rate": 1.942736787662736e-05, "loss": 0.8581, "step": 11716 }, { "epoch": 1.9127790702420309, "grad_norm": 1.7762911319732666, "learning_rate": 1.942726202952097e-05, "loss": 0.691, "step": 11717 }, { "epoch": 1.9129423288845353, "grad_norm": 2.015127182006836, "learning_rate": 1.9427156172921328e-05, "loss": 1.461, "step": 11718 }, { "epoch": 1.9131055875270397, "grad_norm": 1.500795841217041, "learning_rate": 1.9427050306828543e-05, "loss": 0.6182, "step": 11719 }, { "epoch": 1.9132688461695442, "grad_norm": 1.5139065980911255, "learning_rate": 1.942694443124272e-05, "loss": 0.5902, "step": 11720 }, { "epoch": 1.9134321048120486, "grad_norm": 1.9228928089141846, "learning_rate": 1.942683854616397e-05, "loss": 0.622, "step": 11721 }, { "epoch": 1.9135953634545528, "grad_norm": 1.8428676128387451, "learning_rate": 1.9426732651592393e-05, "loss": 0.7201, "step": 11722 }, { "epoch": 1.9137586220970573, "grad_norm": 1.5700511932373047, "learning_rate": 1.94266267475281e-05, "loss": 0.5663, "step": 11723 }, { "epoch": 1.9139218807395615, "grad_norm": 1.761622667312622, "learning_rate": 1.9426520833971194e-05, "loss": 0.8199, "step": 11724 }, { "epoch": 1.914085139382066, "grad_norm": 1.6457796096801758, "learning_rate": 1.9426414910921785e-05, "loss": 0.6472, "step": 11725 }, { "epoch": 1.9142483980245704, "grad_norm": 1.7948859930038452, "learning_rate": 1.942630897837998e-05, "loss": 0.6941, "step": 11726 }, { "epoch": 1.9144116566670748, "grad_norm": 1.5503339767456055, "learning_rate": 1.9426203036345884e-05, "loss": 0.5245, "step": 11727 }, { "epoch": 1.9145749153095792, "grad_norm": 1.5167207717895508, "learning_rate": 1.9426097084819605e-05, "loss": 0.5713, "step": 11728 }, { "epoch": 1.9147381739520837, "grad_norm": 2.0027010440826416, "learning_rate": 1.9425991123801243e-05, "loss": 0.8289, "step": 11729 }, { "epoch": 1.9149014325945881, "grad_norm": 1.962835431098938, "learning_rate": 1.942588515329092e-05, "loss": 0.7441, "step": 11730 }, { "epoch": 1.9150646912370923, "grad_norm": 1.800398349761963, "learning_rate": 1.9425779173288727e-05, "loss": 0.8642, "step": 11731 }, { "epoch": 1.9152279498795968, "grad_norm": 1.8643889427185059, "learning_rate": 1.9425673183794774e-05, "loss": 0.6075, "step": 11732 }, { "epoch": 1.915391208522101, "grad_norm": 1.6960248947143555, "learning_rate": 1.9425567184809178e-05, "loss": 0.767, "step": 11733 }, { "epoch": 1.9155544671646054, "grad_norm": 1.8338183164596558, "learning_rate": 1.9425461176332035e-05, "loss": 0.5357, "step": 11734 }, { "epoch": 1.9157177258071099, "grad_norm": 1.7097474336624146, "learning_rate": 1.9425355158363454e-05, "loss": 0.7099, "step": 11735 }, { "epoch": 1.9158809844496143, "grad_norm": 1.8905770778656006, "learning_rate": 1.9425249130903544e-05, "loss": 0.7037, "step": 11736 }, { "epoch": 1.9160442430921187, "grad_norm": 1.4940587282180786, "learning_rate": 1.942514309395241e-05, "loss": 0.654, "step": 11737 }, { "epoch": 1.9162075017346232, "grad_norm": 1.5319099426269531, "learning_rate": 1.942503704751016e-05, "loss": 0.6736, "step": 11738 }, { "epoch": 1.9163707603771276, "grad_norm": 2.107459545135498, "learning_rate": 1.9424930991576897e-05, "loss": 0.8462, "step": 11739 }, { "epoch": 1.9165340190196318, "grad_norm": 1.6465506553649902, "learning_rate": 1.9424824926152736e-05, "loss": 0.6625, "step": 11740 }, { "epoch": 1.9166972776621363, "grad_norm": 1.8361597061157227, "learning_rate": 1.9424718851237774e-05, "loss": 0.7942, "step": 11741 }, { "epoch": 1.9168605363046405, "grad_norm": 1.6612377166748047, "learning_rate": 1.9424612766832127e-05, "loss": 0.723, "step": 11742 }, { "epoch": 1.917023794947145, "grad_norm": 1.5709806680679321, "learning_rate": 1.9424506672935896e-05, "loss": 0.6978, "step": 11743 }, { "epoch": 1.9171870535896494, "grad_norm": 1.406995415687561, "learning_rate": 1.942440056954919e-05, "loss": 0.5861, "step": 11744 }, { "epoch": 1.9173503122321538, "grad_norm": 1.3830050230026245, "learning_rate": 1.9424294456672115e-05, "loss": 0.5819, "step": 11745 }, { "epoch": 1.9175135708746582, "grad_norm": 1.52486252784729, "learning_rate": 1.9424188334304778e-05, "loss": 0.5934, "step": 11746 }, { "epoch": 1.9176768295171627, "grad_norm": 1.3594576120376587, "learning_rate": 1.942408220244728e-05, "loss": 0.5656, "step": 11747 }, { "epoch": 1.9178400881596671, "grad_norm": 1.503672480583191, "learning_rate": 1.942397606109974e-05, "loss": 0.508, "step": 11748 }, { "epoch": 1.9180033468021713, "grad_norm": 1.5605601072311401, "learning_rate": 1.942386991026226e-05, "loss": 0.6336, "step": 11749 }, { "epoch": 1.9181666054446758, "grad_norm": 1.738492727279663, "learning_rate": 1.9423763749934942e-05, "loss": 0.6391, "step": 11750 }, { "epoch": 1.91832986408718, "grad_norm": 1.7043906450271606, "learning_rate": 1.9423657580117898e-05, "loss": 0.6923, "step": 11751 }, { "epoch": 1.9184931227296844, "grad_norm": 2.0062167644500732, "learning_rate": 1.942355140081123e-05, "loss": 0.8504, "step": 11752 }, { "epoch": 1.9186563813721889, "grad_norm": 1.654372215270996, "learning_rate": 1.942344521201505e-05, "loss": 0.7043, "step": 11753 }, { "epoch": 1.9188196400146933, "grad_norm": 1.4305592775344849, "learning_rate": 1.9423339013729466e-05, "loss": 0.4818, "step": 11754 }, { "epoch": 1.9189828986571977, "grad_norm": 1.3753505945205688, "learning_rate": 1.942323280595458e-05, "loss": 0.6179, "step": 11755 }, { "epoch": 1.9191461572997022, "grad_norm": 1.6097384691238403, "learning_rate": 1.9423126588690502e-05, "loss": 0.6896, "step": 11756 }, { "epoch": 1.9193094159422064, "grad_norm": 1.5493942499160767, "learning_rate": 1.9423020361937336e-05, "loss": 0.7047, "step": 11757 }, { "epoch": 1.9194726745847108, "grad_norm": 1.7837023735046387, "learning_rate": 1.942291412569519e-05, "loss": 0.8157, "step": 11758 }, { "epoch": 1.9196359332272153, "grad_norm": 1.3748022317886353, "learning_rate": 1.9422807879964178e-05, "loss": 0.5697, "step": 11759 }, { "epoch": 1.9197991918697195, "grad_norm": 1.8129335641860962, "learning_rate": 1.9422701624744396e-05, "loss": 0.8158, "step": 11760 }, { "epoch": 1.919962450512224, "grad_norm": 1.8936262130737305, "learning_rate": 1.9422595360035958e-05, "loss": 0.6479, "step": 11761 }, { "epoch": 1.9201257091547284, "grad_norm": 1.962675929069519, "learning_rate": 1.942248908583897e-05, "loss": 0.6825, "step": 11762 }, { "epoch": 1.9202889677972328, "grad_norm": 1.7639846801757812, "learning_rate": 1.9422382802153533e-05, "loss": 0.8522, "step": 11763 }, { "epoch": 1.9204522264397372, "grad_norm": 1.6288803815841675, "learning_rate": 1.9422276508979763e-05, "loss": 0.6157, "step": 11764 }, { "epoch": 1.9206154850822417, "grad_norm": 2.018666982650757, "learning_rate": 1.942217020631776e-05, "loss": 0.7896, "step": 11765 }, { "epoch": 1.9207787437247459, "grad_norm": 1.672440528869629, "learning_rate": 1.9422063894167638e-05, "loss": 0.7105, "step": 11766 }, { "epoch": 1.9209420023672503, "grad_norm": 1.580072283744812, "learning_rate": 1.94219575725295e-05, "loss": 0.5938, "step": 11767 }, { "epoch": 1.9211052610097545, "grad_norm": 1.575501799583435, "learning_rate": 1.9421851241403452e-05, "loss": 0.6529, "step": 11768 }, { "epoch": 1.921268519652259, "grad_norm": 1.7104986906051636, "learning_rate": 1.94217449007896e-05, "loss": 0.6936, "step": 11769 }, { "epoch": 1.9214317782947634, "grad_norm": 1.6789088249206543, "learning_rate": 1.9421638550688057e-05, "loss": 0.6829, "step": 11770 }, { "epoch": 1.9215950369372679, "grad_norm": 1.5894490480422974, "learning_rate": 1.942153219109892e-05, "loss": 0.6955, "step": 11771 }, { "epoch": 1.9217582955797723, "grad_norm": 1.3835574388504028, "learning_rate": 1.942142582202231e-05, "loss": 0.5821, "step": 11772 }, { "epoch": 1.9219215542222767, "grad_norm": 1.5929983854293823, "learning_rate": 1.9421319443458325e-05, "loss": 0.6847, "step": 11773 }, { "epoch": 1.9220848128647812, "grad_norm": 1.5981950759887695, "learning_rate": 1.942121305540707e-05, "loss": 0.5845, "step": 11774 }, { "epoch": 1.9222480715072854, "grad_norm": 1.4469727277755737, "learning_rate": 1.942110665786866e-05, "loss": 0.661, "step": 11775 }, { "epoch": 1.9224113301497898, "grad_norm": 1.5446134805679321, "learning_rate": 1.9421000250843196e-05, "loss": 0.6548, "step": 11776 }, { "epoch": 1.922574588792294, "grad_norm": 1.4657834768295288, "learning_rate": 1.942089383433079e-05, "loss": 0.564, "step": 11777 }, { "epoch": 1.9227378474347985, "grad_norm": 1.421641230583191, "learning_rate": 1.9420787408331544e-05, "loss": 0.5926, "step": 11778 }, { "epoch": 1.922901106077303, "grad_norm": 1.7069646120071411, "learning_rate": 1.942068097284557e-05, "loss": 0.7318, "step": 11779 }, { "epoch": 1.9230643647198074, "grad_norm": 1.5186021327972412, "learning_rate": 1.942057452787297e-05, "loss": 0.6431, "step": 11780 }, { "epoch": 1.9232276233623118, "grad_norm": 1.4159257411956787, "learning_rate": 1.9420468073413854e-05, "loss": 0.5645, "step": 11781 }, { "epoch": 1.9233908820048162, "grad_norm": 1.5883588790893555, "learning_rate": 1.9420361609468332e-05, "loss": 0.6889, "step": 11782 }, { "epoch": 1.9235541406473207, "grad_norm": 1.6045573949813843, "learning_rate": 1.9420255136036505e-05, "loss": 0.6279, "step": 11783 }, { "epoch": 1.9237173992898249, "grad_norm": 1.5444530248641968, "learning_rate": 1.9420148653118485e-05, "loss": 0.6642, "step": 11784 }, { "epoch": 1.9238806579323293, "grad_norm": 1.7348703145980835, "learning_rate": 1.9420042160714377e-05, "loss": 0.6852, "step": 11785 }, { "epoch": 1.9240439165748335, "grad_norm": 2.358764886856079, "learning_rate": 1.941993565882429e-05, "loss": 0.7529, "step": 11786 }, { "epoch": 1.924207175217338, "grad_norm": 1.7129441499710083, "learning_rate": 1.9419829147448327e-05, "loss": 0.7849, "step": 11787 }, { "epoch": 1.9243704338598424, "grad_norm": 1.803831696510315, "learning_rate": 1.9419722626586605e-05, "loss": 0.7874, "step": 11788 }, { "epoch": 1.9245336925023468, "grad_norm": 1.5189048051834106, "learning_rate": 1.941961609623922e-05, "loss": 0.5925, "step": 11789 }, { "epoch": 1.9246969511448513, "grad_norm": 1.806084394454956, "learning_rate": 1.9419509556406287e-05, "loss": 0.7054, "step": 11790 }, { "epoch": 1.9248602097873557, "grad_norm": 2.151031732559204, "learning_rate": 1.9419403007087908e-05, "loss": 0.7264, "step": 11791 }, { "epoch": 1.9250234684298602, "grad_norm": 1.8768466711044312, "learning_rate": 1.9419296448284193e-05, "loss": 0.7594, "step": 11792 }, { "epoch": 1.9251867270723644, "grad_norm": 1.6631461381912231, "learning_rate": 1.941918987999525e-05, "loss": 0.7077, "step": 11793 }, { "epoch": 1.9253499857148688, "grad_norm": 2.1028552055358887, "learning_rate": 1.9419083302221185e-05, "loss": 0.8045, "step": 11794 }, { "epoch": 1.925513244357373, "grad_norm": 1.4776742458343506, "learning_rate": 1.9418976714962106e-05, "loss": 0.591, "step": 11795 }, { "epoch": 1.9256765029998775, "grad_norm": 1.4929262399673462, "learning_rate": 1.9418870118218118e-05, "loss": 0.6179, "step": 11796 }, { "epoch": 1.925839761642382, "grad_norm": 1.5088690519332886, "learning_rate": 1.9418763511989333e-05, "loss": 0.6251, "step": 11797 }, { "epoch": 1.9260030202848863, "grad_norm": 1.5284916162490845, "learning_rate": 1.9418656896275855e-05, "loss": 0.5944, "step": 11798 }, { "epoch": 1.9261662789273908, "grad_norm": 1.7609362602233887, "learning_rate": 1.941855027107779e-05, "loss": 0.7023, "step": 11799 }, { "epoch": 1.9263295375698952, "grad_norm": 1.5750141143798828, "learning_rate": 1.941844363639525e-05, "loss": 0.6773, "step": 11800 }, { "epoch": 1.9264927962123994, "grad_norm": 1.6417129039764404, "learning_rate": 1.941833699222834e-05, "loss": 0.7012, "step": 11801 }, { "epoch": 1.9266560548549039, "grad_norm": 1.643852710723877, "learning_rate": 1.9418230338577164e-05, "loss": 0.6879, "step": 11802 }, { "epoch": 1.9268193134974083, "grad_norm": 1.7371487617492676, "learning_rate": 1.9418123675441832e-05, "loss": 0.7445, "step": 11803 }, { "epoch": 1.9269825721399125, "grad_norm": 1.7611974477767944, "learning_rate": 1.9418017002822455e-05, "loss": 0.6205, "step": 11804 }, { "epoch": 1.927145830782417, "grad_norm": 1.5743842124938965, "learning_rate": 1.9417910320719135e-05, "loss": 0.6265, "step": 11805 }, { "epoch": 1.9273090894249214, "grad_norm": 1.7411830425262451, "learning_rate": 1.9417803629131984e-05, "loss": 0.6682, "step": 11806 }, { "epoch": 1.9274723480674258, "grad_norm": 1.7101434469223022, "learning_rate": 1.9417696928061106e-05, "loss": 0.6739, "step": 11807 }, { "epoch": 1.9276356067099303, "grad_norm": 1.4828531742095947, "learning_rate": 1.9417590217506614e-05, "loss": 0.6076, "step": 11808 }, { "epoch": 1.9277988653524347, "grad_norm": 1.9070714712142944, "learning_rate": 1.9417483497468605e-05, "loss": 0.7921, "step": 11809 }, { "epoch": 1.927962123994939, "grad_norm": 1.7900432348251343, "learning_rate": 1.9417376767947194e-05, "loss": 0.7557, "step": 11810 }, { "epoch": 1.9281253826374434, "grad_norm": 1.9095922708511353, "learning_rate": 1.9417270028942488e-05, "loss": 0.795, "step": 11811 }, { "epoch": 1.9282886412799478, "grad_norm": 1.9155068397521973, "learning_rate": 1.9417163280454594e-05, "loss": 0.712, "step": 11812 }, { "epoch": 1.928451899922452, "grad_norm": 1.8026355504989624, "learning_rate": 1.941705652248362e-05, "loss": 0.7098, "step": 11813 }, { "epoch": 1.9286151585649565, "grad_norm": 1.7470694780349731, "learning_rate": 1.941694975502967e-05, "loss": 0.6595, "step": 11814 }, { "epoch": 1.928778417207461, "grad_norm": 1.728690266609192, "learning_rate": 1.9416842978092856e-05, "loss": 0.6592, "step": 11815 }, { "epoch": 1.9289416758499653, "grad_norm": 1.8231247663497925, "learning_rate": 1.9416736191673285e-05, "loss": 0.6629, "step": 11816 }, { "epoch": 1.9291049344924698, "grad_norm": 1.4220443964004517, "learning_rate": 1.9416629395771058e-05, "loss": 0.5997, "step": 11817 }, { "epoch": 1.9292681931349742, "grad_norm": 1.7691798210144043, "learning_rate": 1.9416522590386294e-05, "loss": 0.7333, "step": 11818 }, { "epoch": 1.9294314517774784, "grad_norm": 1.833841323852539, "learning_rate": 1.9416415775519087e-05, "loss": 0.7356, "step": 11819 }, { "epoch": 1.9295947104199829, "grad_norm": 1.625185489654541, "learning_rate": 1.9416308951169557e-05, "loss": 0.6786, "step": 11820 }, { "epoch": 1.929757969062487, "grad_norm": 1.5889050960540771, "learning_rate": 1.9416202117337805e-05, "loss": 0.7031, "step": 11821 }, { "epoch": 1.9299212277049915, "grad_norm": 1.4354424476623535, "learning_rate": 1.9416095274023942e-05, "loss": 0.5267, "step": 11822 }, { "epoch": 1.930084486347496, "grad_norm": 1.6790525913238525, "learning_rate": 1.941598842122807e-05, "loss": 0.6366, "step": 11823 }, { "epoch": 1.9302477449900004, "grad_norm": 1.3715513944625854, "learning_rate": 1.9415881558950302e-05, "loss": 0.5502, "step": 11824 }, { "epoch": 1.9304110036325048, "grad_norm": 1.9907474517822266, "learning_rate": 1.9415774687190746e-05, "loss": 0.7156, "step": 11825 }, { "epoch": 1.9305742622750093, "grad_norm": 1.5361100435256958, "learning_rate": 1.9415667805949506e-05, "loss": 0.6144, "step": 11826 }, { "epoch": 1.9307375209175137, "grad_norm": 1.6464711427688599, "learning_rate": 1.941556091522669e-05, "loss": 0.6942, "step": 11827 }, { "epoch": 1.930900779560018, "grad_norm": 1.4327415227890015, "learning_rate": 1.9415454015022405e-05, "loss": 0.5497, "step": 11828 }, { "epoch": 1.9310640382025224, "grad_norm": 1.3958780765533447, "learning_rate": 1.9415347105336762e-05, "loss": 0.5283, "step": 11829 }, { "epoch": 1.9312272968450266, "grad_norm": 1.3692985773086548, "learning_rate": 1.9415240186169866e-05, "loss": 0.5165, "step": 11830 }, { "epoch": 1.931390555487531, "grad_norm": 1.6751124858856201, "learning_rate": 1.9415133257521828e-05, "loss": 0.6799, "step": 11831 }, { "epoch": 1.9315538141300355, "grad_norm": 1.6611381769180298, "learning_rate": 1.941502631939275e-05, "loss": 0.6025, "step": 11832 }, { "epoch": 1.93171707277254, "grad_norm": 1.6147339344024658, "learning_rate": 1.941491937178275e-05, "loss": 0.5768, "step": 11833 }, { "epoch": 1.9318803314150443, "grad_norm": 1.6821485757827759, "learning_rate": 1.941481241469192e-05, "loss": 0.682, "step": 11834 }, { "epoch": 1.9320435900575488, "grad_norm": 1.6029694080352783, "learning_rate": 1.941470544812038e-05, "loss": 0.6723, "step": 11835 }, { "epoch": 1.9322068487000532, "grad_norm": 1.5421282052993774, "learning_rate": 1.9414598472068236e-05, "loss": 0.5409, "step": 11836 }, { "epoch": 1.9323701073425574, "grad_norm": 1.5731903314590454, "learning_rate": 1.941449148653559e-05, "loss": 0.5826, "step": 11837 }, { "epoch": 1.9325333659850619, "grad_norm": 1.4583938121795654, "learning_rate": 1.9414384491522558e-05, "loss": 0.6523, "step": 11838 }, { "epoch": 1.932696624627566, "grad_norm": 1.7036464214324951, "learning_rate": 1.941427748702924e-05, "loss": 0.7176, "step": 11839 }, { "epoch": 1.9328598832700705, "grad_norm": 2.0888636112213135, "learning_rate": 1.9414170473055747e-05, "loss": 0.7365, "step": 11840 }, { "epoch": 1.933023141912575, "grad_norm": 1.5556607246398926, "learning_rate": 1.9414063449602188e-05, "loss": 0.5836, "step": 11841 }, { "epoch": 1.9331864005550794, "grad_norm": 1.3760249614715576, "learning_rate": 1.941395641666867e-05, "loss": 0.5117, "step": 11842 }, { "epoch": 1.9333496591975838, "grad_norm": 1.9275193214416504, "learning_rate": 1.9413849374255302e-05, "loss": 0.6407, "step": 11843 }, { "epoch": 1.9335129178400883, "grad_norm": 1.7549632787704468, "learning_rate": 1.9413742322362185e-05, "loss": 0.5962, "step": 11844 }, { "epoch": 1.9336761764825927, "grad_norm": 1.7436634302139282, "learning_rate": 1.9413635260989437e-05, "loss": 0.6884, "step": 11845 }, { "epoch": 1.933839435125097, "grad_norm": 1.8717013597488403, "learning_rate": 1.9413528190137158e-05, "loss": 0.701, "step": 11846 }, { "epoch": 1.9340026937676014, "grad_norm": 1.5096312761306763, "learning_rate": 1.941342110980546e-05, "loss": 0.6416, "step": 11847 }, { "epoch": 1.9341659524101056, "grad_norm": 1.8403706550598145, "learning_rate": 1.941331401999445e-05, "loss": 0.7761, "step": 11848 }, { "epoch": 1.93432921105261, "grad_norm": 1.6784875392913818, "learning_rate": 1.941320692070423e-05, "loss": 0.6734, "step": 11849 }, { "epoch": 1.9344924696951145, "grad_norm": 1.7998906373977661, "learning_rate": 1.941309981193492e-05, "loss": 0.7674, "step": 11850 }, { "epoch": 1.934655728337619, "grad_norm": 1.6041111946105957, "learning_rate": 1.9412992693686618e-05, "loss": 0.5633, "step": 11851 }, { "epoch": 1.9348189869801233, "grad_norm": 1.6012353897094727, "learning_rate": 1.9412885565959434e-05, "loss": 0.6075, "step": 11852 }, { "epoch": 1.9349822456226278, "grad_norm": 1.3093972206115723, "learning_rate": 1.941277842875348e-05, "loss": 0.5503, "step": 11853 }, { "epoch": 1.935145504265132, "grad_norm": 2.0107674598693848, "learning_rate": 1.9412671282068855e-05, "loss": 0.6156, "step": 11854 }, { "epoch": 1.9353087629076364, "grad_norm": 1.5864704847335815, "learning_rate": 1.9412564125905677e-05, "loss": 0.6223, "step": 11855 }, { "epoch": 1.9354720215501409, "grad_norm": 1.8109952211380005, "learning_rate": 1.941245696026405e-05, "loss": 0.7665, "step": 11856 }, { "epoch": 1.935635280192645, "grad_norm": 1.72359037399292, "learning_rate": 1.9412349785144076e-05, "loss": 0.5815, "step": 11857 }, { "epoch": 1.9357985388351495, "grad_norm": 1.60701322555542, "learning_rate": 1.9412242600545874e-05, "loss": 0.6508, "step": 11858 }, { "epoch": 1.935961797477654, "grad_norm": 1.7096107006072998, "learning_rate": 1.941213540646954e-05, "loss": 0.7131, "step": 11859 }, { "epoch": 1.9361250561201584, "grad_norm": 2.1457674503326416, "learning_rate": 1.94120282029152e-05, "loss": 0.8556, "step": 11860 }, { "epoch": 1.9362883147626628, "grad_norm": 1.6849135160446167, "learning_rate": 1.941192098988294e-05, "loss": 0.6628, "step": 11861 }, { "epoch": 1.9364515734051673, "grad_norm": 1.505239725112915, "learning_rate": 1.941181376737288e-05, "loss": 0.6133, "step": 11862 }, { "epoch": 1.9366148320476715, "grad_norm": 1.8007752895355225, "learning_rate": 1.9411706535385127e-05, "loss": 0.7739, "step": 11863 }, { "epoch": 1.936778090690176, "grad_norm": 1.5837368965148926, "learning_rate": 1.9411599293919785e-05, "loss": 0.5988, "step": 11864 }, { "epoch": 1.9369413493326801, "grad_norm": 1.5620118379592896, "learning_rate": 1.9411492042976968e-05, "loss": 0.5642, "step": 11865 }, { "epoch": 1.9371046079751846, "grad_norm": 1.2763797044754028, "learning_rate": 1.941138478255678e-05, "loss": 0.5131, "step": 11866 }, { "epoch": 1.937267866617689, "grad_norm": 1.4895514249801636, "learning_rate": 1.941127751265933e-05, "loss": 0.5883, "step": 11867 }, { "epoch": 1.9374311252601935, "grad_norm": 1.436539888381958, "learning_rate": 1.9411170233284728e-05, "loss": 0.641, "step": 11868 }, { "epoch": 1.937594383902698, "grad_norm": 1.6441245079040527, "learning_rate": 1.941106294443308e-05, "loss": 0.666, "step": 11869 }, { "epoch": 1.9377576425452023, "grad_norm": 1.547269344329834, "learning_rate": 1.9410955646104492e-05, "loss": 0.6635, "step": 11870 }, { "epoch": 1.9379209011877068, "grad_norm": 1.745683193206787, "learning_rate": 1.9410848338299076e-05, "loss": 0.6699, "step": 11871 }, { "epoch": 1.938084159830211, "grad_norm": 1.8507052659988403, "learning_rate": 1.9410741021016936e-05, "loss": 0.8055, "step": 11872 }, { "epoch": 1.9382474184727154, "grad_norm": 1.5772100687026978, "learning_rate": 1.9410633694258182e-05, "loss": 0.6043, "step": 11873 }, { "epoch": 1.9384106771152196, "grad_norm": 1.426726222038269, "learning_rate": 1.9410526358022925e-05, "loss": 0.5194, "step": 11874 }, { "epoch": 1.938573935757724, "grad_norm": 1.352931261062622, "learning_rate": 1.941041901231127e-05, "loss": 0.5508, "step": 11875 }, { "epoch": 1.9387371944002285, "grad_norm": 1.5917284488677979, "learning_rate": 1.9410311657123325e-05, "loss": 0.6306, "step": 11876 }, { "epoch": 1.938900453042733, "grad_norm": 1.7282652854919434, "learning_rate": 1.94102042924592e-05, "loss": 0.6895, "step": 11877 }, { "epoch": 1.9390637116852374, "grad_norm": 1.617829442024231, "learning_rate": 1.9410096918318998e-05, "loss": 0.6538, "step": 11878 }, { "epoch": 1.9392269703277418, "grad_norm": 1.391880989074707, "learning_rate": 1.9409989534702835e-05, "loss": 0.6143, "step": 11879 }, { "epoch": 1.9393902289702463, "grad_norm": 1.6176685094833374, "learning_rate": 1.9409882141610813e-05, "loss": 0.6173, "step": 11880 }, { "epoch": 1.9395534876127505, "grad_norm": 1.3883602619171143, "learning_rate": 1.940977473904304e-05, "loss": 0.5834, "step": 11881 }, { "epoch": 1.939716746255255, "grad_norm": 1.914481520652771, "learning_rate": 1.9409667326999632e-05, "loss": 0.6893, "step": 11882 }, { "epoch": 1.9398800048977591, "grad_norm": 1.6219054460525513, "learning_rate": 1.9409559905480683e-05, "loss": 0.6163, "step": 11883 }, { "epoch": 1.9400432635402636, "grad_norm": 1.702161192893982, "learning_rate": 1.9409452474486315e-05, "loss": 0.6481, "step": 11884 }, { "epoch": 1.940206522182768, "grad_norm": 1.6728965044021606, "learning_rate": 1.9409345034016634e-05, "loss": 0.6089, "step": 11885 }, { "epoch": 1.9403697808252724, "grad_norm": 1.6810117959976196, "learning_rate": 1.940923758407174e-05, "loss": 0.7011, "step": 11886 }, { "epoch": 1.9405330394677769, "grad_norm": 1.6566284894943237, "learning_rate": 1.940913012465175e-05, "loss": 0.7046, "step": 11887 }, { "epoch": 1.9406962981102813, "grad_norm": 1.9351093769073486, "learning_rate": 1.9409022655756767e-05, "loss": 0.6988, "step": 11888 }, { "epoch": 1.9408595567527858, "grad_norm": 1.8713605403900146, "learning_rate": 1.94089151773869e-05, "loss": 0.7677, "step": 11889 }, { "epoch": 1.94102281539529, "grad_norm": 1.6238309144973755, "learning_rate": 1.9408807689542257e-05, "loss": 0.5783, "step": 11890 }, { "epoch": 1.9411860740377944, "grad_norm": 1.6282639503479004, "learning_rate": 1.940870019222295e-05, "loss": 0.7235, "step": 11891 }, { "epoch": 1.9413493326802986, "grad_norm": 1.4680988788604736, "learning_rate": 1.940859268542908e-05, "loss": 0.6437, "step": 11892 }, { "epoch": 1.941512591322803, "grad_norm": 1.4780998229980469, "learning_rate": 1.940848516916076e-05, "loss": 0.5537, "step": 11893 }, { "epoch": 1.9416758499653075, "grad_norm": 1.5085655450820923, "learning_rate": 1.94083776434181e-05, "loss": 0.5834, "step": 11894 }, { "epoch": 1.941839108607812, "grad_norm": 1.606579065322876, "learning_rate": 1.940827010820121e-05, "loss": 0.5493, "step": 11895 }, { "epoch": 1.9420023672503164, "grad_norm": 1.626221776008606, "learning_rate": 1.940816256351019e-05, "loss": 0.6152, "step": 11896 }, { "epoch": 1.9421656258928208, "grad_norm": 1.639137625694275, "learning_rate": 1.9408055009345154e-05, "loss": 0.7171, "step": 11897 }, { "epoch": 1.942328884535325, "grad_norm": 1.8815704584121704, "learning_rate": 1.940794744570621e-05, "loss": 0.6404, "step": 11898 }, { "epoch": 1.9424921431778295, "grad_norm": 1.4051271677017212, "learning_rate": 1.940783987259346e-05, "loss": 0.669, "step": 11899 }, { "epoch": 1.942655401820334, "grad_norm": 1.8181684017181396, "learning_rate": 1.9407732290007023e-05, "loss": 0.8261, "step": 11900 }, { "epoch": 1.9428186604628381, "grad_norm": 1.7623094320297241, "learning_rate": 1.9407624697947003e-05, "loss": 0.6683, "step": 11901 }, { "epoch": 1.9429819191053426, "grad_norm": 1.4922568798065186, "learning_rate": 1.9407517096413505e-05, "loss": 0.5939, "step": 11902 }, { "epoch": 1.943145177747847, "grad_norm": 1.7313512563705444, "learning_rate": 1.9407409485406638e-05, "loss": 0.7464, "step": 11903 }, { "epoch": 1.9433084363903514, "grad_norm": 1.9489524364471436, "learning_rate": 1.9407301864926514e-05, "loss": 0.77, "step": 11904 }, { "epoch": 1.9434716950328559, "grad_norm": 1.9707510471343994, "learning_rate": 1.940719423497324e-05, "loss": 0.6625, "step": 11905 }, { "epoch": 1.9436349536753603, "grad_norm": 1.5538692474365234, "learning_rate": 1.9407086595546928e-05, "loss": 0.6655, "step": 11906 }, { "epoch": 1.9437982123178645, "grad_norm": 1.7071088552474976, "learning_rate": 1.9406978946647676e-05, "loss": 0.6007, "step": 11907 }, { "epoch": 1.943961470960369, "grad_norm": 1.5549540519714355, "learning_rate": 1.94068712882756e-05, "loss": 0.525, "step": 11908 }, { "epoch": 1.9441247296028732, "grad_norm": 1.5966929197311401, "learning_rate": 1.9406763620430808e-05, "loss": 0.5729, "step": 11909 }, { "epoch": 1.9442879882453776, "grad_norm": 1.6473256349563599, "learning_rate": 1.9406655943113408e-05, "loss": 0.7152, "step": 11910 }, { "epoch": 1.944451246887882, "grad_norm": 1.61025071144104, "learning_rate": 1.940654825632351e-05, "loss": 0.5859, "step": 11911 }, { "epoch": 1.9446145055303865, "grad_norm": 1.5468478202819824, "learning_rate": 1.9406440560061214e-05, "loss": 0.6572, "step": 11912 }, { "epoch": 1.944777764172891, "grad_norm": 1.7527316808700562, "learning_rate": 1.9406332854326638e-05, "loss": 0.8893, "step": 11913 }, { "epoch": 1.9449410228153954, "grad_norm": 1.5590773820877075, "learning_rate": 1.940622513911989e-05, "loss": 0.6309, "step": 11914 }, { "epoch": 1.9451042814578998, "grad_norm": 1.507565975189209, "learning_rate": 1.940611741444107e-05, "loss": 0.612, "step": 11915 }, { "epoch": 1.945267540100404, "grad_norm": 1.5965625047683716, "learning_rate": 1.9406009680290298e-05, "loss": 0.6378, "step": 11916 }, { "epoch": 1.9454307987429085, "grad_norm": 1.2325236797332764, "learning_rate": 1.9405901936667677e-05, "loss": 0.5645, "step": 11917 }, { "epoch": 1.9455940573854127, "grad_norm": 1.8313679695129395, "learning_rate": 1.9405794183573314e-05, "loss": 0.654, "step": 11918 }, { "epoch": 1.9457573160279171, "grad_norm": 1.9758769273757935, "learning_rate": 1.9405686421007316e-05, "loss": 0.8476, "step": 11919 }, { "epoch": 1.9459205746704216, "grad_norm": 1.7152938842773438, "learning_rate": 1.9405578648969796e-05, "loss": 0.6631, "step": 11920 }, { "epoch": 1.946083833312926, "grad_norm": 1.5823169946670532, "learning_rate": 1.940547086746086e-05, "loss": 0.6815, "step": 11921 }, { "epoch": 1.9462470919554304, "grad_norm": 1.6402860879898071, "learning_rate": 1.940536307648062e-05, "loss": 0.6395, "step": 11922 }, { "epoch": 1.9464103505979349, "grad_norm": 2.1109843254089355, "learning_rate": 1.940525527602918e-05, "loss": 0.8161, "step": 11923 }, { "epoch": 1.9465736092404393, "grad_norm": 1.7880148887634277, "learning_rate": 1.940514746610665e-05, "loss": 0.7565, "step": 11924 }, { "epoch": 1.9467368678829435, "grad_norm": 1.4932868480682373, "learning_rate": 1.9405039646713136e-05, "loss": 0.548, "step": 11925 }, { "epoch": 1.946900126525448, "grad_norm": 1.547722578048706, "learning_rate": 1.9404931817848755e-05, "loss": 0.6436, "step": 11926 }, { "epoch": 1.9470633851679522, "grad_norm": 1.4842886924743652, "learning_rate": 1.940482397951361e-05, "loss": 0.7105, "step": 11927 }, { "epoch": 1.9472266438104566, "grad_norm": 1.9733026027679443, "learning_rate": 1.9404716131707807e-05, "loss": 0.816, "step": 11928 }, { "epoch": 1.947389902452961, "grad_norm": 1.324195384979248, "learning_rate": 1.940460827443146e-05, "loss": 0.5185, "step": 11929 }, { "epoch": 1.9475531610954655, "grad_norm": 1.5936919450759888, "learning_rate": 1.9404500407684673e-05, "loss": 0.6647, "step": 11930 }, { "epoch": 1.94771641973797, "grad_norm": 1.255367398262024, "learning_rate": 1.9404392531467555e-05, "loss": 0.5246, "step": 11931 }, { "epoch": 1.9478796783804744, "grad_norm": 1.396157145500183, "learning_rate": 1.940428464578022e-05, "loss": 0.5967, "step": 11932 }, { "epoch": 1.9480429370229788, "grad_norm": 1.4370572566986084, "learning_rate": 1.940417675062277e-05, "loss": 0.5791, "step": 11933 }, { "epoch": 1.948206195665483, "grad_norm": 1.703366756439209, "learning_rate": 1.9404068845995317e-05, "loss": 0.7141, "step": 11934 }, { "epoch": 1.9483694543079875, "grad_norm": 1.7182344198226929, "learning_rate": 1.940396093189797e-05, "loss": 0.6485, "step": 11935 }, { "epoch": 1.9485327129504917, "grad_norm": 1.6210448741912842, "learning_rate": 1.9403853008330834e-05, "loss": 0.7121, "step": 11936 }, { "epoch": 1.9486959715929961, "grad_norm": 1.9727495908737183, "learning_rate": 1.9403745075294024e-05, "loss": 0.6171, "step": 11937 }, { "epoch": 1.9488592302355006, "grad_norm": 1.6028192043304443, "learning_rate": 1.9403637132787644e-05, "loss": 0.6185, "step": 11938 }, { "epoch": 1.949022488878005, "grad_norm": 1.730743646621704, "learning_rate": 1.9403529180811804e-05, "loss": 0.6883, "step": 11939 }, { "epoch": 1.9491857475205094, "grad_norm": 1.8238142728805542, "learning_rate": 1.940342121936661e-05, "loss": 0.67, "step": 11940 }, { "epoch": 1.9493490061630139, "grad_norm": 1.81600022315979, "learning_rate": 1.9403313248452182e-05, "loss": 0.6244, "step": 11941 }, { "epoch": 1.949512264805518, "grad_norm": 1.6514146327972412, "learning_rate": 1.9403205268068612e-05, "loss": 0.6431, "step": 11942 }, { "epoch": 1.9496755234480225, "grad_norm": 1.6588079929351807, "learning_rate": 1.9403097278216017e-05, "loss": 0.7106, "step": 11943 }, { "epoch": 1.949838782090527, "grad_norm": 1.656566858291626, "learning_rate": 1.940298927889451e-05, "loss": 0.6997, "step": 11944 }, { "epoch": 1.9500020407330312, "grad_norm": 1.9318604469299316, "learning_rate": 1.940288127010419e-05, "loss": 0.5375, "step": 11945 }, { "epoch": 1.9501652993755356, "grad_norm": 1.740936040878296, "learning_rate": 1.9402773251845175e-05, "loss": 0.6618, "step": 11946 }, { "epoch": 1.95032855801804, "grad_norm": 1.7183349132537842, "learning_rate": 1.940266522411757e-05, "loss": 0.7356, "step": 11947 }, { "epoch": 1.9504918166605445, "grad_norm": 1.8850504159927368, "learning_rate": 1.940255718692148e-05, "loss": 0.7671, "step": 11948 }, { "epoch": 1.950655075303049, "grad_norm": 1.2519774436950684, "learning_rate": 1.9402449140257018e-05, "loss": 0.4942, "step": 11949 }, { "epoch": 1.9508183339455534, "grad_norm": 1.7590252161026, "learning_rate": 1.9402341084124298e-05, "loss": 0.6554, "step": 11950 }, { "epoch": 1.9509815925880576, "grad_norm": 1.5754247903823853, "learning_rate": 1.9402233018523417e-05, "loss": 0.612, "step": 11951 }, { "epoch": 1.951144851230562, "grad_norm": 1.5095124244689941, "learning_rate": 1.940212494345449e-05, "loss": 0.66, "step": 11952 }, { "epoch": 1.9513081098730662, "grad_norm": 1.978344202041626, "learning_rate": 1.9402016858917628e-05, "loss": 0.7207, "step": 11953 }, { "epoch": 1.9514713685155707, "grad_norm": 2.0964579582214355, "learning_rate": 1.9401908764912937e-05, "loss": 0.7026, "step": 11954 }, { "epoch": 1.9516346271580751, "grad_norm": 1.8792282342910767, "learning_rate": 1.9401800661440525e-05, "loss": 0.802, "step": 11955 }, { "epoch": 1.9517978858005796, "grad_norm": 1.7344902753829956, "learning_rate": 1.9401692548500504e-05, "loss": 0.6642, "step": 11956 }, { "epoch": 1.951961144443084, "grad_norm": 1.9206347465515137, "learning_rate": 1.9401584426092982e-05, "loss": 0.8014, "step": 11957 }, { "epoch": 1.9521244030855884, "grad_norm": 1.6475178003311157, "learning_rate": 1.9401476294218065e-05, "loss": 0.6549, "step": 11958 }, { "epoch": 1.9522876617280929, "grad_norm": 1.8357062339782715, "learning_rate": 1.9401368152875863e-05, "loss": 0.7624, "step": 11959 }, { "epoch": 1.952450920370597, "grad_norm": 1.6990190744400024, "learning_rate": 1.940126000206649e-05, "loss": 0.6021, "step": 11960 }, { "epoch": 1.9526141790131015, "grad_norm": 1.7167134284973145, "learning_rate": 1.9401151841790046e-05, "loss": 0.7366, "step": 11961 }, { "epoch": 1.9527774376556057, "grad_norm": 1.4468706846237183, "learning_rate": 1.9401043672046644e-05, "loss": 0.6386, "step": 11962 }, { "epoch": 1.9529406962981102, "grad_norm": 1.8635231256484985, "learning_rate": 1.9400935492836398e-05, "loss": 0.6615, "step": 11963 }, { "epoch": 1.9531039549406146, "grad_norm": 1.3481049537658691, "learning_rate": 1.940082730415941e-05, "loss": 0.5214, "step": 11964 }, { "epoch": 1.953267213583119, "grad_norm": 1.5708295106887817, "learning_rate": 1.9400719106015793e-05, "loss": 0.7488, "step": 11965 }, { "epoch": 1.9534304722256235, "grad_norm": 1.6124669313430786, "learning_rate": 1.940061089840565e-05, "loss": 0.6604, "step": 11966 }, { "epoch": 1.953593730868128, "grad_norm": 1.6876554489135742, "learning_rate": 1.94005026813291e-05, "loss": 0.6591, "step": 11967 }, { "epoch": 1.9537569895106324, "grad_norm": 1.6444077491760254, "learning_rate": 1.940039445478624e-05, "loss": 0.6898, "step": 11968 }, { "epoch": 1.9539202481531366, "grad_norm": 1.8075302839279175, "learning_rate": 1.9400286218777192e-05, "loss": 0.7073, "step": 11969 }, { "epoch": 1.954083506795641, "grad_norm": 1.7064119577407837, "learning_rate": 1.9400177973302054e-05, "loss": 0.6558, "step": 11970 }, { "epoch": 1.9542467654381452, "grad_norm": 1.7711230516433716, "learning_rate": 1.940006971836094e-05, "loss": 0.7402, "step": 11971 }, { "epoch": 1.9544100240806497, "grad_norm": 1.6719141006469727, "learning_rate": 1.939996145395396e-05, "loss": 0.6116, "step": 11972 }, { "epoch": 1.9545732827231541, "grad_norm": 1.4994275569915771, "learning_rate": 1.939985318008122e-05, "loss": 0.5857, "step": 11973 }, { "epoch": 1.9547365413656586, "grad_norm": 1.4007822275161743, "learning_rate": 1.9399744896742828e-05, "loss": 0.548, "step": 11974 }, { "epoch": 1.954899800008163, "grad_norm": 1.6796579360961914, "learning_rate": 1.93996366039389e-05, "loss": 0.6458, "step": 11975 }, { "epoch": 1.9550630586506674, "grad_norm": 1.861620545387268, "learning_rate": 1.939952830166954e-05, "loss": 0.5731, "step": 11976 }, { "epoch": 1.9552263172931719, "grad_norm": 1.6025257110595703, "learning_rate": 1.9399419989934857e-05, "loss": 0.7297, "step": 11977 }, { "epoch": 1.955389575935676, "grad_norm": 1.6865417957305908, "learning_rate": 1.9399311668734957e-05, "loss": 0.6633, "step": 11978 }, { "epoch": 1.9555528345781805, "grad_norm": 1.510353922843933, "learning_rate": 1.9399203338069955e-05, "loss": 0.5982, "step": 11979 }, { "epoch": 1.9557160932206847, "grad_norm": 1.5171585083007812, "learning_rate": 1.9399094997939957e-05, "loss": 0.5717, "step": 11980 }, { "epoch": 1.9558793518631892, "grad_norm": 1.6111290454864502, "learning_rate": 1.9398986648345076e-05, "loss": 0.6715, "step": 11981 }, { "epoch": 1.9560426105056936, "grad_norm": 1.4529098272323608, "learning_rate": 1.9398878289285412e-05, "loss": 0.5377, "step": 11982 }, { "epoch": 1.956205869148198, "grad_norm": 1.8047800064086914, "learning_rate": 1.9398769920761083e-05, "loss": 0.5593, "step": 11983 }, { "epoch": 1.9563691277907025, "grad_norm": 1.4388797283172607, "learning_rate": 1.93986615427722e-05, "loss": 0.5896, "step": 11984 }, { "epoch": 1.956532386433207, "grad_norm": 1.6703429222106934, "learning_rate": 1.939855315531886e-05, "loss": 0.6144, "step": 11985 }, { "epoch": 1.9566956450757111, "grad_norm": 1.8608746528625488, "learning_rate": 1.9398444758401182e-05, "loss": 0.7315, "step": 11986 }, { "epoch": 1.9568589037182156, "grad_norm": 1.593725323677063, "learning_rate": 1.9398336352019275e-05, "loss": 0.6422, "step": 11987 }, { "epoch": 1.95702216236072, "grad_norm": 1.5424983501434326, "learning_rate": 1.9398227936173245e-05, "loss": 0.6332, "step": 11988 }, { "epoch": 1.9571854210032242, "grad_norm": 1.6589657068252563, "learning_rate": 1.9398119510863197e-05, "loss": 0.5421, "step": 11989 }, { "epoch": 1.9573486796457287, "grad_norm": 1.4812122583389282, "learning_rate": 1.939801107608925e-05, "loss": 0.691, "step": 11990 }, { "epoch": 1.957511938288233, "grad_norm": 1.9188082218170166, "learning_rate": 1.9397902631851506e-05, "loss": 0.7515, "step": 11991 }, { "epoch": 1.9576751969307375, "grad_norm": 1.9120335578918457, "learning_rate": 1.939779417815008e-05, "loss": 0.8544, "step": 11992 }, { "epoch": 1.957838455573242, "grad_norm": 1.6071158647537231, "learning_rate": 1.9397685714985076e-05, "loss": 0.69, "step": 11993 }, { "epoch": 1.9580017142157464, "grad_norm": 1.5111114978790283, "learning_rate": 1.9397577242356603e-05, "loss": 0.6307, "step": 11994 }, { "epoch": 1.9581649728582506, "grad_norm": 1.7906697988510132, "learning_rate": 1.9397468760264774e-05, "loss": 0.6417, "step": 11995 }, { "epoch": 1.958328231500755, "grad_norm": 1.601850152015686, "learning_rate": 1.93973602687097e-05, "loss": 0.6505, "step": 11996 }, { "epoch": 1.9584914901432593, "grad_norm": 1.5569384098052979, "learning_rate": 1.939725176769148e-05, "loss": 0.5183, "step": 11997 }, { "epoch": 1.9586547487857637, "grad_norm": 1.4771968126296997, "learning_rate": 1.939714325721023e-05, "loss": 0.5916, "step": 11998 }, { "epoch": 1.9588180074282682, "grad_norm": 1.8051952123641968, "learning_rate": 1.9397034737266067e-05, "loss": 0.7851, "step": 11999 }, { "epoch": 1.9589812660707726, "grad_norm": 1.4665353298187256, "learning_rate": 1.9396926207859085e-05, "loss": 0.579, "step": 12000 }, { "epoch": 1.959144524713277, "grad_norm": 1.7448382377624512, "learning_rate": 1.9396817668989404e-05, "loss": 0.6175, "step": 12001 }, { "epoch": 1.9593077833557815, "grad_norm": 2.0720083713531494, "learning_rate": 1.9396709120657128e-05, "loss": 0.771, "step": 12002 }, { "epoch": 1.959471041998286, "grad_norm": 1.5964202880859375, "learning_rate": 1.9396600562862372e-05, "loss": 0.608, "step": 12003 }, { "epoch": 1.9596343006407901, "grad_norm": 1.5067237615585327, "learning_rate": 1.939649199560524e-05, "loss": 0.5785, "step": 12004 }, { "epoch": 1.9597975592832946, "grad_norm": 1.605988621711731, "learning_rate": 1.9396383418885842e-05, "loss": 0.6657, "step": 12005 }, { "epoch": 1.9599608179257988, "grad_norm": 1.1931746006011963, "learning_rate": 1.939627483270429e-05, "loss": 0.497, "step": 12006 }, { "epoch": 1.9601240765683032, "grad_norm": 1.7894536256790161, "learning_rate": 1.939616623706069e-05, "loss": 0.733, "step": 12007 }, { "epoch": 1.9602873352108077, "grad_norm": 1.8156068325042725, "learning_rate": 1.9396057631955156e-05, "loss": 0.8219, "step": 12008 }, { "epoch": 1.960450593853312, "grad_norm": 1.7503235340118408, "learning_rate": 1.939594901738779e-05, "loss": 0.6707, "step": 12009 }, { "epoch": 1.9606138524958165, "grad_norm": 1.8041573762893677, "learning_rate": 1.9395840393358707e-05, "loss": 0.643, "step": 12010 }, { "epoch": 1.960777111138321, "grad_norm": 2.050607919692993, "learning_rate": 1.939573175986802e-05, "loss": 0.8475, "step": 12011 }, { "epoch": 1.9609403697808254, "grad_norm": 1.6523118019104004, "learning_rate": 1.939562311691583e-05, "loss": 0.6726, "step": 12012 }, { "epoch": 1.9611036284233296, "grad_norm": 1.946329951286316, "learning_rate": 1.939551446450225e-05, "loss": 0.6729, "step": 12013 }, { "epoch": 1.961266887065834, "grad_norm": 1.5787888765335083, "learning_rate": 1.939540580262739e-05, "loss": 0.6382, "step": 12014 }, { "epoch": 1.9614301457083383, "grad_norm": 1.4827346801757812, "learning_rate": 1.9395297131291356e-05, "loss": 0.5127, "step": 12015 }, { "epoch": 1.9615934043508427, "grad_norm": 1.6452802419662476, "learning_rate": 1.9395188450494264e-05, "loss": 0.5958, "step": 12016 }, { "epoch": 1.9617566629933472, "grad_norm": 1.5317522287368774, "learning_rate": 1.9395079760236217e-05, "loss": 0.5678, "step": 12017 }, { "epoch": 1.9619199216358516, "grad_norm": 1.7947872877120972, "learning_rate": 1.939497106051733e-05, "loss": 0.7603, "step": 12018 }, { "epoch": 1.962083180278356, "grad_norm": 1.741202473640442, "learning_rate": 1.9394862351337705e-05, "loss": 0.7124, "step": 12019 }, { "epoch": 1.9622464389208605, "grad_norm": 1.6219642162322998, "learning_rate": 1.9394753632697464e-05, "loss": 0.67, "step": 12020 }, { "epoch": 1.962409697563365, "grad_norm": 1.2691242694854736, "learning_rate": 1.93946449045967e-05, "loss": 0.5101, "step": 12021 }, { "epoch": 1.9625729562058691, "grad_norm": 2.352670907974243, "learning_rate": 1.9394536167035535e-05, "loss": 0.8106, "step": 12022 }, { "epoch": 1.9627362148483736, "grad_norm": 1.6988601684570312, "learning_rate": 1.9394427420014076e-05, "loss": 0.687, "step": 12023 }, { "epoch": 1.9628994734908778, "grad_norm": 1.5797284841537476, "learning_rate": 1.939431866353243e-05, "loss": 0.678, "step": 12024 }, { "epoch": 1.9630627321333822, "grad_norm": 1.591332197189331, "learning_rate": 1.9394209897590707e-05, "loss": 0.6662, "step": 12025 }, { "epoch": 1.9632259907758867, "grad_norm": 1.634377360343933, "learning_rate": 1.9394101122189016e-05, "loss": 0.6491, "step": 12026 }, { "epoch": 1.963389249418391, "grad_norm": 1.498908281326294, "learning_rate": 1.939399233732747e-05, "loss": 0.5941, "step": 12027 }, { "epoch": 1.9635525080608955, "grad_norm": 1.9735352993011475, "learning_rate": 1.9393883543006176e-05, "loss": 0.7243, "step": 12028 }, { "epoch": 1.9637157667034, "grad_norm": 1.6846818923950195, "learning_rate": 1.9393774739225243e-05, "loss": 0.6727, "step": 12029 }, { "epoch": 1.9638790253459042, "grad_norm": 1.8270232677459717, "learning_rate": 1.939366592598478e-05, "loss": 0.7377, "step": 12030 }, { "epoch": 1.9640422839884086, "grad_norm": 1.4123948812484741, "learning_rate": 1.93935571032849e-05, "loss": 0.5128, "step": 12031 }, { "epoch": 1.964205542630913, "grad_norm": 1.7303087711334229, "learning_rate": 1.9393448271125706e-05, "loss": 0.7214, "step": 12032 }, { "epoch": 1.9643688012734173, "grad_norm": 1.463138461112976, "learning_rate": 1.9393339429507317e-05, "loss": 0.5781, "step": 12033 }, { "epoch": 1.9645320599159217, "grad_norm": 1.850738286972046, "learning_rate": 1.939323057842984e-05, "loss": 0.8161, "step": 12034 }, { "epoch": 1.9646953185584262, "grad_norm": 1.6018259525299072, "learning_rate": 1.9393121717893376e-05, "loss": 0.5641, "step": 12035 }, { "epoch": 1.9648585772009306, "grad_norm": 1.6531469821929932, "learning_rate": 1.9393012847898044e-05, "loss": 0.6972, "step": 12036 }, { "epoch": 1.965021835843435, "grad_norm": 1.5803368091583252, "learning_rate": 1.939290396844395e-05, "loss": 0.5991, "step": 12037 }, { "epoch": 1.9651850944859395, "grad_norm": 1.4351414442062378, "learning_rate": 1.9392795079531205e-05, "loss": 0.5981, "step": 12038 }, { "epoch": 1.9653483531284437, "grad_norm": 1.5181951522827148, "learning_rate": 1.9392686181159917e-05, "loss": 0.7562, "step": 12039 }, { "epoch": 1.9655116117709481, "grad_norm": 1.3229573965072632, "learning_rate": 1.93925772733302e-05, "loss": 0.5342, "step": 12040 }, { "epoch": 1.9656748704134523, "grad_norm": 1.5528271198272705, "learning_rate": 1.9392468356042155e-05, "loss": 0.6365, "step": 12041 }, { "epoch": 1.9658381290559568, "grad_norm": 1.7845171689987183, "learning_rate": 1.9392359429295897e-05, "loss": 0.7221, "step": 12042 }, { "epoch": 1.9660013876984612, "grad_norm": 1.6315809488296509, "learning_rate": 1.9392250493091537e-05, "loss": 0.622, "step": 12043 }, { "epoch": 1.9661646463409657, "grad_norm": 1.6713262796401978, "learning_rate": 1.9392141547429183e-05, "loss": 0.6051, "step": 12044 }, { "epoch": 1.96632790498347, "grad_norm": 1.6520575284957886, "learning_rate": 1.9392032592308948e-05, "loss": 0.5678, "step": 12045 }, { "epoch": 1.9664911636259745, "grad_norm": 1.4269263744354248, "learning_rate": 1.9391923627730936e-05, "loss": 0.5601, "step": 12046 }, { "epoch": 1.966654422268479, "grad_norm": 1.8615847826004028, "learning_rate": 1.939181465369526e-05, "loss": 0.7337, "step": 12047 }, { "epoch": 1.9668176809109832, "grad_norm": 1.6449984312057495, "learning_rate": 1.939170567020203e-05, "loss": 0.6296, "step": 12048 }, { "epoch": 1.9669809395534876, "grad_norm": 1.7710739374160767, "learning_rate": 1.939159667725135e-05, "loss": 0.5948, "step": 12049 }, { "epoch": 1.9671441981959918, "grad_norm": 1.583418369293213, "learning_rate": 1.939148767484334e-05, "loss": 0.6202, "step": 12050 }, { "epoch": 1.9673074568384963, "grad_norm": 1.7176554203033447, "learning_rate": 1.93913786629781e-05, "loss": 0.6994, "step": 12051 }, { "epoch": 1.9674707154810007, "grad_norm": 1.4113578796386719, "learning_rate": 1.939126964165575e-05, "loss": 0.6283, "step": 12052 }, { "epoch": 1.9676339741235052, "grad_norm": 1.5644431114196777, "learning_rate": 1.939116061087639e-05, "loss": 0.5194, "step": 12053 }, { "epoch": 1.9677972327660096, "grad_norm": 1.4710571765899658, "learning_rate": 1.9391051570640138e-05, "loss": 0.687, "step": 12054 }, { "epoch": 1.967960491408514, "grad_norm": 1.864527702331543, "learning_rate": 1.939094252094709e-05, "loss": 0.6359, "step": 12055 }, { "epoch": 1.9681237500510185, "grad_norm": 2.0226376056671143, "learning_rate": 1.9390833461797377e-05, "loss": 0.688, "step": 12056 }, { "epoch": 1.9682870086935227, "grad_norm": 1.5084041357040405, "learning_rate": 1.939072439319109e-05, "loss": 0.5828, "step": 12057 }, { "epoch": 1.9684502673360271, "grad_norm": 1.6363697052001953, "learning_rate": 1.9390615315128347e-05, "loss": 0.6178, "step": 12058 }, { "epoch": 1.9686135259785313, "grad_norm": 1.7761976718902588, "learning_rate": 1.9390506227609255e-05, "loss": 0.5727, "step": 12059 }, { "epoch": 1.9687767846210358, "grad_norm": 1.7256630659103394, "learning_rate": 1.9390397130633932e-05, "loss": 0.5695, "step": 12060 }, { "epoch": 1.9689400432635402, "grad_norm": 1.5373591184616089, "learning_rate": 1.9390288024202476e-05, "loss": 0.66, "step": 12061 }, { "epoch": 1.9691033019060447, "grad_norm": 1.5092006921768188, "learning_rate": 1.9390178908315004e-05, "loss": 0.5797, "step": 12062 }, { "epoch": 1.969266560548549, "grad_norm": 1.4031505584716797, "learning_rate": 1.9390069782971626e-05, "loss": 0.5978, "step": 12063 }, { "epoch": 1.9694298191910535, "grad_norm": 1.6241410970687866, "learning_rate": 1.9389960648172447e-05, "loss": 0.6566, "step": 12064 }, { "epoch": 1.969593077833558, "grad_norm": 1.5984833240509033, "learning_rate": 1.9389851503917584e-05, "loss": 0.5981, "step": 12065 }, { "epoch": 1.9697563364760622, "grad_norm": 1.8714030981063843, "learning_rate": 1.938974235020714e-05, "loss": 0.7991, "step": 12066 }, { "epoch": 1.9699195951185666, "grad_norm": 1.4397425651550293, "learning_rate": 1.938963318704123e-05, "loss": 0.6206, "step": 12067 }, { "epoch": 1.9700828537610708, "grad_norm": 1.7518298625946045, "learning_rate": 1.938952401441996e-05, "loss": 0.6507, "step": 12068 }, { "epoch": 1.9702461124035753, "grad_norm": 1.352191686630249, "learning_rate": 1.9389414832343444e-05, "loss": 0.5693, "step": 12069 }, { "epoch": 1.9704093710460797, "grad_norm": 1.6283620595932007, "learning_rate": 1.938930564081179e-05, "loss": 0.5533, "step": 12070 }, { "epoch": 1.9705726296885842, "grad_norm": 1.182111382484436, "learning_rate": 1.9389196439825103e-05, "loss": 0.5118, "step": 12071 }, { "epoch": 1.9707358883310886, "grad_norm": 1.5730998516082764, "learning_rate": 1.93890872293835e-05, "loss": 0.6207, "step": 12072 }, { "epoch": 1.970899146973593, "grad_norm": 1.605098009109497, "learning_rate": 1.938897800948709e-05, "loss": 0.7108, "step": 12073 }, { "epoch": 1.9710624056160975, "grad_norm": 1.8476513624191284, "learning_rate": 1.938886878013598e-05, "loss": 0.8175, "step": 12074 }, { "epoch": 1.9712256642586017, "grad_norm": 1.972294569015503, "learning_rate": 1.9388759541330284e-05, "loss": 0.7987, "step": 12075 }, { "epoch": 1.9713889229011061, "grad_norm": 1.495556116104126, "learning_rate": 1.938865029307011e-05, "loss": 0.6187, "step": 12076 }, { "epoch": 1.9715521815436103, "grad_norm": 2.112205743789673, "learning_rate": 1.9388541035355563e-05, "loss": 0.7068, "step": 12077 }, { "epoch": 1.9717154401861148, "grad_norm": 1.800990343093872, "learning_rate": 1.938843176818676e-05, "loss": 0.6639, "step": 12078 }, { "epoch": 1.9718786988286192, "grad_norm": 1.5970319509506226, "learning_rate": 1.9388322491563813e-05, "loss": 0.5971, "step": 12079 }, { "epoch": 1.9720419574711237, "grad_norm": 1.7734706401824951, "learning_rate": 1.9388213205486823e-05, "loss": 0.7537, "step": 12080 }, { "epoch": 1.972205216113628, "grad_norm": 1.7885246276855469, "learning_rate": 1.9388103909955904e-05, "loss": 0.6601, "step": 12081 }, { "epoch": 1.9723684747561325, "grad_norm": 2.0376458168029785, "learning_rate": 1.938799460497117e-05, "loss": 0.7621, "step": 12082 }, { "epoch": 1.9725317333986367, "grad_norm": 1.8590543270111084, "learning_rate": 1.9387885290532728e-05, "loss": 0.7521, "step": 12083 }, { "epoch": 1.9726949920411412, "grad_norm": 2.142286777496338, "learning_rate": 1.9387775966640683e-05, "loss": 0.8508, "step": 12084 }, { "epoch": 1.9728582506836456, "grad_norm": 1.8114097118377686, "learning_rate": 1.9387666633295158e-05, "loss": 0.6396, "step": 12085 }, { "epoch": 1.9730215093261498, "grad_norm": 1.8248679637908936, "learning_rate": 1.9387557290496247e-05, "loss": 0.6903, "step": 12086 }, { "epoch": 1.9731847679686543, "grad_norm": 1.7347304821014404, "learning_rate": 1.9387447938244076e-05, "loss": 0.5835, "step": 12087 }, { "epoch": 1.9733480266111587, "grad_norm": 1.798732876777649, "learning_rate": 1.9387338576538743e-05, "loss": 0.704, "step": 12088 }, { "epoch": 1.9735112852536632, "grad_norm": 1.6307047605514526, "learning_rate": 1.9387229205380362e-05, "loss": 0.7075, "step": 12089 }, { "epoch": 1.9736745438961676, "grad_norm": 1.6454919576644897, "learning_rate": 1.9387119824769043e-05, "loss": 0.6971, "step": 12090 }, { "epoch": 1.973837802538672, "grad_norm": 1.5941905975341797, "learning_rate": 1.93870104347049e-05, "loss": 0.8269, "step": 12091 }, { "epoch": 1.9740010611811762, "grad_norm": 2.082308292388916, "learning_rate": 1.938690103518804e-05, "loss": 1.4123, "step": 12092 }, { "epoch": 1.9741643198236807, "grad_norm": 1.4190151691436768, "learning_rate": 1.938679162621857e-05, "loss": 0.5935, "step": 12093 }, { "epoch": 1.974327578466185, "grad_norm": 1.720835566520691, "learning_rate": 1.938668220779661e-05, "loss": 0.8161, "step": 12094 }, { "epoch": 1.9744908371086893, "grad_norm": 1.4237022399902344, "learning_rate": 1.9386572779922254e-05, "loss": 0.6986, "step": 12095 }, { "epoch": 1.9746540957511938, "grad_norm": 1.5185050964355469, "learning_rate": 1.938646334259563e-05, "loss": 0.6821, "step": 12096 }, { "epoch": 1.9748173543936982, "grad_norm": 1.6775425672531128, "learning_rate": 1.9386353895816833e-05, "loss": 0.6664, "step": 12097 }, { "epoch": 1.9749806130362026, "grad_norm": 1.555795431137085, "learning_rate": 1.9386244439585984e-05, "loss": 0.6958, "step": 12098 }, { "epoch": 1.975143871678707, "grad_norm": 1.7459584474563599, "learning_rate": 1.938613497390319e-05, "loss": 0.6423, "step": 12099 }, { "epoch": 1.9753071303212115, "grad_norm": 1.8588008880615234, "learning_rate": 1.938602549876856e-05, "loss": 0.7026, "step": 12100 }, { "epoch": 1.9754703889637157, "grad_norm": 2.1107265949249268, "learning_rate": 1.9385916014182202e-05, "loss": 1.4162, "step": 12101 }, { "epoch": 1.9756336476062202, "grad_norm": 1.789146900177002, "learning_rate": 1.9385806520144234e-05, "loss": 0.6781, "step": 12102 }, { "epoch": 1.9757969062487244, "grad_norm": 1.7236100435256958, "learning_rate": 1.9385697016654756e-05, "loss": 0.7142, "step": 12103 }, { "epoch": 1.9759601648912288, "grad_norm": 1.5753521919250488, "learning_rate": 1.9385587503713886e-05, "loss": 0.589, "step": 12104 }, { "epoch": 1.9761234235337333, "grad_norm": 1.7589904069900513, "learning_rate": 1.9385477981321733e-05, "loss": 0.6734, "step": 12105 }, { "epoch": 1.9762866821762377, "grad_norm": 1.6662052869796753, "learning_rate": 1.9385368449478404e-05, "loss": 0.6203, "step": 12106 }, { "epoch": 1.9764499408187421, "grad_norm": 1.709804892539978, "learning_rate": 1.9385258908184014e-05, "loss": 0.6899, "step": 12107 }, { "epoch": 1.9766131994612466, "grad_norm": 1.7481664419174194, "learning_rate": 1.9385149357438667e-05, "loss": 0.6542, "step": 12108 }, { "epoch": 1.976776458103751, "grad_norm": 1.5527619123458862, "learning_rate": 1.938503979724248e-05, "loss": 0.6715, "step": 12109 }, { "epoch": 1.9769397167462552, "grad_norm": 1.7784616947174072, "learning_rate": 1.938493022759556e-05, "loss": 0.6524, "step": 12110 }, { "epoch": 1.9771029753887597, "grad_norm": 1.630524754524231, "learning_rate": 1.938482064849802e-05, "loss": 0.6704, "step": 12111 }, { "epoch": 1.977266234031264, "grad_norm": 1.3653780221939087, "learning_rate": 1.9384711059949964e-05, "loss": 0.5405, "step": 12112 }, { "epoch": 1.9774294926737683, "grad_norm": 1.605733871459961, "learning_rate": 1.938460146195151e-05, "loss": 0.6783, "step": 12113 }, { "epoch": 1.9775927513162728, "grad_norm": 1.586165189743042, "learning_rate": 1.938449185450276e-05, "loss": 0.6009, "step": 12114 }, { "epoch": 1.9777560099587772, "grad_norm": 1.5338736772537231, "learning_rate": 1.9384382237603832e-05, "loss": 0.5675, "step": 12115 }, { "epoch": 1.9779192686012816, "grad_norm": 1.5550426244735718, "learning_rate": 1.9384272611254835e-05, "loss": 0.6254, "step": 12116 }, { "epoch": 1.978082527243786, "grad_norm": 1.650942325592041, "learning_rate": 1.9384162975455877e-05, "loss": 0.7351, "step": 12117 }, { "epoch": 1.9782457858862905, "grad_norm": 1.7282867431640625, "learning_rate": 1.9384053330207068e-05, "loss": 0.693, "step": 12118 }, { "epoch": 1.9784090445287947, "grad_norm": 1.9156718254089355, "learning_rate": 1.938394367550852e-05, "loss": 0.6331, "step": 12119 }, { "epoch": 1.9785723031712992, "grad_norm": 1.570723533630371, "learning_rate": 1.9383834011360347e-05, "loss": 0.5542, "step": 12120 }, { "epoch": 1.9787355618138034, "grad_norm": 1.5698423385620117, "learning_rate": 1.938372433776265e-05, "loss": 0.6428, "step": 12121 }, { "epoch": 1.9788988204563078, "grad_norm": 1.5610013008117676, "learning_rate": 1.9383614654715547e-05, "loss": 0.6275, "step": 12122 }, { "epoch": 1.9790620790988123, "grad_norm": 1.7522907257080078, "learning_rate": 1.938350496221915e-05, "loss": 0.6258, "step": 12123 }, { "epoch": 1.9792253377413167, "grad_norm": 1.4422379732131958, "learning_rate": 1.9383395260273564e-05, "loss": 0.7006, "step": 12124 }, { "epoch": 1.9793885963838211, "grad_norm": 1.5264736413955688, "learning_rate": 1.93832855488789e-05, "loss": 0.5363, "step": 12125 }, { "epoch": 1.9795518550263256, "grad_norm": 1.7550451755523682, "learning_rate": 1.938317582803527e-05, "loss": 0.6404, "step": 12126 }, { "epoch": 1.9797151136688298, "grad_norm": 1.7797822952270508, "learning_rate": 1.9383066097742785e-05, "loss": 0.5658, "step": 12127 }, { "epoch": 1.9798783723113342, "grad_norm": 1.6075304746627808, "learning_rate": 1.9382956358001555e-05, "loss": 0.7103, "step": 12128 }, { "epoch": 1.9800416309538387, "grad_norm": 1.686180830001831, "learning_rate": 1.9382846608811693e-05, "loss": 0.6425, "step": 12129 }, { "epoch": 1.9802048895963429, "grad_norm": 2.1154184341430664, "learning_rate": 1.9382736850173303e-05, "loss": 0.7659, "step": 12130 }, { "epoch": 1.9803681482388473, "grad_norm": 1.4713122844696045, "learning_rate": 1.9382627082086502e-05, "loss": 0.6419, "step": 12131 }, { "epoch": 1.9805314068813518, "grad_norm": 1.5829592943191528, "learning_rate": 1.9382517304551397e-05, "loss": 0.5693, "step": 12132 }, { "epoch": 1.9806946655238562, "grad_norm": 1.9809218645095825, "learning_rate": 1.9382407517568097e-05, "loss": 0.8544, "step": 12133 }, { "epoch": 1.9808579241663606, "grad_norm": 1.4909061193466187, "learning_rate": 1.938229772113672e-05, "loss": 0.5676, "step": 12134 }, { "epoch": 1.981021182808865, "grad_norm": 1.5948035717010498, "learning_rate": 1.938218791525737e-05, "loss": 0.6318, "step": 12135 }, { "epoch": 1.9811844414513693, "grad_norm": 1.7518815994262695, "learning_rate": 1.9382078099930157e-05, "loss": 0.805, "step": 12136 }, { "epoch": 1.9813477000938737, "grad_norm": 1.9464157819747925, "learning_rate": 1.9381968275155195e-05, "loss": 0.6807, "step": 12137 }, { "epoch": 1.981510958736378, "grad_norm": 1.6631522178649902, "learning_rate": 1.9381858440932592e-05, "loss": 0.6681, "step": 12138 }, { "epoch": 1.9816742173788824, "grad_norm": 1.3863495588302612, "learning_rate": 1.9381748597262463e-05, "loss": 0.512, "step": 12139 }, { "epoch": 1.9818374760213868, "grad_norm": 1.447217583656311, "learning_rate": 1.9381638744144914e-05, "loss": 0.5365, "step": 12140 }, { "epoch": 1.9820007346638913, "grad_norm": 1.1956762075424194, "learning_rate": 1.938152888158006e-05, "loss": 0.492, "step": 12141 }, { "epoch": 1.9821639933063957, "grad_norm": 1.561561107635498, "learning_rate": 1.938141900956801e-05, "loss": 0.6441, "step": 12142 }, { "epoch": 1.9823272519489001, "grad_norm": 1.634190320968628, "learning_rate": 1.9381309128108866e-05, "loss": 0.6629, "step": 12143 }, { "epoch": 1.9824905105914046, "grad_norm": 1.5614255666732788, "learning_rate": 1.938119923720275e-05, "loss": 0.6071, "step": 12144 }, { "epoch": 1.9826537692339088, "grad_norm": 1.8012393712997437, "learning_rate": 1.9381089336849773e-05, "loss": 0.5887, "step": 12145 }, { "epoch": 1.9828170278764132, "grad_norm": 1.8545680046081543, "learning_rate": 1.9380979427050036e-05, "loss": 0.6725, "step": 12146 }, { "epoch": 1.9829802865189174, "grad_norm": 1.6923454999923706, "learning_rate": 1.9380869507803656e-05, "loss": 0.7273, "step": 12147 }, { "epoch": 1.9831435451614219, "grad_norm": 1.7317448854446411, "learning_rate": 1.9380759579110745e-05, "loss": 0.7501, "step": 12148 }, { "epoch": 1.9833068038039263, "grad_norm": 2.078296184539795, "learning_rate": 1.938064964097141e-05, "loss": 0.9509, "step": 12149 }, { "epoch": 1.9834700624464308, "grad_norm": 1.6791608333587646, "learning_rate": 1.9380539693385763e-05, "loss": 0.5995, "step": 12150 }, { "epoch": 1.9836333210889352, "grad_norm": 1.5884119272232056, "learning_rate": 1.9380429736353918e-05, "loss": 0.6409, "step": 12151 }, { "epoch": 1.9837965797314396, "grad_norm": 1.6735621690750122, "learning_rate": 1.938031976987598e-05, "loss": 0.7408, "step": 12152 }, { "epoch": 1.983959838373944, "grad_norm": 1.7484724521636963, "learning_rate": 1.938020979395206e-05, "loss": 0.7344, "step": 12153 }, { "epoch": 1.9841230970164483, "grad_norm": 1.7733862400054932, "learning_rate": 1.9380099808582278e-05, "loss": 0.5507, "step": 12154 }, { "epoch": 1.9842863556589527, "grad_norm": 1.4923404455184937, "learning_rate": 1.9379989813766732e-05, "loss": 0.5817, "step": 12155 }, { "epoch": 1.984449614301457, "grad_norm": 1.462174415588379, "learning_rate": 1.9379879809505542e-05, "loss": 0.5973, "step": 12156 }, { "epoch": 1.9846128729439614, "grad_norm": 1.5134918689727783, "learning_rate": 1.9379769795798817e-05, "loss": 0.6221, "step": 12157 }, { "epoch": 1.9847761315864658, "grad_norm": 1.8579589128494263, "learning_rate": 1.937965977264666e-05, "loss": 0.5809, "step": 12158 }, { "epoch": 1.9849393902289703, "grad_norm": 1.865466833114624, "learning_rate": 1.9379549740049197e-05, "loss": 0.7381, "step": 12159 }, { "epoch": 1.9851026488714747, "grad_norm": 1.8792990446090698, "learning_rate": 1.9379439698006522e-05, "loss": 0.6755, "step": 12160 }, { "epoch": 1.9852659075139791, "grad_norm": 1.6961243152618408, "learning_rate": 1.9379329646518756e-05, "loss": 0.6004, "step": 12161 }, { "epoch": 1.9854291661564836, "grad_norm": 1.8712011575698853, "learning_rate": 1.9379219585586007e-05, "loss": 0.6596, "step": 12162 }, { "epoch": 1.9855924247989878, "grad_norm": 1.4411910772323608, "learning_rate": 1.9379109515208387e-05, "loss": 0.5271, "step": 12163 }, { "epoch": 1.9857556834414922, "grad_norm": 1.598284125328064, "learning_rate": 1.937899943538601e-05, "loss": 0.6606, "step": 12164 }, { "epoch": 1.9859189420839964, "grad_norm": 1.925343632698059, "learning_rate": 1.937888934611898e-05, "loss": 0.7625, "step": 12165 }, { "epoch": 1.9860822007265009, "grad_norm": 1.6749473810195923, "learning_rate": 1.9378779247407408e-05, "loss": 0.6631, "step": 12166 }, { "epoch": 1.9862454593690053, "grad_norm": 1.9570813179016113, "learning_rate": 1.937866913925141e-05, "loss": 0.6842, "step": 12167 }, { "epoch": 1.9864087180115098, "grad_norm": 1.6535437107086182, "learning_rate": 1.9378559021651096e-05, "loss": 0.6573, "step": 12168 }, { "epoch": 1.9865719766540142, "grad_norm": 1.5824257135391235, "learning_rate": 1.9378448894606575e-05, "loss": 0.7661, "step": 12169 }, { "epoch": 1.9867352352965186, "grad_norm": 1.619402527809143, "learning_rate": 1.937833875811796e-05, "loss": 0.6753, "step": 12170 }, { "epoch": 1.9868984939390228, "grad_norm": 1.6263985633850098, "learning_rate": 1.9378228612185358e-05, "loss": 0.6029, "step": 12171 }, { "epoch": 1.9870617525815273, "grad_norm": 1.5036473274230957, "learning_rate": 1.937811845680888e-05, "loss": 0.6215, "step": 12172 }, { "epoch": 1.9872250112240317, "grad_norm": 1.5280168056488037, "learning_rate": 1.9378008291988644e-05, "loss": 0.578, "step": 12173 }, { "epoch": 1.987388269866536, "grad_norm": 2.04551362991333, "learning_rate": 1.9377898117724753e-05, "loss": 0.8163, "step": 12174 }, { "epoch": 1.9875515285090404, "grad_norm": 1.5004938840866089, "learning_rate": 1.9377787934017323e-05, "loss": 0.661, "step": 12175 }, { "epoch": 1.9877147871515448, "grad_norm": 1.6754908561706543, "learning_rate": 1.937767774086646e-05, "loss": 0.6899, "step": 12176 }, { "epoch": 1.9878780457940493, "grad_norm": 2.010058879852295, "learning_rate": 1.9377567538272283e-05, "loss": 0.7004, "step": 12177 }, { "epoch": 1.9880413044365537, "grad_norm": 2.1062633991241455, "learning_rate": 1.9377457326234893e-05, "loss": 0.8205, "step": 12178 }, { "epoch": 1.9882045630790581, "grad_norm": 1.7984371185302734, "learning_rate": 1.9377347104754408e-05, "loss": 0.735, "step": 12179 }, { "epoch": 1.9883678217215623, "grad_norm": 1.6382217407226562, "learning_rate": 1.937723687383094e-05, "loss": 0.5169, "step": 12180 }, { "epoch": 1.9885310803640668, "grad_norm": 1.5597251653671265, "learning_rate": 1.937712663346459e-05, "loss": 0.6359, "step": 12181 }, { "epoch": 1.988694339006571, "grad_norm": 1.7302318811416626, "learning_rate": 1.9377016383655484e-05, "loss": 0.6939, "step": 12182 }, { "epoch": 1.9888575976490754, "grad_norm": 1.4742039442062378, "learning_rate": 1.937690612440372e-05, "loss": 0.7445, "step": 12183 }, { "epoch": 1.9890208562915799, "grad_norm": 1.6477793455123901, "learning_rate": 1.9376795855709414e-05, "loss": 0.674, "step": 12184 }, { "epoch": 1.9891841149340843, "grad_norm": 1.4963077306747437, "learning_rate": 1.9376685577572677e-05, "loss": 0.6402, "step": 12185 }, { "epoch": 1.9893473735765888, "grad_norm": 1.5093870162963867, "learning_rate": 1.9376575289993624e-05, "loss": 0.5624, "step": 12186 }, { "epoch": 1.9895106322190932, "grad_norm": 1.3610533475875854, "learning_rate": 1.9376464992972358e-05, "loss": 0.5352, "step": 12187 }, { "epoch": 1.9896738908615976, "grad_norm": 1.4411801099777222, "learning_rate": 1.9376354686508996e-05, "loss": 0.5623, "step": 12188 }, { "epoch": 1.9898371495041018, "grad_norm": 1.5642800331115723, "learning_rate": 1.9376244370603646e-05, "loss": 0.6226, "step": 12189 }, { "epoch": 1.9900004081466063, "grad_norm": 1.593117594718933, "learning_rate": 1.9376134045256424e-05, "loss": 0.5637, "step": 12190 }, { "epoch": 1.9901636667891105, "grad_norm": 1.8389207124710083, "learning_rate": 1.9376023710467433e-05, "loss": 0.7843, "step": 12191 }, { "epoch": 1.990326925431615, "grad_norm": 1.5381425619125366, "learning_rate": 1.9375913366236793e-05, "loss": 0.5866, "step": 12192 }, { "epoch": 1.9904901840741194, "grad_norm": 1.5661377906799316, "learning_rate": 1.9375803012564605e-05, "loss": 0.4959, "step": 12193 }, { "epoch": 1.9906534427166238, "grad_norm": 1.8130446672439575, "learning_rate": 1.937569264945099e-05, "loss": 0.8026, "step": 12194 }, { "epoch": 1.9908167013591282, "grad_norm": 1.7404340505599976, "learning_rate": 1.9375582276896052e-05, "loss": 0.6427, "step": 12195 }, { "epoch": 1.9909799600016327, "grad_norm": 1.7790809869766235, "learning_rate": 1.937547189489991e-05, "loss": 0.6589, "step": 12196 }, { "epoch": 1.9911432186441371, "grad_norm": 1.2181130647659302, "learning_rate": 1.9375361503462666e-05, "loss": 0.4636, "step": 12197 }, { "epoch": 1.9913064772866413, "grad_norm": 1.6261563301086426, "learning_rate": 1.9375251102584438e-05, "loss": 0.6624, "step": 12198 }, { "epoch": 1.9914697359291458, "grad_norm": 1.4645826816558838, "learning_rate": 1.9375140692265333e-05, "loss": 0.6294, "step": 12199 }, { "epoch": 1.99163299457165, "grad_norm": 1.4746330976486206, "learning_rate": 1.9375030272505463e-05, "loss": 0.6459, "step": 12200 }, { "epoch": 1.9917962532141544, "grad_norm": 1.9358316659927368, "learning_rate": 1.9374919843304944e-05, "loss": 0.6549, "step": 12201 }, { "epoch": 1.9919595118566589, "grad_norm": 1.4916775226593018, "learning_rate": 1.937480940466388e-05, "loss": 0.588, "step": 12202 }, { "epoch": 1.9921227704991633, "grad_norm": 1.6753145456314087, "learning_rate": 1.9374698956582385e-05, "loss": 0.6789, "step": 12203 }, { "epoch": 1.9922860291416677, "grad_norm": 1.6334155797958374, "learning_rate": 1.937458849906057e-05, "loss": 0.581, "step": 12204 }, { "epoch": 1.9924492877841722, "grad_norm": 2.046504259109497, "learning_rate": 1.937447803209855e-05, "loss": 0.7244, "step": 12205 }, { "epoch": 1.9926125464266766, "grad_norm": 1.638554334640503, "learning_rate": 1.937436755569643e-05, "loss": 0.749, "step": 12206 }, { "epoch": 1.9927758050691808, "grad_norm": 1.4775389432907104, "learning_rate": 1.9374257069854327e-05, "loss": 0.5999, "step": 12207 }, { "epoch": 1.9929390637116853, "grad_norm": 1.6128982305526733, "learning_rate": 1.937414657457235e-05, "loss": 0.5861, "step": 12208 }, { "epoch": 1.9931023223541895, "grad_norm": 1.7144010066986084, "learning_rate": 1.937403606985061e-05, "loss": 0.7138, "step": 12209 }, { "epoch": 1.993265580996694, "grad_norm": 1.6843595504760742, "learning_rate": 1.9373925555689212e-05, "loss": 0.6638, "step": 12210 }, { "epoch": 1.9934288396391984, "grad_norm": 1.7137644290924072, "learning_rate": 1.937381503208828e-05, "loss": 0.6537, "step": 12211 }, { "epoch": 1.9935920982817028, "grad_norm": 2.0475170612335205, "learning_rate": 1.9373704499047918e-05, "loss": 0.8825, "step": 12212 }, { "epoch": 1.9937553569242072, "grad_norm": 1.7538343667984009, "learning_rate": 1.9373593956568234e-05, "loss": 0.697, "step": 12213 }, { "epoch": 1.9939186155667117, "grad_norm": 1.7188959121704102, "learning_rate": 1.9373483404649347e-05, "loss": 0.5923, "step": 12214 }, { "epoch": 1.994081874209216, "grad_norm": 1.9927308559417725, "learning_rate": 1.9373372843291366e-05, "loss": 0.7649, "step": 12215 }, { "epoch": 1.9942451328517203, "grad_norm": 1.2580665349960327, "learning_rate": 1.9373262272494398e-05, "loss": 0.4279, "step": 12216 }, { "epoch": 1.9944083914942248, "grad_norm": 1.657604455947876, "learning_rate": 1.9373151692258556e-05, "loss": 0.7364, "step": 12217 }, { "epoch": 1.994571650136729, "grad_norm": 1.3979759216308594, "learning_rate": 1.9373041102583955e-05, "loss": 0.5351, "step": 12218 }, { "epoch": 1.9947349087792334, "grad_norm": 1.6789911985397339, "learning_rate": 1.9372930503470706e-05, "loss": 0.5724, "step": 12219 }, { "epoch": 1.9948981674217379, "grad_norm": 1.469632863998413, "learning_rate": 1.937281989491892e-05, "loss": 0.6318, "step": 12220 }, { "epoch": 1.9950614260642423, "grad_norm": 1.9700437784194946, "learning_rate": 1.93727092769287e-05, "loss": 0.6045, "step": 12221 }, { "epoch": 1.9952246847067467, "grad_norm": 1.65242338180542, "learning_rate": 1.9372598649500167e-05, "loss": 0.5749, "step": 12222 }, { "epoch": 1.9953879433492512, "grad_norm": 1.6727186441421509, "learning_rate": 1.9372488012633434e-05, "loss": 0.6501, "step": 12223 }, { "epoch": 1.9955512019917554, "grad_norm": 1.6649644374847412, "learning_rate": 1.9372377366328602e-05, "loss": 0.5341, "step": 12224 }, { "epoch": 1.9957144606342598, "grad_norm": 1.7199417352676392, "learning_rate": 1.937226671058579e-05, "loss": 0.6127, "step": 12225 }, { "epoch": 1.995877719276764, "grad_norm": 1.600156307220459, "learning_rate": 1.937215604540511e-05, "loss": 0.6201, "step": 12226 }, { "epoch": 1.9960409779192685, "grad_norm": 1.7136313915252686, "learning_rate": 1.937204537078667e-05, "loss": 0.6862, "step": 12227 }, { "epoch": 1.996204236561773, "grad_norm": 1.6513633728027344, "learning_rate": 1.9371934686730583e-05, "loss": 0.6691, "step": 12228 }, { "epoch": 1.9963674952042774, "grad_norm": 1.8737003803253174, "learning_rate": 1.937182399323696e-05, "loss": 0.6682, "step": 12229 }, { "epoch": 1.9965307538467818, "grad_norm": 1.7241592407226562, "learning_rate": 1.937171329030591e-05, "loss": 0.8136, "step": 12230 }, { "epoch": 1.9966940124892862, "grad_norm": 1.7646976709365845, "learning_rate": 1.9371602577937554e-05, "loss": 0.7151, "step": 12231 }, { "epoch": 1.9968572711317907, "grad_norm": 1.985079288482666, "learning_rate": 1.9371491856131993e-05, "loss": 0.7016, "step": 12232 }, { "epoch": 1.997020529774295, "grad_norm": 1.5202877521514893, "learning_rate": 1.937138112488934e-05, "loss": 0.6558, "step": 12233 }, { "epoch": 1.9971837884167993, "grad_norm": 1.837958812713623, "learning_rate": 1.9371270384209713e-05, "loss": 0.7943, "step": 12234 }, { "epoch": 1.9973470470593035, "grad_norm": 1.968964695930481, "learning_rate": 1.9371159634093214e-05, "loss": 0.7701, "step": 12235 }, { "epoch": 1.997510305701808, "grad_norm": 1.9323967695236206, "learning_rate": 1.9371048874539965e-05, "loss": 0.6578, "step": 12236 }, { "epoch": 1.9976735643443124, "grad_norm": 1.8352723121643066, "learning_rate": 1.9370938105550068e-05, "loss": 1.06, "step": 12237 }, { "epoch": 1.9978368229868169, "grad_norm": 1.5047938823699951, "learning_rate": 1.937082732712364e-05, "loss": 0.5885, "step": 12238 }, { "epoch": 1.9980000816293213, "grad_norm": 1.8986798524856567, "learning_rate": 1.9370716539260793e-05, "loss": 0.8025, "step": 12239 }, { "epoch": 1.9981633402718257, "grad_norm": 1.7441338300704956, "learning_rate": 1.9370605741961638e-05, "loss": 0.7996, "step": 12240 }, { "epoch": 1.9983265989143302, "grad_norm": 1.7328318357467651, "learning_rate": 1.9370494935226283e-05, "loss": 0.7662, "step": 12241 }, { "epoch": 1.9984898575568344, "grad_norm": 1.4956060647964478, "learning_rate": 1.937038411905484e-05, "loss": 0.7831, "step": 12242 }, { "epoch": 1.9986531161993388, "grad_norm": 1.570144772529602, "learning_rate": 1.937027329344743e-05, "loss": 0.5667, "step": 12243 }, { "epoch": 1.998816374841843, "grad_norm": 1.3083720207214355, "learning_rate": 1.937016245840415e-05, "loss": 0.4757, "step": 12244 }, { "epoch": 1.9989796334843475, "grad_norm": 1.7528181076049805, "learning_rate": 1.9370051613925122e-05, "loss": 0.8001, "step": 12245 }, { "epoch": 1.999142892126852, "grad_norm": 1.518257737159729, "learning_rate": 1.9369940760010454e-05, "loss": 0.5588, "step": 12246 }, { "epoch": 1.9993061507693564, "grad_norm": 1.6943553686141968, "learning_rate": 1.9369829896660257e-05, "loss": 0.7421, "step": 12247 }, { "epoch": 1.9994694094118608, "grad_norm": 1.6327831745147705, "learning_rate": 1.9369719023874644e-05, "loss": 0.6137, "step": 12248 }, { "epoch": 1.9996326680543652, "grad_norm": 1.5350393056869507, "learning_rate": 1.9369608141653728e-05, "loss": 0.6702, "step": 12249 }, { "epoch": 1.9997959266968697, "grad_norm": 1.4994316101074219, "learning_rate": 1.936949724999762e-05, "loss": 0.6092, "step": 12250 }, { "epoch": 1.999959185339374, "grad_norm": 1.688586950302124, "learning_rate": 1.936938634890643e-05, "loss": 0.6434, "step": 12251 }, { "epoch": 2.0, "grad_norm": 3.8284318447113037, "learning_rate": 1.9369275438380268e-05, "loss": 1.0179, "step": 12252 }, { "epoch": 2.0001632586425044, "grad_norm": 1.525017499923706, "learning_rate": 1.936916451841925e-05, "loss": 0.6397, "step": 12253 }, { "epoch": 2.000326517285009, "grad_norm": 1.4315767288208008, "learning_rate": 1.9369053589023485e-05, "loss": 0.617, "step": 12254 }, { "epoch": 2.0004897759275133, "grad_norm": 1.3437190055847168, "learning_rate": 1.9368942650193087e-05, "loss": 0.5643, "step": 12255 }, { "epoch": 2.0006530345700178, "grad_norm": 1.2458715438842773, "learning_rate": 1.9368831701928165e-05, "loss": 0.5059, "step": 12256 }, { "epoch": 2.0008162932125217, "grad_norm": 1.2643648386001587, "learning_rate": 1.9368720744228834e-05, "loss": 0.5238, "step": 12257 }, { "epoch": 2.000979551855026, "grad_norm": 1.5440683364868164, "learning_rate": 1.93686097770952e-05, "loss": 0.7249, "step": 12258 }, { "epoch": 2.0011428104975306, "grad_norm": 1.7303239107131958, "learning_rate": 1.936849880052738e-05, "loss": 0.7757, "step": 12259 }, { "epoch": 2.001306069140035, "grad_norm": 1.387161135673523, "learning_rate": 1.9368387814525483e-05, "loss": 0.6196, "step": 12260 }, { "epoch": 2.0014693277825395, "grad_norm": 1.632778286933899, "learning_rate": 1.9368276819089625e-05, "loss": 0.6434, "step": 12261 }, { "epoch": 2.001632586425044, "grad_norm": 1.4366050958633423, "learning_rate": 1.9368165814219914e-05, "loss": 0.6531, "step": 12262 }, { "epoch": 2.0017958450675484, "grad_norm": 1.8104180097579956, "learning_rate": 1.936805479991646e-05, "loss": 0.7032, "step": 12263 }, { "epoch": 2.001959103710053, "grad_norm": 1.5994017124176025, "learning_rate": 1.936794377617938e-05, "loss": 0.6576, "step": 12264 }, { "epoch": 2.0021223623525572, "grad_norm": 1.330633521080017, "learning_rate": 1.9367832743008782e-05, "loss": 0.5244, "step": 12265 }, { "epoch": 2.0022856209950612, "grad_norm": 1.5582550764083862, "learning_rate": 1.9367721700404776e-05, "loss": 0.5558, "step": 12266 }, { "epoch": 2.0024488796375657, "grad_norm": 1.4332902431488037, "learning_rate": 1.9367610648367483e-05, "loss": 0.4751, "step": 12267 }, { "epoch": 2.00261213828007, "grad_norm": 1.9560657739639282, "learning_rate": 1.9367499586897004e-05, "loss": 0.5831, "step": 12268 }, { "epoch": 2.0027753969225746, "grad_norm": 2.0147335529327393, "learning_rate": 1.9367388515993458e-05, "loss": 0.7942, "step": 12269 }, { "epoch": 2.002938655565079, "grad_norm": 1.9275023937225342, "learning_rate": 1.9367277435656953e-05, "loss": 0.6588, "step": 12270 }, { "epoch": 2.0031019142075834, "grad_norm": 1.6925292015075684, "learning_rate": 1.93671663458876e-05, "loss": 0.6191, "step": 12271 }, { "epoch": 2.003265172850088, "grad_norm": 1.3208918571472168, "learning_rate": 1.9367055246685518e-05, "loss": 0.55, "step": 12272 }, { "epoch": 2.0034284314925923, "grad_norm": 1.4352906942367554, "learning_rate": 1.936694413805081e-05, "loss": 0.5203, "step": 12273 }, { "epoch": 2.0035916901350967, "grad_norm": 1.5033046007156372, "learning_rate": 1.9366833019983594e-05, "loss": 0.6316, "step": 12274 }, { "epoch": 2.0037549487776007, "grad_norm": 1.9667242765426636, "learning_rate": 1.9366721892483976e-05, "loss": 0.673, "step": 12275 }, { "epoch": 2.003918207420105, "grad_norm": 1.6786645650863647, "learning_rate": 1.9366610755552077e-05, "loss": 0.5257, "step": 12276 }, { "epoch": 2.0040814660626096, "grad_norm": 1.7024633884429932, "learning_rate": 1.9366499609188e-05, "loss": 0.6784, "step": 12277 }, { "epoch": 2.004244724705114, "grad_norm": 1.5858862400054932, "learning_rate": 1.936638845339186e-05, "loss": 0.6209, "step": 12278 }, { "epoch": 2.0044079833476185, "grad_norm": 1.7487678527832031, "learning_rate": 1.9366277288163768e-05, "loss": 0.5504, "step": 12279 }, { "epoch": 2.004571241990123, "grad_norm": 1.7262192964553833, "learning_rate": 1.9366166113503843e-05, "loss": 0.5333, "step": 12280 }, { "epoch": 2.0047345006326274, "grad_norm": 1.573883295059204, "learning_rate": 1.9366054929412185e-05, "loss": 0.558, "step": 12281 }, { "epoch": 2.004897759275132, "grad_norm": 1.6428101062774658, "learning_rate": 1.9365943735888914e-05, "loss": 0.6364, "step": 12282 }, { "epoch": 2.005061017917636, "grad_norm": 1.5487192869186401, "learning_rate": 1.936583253293414e-05, "loss": 0.5481, "step": 12283 }, { "epoch": 2.0052242765601402, "grad_norm": 1.6665948629379272, "learning_rate": 1.9365721320547978e-05, "loss": 0.5613, "step": 12284 }, { "epoch": 2.0053875352026447, "grad_norm": 1.5535932779312134, "learning_rate": 1.9365610098730533e-05, "loss": 0.5116, "step": 12285 }, { "epoch": 2.005550793845149, "grad_norm": 1.7239588499069214, "learning_rate": 1.9365498867481926e-05, "loss": 0.5803, "step": 12286 }, { "epoch": 2.0057140524876536, "grad_norm": 1.7922890186309814, "learning_rate": 1.936538762680226e-05, "loss": 0.5568, "step": 12287 }, { "epoch": 2.005877311130158, "grad_norm": 1.456236481666565, "learning_rate": 1.9365276376691652e-05, "loss": 0.5147, "step": 12288 }, { "epoch": 2.0060405697726624, "grad_norm": 2.087960958480835, "learning_rate": 1.9365165117150213e-05, "loss": 0.647, "step": 12289 }, { "epoch": 2.006203828415167, "grad_norm": 1.4387080669403076, "learning_rate": 1.9365053848178058e-05, "loss": 0.5161, "step": 12290 }, { "epoch": 2.0063670870576713, "grad_norm": 1.8212391138076782, "learning_rate": 1.9364942569775292e-05, "loss": 0.5775, "step": 12291 }, { "epoch": 2.0065303457001753, "grad_norm": 1.7119109630584717, "learning_rate": 1.9364831281942034e-05, "loss": 0.6184, "step": 12292 }, { "epoch": 2.0066936043426797, "grad_norm": 1.444809913635254, "learning_rate": 1.9364719984678393e-05, "loss": 0.5046, "step": 12293 }, { "epoch": 2.006856862985184, "grad_norm": 1.689087152481079, "learning_rate": 1.9364608677984483e-05, "loss": 0.6092, "step": 12294 }, { "epoch": 2.0070201216276886, "grad_norm": 1.5682522058486938, "learning_rate": 1.9364497361860413e-05, "loss": 0.5588, "step": 12295 }, { "epoch": 2.007183380270193, "grad_norm": 1.5811374187469482, "learning_rate": 1.9364386036306294e-05, "loss": 0.6533, "step": 12296 }, { "epoch": 2.0073466389126975, "grad_norm": 1.5118218660354614, "learning_rate": 1.9364274701322246e-05, "loss": 0.5512, "step": 12297 }, { "epoch": 2.007509897555202, "grad_norm": 1.7992173433303833, "learning_rate": 1.936416335690837e-05, "loss": 0.5553, "step": 12298 }, { "epoch": 2.0076731561977064, "grad_norm": 1.8048683404922485, "learning_rate": 1.9364052003064787e-05, "loss": 0.5243, "step": 12299 }, { "epoch": 2.007836414840211, "grad_norm": 1.7227141857147217, "learning_rate": 1.9363940639791607e-05, "loss": 0.5865, "step": 12300 }, { "epoch": 2.007999673482715, "grad_norm": 1.9699747562408447, "learning_rate": 1.936382926708894e-05, "loss": 0.9084, "step": 12301 }, { "epoch": 2.0081629321252192, "grad_norm": 1.9576135873794556, "learning_rate": 1.93637178849569e-05, "loss": 0.5981, "step": 12302 }, { "epoch": 2.0083261907677237, "grad_norm": 1.8755521774291992, "learning_rate": 1.9363606493395596e-05, "loss": 0.5671, "step": 12303 }, { "epoch": 2.008489449410228, "grad_norm": 1.4214897155761719, "learning_rate": 1.9363495092405147e-05, "loss": 0.5102, "step": 12304 }, { "epoch": 2.0086527080527325, "grad_norm": 1.9460335969924927, "learning_rate": 1.936338368198566e-05, "loss": 0.7092, "step": 12305 }, { "epoch": 2.008815966695237, "grad_norm": 1.6770280599594116, "learning_rate": 1.9363272262137247e-05, "loss": 0.6075, "step": 12306 }, { "epoch": 2.0089792253377414, "grad_norm": 1.5190061330795288, "learning_rate": 1.936316083286002e-05, "loss": 0.4775, "step": 12307 }, { "epoch": 2.009142483980246, "grad_norm": 1.4265611171722412, "learning_rate": 1.9363049394154095e-05, "loss": 0.4223, "step": 12308 }, { "epoch": 2.0093057426227503, "grad_norm": 2.0123677253723145, "learning_rate": 1.936293794601958e-05, "loss": 0.6592, "step": 12309 }, { "epoch": 2.0094690012652543, "grad_norm": 1.6626603603363037, "learning_rate": 1.936282648845659e-05, "loss": 0.5924, "step": 12310 }, { "epoch": 2.0096322599077587, "grad_norm": 1.5935218334197998, "learning_rate": 1.9362715021465236e-05, "loss": 0.5072, "step": 12311 }, { "epoch": 2.009795518550263, "grad_norm": 1.7734816074371338, "learning_rate": 1.9362603545045632e-05, "loss": 0.569, "step": 12312 }, { "epoch": 2.0099587771927676, "grad_norm": 1.55368971824646, "learning_rate": 1.9362492059197887e-05, "loss": 0.5343, "step": 12313 }, { "epoch": 2.010122035835272, "grad_norm": 2.029787540435791, "learning_rate": 1.9362380563922113e-05, "loss": 0.6977, "step": 12314 }, { "epoch": 2.0102852944777765, "grad_norm": 2.0183026790618896, "learning_rate": 1.9362269059218426e-05, "loss": 0.7911, "step": 12315 }, { "epoch": 2.010448553120281, "grad_norm": 1.7496566772460938, "learning_rate": 1.9362157545086936e-05, "loss": 0.5947, "step": 12316 }, { "epoch": 2.0106118117627854, "grad_norm": 1.8365520238876343, "learning_rate": 1.9362046021527757e-05, "loss": 0.663, "step": 12317 }, { "epoch": 2.01077507040529, "grad_norm": 1.6675431728363037, "learning_rate": 1.9361934488541002e-05, "loss": 0.5584, "step": 12318 }, { "epoch": 2.010938329047794, "grad_norm": 1.592437744140625, "learning_rate": 1.936182294612678e-05, "loss": 0.549, "step": 12319 }, { "epoch": 2.0111015876902982, "grad_norm": 1.6111193895339966, "learning_rate": 1.9361711394285202e-05, "loss": 0.5623, "step": 12320 }, { "epoch": 2.0112648463328027, "grad_norm": 1.9162113666534424, "learning_rate": 1.9361599833016387e-05, "loss": 0.673, "step": 12321 }, { "epoch": 2.011428104975307, "grad_norm": 1.6031928062438965, "learning_rate": 1.936148826232044e-05, "loss": 0.5496, "step": 12322 }, { "epoch": 2.0115913636178115, "grad_norm": 1.9766395092010498, "learning_rate": 1.9361376682197478e-05, "loss": 0.5971, "step": 12323 }, { "epoch": 2.011754622260316, "grad_norm": 1.4820067882537842, "learning_rate": 1.9361265092647615e-05, "loss": 0.6003, "step": 12324 }, { "epoch": 2.0119178809028204, "grad_norm": 1.613274335861206, "learning_rate": 1.936115349367096e-05, "loss": 0.5027, "step": 12325 }, { "epoch": 2.012081139545325, "grad_norm": 1.7153983116149902, "learning_rate": 1.9361041885267623e-05, "loss": 0.5366, "step": 12326 }, { "epoch": 2.012244398187829, "grad_norm": 1.82578706741333, "learning_rate": 1.936093026743772e-05, "loss": 0.6272, "step": 12327 }, { "epoch": 2.0124076568303333, "grad_norm": 1.8054364919662476, "learning_rate": 1.936081864018136e-05, "loss": 0.5157, "step": 12328 }, { "epoch": 2.0125709154728377, "grad_norm": 1.8185817003250122, "learning_rate": 1.936070700349866e-05, "loss": 0.5669, "step": 12329 }, { "epoch": 2.012734174115342, "grad_norm": 1.687887191772461, "learning_rate": 1.9360595357389735e-05, "loss": 0.6349, "step": 12330 }, { "epoch": 2.0128974327578466, "grad_norm": 2.1315226554870605, "learning_rate": 1.9360483701854687e-05, "loss": 0.7192, "step": 12331 }, { "epoch": 2.013060691400351, "grad_norm": 1.6780625581741333, "learning_rate": 1.936037203689364e-05, "loss": 0.5634, "step": 12332 }, { "epoch": 2.0132239500428555, "grad_norm": 1.7298437356948853, "learning_rate": 1.93602603625067e-05, "loss": 0.586, "step": 12333 }, { "epoch": 2.01338720868536, "grad_norm": 1.8218159675598145, "learning_rate": 1.9360148678693974e-05, "loss": 0.7182, "step": 12334 }, { "epoch": 2.0135504673278644, "grad_norm": 1.6506799459457397, "learning_rate": 1.9360036985455586e-05, "loss": 0.6337, "step": 12335 }, { "epoch": 2.0137137259703684, "grad_norm": 2.09773588180542, "learning_rate": 1.9359925282791642e-05, "loss": 0.5782, "step": 12336 }, { "epoch": 2.013876984612873, "grad_norm": 1.899759292602539, "learning_rate": 1.9359813570702254e-05, "loss": 0.6659, "step": 12337 }, { "epoch": 2.0140402432553772, "grad_norm": 2.1470119953155518, "learning_rate": 1.935970184918754e-05, "loss": 0.6836, "step": 12338 }, { "epoch": 2.0142035018978817, "grad_norm": 1.4537296295166016, "learning_rate": 1.9359590118247608e-05, "loss": 0.4847, "step": 12339 }, { "epoch": 2.014366760540386, "grad_norm": 2.1467232704162598, "learning_rate": 1.9359478377882567e-05, "loss": 0.6309, "step": 12340 }, { "epoch": 2.0145300191828905, "grad_norm": 1.8243601322174072, "learning_rate": 1.935936662809254e-05, "loss": 0.5927, "step": 12341 }, { "epoch": 2.014693277825395, "grad_norm": 1.7992668151855469, "learning_rate": 1.935925486887763e-05, "loss": 0.6132, "step": 12342 }, { "epoch": 2.0148565364678994, "grad_norm": 1.5309432744979858, "learning_rate": 1.935914310023795e-05, "loss": 0.4946, "step": 12343 }, { "epoch": 2.015019795110404, "grad_norm": 1.3196916580200195, "learning_rate": 1.935903132217362e-05, "loss": 0.484, "step": 12344 }, { "epoch": 2.015183053752908, "grad_norm": 1.5638799667358398, "learning_rate": 1.9358919534684748e-05, "loss": 0.5677, "step": 12345 }, { "epoch": 2.0153463123954123, "grad_norm": 1.9837442636489868, "learning_rate": 1.9358807737771444e-05, "loss": 0.5411, "step": 12346 }, { "epoch": 2.0155095710379167, "grad_norm": 1.7685649394989014, "learning_rate": 1.9358695931433825e-05, "loss": 0.5711, "step": 12347 }, { "epoch": 2.015672829680421, "grad_norm": 1.9924731254577637, "learning_rate": 1.9358584115672e-05, "loss": 0.561, "step": 12348 }, { "epoch": 2.0158360883229256, "grad_norm": 1.5001784563064575, "learning_rate": 1.9358472290486085e-05, "loss": 0.5066, "step": 12349 }, { "epoch": 2.01599934696543, "grad_norm": 1.7629691362380981, "learning_rate": 1.935836045587619e-05, "loss": 0.5885, "step": 12350 }, { "epoch": 2.0161626056079345, "grad_norm": 1.4926044940948486, "learning_rate": 1.9358248611842427e-05, "loss": 0.5029, "step": 12351 }, { "epoch": 2.016325864250439, "grad_norm": 1.8621244430541992, "learning_rate": 1.935813675838491e-05, "loss": 0.5142, "step": 12352 }, { "epoch": 2.0164891228929434, "grad_norm": 1.6747856140136719, "learning_rate": 1.9358024895503753e-05, "loss": 0.5897, "step": 12353 }, { "epoch": 2.0166523815354473, "grad_norm": 1.4368631839752197, "learning_rate": 1.9357913023199066e-05, "loss": 0.5021, "step": 12354 }, { "epoch": 2.016815640177952, "grad_norm": 2.0751402378082275, "learning_rate": 1.9357801141470967e-05, "loss": 0.604, "step": 12355 }, { "epoch": 2.0169788988204562, "grad_norm": 1.9507529735565186, "learning_rate": 1.9357689250319563e-05, "loss": 0.6411, "step": 12356 }, { "epoch": 2.0171421574629607, "grad_norm": 2.1072123050689697, "learning_rate": 1.935757734974497e-05, "loss": 0.6936, "step": 12357 }, { "epoch": 2.017305416105465, "grad_norm": 1.6697245836257935, "learning_rate": 1.9357465439747295e-05, "loss": 0.5252, "step": 12358 }, { "epoch": 2.0174686747479695, "grad_norm": 1.679677128791809, "learning_rate": 1.9357353520326658e-05, "loss": 0.5411, "step": 12359 }, { "epoch": 2.017631933390474, "grad_norm": 1.3646066188812256, "learning_rate": 1.9357241591483165e-05, "loss": 0.4612, "step": 12360 }, { "epoch": 2.0177951920329784, "grad_norm": 1.9299532175064087, "learning_rate": 1.9357129653216932e-05, "loss": 1.161, "step": 12361 }, { "epoch": 2.017958450675483, "grad_norm": 1.958855152130127, "learning_rate": 1.9357017705528075e-05, "loss": 0.8137, "step": 12362 }, { "epoch": 2.018121709317987, "grad_norm": 1.9061126708984375, "learning_rate": 1.9356905748416704e-05, "loss": 0.5766, "step": 12363 }, { "epoch": 2.0182849679604913, "grad_norm": 1.6292773485183716, "learning_rate": 1.935679378188293e-05, "loss": 0.5095, "step": 12364 }, { "epoch": 2.0184482266029957, "grad_norm": 1.476036787033081, "learning_rate": 1.9356681805926867e-05, "loss": 0.4676, "step": 12365 }, { "epoch": 2.0186114852455, "grad_norm": 1.4244312047958374, "learning_rate": 1.9356569820548628e-05, "loss": 0.5021, "step": 12366 }, { "epoch": 2.0187747438880046, "grad_norm": 1.6099629402160645, "learning_rate": 1.9356457825748326e-05, "loss": 0.5215, "step": 12367 }, { "epoch": 2.018938002530509, "grad_norm": 1.7246471643447876, "learning_rate": 1.9356345821526074e-05, "loss": 0.6251, "step": 12368 }, { "epoch": 2.0191012611730135, "grad_norm": 1.8226594924926758, "learning_rate": 1.9356233807881983e-05, "loss": 0.5616, "step": 12369 }, { "epoch": 2.019264519815518, "grad_norm": 1.8126475811004639, "learning_rate": 1.9356121784816165e-05, "loss": 0.6473, "step": 12370 }, { "epoch": 2.019427778458022, "grad_norm": 1.3992434740066528, "learning_rate": 1.935600975232874e-05, "loss": 0.5093, "step": 12371 }, { "epoch": 2.0195910371005263, "grad_norm": 1.6207711696624756, "learning_rate": 1.935589771041981e-05, "loss": 0.4469, "step": 12372 }, { "epoch": 2.019754295743031, "grad_norm": 1.5531736612319946, "learning_rate": 1.93557856590895e-05, "loss": 0.5149, "step": 12373 }, { "epoch": 2.019917554385535, "grad_norm": 1.960896372795105, "learning_rate": 1.9355673598337916e-05, "loss": 0.6126, "step": 12374 }, { "epoch": 2.0200808130280397, "grad_norm": 2.001063823699951, "learning_rate": 1.9355561528165166e-05, "loss": 0.6533, "step": 12375 }, { "epoch": 2.020244071670544, "grad_norm": 1.4003616571426392, "learning_rate": 1.935544944857137e-05, "loss": 0.4508, "step": 12376 }, { "epoch": 2.0204073303130485, "grad_norm": 2.0550425052642822, "learning_rate": 1.9355337359556642e-05, "loss": 0.6309, "step": 12377 }, { "epoch": 2.020570588955553, "grad_norm": 1.7690050601959229, "learning_rate": 1.9355225261121087e-05, "loss": 0.6835, "step": 12378 }, { "epoch": 2.0207338475980574, "grad_norm": 1.7926242351531982, "learning_rate": 1.9355113153264824e-05, "loss": 0.5439, "step": 12379 }, { "epoch": 2.0208971062405614, "grad_norm": 1.500222086906433, "learning_rate": 1.9355001035987966e-05, "loss": 0.5466, "step": 12380 }, { "epoch": 2.021060364883066, "grad_norm": 1.769160509109497, "learning_rate": 1.935488890929062e-05, "loss": 0.5374, "step": 12381 }, { "epoch": 2.0212236235255703, "grad_norm": 1.8263441324234009, "learning_rate": 1.935477677317291e-05, "loss": 0.532, "step": 12382 }, { "epoch": 2.0213868821680747, "grad_norm": 2.171236753463745, "learning_rate": 1.935466462763494e-05, "loss": 0.5571, "step": 12383 }, { "epoch": 2.021550140810579, "grad_norm": 1.4735336303710938, "learning_rate": 1.935455247267682e-05, "loss": 0.4806, "step": 12384 }, { "epoch": 2.0217133994530836, "grad_norm": 1.8918373584747314, "learning_rate": 1.9354440308298676e-05, "loss": 0.5282, "step": 12385 }, { "epoch": 2.021876658095588, "grad_norm": 1.8940712213516235, "learning_rate": 1.9354328134500608e-05, "loss": 1.325, "step": 12386 }, { "epoch": 2.0220399167380925, "grad_norm": 1.9090542793273926, "learning_rate": 1.9354215951282736e-05, "loss": 0.5491, "step": 12387 }, { "epoch": 2.022203175380597, "grad_norm": 1.726650357246399, "learning_rate": 1.935410375864517e-05, "loss": 0.5842, "step": 12388 }, { "epoch": 2.022366434023101, "grad_norm": 1.8594613075256348, "learning_rate": 1.9353991556588026e-05, "loss": 0.6461, "step": 12389 }, { "epoch": 2.0225296926656053, "grad_norm": 1.581127643585205, "learning_rate": 1.935387934511141e-05, "loss": 0.5789, "step": 12390 }, { "epoch": 2.0226929513081098, "grad_norm": 1.7179625034332275, "learning_rate": 1.9353767124215448e-05, "loss": 0.5809, "step": 12391 }, { "epoch": 2.022856209950614, "grad_norm": 1.5810734033584595, "learning_rate": 1.9353654893900237e-05, "loss": 0.5242, "step": 12392 }, { "epoch": 2.0230194685931187, "grad_norm": 1.9803438186645508, "learning_rate": 1.9353542654165905e-05, "loss": 0.6232, "step": 12393 }, { "epoch": 2.023182727235623, "grad_norm": 1.6825182437896729, "learning_rate": 1.9353430405012553e-05, "loss": 0.5652, "step": 12394 }, { "epoch": 2.0233459858781275, "grad_norm": 1.779036521911621, "learning_rate": 1.93533181464403e-05, "loss": 0.5515, "step": 12395 }, { "epoch": 2.023509244520632, "grad_norm": 1.5682705640792847, "learning_rate": 1.935320587844926e-05, "loss": 0.497, "step": 12396 }, { "epoch": 2.0236725031631364, "grad_norm": 1.9355415105819702, "learning_rate": 1.935309360103954e-05, "loss": 0.6478, "step": 12397 }, { "epoch": 2.0238357618056404, "grad_norm": 2.005685329437256, "learning_rate": 1.935298131421126e-05, "loss": 0.645, "step": 12398 }, { "epoch": 2.023999020448145, "grad_norm": 1.915358543395996, "learning_rate": 1.9352869017964533e-05, "loss": 0.6761, "step": 12399 }, { "epoch": 2.0241622790906493, "grad_norm": 1.7697430849075317, "learning_rate": 1.9352756712299467e-05, "loss": 0.5733, "step": 12400 }, { "epoch": 2.0243255377331537, "grad_norm": 1.916601538658142, "learning_rate": 1.9352644397216177e-05, "loss": 0.6181, "step": 12401 }, { "epoch": 2.024488796375658, "grad_norm": 1.694218635559082, "learning_rate": 1.935253207271478e-05, "loss": 0.5112, "step": 12402 }, { "epoch": 2.0246520550181626, "grad_norm": 1.7563910484313965, "learning_rate": 1.935241973879538e-05, "loss": 0.5661, "step": 12403 }, { "epoch": 2.024815313660667, "grad_norm": 1.9390674829483032, "learning_rate": 1.9352307395458097e-05, "loss": 0.6035, "step": 12404 }, { "epoch": 2.0249785723031715, "grad_norm": 1.9004501104354858, "learning_rate": 1.9352195042703045e-05, "loss": 0.5084, "step": 12405 }, { "epoch": 2.025141830945676, "grad_norm": 1.617708444595337, "learning_rate": 1.9352082680530334e-05, "loss": 0.6307, "step": 12406 }, { "epoch": 2.02530508958818, "grad_norm": 1.418436050415039, "learning_rate": 1.935197030894008e-05, "loss": 0.4534, "step": 12407 }, { "epoch": 2.0254683482306843, "grad_norm": 1.4722530841827393, "learning_rate": 1.9351857927932392e-05, "loss": 0.4057, "step": 12408 }, { "epoch": 2.0256316068731888, "grad_norm": 1.5121897459030151, "learning_rate": 1.9351745537507386e-05, "loss": 0.4033, "step": 12409 }, { "epoch": 2.025794865515693, "grad_norm": 1.6805142164230347, "learning_rate": 1.9351633137665175e-05, "loss": 0.516, "step": 12410 }, { "epoch": 2.0259581241581976, "grad_norm": 1.7234915494918823, "learning_rate": 1.9351520728405872e-05, "loss": 0.5185, "step": 12411 }, { "epoch": 2.026121382800702, "grad_norm": 1.5997729301452637, "learning_rate": 1.9351408309729592e-05, "loss": 0.5622, "step": 12412 }, { "epoch": 2.0262846414432065, "grad_norm": 2.1638271808624268, "learning_rate": 1.9351295881636444e-05, "loss": 0.5176, "step": 12413 }, { "epoch": 2.026447900085711, "grad_norm": 1.865201711654663, "learning_rate": 1.9351183444126542e-05, "loss": 0.6058, "step": 12414 }, { "epoch": 2.026611158728215, "grad_norm": 1.8605846166610718, "learning_rate": 1.9351070997200003e-05, "loss": 0.5994, "step": 12415 }, { "epoch": 2.0267744173707194, "grad_norm": 2.8188579082489014, "learning_rate": 1.935095854085694e-05, "loss": 0.5067, "step": 12416 }, { "epoch": 2.026937676013224, "grad_norm": 1.77742338180542, "learning_rate": 1.935084607509746e-05, "loss": 0.5478, "step": 12417 }, { "epoch": 2.0271009346557283, "grad_norm": 1.7860585451126099, "learning_rate": 1.9350733599921684e-05, "loss": 0.6017, "step": 12418 }, { "epoch": 2.0272641932982327, "grad_norm": 1.9925343990325928, "learning_rate": 1.935062111532972e-05, "loss": 0.6638, "step": 12419 }, { "epoch": 2.027427451940737, "grad_norm": 1.6430071592330933, "learning_rate": 1.9350508621321685e-05, "loss": 0.5226, "step": 12420 }, { "epoch": 2.0275907105832416, "grad_norm": 1.5705299377441406, "learning_rate": 1.935039611789769e-05, "loss": 0.4806, "step": 12421 }, { "epoch": 2.027753969225746, "grad_norm": 1.5908104181289673, "learning_rate": 1.9350283605057844e-05, "loss": 0.5549, "step": 12422 }, { "epoch": 2.0279172278682505, "grad_norm": 1.6370774507522583, "learning_rate": 1.935017108280227e-05, "loss": 0.5892, "step": 12423 }, { "epoch": 2.0280804865107545, "grad_norm": 1.6377973556518555, "learning_rate": 1.9350058551131072e-05, "loss": 0.5636, "step": 12424 }, { "epoch": 2.028243745153259, "grad_norm": 1.7274359464645386, "learning_rate": 1.9349946010044373e-05, "loss": 0.61, "step": 12425 }, { "epoch": 2.0284070037957633, "grad_norm": 1.643977403640747, "learning_rate": 1.9349833459542275e-05, "loss": 0.5339, "step": 12426 }, { "epoch": 2.0285702624382678, "grad_norm": 1.6858141422271729, "learning_rate": 1.93497208996249e-05, "loss": 0.5399, "step": 12427 }, { "epoch": 2.028733521080772, "grad_norm": 1.6895197629928589, "learning_rate": 1.9349608330292357e-05, "loss": 0.5947, "step": 12428 }, { "epoch": 2.0288967797232766, "grad_norm": 1.646095633506775, "learning_rate": 1.9349495751544763e-05, "loss": 0.6307, "step": 12429 }, { "epoch": 2.029060038365781, "grad_norm": 1.4837992191314697, "learning_rate": 1.934938316338223e-05, "loss": 0.4354, "step": 12430 }, { "epoch": 2.0292232970082855, "grad_norm": 1.675665020942688, "learning_rate": 1.934927056580487e-05, "loss": 0.5829, "step": 12431 }, { "epoch": 2.02938655565079, "grad_norm": 1.5921735763549805, "learning_rate": 1.9349157958812795e-05, "loss": 0.4982, "step": 12432 }, { "epoch": 2.029549814293294, "grad_norm": 1.9002126455307007, "learning_rate": 1.934904534240612e-05, "loss": 0.6137, "step": 12433 }, { "epoch": 2.0297130729357984, "grad_norm": 1.4445796012878418, "learning_rate": 1.9348932716584962e-05, "loss": 0.4812, "step": 12434 }, { "epoch": 2.029876331578303, "grad_norm": 1.7333933115005493, "learning_rate": 1.9348820081349432e-05, "loss": 0.6302, "step": 12435 }, { "epoch": 2.0300395902208073, "grad_norm": 1.585367202758789, "learning_rate": 1.9348707436699636e-05, "loss": 0.4908, "step": 12436 }, { "epoch": 2.0302028488633117, "grad_norm": 1.7089742422103882, "learning_rate": 1.9348594782635702e-05, "loss": 0.6236, "step": 12437 }, { "epoch": 2.030366107505816, "grad_norm": 1.989702820777893, "learning_rate": 1.934848211915773e-05, "loss": 0.6611, "step": 12438 }, { "epoch": 2.0305293661483206, "grad_norm": 1.8191251754760742, "learning_rate": 1.934836944626584e-05, "loss": 0.5858, "step": 12439 }, { "epoch": 2.030692624790825, "grad_norm": 1.9148839712142944, "learning_rate": 1.9348256763960146e-05, "loss": 0.5899, "step": 12440 }, { "epoch": 2.0308558834333295, "grad_norm": 1.5678642988204956, "learning_rate": 1.934814407224076e-05, "loss": 0.5242, "step": 12441 }, { "epoch": 2.0310191420758335, "grad_norm": 1.4896069765090942, "learning_rate": 1.9348031371107794e-05, "loss": 0.5328, "step": 12442 }, { "epoch": 2.031182400718338, "grad_norm": 1.612774133682251, "learning_rate": 1.9347918660561365e-05, "loss": 0.4938, "step": 12443 }, { "epoch": 2.0313456593608423, "grad_norm": 2.08781361579895, "learning_rate": 1.9347805940601582e-05, "loss": 0.5837, "step": 12444 }, { "epoch": 2.0315089180033468, "grad_norm": 1.7026292085647583, "learning_rate": 1.9347693211228562e-05, "loss": 0.586, "step": 12445 }, { "epoch": 2.031672176645851, "grad_norm": 1.6097503900527954, "learning_rate": 1.9347580472442414e-05, "loss": 0.5045, "step": 12446 }, { "epoch": 2.0318354352883556, "grad_norm": 1.8581963777542114, "learning_rate": 1.934746772424326e-05, "loss": 0.5824, "step": 12447 }, { "epoch": 2.03199869393086, "grad_norm": 1.5170527696609497, "learning_rate": 1.934735496663121e-05, "loss": 0.547, "step": 12448 }, { "epoch": 2.0321619525733645, "grad_norm": 1.7561402320861816, "learning_rate": 1.9347242199606372e-05, "loss": 0.5077, "step": 12449 }, { "epoch": 2.032325211215869, "grad_norm": 1.9444552659988403, "learning_rate": 1.934712942316886e-05, "loss": 0.7042, "step": 12450 }, { "epoch": 2.032488469858373, "grad_norm": 1.7026249170303345, "learning_rate": 1.9347016637318797e-05, "loss": 0.5485, "step": 12451 }, { "epoch": 2.0326517285008774, "grad_norm": 1.5949026346206665, "learning_rate": 1.934690384205629e-05, "loss": 0.44, "step": 12452 }, { "epoch": 2.032814987143382, "grad_norm": 2.2150423526763916, "learning_rate": 1.9346791037381452e-05, "loss": 0.641, "step": 12453 }, { "epoch": 2.0329782457858863, "grad_norm": 1.4780746698379517, "learning_rate": 1.93466782232944e-05, "loss": 0.5431, "step": 12454 }, { "epoch": 2.0331415044283907, "grad_norm": 2.0856263637542725, "learning_rate": 1.9346565399795244e-05, "loss": 0.5978, "step": 12455 }, { "epoch": 2.033304763070895, "grad_norm": 2.01977801322937, "learning_rate": 1.93464525668841e-05, "loss": 0.5665, "step": 12456 }, { "epoch": 2.0334680217133996, "grad_norm": 1.4879562854766846, "learning_rate": 1.9346339724561078e-05, "loss": 0.4708, "step": 12457 }, { "epoch": 2.033631280355904, "grad_norm": 1.9953008890151978, "learning_rate": 1.9346226872826295e-05, "loss": 0.6554, "step": 12458 }, { "epoch": 2.033794538998408, "grad_norm": 1.7277441024780273, "learning_rate": 1.9346114011679865e-05, "loss": 0.4859, "step": 12459 }, { "epoch": 2.0339577976409124, "grad_norm": 1.5084885358810425, "learning_rate": 1.9346001141121903e-05, "loss": 0.5421, "step": 12460 }, { "epoch": 2.034121056283417, "grad_norm": 1.8652210235595703, "learning_rate": 1.9345888261152517e-05, "loss": 0.5531, "step": 12461 }, { "epoch": 2.0342843149259213, "grad_norm": 1.6558841466903687, "learning_rate": 1.9345775371771826e-05, "loss": 0.5458, "step": 12462 }, { "epoch": 2.0344475735684258, "grad_norm": 2.0411274433135986, "learning_rate": 1.934566247297994e-05, "loss": 0.7397, "step": 12463 }, { "epoch": 2.03461083221093, "grad_norm": 2.1410293579101562, "learning_rate": 1.9345549564776975e-05, "loss": 0.6801, "step": 12464 }, { "epoch": 2.0347740908534346, "grad_norm": 1.7940441370010376, "learning_rate": 1.9345436647163046e-05, "loss": 0.5766, "step": 12465 }, { "epoch": 2.034937349495939, "grad_norm": 1.463208794593811, "learning_rate": 1.934532372013826e-05, "loss": 0.452, "step": 12466 }, { "epoch": 2.0351006081384435, "grad_norm": 2.064758062362671, "learning_rate": 1.934521078370274e-05, "loss": 0.6166, "step": 12467 }, { "epoch": 2.0352638667809475, "grad_norm": 1.3015520572662354, "learning_rate": 1.9345097837856596e-05, "loss": 0.4459, "step": 12468 }, { "epoch": 2.035427125423452, "grad_norm": 1.7860995531082153, "learning_rate": 1.9344984882599937e-05, "loss": 0.6001, "step": 12469 }, { "epoch": 2.0355903840659564, "grad_norm": 1.982786774635315, "learning_rate": 1.9344871917932884e-05, "loss": 0.6464, "step": 12470 }, { "epoch": 2.035753642708461, "grad_norm": 2.14611554145813, "learning_rate": 1.9344758943855545e-05, "loss": 0.5493, "step": 12471 }, { "epoch": 2.0359169013509653, "grad_norm": 1.7070122957229614, "learning_rate": 1.9344645960368037e-05, "loss": 0.4881, "step": 12472 }, { "epoch": 2.0360801599934697, "grad_norm": 1.6498547792434692, "learning_rate": 1.934453296747047e-05, "loss": 0.5667, "step": 12473 }, { "epoch": 2.036243418635974, "grad_norm": 1.8670216798782349, "learning_rate": 1.9344419965162967e-05, "loss": 0.6098, "step": 12474 }, { "epoch": 2.0364066772784786, "grad_norm": 1.786757469177246, "learning_rate": 1.9344306953445632e-05, "loss": 0.5724, "step": 12475 }, { "epoch": 2.036569935920983, "grad_norm": 2.0428555011749268, "learning_rate": 1.934419393231858e-05, "loss": 0.6509, "step": 12476 }, { "epoch": 2.036733194563487, "grad_norm": 1.5577988624572754, "learning_rate": 1.9344080901781933e-05, "loss": 0.5211, "step": 12477 }, { "epoch": 2.0368964532059914, "grad_norm": 1.6693497896194458, "learning_rate": 1.9343967861835796e-05, "loss": 0.6094, "step": 12478 }, { "epoch": 2.037059711848496, "grad_norm": 2.1763880252838135, "learning_rate": 1.9343854812480285e-05, "loss": 0.7541, "step": 12479 }, { "epoch": 2.0372229704910003, "grad_norm": 1.8230410814285278, "learning_rate": 1.934374175371552e-05, "loss": 0.5326, "step": 12480 }, { "epoch": 2.0373862291335048, "grad_norm": 1.7685227394104004, "learning_rate": 1.93436286855416e-05, "loss": 0.678, "step": 12481 }, { "epoch": 2.037549487776009, "grad_norm": 1.429332971572876, "learning_rate": 1.9343515607958653e-05, "loss": 0.5011, "step": 12482 }, { "epoch": 2.0377127464185136, "grad_norm": 1.814310073852539, "learning_rate": 1.9343402520966788e-05, "loss": 0.5916, "step": 12483 }, { "epoch": 2.037876005061018, "grad_norm": 1.6616448163986206, "learning_rate": 1.9343289424566122e-05, "loss": 0.6497, "step": 12484 }, { "epoch": 2.0380392637035225, "grad_norm": 1.8351829051971436, "learning_rate": 1.9343176318756766e-05, "loss": 0.5515, "step": 12485 }, { "epoch": 2.0382025223460265, "grad_norm": 1.3728736639022827, "learning_rate": 1.934306320353883e-05, "loss": 0.4055, "step": 12486 }, { "epoch": 2.038365780988531, "grad_norm": 1.5474668741226196, "learning_rate": 1.9342950078912436e-05, "loss": 0.5315, "step": 12487 }, { "epoch": 2.0385290396310354, "grad_norm": 1.3752912282943726, "learning_rate": 1.934283694487769e-05, "loss": 0.488, "step": 12488 }, { "epoch": 2.03869229827354, "grad_norm": 2.0109174251556396, "learning_rate": 1.934272380143471e-05, "loss": 0.5553, "step": 12489 }, { "epoch": 2.0388555569160443, "grad_norm": 1.4402562379837036, "learning_rate": 1.934261064858361e-05, "loss": 0.5427, "step": 12490 }, { "epoch": 2.0390188155585487, "grad_norm": 1.9702736139297485, "learning_rate": 1.9342497486324504e-05, "loss": 0.6181, "step": 12491 }, { "epoch": 2.039182074201053, "grad_norm": 1.692156434059143, "learning_rate": 1.9342384314657506e-05, "loss": 0.5389, "step": 12492 }, { "epoch": 2.0393453328435576, "grad_norm": 1.6527729034423828, "learning_rate": 1.934227113358273e-05, "loss": 0.5567, "step": 12493 }, { "epoch": 2.039508591486062, "grad_norm": 1.7281928062438965, "learning_rate": 1.934215794310029e-05, "loss": 0.5414, "step": 12494 }, { "epoch": 2.039671850128566, "grad_norm": 1.7402275800704956, "learning_rate": 1.9342044743210295e-05, "loss": 0.5785, "step": 12495 }, { "epoch": 2.0398351087710704, "grad_norm": 1.8919745683670044, "learning_rate": 1.934193153391287e-05, "loss": 0.5979, "step": 12496 }, { "epoch": 2.039998367413575, "grad_norm": 1.5552284717559814, "learning_rate": 1.9341818315208117e-05, "loss": 0.5182, "step": 12497 }, { "epoch": 2.0401616260560793, "grad_norm": 1.9799880981445312, "learning_rate": 1.9341705087096158e-05, "loss": 0.608, "step": 12498 }, { "epoch": 2.0403248846985838, "grad_norm": 1.6899117231369019, "learning_rate": 1.9341591849577102e-05, "loss": 0.5076, "step": 12499 }, { "epoch": 2.040488143341088, "grad_norm": 2.5051069259643555, "learning_rate": 1.9341478602651068e-05, "loss": 0.5333, "step": 12500 }, { "epoch": 2.0406514019835926, "grad_norm": 1.6199774742126465, "learning_rate": 1.9341365346318167e-05, "loss": 0.5833, "step": 12501 }, { "epoch": 2.040814660626097, "grad_norm": 1.7962948083877563, "learning_rate": 1.9341252080578515e-05, "loss": 0.6494, "step": 12502 }, { "epoch": 2.0409779192686015, "grad_norm": 1.9111764430999756, "learning_rate": 1.9341138805432223e-05, "loss": 0.5882, "step": 12503 }, { "epoch": 2.0411411779111055, "grad_norm": 2.0144999027252197, "learning_rate": 1.9341025520879406e-05, "loss": 0.6614, "step": 12504 }, { "epoch": 2.04130443655361, "grad_norm": 1.5839784145355225, "learning_rate": 1.934091222692018e-05, "loss": 0.5596, "step": 12505 }, { "epoch": 2.0414676951961144, "grad_norm": 1.8219949007034302, "learning_rate": 1.9340798923554657e-05, "loss": 0.621, "step": 12506 }, { "epoch": 2.041630953838619, "grad_norm": 1.5695778131484985, "learning_rate": 1.934068561078295e-05, "loss": 0.5569, "step": 12507 }, { "epoch": 2.0417942124811232, "grad_norm": 1.4816148281097412, "learning_rate": 1.9340572288605178e-05, "loss": 0.5715, "step": 12508 }, { "epoch": 2.0419574711236277, "grad_norm": 1.5732719898223877, "learning_rate": 1.9340458957021453e-05, "loss": 0.4809, "step": 12509 }, { "epoch": 2.042120729766132, "grad_norm": 2.169745445251465, "learning_rate": 1.9340345616031885e-05, "loss": 0.613, "step": 12510 }, { "epoch": 2.0422839884086366, "grad_norm": 1.6917805671691895, "learning_rate": 1.9340232265636596e-05, "loss": 0.6739, "step": 12511 }, { "epoch": 2.0424472470511406, "grad_norm": 1.7692861557006836, "learning_rate": 1.9340118905835693e-05, "loss": 0.5478, "step": 12512 }, { "epoch": 2.042610505693645, "grad_norm": 1.4979761838912964, "learning_rate": 1.9340005536629292e-05, "loss": 0.4662, "step": 12513 }, { "epoch": 2.0427737643361494, "grad_norm": 1.532591700553894, "learning_rate": 1.933989215801751e-05, "loss": 0.4955, "step": 12514 }, { "epoch": 2.042937022978654, "grad_norm": 1.9427249431610107, "learning_rate": 1.9339778770000458e-05, "loss": 0.5909, "step": 12515 }, { "epoch": 2.0431002816211583, "grad_norm": 1.8342094421386719, "learning_rate": 1.9339665372578248e-05, "loss": 0.8122, "step": 12516 }, { "epoch": 2.0432635402636627, "grad_norm": 1.7162177562713623, "learning_rate": 1.9339551965751e-05, "loss": 0.4843, "step": 12517 }, { "epoch": 2.043426798906167, "grad_norm": 1.5974608659744263, "learning_rate": 1.933943854951883e-05, "loss": 0.4792, "step": 12518 }, { "epoch": 2.0435900575486716, "grad_norm": 1.6007624864578247, "learning_rate": 1.933932512388184e-05, "loss": 0.5269, "step": 12519 }, { "epoch": 2.043753316191176, "grad_norm": 1.6031157970428467, "learning_rate": 1.933921168884016e-05, "loss": 0.616, "step": 12520 }, { "epoch": 2.04391657483368, "grad_norm": 1.3850005865097046, "learning_rate": 1.933909824439389e-05, "loss": 0.3902, "step": 12521 }, { "epoch": 2.0440798334761845, "grad_norm": 1.3859142065048218, "learning_rate": 1.9338984790543153e-05, "loss": 0.5398, "step": 12522 }, { "epoch": 2.044243092118689, "grad_norm": 1.790244460105896, "learning_rate": 1.9338871327288062e-05, "loss": 0.6564, "step": 12523 }, { "epoch": 2.0444063507611934, "grad_norm": 1.4430124759674072, "learning_rate": 1.9338757854628726e-05, "loss": 0.4684, "step": 12524 }, { "epoch": 2.044569609403698, "grad_norm": 1.8428969383239746, "learning_rate": 1.933864437256527e-05, "loss": 0.5077, "step": 12525 }, { "epoch": 2.0447328680462022, "grad_norm": 1.6809026002883911, "learning_rate": 1.9338530881097793e-05, "loss": 0.5707, "step": 12526 }, { "epoch": 2.0448961266887067, "grad_norm": 2.2282097339630127, "learning_rate": 1.9338417380226423e-05, "loss": 0.6724, "step": 12527 }, { "epoch": 2.045059385331211, "grad_norm": 1.7579717636108398, "learning_rate": 1.933830386995127e-05, "loss": 0.5445, "step": 12528 }, { "epoch": 2.0452226439737156, "grad_norm": 1.9988796710968018, "learning_rate": 1.9338190350272447e-05, "loss": 0.6335, "step": 12529 }, { "epoch": 2.0453859026162196, "grad_norm": 1.5773271322250366, "learning_rate": 1.9338076821190065e-05, "loss": 0.555, "step": 12530 }, { "epoch": 2.045549161258724, "grad_norm": 1.8481450080871582, "learning_rate": 1.9337963282704245e-05, "loss": 0.647, "step": 12531 }, { "epoch": 2.0457124199012284, "grad_norm": 1.876672387123108, "learning_rate": 1.93378497348151e-05, "loss": 0.6028, "step": 12532 }, { "epoch": 2.045875678543733, "grad_norm": 2.0604612827301025, "learning_rate": 1.9337736177522742e-05, "loss": 0.68, "step": 12533 }, { "epoch": 2.0460389371862373, "grad_norm": 1.5987591743469238, "learning_rate": 1.9337622610827286e-05, "loss": 0.5647, "step": 12534 }, { "epoch": 2.0462021958287417, "grad_norm": 2.2853779792785645, "learning_rate": 1.9337509034728846e-05, "loss": 0.6818, "step": 12535 }, { "epoch": 2.046365454471246, "grad_norm": 1.705328106880188, "learning_rate": 1.9337395449227534e-05, "loss": 0.6408, "step": 12536 }, { "epoch": 2.0465287131137506, "grad_norm": 1.797687292098999, "learning_rate": 1.933728185432347e-05, "loss": 0.5666, "step": 12537 }, { "epoch": 2.046691971756255, "grad_norm": 1.8294883966445923, "learning_rate": 1.9337168250016768e-05, "loss": 0.5539, "step": 12538 }, { "epoch": 2.046855230398759, "grad_norm": 1.8282926082611084, "learning_rate": 1.9337054636307537e-05, "loss": 0.5123, "step": 12539 }, { "epoch": 2.0470184890412635, "grad_norm": 2.043750286102295, "learning_rate": 1.9336941013195892e-05, "loss": 0.5285, "step": 12540 }, { "epoch": 2.047181747683768, "grad_norm": 1.9946180582046509, "learning_rate": 1.9336827380681958e-05, "loss": 0.5975, "step": 12541 }, { "epoch": 2.0473450063262724, "grad_norm": 1.500851035118103, "learning_rate": 1.9336713738765836e-05, "loss": 0.5388, "step": 12542 }, { "epoch": 2.047508264968777, "grad_norm": 2.602703332901001, "learning_rate": 1.9336600087447645e-05, "loss": 0.5361, "step": 12543 }, { "epoch": 2.0476715236112812, "grad_norm": 1.8026779890060425, "learning_rate": 1.93364864267275e-05, "loss": 0.6221, "step": 12544 }, { "epoch": 2.0478347822537857, "grad_norm": 1.512753963470459, "learning_rate": 1.933637275660552e-05, "loss": 0.4695, "step": 12545 }, { "epoch": 2.04799804089629, "grad_norm": 1.9472894668579102, "learning_rate": 1.933625907708181e-05, "loss": 0.5246, "step": 12546 }, { "epoch": 2.048161299538794, "grad_norm": 1.7606332302093506, "learning_rate": 1.933614538815649e-05, "loss": 0.6655, "step": 12547 }, { "epoch": 2.0483245581812985, "grad_norm": 1.8288816213607788, "learning_rate": 1.9336031689829677e-05, "loss": 0.5786, "step": 12548 }, { "epoch": 2.048487816823803, "grad_norm": 1.6529552936553955, "learning_rate": 1.933591798210148e-05, "loss": 0.6159, "step": 12549 }, { "epoch": 2.0486510754663074, "grad_norm": 1.7312804460525513, "learning_rate": 1.9335804264972018e-05, "loss": 0.5779, "step": 12550 }, { "epoch": 2.048814334108812, "grad_norm": 1.4990112781524658, "learning_rate": 1.9335690538441404e-05, "loss": 0.4872, "step": 12551 }, { "epoch": 2.0489775927513163, "grad_norm": 1.7711176872253418, "learning_rate": 1.933557680250975e-05, "loss": 0.5639, "step": 12552 }, { "epoch": 2.0491408513938207, "grad_norm": 2.063809394836426, "learning_rate": 1.9335463057177174e-05, "loss": 0.6599, "step": 12553 }, { "epoch": 2.049304110036325, "grad_norm": 2.1126773357391357, "learning_rate": 1.9335349302443788e-05, "loss": 0.7738, "step": 12554 }, { "epoch": 2.0494673686788296, "grad_norm": 1.9001386165618896, "learning_rate": 1.9335235538309712e-05, "loss": 0.6021, "step": 12555 }, { "epoch": 2.0496306273213336, "grad_norm": 1.6554162502288818, "learning_rate": 1.933512176477505e-05, "loss": 0.5893, "step": 12556 }, { "epoch": 2.049793885963838, "grad_norm": 2.0169413089752197, "learning_rate": 1.9335007981839928e-05, "loss": 0.603, "step": 12557 }, { "epoch": 2.0499571446063425, "grad_norm": 1.7343255281448364, "learning_rate": 1.9334894189504452e-05, "loss": 0.5544, "step": 12558 }, { "epoch": 2.050120403248847, "grad_norm": 2.0387775897979736, "learning_rate": 1.933478038776874e-05, "loss": 0.647, "step": 12559 }, { "epoch": 2.0502836618913514, "grad_norm": 1.725394368171692, "learning_rate": 1.933466657663291e-05, "loss": 0.5494, "step": 12560 }, { "epoch": 2.050446920533856, "grad_norm": 2.0786938667297363, "learning_rate": 1.933455275609707e-05, "loss": 0.664, "step": 12561 }, { "epoch": 2.0506101791763602, "grad_norm": 1.697468638420105, "learning_rate": 1.933443892616134e-05, "loss": 0.4705, "step": 12562 }, { "epoch": 2.0507734378188647, "grad_norm": 1.6789138317108154, "learning_rate": 1.933432508682583e-05, "loss": 0.5362, "step": 12563 }, { "epoch": 2.050936696461369, "grad_norm": 1.6684296131134033, "learning_rate": 1.933421123809066e-05, "loss": 0.6823, "step": 12564 }, { "epoch": 2.051099955103873, "grad_norm": 1.9454288482666016, "learning_rate": 1.9334097379955938e-05, "loss": 0.6874, "step": 12565 }, { "epoch": 2.0512632137463775, "grad_norm": 1.6074955463409424, "learning_rate": 1.9333983512421785e-05, "loss": 0.5413, "step": 12566 }, { "epoch": 2.051426472388882, "grad_norm": 1.3516676425933838, "learning_rate": 1.9333869635488315e-05, "loss": 0.4735, "step": 12567 }, { "epoch": 2.0515897310313864, "grad_norm": 1.6292965412139893, "learning_rate": 1.9333755749155635e-05, "loss": 0.5705, "step": 12568 }, { "epoch": 2.051752989673891, "grad_norm": 1.8955661058425903, "learning_rate": 1.933364185342387e-05, "loss": 0.6148, "step": 12569 }, { "epoch": 2.0519162483163953, "grad_norm": 1.6887623071670532, "learning_rate": 1.9333527948293128e-05, "loss": 0.5179, "step": 12570 }, { "epoch": 2.0520795069588997, "grad_norm": 1.8155368566513062, "learning_rate": 1.9333414033763528e-05, "loss": 0.5887, "step": 12571 }, { "epoch": 2.052242765601404, "grad_norm": 1.8958420753479004, "learning_rate": 1.9333300109835182e-05, "loss": 0.579, "step": 12572 }, { "epoch": 2.0524060242439086, "grad_norm": 1.960214376449585, "learning_rate": 1.9333186176508207e-05, "loss": 0.6479, "step": 12573 }, { "epoch": 2.0525692828864126, "grad_norm": 1.780833125114441, "learning_rate": 1.9333072233782713e-05, "loss": 0.4443, "step": 12574 }, { "epoch": 2.052732541528917, "grad_norm": 1.7124276161193848, "learning_rate": 1.9332958281658815e-05, "loss": 0.7264, "step": 12575 }, { "epoch": 2.0528958001714215, "grad_norm": 1.6373205184936523, "learning_rate": 1.9332844320136637e-05, "loss": 0.4935, "step": 12576 }, { "epoch": 2.053059058813926, "grad_norm": 1.945823073387146, "learning_rate": 1.9332730349216283e-05, "loss": 0.6495, "step": 12577 }, { "epoch": 2.0532223174564304, "grad_norm": 1.7909729480743408, "learning_rate": 1.9332616368897874e-05, "loss": 0.5061, "step": 12578 }, { "epoch": 2.053385576098935, "grad_norm": 1.9581807851791382, "learning_rate": 1.9332502379181523e-05, "loss": 0.4968, "step": 12579 }, { "epoch": 2.0535488347414392, "grad_norm": 1.9305682182312012, "learning_rate": 1.933238838006734e-05, "loss": 0.5987, "step": 12580 }, { "epoch": 2.0537120933839437, "grad_norm": 1.540681004524231, "learning_rate": 1.933227437155545e-05, "loss": 0.4776, "step": 12581 }, { "epoch": 2.053875352026448, "grad_norm": 1.877291202545166, "learning_rate": 1.933216035364596e-05, "loss": 0.5797, "step": 12582 }, { "epoch": 2.054038610668952, "grad_norm": 1.8473215103149414, "learning_rate": 1.9332046326338985e-05, "loss": 0.6114, "step": 12583 }, { "epoch": 2.0542018693114565, "grad_norm": 1.7840884923934937, "learning_rate": 1.9331932289634644e-05, "loss": 0.658, "step": 12584 }, { "epoch": 2.054365127953961, "grad_norm": 1.6521497964859009, "learning_rate": 1.933181824353305e-05, "loss": 0.5061, "step": 12585 }, { "epoch": 2.0545283865964654, "grad_norm": 1.8632780313491821, "learning_rate": 1.933170418803432e-05, "loss": 0.6222, "step": 12586 }, { "epoch": 2.05469164523897, "grad_norm": 1.6111079454421997, "learning_rate": 1.9331590123138562e-05, "loss": 0.435, "step": 12587 }, { "epoch": 2.0548549038814743, "grad_norm": 1.8012115955352783, "learning_rate": 1.9331476048845897e-05, "loss": 0.5256, "step": 12588 }, { "epoch": 2.0550181625239787, "grad_norm": 1.7738654613494873, "learning_rate": 1.9331361965156438e-05, "loss": 0.5197, "step": 12589 }, { "epoch": 2.055181421166483, "grad_norm": 1.762214183807373, "learning_rate": 1.93312478720703e-05, "loss": 0.5451, "step": 12590 }, { "epoch": 2.0553446798089876, "grad_norm": 1.6847076416015625, "learning_rate": 1.93311337695876e-05, "loss": 0.4882, "step": 12591 }, { "epoch": 2.0555079384514916, "grad_norm": 1.8761510848999023, "learning_rate": 1.9331019657708446e-05, "loss": 0.5782, "step": 12592 }, { "epoch": 2.055671197093996, "grad_norm": 1.5720994472503662, "learning_rate": 1.933090553643296e-05, "loss": 0.5495, "step": 12593 }, { "epoch": 2.0558344557365005, "grad_norm": 1.8600852489471436, "learning_rate": 1.9330791405761254e-05, "loss": 0.5643, "step": 12594 }, { "epoch": 2.055997714379005, "grad_norm": 1.7638306617736816, "learning_rate": 1.9330677265693444e-05, "loss": 0.6003, "step": 12595 }, { "epoch": 2.0561609730215094, "grad_norm": 1.5942293405532837, "learning_rate": 1.9330563116229647e-05, "loss": 0.5426, "step": 12596 }, { "epoch": 2.056324231664014, "grad_norm": 1.6974012851715088, "learning_rate": 1.933044895736997e-05, "loss": 0.5384, "step": 12597 }, { "epoch": 2.0564874903065182, "grad_norm": 1.6993228197097778, "learning_rate": 1.933033478911454e-05, "loss": 0.5113, "step": 12598 }, { "epoch": 2.0566507489490227, "grad_norm": 1.7888575792312622, "learning_rate": 1.933022061146346e-05, "loss": 0.7109, "step": 12599 }, { "epoch": 2.0568140075915267, "grad_norm": 1.951226830482483, "learning_rate": 1.9330106424416852e-05, "loss": 0.6587, "step": 12600 }, { "epoch": 2.056977266234031, "grad_norm": 1.4595797061920166, "learning_rate": 1.932999222797483e-05, "loss": 0.4788, "step": 12601 }, { "epoch": 2.0571405248765355, "grad_norm": 1.8984992504119873, "learning_rate": 1.9329878022137507e-05, "loss": 0.6361, "step": 12602 }, { "epoch": 2.05730378351904, "grad_norm": 1.605939269065857, "learning_rate": 1.9329763806905e-05, "loss": 0.5431, "step": 12603 }, { "epoch": 2.0574670421615444, "grad_norm": 1.856075644493103, "learning_rate": 1.9329649582277424e-05, "loss": 0.4899, "step": 12604 }, { "epoch": 2.057630300804049, "grad_norm": 2.3160364627838135, "learning_rate": 1.9329535348254893e-05, "loss": 0.6828, "step": 12605 }, { "epoch": 2.0577935594465533, "grad_norm": 1.8986384868621826, "learning_rate": 1.932942110483752e-05, "loss": 0.5909, "step": 12606 }, { "epoch": 2.0579568180890577, "grad_norm": 2.1761670112609863, "learning_rate": 1.932930685202543e-05, "loss": 0.7399, "step": 12607 }, { "epoch": 2.058120076731562, "grad_norm": 1.7712815999984741, "learning_rate": 1.932919258981872e-05, "loss": 0.6028, "step": 12608 }, { "epoch": 2.058283335374066, "grad_norm": 1.920531988143921, "learning_rate": 1.9329078318217523e-05, "loss": 0.5751, "step": 12609 }, { "epoch": 2.0584465940165706, "grad_norm": 1.7769887447357178, "learning_rate": 1.9328964037221944e-05, "loss": 0.5855, "step": 12610 }, { "epoch": 2.058609852659075, "grad_norm": 1.8933099508285522, "learning_rate": 1.9328849746832098e-05, "loss": 0.5895, "step": 12611 }, { "epoch": 2.0587731113015795, "grad_norm": 1.7800958156585693, "learning_rate": 1.9328735447048105e-05, "loss": 0.5172, "step": 12612 }, { "epoch": 2.058936369944084, "grad_norm": 1.8484512567520142, "learning_rate": 1.932862113787008e-05, "loss": 0.5871, "step": 12613 }, { "epoch": 2.0590996285865883, "grad_norm": 2.0675179958343506, "learning_rate": 1.9328506819298134e-05, "loss": 0.573, "step": 12614 }, { "epoch": 2.059262887229093, "grad_norm": 1.616305947303772, "learning_rate": 1.9328392491332385e-05, "loss": 0.5152, "step": 12615 }, { "epoch": 2.0594261458715972, "grad_norm": 1.860044240951538, "learning_rate": 1.9328278153972947e-05, "loss": 0.6316, "step": 12616 }, { "epoch": 2.0595894045141017, "grad_norm": 1.9057366847991943, "learning_rate": 1.9328163807219937e-05, "loss": 0.542, "step": 12617 }, { "epoch": 2.0597526631566057, "grad_norm": 1.7970547676086426, "learning_rate": 1.9328049451073467e-05, "loss": 0.6663, "step": 12618 }, { "epoch": 2.05991592179911, "grad_norm": 2.053895950317383, "learning_rate": 1.9327935085533652e-05, "loss": 0.4785, "step": 12619 }, { "epoch": 2.0600791804416145, "grad_norm": 1.5729748010635376, "learning_rate": 1.932782071060061e-05, "loss": 0.4702, "step": 12620 }, { "epoch": 2.060242439084119, "grad_norm": 1.899421215057373, "learning_rate": 1.9327706326274453e-05, "loss": 0.5488, "step": 12621 }, { "epoch": 2.0604056977266234, "grad_norm": 1.6139971017837524, "learning_rate": 1.9327591932555302e-05, "loss": 0.5064, "step": 12622 }, { "epoch": 2.060568956369128, "grad_norm": 2.0870306491851807, "learning_rate": 1.9327477529443264e-05, "loss": 0.5855, "step": 12623 }, { "epoch": 2.0607322150116323, "grad_norm": 1.9327545166015625, "learning_rate": 1.9327363116938464e-05, "loss": 0.5225, "step": 12624 }, { "epoch": 2.0608954736541367, "grad_norm": 1.848079800605774, "learning_rate": 1.932724869504101e-05, "loss": 0.6117, "step": 12625 }, { "epoch": 2.061058732296641, "grad_norm": 1.5856224298477173, "learning_rate": 1.9327134263751016e-05, "loss": 0.5516, "step": 12626 }, { "epoch": 2.061221990939145, "grad_norm": 1.5233838558197021, "learning_rate": 1.9327019823068605e-05, "loss": 0.4796, "step": 12627 }, { "epoch": 2.0613852495816496, "grad_norm": 1.8427157402038574, "learning_rate": 1.9326905372993886e-05, "loss": 0.6382, "step": 12628 }, { "epoch": 2.061548508224154, "grad_norm": 1.8353664875030518, "learning_rate": 1.9326790913526974e-05, "loss": 0.5899, "step": 12629 }, { "epoch": 2.0617117668666585, "grad_norm": 1.675552487373352, "learning_rate": 1.9326676444667988e-05, "loss": 0.5601, "step": 12630 }, { "epoch": 2.061875025509163, "grad_norm": 1.7763067483901978, "learning_rate": 1.932656196641704e-05, "loss": 0.5633, "step": 12631 }, { "epoch": 2.0620382841516673, "grad_norm": 2.267127513885498, "learning_rate": 1.9326447478774244e-05, "loss": 1.1416, "step": 12632 }, { "epoch": 2.062201542794172, "grad_norm": 1.8018039464950562, "learning_rate": 1.9326332981739723e-05, "loss": 0.5626, "step": 12633 }, { "epoch": 2.062364801436676, "grad_norm": 1.7337849140167236, "learning_rate": 1.9326218475313583e-05, "loss": 0.5809, "step": 12634 }, { "epoch": 2.06252806007918, "grad_norm": 1.8127861022949219, "learning_rate": 1.9326103959495947e-05, "loss": 0.588, "step": 12635 }, { "epoch": 2.0626913187216847, "grad_norm": 1.949481725692749, "learning_rate": 1.9325989434286922e-05, "loss": 0.5974, "step": 12636 }, { "epoch": 2.062854577364189, "grad_norm": 1.8584425449371338, "learning_rate": 1.9325874899686632e-05, "loss": 0.6104, "step": 12637 }, { "epoch": 2.0630178360066935, "grad_norm": 1.9630550146102905, "learning_rate": 1.932576035569519e-05, "loss": 0.5538, "step": 12638 }, { "epoch": 2.063181094649198, "grad_norm": 1.6239497661590576, "learning_rate": 1.9325645802312704e-05, "loss": 0.587, "step": 12639 }, { "epoch": 2.0633443532917024, "grad_norm": 2.3025307655334473, "learning_rate": 1.9325531239539303e-05, "loss": 0.6148, "step": 12640 }, { "epoch": 2.063507611934207, "grad_norm": 1.8608815670013428, "learning_rate": 1.9325416667375087e-05, "loss": 0.6554, "step": 12641 }, { "epoch": 2.0636708705767113, "grad_norm": 1.7780479192733765, "learning_rate": 1.9325302085820184e-05, "loss": 0.667, "step": 12642 }, { "epoch": 2.0638341292192157, "grad_norm": 2.0076966285705566, "learning_rate": 1.93251874948747e-05, "loss": 0.5612, "step": 12643 }, { "epoch": 2.06399738786172, "grad_norm": 1.4718210697174072, "learning_rate": 1.932507289453876e-05, "loss": 0.5589, "step": 12644 }, { "epoch": 2.064160646504224, "grad_norm": 1.916379451751709, "learning_rate": 1.9324958284812468e-05, "loss": 0.5264, "step": 12645 }, { "epoch": 2.0643239051467286, "grad_norm": 1.9040310382843018, "learning_rate": 1.932484366569595e-05, "loss": 0.5728, "step": 12646 }, { "epoch": 2.064487163789233, "grad_norm": 1.8359315395355225, "learning_rate": 1.9324729037189314e-05, "loss": 0.542, "step": 12647 }, { "epoch": 2.0646504224317375, "grad_norm": 1.491902470588684, "learning_rate": 1.932461439929268e-05, "loss": 0.5532, "step": 12648 }, { "epoch": 2.064813681074242, "grad_norm": 1.7316187620162964, "learning_rate": 1.932449975200616e-05, "loss": 0.5702, "step": 12649 }, { "epoch": 2.0649769397167463, "grad_norm": 1.763700246810913, "learning_rate": 1.9324385095329875e-05, "loss": 0.5254, "step": 12650 }, { "epoch": 2.065140198359251, "grad_norm": 2.0634474754333496, "learning_rate": 1.9324270429263933e-05, "loss": 0.5955, "step": 12651 }, { "epoch": 2.065303457001755, "grad_norm": 1.8930258750915527, "learning_rate": 1.9324155753808454e-05, "loss": 0.5506, "step": 12652 }, { "epoch": 2.065466715644259, "grad_norm": 1.843095302581787, "learning_rate": 1.9324041068963554e-05, "loss": 0.7134, "step": 12653 }, { "epoch": 2.0656299742867636, "grad_norm": 1.580033302307129, "learning_rate": 1.932392637472935e-05, "loss": 0.5362, "step": 12654 }, { "epoch": 2.065793232929268, "grad_norm": 1.6660610437393188, "learning_rate": 1.932381167110595e-05, "loss": 0.5189, "step": 12655 }, { "epoch": 2.0659564915717725, "grad_norm": 1.7034409046173096, "learning_rate": 1.9323696958093473e-05, "loss": 0.6897, "step": 12656 }, { "epoch": 2.066119750214277, "grad_norm": 1.7669264078140259, "learning_rate": 1.9323582235692037e-05, "loss": 0.5185, "step": 12657 }, { "epoch": 2.0662830088567814, "grad_norm": 1.8125616312026978, "learning_rate": 1.9323467503901756e-05, "loss": 0.6183, "step": 12658 }, { "epoch": 2.066446267499286, "grad_norm": 1.959320068359375, "learning_rate": 1.9323352762722748e-05, "loss": 0.6294, "step": 12659 }, { "epoch": 2.0666095261417903, "grad_norm": 1.6433420181274414, "learning_rate": 1.9323238012155125e-05, "loss": 0.5146, "step": 12660 }, { "epoch": 2.0667727847842947, "grad_norm": 1.512661337852478, "learning_rate": 1.9323123252199003e-05, "loss": 0.5128, "step": 12661 }, { "epoch": 2.0669360434267987, "grad_norm": 1.5553524494171143, "learning_rate": 1.9323008482854496e-05, "loss": 0.4267, "step": 12662 }, { "epoch": 2.067099302069303, "grad_norm": 1.6307882070541382, "learning_rate": 1.9322893704121726e-05, "loss": 0.5761, "step": 12663 }, { "epoch": 2.0672625607118076, "grad_norm": 1.757677674293518, "learning_rate": 1.9322778916000803e-05, "loss": 0.5991, "step": 12664 }, { "epoch": 2.067425819354312, "grad_norm": 1.939069390296936, "learning_rate": 1.9322664118491844e-05, "loss": 0.5635, "step": 12665 }, { "epoch": 2.0675890779968165, "grad_norm": 1.7910054922103882, "learning_rate": 1.932254931159497e-05, "loss": 0.5479, "step": 12666 }, { "epoch": 2.067752336639321, "grad_norm": 1.8593946695327759, "learning_rate": 1.932243449531028e-05, "loss": 0.5603, "step": 12667 }, { "epoch": 2.0679155952818253, "grad_norm": 1.5384607315063477, "learning_rate": 1.932231966963791e-05, "loss": 0.4445, "step": 12668 }, { "epoch": 2.0680788539243298, "grad_norm": 1.777850866317749, "learning_rate": 1.9322204834577965e-05, "loss": 0.611, "step": 12669 }, { "epoch": 2.068242112566834, "grad_norm": 1.8123424053192139, "learning_rate": 1.932208999013056e-05, "loss": 0.5212, "step": 12670 }, { "epoch": 2.068405371209338, "grad_norm": 1.6231980323791504, "learning_rate": 1.9321975136295815e-05, "loss": 0.5443, "step": 12671 }, { "epoch": 2.0685686298518426, "grad_norm": 1.8646429777145386, "learning_rate": 1.9321860273073843e-05, "loss": 0.5765, "step": 12672 }, { "epoch": 2.068731888494347, "grad_norm": 1.7683428525924683, "learning_rate": 1.932174540046476e-05, "loss": 0.6043, "step": 12673 }, { "epoch": 2.0688951471368515, "grad_norm": 1.8449351787567139, "learning_rate": 1.932163051846868e-05, "loss": 0.6074, "step": 12674 }, { "epoch": 2.069058405779356, "grad_norm": 2.0063798427581787, "learning_rate": 1.932151562708572e-05, "loss": 0.6125, "step": 12675 }, { "epoch": 2.0692216644218604, "grad_norm": 1.6139687299728394, "learning_rate": 1.9321400726316e-05, "loss": 0.4728, "step": 12676 }, { "epoch": 2.069384923064365, "grad_norm": 1.8459407091140747, "learning_rate": 1.9321285816159633e-05, "loss": 0.5456, "step": 12677 }, { "epoch": 2.0695481817068693, "grad_norm": 1.8801714181900024, "learning_rate": 1.9321170896616726e-05, "loss": 0.7617, "step": 12678 }, { "epoch": 2.0697114403493737, "grad_norm": 1.7647229433059692, "learning_rate": 1.932105596768741e-05, "loss": 0.6596, "step": 12679 }, { "epoch": 2.0698746989918777, "grad_norm": 1.5715336799621582, "learning_rate": 1.932094102937179e-05, "loss": 0.4616, "step": 12680 }, { "epoch": 2.070037957634382, "grad_norm": 1.7332555055618286, "learning_rate": 1.9320826081669986e-05, "loss": 0.6306, "step": 12681 }, { "epoch": 2.0702012162768866, "grad_norm": 1.8285822868347168, "learning_rate": 1.932071112458211e-05, "loss": 0.5698, "step": 12682 }, { "epoch": 2.070364474919391, "grad_norm": 1.7740310430526733, "learning_rate": 1.9320596158108283e-05, "loss": 0.5412, "step": 12683 }, { "epoch": 2.0705277335618955, "grad_norm": 1.881882905960083, "learning_rate": 1.932048118224862e-05, "loss": 0.656, "step": 12684 }, { "epoch": 2.0706909922044, "grad_norm": 1.8201229572296143, "learning_rate": 1.9320366197003227e-05, "loss": 0.6844, "step": 12685 }, { "epoch": 2.0708542508469043, "grad_norm": 1.67357337474823, "learning_rate": 1.932025120237224e-05, "loss": 0.5128, "step": 12686 }, { "epoch": 2.0710175094894088, "grad_norm": 1.7050341367721558, "learning_rate": 1.9320136198355753e-05, "loss": 0.5759, "step": 12687 }, { "epoch": 2.0711807681319128, "grad_norm": 1.924009084701538, "learning_rate": 1.932002118495389e-05, "loss": 0.6153, "step": 12688 }, { "epoch": 2.071344026774417, "grad_norm": 1.8531798124313354, "learning_rate": 1.9319906162166776e-05, "loss": 0.6291, "step": 12689 }, { "epoch": 2.0715072854169216, "grad_norm": 1.772476077079773, "learning_rate": 1.9319791129994515e-05, "loss": 0.5986, "step": 12690 }, { "epoch": 2.071670544059426, "grad_norm": 1.6120312213897705, "learning_rate": 1.9319676088437224e-05, "loss": 0.538, "step": 12691 }, { "epoch": 2.0718338027019305, "grad_norm": 1.7034331560134888, "learning_rate": 1.9319561037495025e-05, "loss": 0.593, "step": 12692 }, { "epoch": 2.071997061344435, "grad_norm": 1.7978113889694214, "learning_rate": 1.931944597716803e-05, "loss": 0.6591, "step": 12693 }, { "epoch": 2.0721603199869394, "grad_norm": 1.4312381744384766, "learning_rate": 1.9319330907456356e-05, "loss": 0.5417, "step": 12694 }, { "epoch": 2.072323578629444, "grad_norm": 1.745811104774475, "learning_rate": 1.9319215828360117e-05, "loss": 0.542, "step": 12695 }, { "epoch": 2.0724868372719483, "grad_norm": 1.7988154888153076, "learning_rate": 1.931910073987943e-05, "loss": 0.5179, "step": 12696 }, { "epoch": 2.0726500959144523, "grad_norm": 2.2833430767059326, "learning_rate": 1.931898564201441e-05, "loss": 0.5247, "step": 12697 }, { "epoch": 2.0728133545569567, "grad_norm": 1.4173334836959839, "learning_rate": 1.9318870534765178e-05, "loss": 0.4447, "step": 12698 }, { "epoch": 2.072976613199461, "grad_norm": 1.5551263093948364, "learning_rate": 1.9318755418131844e-05, "loss": 0.4973, "step": 12699 }, { "epoch": 2.0731398718419656, "grad_norm": 1.6092066764831543, "learning_rate": 1.9318640292114526e-05, "loss": 0.565, "step": 12700 }, { "epoch": 2.07330313048447, "grad_norm": 1.602735161781311, "learning_rate": 1.931852515671334e-05, "loss": 0.5195, "step": 12701 }, { "epoch": 2.0734663891269745, "grad_norm": 1.7686277627944946, "learning_rate": 1.9318410011928398e-05, "loss": 0.5639, "step": 12702 }, { "epoch": 2.073629647769479, "grad_norm": 1.7777825593948364, "learning_rate": 1.931829485775982e-05, "loss": 0.547, "step": 12703 }, { "epoch": 2.0737929064119833, "grad_norm": 2.0533862113952637, "learning_rate": 1.9318179694207726e-05, "loss": 0.6574, "step": 12704 }, { "epoch": 2.0739561650544878, "grad_norm": 1.7774242162704468, "learning_rate": 1.9318064521272223e-05, "loss": 0.6212, "step": 12705 }, { "epoch": 2.0741194236969918, "grad_norm": 1.6633682250976562, "learning_rate": 1.9317949338953435e-05, "loss": 0.5105, "step": 12706 }, { "epoch": 2.074282682339496, "grad_norm": 1.607882022857666, "learning_rate": 1.9317834147251477e-05, "loss": 0.5881, "step": 12707 }, { "epoch": 2.0744459409820006, "grad_norm": 2.4702701568603516, "learning_rate": 1.9317718946166457e-05, "loss": 1.0495, "step": 12708 }, { "epoch": 2.074609199624505, "grad_norm": 2.163066864013672, "learning_rate": 1.9317603735698497e-05, "loss": 0.7426, "step": 12709 }, { "epoch": 2.0747724582670095, "grad_norm": 1.581940770149231, "learning_rate": 1.9317488515847717e-05, "loss": 0.4888, "step": 12710 }, { "epoch": 2.074935716909514, "grad_norm": 2.1653759479522705, "learning_rate": 1.9317373286614223e-05, "loss": 0.6081, "step": 12711 }, { "epoch": 2.0750989755520184, "grad_norm": 1.4381012916564941, "learning_rate": 1.931725804799814e-05, "loss": 0.4, "step": 12712 }, { "epoch": 2.075262234194523, "grad_norm": 1.4134166240692139, "learning_rate": 1.9317142799999576e-05, "loss": 0.4162, "step": 12713 }, { "epoch": 2.0754254928370273, "grad_norm": 1.5760836601257324, "learning_rate": 1.9317027542618656e-05, "loss": 0.5043, "step": 12714 }, { "epoch": 2.0755887514795313, "grad_norm": 1.5688426494598389, "learning_rate": 1.931691227585549e-05, "loss": 0.5073, "step": 12715 }, { "epoch": 2.0757520101220357, "grad_norm": 1.4302319288253784, "learning_rate": 1.93167969997102e-05, "loss": 0.4648, "step": 12716 }, { "epoch": 2.07591526876454, "grad_norm": 1.7129186391830444, "learning_rate": 1.9316681714182893e-05, "loss": 0.5514, "step": 12717 }, { "epoch": 2.0760785274070446, "grad_norm": 2.374008893966675, "learning_rate": 1.931656641927369e-05, "loss": 0.6021, "step": 12718 }, { "epoch": 2.076241786049549, "grad_norm": 1.6718158721923828, "learning_rate": 1.931645111498271e-05, "loss": 0.5376, "step": 12719 }, { "epoch": 2.0764050446920534, "grad_norm": 1.8470888137817383, "learning_rate": 1.9316335801310064e-05, "loss": 0.5037, "step": 12720 }, { "epoch": 2.076568303334558, "grad_norm": 1.7440561056137085, "learning_rate": 1.931622047825587e-05, "loss": 0.5293, "step": 12721 }, { "epoch": 2.0767315619770623, "grad_norm": 1.5683214664459229, "learning_rate": 1.931610514582025e-05, "loss": 0.5407, "step": 12722 }, { "epoch": 2.0768948206195668, "grad_norm": 1.9340459108352661, "learning_rate": 1.9315989804003307e-05, "loss": 0.5647, "step": 12723 }, { "epoch": 2.0770580792620708, "grad_norm": 1.668990135192871, "learning_rate": 1.9315874452805167e-05, "loss": 0.5884, "step": 12724 }, { "epoch": 2.077221337904575, "grad_norm": 1.6062568426132202, "learning_rate": 1.9315759092225947e-05, "loss": 0.5312, "step": 12725 }, { "epoch": 2.0773845965470796, "grad_norm": 1.3526465892791748, "learning_rate": 1.931564372226576e-05, "loss": 0.4637, "step": 12726 }, { "epoch": 2.077547855189584, "grad_norm": 1.8592619895935059, "learning_rate": 1.9315528342924716e-05, "loss": 0.6208, "step": 12727 }, { "epoch": 2.0777111138320885, "grad_norm": 1.641052007675171, "learning_rate": 1.9315412954202945e-05, "loss": 0.5761, "step": 12728 }, { "epoch": 2.077874372474593, "grad_norm": 1.8448584079742432, "learning_rate": 1.9315297556100548e-05, "loss": 0.5195, "step": 12729 }, { "epoch": 2.0780376311170974, "grad_norm": 1.907055139541626, "learning_rate": 1.9315182148617655e-05, "loss": 0.5774, "step": 12730 }, { "epoch": 2.078200889759602, "grad_norm": 1.950094223022461, "learning_rate": 1.9315066731754373e-05, "loss": 0.5712, "step": 12731 }, { "epoch": 2.0783641484021063, "grad_norm": 1.4499282836914062, "learning_rate": 1.931495130551082e-05, "loss": 0.4431, "step": 12732 }, { "epoch": 2.0785274070446103, "grad_norm": 2.6697895526885986, "learning_rate": 1.931483586988712e-05, "loss": 0.5708, "step": 12733 }, { "epoch": 2.0786906656871147, "grad_norm": 2.078718662261963, "learning_rate": 1.9314720424883376e-05, "loss": 0.7159, "step": 12734 }, { "epoch": 2.078853924329619, "grad_norm": 1.626994013786316, "learning_rate": 1.9314604970499716e-05, "loss": 0.46, "step": 12735 }, { "epoch": 2.0790171829721236, "grad_norm": 1.812000036239624, "learning_rate": 1.9314489506736247e-05, "loss": 0.579, "step": 12736 }, { "epoch": 2.079180441614628, "grad_norm": 1.9553996324539185, "learning_rate": 1.931437403359309e-05, "loss": 0.6228, "step": 12737 }, { "epoch": 2.0793437002571324, "grad_norm": 1.7759252786636353, "learning_rate": 1.9314258551070363e-05, "loss": 0.5596, "step": 12738 }, { "epoch": 2.079506958899637, "grad_norm": 2.067488431930542, "learning_rate": 1.931414305916818e-05, "loss": 0.5906, "step": 12739 }, { "epoch": 2.0796702175421413, "grad_norm": 1.7963440418243408, "learning_rate": 1.9314027557886655e-05, "loss": 0.6192, "step": 12740 }, { "epoch": 2.0798334761846453, "grad_norm": 1.6041064262390137, "learning_rate": 1.931391204722591e-05, "loss": 0.5709, "step": 12741 }, { "epoch": 2.0799967348271498, "grad_norm": 1.804365634918213, "learning_rate": 1.9313796527186056e-05, "loss": 0.5542, "step": 12742 }, { "epoch": 2.080159993469654, "grad_norm": 1.6745089292526245, "learning_rate": 1.9313680997767213e-05, "loss": 0.5464, "step": 12743 }, { "epoch": 2.0803232521121586, "grad_norm": 1.7077293395996094, "learning_rate": 1.9313565458969493e-05, "loss": 0.6145, "step": 12744 }, { "epoch": 2.080486510754663, "grad_norm": 1.7200086116790771, "learning_rate": 1.9313449910793015e-05, "loss": 0.5089, "step": 12745 }, { "epoch": 2.0806497693971675, "grad_norm": 2.014143705368042, "learning_rate": 1.93133343532379e-05, "loss": 0.6371, "step": 12746 }, { "epoch": 2.080813028039672, "grad_norm": 1.9303611516952515, "learning_rate": 1.9313218786304255e-05, "loss": 0.6676, "step": 12747 }, { "epoch": 2.0809762866821764, "grad_norm": 1.9105255603790283, "learning_rate": 1.9313103209992205e-05, "loss": 0.5546, "step": 12748 }, { "epoch": 2.081139545324681, "grad_norm": 1.661882996559143, "learning_rate": 1.931298762430186e-05, "loss": 0.4664, "step": 12749 }, { "epoch": 2.081302803967185, "grad_norm": 1.94900381565094, "learning_rate": 1.931287202923334e-05, "loss": 0.6287, "step": 12750 }, { "epoch": 2.0814660626096892, "grad_norm": 1.7645976543426514, "learning_rate": 1.9312756424786758e-05, "loss": 0.616, "step": 12751 }, { "epoch": 2.0816293212521937, "grad_norm": 1.8442646265029907, "learning_rate": 1.9312640810962237e-05, "loss": 0.5621, "step": 12752 }, { "epoch": 2.081792579894698, "grad_norm": 1.9327770471572876, "learning_rate": 1.9312525187759886e-05, "loss": 0.5189, "step": 12753 }, { "epoch": 2.0819558385372026, "grad_norm": 1.5647932291030884, "learning_rate": 1.9312409555179827e-05, "loss": 0.5038, "step": 12754 }, { "epoch": 2.082119097179707, "grad_norm": 2.269789218902588, "learning_rate": 1.931229391322217e-05, "loss": 0.6673, "step": 12755 }, { "epoch": 2.0822823558222114, "grad_norm": 1.7380080223083496, "learning_rate": 1.9312178261887037e-05, "loss": 0.6179, "step": 12756 }, { "epoch": 2.082445614464716, "grad_norm": 1.6851855516433716, "learning_rate": 1.9312062601174543e-05, "loss": 0.5069, "step": 12757 }, { "epoch": 2.0826088731072203, "grad_norm": 1.553062915802002, "learning_rate": 1.9311946931084806e-05, "loss": 0.5146, "step": 12758 }, { "epoch": 2.0827721317497243, "grad_norm": 1.7488071918487549, "learning_rate": 1.9311831251617942e-05, "loss": 0.6017, "step": 12759 }, { "epoch": 2.0829353903922287, "grad_norm": 2.0195295810699463, "learning_rate": 1.931171556277406e-05, "loss": 0.5826, "step": 12760 }, { "epoch": 2.083098649034733, "grad_norm": 1.4833500385284424, "learning_rate": 1.9311599864553292e-05, "loss": 0.4804, "step": 12761 }, { "epoch": 2.0832619076772376, "grad_norm": 1.6721388101577759, "learning_rate": 1.931148415695574e-05, "loss": 0.5314, "step": 12762 }, { "epoch": 2.083425166319742, "grad_norm": 1.6811213493347168, "learning_rate": 1.9311368439981526e-05, "loss": 0.5465, "step": 12763 }, { "epoch": 2.0835884249622465, "grad_norm": 2.176949977874756, "learning_rate": 1.931125271363077e-05, "loss": 0.6339, "step": 12764 }, { "epoch": 2.083751683604751, "grad_norm": 1.8629239797592163, "learning_rate": 1.931113697790358e-05, "loss": 0.596, "step": 12765 }, { "epoch": 2.0839149422472554, "grad_norm": 1.9455536603927612, "learning_rate": 1.931102123280008e-05, "loss": 0.78, "step": 12766 }, { "epoch": 2.08407820088976, "grad_norm": 1.7788071632385254, "learning_rate": 1.931090547832038e-05, "loss": 0.555, "step": 12767 }, { "epoch": 2.084241459532264, "grad_norm": 1.8842626810073853, "learning_rate": 1.9310789714464605e-05, "loss": 0.6504, "step": 12768 }, { "epoch": 2.0844047181747682, "grad_norm": 1.8657252788543701, "learning_rate": 1.9310673941232868e-05, "loss": 0.4955, "step": 12769 }, { "epoch": 2.0845679768172727, "grad_norm": 1.6530393362045288, "learning_rate": 1.9310558158625286e-05, "loss": 0.4669, "step": 12770 }, { "epoch": 2.084731235459777, "grad_norm": 2.150618553161621, "learning_rate": 1.931044236664197e-05, "loss": 0.5823, "step": 12771 }, { "epoch": 2.0848944941022816, "grad_norm": 2.16544771194458, "learning_rate": 1.9310326565283045e-05, "loss": 0.7343, "step": 12772 }, { "epoch": 2.085057752744786, "grad_norm": 1.8436336517333984, "learning_rate": 1.931021075454862e-05, "loss": 0.6213, "step": 12773 }, { "epoch": 2.0852210113872904, "grad_norm": 2.0119311809539795, "learning_rate": 1.9310094934438816e-05, "loss": 0.7431, "step": 12774 }, { "epoch": 2.085384270029795, "grad_norm": 1.5817744731903076, "learning_rate": 1.9309979104953747e-05, "loss": 0.4391, "step": 12775 }, { "epoch": 2.085547528672299, "grad_norm": 1.7501106262207031, "learning_rate": 1.930986326609354e-05, "loss": 0.5823, "step": 12776 }, { "epoch": 2.0857107873148033, "grad_norm": 1.8551634550094604, "learning_rate": 1.9309747417858295e-05, "loss": 0.6388, "step": 12777 }, { "epoch": 2.0858740459573077, "grad_norm": 2.226353883743286, "learning_rate": 1.9309631560248137e-05, "loss": 0.6789, "step": 12778 }, { "epoch": 2.086037304599812, "grad_norm": 2.0928761959075928, "learning_rate": 1.9309515693263185e-05, "loss": 0.6382, "step": 12779 }, { "epoch": 2.0862005632423166, "grad_norm": 1.9373363256454468, "learning_rate": 1.9309399816903554e-05, "loss": 0.5996, "step": 12780 }, { "epoch": 2.086363821884821, "grad_norm": 1.7545485496520996, "learning_rate": 1.930928393116936e-05, "loss": 0.4969, "step": 12781 }, { "epoch": 2.0865270805273255, "grad_norm": 1.7791355848312378, "learning_rate": 1.9309168036060717e-05, "loss": 0.5554, "step": 12782 }, { "epoch": 2.08669033916983, "grad_norm": 1.6384872198104858, "learning_rate": 1.9309052131577746e-05, "loss": 0.4806, "step": 12783 }, { "epoch": 2.0868535978123344, "grad_norm": 1.8091706037521362, "learning_rate": 1.930893621772056e-05, "loss": 0.5309, "step": 12784 }, { "epoch": 2.0870168564548384, "grad_norm": 1.9727658033370972, "learning_rate": 1.9308820294489278e-05, "loss": 0.5911, "step": 12785 }, { "epoch": 2.087180115097343, "grad_norm": 1.785212755203247, "learning_rate": 1.930870436188402e-05, "loss": 0.5195, "step": 12786 }, { "epoch": 2.0873433737398472, "grad_norm": 1.5724595785140991, "learning_rate": 1.9308588419904895e-05, "loss": 0.5063, "step": 12787 }, { "epoch": 2.0875066323823517, "grad_norm": 1.9323153495788574, "learning_rate": 1.9308472468552026e-05, "loss": 0.6168, "step": 12788 }, { "epoch": 2.087669891024856, "grad_norm": 1.6479941606521606, "learning_rate": 1.930835650782553e-05, "loss": 0.5812, "step": 12789 }, { "epoch": 2.0878331496673606, "grad_norm": 1.8004562854766846, "learning_rate": 1.930824053772552e-05, "loss": 0.6025, "step": 12790 }, { "epoch": 2.087996408309865, "grad_norm": 1.6458659172058105, "learning_rate": 1.9308124558252112e-05, "loss": 0.4834, "step": 12791 }, { "epoch": 2.0881596669523694, "grad_norm": 2.229768753051758, "learning_rate": 1.9308008569405424e-05, "loss": 0.7925, "step": 12792 }, { "epoch": 2.088322925594874, "grad_norm": 1.835206389427185, "learning_rate": 1.9307892571185575e-05, "loss": 0.5327, "step": 12793 }, { "epoch": 2.088486184237378, "grad_norm": 1.8722730875015259, "learning_rate": 1.9307776563592683e-05, "loss": 0.5966, "step": 12794 }, { "epoch": 2.0886494428798823, "grad_norm": 1.6240946054458618, "learning_rate": 1.930766054662686e-05, "loss": 0.5512, "step": 12795 }, { "epoch": 2.0888127015223867, "grad_norm": 1.616065502166748, "learning_rate": 1.9307544520288227e-05, "loss": 0.5672, "step": 12796 }, { "epoch": 2.088975960164891, "grad_norm": 1.912476658821106, "learning_rate": 1.93074284845769e-05, "loss": 0.6024, "step": 12797 }, { "epoch": 2.0891392188073956, "grad_norm": 1.7547523975372314, "learning_rate": 1.9307312439492992e-05, "loss": 0.6347, "step": 12798 }, { "epoch": 2.0893024774499, "grad_norm": 1.6140174865722656, "learning_rate": 1.9307196385036623e-05, "loss": 0.6588, "step": 12799 }, { "epoch": 2.0894657360924045, "grad_norm": 2.0069427490234375, "learning_rate": 1.9307080321207913e-05, "loss": 0.5967, "step": 12800 }, { "epoch": 2.089628994734909, "grad_norm": 1.824831485748291, "learning_rate": 1.9306964248006973e-05, "loss": 0.5794, "step": 12801 }, { "epoch": 2.0897922533774134, "grad_norm": 1.7463593482971191, "learning_rate": 1.9306848165433924e-05, "loss": 0.5485, "step": 12802 }, { "epoch": 2.0899555120199174, "grad_norm": 1.568799376487732, "learning_rate": 1.930673207348888e-05, "loss": 0.5203, "step": 12803 }, { "epoch": 2.090118770662422, "grad_norm": 1.9064339399337769, "learning_rate": 1.9306615972171962e-05, "loss": 0.6937, "step": 12804 }, { "epoch": 2.0902820293049262, "grad_norm": 1.4719138145446777, "learning_rate": 1.9306499861483278e-05, "loss": 0.549, "step": 12805 }, { "epoch": 2.0904452879474307, "grad_norm": 1.8527660369873047, "learning_rate": 1.9306383741422957e-05, "loss": 0.6776, "step": 12806 }, { "epoch": 2.090608546589935, "grad_norm": 1.8649863004684448, "learning_rate": 1.9306267611991108e-05, "loss": 0.5637, "step": 12807 }, { "epoch": 2.0907718052324396, "grad_norm": 1.808889627456665, "learning_rate": 1.930615147318785e-05, "loss": 0.6557, "step": 12808 }, { "epoch": 2.090935063874944, "grad_norm": 1.9260441064834595, "learning_rate": 1.9306035325013298e-05, "loss": 0.5877, "step": 12809 }, { "epoch": 2.0910983225174484, "grad_norm": 1.9437551498413086, "learning_rate": 1.9305919167467575e-05, "loss": 0.6026, "step": 12810 }, { "epoch": 2.091261581159953, "grad_norm": 1.8337372541427612, "learning_rate": 1.930580300055079e-05, "loss": 0.6183, "step": 12811 }, { "epoch": 2.091424839802457, "grad_norm": 2.004124879837036, "learning_rate": 1.9305686824263067e-05, "loss": 0.6373, "step": 12812 }, { "epoch": 2.0915880984449613, "grad_norm": 1.938170313835144, "learning_rate": 1.9305570638604517e-05, "loss": 0.6469, "step": 12813 }, { "epoch": 2.0917513570874657, "grad_norm": 1.8495286703109741, "learning_rate": 1.930545444357526e-05, "loss": 0.6266, "step": 12814 }, { "epoch": 2.09191461572997, "grad_norm": 1.817981481552124, "learning_rate": 1.9305338239175416e-05, "loss": 0.581, "step": 12815 }, { "epoch": 2.0920778743724746, "grad_norm": 1.730657935142517, "learning_rate": 1.9305222025405096e-05, "loss": 0.529, "step": 12816 }, { "epoch": 2.092241133014979, "grad_norm": 1.6864612102508545, "learning_rate": 1.930510580226442e-05, "loss": 0.6214, "step": 12817 }, { "epoch": 2.0924043916574835, "grad_norm": 1.9470428228378296, "learning_rate": 1.9304989569753506e-05, "loss": 0.6677, "step": 12818 }, { "epoch": 2.092567650299988, "grad_norm": 2.1851179599761963, "learning_rate": 1.930487332787247e-05, "loss": 0.6372, "step": 12819 }, { "epoch": 2.0927309089424924, "grad_norm": 1.6719392538070679, "learning_rate": 1.930475707662143e-05, "loss": 0.5754, "step": 12820 }, { "epoch": 2.0928941675849964, "grad_norm": 2.1996285915374756, "learning_rate": 1.93046408160005e-05, "loss": 0.6308, "step": 12821 }, { "epoch": 2.093057426227501, "grad_norm": 1.7618937492370605, "learning_rate": 1.93045245460098e-05, "loss": 0.6064, "step": 12822 }, { "epoch": 2.0932206848700052, "grad_norm": 1.930561900138855, "learning_rate": 1.9304408266649444e-05, "loss": 0.5459, "step": 12823 }, { "epoch": 2.0933839435125097, "grad_norm": 1.7970324754714966, "learning_rate": 1.9304291977919554e-05, "loss": 0.6112, "step": 12824 }, { "epoch": 2.093547202155014, "grad_norm": 1.4806278944015503, "learning_rate": 1.9304175679820247e-05, "loss": 0.5414, "step": 12825 }, { "epoch": 2.0937104607975185, "grad_norm": 1.8206032514572144, "learning_rate": 1.9304059372351633e-05, "loss": 0.6972, "step": 12826 }, { "epoch": 2.093873719440023, "grad_norm": 1.4162654876708984, "learning_rate": 1.9303943055513836e-05, "loss": 0.4925, "step": 12827 }, { "epoch": 2.0940369780825274, "grad_norm": 1.4881614446640015, "learning_rate": 1.9303826729306973e-05, "loss": 0.4947, "step": 12828 }, { "epoch": 2.0942002367250314, "grad_norm": 1.645896077156067, "learning_rate": 1.9303710393731153e-05, "loss": 0.571, "step": 12829 }, { "epoch": 2.094363495367536, "grad_norm": 2.346682071685791, "learning_rate": 1.9303594048786505e-05, "loss": 0.6869, "step": 12830 }, { "epoch": 2.0945267540100403, "grad_norm": 1.619586706161499, "learning_rate": 1.9303477694473135e-05, "loss": 0.5221, "step": 12831 }, { "epoch": 2.0946900126525447, "grad_norm": 1.8854387998580933, "learning_rate": 1.930336133079117e-05, "loss": 0.5595, "step": 12832 }, { "epoch": 2.094853271295049, "grad_norm": 1.6953614950180054, "learning_rate": 1.9303244957740718e-05, "loss": 1.0982, "step": 12833 }, { "epoch": 2.0950165299375536, "grad_norm": 1.6432044506072998, "learning_rate": 1.9303128575321906e-05, "loss": 0.5428, "step": 12834 }, { "epoch": 2.095179788580058, "grad_norm": 1.9454517364501953, "learning_rate": 1.9303012183534842e-05, "loss": 0.5796, "step": 12835 }, { "epoch": 2.0953430472225625, "grad_norm": 1.6888647079467773, "learning_rate": 1.9302895782379648e-05, "loss": 0.5245, "step": 12836 }, { "epoch": 2.095506305865067, "grad_norm": 2.2397663593292236, "learning_rate": 1.9302779371856443e-05, "loss": 0.719, "step": 12837 }, { "epoch": 2.095669564507571, "grad_norm": 2.0789718627929688, "learning_rate": 1.9302662951965337e-05, "loss": 0.7743, "step": 12838 }, { "epoch": 2.0958328231500754, "grad_norm": 1.7546110153198242, "learning_rate": 1.9302546522706454e-05, "loss": 0.5634, "step": 12839 }, { "epoch": 2.09599608179258, "grad_norm": 1.5024144649505615, "learning_rate": 1.930243008407991e-05, "loss": 0.4643, "step": 12840 }, { "epoch": 2.0961593404350842, "grad_norm": 1.6136242151260376, "learning_rate": 1.930231363608582e-05, "loss": 0.5467, "step": 12841 }, { "epoch": 2.0963225990775887, "grad_norm": 1.9911699295043945, "learning_rate": 1.9302197178724306e-05, "loss": 0.6726, "step": 12842 }, { "epoch": 2.096485857720093, "grad_norm": 1.9862892627716064, "learning_rate": 1.9302080711995477e-05, "loss": 0.7915, "step": 12843 }, { "epoch": 2.0966491163625975, "grad_norm": 1.5588868856430054, "learning_rate": 1.930196423589946e-05, "loss": 0.4626, "step": 12844 }, { "epoch": 2.096812375005102, "grad_norm": 1.8360767364501953, "learning_rate": 1.9301847750436362e-05, "loss": 0.4991, "step": 12845 }, { "epoch": 2.0969756336476064, "grad_norm": 1.7421984672546387, "learning_rate": 1.930173125560631e-05, "loss": 0.6701, "step": 12846 }, { "epoch": 2.0971388922901104, "grad_norm": 1.665395736694336, "learning_rate": 1.9301614751409416e-05, "loss": 0.546, "step": 12847 }, { "epoch": 2.097302150932615, "grad_norm": 1.9602199792861938, "learning_rate": 1.93014982378458e-05, "loss": 0.578, "step": 12848 }, { "epoch": 2.0974654095751193, "grad_norm": 1.969148874282837, "learning_rate": 1.9301381714915573e-05, "loss": 0.6749, "step": 12849 }, { "epoch": 2.0976286682176237, "grad_norm": 1.9804785251617432, "learning_rate": 1.9301265182618862e-05, "loss": 0.5903, "step": 12850 }, { "epoch": 2.097791926860128, "grad_norm": 1.754127025604248, "learning_rate": 1.930114864095578e-05, "loss": 0.4807, "step": 12851 }, { "epoch": 2.0979551855026326, "grad_norm": 2.0960655212402344, "learning_rate": 1.930103208992644e-05, "loss": 0.6274, "step": 12852 }, { "epoch": 2.098118444145137, "grad_norm": 1.4841028451919556, "learning_rate": 1.9300915529530963e-05, "loss": 0.5314, "step": 12853 }, { "epoch": 2.0982817027876415, "grad_norm": 1.9858944416046143, "learning_rate": 1.930079895976947e-05, "loss": 0.6127, "step": 12854 }, { "epoch": 2.098444961430146, "grad_norm": 2.44119930267334, "learning_rate": 1.9300682380642072e-05, "loss": 0.6278, "step": 12855 }, { "epoch": 2.09860822007265, "grad_norm": 1.89239501953125, "learning_rate": 1.9300565792148892e-05, "loss": 0.5974, "step": 12856 }, { "epoch": 2.0987714787151543, "grad_norm": 1.5920915603637695, "learning_rate": 1.9300449194290042e-05, "loss": 0.506, "step": 12857 }, { "epoch": 2.098934737357659, "grad_norm": 1.789681315422058, "learning_rate": 1.9300332587065644e-05, "loss": 0.5882, "step": 12858 }, { "epoch": 2.0990979960001632, "grad_norm": 2.000105857849121, "learning_rate": 1.9300215970475812e-05, "loss": 0.5771, "step": 12859 }, { "epoch": 2.0992612546426677, "grad_norm": 1.5265791416168213, "learning_rate": 1.9300099344520665e-05, "loss": 0.5304, "step": 12860 }, { "epoch": 2.099424513285172, "grad_norm": 2.1608119010925293, "learning_rate": 1.9299982709200323e-05, "loss": 0.697, "step": 12861 }, { "epoch": 2.0995877719276765, "grad_norm": 1.6674336194992065, "learning_rate": 1.9299866064514896e-05, "loss": 0.5152, "step": 12862 }, { "epoch": 2.099751030570181, "grad_norm": 2.232393264770508, "learning_rate": 1.929974941046451e-05, "loss": 0.6372, "step": 12863 }, { "epoch": 2.099914289212685, "grad_norm": 1.7510950565338135, "learning_rate": 1.9299632747049278e-05, "loss": 0.6015, "step": 12864 }, { "epoch": 2.1000775478551894, "grad_norm": 1.6037365198135376, "learning_rate": 1.929951607426932e-05, "loss": 0.5706, "step": 12865 }, { "epoch": 2.100240806497694, "grad_norm": 1.9804211854934692, "learning_rate": 1.929939939212475e-05, "loss": 0.6736, "step": 12866 }, { "epoch": 2.1004040651401983, "grad_norm": 2.003833770751953, "learning_rate": 1.9299282700615687e-05, "loss": 0.6562, "step": 12867 }, { "epoch": 2.1005673237827027, "grad_norm": 2.0173146724700928, "learning_rate": 1.9299165999742248e-05, "loss": 0.6591, "step": 12868 }, { "epoch": 2.100730582425207, "grad_norm": 1.878605842590332, "learning_rate": 1.9299049289504555e-05, "loss": 0.5936, "step": 12869 }, { "epoch": 2.1008938410677116, "grad_norm": 1.564061164855957, "learning_rate": 1.9298932569902717e-05, "loss": 0.6235, "step": 12870 }, { "epoch": 2.101057099710216, "grad_norm": 1.8321222066879272, "learning_rate": 1.929881584093686e-05, "loss": 0.5163, "step": 12871 }, { "epoch": 2.1012203583527205, "grad_norm": 1.5386427640914917, "learning_rate": 1.9298699102607097e-05, "loss": 0.4882, "step": 12872 }, { "epoch": 2.101383616995225, "grad_norm": 1.909063458442688, "learning_rate": 1.9298582354913543e-05, "loss": 0.5134, "step": 12873 }, { "epoch": 2.101546875637729, "grad_norm": 1.92169189453125, "learning_rate": 1.9298465597856328e-05, "loss": 0.5799, "step": 12874 }, { "epoch": 2.1017101342802333, "grad_norm": 2.3973891735076904, "learning_rate": 1.9298348831435553e-05, "loss": 0.504, "step": 12875 }, { "epoch": 2.101873392922738, "grad_norm": 1.7647136449813843, "learning_rate": 1.9298232055651344e-05, "loss": 0.5765, "step": 12876 }, { "epoch": 2.102036651565242, "grad_norm": 2.097569227218628, "learning_rate": 1.929811527050382e-05, "loss": 0.5802, "step": 12877 }, { "epoch": 2.1021999102077467, "grad_norm": 1.5814440250396729, "learning_rate": 1.9297998475993094e-05, "loss": 0.5572, "step": 12878 }, { "epoch": 2.102363168850251, "grad_norm": 1.9188121557235718, "learning_rate": 1.9297881672119287e-05, "loss": 0.6783, "step": 12879 }, { "epoch": 2.1025264274927555, "grad_norm": 1.8461754322052002, "learning_rate": 1.9297764858882516e-05, "loss": 0.6291, "step": 12880 }, { "epoch": 2.10268968613526, "grad_norm": 1.695420742034912, "learning_rate": 1.9297648036282898e-05, "loss": 0.5469, "step": 12881 }, { "epoch": 2.102852944777764, "grad_norm": 1.5807846784591675, "learning_rate": 1.929753120432055e-05, "loss": 0.5666, "step": 12882 }, { "epoch": 2.1030162034202684, "grad_norm": 2.0511667728424072, "learning_rate": 1.929741436299559e-05, "loss": 0.7042, "step": 12883 }, { "epoch": 2.103179462062773, "grad_norm": 1.9001705646514893, "learning_rate": 1.929729751230814e-05, "loss": 0.5536, "step": 12884 }, { "epoch": 2.1033427207052773, "grad_norm": 1.7495828866958618, "learning_rate": 1.929718065225831e-05, "loss": 0.5722, "step": 12885 }, { "epoch": 2.1035059793477817, "grad_norm": 2.039482831954956, "learning_rate": 1.9297063782846224e-05, "loss": 0.6866, "step": 12886 }, { "epoch": 2.103669237990286, "grad_norm": 1.6972050666809082, "learning_rate": 1.9296946904071998e-05, "loss": 0.545, "step": 12887 }, { "epoch": 2.1038324966327906, "grad_norm": 1.448533535003662, "learning_rate": 1.9296830015935746e-05, "loss": 0.5019, "step": 12888 }, { "epoch": 2.103995755275295, "grad_norm": 1.7679989337921143, "learning_rate": 1.9296713118437588e-05, "loss": 0.6086, "step": 12889 }, { "epoch": 2.1041590139177995, "grad_norm": 1.6184943914413452, "learning_rate": 1.9296596211577646e-05, "loss": 0.5443, "step": 12890 }, { "epoch": 2.1043222725603035, "grad_norm": 1.554587483406067, "learning_rate": 1.9296479295356035e-05, "loss": 0.505, "step": 12891 }, { "epoch": 2.104485531202808, "grad_norm": 1.683470368385315, "learning_rate": 1.9296362369772867e-05, "loss": 0.5609, "step": 12892 }, { "epoch": 2.1046487898453123, "grad_norm": 1.8868921995162964, "learning_rate": 1.9296245434828266e-05, "loss": 0.5713, "step": 12893 }, { "epoch": 2.104812048487817, "grad_norm": 1.7698732614517212, "learning_rate": 1.929612849052235e-05, "loss": 0.5312, "step": 12894 }, { "epoch": 2.104975307130321, "grad_norm": 1.8295979499816895, "learning_rate": 1.9296011536855235e-05, "loss": 0.5849, "step": 12895 }, { "epoch": 2.1051385657728257, "grad_norm": 2.1352109909057617, "learning_rate": 1.929589457382704e-05, "loss": 0.6152, "step": 12896 }, { "epoch": 2.10530182441533, "grad_norm": 2.173583507537842, "learning_rate": 1.9295777601437878e-05, "loss": 0.6982, "step": 12897 }, { "epoch": 2.1054650830578345, "grad_norm": 1.594587802886963, "learning_rate": 1.929566061968787e-05, "loss": 0.6216, "step": 12898 }, { "epoch": 2.105628341700339, "grad_norm": 1.94664466381073, "learning_rate": 1.929554362857714e-05, "loss": 0.6313, "step": 12899 }, { "epoch": 2.105791600342843, "grad_norm": 1.4956679344177246, "learning_rate": 1.9295426628105792e-05, "loss": 0.4595, "step": 12900 }, { "epoch": 2.1059548589853474, "grad_norm": 1.496766448020935, "learning_rate": 1.929530961827396e-05, "loss": 0.5557, "step": 12901 }, { "epoch": 2.106118117627852, "grad_norm": 2.3608617782592773, "learning_rate": 1.9295192599081747e-05, "loss": 0.6138, "step": 12902 }, { "epoch": 2.1062813762703563, "grad_norm": 1.79420804977417, "learning_rate": 1.929507557052928e-05, "loss": 0.6272, "step": 12903 }, { "epoch": 2.1064446349128607, "grad_norm": 1.6578106880187988, "learning_rate": 1.9294958532616675e-05, "loss": 0.4415, "step": 12904 }, { "epoch": 2.106607893555365, "grad_norm": 1.9367291927337646, "learning_rate": 1.929484148534405e-05, "loss": 0.6192, "step": 12905 }, { "epoch": 2.1067711521978696, "grad_norm": 1.8938994407653809, "learning_rate": 1.929472442871152e-05, "loss": 0.5565, "step": 12906 }, { "epoch": 2.106934410840374, "grad_norm": 1.9660028219223022, "learning_rate": 1.9294607362719206e-05, "loss": 0.6419, "step": 12907 }, { "epoch": 2.1070976694828785, "grad_norm": 1.880381464958191, "learning_rate": 1.9294490287367226e-05, "loss": 0.5692, "step": 12908 }, { "epoch": 2.1072609281253825, "grad_norm": 1.6451698541641235, "learning_rate": 1.9294373202655694e-05, "loss": 0.5156, "step": 12909 }, { "epoch": 2.107424186767887, "grad_norm": 1.4751999378204346, "learning_rate": 1.9294256108584734e-05, "loss": 0.4374, "step": 12910 }, { "epoch": 2.1075874454103913, "grad_norm": 2.071787118911743, "learning_rate": 1.929413900515446e-05, "loss": 0.6966, "step": 12911 }, { "epoch": 2.1077507040528958, "grad_norm": 1.7881094217300415, "learning_rate": 1.9294021892364988e-05, "loss": 0.6526, "step": 12912 }, { "epoch": 2.1079139626954, "grad_norm": 2.0441176891326904, "learning_rate": 1.929390477021644e-05, "loss": 0.5501, "step": 12913 }, { "epoch": 2.1080772213379046, "grad_norm": 1.5230073928833008, "learning_rate": 1.929378763870893e-05, "loss": 0.4732, "step": 12914 }, { "epoch": 2.108240479980409, "grad_norm": 1.7061731815338135, "learning_rate": 1.9293670497842584e-05, "loss": 0.5125, "step": 12915 }, { "epoch": 2.1084037386229135, "grad_norm": 2.0876922607421875, "learning_rate": 1.9293553347617506e-05, "loss": 0.6393, "step": 12916 }, { "epoch": 2.1085669972654175, "grad_norm": 1.8641828298568726, "learning_rate": 1.9293436188033826e-05, "loss": 0.5623, "step": 12917 }, { "epoch": 2.108730255907922, "grad_norm": 1.638543963432312, "learning_rate": 1.929331901909166e-05, "loss": 0.5653, "step": 12918 }, { "epoch": 2.1088935145504264, "grad_norm": 2.119798183441162, "learning_rate": 1.9293201840791124e-05, "loss": 0.7147, "step": 12919 }, { "epoch": 2.109056773192931, "grad_norm": 2.2072949409484863, "learning_rate": 1.9293084653132336e-05, "loss": 0.6604, "step": 12920 }, { "epoch": 2.1092200318354353, "grad_norm": 1.8547977209091187, "learning_rate": 1.9292967456115414e-05, "loss": 0.5887, "step": 12921 }, { "epoch": 2.1093832904779397, "grad_norm": 2.3774960041046143, "learning_rate": 1.9292850249740474e-05, "loss": 0.577, "step": 12922 }, { "epoch": 2.109546549120444, "grad_norm": 1.7576109170913696, "learning_rate": 1.929273303400764e-05, "loss": 0.707, "step": 12923 }, { "epoch": 2.1097098077629486, "grad_norm": 1.6765239238739014, "learning_rate": 1.9292615808917027e-05, "loss": 0.5163, "step": 12924 }, { "epoch": 2.109873066405453, "grad_norm": 1.9253649711608887, "learning_rate": 1.929249857446875e-05, "loss": 0.5313, "step": 12925 }, { "epoch": 2.110036325047957, "grad_norm": 1.8469806909561157, "learning_rate": 1.9292381330662926e-05, "loss": 0.6545, "step": 12926 }, { "epoch": 2.1101995836904615, "grad_norm": 1.8150173425674438, "learning_rate": 1.929226407749968e-05, "loss": 0.6059, "step": 12927 }, { "epoch": 2.110362842332966, "grad_norm": 1.2963793277740479, "learning_rate": 1.929214681497913e-05, "loss": 0.528, "step": 12928 }, { "epoch": 2.1105261009754703, "grad_norm": 1.7331339120864868, "learning_rate": 1.9292029543101385e-05, "loss": 0.5856, "step": 12929 }, { "epoch": 2.1106893596179748, "grad_norm": 1.7204519510269165, "learning_rate": 1.9291912261866568e-05, "loss": 0.6, "step": 12930 }, { "epoch": 2.110852618260479, "grad_norm": 1.9969788789749146, "learning_rate": 1.9291794971274802e-05, "loss": 0.6849, "step": 12931 }, { "epoch": 2.1110158769029836, "grad_norm": 1.696711778640747, "learning_rate": 1.92916776713262e-05, "loss": 0.5093, "step": 12932 }, { "epoch": 2.111179135545488, "grad_norm": 2.358063220977783, "learning_rate": 1.929156036202088e-05, "loss": 0.6199, "step": 12933 }, { "epoch": 2.1113423941879925, "grad_norm": 1.5103769302368164, "learning_rate": 1.9291443043358963e-05, "loss": 0.5052, "step": 12934 }, { "epoch": 2.1115056528304965, "grad_norm": 1.847495675086975, "learning_rate": 1.9291325715340562e-05, "loss": 0.6394, "step": 12935 }, { "epoch": 2.111668911473001, "grad_norm": 1.923357605934143, "learning_rate": 1.92912083779658e-05, "loss": 0.4998, "step": 12936 }, { "epoch": 2.1118321701155054, "grad_norm": 2.1979894638061523, "learning_rate": 1.9291091031234795e-05, "loss": 0.5148, "step": 12937 }, { "epoch": 2.11199542875801, "grad_norm": 1.7269877195358276, "learning_rate": 1.9290973675147663e-05, "loss": 0.6196, "step": 12938 }, { "epoch": 2.1121586874005143, "grad_norm": 2.001535177230835, "learning_rate": 1.929085630970452e-05, "loss": 0.5706, "step": 12939 }, { "epoch": 2.1123219460430187, "grad_norm": 1.6670235395431519, "learning_rate": 1.9290738934905492e-05, "loss": 0.5754, "step": 12940 }, { "epoch": 2.112485204685523, "grad_norm": 1.1788421869277954, "learning_rate": 1.929062155075069e-05, "loss": 0.3482, "step": 12941 }, { "epoch": 2.1126484633280276, "grad_norm": 1.6506545543670654, "learning_rate": 1.9290504157240234e-05, "loss": 0.561, "step": 12942 }, { "epoch": 2.112811721970532, "grad_norm": 1.6213057041168213, "learning_rate": 1.9290386754374247e-05, "loss": 0.5665, "step": 12943 }, { "epoch": 2.112974980613036, "grad_norm": 1.6599394083023071, "learning_rate": 1.929026934215284e-05, "loss": 0.5726, "step": 12944 }, { "epoch": 2.1131382392555405, "grad_norm": 1.4732892513275146, "learning_rate": 1.9290151920576132e-05, "loss": 0.5081, "step": 12945 }, { "epoch": 2.113301497898045, "grad_norm": 1.743784785270691, "learning_rate": 1.9290034489644247e-05, "loss": 0.551, "step": 12946 }, { "epoch": 2.1134647565405493, "grad_norm": 2.028871774673462, "learning_rate": 1.9289917049357297e-05, "loss": 0.6447, "step": 12947 }, { "epoch": 2.1136280151830538, "grad_norm": 1.8813812732696533, "learning_rate": 1.9289799599715403e-05, "loss": 0.6653, "step": 12948 }, { "epoch": 2.113791273825558, "grad_norm": 1.7372645139694214, "learning_rate": 1.9289682140718685e-05, "loss": 0.5415, "step": 12949 }, { "epoch": 2.1139545324680626, "grad_norm": 1.7192362546920776, "learning_rate": 1.928956467236726e-05, "loss": 0.5472, "step": 12950 }, { "epoch": 2.114117791110567, "grad_norm": 1.5034383535385132, "learning_rate": 1.9289447194661243e-05, "loss": 0.5089, "step": 12951 }, { "epoch": 2.1142810497530715, "grad_norm": 1.4364275932312012, "learning_rate": 1.9289329707600758e-05, "loss": 0.4967, "step": 12952 }, { "epoch": 2.1144443083955755, "grad_norm": 1.6427183151245117, "learning_rate": 1.9289212211185918e-05, "loss": 0.5045, "step": 12953 }, { "epoch": 2.11460756703808, "grad_norm": 1.8998587131500244, "learning_rate": 1.9289094705416846e-05, "loss": 0.5558, "step": 12954 }, { "epoch": 2.1147708256805844, "grad_norm": 1.8749254941940308, "learning_rate": 1.9288977190293658e-05, "loss": 0.5944, "step": 12955 }, { "epoch": 2.114934084323089, "grad_norm": 1.710814118385315, "learning_rate": 1.928885966581647e-05, "loss": 0.5232, "step": 12956 }, { "epoch": 2.1150973429655933, "grad_norm": 1.962756872177124, "learning_rate": 1.9288742131985408e-05, "loss": 0.6692, "step": 12957 }, { "epoch": 2.1152606016080977, "grad_norm": 2.016226291656494, "learning_rate": 1.928862458880058e-05, "loss": 0.5967, "step": 12958 }, { "epoch": 2.115423860250602, "grad_norm": 1.709696650505066, "learning_rate": 1.928850703626211e-05, "loss": 0.566, "step": 12959 }, { "epoch": 2.1155871188931066, "grad_norm": 1.9613150358200073, "learning_rate": 1.9288389474370116e-05, "loss": 0.6301, "step": 12960 }, { "epoch": 2.115750377535611, "grad_norm": 1.7462822198867798, "learning_rate": 1.928827190312472e-05, "loss": 0.4582, "step": 12961 }, { "epoch": 2.115913636178115, "grad_norm": 1.916809320449829, "learning_rate": 1.9288154322526033e-05, "loss": 0.5294, "step": 12962 }, { "epoch": 2.1160768948206194, "grad_norm": 1.6629599332809448, "learning_rate": 1.928803673257418e-05, "loss": 0.5439, "step": 12963 }, { "epoch": 2.116240153463124, "grad_norm": 2.01239275932312, "learning_rate": 1.928791913326927e-05, "loss": 0.5594, "step": 12964 }, { "epoch": 2.1164034121056283, "grad_norm": 2.9051990509033203, "learning_rate": 1.9287801524611436e-05, "loss": 0.5719, "step": 12965 }, { "epoch": 2.1165666707481328, "grad_norm": 2.0981526374816895, "learning_rate": 1.928768390660078e-05, "loss": 0.6224, "step": 12966 }, { "epoch": 2.116729929390637, "grad_norm": 1.9037827253341675, "learning_rate": 1.9287566279237437e-05, "loss": 0.4912, "step": 12967 }, { "epoch": 2.1168931880331416, "grad_norm": 1.8557428121566772, "learning_rate": 1.9287448642521513e-05, "loss": 0.6067, "step": 12968 }, { "epoch": 2.117056446675646, "grad_norm": 2.0474653244018555, "learning_rate": 1.9287330996453133e-05, "loss": 0.5754, "step": 12969 }, { "epoch": 2.11721970531815, "grad_norm": 1.8348212242126465, "learning_rate": 1.928721334103241e-05, "loss": 0.5261, "step": 12970 }, { "epoch": 2.1173829639606545, "grad_norm": 1.9003703594207764, "learning_rate": 1.9287095676259467e-05, "loss": 0.5738, "step": 12971 }, { "epoch": 2.117546222603159, "grad_norm": 1.6596264839172363, "learning_rate": 1.928697800213442e-05, "loss": 0.563, "step": 12972 }, { "epoch": 2.1177094812456634, "grad_norm": 1.8570271730422974, "learning_rate": 1.928686031865739e-05, "loss": 0.5492, "step": 12973 }, { "epoch": 2.117872739888168, "grad_norm": 1.8317410945892334, "learning_rate": 1.9286742625828495e-05, "loss": 0.4928, "step": 12974 }, { "epoch": 2.1180359985306723, "grad_norm": 1.449820876121521, "learning_rate": 1.928662492364785e-05, "loss": 0.4936, "step": 12975 }, { "epoch": 2.1181992571731767, "grad_norm": 1.746020793914795, "learning_rate": 1.9286507212115578e-05, "loss": 0.5303, "step": 12976 }, { "epoch": 2.118362515815681, "grad_norm": 2.1669554710388184, "learning_rate": 1.9286389491231796e-05, "loss": 1.0245, "step": 12977 }, { "epoch": 2.1185257744581856, "grad_norm": 1.7132627964019775, "learning_rate": 1.9286271760996622e-05, "loss": 0.5513, "step": 12978 }, { "epoch": 2.1186890331006896, "grad_norm": 1.8609387874603271, "learning_rate": 1.9286154021410177e-05, "loss": 0.6054, "step": 12979 }, { "epoch": 2.118852291743194, "grad_norm": 1.9595474004745483, "learning_rate": 1.9286036272472572e-05, "loss": 0.59, "step": 12980 }, { "epoch": 2.1190155503856984, "grad_norm": 1.9528062343597412, "learning_rate": 1.9285918514183934e-05, "loss": 0.4918, "step": 12981 }, { "epoch": 2.119178809028203, "grad_norm": 1.806640863418579, "learning_rate": 1.9285800746544378e-05, "loss": 0.512, "step": 12982 }, { "epoch": 2.1193420676707073, "grad_norm": 1.9533607959747314, "learning_rate": 1.9285682969554025e-05, "loss": 0.5615, "step": 12983 }, { "epoch": 2.1195053263132118, "grad_norm": 1.6812686920166016, "learning_rate": 1.9285565183212987e-05, "loss": 0.5775, "step": 12984 }, { "epoch": 2.119668584955716, "grad_norm": 2.073803663253784, "learning_rate": 1.9285447387521394e-05, "loss": 0.7774, "step": 12985 }, { "epoch": 2.1198318435982206, "grad_norm": 1.6792383193969727, "learning_rate": 1.9285329582479353e-05, "loss": 0.5182, "step": 12986 }, { "epoch": 2.119995102240725, "grad_norm": 1.5999168157577515, "learning_rate": 1.928521176808699e-05, "loss": 0.5586, "step": 12987 }, { "epoch": 2.120158360883229, "grad_norm": 1.8149020671844482, "learning_rate": 1.928509394434442e-05, "loss": 0.5401, "step": 12988 }, { "epoch": 2.1203216195257335, "grad_norm": 1.7051281929016113, "learning_rate": 1.928497611125176e-05, "loss": 0.5132, "step": 12989 }, { "epoch": 2.120484878168238, "grad_norm": 1.6804461479187012, "learning_rate": 1.9284858268809135e-05, "loss": 0.5076, "step": 12990 }, { "epoch": 2.1206481368107424, "grad_norm": 1.8424662351608276, "learning_rate": 1.9284740417016663e-05, "loss": 0.5278, "step": 12991 }, { "epoch": 2.120811395453247, "grad_norm": 1.6963834762573242, "learning_rate": 1.9284622555874457e-05, "loss": 0.6294, "step": 12992 }, { "epoch": 2.1209746540957513, "grad_norm": 1.7007827758789062, "learning_rate": 1.9284504685382638e-05, "loss": 0.4885, "step": 12993 }, { "epoch": 2.1211379127382557, "grad_norm": 1.9952865839004517, "learning_rate": 1.9284386805541323e-05, "loss": 0.5565, "step": 12994 }, { "epoch": 2.12130117138076, "grad_norm": 1.8487213850021362, "learning_rate": 1.9284268916350637e-05, "loss": 0.6852, "step": 12995 }, { "epoch": 2.1214644300232646, "grad_norm": 1.9019463062286377, "learning_rate": 1.9284151017810694e-05, "loss": 0.5997, "step": 12996 }, { "epoch": 2.1216276886657686, "grad_norm": 1.7040404081344604, "learning_rate": 1.928403310992161e-05, "loss": 0.5941, "step": 12997 }, { "epoch": 2.121790947308273, "grad_norm": 1.6581823825836182, "learning_rate": 1.928391519268351e-05, "loss": 0.4844, "step": 12998 }, { "epoch": 2.1219542059507774, "grad_norm": 1.5634639263153076, "learning_rate": 1.928379726609651e-05, "loss": 0.5327, "step": 12999 }, { "epoch": 2.122117464593282, "grad_norm": 1.648926019668579, "learning_rate": 1.9283679330160726e-05, "loss": 0.5486, "step": 13000 }, { "epoch": 2.1222807232357863, "grad_norm": 1.9633926153182983, "learning_rate": 1.9283561384876284e-05, "loss": 0.6063, "step": 13001 }, { "epoch": 2.1224439818782908, "grad_norm": 1.9149971008300781, "learning_rate": 1.9283443430243298e-05, "loss": 0.5935, "step": 13002 }, { "epoch": 2.122607240520795, "grad_norm": 1.838114619255066, "learning_rate": 1.9283325466261883e-05, "loss": 0.5686, "step": 13003 }, { "epoch": 2.1227704991632996, "grad_norm": 1.6921919584274292, "learning_rate": 1.928320749293216e-05, "loss": 0.553, "step": 13004 }, { "epoch": 2.1229337578058036, "grad_norm": 1.5522513389587402, "learning_rate": 1.9283089510254255e-05, "loss": 0.4657, "step": 13005 }, { "epoch": 2.123097016448308, "grad_norm": 1.7602649927139282, "learning_rate": 1.9282971518228278e-05, "loss": 0.6145, "step": 13006 }, { "epoch": 2.1232602750908125, "grad_norm": 1.9693505764007568, "learning_rate": 1.928285351685435e-05, "loss": 0.5497, "step": 13007 }, { "epoch": 2.123423533733317, "grad_norm": 1.6961129903793335, "learning_rate": 1.9282735506132594e-05, "loss": 0.4944, "step": 13008 }, { "epoch": 2.1235867923758214, "grad_norm": 1.7691706418991089, "learning_rate": 1.9282617486063125e-05, "loss": 0.5929, "step": 13009 }, { "epoch": 2.123750051018326, "grad_norm": 1.7694048881530762, "learning_rate": 1.9282499456646064e-05, "loss": 0.5965, "step": 13010 }, { "epoch": 2.1239133096608303, "grad_norm": 1.679465651512146, "learning_rate": 1.9282381417881528e-05, "loss": 0.5286, "step": 13011 }, { "epoch": 2.1240765683033347, "grad_norm": 2.418351888656616, "learning_rate": 1.9282263369769633e-05, "loss": 0.6385, "step": 13012 }, { "epoch": 2.124239826945839, "grad_norm": 1.5249971151351929, "learning_rate": 1.9282145312310507e-05, "loss": 0.4818, "step": 13013 }, { "epoch": 2.124403085588343, "grad_norm": 1.717676043510437, "learning_rate": 1.9282027245504256e-05, "loss": 0.5133, "step": 13014 }, { "epoch": 2.1245663442308476, "grad_norm": 1.8878817558288574, "learning_rate": 1.928190916935101e-05, "loss": 0.5087, "step": 13015 }, { "epoch": 2.124729602873352, "grad_norm": 1.6088862419128418, "learning_rate": 1.9281791083850886e-05, "loss": 0.4869, "step": 13016 }, { "epoch": 2.1248928615158564, "grad_norm": 1.5224419832229614, "learning_rate": 1.9281672989004e-05, "loss": 0.5759, "step": 13017 }, { "epoch": 2.125056120158361, "grad_norm": 1.6167081594467163, "learning_rate": 1.928155488481047e-05, "loss": 0.5962, "step": 13018 }, { "epoch": 2.1252193788008653, "grad_norm": 1.6737920045852661, "learning_rate": 1.9281436771270417e-05, "loss": 0.5134, "step": 13019 }, { "epoch": 2.1253826374433697, "grad_norm": 1.5267287492752075, "learning_rate": 1.928131864838396e-05, "loss": 0.5508, "step": 13020 }, { "epoch": 2.125545896085874, "grad_norm": 1.6343398094177246, "learning_rate": 1.9281200516151216e-05, "loss": 0.5567, "step": 13021 }, { "epoch": 2.1257091547283786, "grad_norm": 1.6703914403915405, "learning_rate": 1.928108237457231e-05, "loss": 0.6666, "step": 13022 }, { "epoch": 2.1258724133708826, "grad_norm": 1.598329782485962, "learning_rate": 1.928096422364735e-05, "loss": 0.5472, "step": 13023 }, { "epoch": 2.126035672013387, "grad_norm": 1.754770040512085, "learning_rate": 1.9280846063376465e-05, "loss": 0.6124, "step": 13024 }, { "epoch": 2.1261989306558915, "grad_norm": 1.6449315547943115, "learning_rate": 1.928072789375977e-05, "loss": 0.4821, "step": 13025 }, { "epoch": 2.126362189298396, "grad_norm": 1.987069845199585, "learning_rate": 1.9280609714797383e-05, "loss": 0.5595, "step": 13026 }, { "epoch": 2.1265254479409004, "grad_norm": 1.9565759897232056, "learning_rate": 1.928049152648943e-05, "loss": 0.6535, "step": 13027 }, { "epoch": 2.126688706583405, "grad_norm": 1.4868226051330566, "learning_rate": 1.928037332883602e-05, "loss": 0.5597, "step": 13028 }, { "epoch": 2.1268519652259092, "grad_norm": 1.940415620803833, "learning_rate": 1.9280255121837276e-05, "loss": 0.5797, "step": 13029 }, { "epoch": 2.1270152238684137, "grad_norm": 1.524004340171814, "learning_rate": 1.9280136905493322e-05, "loss": 0.5554, "step": 13030 }, { "epoch": 2.127178482510918, "grad_norm": 1.7778284549713135, "learning_rate": 1.928001867980427e-05, "loss": 0.5625, "step": 13031 }, { "epoch": 2.127341741153422, "grad_norm": 1.687001347541809, "learning_rate": 1.927990044477024e-05, "loss": 0.6283, "step": 13032 }, { "epoch": 2.1275049997959266, "grad_norm": 2.341477155685425, "learning_rate": 1.927978220039135e-05, "loss": 0.6157, "step": 13033 }, { "epoch": 2.127668258438431, "grad_norm": 2.1837122440338135, "learning_rate": 1.927966394666773e-05, "loss": 0.6189, "step": 13034 }, { "epoch": 2.1278315170809354, "grad_norm": 1.8442182540893555, "learning_rate": 1.9279545683599482e-05, "loss": 0.4427, "step": 13035 }, { "epoch": 2.12799477572344, "grad_norm": 1.9116489887237549, "learning_rate": 1.927942741118674e-05, "loss": 0.6565, "step": 13036 }, { "epoch": 2.1281580343659443, "grad_norm": 2.3225440979003906, "learning_rate": 1.9279309129429617e-05, "loss": 0.8159, "step": 13037 }, { "epoch": 2.1283212930084487, "grad_norm": 1.9097423553466797, "learning_rate": 1.927919083832823e-05, "loss": 0.5791, "step": 13038 }, { "epoch": 2.128484551650953, "grad_norm": 1.9382257461547852, "learning_rate": 1.92790725378827e-05, "loss": 0.5696, "step": 13039 }, { "epoch": 2.128647810293457, "grad_norm": 1.5638095140457153, "learning_rate": 1.927895422809315e-05, "loss": 0.5278, "step": 13040 }, { "epoch": 2.1288110689359616, "grad_norm": 1.467095136642456, "learning_rate": 1.9278835908959694e-05, "loss": 0.5028, "step": 13041 }, { "epoch": 2.128974327578466, "grad_norm": 1.5715941190719604, "learning_rate": 1.927871758048245e-05, "loss": 0.5404, "step": 13042 }, { "epoch": 2.1291375862209705, "grad_norm": 1.6392290592193604, "learning_rate": 1.927859924266154e-05, "loss": 0.6445, "step": 13043 }, { "epoch": 2.129300844863475, "grad_norm": 2.0634756088256836, "learning_rate": 1.9278480895497086e-05, "loss": 0.6739, "step": 13044 }, { "epoch": 2.1294641035059794, "grad_norm": 1.6276482343673706, "learning_rate": 1.92783625389892e-05, "loss": 0.4989, "step": 13045 }, { "epoch": 2.129627362148484, "grad_norm": 1.6199721097946167, "learning_rate": 1.927824417313801e-05, "loss": 0.5221, "step": 13046 }, { "epoch": 2.1297906207909882, "grad_norm": 1.7544598579406738, "learning_rate": 1.9278125797943626e-05, "loss": 0.6904, "step": 13047 }, { "epoch": 2.1299538794334927, "grad_norm": 1.6774381399154663, "learning_rate": 1.9278007413406176e-05, "loss": 0.5101, "step": 13048 }, { "epoch": 2.130117138075997, "grad_norm": 1.7682301998138428, "learning_rate": 1.9277889019525773e-05, "loss": 0.5897, "step": 13049 }, { "epoch": 2.130280396718501, "grad_norm": 1.5831581354141235, "learning_rate": 1.927777061630254e-05, "loss": 0.5085, "step": 13050 }, { "epoch": 2.1304436553610056, "grad_norm": 2.016889810562134, "learning_rate": 1.9277652203736593e-05, "loss": 0.5934, "step": 13051 }, { "epoch": 2.13060691400351, "grad_norm": 1.8319611549377441, "learning_rate": 1.9277533781828053e-05, "loss": 0.7135, "step": 13052 }, { "epoch": 2.1307701726460144, "grad_norm": 1.475691795349121, "learning_rate": 1.9277415350577037e-05, "loss": 0.4819, "step": 13053 }, { "epoch": 2.130933431288519, "grad_norm": 1.8789970874786377, "learning_rate": 1.927729690998367e-05, "loss": 0.6174, "step": 13054 }, { "epoch": 2.1310966899310233, "grad_norm": 1.7319424152374268, "learning_rate": 1.9277178460048065e-05, "loss": 0.5609, "step": 13055 }, { "epoch": 2.1312599485735277, "grad_norm": 1.6316282749176025, "learning_rate": 1.9277060000770342e-05, "loss": 0.4712, "step": 13056 }, { "epoch": 2.131423207216032, "grad_norm": 1.7025171518325806, "learning_rate": 1.9276941532150625e-05, "loss": 0.5782, "step": 13057 }, { "epoch": 2.131586465858536, "grad_norm": 1.869409203529358, "learning_rate": 1.9276823054189032e-05, "loss": 0.5458, "step": 13058 }, { "epoch": 2.1317497245010406, "grad_norm": 1.722122073173523, "learning_rate": 1.9276704566885676e-05, "loss": 0.6209, "step": 13059 }, { "epoch": 2.131912983143545, "grad_norm": 1.9541490077972412, "learning_rate": 1.9276586070240684e-05, "loss": 0.6257, "step": 13060 }, { "epoch": 2.1320762417860495, "grad_norm": 1.6492030620574951, "learning_rate": 1.9276467564254173e-05, "loss": 0.5336, "step": 13061 }, { "epoch": 2.132239500428554, "grad_norm": 1.7177084684371948, "learning_rate": 1.9276349048926257e-05, "loss": 0.5809, "step": 13062 }, { "epoch": 2.1324027590710584, "grad_norm": 1.829740047454834, "learning_rate": 1.9276230524257066e-05, "loss": 0.6908, "step": 13063 }, { "epoch": 2.132566017713563, "grad_norm": 1.8301079273223877, "learning_rate": 1.927611199024671e-05, "loss": 0.5622, "step": 13064 }, { "epoch": 2.1327292763560672, "grad_norm": 1.6498534679412842, "learning_rate": 1.9275993446895312e-05, "loss": 0.5438, "step": 13065 }, { "epoch": 2.1328925349985717, "grad_norm": 1.8239853382110596, "learning_rate": 1.927587489420299e-05, "loss": 0.5542, "step": 13066 }, { "epoch": 2.1330557936410757, "grad_norm": 1.7599238157272339, "learning_rate": 1.9275756332169865e-05, "loss": 0.5621, "step": 13067 }, { "epoch": 2.13321905228358, "grad_norm": 1.4714633226394653, "learning_rate": 1.927563776079606e-05, "loss": 0.5733, "step": 13068 }, { "epoch": 2.1333823109260845, "grad_norm": 1.7328193187713623, "learning_rate": 1.9275519180081682e-05, "loss": 0.5929, "step": 13069 }, { "epoch": 2.133545569568589, "grad_norm": 1.851211428642273, "learning_rate": 1.9275400590026866e-05, "loss": 0.5593, "step": 13070 }, { "epoch": 2.1337088282110934, "grad_norm": 1.7401132583618164, "learning_rate": 1.927528199063172e-05, "loss": 0.6928, "step": 13071 }, { "epoch": 2.133872086853598, "grad_norm": 1.8318625688552856, "learning_rate": 1.9275163381896368e-05, "loss": 0.6206, "step": 13072 }, { "epoch": 2.1340353454961023, "grad_norm": 1.8565585613250732, "learning_rate": 1.927504476382093e-05, "loss": 0.6266, "step": 13073 }, { "epoch": 2.1341986041386067, "grad_norm": 1.7599871158599854, "learning_rate": 1.9274926136405524e-05, "loss": 0.566, "step": 13074 }, { "epoch": 2.134361862781111, "grad_norm": 1.7907156944274902, "learning_rate": 1.927480749965027e-05, "loss": 0.6216, "step": 13075 }, { "epoch": 2.134525121423615, "grad_norm": 1.6799432039260864, "learning_rate": 1.9274688853555288e-05, "loss": 0.5685, "step": 13076 }, { "epoch": 2.1346883800661196, "grad_norm": 1.7050100564956665, "learning_rate": 1.9274570198120696e-05, "loss": 0.5368, "step": 13077 }, { "epoch": 2.134851638708624, "grad_norm": 1.5879122018814087, "learning_rate": 1.9274451533346617e-05, "loss": 0.5423, "step": 13078 }, { "epoch": 2.1350148973511285, "grad_norm": 1.7706385850906372, "learning_rate": 1.9274332859233163e-05, "loss": 0.5228, "step": 13079 }, { "epoch": 2.135178155993633, "grad_norm": 1.5726711750030518, "learning_rate": 1.927421417578046e-05, "loss": 0.518, "step": 13080 }, { "epoch": 2.1353414146361374, "grad_norm": 1.7866709232330322, "learning_rate": 1.9274095482988627e-05, "loss": 0.595, "step": 13081 }, { "epoch": 2.135504673278642, "grad_norm": 1.7645374536514282, "learning_rate": 1.927397678085778e-05, "loss": 0.574, "step": 13082 }, { "epoch": 2.1356679319211462, "grad_norm": 1.7819381952285767, "learning_rate": 1.927385806938804e-05, "loss": 0.7238, "step": 13083 }, { "epoch": 2.1358311905636507, "grad_norm": 1.8026423454284668, "learning_rate": 1.927373934857953e-05, "loss": 0.674, "step": 13084 }, { "epoch": 2.1359944492061547, "grad_norm": 2.2905080318450928, "learning_rate": 1.927362061843237e-05, "loss": 0.7526, "step": 13085 }, { "epoch": 2.136157707848659, "grad_norm": 1.7917044162750244, "learning_rate": 1.9273501878946672e-05, "loss": 0.6069, "step": 13086 }, { "epoch": 2.1363209664911635, "grad_norm": 1.6695772409439087, "learning_rate": 1.927338313012256e-05, "loss": 0.5842, "step": 13087 }, { "epoch": 2.136484225133668, "grad_norm": 2.1657235622406006, "learning_rate": 1.9273264371960155e-05, "loss": 0.7708, "step": 13088 }, { "epoch": 2.1366474837761724, "grad_norm": 1.5874484777450562, "learning_rate": 1.9273145604459577e-05, "loss": 0.4867, "step": 13089 }, { "epoch": 2.136810742418677, "grad_norm": 2.5058553218841553, "learning_rate": 1.9273026827620942e-05, "loss": 0.6271, "step": 13090 }, { "epoch": 2.1369740010611813, "grad_norm": 1.798949122428894, "learning_rate": 1.9272908041444372e-05, "loss": 0.6136, "step": 13091 }, { "epoch": 2.1371372597036857, "grad_norm": 1.543386697769165, "learning_rate": 1.9272789245929985e-05, "loss": 0.4961, "step": 13092 }, { "epoch": 2.1373005183461897, "grad_norm": 1.8983169794082642, "learning_rate": 1.92726704410779e-05, "loss": 0.615, "step": 13093 }, { "epoch": 2.137463776988694, "grad_norm": 1.5488721132278442, "learning_rate": 1.927255162688824e-05, "loss": 0.4531, "step": 13094 }, { "epoch": 2.1376270356311986, "grad_norm": 1.9322967529296875, "learning_rate": 1.9272432803361124e-05, "loss": 0.5782, "step": 13095 }, { "epoch": 2.137790294273703, "grad_norm": 1.7699416875839233, "learning_rate": 1.9272313970496674e-05, "loss": 0.5419, "step": 13096 }, { "epoch": 2.1379535529162075, "grad_norm": 1.9889681339263916, "learning_rate": 1.9272195128295e-05, "loss": 0.5764, "step": 13097 }, { "epoch": 2.138116811558712, "grad_norm": 1.914892315864563, "learning_rate": 1.927207627675623e-05, "loss": 0.5599, "step": 13098 }, { "epoch": 2.1382800702012164, "grad_norm": 1.8267251253128052, "learning_rate": 1.9271957415880482e-05, "loss": 0.6368, "step": 13099 }, { "epoch": 2.138443328843721, "grad_norm": 1.7069894075393677, "learning_rate": 1.9271838545667876e-05, "loss": 0.5844, "step": 13100 }, { "epoch": 2.1386065874862252, "grad_norm": 2.155895709991455, "learning_rate": 1.927171966611853e-05, "loss": 0.64, "step": 13101 }, { "epoch": 2.1387698461287297, "grad_norm": 1.8097240924835205, "learning_rate": 1.9271600777232567e-05, "loss": 0.5209, "step": 13102 }, { "epoch": 2.1389331047712337, "grad_norm": 1.8916829824447632, "learning_rate": 1.9271481879010103e-05, "loss": 0.5459, "step": 13103 }, { "epoch": 2.139096363413738, "grad_norm": 1.7086131572723389, "learning_rate": 1.9271362971451255e-05, "loss": 0.5533, "step": 13104 }, { "epoch": 2.1392596220562425, "grad_norm": 1.851006031036377, "learning_rate": 1.9271244054556152e-05, "loss": 0.6274, "step": 13105 }, { "epoch": 2.139422880698747, "grad_norm": 1.6000010967254639, "learning_rate": 1.9271125128324906e-05, "loss": 0.5284, "step": 13106 }, { "epoch": 2.1395861393412514, "grad_norm": 1.5376876592636108, "learning_rate": 1.9271006192757643e-05, "loss": 0.4894, "step": 13107 }, { "epoch": 2.139749397983756, "grad_norm": 1.7992032766342163, "learning_rate": 1.9270887247854478e-05, "loss": 0.5119, "step": 13108 }, { "epoch": 2.1399126566262603, "grad_norm": 1.824082374572754, "learning_rate": 1.927076829361553e-05, "loss": 0.5893, "step": 13109 }, { "epoch": 2.1400759152687647, "grad_norm": 1.5587283372879028, "learning_rate": 1.927064933004092e-05, "loss": 0.5201, "step": 13110 }, { "epoch": 2.1402391739112687, "grad_norm": 1.8371176719665527, "learning_rate": 1.927053035713077e-05, "loss": 0.6409, "step": 13111 }, { "epoch": 2.140402432553773, "grad_norm": 1.7079236507415771, "learning_rate": 1.92704113748852e-05, "loss": 0.5633, "step": 13112 }, { "epoch": 2.1405656911962776, "grad_norm": 2.1155457496643066, "learning_rate": 1.927029238330433e-05, "loss": 0.6251, "step": 13113 }, { "epoch": 2.140728949838782, "grad_norm": 1.4213157892227173, "learning_rate": 1.9270173382388274e-05, "loss": 0.4607, "step": 13114 }, { "epoch": 2.1408922084812865, "grad_norm": 1.6777281761169434, "learning_rate": 1.9270054372137154e-05, "loss": 0.5148, "step": 13115 }, { "epoch": 2.141055467123791, "grad_norm": 1.7544119358062744, "learning_rate": 1.9269935352551097e-05, "loss": 0.5974, "step": 13116 }, { "epoch": 2.1412187257662953, "grad_norm": 1.826151728630066, "learning_rate": 1.9269816323630215e-05, "loss": 0.5302, "step": 13117 }, { "epoch": 2.1413819844088, "grad_norm": 1.797020435333252, "learning_rate": 1.926969728537463e-05, "loss": 0.5746, "step": 13118 }, { "epoch": 2.1415452430513042, "grad_norm": 1.9144299030303955, "learning_rate": 1.926957823778446e-05, "loss": 0.583, "step": 13119 }, { "epoch": 2.141708501693808, "grad_norm": 1.6890424489974976, "learning_rate": 1.9269459180859834e-05, "loss": 0.5882, "step": 13120 }, { "epoch": 2.1418717603363127, "grad_norm": 1.6111280918121338, "learning_rate": 1.9269340114600862e-05, "loss": 0.4042, "step": 13121 }, { "epoch": 2.142035018978817, "grad_norm": 2.025073766708374, "learning_rate": 1.9269221039007666e-05, "loss": 0.6863, "step": 13122 }, { "epoch": 2.1421982776213215, "grad_norm": 1.650171160697937, "learning_rate": 1.9269101954080366e-05, "loss": 0.5196, "step": 13123 }, { "epoch": 2.142361536263826, "grad_norm": 1.6853817701339722, "learning_rate": 1.9268982859819085e-05, "loss": 0.5148, "step": 13124 }, { "epoch": 2.1425247949063304, "grad_norm": 1.796764612197876, "learning_rate": 1.9268863756223937e-05, "loss": 0.5443, "step": 13125 }, { "epoch": 2.142688053548835, "grad_norm": 1.546323299407959, "learning_rate": 1.926874464329505e-05, "loss": 0.5152, "step": 13126 }, { "epoch": 2.1428513121913393, "grad_norm": 1.8875670433044434, "learning_rate": 1.9268625521032536e-05, "loss": 0.5158, "step": 13127 }, { "epoch": 2.1430145708338437, "grad_norm": 2.0400378704071045, "learning_rate": 1.926850638943652e-05, "loss": 0.6719, "step": 13128 }, { "epoch": 2.1431778294763477, "grad_norm": 1.8072713613510132, "learning_rate": 1.926838724850712e-05, "loss": 0.5579, "step": 13129 }, { "epoch": 2.143341088118852, "grad_norm": 1.831193208694458, "learning_rate": 1.926826809824446e-05, "loss": 0.6237, "step": 13130 }, { "epoch": 2.1435043467613566, "grad_norm": 1.7746719121932983, "learning_rate": 1.926814893864865e-05, "loss": 0.5449, "step": 13131 }, { "epoch": 2.143667605403861, "grad_norm": 1.7644745111465454, "learning_rate": 1.9268029769719824e-05, "loss": 0.5521, "step": 13132 }, { "epoch": 2.1438308640463655, "grad_norm": 1.879843831062317, "learning_rate": 1.926791059145809e-05, "loss": 0.6075, "step": 13133 }, { "epoch": 2.14399412268887, "grad_norm": 1.881650447845459, "learning_rate": 1.9267791403863575e-05, "loss": 0.6647, "step": 13134 }, { "epoch": 2.1441573813313743, "grad_norm": 1.6046339273452759, "learning_rate": 1.9267672206936395e-05, "loss": 0.4598, "step": 13135 }, { "epoch": 2.144320639973879, "grad_norm": 2.1067168712615967, "learning_rate": 1.9267553000676667e-05, "loss": 0.6689, "step": 13136 }, { "epoch": 2.144483898616383, "grad_norm": 1.7307701110839844, "learning_rate": 1.9267433785084523e-05, "loss": 0.4428, "step": 13137 }, { "epoch": 2.144647157258887, "grad_norm": 1.6352276802062988, "learning_rate": 1.9267314560160072e-05, "loss": 0.5099, "step": 13138 }, { "epoch": 2.1448104159013917, "grad_norm": 2.214813232421875, "learning_rate": 1.926719532590344e-05, "loss": 0.6402, "step": 13139 }, { "epoch": 2.144973674543896, "grad_norm": 1.831155776977539, "learning_rate": 1.926707608231474e-05, "loss": 0.5639, "step": 13140 }, { "epoch": 2.1451369331864005, "grad_norm": 1.5921908617019653, "learning_rate": 1.9266956829394103e-05, "loss": 0.5579, "step": 13141 }, { "epoch": 2.145300191828905, "grad_norm": 2.1882338523864746, "learning_rate": 1.9266837567141638e-05, "loss": 0.691, "step": 13142 }, { "epoch": 2.1454634504714094, "grad_norm": 1.9765212535858154, "learning_rate": 1.9266718295557472e-05, "loss": 0.6538, "step": 13143 }, { "epoch": 2.145626709113914, "grad_norm": 1.86708402633667, "learning_rate": 1.9266599014641724e-05, "loss": 0.5008, "step": 13144 }, { "epoch": 2.1457899677564183, "grad_norm": 1.6254773139953613, "learning_rate": 1.926647972439451e-05, "loss": 0.5784, "step": 13145 }, { "epoch": 2.1459532263989223, "grad_norm": 1.7189005613327026, "learning_rate": 1.9266360424815957e-05, "loss": 0.5603, "step": 13146 }, { "epoch": 2.1461164850414267, "grad_norm": 1.7170103788375854, "learning_rate": 1.926624111590618e-05, "loss": 0.5072, "step": 13147 }, { "epoch": 2.146279743683931, "grad_norm": 1.7181285619735718, "learning_rate": 1.92661217976653e-05, "loss": 0.5086, "step": 13148 }, { "epoch": 2.1464430023264356, "grad_norm": 2.139894485473633, "learning_rate": 1.926600247009344e-05, "loss": 0.657, "step": 13149 }, { "epoch": 2.14660626096894, "grad_norm": 1.9918763637542725, "learning_rate": 1.9265883133190715e-05, "loss": 0.6001, "step": 13150 }, { "epoch": 2.1467695196114445, "grad_norm": 1.7548608779907227, "learning_rate": 1.926576378695725e-05, "loss": 0.6883, "step": 13151 }, { "epoch": 2.146932778253949, "grad_norm": 1.5560482740402222, "learning_rate": 1.9265644431393166e-05, "loss": 0.4754, "step": 13152 }, { "epoch": 2.1470960368964533, "grad_norm": 1.2967911958694458, "learning_rate": 1.9265525066498577e-05, "loss": 0.4751, "step": 13153 }, { "epoch": 2.147259295538958, "grad_norm": 2.20422625541687, "learning_rate": 1.9265405692273608e-05, "loss": 0.6701, "step": 13154 }, { "epoch": 2.1474225541814618, "grad_norm": 1.3159680366516113, "learning_rate": 1.9265286308718374e-05, "loss": 0.4389, "step": 13155 }, { "epoch": 2.147585812823966, "grad_norm": 2.35994815826416, "learning_rate": 1.9265166915833005e-05, "loss": 0.64, "step": 13156 }, { "epoch": 2.1477490714664706, "grad_norm": 1.6138525009155273, "learning_rate": 1.926504751361761e-05, "loss": 0.4502, "step": 13157 }, { "epoch": 2.147912330108975, "grad_norm": 1.8167921304702759, "learning_rate": 1.9264928102072318e-05, "loss": 0.5672, "step": 13158 }, { "epoch": 2.1480755887514795, "grad_norm": 1.911700963973999, "learning_rate": 1.9264808681197246e-05, "loss": 0.5956, "step": 13159 }, { "epoch": 2.148238847393984, "grad_norm": 1.746031403541565, "learning_rate": 1.9264689250992514e-05, "loss": 0.6001, "step": 13160 }, { "epoch": 2.1484021060364884, "grad_norm": 1.4843804836273193, "learning_rate": 1.926456981145824e-05, "loss": 0.4442, "step": 13161 }, { "epoch": 2.148565364678993, "grad_norm": 2.0404913425445557, "learning_rate": 1.926445036259455e-05, "loss": 0.5969, "step": 13162 }, { "epoch": 2.1487286233214973, "grad_norm": 1.5024774074554443, "learning_rate": 1.9264330904401557e-05, "loss": 0.4421, "step": 13163 }, { "epoch": 2.1488918819640013, "grad_norm": 1.955682635307312, "learning_rate": 1.926421143687939e-05, "loss": 0.596, "step": 13164 }, { "epoch": 2.1490551406065057, "grad_norm": 1.6358774900436401, "learning_rate": 1.926409196002816e-05, "loss": 0.5451, "step": 13165 }, { "epoch": 2.14921839924901, "grad_norm": 2.0252084732055664, "learning_rate": 1.9263972473847995e-05, "loss": 0.5922, "step": 13166 }, { "epoch": 2.1493816578915146, "grad_norm": 1.8088293075561523, "learning_rate": 1.926385297833901e-05, "loss": 0.5594, "step": 13167 }, { "epoch": 2.149544916534019, "grad_norm": 1.9169856309890747, "learning_rate": 1.9263733473501328e-05, "loss": 0.6319, "step": 13168 }, { "epoch": 2.1497081751765235, "grad_norm": 2.1439688205718994, "learning_rate": 1.926361395933507e-05, "loss": 0.6, "step": 13169 }, { "epoch": 2.149871433819028, "grad_norm": 2.30246639251709, "learning_rate": 1.9263494435840355e-05, "loss": 0.7072, "step": 13170 }, { "epoch": 2.1500346924615323, "grad_norm": 1.481549620628357, "learning_rate": 1.9263374903017303e-05, "loss": 0.5321, "step": 13171 }, { "epoch": 2.1501979511040368, "grad_norm": 1.6236931085586548, "learning_rate": 1.9263255360866037e-05, "loss": 0.4655, "step": 13172 }, { "epoch": 2.1503612097465408, "grad_norm": 1.913177728652954, "learning_rate": 1.926313580938667e-05, "loss": 0.6358, "step": 13173 }, { "epoch": 2.150524468389045, "grad_norm": 1.6828123331069946, "learning_rate": 1.926301624857933e-05, "loss": 0.5827, "step": 13174 }, { "epoch": 2.1506877270315496, "grad_norm": 1.8531734943389893, "learning_rate": 1.9262896678444138e-05, "loss": 0.6032, "step": 13175 }, { "epoch": 2.150850985674054, "grad_norm": 2.0760715007781982, "learning_rate": 1.9262777098981212e-05, "loss": 0.6962, "step": 13176 }, { "epoch": 2.1510142443165585, "grad_norm": 1.9740575551986694, "learning_rate": 1.926265751019067e-05, "loss": 0.6351, "step": 13177 }, { "epoch": 2.151177502959063, "grad_norm": 1.6746083498001099, "learning_rate": 1.926253791207263e-05, "loss": 0.6021, "step": 13178 }, { "epoch": 2.1513407616015674, "grad_norm": 1.5780713558197021, "learning_rate": 1.9262418304627224e-05, "loss": 0.4793, "step": 13179 }, { "epoch": 2.151504020244072, "grad_norm": 2.1342222690582275, "learning_rate": 1.926229868785456e-05, "loss": 0.7751, "step": 13180 }, { "epoch": 2.151667278886576, "grad_norm": 1.9073501825332642, "learning_rate": 1.9262179061754766e-05, "loss": 0.621, "step": 13181 }, { "epoch": 2.1518305375290803, "grad_norm": 1.872809886932373, "learning_rate": 1.926205942632796e-05, "loss": 0.613, "step": 13182 }, { "epoch": 2.1519937961715847, "grad_norm": 1.8838062286376953, "learning_rate": 1.9261939781574264e-05, "loss": 0.5195, "step": 13183 }, { "epoch": 2.152157054814089, "grad_norm": 1.78023099899292, "learning_rate": 1.9261820127493794e-05, "loss": 0.6679, "step": 13184 }, { "epoch": 2.1523203134565936, "grad_norm": 1.7167141437530518, "learning_rate": 1.9261700464086677e-05, "loss": 0.6751, "step": 13185 }, { "epoch": 2.152483572099098, "grad_norm": 2.216970205307007, "learning_rate": 1.9261580791353024e-05, "loss": 0.5846, "step": 13186 }, { "epoch": 2.1526468307416025, "grad_norm": 1.689347267150879, "learning_rate": 1.9261461109292968e-05, "loss": 0.5116, "step": 13187 }, { "epoch": 2.152810089384107, "grad_norm": 1.6530646085739136, "learning_rate": 1.9261341417906622e-05, "loss": 0.5166, "step": 13188 }, { "epoch": 2.1529733480266113, "grad_norm": 1.799710750579834, "learning_rate": 1.9261221717194105e-05, "loss": 0.588, "step": 13189 }, { "epoch": 2.1531366066691158, "grad_norm": 1.815379023551941, "learning_rate": 1.926110200715554e-05, "loss": 0.5929, "step": 13190 }, { "epoch": 2.1532998653116198, "grad_norm": 2.0140457153320312, "learning_rate": 1.9260982287791053e-05, "loss": 0.6932, "step": 13191 }, { "epoch": 2.153463123954124, "grad_norm": 1.8705366849899292, "learning_rate": 1.9260862559100756e-05, "loss": 0.642, "step": 13192 }, { "epoch": 2.1536263825966286, "grad_norm": 1.9946731328964233, "learning_rate": 1.926074282108477e-05, "loss": 0.6607, "step": 13193 }, { "epoch": 2.153789641239133, "grad_norm": 1.6843860149383545, "learning_rate": 1.926062307374322e-05, "loss": 0.5892, "step": 13194 }, { "epoch": 2.1539528998816375, "grad_norm": 1.8102991580963135, "learning_rate": 1.9260503317076228e-05, "loss": 0.5409, "step": 13195 }, { "epoch": 2.154116158524142, "grad_norm": 1.634926199913025, "learning_rate": 1.926038355108391e-05, "loss": 0.5554, "step": 13196 }, { "epoch": 2.1542794171666464, "grad_norm": 1.5880622863769531, "learning_rate": 1.9260263775766388e-05, "loss": 0.5064, "step": 13197 }, { "epoch": 2.154442675809151, "grad_norm": 1.5944206714630127, "learning_rate": 1.926014399112378e-05, "loss": 0.6022, "step": 13198 }, { "epoch": 2.154605934451655, "grad_norm": 1.6530132293701172, "learning_rate": 1.9260024197156216e-05, "loss": 0.5597, "step": 13199 }, { "epoch": 2.1547691930941593, "grad_norm": 1.8063910007476807, "learning_rate": 1.9259904393863804e-05, "loss": 0.5044, "step": 13200 }, { "epoch": 2.1549324517366637, "grad_norm": 1.698850393295288, "learning_rate": 1.9259784581246674e-05, "loss": 0.5267, "step": 13201 }, { "epoch": 2.155095710379168, "grad_norm": 1.6063040494918823, "learning_rate": 1.925966475930494e-05, "loss": 0.5123, "step": 13202 }, { "epoch": 2.1552589690216726, "grad_norm": 1.8648347854614258, "learning_rate": 1.925954492803873e-05, "loss": 0.5763, "step": 13203 }, { "epoch": 2.155422227664177, "grad_norm": 1.5986442565917969, "learning_rate": 1.925942508744816e-05, "loss": 0.5386, "step": 13204 }, { "epoch": 2.1555854863066815, "grad_norm": 1.562756061553955, "learning_rate": 1.9259305237533352e-05, "loss": 0.5908, "step": 13205 }, { "epoch": 2.155748744949186, "grad_norm": 1.5717968940734863, "learning_rate": 1.9259185378294424e-05, "loss": 0.4781, "step": 13206 }, { "epoch": 2.1559120035916903, "grad_norm": 1.7932546138763428, "learning_rate": 1.92590655097315e-05, "loss": 0.573, "step": 13207 }, { "epoch": 2.1560752622341943, "grad_norm": 1.8231627941131592, "learning_rate": 1.9258945631844697e-05, "loss": 0.5617, "step": 13208 }, { "epoch": 2.1562385208766988, "grad_norm": 2.121133804321289, "learning_rate": 1.925882574463414e-05, "loss": 0.6376, "step": 13209 }, { "epoch": 2.156401779519203, "grad_norm": 1.7184325456619263, "learning_rate": 1.925870584809995e-05, "loss": 0.5598, "step": 13210 }, { "epoch": 2.1565650381617076, "grad_norm": 2.033714532852173, "learning_rate": 1.9258585942242244e-05, "loss": 0.6907, "step": 13211 }, { "epoch": 2.156728296804212, "grad_norm": 1.9756031036376953, "learning_rate": 1.9258466027061143e-05, "loss": 0.6249, "step": 13212 }, { "epoch": 2.1568915554467165, "grad_norm": 1.8345876932144165, "learning_rate": 1.9258346102556768e-05, "loss": 0.5917, "step": 13213 }, { "epoch": 2.157054814089221, "grad_norm": 2.030219316482544, "learning_rate": 1.9258226168729247e-05, "loss": 0.6446, "step": 13214 }, { "epoch": 2.1572180727317254, "grad_norm": 1.7602249383926392, "learning_rate": 1.9258106225578688e-05, "loss": 0.5641, "step": 13215 }, { "epoch": 2.15738133137423, "grad_norm": 2.131706476211548, "learning_rate": 1.9257986273105224e-05, "loss": 0.6314, "step": 13216 }, { "epoch": 2.157544590016734, "grad_norm": 1.7575105428695679, "learning_rate": 1.9257866311308966e-05, "loss": 0.5612, "step": 13217 }, { "epoch": 2.1577078486592383, "grad_norm": 1.7343469858169556, "learning_rate": 1.925774634019004e-05, "loss": 0.5473, "step": 13218 }, { "epoch": 2.1578711073017427, "grad_norm": 1.8957282304763794, "learning_rate": 1.925762635974857e-05, "loss": 0.572, "step": 13219 }, { "epoch": 2.158034365944247, "grad_norm": 1.7767362594604492, "learning_rate": 1.925750636998467e-05, "loss": 0.5033, "step": 13220 }, { "epoch": 2.1581976245867516, "grad_norm": 1.8607797622680664, "learning_rate": 1.925738637089846e-05, "loss": 0.7595, "step": 13221 }, { "epoch": 2.158360883229256, "grad_norm": 1.633574366569519, "learning_rate": 1.9257266362490067e-05, "loss": 0.4543, "step": 13222 }, { "epoch": 2.1585241418717604, "grad_norm": 1.7657698392868042, "learning_rate": 1.925714634475961e-05, "loss": 0.5089, "step": 13223 }, { "epoch": 2.158687400514265, "grad_norm": 1.797600269317627, "learning_rate": 1.925702631770721e-05, "loss": 0.5885, "step": 13224 }, { "epoch": 2.1588506591567693, "grad_norm": 2.0702497959136963, "learning_rate": 1.9256906281332983e-05, "loss": 0.5202, "step": 13225 }, { "epoch": 2.1590139177992733, "grad_norm": 1.7848105430603027, "learning_rate": 1.9256786235637058e-05, "loss": 0.5817, "step": 13226 }, { "epoch": 2.1591771764417778, "grad_norm": 1.9321907758712769, "learning_rate": 1.925666618061955e-05, "loss": 0.6298, "step": 13227 }, { "epoch": 2.159340435084282, "grad_norm": 1.898276925086975, "learning_rate": 1.925654611628058e-05, "loss": 0.5927, "step": 13228 }, { "epoch": 2.1595036937267866, "grad_norm": 1.6375820636749268, "learning_rate": 1.9256426042620274e-05, "loss": 0.4826, "step": 13229 }, { "epoch": 2.159666952369291, "grad_norm": 1.6119370460510254, "learning_rate": 1.9256305959638748e-05, "loss": 0.4631, "step": 13230 }, { "epoch": 2.1598302110117955, "grad_norm": 1.9640681743621826, "learning_rate": 1.9256185867336123e-05, "loss": 0.6279, "step": 13231 }, { "epoch": 2.1599934696543, "grad_norm": 1.7074779272079468, "learning_rate": 1.9256065765712524e-05, "loss": 0.5932, "step": 13232 }, { "epoch": 2.1601567282968044, "grad_norm": 1.7291450500488281, "learning_rate": 1.9255945654768065e-05, "loss": 0.4429, "step": 13233 }, { "epoch": 2.1603199869393084, "grad_norm": 1.8338356018066406, "learning_rate": 1.9255825534502873e-05, "loss": 0.5627, "step": 13234 }, { "epoch": 2.160483245581813, "grad_norm": 1.673629879951477, "learning_rate": 1.925570540491707e-05, "loss": 0.5329, "step": 13235 }, { "epoch": 2.1606465042243173, "grad_norm": 1.596173882484436, "learning_rate": 1.9255585266010773e-05, "loss": 0.5372, "step": 13236 }, { "epoch": 2.1608097628668217, "grad_norm": 1.7379825115203857, "learning_rate": 1.9255465117784102e-05, "loss": 0.5289, "step": 13237 }, { "epoch": 2.160973021509326, "grad_norm": 2.1120975017547607, "learning_rate": 1.925534496023718e-05, "loss": 0.6401, "step": 13238 }, { "epoch": 2.1611362801518306, "grad_norm": 1.7900390625, "learning_rate": 1.925522479337013e-05, "loss": 0.5491, "step": 13239 }, { "epoch": 2.161299538794335, "grad_norm": 1.7088052034378052, "learning_rate": 1.9255104617183068e-05, "loss": 0.5099, "step": 13240 }, { "epoch": 2.1614627974368394, "grad_norm": 2.258354663848877, "learning_rate": 1.9254984431676122e-05, "loss": 0.5915, "step": 13241 }, { "epoch": 2.161626056079344, "grad_norm": 1.8004249334335327, "learning_rate": 1.925486423684941e-05, "loss": 0.6788, "step": 13242 }, { "epoch": 2.1617893147218483, "grad_norm": 1.8633472919464111, "learning_rate": 1.925474403270305e-05, "loss": 0.6255, "step": 13243 }, { "epoch": 2.1619525733643523, "grad_norm": 1.8590691089630127, "learning_rate": 1.9254623819237165e-05, "loss": 0.6347, "step": 13244 }, { "epoch": 2.1621158320068568, "grad_norm": 1.8551139831542969, "learning_rate": 1.9254503596451875e-05, "loss": 0.696, "step": 13245 }, { "epoch": 2.162279090649361, "grad_norm": 1.7692632675170898, "learning_rate": 1.9254383364347302e-05, "loss": 0.6011, "step": 13246 }, { "epoch": 2.1624423492918656, "grad_norm": 1.6236116886138916, "learning_rate": 1.9254263122923568e-05, "loss": 0.5363, "step": 13247 }, { "epoch": 2.16260560793437, "grad_norm": 1.745067834854126, "learning_rate": 1.9254142872180797e-05, "loss": 0.6588, "step": 13248 }, { "epoch": 2.1627688665768745, "grad_norm": 1.8074969053268433, "learning_rate": 1.9254022612119102e-05, "loss": 0.6116, "step": 13249 }, { "epoch": 2.162932125219379, "grad_norm": 1.5905216932296753, "learning_rate": 1.9253902342738612e-05, "loss": 0.5311, "step": 13250 }, { "epoch": 2.1630953838618834, "grad_norm": 1.7934671640396118, "learning_rate": 1.9253782064039444e-05, "loss": 0.4831, "step": 13251 }, { "epoch": 2.1632586425043874, "grad_norm": 1.6281468868255615, "learning_rate": 1.9253661776021718e-05, "loss": 0.5404, "step": 13252 }, { "epoch": 2.163421901146892, "grad_norm": 1.7358895540237427, "learning_rate": 1.925354147868556e-05, "loss": 0.5318, "step": 13253 }, { "epoch": 2.1635851597893963, "grad_norm": 1.7757686376571655, "learning_rate": 1.9253421172031086e-05, "loss": 0.524, "step": 13254 }, { "epoch": 2.1637484184319007, "grad_norm": 1.7743455171585083, "learning_rate": 1.925330085605842e-05, "loss": 0.5903, "step": 13255 }, { "epoch": 2.163911677074405, "grad_norm": 1.8542088270187378, "learning_rate": 1.9253180530767683e-05, "loss": 0.631, "step": 13256 }, { "epoch": 2.1640749357169096, "grad_norm": 1.8892920017242432, "learning_rate": 1.9253060196158994e-05, "loss": 0.6456, "step": 13257 }, { "epoch": 2.164238194359414, "grad_norm": 2.0208425521850586, "learning_rate": 1.9252939852232476e-05, "loss": 0.8602, "step": 13258 }, { "epoch": 2.1644014530019184, "grad_norm": 1.846900463104248, "learning_rate": 1.9252819498988253e-05, "loss": 0.6158, "step": 13259 }, { "epoch": 2.164564711644423, "grad_norm": 1.6417129039764404, "learning_rate": 1.925269913642644e-05, "loss": 0.5304, "step": 13260 }, { "epoch": 2.164727970286927, "grad_norm": 1.9658843278884888, "learning_rate": 1.9252578764547164e-05, "loss": 0.6263, "step": 13261 }, { "epoch": 2.1648912289294313, "grad_norm": 1.8743219375610352, "learning_rate": 1.925245838335054e-05, "loss": 0.5888, "step": 13262 }, { "epoch": 2.1650544875719357, "grad_norm": 2.058260917663574, "learning_rate": 1.9252337992836696e-05, "loss": 0.6393, "step": 13263 }, { "epoch": 2.16521774621444, "grad_norm": 1.5320395231246948, "learning_rate": 1.9252217593005752e-05, "loss": 0.4925, "step": 13264 }, { "epoch": 2.1653810048569446, "grad_norm": 1.7881627082824707, "learning_rate": 1.9252097183857822e-05, "loss": 0.5587, "step": 13265 }, { "epoch": 2.165544263499449, "grad_norm": 1.7892240285873413, "learning_rate": 1.9251976765393038e-05, "loss": 0.5129, "step": 13266 }, { "epoch": 2.1657075221419535, "grad_norm": 1.7081775665283203, "learning_rate": 1.925185633761151e-05, "loss": 0.5148, "step": 13267 }, { "epoch": 2.165870780784458, "grad_norm": 1.7175195217132568, "learning_rate": 1.9251735900513367e-05, "loss": 0.5027, "step": 13268 }, { "epoch": 2.166034039426962, "grad_norm": 1.7063868045806885, "learning_rate": 1.9251615454098732e-05, "loss": 0.6208, "step": 13269 }, { "epoch": 2.1661972980694664, "grad_norm": 1.3373922109603882, "learning_rate": 1.925149499836772e-05, "loss": 0.4396, "step": 13270 }, { "epoch": 2.166360556711971, "grad_norm": 1.4201699495315552, "learning_rate": 1.9251374533320454e-05, "loss": 0.4165, "step": 13271 }, { "epoch": 2.1665238153544752, "grad_norm": 1.8484610319137573, "learning_rate": 1.9251254058957058e-05, "loss": 0.5992, "step": 13272 }, { "epoch": 2.1666870739969797, "grad_norm": 1.8344168663024902, "learning_rate": 1.9251133575277652e-05, "loss": 0.6136, "step": 13273 }, { "epoch": 2.166850332639484, "grad_norm": 1.8022921085357666, "learning_rate": 1.9251013082282357e-05, "loss": 0.5364, "step": 13274 }, { "epoch": 2.1670135912819886, "grad_norm": 1.5622553825378418, "learning_rate": 1.9250892579971293e-05, "loss": 0.4909, "step": 13275 }, { "epoch": 2.167176849924493, "grad_norm": 2.1457207202911377, "learning_rate": 1.925077206834458e-05, "loss": 0.8582, "step": 13276 }, { "epoch": 2.1673401085669974, "grad_norm": 1.829325556755066, "learning_rate": 1.9250651547402345e-05, "loss": 0.5328, "step": 13277 }, { "epoch": 2.167503367209502, "grad_norm": 1.7083674669265747, "learning_rate": 1.925053101714471e-05, "loss": 0.5332, "step": 13278 }, { "epoch": 2.167666625852006, "grad_norm": 1.7561875581741333, "learning_rate": 1.9250410477571787e-05, "loss": 0.5152, "step": 13279 }, { "epoch": 2.1678298844945103, "grad_norm": 2.0389132499694824, "learning_rate": 1.9250289928683706e-05, "loss": 0.6242, "step": 13280 }, { "epoch": 2.1679931431370147, "grad_norm": 1.6548242568969727, "learning_rate": 1.9250169370480582e-05, "loss": 0.5099, "step": 13281 }, { "epoch": 2.168156401779519, "grad_norm": 1.7876954078674316, "learning_rate": 1.9250048802962543e-05, "loss": 0.6512, "step": 13282 }, { "epoch": 2.1683196604220236, "grad_norm": 1.6596119403839111, "learning_rate": 1.924992822612971e-05, "loss": 0.5427, "step": 13283 }, { "epoch": 2.168482919064528, "grad_norm": 1.7031524181365967, "learning_rate": 1.9249807639982197e-05, "loss": 0.5989, "step": 13284 }, { "epoch": 2.1686461777070325, "grad_norm": 1.5483020544052124, "learning_rate": 1.924968704452013e-05, "loss": 0.4282, "step": 13285 }, { "epoch": 2.168809436349537, "grad_norm": 1.8957362174987793, "learning_rate": 1.9249566439743636e-05, "loss": 0.6492, "step": 13286 }, { "epoch": 2.168972694992041, "grad_norm": 1.879695177078247, "learning_rate": 1.9249445825652825e-05, "loss": 0.5606, "step": 13287 }, { "epoch": 2.1691359536345454, "grad_norm": 1.5606549978256226, "learning_rate": 1.9249325202247826e-05, "loss": 0.5832, "step": 13288 }, { "epoch": 2.16929921227705, "grad_norm": 1.8519388437271118, "learning_rate": 1.924920456952876e-05, "loss": 0.5079, "step": 13289 }, { "epoch": 2.1694624709195542, "grad_norm": 1.4684165716171265, "learning_rate": 1.924908392749575e-05, "loss": 0.4344, "step": 13290 }, { "epoch": 2.1696257295620587, "grad_norm": 1.822198748588562, "learning_rate": 1.924896327614891e-05, "loss": 0.6252, "step": 13291 }, { "epoch": 2.169788988204563, "grad_norm": 1.4850627183914185, "learning_rate": 1.924884261548837e-05, "loss": 0.4389, "step": 13292 }, { "epoch": 2.1699522468470676, "grad_norm": 1.7573785781860352, "learning_rate": 1.9248721945514248e-05, "loss": 0.58, "step": 13293 }, { "epoch": 2.170115505489572, "grad_norm": 1.7373608350753784, "learning_rate": 1.924860126622666e-05, "loss": 0.5564, "step": 13294 }, { "epoch": 2.1702787641320764, "grad_norm": 2.0441455841064453, "learning_rate": 1.924848057762574e-05, "loss": 0.6987, "step": 13295 }, { "epoch": 2.1704420227745804, "grad_norm": 1.7932547330856323, "learning_rate": 1.92483598797116e-05, "loss": 0.5548, "step": 13296 }, { "epoch": 2.170605281417085, "grad_norm": 1.781400203704834, "learning_rate": 1.924823917248436e-05, "loss": 0.5621, "step": 13297 }, { "epoch": 2.1707685400595893, "grad_norm": 1.7107067108154297, "learning_rate": 1.9248118455944153e-05, "loss": 0.5102, "step": 13298 }, { "epoch": 2.1709317987020937, "grad_norm": 1.8702898025512695, "learning_rate": 1.924799773009109e-05, "loss": 0.5524, "step": 13299 }, { "epoch": 2.171095057344598, "grad_norm": 1.6808828115463257, "learning_rate": 1.9247876994925293e-05, "loss": 0.627, "step": 13300 }, { "epoch": 2.1712583159871026, "grad_norm": 2.002847194671631, "learning_rate": 1.924775625044689e-05, "loss": 0.5551, "step": 13301 }, { "epoch": 2.171421574629607, "grad_norm": 2.0416860580444336, "learning_rate": 1.9247635496655994e-05, "loss": 0.6609, "step": 13302 }, { "epoch": 2.1715848332721115, "grad_norm": 2.192258596420288, "learning_rate": 1.9247514733552738e-05, "loss": 0.677, "step": 13303 }, { "epoch": 2.171748091914616, "grad_norm": 2.152134895324707, "learning_rate": 1.9247393961137232e-05, "loss": 0.7819, "step": 13304 }, { "epoch": 2.17191135055712, "grad_norm": 1.9223251342773438, "learning_rate": 1.9247273179409605e-05, "loss": 0.6318, "step": 13305 }, { "epoch": 2.1720746091996244, "grad_norm": 1.882735013961792, "learning_rate": 1.9247152388369976e-05, "loss": 0.5903, "step": 13306 }, { "epoch": 2.172237867842129, "grad_norm": 1.9254473447799683, "learning_rate": 1.9247031588018467e-05, "loss": 0.6631, "step": 13307 }, { "epoch": 2.1724011264846332, "grad_norm": 1.8240844011306763, "learning_rate": 1.9246910778355202e-05, "loss": 0.6525, "step": 13308 }, { "epoch": 2.1725643851271377, "grad_norm": 1.6929633617401123, "learning_rate": 1.9246789959380297e-05, "loss": 0.4961, "step": 13309 }, { "epoch": 2.172727643769642, "grad_norm": 1.691746473312378, "learning_rate": 1.9246669131093875e-05, "loss": 0.5295, "step": 13310 }, { "epoch": 2.1728909024121466, "grad_norm": 1.863141417503357, "learning_rate": 1.9246548293496063e-05, "loss": 0.6106, "step": 13311 }, { "epoch": 2.173054161054651, "grad_norm": 1.856151819229126, "learning_rate": 1.924642744658698e-05, "loss": 0.5864, "step": 13312 }, { "epoch": 2.1732174196971554, "grad_norm": 1.7903735637664795, "learning_rate": 1.9246306590366747e-05, "loss": 0.6202, "step": 13313 }, { "epoch": 2.1733806783396594, "grad_norm": 1.6766685247421265, "learning_rate": 1.9246185724835483e-05, "loss": 0.5529, "step": 13314 }, { "epoch": 2.173543936982164, "grad_norm": 1.7331205606460571, "learning_rate": 1.9246064849993314e-05, "loss": 0.6332, "step": 13315 }, { "epoch": 2.1737071956246683, "grad_norm": 1.8539085388183594, "learning_rate": 1.9245943965840363e-05, "loss": 0.6159, "step": 13316 }, { "epoch": 2.1738704542671727, "grad_norm": 1.651315450668335, "learning_rate": 1.9245823072376747e-05, "loss": 0.5413, "step": 13317 }, { "epoch": 2.174033712909677, "grad_norm": 1.856483817100525, "learning_rate": 1.9245702169602586e-05, "loss": 0.6459, "step": 13318 }, { "epoch": 2.1741969715521816, "grad_norm": 2.1413705348968506, "learning_rate": 1.9245581257518008e-05, "loss": 0.6285, "step": 13319 }, { "epoch": 2.174360230194686, "grad_norm": 1.7569209337234497, "learning_rate": 1.9245460336123136e-05, "loss": 0.5657, "step": 13320 }, { "epoch": 2.1745234888371905, "grad_norm": 1.9354357719421387, "learning_rate": 1.924533940541808e-05, "loss": 0.6508, "step": 13321 }, { "epoch": 2.1746867474796945, "grad_norm": 2.2263355255126953, "learning_rate": 1.9245218465402974e-05, "loss": 0.6645, "step": 13322 }, { "epoch": 2.174850006122199, "grad_norm": 1.7292553186416626, "learning_rate": 1.9245097516077935e-05, "loss": 0.5695, "step": 13323 }, { "epoch": 2.1750132647647034, "grad_norm": 1.8626881837844849, "learning_rate": 1.9244976557443086e-05, "loss": 0.6079, "step": 13324 }, { "epoch": 2.175176523407208, "grad_norm": 1.5982189178466797, "learning_rate": 1.924485558949855e-05, "loss": 0.4996, "step": 13325 }, { "epoch": 2.1753397820497122, "grad_norm": 1.5233722925186157, "learning_rate": 1.9244734612244442e-05, "loss": 0.474, "step": 13326 }, { "epoch": 2.1755030406922167, "grad_norm": 2.3430371284484863, "learning_rate": 1.924461362568089e-05, "loss": 0.7936, "step": 13327 }, { "epoch": 2.175666299334721, "grad_norm": 1.9748408794403076, "learning_rate": 1.9244492629808017e-05, "loss": 0.6437, "step": 13328 }, { "epoch": 2.1758295579772255, "grad_norm": 1.8745267391204834, "learning_rate": 1.924437162462594e-05, "loss": 0.6342, "step": 13329 }, { "epoch": 2.17599281661973, "grad_norm": 1.8029619455337524, "learning_rate": 1.9244250610134787e-05, "loss": 0.5692, "step": 13330 }, { "epoch": 2.1761560752622344, "grad_norm": 1.5643092393875122, "learning_rate": 1.924412958633467e-05, "loss": 0.4394, "step": 13331 }, { "epoch": 2.1763193339047384, "grad_norm": 1.904140830039978, "learning_rate": 1.9244008553225725e-05, "loss": 0.5619, "step": 13332 }, { "epoch": 2.176482592547243, "grad_norm": 1.8070687055587769, "learning_rate": 1.924388751080806e-05, "loss": 0.6492, "step": 13333 }, { "epoch": 2.1766458511897473, "grad_norm": 2.0452511310577393, "learning_rate": 1.9243766459081802e-05, "loss": 0.6885, "step": 13334 }, { "epoch": 2.1768091098322517, "grad_norm": 2.0648157596588135, "learning_rate": 1.9243645398047073e-05, "loss": 0.6516, "step": 13335 }, { "epoch": 2.176972368474756, "grad_norm": 2.163074254989624, "learning_rate": 1.9243524327703998e-05, "loss": 1.0042, "step": 13336 }, { "epoch": 2.1771356271172606, "grad_norm": 1.9026952981948853, "learning_rate": 1.92434032480527e-05, "loss": 0.5626, "step": 13337 }, { "epoch": 2.177298885759765, "grad_norm": 2.0409882068634033, "learning_rate": 1.9243282159093292e-05, "loss": 0.5259, "step": 13338 }, { "epoch": 2.1774621444022695, "grad_norm": 1.584869146347046, "learning_rate": 1.9243161060825906e-05, "loss": 0.5877, "step": 13339 }, { "epoch": 2.1776254030447735, "grad_norm": 1.7438567876815796, "learning_rate": 1.9243039953250654e-05, "loss": 0.6189, "step": 13340 }, { "epoch": 2.177788661687278, "grad_norm": 1.9519093036651611, "learning_rate": 1.924291883636767e-05, "loss": 0.6082, "step": 13341 }, { "epoch": 2.1779519203297824, "grad_norm": 1.9211331605911255, "learning_rate": 1.924279771017706e-05, "loss": 0.5631, "step": 13342 }, { "epoch": 2.178115178972287, "grad_norm": 1.781083345413208, "learning_rate": 1.924267657467896e-05, "loss": 0.5093, "step": 13343 }, { "epoch": 2.1782784376147912, "grad_norm": 1.6289771795272827, "learning_rate": 1.9242555429873488e-05, "loss": 0.4486, "step": 13344 }, { "epoch": 2.1784416962572957, "grad_norm": 1.8659967184066772, "learning_rate": 1.9242434275760765e-05, "loss": 0.5297, "step": 13345 }, { "epoch": 2.1786049548998, "grad_norm": 1.5624890327453613, "learning_rate": 1.9242313112340912e-05, "loss": 0.5076, "step": 13346 }, { "epoch": 2.1787682135423045, "grad_norm": 1.9392064809799194, "learning_rate": 1.9242191939614054e-05, "loss": 0.5934, "step": 13347 }, { "epoch": 2.178931472184809, "grad_norm": 2.0035412311553955, "learning_rate": 1.924207075758031e-05, "loss": 0.5403, "step": 13348 }, { "epoch": 2.179094730827313, "grad_norm": 1.8310558795928955, "learning_rate": 1.92419495662398e-05, "loss": 0.5981, "step": 13349 }, { "epoch": 2.1792579894698174, "grad_norm": 1.8200416564941406, "learning_rate": 1.9241828365592653e-05, "loss": 0.5224, "step": 13350 }, { "epoch": 2.179421248112322, "grad_norm": 1.7179251909255981, "learning_rate": 1.9241707155638985e-05, "loss": 0.5132, "step": 13351 }, { "epoch": 2.1795845067548263, "grad_norm": 1.9483088254928589, "learning_rate": 1.9241585936378926e-05, "loss": 0.6387, "step": 13352 }, { "epoch": 2.1797477653973307, "grad_norm": 1.8727397918701172, "learning_rate": 1.9241464707812586e-05, "loss": 0.6614, "step": 13353 }, { "epoch": 2.179911024039835, "grad_norm": 1.6678638458251953, "learning_rate": 1.9241343469940096e-05, "loss": 0.5025, "step": 13354 }, { "epoch": 2.1800742826823396, "grad_norm": 1.7355008125305176, "learning_rate": 1.9241222222761576e-05, "loss": 0.5095, "step": 13355 }, { "epoch": 2.180237541324844, "grad_norm": 2.2528584003448486, "learning_rate": 1.9241100966277146e-05, "loss": 0.7494, "step": 13356 }, { "epoch": 2.180400799967348, "grad_norm": 1.528761625289917, "learning_rate": 1.9240979700486934e-05, "loss": 0.513, "step": 13357 }, { "epoch": 2.1805640586098525, "grad_norm": 1.707395076751709, "learning_rate": 1.924085842539105e-05, "loss": 0.504, "step": 13358 }, { "epoch": 2.180727317252357, "grad_norm": 1.741479516029358, "learning_rate": 1.9240737140989632e-05, "loss": 0.566, "step": 13359 }, { "epoch": 2.1808905758948613, "grad_norm": 2.0772225856781006, "learning_rate": 1.924061584728279e-05, "loss": 0.6394, "step": 13360 }, { "epoch": 2.181053834537366, "grad_norm": 1.7293297052383423, "learning_rate": 1.9240494544270653e-05, "loss": 0.5311, "step": 13361 }, { "epoch": 2.1812170931798702, "grad_norm": 2.201112985610962, "learning_rate": 1.9240373231953334e-05, "loss": 0.5863, "step": 13362 }, { "epoch": 2.1813803518223747, "grad_norm": 1.8002632856369019, "learning_rate": 1.924025191033097e-05, "loss": 0.6145, "step": 13363 }, { "epoch": 2.181543610464879, "grad_norm": 1.7468950748443604, "learning_rate": 1.924013057940367e-05, "loss": 0.5685, "step": 13364 }, { "epoch": 2.1817068691073835, "grad_norm": 1.7704704999923706, "learning_rate": 1.9240009239171564e-05, "loss": 0.6277, "step": 13365 }, { "epoch": 2.181870127749888, "grad_norm": 2.0964515209198, "learning_rate": 1.9239887889634764e-05, "loss": 0.6714, "step": 13366 }, { "epoch": 2.182033386392392, "grad_norm": 1.9130288362503052, "learning_rate": 1.9239766530793405e-05, "loss": 0.6312, "step": 13367 }, { "epoch": 2.1821966450348964, "grad_norm": 1.6292823553085327, "learning_rate": 1.9239645162647603e-05, "loss": 0.5424, "step": 13368 }, { "epoch": 2.182359903677401, "grad_norm": 1.9070968627929688, "learning_rate": 1.9239523785197483e-05, "loss": 0.5828, "step": 13369 }, { "epoch": 2.1825231623199053, "grad_norm": 1.6151317358016968, "learning_rate": 1.923940239844316e-05, "loss": 0.5367, "step": 13370 }, { "epoch": 2.1826864209624097, "grad_norm": 1.9887224435806274, "learning_rate": 1.9239281002384766e-05, "loss": 0.5508, "step": 13371 }, { "epoch": 2.182849679604914, "grad_norm": 1.6799800395965576, "learning_rate": 1.9239159597022416e-05, "loss": 0.5255, "step": 13372 }, { "epoch": 2.1830129382474186, "grad_norm": 1.644047737121582, "learning_rate": 1.9239038182356236e-05, "loss": 0.5753, "step": 13373 }, { "epoch": 2.183176196889923, "grad_norm": 1.7875139713287354, "learning_rate": 1.9238916758386345e-05, "loss": 0.6355, "step": 13374 }, { "epoch": 2.183339455532427, "grad_norm": 2.339210271835327, "learning_rate": 1.9238795325112867e-05, "loss": 0.7761, "step": 13375 }, { "epoch": 2.1835027141749315, "grad_norm": 1.7501087188720703, "learning_rate": 1.923867388253593e-05, "loss": 0.5876, "step": 13376 }, { "epoch": 2.183665972817436, "grad_norm": 1.6933979988098145, "learning_rate": 1.9238552430655645e-05, "loss": 0.56, "step": 13377 }, { "epoch": 2.1838292314599403, "grad_norm": 1.9278876781463623, "learning_rate": 1.9238430969472143e-05, "loss": 0.6243, "step": 13378 }, { "epoch": 2.183992490102445, "grad_norm": 2.16371488571167, "learning_rate": 1.923830949898554e-05, "loss": 0.5745, "step": 13379 }, { "epoch": 2.184155748744949, "grad_norm": 1.969250202178955, "learning_rate": 1.9238188019195964e-05, "loss": 0.5714, "step": 13380 }, { "epoch": 2.1843190073874537, "grad_norm": 1.540621280670166, "learning_rate": 1.9238066530103537e-05, "loss": 0.5353, "step": 13381 }, { "epoch": 2.184482266029958, "grad_norm": 1.8438284397125244, "learning_rate": 1.9237945031708378e-05, "loss": 0.6587, "step": 13382 }, { "epoch": 2.1846455246724625, "grad_norm": 1.608363151550293, "learning_rate": 1.923782352401061e-05, "loss": 0.4409, "step": 13383 }, { "epoch": 2.1848087833149665, "grad_norm": 1.6656283140182495, "learning_rate": 1.9237702007010356e-05, "loss": 0.5441, "step": 13384 }, { "epoch": 2.184972041957471, "grad_norm": 1.4580650329589844, "learning_rate": 1.923758048070774e-05, "loss": 0.5641, "step": 13385 }, { "epoch": 2.1851353005999754, "grad_norm": 1.6788207292556763, "learning_rate": 1.923745894510288e-05, "loss": 0.5347, "step": 13386 }, { "epoch": 2.18529855924248, "grad_norm": 1.7024714946746826, "learning_rate": 1.9237337400195906e-05, "loss": 0.5595, "step": 13387 }, { "epoch": 2.1854618178849843, "grad_norm": 1.7942980527877808, "learning_rate": 1.9237215845986933e-05, "loss": 0.6056, "step": 13388 }, { "epoch": 2.1856250765274887, "grad_norm": 1.6418932676315308, "learning_rate": 1.923709428247609e-05, "loss": 0.4852, "step": 13389 }, { "epoch": 2.185788335169993, "grad_norm": 1.5173143148422241, "learning_rate": 1.9236972709663487e-05, "loss": 0.4785, "step": 13390 }, { "epoch": 2.1859515938124976, "grad_norm": 2.240436553955078, "learning_rate": 1.9236851127549262e-05, "loss": 0.6179, "step": 13391 }, { "epoch": 2.186114852455002, "grad_norm": 1.967621088027954, "learning_rate": 1.9236729536133527e-05, "loss": 0.5886, "step": 13392 }, { "epoch": 2.186278111097506, "grad_norm": 1.6209501028060913, "learning_rate": 1.9236607935416408e-05, "loss": 0.4573, "step": 13393 }, { "epoch": 2.1864413697400105, "grad_norm": 1.7111046314239502, "learning_rate": 1.923648632539803e-05, "loss": 0.5861, "step": 13394 }, { "epoch": 2.186604628382515, "grad_norm": 2.1182708740234375, "learning_rate": 1.9236364706078512e-05, "loss": 0.6681, "step": 13395 }, { "epoch": 2.1867678870250193, "grad_norm": 1.714556336402893, "learning_rate": 1.9236243077457973e-05, "loss": 0.5123, "step": 13396 }, { "epoch": 2.186931145667524, "grad_norm": 1.823424220085144, "learning_rate": 1.9236121439536544e-05, "loss": 0.566, "step": 13397 }, { "epoch": 2.187094404310028, "grad_norm": 1.9098000526428223, "learning_rate": 1.9235999792314342e-05, "loss": 0.5211, "step": 13398 }, { "epoch": 2.1872576629525327, "grad_norm": 1.5521291494369507, "learning_rate": 1.923587813579149e-05, "loss": 0.5918, "step": 13399 }, { "epoch": 2.187420921595037, "grad_norm": 1.7459940910339355, "learning_rate": 1.9235756469968112e-05, "loss": 0.6029, "step": 13400 }, { "epoch": 2.1875841802375415, "grad_norm": 2.00018048286438, "learning_rate": 1.923563479484433e-05, "loss": 0.9114, "step": 13401 }, { "epoch": 2.1877474388800455, "grad_norm": 1.6527129411697388, "learning_rate": 1.9235513110420267e-05, "loss": 0.5736, "step": 13402 }, { "epoch": 2.18791069752255, "grad_norm": 1.4924031496047974, "learning_rate": 1.923539141669604e-05, "loss": 0.5062, "step": 13403 }, { "epoch": 2.1880739561650544, "grad_norm": 1.8272820711135864, "learning_rate": 1.923526971367178e-05, "loss": 0.5093, "step": 13404 }, { "epoch": 2.188237214807559, "grad_norm": 1.8849310874938965, "learning_rate": 1.923514800134761e-05, "loss": 0.6635, "step": 13405 }, { "epoch": 2.1884004734500633, "grad_norm": 1.7769842147827148, "learning_rate": 1.923502627972364e-05, "loss": 0.5692, "step": 13406 }, { "epoch": 2.1885637320925677, "grad_norm": 1.963986873626709, "learning_rate": 1.9234904548800008e-05, "loss": 0.5406, "step": 13407 }, { "epoch": 2.188726990735072, "grad_norm": 1.981879711151123, "learning_rate": 1.9234782808576823e-05, "loss": 0.5592, "step": 13408 }, { "epoch": 2.1888902493775766, "grad_norm": 1.6330256462097168, "learning_rate": 1.923466105905422e-05, "loss": 0.5981, "step": 13409 }, { "epoch": 2.1890535080200806, "grad_norm": 1.7463089227676392, "learning_rate": 1.9234539300232312e-05, "loss": 0.5694, "step": 13410 }, { "epoch": 2.189216766662585, "grad_norm": 1.9125310182571411, "learning_rate": 1.9234417532111227e-05, "loss": 0.5796, "step": 13411 }, { "epoch": 2.1893800253050895, "grad_norm": 1.978279948234558, "learning_rate": 1.9234295754691085e-05, "loss": 0.5798, "step": 13412 }, { "epoch": 2.189543283947594, "grad_norm": 1.6654285192489624, "learning_rate": 1.9234173967972012e-05, "loss": 0.5525, "step": 13413 }, { "epoch": 2.1897065425900983, "grad_norm": 1.899915337562561, "learning_rate": 1.9234052171954127e-05, "loss": 0.635, "step": 13414 }, { "epoch": 2.1898698012326028, "grad_norm": 1.8322410583496094, "learning_rate": 1.923393036663755e-05, "loss": 0.6394, "step": 13415 }, { "epoch": 2.190033059875107, "grad_norm": 1.889222264289856, "learning_rate": 1.9233808552022414e-05, "loss": 0.559, "step": 13416 }, { "epoch": 2.1901963185176117, "grad_norm": 1.769713044166565, "learning_rate": 1.923368672810883e-05, "loss": 0.4811, "step": 13417 }, { "epoch": 2.190359577160116, "grad_norm": 2.021545886993408, "learning_rate": 1.923356489489693e-05, "loss": 0.6794, "step": 13418 }, { "epoch": 2.1905228358026205, "grad_norm": 2.222835063934326, "learning_rate": 1.9233443052386832e-05, "loss": 0.6443, "step": 13419 }, { "epoch": 2.1906860944451245, "grad_norm": 1.3782037496566772, "learning_rate": 1.9233321200578657e-05, "loss": 0.5168, "step": 13420 }, { "epoch": 2.190849353087629, "grad_norm": 1.6466208696365356, "learning_rate": 1.923319933947253e-05, "loss": 0.5173, "step": 13421 }, { "epoch": 2.1910126117301334, "grad_norm": 1.6654553413391113, "learning_rate": 1.923307746906858e-05, "loss": 0.6111, "step": 13422 }, { "epoch": 2.191175870372638, "grad_norm": 1.6676487922668457, "learning_rate": 1.9232955589366914e-05, "loss": 0.5175, "step": 13423 }, { "epoch": 2.1913391290151423, "grad_norm": 1.8205662965774536, "learning_rate": 1.923283370036767e-05, "loss": 0.6621, "step": 13424 }, { "epoch": 2.1915023876576467, "grad_norm": 1.918403148651123, "learning_rate": 1.923271180207096e-05, "loss": 0.5198, "step": 13425 }, { "epoch": 2.191665646300151, "grad_norm": 1.8093727827072144, "learning_rate": 1.923258989447692e-05, "loss": 0.5758, "step": 13426 }, { "epoch": 2.1918289049426556, "grad_norm": 1.6721630096435547, "learning_rate": 1.9232467977585657e-05, "loss": 0.5093, "step": 13427 }, { "epoch": 2.1919921635851596, "grad_norm": 2.337087392807007, "learning_rate": 1.9232346051397303e-05, "loss": 0.6548, "step": 13428 }, { "epoch": 2.192155422227664, "grad_norm": 1.796854853630066, "learning_rate": 1.923222411591198e-05, "loss": 0.597, "step": 13429 }, { "epoch": 2.1923186808701685, "grad_norm": 2.088977575302124, "learning_rate": 1.923210217112981e-05, "loss": 0.7017, "step": 13430 }, { "epoch": 2.192481939512673, "grad_norm": 1.794472098350525, "learning_rate": 1.9231980217050916e-05, "loss": 0.5673, "step": 13431 }, { "epoch": 2.1926451981551773, "grad_norm": 1.7347118854522705, "learning_rate": 1.923185825367542e-05, "loss": 0.5805, "step": 13432 }, { "epoch": 2.1928084567976818, "grad_norm": 1.9131489992141724, "learning_rate": 1.9231736281003444e-05, "loss": 0.6746, "step": 13433 }, { "epoch": 2.192971715440186, "grad_norm": 2.022796154022217, "learning_rate": 1.923161429903511e-05, "loss": 0.7153, "step": 13434 }, { "epoch": 2.1931349740826906, "grad_norm": 1.7130711078643799, "learning_rate": 1.9231492307770548e-05, "loss": 0.5937, "step": 13435 }, { "epoch": 2.193298232725195, "grad_norm": 1.5773271322250366, "learning_rate": 1.9231370307209873e-05, "loss": 0.4632, "step": 13436 }, { "epoch": 2.193461491367699, "grad_norm": 2.2565109729766846, "learning_rate": 1.923124829735321e-05, "loss": 0.6851, "step": 13437 }, { "epoch": 2.1936247500102035, "grad_norm": 1.5666379928588867, "learning_rate": 1.923112627820068e-05, "loss": 0.4837, "step": 13438 }, { "epoch": 2.193788008652708, "grad_norm": 1.834142804145813, "learning_rate": 1.9231004249752415e-05, "loss": 0.6526, "step": 13439 }, { "epoch": 2.1939512672952124, "grad_norm": 1.8211784362792969, "learning_rate": 1.9230882212008528e-05, "loss": 0.5908, "step": 13440 }, { "epoch": 2.194114525937717, "grad_norm": 1.8099595308303833, "learning_rate": 1.9230760164969146e-05, "loss": 0.5487, "step": 13441 }, { "epoch": 2.1942777845802213, "grad_norm": 1.7351914644241333, "learning_rate": 1.9230638108634387e-05, "loss": 0.5711, "step": 13442 }, { "epoch": 2.1944410432227257, "grad_norm": 1.9830671548843384, "learning_rate": 1.923051604300438e-05, "loss": 0.5731, "step": 13443 }, { "epoch": 2.19460430186523, "grad_norm": 1.9587249755859375, "learning_rate": 1.9230393968079247e-05, "loss": 0.545, "step": 13444 }, { "epoch": 2.1947675605077346, "grad_norm": 1.9675657749176025, "learning_rate": 1.923027188385911e-05, "loss": 0.6309, "step": 13445 }, { "epoch": 2.1949308191502386, "grad_norm": 1.7375847101211548, "learning_rate": 1.923014979034409e-05, "loss": 0.6055, "step": 13446 }, { "epoch": 2.195094077792743, "grad_norm": 1.9729317426681519, "learning_rate": 1.9230027687534313e-05, "loss": 0.6547, "step": 13447 }, { "epoch": 2.1952573364352475, "grad_norm": 2.4664926528930664, "learning_rate": 1.92299055754299e-05, "loss": 0.6233, "step": 13448 }, { "epoch": 2.195420595077752, "grad_norm": 1.6607191562652588, "learning_rate": 1.9229783454030975e-05, "loss": 0.6075, "step": 13449 }, { "epoch": 2.1955838537202563, "grad_norm": 1.4437894821166992, "learning_rate": 1.922966132333766e-05, "loss": 0.5114, "step": 13450 }, { "epoch": 2.1957471123627608, "grad_norm": 1.8462218046188354, "learning_rate": 1.9229539183350076e-05, "loss": 0.6588, "step": 13451 }, { "epoch": 2.195910371005265, "grad_norm": 1.777529001235962, "learning_rate": 1.9229417034068352e-05, "loss": 0.5058, "step": 13452 }, { "epoch": 2.1960736296477696, "grad_norm": 1.6091852188110352, "learning_rate": 1.9229294875492606e-05, "loss": 0.5175, "step": 13453 }, { "epoch": 2.196236888290274, "grad_norm": 2.086238384246826, "learning_rate": 1.922917270762296e-05, "loss": 0.5934, "step": 13454 }, { "epoch": 2.196400146932778, "grad_norm": 1.6014565229415894, "learning_rate": 1.9229050530459545e-05, "loss": 0.5169, "step": 13455 }, { "epoch": 2.1965634055752825, "grad_norm": 1.6017178297042847, "learning_rate": 1.9228928344002477e-05, "loss": 0.4643, "step": 13456 }, { "epoch": 2.196726664217787, "grad_norm": 1.7577970027923584, "learning_rate": 1.9228806148251878e-05, "loss": 0.5202, "step": 13457 }, { "epoch": 2.1968899228602914, "grad_norm": 1.6741358041763306, "learning_rate": 1.9228683943207875e-05, "loss": 0.562, "step": 13458 }, { "epoch": 2.197053181502796, "grad_norm": 1.987088680267334, "learning_rate": 1.9228561728870588e-05, "loss": 0.628, "step": 13459 }, { "epoch": 2.1972164401453003, "grad_norm": 1.5931546688079834, "learning_rate": 1.9228439505240147e-05, "loss": 0.4406, "step": 13460 }, { "epoch": 2.1973796987878047, "grad_norm": 1.8192857503890991, "learning_rate": 1.9228317272316664e-05, "loss": 0.5608, "step": 13461 }, { "epoch": 2.197542957430309, "grad_norm": 1.5892902612686157, "learning_rate": 1.922819503010027e-05, "loss": 0.5075, "step": 13462 }, { "epoch": 2.197706216072813, "grad_norm": 1.7999153137207031, "learning_rate": 1.922807277859109e-05, "loss": 0.5398, "step": 13463 }, { "epoch": 2.1978694747153176, "grad_norm": 1.64551842212677, "learning_rate": 1.9227950517789238e-05, "loss": 0.5547, "step": 13464 }, { "epoch": 2.198032733357822, "grad_norm": 1.8345314264297485, "learning_rate": 1.9227828247694845e-05, "loss": 0.6431, "step": 13465 }, { "epoch": 2.1981959920003264, "grad_norm": 1.669814944267273, "learning_rate": 1.922770596830803e-05, "loss": 0.5025, "step": 13466 }, { "epoch": 2.198359250642831, "grad_norm": 1.76397705078125, "learning_rate": 1.922758367962892e-05, "loss": 0.5114, "step": 13467 }, { "epoch": 2.1985225092853353, "grad_norm": 1.790084719657898, "learning_rate": 1.9227461381657632e-05, "loss": 0.475, "step": 13468 }, { "epoch": 2.1986857679278398, "grad_norm": 1.4444540739059448, "learning_rate": 1.9227339074394295e-05, "loss": 0.469, "step": 13469 }, { "epoch": 2.198849026570344, "grad_norm": 1.3645896911621094, "learning_rate": 1.9227216757839028e-05, "loss": 0.5128, "step": 13470 }, { "epoch": 2.1990122852128486, "grad_norm": 1.8578639030456543, "learning_rate": 1.922709443199196e-05, "loss": 0.6214, "step": 13471 }, { "epoch": 2.199175543855353, "grad_norm": 1.8554776906967163, "learning_rate": 1.9226972096853207e-05, "loss": 0.647, "step": 13472 }, { "epoch": 2.199338802497857, "grad_norm": 1.5406322479248047, "learning_rate": 1.9226849752422895e-05, "loss": 0.5386, "step": 13473 }, { "epoch": 2.1995020611403615, "grad_norm": 2.119314670562744, "learning_rate": 1.922672739870115e-05, "loss": 0.6352, "step": 13474 }, { "epoch": 2.199665319782866, "grad_norm": 1.2740099430084229, "learning_rate": 1.922660503568809e-05, "loss": 0.4432, "step": 13475 }, { "epoch": 2.1998285784253704, "grad_norm": 1.9824882745742798, "learning_rate": 1.9226482663383845e-05, "loss": 0.5975, "step": 13476 }, { "epoch": 2.199991837067875, "grad_norm": 1.7407184839248657, "learning_rate": 1.9226360281788536e-05, "loss": 0.5823, "step": 13477 }, { "epoch": 2.2001550957103793, "grad_norm": 1.8667157888412476, "learning_rate": 1.9226237890902278e-05, "loss": 0.5907, "step": 13478 }, { "epoch": 2.2003183543528837, "grad_norm": 1.4921947717666626, "learning_rate": 1.9226115490725207e-05, "loss": 0.4381, "step": 13479 }, { "epoch": 2.200481612995388, "grad_norm": 2.118964433670044, "learning_rate": 1.9225993081257435e-05, "loss": 0.6268, "step": 13480 }, { "epoch": 2.200644871637892, "grad_norm": 1.8599441051483154, "learning_rate": 1.9225870662499094e-05, "loss": 0.5398, "step": 13481 }, { "epoch": 2.2008081302803966, "grad_norm": 1.5314562320709229, "learning_rate": 1.92257482344503e-05, "loss": 0.465, "step": 13482 }, { "epoch": 2.200971388922901, "grad_norm": 1.7340726852416992, "learning_rate": 1.9225625797111186e-05, "loss": 0.4932, "step": 13483 }, { "epoch": 2.2011346475654054, "grad_norm": 1.5865488052368164, "learning_rate": 1.9225503350481863e-05, "loss": 0.403, "step": 13484 }, { "epoch": 2.20129790620791, "grad_norm": 1.6666765213012695, "learning_rate": 1.9225380894562466e-05, "loss": 0.462, "step": 13485 }, { "epoch": 2.2014611648504143, "grad_norm": 1.6721558570861816, "learning_rate": 1.922525842935311e-05, "loss": 0.5228, "step": 13486 }, { "epoch": 2.2016244234929188, "grad_norm": 1.8734030723571777, "learning_rate": 1.922513595485392e-05, "loss": 0.5889, "step": 13487 }, { "epoch": 2.201787682135423, "grad_norm": 1.8705997467041016, "learning_rate": 1.9225013471065022e-05, "loss": 0.5843, "step": 13488 }, { "epoch": 2.2019509407779276, "grad_norm": 1.887967586517334, "learning_rate": 1.922489097798654e-05, "loss": 0.5936, "step": 13489 }, { "epoch": 2.2021141994204316, "grad_norm": 2.0482609272003174, "learning_rate": 1.922476847561859e-05, "loss": 0.565, "step": 13490 }, { "epoch": 2.202277458062936, "grad_norm": 1.8741295337677002, "learning_rate": 1.9224645963961308e-05, "loss": 0.5712, "step": 13491 }, { "epoch": 2.2024407167054405, "grad_norm": 2.0207552909851074, "learning_rate": 1.9224523443014804e-05, "loss": 0.6382, "step": 13492 }, { "epoch": 2.202603975347945, "grad_norm": 1.6354000568389893, "learning_rate": 1.922440091277921e-05, "loss": 0.4657, "step": 13493 }, { "epoch": 2.2027672339904494, "grad_norm": 1.8365674018859863, "learning_rate": 1.9224278373254644e-05, "loss": 0.5391, "step": 13494 }, { "epoch": 2.202930492632954, "grad_norm": 2.068016529083252, "learning_rate": 1.922415582444123e-05, "loss": 0.709, "step": 13495 }, { "epoch": 2.2030937512754583, "grad_norm": 1.5636731386184692, "learning_rate": 1.9224033266339103e-05, "loss": 0.499, "step": 13496 }, { "epoch": 2.2032570099179627, "grad_norm": 1.9505335092544556, "learning_rate": 1.922391069894837e-05, "loss": 0.6586, "step": 13497 }, { "epoch": 2.2034202685604667, "grad_norm": 1.7289849519729614, "learning_rate": 1.9223788122269163e-05, "loss": 0.6301, "step": 13498 }, { "epoch": 2.203583527202971, "grad_norm": 2.1119890213012695, "learning_rate": 1.9223665536301602e-05, "loss": 0.6882, "step": 13499 }, { "epoch": 2.2037467858454756, "grad_norm": 1.773877739906311, "learning_rate": 1.9223542941045817e-05, "loss": 0.5544, "step": 13500 }, { "epoch": 2.20391004448798, "grad_norm": 1.6729321479797363, "learning_rate": 1.9223420336501922e-05, "loss": 0.5634, "step": 13501 }, { "epoch": 2.2040733031304844, "grad_norm": 1.437172532081604, "learning_rate": 1.9223297722670047e-05, "loss": 0.4753, "step": 13502 }, { "epoch": 2.204236561772989, "grad_norm": 1.5925832986831665, "learning_rate": 1.9223175099550313e-05, "loss": 0.5886, "step": 13503 }, { "epoch": 2.2043998204154933, "grad_norm": 1.923754096031189, "learning_rate": 1.9223052467142846e-05, "loss": 0.5966, "step": 13504 }, { "epoch": 2.2045630790579978, "grad_norm": 1.6006555557250977, "learning_rate": 1.9222929825447764e-05, "loss": 0.612, "step": 13505 }, { "epoch": 2.204726337700502, "grad_norm": 1.749432921409607, "learning_rate": 1.9222807174465196e-05, "loss": 0.6333, "step": 13506 }, { "epoch": 2.2048895963430066, "grad_norm": 1.9583512544631958, "learning_rate": 1.9222684514195265e-05, "loss": 0.6154, "step": 13507 }, { "epoch": 2.2050528549855106, "grad_norm": 1.8669524192810059, "learning_rate": 1.922256184463809e-05, "loss": 0.5986, "step": 13508 }, { "epoch": 2.205216113628015, "grad_norm": 1.385091781616211, "learning_rate": 1.92224391657938e-05, "loss": 0.4544, "step": 13509 }, { "epoch": 2.2053793722705195, "grad_norm": 1.8015996217727661, "learning_rate": 1.9222316477662517e-05, "loss": 0.604, "step": 13510 }, { "epoch": 2.205542630913024, "grad_norm": 1.626786708831787, "learning_rate": 1.9222193780244363e-05, "loss": 0.5264, "step": 13511 }, { "epoch": 2.2057058895555284, "grad_norm": 2.028411626815796, "learning_rate": 1.9222071073539462e-05, "loss": 0.591, "step": 13512 }, { "epoch": 2.205869148198033, "grad_norm": 1.7009451389312744, "learning_rate": 1.9221948357547936e-05, "loss": 0.671, "step": 13513 }, { "epoch": 2.2060324068405373, "grad_norm": 1.5573978424072266, "learning_rate": 1.9221825632269913e-05, "loss": 0.4957, "step": 13514 }, { "epoch": 2.2061956654830417, "grad_norm": 1.6353881359100342, "learning_rate": 1.9221702897705516e-05, "loss": 0.4891, "step": 13515 }, { "epoch": 2.2063589241255457, "grad_norm": 1.687212586402893, "learning_rate": 1.9221580153854862e-05, "loss": 0.5559, "step": 13516 }, { "epoch": 2.20652218276805, "grad_norm": 2.0059144496917725, "learning_rate": 1.9221457400718078e-05, "loss": 0.6417, "step": 13517 }, { "epoch": 2.2066854414105546, "grad_norm": 1.771941900253296, "learning_rate": 1.9221334638295296e-05, "loss": 0.5782, "step": 13518 }, { "epoch": 2.206848700053059, "grad_norm": 1.8332910537719727, "learning_rate": 1.9221211866586627e-05, "loss": 0.6157, "step": 13519 }, { "epoch": 2.2070119586955634, "grad_norm": 1.806488037109375, "learning_rate": 1.9221089085592203e-05, "loss": 0.5286, "step": 13520 }, { "epoch": 2.207175217338068, "grad_norm": 1.764085292816162, "learning_rate": 1.9220966295312143e-05, "loss": 0.5275, "step": 13521 }, { "epoch": 2.2073384759805723, "grad_norm": 1.7545183897018433, "learning_rate": 1.9220843495746573e-05, "loss": 0.5164, "step": 13522 }, { "epoch": 2.2075017346230768, "grad_norm": 1.907439112663269, "learning_rate": 1.9220720686895614e-05, "loss": 0.577, "step": 13523 }, { "epoch": 2.207664993265581, "grad_norm": 1.778931736946106, "learning_rate": 1.9220597868759395e-05, "loss": 0.5667, "step": 13524 }, { "epoch": 2.207828251908085, "grad_norm": 2.207648515701294, "learning_rate": 1.9220475041338035e-05, "loss": 0.6942, "step": 13525 }, { "epoch": 2.2079915105505896, "grad_norm": 1.6599267721176147, "learning_rate": 1.922035220463166e-05, "loss": 0.5335, "step": 13526 }, { "epoch": 2.208154769193094, "grad_norm": 1.9255611896514893, "learning_rate": 1.922022935864039e-05, "loss": 0.5976, "step": 13527 }, { "epoch": 2.2083180278355985, "grad_norm": 1.6730433702468872, "learning_rate": 1.9220106503364354e-05, "loss": 0.557, "step": 13528 }, { "epoch": 2.208481286478103, "grad_norm": 1.9317359924316406, "learning_rate": 1.9219983638803672e-05, "loss": 0.6123, "step": 13529 }, { "epoch": 2.2086445451206074, "grad_norm": 1.6502612829208374, "learning_rate": 1.9219860764958466e-05, "loss": 0.5258, "step": 13530 }, { "epoch": 2.208807803763112, "grad_norm": 1.9240596294403076, "learning_rate": 1.9219737881828867e-05, "loss": 0.562, "step": 13531 }, { "epoch": 2.2089710624056162, "grad_norm": 2.038191795349121, "learning_rate": 1.9219614989414994e-05, "loss": 0.6322, "step": 13532 }, { "epoch": 2.2091343210481207, "grad_norm": 1.7270886898040771, "learning_rate": 1.921949208771697e-05, "loss": 0.6074, "step": 13533 }, { "epoch": 2.2092975796906247, "grad_norm": 1.8884261846542358, "learning_rate": 1.921936917673492e-05, "loss": 0.583, "step": 13534 }, { "epoch": 2.209460838333129, "grad_norm": 1.6123483180999756, "learning_rate": 1.921924625646897e-05, "loss": 0.5285, "step": 13535 }, { "epoch": 2.2096240969756336, "grad_norm": 1.6566444635391235, "learning_rate": 1.9219123326919237e-05, "loss": 0.6298, "step": 13536 }, { "epoch": 2.209787355618138, "grad_norm": 1.7489402294158936, "learning_rate": 1.9219000388085855e-05, "loss": 0.5581, "step": 13537 }, { "epoch": 2.2099506142606424, "grad_norm": 1.5402017831802368, "learning_rate": 1.9218877439968937e-05, "loss": 0.4795, "step": 13538 }, { "epoch": 2.210113872903147, "grad_norm": 1.8335278034210205, "learning_rate": 1.9218754482568613e-05, "loss": 0.5245, "step": 13539 }, { "epoch": 2.2102771315456513, "grad_norm": 1.7608600854873657, "learning_rate": 1.9218631515885007e-05, "loss": 0.5421, "step": 13540 }, { "epoch": 2.2104403901881557, "grad_norm": 1.7252782583236694, "learning_rate": 1.9218508539918243e-05, "loss": 0.6085, "step": 13541 }, { "epoch": 2.21060364883066, "grad_norm": 1.7853487730026245, "learning_rate": 1.921838555466844e-05, "loss": 0.5746, "step": 13542 }, { "epoch": 2.210766907473164, "grad_norm": 1.7860963344573975, "learning_rate": 1.9218262560135727e-05, "loss": 0.5188, "step": 13543 }, { "epoch": 2.2109301661156686, "grad_norm": 1.9549198150634766, "learning_rate": 1.9218139556320223e-05, "loss": 0.6255, "step": 13544 }, { "epoch": 2.211093424758173, "grad_norm": 1.5937292575836182, "learning_rate": 1.9218016543222058e-05, "loss": 0.5692, "step": 13545 }, { "epoch": 2.2112566834006775, "grad_norm": 1.8778589963912964, "learning_rate": 1.9217893520841354e-05, "loss": 0.5808, "step": 13546 }, { "epoch": 2.211419942043182, "grad_norm": 1.771462321281433, "learning_rate": 1.921777048917823e-05, "loss": 0.6087, "step": 13547 }, { "epoch": 2.2115832006856864, "grad_norm": 1.6931540966033936, "learning_rate": 1.9217647448232816e-05, "loss": 0.51, "step": 13548 }, { "epoch": 2.211746459328191, "grad_norm": 2.114635944366455, "learning_rate": 1.9217524398005233e-05, "loss": 0.6827, "step": 13549 }, { "epoch": 2.2119097179706952, "grad_norm": 1.8352621793746948, "learning_rate": 1.9217401338495605e-05, "loss": 0.6093, "step": 13550 }, { "epoch": 2.2120729766131992, "grad_norm": 1.6217172145843506, "learning_rate": 1.9217278269704055e-05, "loss": 0.501, "step": 13551 }, { "epoch": 2.2122362352557037, "grad_norm": 1.7867323160171509, "learning_rate": 1.921715519163071e-05, "loss": 0.6063, "step": 13552 }, { "epoch": 2.212399493898208, "grad_norm": 1.7343645095825195, "learning_rate": 1.9217032104275692e-05, "loss": 0.5364, "step": 13553 }, { "epoch": 2.2125627525407126, "grad_norm": 1.8269546031951904, "learning_rate": 1.9216909007639126e-05, "loss": 0.5633, "step": 13554 }, { "epoch": 2.212726011183217, "grad_norm": 1.51897132396698, "learning_rate": 1.9216785901721136e-05, "loss": 0.5591, "step": 13555 }, { "epoch": 2.2128892698257214, "grad_norm": 1.6662660837173462, "learning_rate": 1.9216662786521843e-05, "loss": 0.5975, "step": 13556 }, { "epoch": 2.213052528468226, "grad_norm": 1.9194362163543701, "learning_rate": 1.921653966204137e-05, "loss": 0.6315, "step": 13557 }, { "epoch": 2.2132157871107303, "grad_norm": 2.01161527633667, "learning_rate": 1.9216416528279848e-05, "loss": 0.6387, "step": 13558 }, { "epoch": 2.2133790457532347, "grad_norm": 1.324353575706482, "learning_rate": 1.9216293385237396e-05, "loss": 0.4356, "step": 13559 }, { "epoch": 2.213542304395739, "grad_norm": 2.0111591815948486, "learning_rate": 1.921617023291414e-05, "loss": 0.7215, "step": 13560 }, { "epoch": 2.213705563038243, "grad_norm": 1.8432612419128418, "learning_rate": 1.9216047071310202e-05, "loss": 0.54, "step": 13561 }, { "epoch": 2.2138688216807476, "grad_norm": 1.6190499067306519, "learning_rate": 1.921592390042571e-05, "loss": 0.533, "step": 13562 }, { "epoch": 2.214032080323252, "grad_norm": 1.7931301593780518, "learning_rate": 1.921580072026078e-05, "loss": 0.6241, "step": 13563 }, { "epoch": 2.2141953389657565, "grad_norm": 1.547144889831543, "learning_rate": 1.921567753081554e-05, "loss": 0.5226, "step": 13564 }, { "epoch": 2.214358597608261, "grad_norm": 1.7574440240859985, "learning_rate": 1.921555433209012e-05, "loss": 0.6014, "step": 13565 }, { "epoch": 2.2145218562507654, "grad_norm": 1.9926255941390991, "learning_rate": 1.921543112408464e-05, "loss": 0.5248, "step": 13566 }, { "epoch": 2.21468511489327, "grad_norm": 1.995849609375, "learning_rate": 1.921530790679922e-05, "loss": 0.6959, "step": 13567 }, { "epoch": 2.2148483735357742, "grad_norm": 1.853157877922058, "learning_rate": 1.9215184680233988e-05, "loss": 0.4849, "step": 13568 }, { "epoch": 2.2150116321782782, "grad_norm": 1.7080700397491455, "learning_rate": 1.9215061444389068e-05, "loss": 0.5376, "step": 13569 }, { "epoch": 2.2151748908207827, "grad_norm": 2.378742218017578, "learning_rate": 1.9214938199264584e-05, "loss": 0.5555, "step": 13570 }, { "epoch": 2.215338149463287, "grad_norm": 1.8883365392684937, "learning_rate": 1.921481494486066e-05, "loss": 0.6269, "step": 13571 }, { "epoch": 2.2155014081057915, "grad_norm": 1.768269419670105, "learning_rate": 1.921469168117742e-05, "loss": 0.5501, "step": 13572 }, { "epoch": 2.215664666748296, "grad_norm": 1.536367416381836, "learning_rate": 1.9214568408214986e-05, "loss": 0.5422, "step": 13573 }, { "epoch": 2.2158279253908004, "grad_norm": 1.4148045778274536, "learning_rate": 1.9214445125973484e-05, "loss": 0.4636, "step": 13574 }, { "epoch": 2.215991184033305, "grad_norm": 1.997836709022522, "learning_rate": 1.9214321834453042e-05, "loss": 0.5852, "step": 13575 }, { "epoch": 2.2161544426758093, "grad_norm": 1.787115216255188, "learning_rate": 1.9214198533653777e-05, "loss": 0.6068, "step": 13576 }, { "epoch": 2.2163177013183137, "grad_norm": 1.6745878458023071, "learning_rate": 1.9214075223575818e-05, "loss": 0.5591, "step": 13577 }, { "epoch": 2.2164809599608177, "grad_norm": 2.0698964595794678, "learning_rate": 1.9213951904219284e-05, "loss": 0.6532, "step": 13578 }, { "epoch": 2.216644218603322, "grad_norm": 1.9353781938552856, "learning_rate": 1.9213828575584304e-05, "loss": 0.6464, "step": 13579 }, { "epoch": 2.2168074772458266, "grad_norm": 2.049626588821411, "learning_rate": 1.9213705237671007e-05, "loss": 0.483, "step": 13580 }, { "epoch": 2.216970735888331, "grad_norm": 1.7843250036239624, "learning_rate": 1.9213581890479503e-05, "loss": 0.4853, "step": 13581 }, { "epoch": 2.2171339945308355, "grad_norm": 1.8463362455368042, "learning_rate": 1.9213458534009933e-05, "loss": 0.5713, "step": 13582 }, { "epoch": 2.21729725317334, "grad_norm": 1.8813565969467163, "learning_rate": 1.9213335168262407e-05, "loss": 0.6072, "step": 13583 }, { "epoch": 2.2174605118158444, "grad_norm": 2.41782283782959, "learning_rate": 1.9213211793237056e-05, "loss": 0.6551, "step": 13584 }, { "epoch": 2.217623770458349, "grad_norm": 1.6749900579452515, "learning_rate": 1.9213088408934003e-05, "loss": 0.5389, "step": 13585 }, { "epoch": 2.217787029100853, "grad_norm": 1.705018401145935, "learning_rate": 1.921296501535337e-05, "loss": 0.5046, "step": 13586 }, { "epoch": 2.2179502877433572, "grad_norm": 1.6513890027999878, "learning_rate": 1.921284161249529e-05, "loss": 0.5133, "step": 13587 }, { "epoch": 2.2181135463858617, "grad_norm": 1.7584933042526245, "learning_rate": 1.9212718200359876e-05, "loss": 0.5469, "step": 13588 }, { "epoch": 2.218276805028366, "grad_norm": 1.9311105012893677, "learning_rate": 1.9212594778947256e-05, "loss": 0.6027, "step": 13589 }, { "epoch": 2.2184400636708705, "grad_norm": 1.5126897096633911, "learning_rate": 1.9212471348257562e-05, "loss": 0.4942, "step": 13590 }, { "epoch": 2.218603322313375, "grad_norm": 1.4252760410308838, "learning_rate": 1.9212347908290906e-05, "loss": 0.4826, "step": 13591 }, { "epoch": 2.2187665809558794, "grad_norm": 1.8107327222824097, "learning_rate": 1.921222445904742e-05, "loss": 0.5526, "step": 13592 }, { "epoch": 2.218929839598384, "grad_norm": 2.0095150470733643, "learning_rate": 1.9212101000527225e-05, "loss": 0.5954, "step": 13593 }, { "epoch": 2.2190930982408883, "grad_norm": 1.8584787845611572, "learning_rate": 1.9211977532730448e-05, "loss": 0.5531, "step": 13594 }, { "epoch": 2.2192563568833927, "grad_norm": 1.8032480478286743, "learning_rate": 1.9211854055657216e-05, "loss": 0.5584, "step": 13595 }, { "epoch": 2.2194196155258967, "grad_norm": 1.5795607566833496, "learning_rate": 1.9211730569307642e-05, "loss": 0.5806, "step": 13596 }, { "epoch": 2.219582874168401, "grad_norm": 1.893045425415039, "learning_rate": 1.9211607073681865e-05, "loss": 0.6217, "step": 13597 }, { "epoch": 2.2197461328109056, "grad_norm": 1.8879095315933228, "learning_rate": 1.9211483568779996e-05, "loss": 0.6045, "step": 13598 }, { "epoch": 2.21990939145341, "grad_norm": 3.2096142768859863, "learning_rate": 1.9211360054602167e-05, "loss": 0.6962, "step": 13599 }, { "epoch": 2.2200726500959145, "grad_norm": 1.289203405380249, "learning_rate": 1.92112365311485e-05, "loss": 0.453, "step": 13600 }, { "epoch": 2.220235908738419, "grad_norm": 1.6069953441619873, "learning_rate": 1.9211112998419127e-05, "loss": 0.5033, "step": 13601 }, { "epoch": 2.2203991673809234, "grad_norm": 1.496275782585144, "learning_rate": 1.921098945641416e-05, "loss": 0.5011, "step": 13602 }, { "epoch": 2.220562426023428, "grad_norm": 1.8447808027267456, "learning_rate": 1.921086590513373e-05, "loss": 0.5403, "step": 13603 }, { "epoch": 2.220725684665932, "grad_norm": 1.7715197801589966, "learning_rate": 1.921074234457796e-05, "loss": 0.5345, "step": 13604 }, { "epoch": 2.2208889433084362, "grad_norm": 1.9201494455337524, "learning_rate": 1.9210618774746974e-05, "loss": 0.5244, "step": 13605 }, { "epoch": 2.2210522019509407, "grad_norm": 1.7460635900497437, "learning_rate": 1.9210495195640895e-05, "loss": 0.6641, "step": 13606 }, { "epoch": 2.221215460593445, "grad_norm": 1.421812653541565, "learning_rate": 1.9210371607259857e-05, "loss": 0.5238, "step": 13607 }, { "epoch": 2.2213787192359495, "grad_norm": 1.7292649745941162, "learning_rate": 1.9210248009603974e-05, "loss": 0.4437, "step": 13608 }, { "epoch": 2.221541977878454, "grad_norm": 1.6033774614334106, "learning_rate": 1.921012440267337e-05, "loss": 0.5585, "step": 13609 }, { "epoch": 2.2217052365209584, "grad_norm": 1.7057805061340332, "learning_rate": 1.9210000786468178e-05, "loss": 0.5226, "step": 13610 }, { "epoch": 2.221868495163463, "grad_norm": 1.9785815477371216, "learning_rate": 1.9209877160988516e-05, "loss": 0.5909, "step": 13611 }, { "epoch": 2.2220317538059673, "grad_norm": 1.5307905673980713, "learning_rate": 1.920975352623451e-05, "loss": 0.4785, "step": 13612 }, { "epoch": 2.2221950124484713, "grad_norm": 1.7326740026474, "learning_rate": 1.9209629882206284e-05, "loss": 0.5041, "step": 13613 }, { "epoch": 2.2223582710909757, "grad_norm": 2.1607391834259033, "learning_rate": 1.9209506228903965e-05, "loss": 0.632, "step": 13614 }, { "epoch": 2.22252152973348, "grad_norm": 2.011892795562744, "learning_rate": 1.9209382566327675e-05, "loss": 0.6038, "step": 13615 }, { "epoch": 2.2226847883759846, "grad_norm": 1.4998561143875122, "learning_rate": 1.9209258894477537e-05, "loss": 0.5252, "step": 13616 }, { "epoch": 2.222848047018489, "grad_norm": 1.5509207248687744, "learning_rate": 1.920913521335368e-05, "loss": 0.5204, "step": 13617 }, { "epoch": 2.2230113056609935, "grad_norm": 2.1628544330596924, "learning_rate": 1.9209011522956226e-05, "loss": 0.7085, "step": 13618 }, { "epoch": 2.223174564303498, "grad_norm": 1.7155131101608276, "learning_rate": 1.92088878232853e-05, "loss": 0.5317, "step": 13619 }, { "epoch": 2.2233378229460024, "grad_norm": 1.5279570817947388, "learning_rate": 1.9208764114341028e-05, "loss": 0.4553, "step": 13620 }, { "epoch": 2.223501081588507, "grad_norm": 1.885162591934204, "learning_rate": 1.920864039612353e-05, "loss": 0.6281, "step": 13621 }, { "epoch": 2.223664340231011, "grad_norm": 2.0555436611175537, "learning_rate": 1.9208516668632936e-05, "loss": 0.6174, "step": 13622 }, { "epoch": 2.223827598873515, "grad_norm": 1.6779931783676147, "learning_rate": 1.9208392931869367e-05, "loss": 0.5731, "step": 13623 }, { "epoch": 2.2239908575160197, "grad_norm": 1.7192808389663696, "learning_rate": 1.920826918583295e-05, "loss": 0.6876, "step": 13624 }, { "epoch": 2.224154116158524, "grad_norm": 2.0194222927093506, "learning_rate": 1.9208145430523804e-05, "loss": 0.6239, "step": 13625 }, { "epoch": 2.2243173748010285, "grad_norm": 1.5343436002731323, "learning_rate": 1.920802166594206e-05, "loss": 0.5947, "step": 13626 }, { "epoch": 2.224480633443533, "grad_norm": 1.7890980243682861, "learning_rate": 1.9207897892087844e-05, "loss": 0.5279, "step": 13627 }, { "epoch": 2.2246438920860374, "grad_norm": 1.6333937644958496, "learning_rate": 1.9207774108961273e-05, "loss": 0.5045, "step": 13628 }, { "epoch": 2.224807150728542, "grad_norm": 1.6821261644363403, "learning_rate": 1.920765031656248e-05, "loss": 0.5682, "step": 13629 }, { "epoch": 2.2249704093710463, "grad_norm": 2.1920340061187744, "learning_rate": 1.9207526514891582e-05, "loss": 0.9176, "step": 13630 }, { "epoch": 2.2251336680135503, "grad_norm": 2.1574747562408447, "learning_rate": 1.9207402703948707e-05, "loss": 0.5489, "step": 13631 }, { "epoch": 2.2252969266560547, "grad_norm": 1.9292341470718384, "learning_rate": 1.920727888373398e-05, "loss": 0.6237, "step": 13632 }, { "epoch": 2.225460185298559, "grad_norm": 1.6960676908493042, "learning_rate": 1.920715505424753e-05, "loss": 0.541, "step": 13633 }, { "epoch": 2.2256234439410636, "grad_norm": 1.442441463470459, "learning_rate": 1.9207031215489474e-05, "loss": 0.4037, "step": 13634 }, { "epoch": 2.225786702583568, "grad_norm": 1.270574927330017, "learning_rate": 1.920690736745994e-05, "loss": 0.3262, "step": 13635 }, { "epoch": 2.2259499612260725, "grad_norm": 2.1209495067596436, "learning_rate": 1.9206783510159054e-05, "loss": 0.7687, "step": 13636 }, { "epoch": 2.226113219868577, "grad_norm": 2.033233880996704, "learning_rate": 1.9206659643586938e-05, "loss": 0.6396, "step": 13637 }, { "epoch": 2.2262764785110813, "grad_norm": 1.7493888139724731, "learning_rate": 1.9206535767743717e-05, "loss": 0.6214, "step": 13638 }, { "epoch": 2.2264397371535853, "grad_norm": 1.8327149152755737, "learning_rate": 1.920641188262952e-05, "loss": 0.5637, "step": 13639 }, { "epoch": 2.22660299579609, "grad_norm": 2.0350399017333984, "learning_rate": 1.9206287988244467e-05, "loss": 0.7454, "step": 13640 }, { "epoch": 2.226766254438594, "grad_norm": 1.6738038063049316, "learning_rate": 1.9206164084588685e-05, "loss": 0.5479, "step": 13641 }, { "epoch": 2.2269295130810987, "grad_norm": 1.7716044187545776, "learning_rate": 1.92060401716623e-05, "loss": 0.5822, "step": 13642 }, { "epoch": 2.227092771723603, "grad_norm": 1.9412842988967896, "learning_rate": 1.9205916249465432e-05, "loss": 0.6407, "step": 13643 }, { "epoch": 2.2272560303661075, "grad_norm": 1.6795233488082886, "learning_rate": 1.9205792317998208e-05, "loss": 0.4843, "step": 13644 }, { "epoch": 2.227419289008612, "grad_norm": 1.7944285869598389, "learning_rate": 1.9205668377260757e-05, "loss": 0.5733, "step": 13645 }, { "epoch": 2.2275825476511164, "grad_norm": 1.9152007102966309, "learning_rate": 1.9205544427253198e-05, "loss": 0.6878, "step": 13646 }, { "epoch": 2.227745806293621, "grad_norm": 1.965161919593811, "learning_rate": 1.9205420467975656e-05, "loss": 0.7353, "step": 13647 }, { "epoch": 2.2279090649361253, "grad_norm": 1.8432848453521729, "learning_rate": 1.9205296499428264e-05, "loss": 0.4868, "step": 13648 }, { "epoch": 2.2280723235786293, "grad_norm": 1.7385543584823608, "learning_rate": 1.9205172521611136e-05, "loss": 0.5237, "step": 13649 }, { "epoch": 2.2282355822211337, "grad_norm": 1.6774332523345947, "learning_rate": 1.9205048534524405e-05, "loss": 0.5438, "step": 13650 }, { "epoch": 2.228398840863638, "grad_norm": 1.697908878326416, "learning_rate": 1.9204924538168192e-05, "loss": 0.5761, "step": 13651 }, { "epoch": 2.2285620995061426, "grad_norm": 1.8424638509750366, "learning_rate": 1.920480053254262e-05, "loss": 0.5592, "step": 13652 }, { "epoch": 2.228725358148647, "grad_norm": 1.926652193069458, "learning_rate": 1.9204676517647818e-05, "loss": 0.6077, "step": 13653 }, { "epoch": 2.2288886167911515, "grad_norm": 1.8519768714904785, "learning_rate": 1.920455249348391e-05, "loss": 0.6069, "step": 13654 }, { "epoch": 2.229051875433656, "grad_norm": 1.6059880256652832, "learning_rate": 1.920442846005102e-05, "loss": 0.6116, "step": 13655 }, { "epoch": 2.2292151340761603, "grad_norm": 2.3965744972229004, "learning_rate": 1.920430441734927e-05, "loss": 0.6136, "step": 13656 }, { "epoch": 2.2293783927186643, "grad_norm": 1.9318472146987915, "learning_rate": 1.9204180365378792e-05, "loss": 0.6887, "step": 13657 }, { "epoch": 2.2295416513611688, "grad_norm": 1.564235806465149, "learning_rate": 1.9204056304139703e-05, "loss": 0.5458, "step": 13658 }, { "epoch": 2.229704910003673, "grad_norm": 2.087393283843994, "learning_rate": 1.9203932233632133e-05, "loss": 0.6895, "step": 13659 }, { "epoch": 2.2298681686461777, "grad_norm": 1.6476703882217407, "learning_rate": 1.9203808153856206e-05, "loss": 0.5532, "step": 13660 }, { "epoch": 2.230031427288682, "grad_norm": 1.919629454612732, "learning_rate": 1.9203684064812047e-05, "loss": 0.5048, "step": 13661 }, { "epoch": 2.2301946859311865, "grad_norm": 2.098191022872925, "learning_rate": 1.920355996649978e-05, "loss": 0.6111, "step": 13662 }, { "epoch": 2.230357944573691, "grad_norm": 1.801698088645935, "learning_rate": 1.9203435858919532e-05, "loss": 0.5761, "step": 13663 }, { "epoch": 2.2305212032161954, "grad_norm": 2.014416456222534, "learning_rate": 1.9203311742071426e-05, "loss": 0.7002, "step": 13664 }, { "epoch": 2.2306844618587, "grad_norm": 1.6290326118469238, "learning_rate": 1.9203187615955587e-05, "loss": 0.5589, "step": 13665 }, { "epoch": 2.230847720501204, "grad_norm": 1.9970515966415405, "learning_rate": 1.920306348057214e-05, "loss": 0.6231, "step": 13666 }, { "epoch": 2.2310109791437083, "grad_norm": 1.9401311874389648, "learning_rate": 1.920293933592121e-05, "loss": 0.6371, "step": 13667 }, { "epoch": 2.2311742377862127, "grad_norm": 2.0539445877075195, "learning_rate": 1.9202815182002924e-05, "loss": 0.652, "step": 13668 }, { "epoch": 2.231337496428717, "grad_norm": 1.8345069885253906, "learning_rate": 1.9202691018817406e-05, "loss": 0.5604, "step": 13669 }, { "epoch": 2.2315007550712216, "grad_norm": 1.569280743598938, "learning_rate": 1.9202566846364782e-05, "loss": 0.4602, "step": 13670 }, { "epoch": 2.231664013713726, "grad_norm": 1.7116658687591553, "learning_rate": 1.920244266464517e-05, "loss": 0.5133, "step": 13671 }, { "epoch": 2.2318272723562305, "grad_norm": 2.007737398147583, "learning_rate": 1.9202318473658707e-05, "loss": 0.622, "step": 13672 }, { "epoch": 2.231990530998735, "grad_norm": 1.6424188613891602, "learning_rate": 1.9202194273405506e-05, "loss": 0.546, "step": 13673 }, { "epoch": 2.2321537896412393, "grad_norm": 1.7496731281280518, "learning_rate": 1.9202070063885703e-05, "loss": 0.5046, "step": 13674 }, { "epoch": 2.2323170482837433, "grad_norm": 1.5988408327102661, "learning_rate": 1.9201945845099415e-05, "loss": 0.5005, "step": 13675 }, { "epoch": 2.2324803069262478, "grad_norm": 1.7019726037979126, "learning_rate": 1.920182161704677e-05, "loss": 0.519, "step": 13676 }, { "epoch": 2.232643565568752, "grad_norm": 1.8633251190185547, "learning_rate": 1.9201697379727894e-05, "loss": 0.5243, "step": 13677 }, { "epoch": 2.2328068242112566, "grad_norm": 1.7601333856582642, "learning_rate": 1.920157313314291e-05, "loss": 0.5426, "step": 13678 }, { "epoch": 2.232970082853761, "grad_norm": 1.72750723361969, "learning_rate": 1.9201448877291942e-05, "loss": 0.5703, "step": 13679 }, { "epoch": 2.2331333414962655, "grad_norm": 1.9131797552108765, "learning_rate": 1.9201324612175123e-05, "loss": 0.5821, "step": 13680 }, { "epoch": 2.23329660013877, "grad_norm": 1.756535530090332, "learning_rate": 1.920120033779257e-05, "loss": 0.5365, "step": 13681 }, { "epoch": 2.2334598587812744, "grad_norm": 1.9451779127120972, "learning_rate": 1.9201076054144412e-05, "loss": 0.5343, "step": 13682 }, { "epoch": 2.233623117423779, "grad_norm": 1.742173433303833, "learning_rate": 1.920095176123077e-05, "loss": 0.5621, "step": 13683 }, { "epoch": 2.233786376066283, "grad_norm": 1.9369580745697021, "learning_rate": 1.9200827459051774e-05, "loss": 0.5254, "step": 13684 }, { "epoch": 2.2339496347087873, "grad_norm": 1.525497317314148, "learning_rate": 1.9200703147607545e-05, "loss": 0.5334, "step": 13685 }, { "epoch": 2.2341128933512917, "grad_norm": 1.8263508081436157, "learning_rate": 1.9200578826898212e-05, "loss": 0.5596, "step": 13686 }, { "epoch": 2.234276151993796, "grad_norm": 1.809680461883545, "learning_rate": 1.92004544969239e-05, "loss": 0.5233, "step": 13687 }, { "epoch": 2.2344394106363006, "grad_norm": 1.5661643743515015, "learning_rate": 1.920033015768473e-05, "loss": 0.4759, "step": 13688 }, { "epoch": 2.234602669278805, "grad_norm": 2.0662524700164795, "learning_rate": 1.920020580918083e-05, "loss": 0.6404, "step": 13689 }, { "epoch": 2.2347659279213095, "grad_norm": 1.9582611322402954, "learning_rate": 1.920008145141233e-05, "loss": 0.5272, "step": 13690 }, { "epoch": 2.234929186563814, "grad_norm": 1.7234505414962769, "learning_rate": 1.9199957084379347e-05, "loss": 0.5547, "step": 13691 }, { "epoch": 2.235092445206318, "grad_norm": 1.6556750535964966, "learning_rate": 1.919983270808201e-05, "loss": 0.5056, "step": 13692 }, { "epoch": 2.2352557038488223, "grad_norm": 1.8328620195388794, "learning_rate": 1.9199708322520443e-05, "loss": 0.5382, "step": 13693 }, { "epoch": 2.2354189624913268, "grad_norm": 1.713192343711853, "learning_rate": 1.9199583927694775e-05, "loss": 0.4894, "step": 13694 }, { "epoch": 2.235582221133831, "grad_norm": 1.8599199056625366, "learning_rate": 1.919945952360512e-05, "loss": 0.5441, "step": 13695 }, { "epoch": 2.2357454797763356, "grad_norm": 1.7368930578231812, "learning_rate": 1.919933511025162e-05, "loss": 0.4791, "step": 13696 }, { "epoch": 2.23590873841884, "grad_norm": 1.895760416984558, "learning_rate": 1.9199210687634392e-05, "loss": 0.6363, "step": 13697 }, { "epoch": 2.2360719970613445, "grad_norm": 2.0075573921203613, "learning_rate": 1.9199086255753557e-05, "loss": 0.58, "step": 13698 }, { "epoch": 2.236235255703849, "grad_norm": 1.6400787830352783, "learning_rate": 1.9198961814609248e-05, "loss": 0.5148, "step": 13699 }, { "epoch": 2.2363985143463534, "grad_norm": 1.8108351230621338, "learning_rate": 1.9198837364201587e-05, "loss": 0.5497, "step": 13700 }, { "epoch": 2.236561772988858, "grad_norm": 1.849120020866394, "learning_rate": 1.9198712904530695e-05, "loss": 0.6571, "step": 13701 }, { "epoch": 2.236725031631362, "grad_norm": 1.6454564332962036, "learning_rate": 1.9198588435596705e-05, "loss": 0.4953, "step": 13702 }, { "epoch": 2.2368882902738663, "grad_norm": 1.6178202629089355, "learning_rate": 1.919846395739974e-05, "loss": 0.5045, "step": 13703 }, { "epoch": 2.2370515489163707, "grad_norm": 1.6802425384521484, "learning_rate": 1.919833946993992e-05, "loss": 0.5456, "step": 13704 }, { "epoch": 2.237214807558875, "grad_norm": 1.8576278686523438, "learning_rate": 1.919821497321738e-05, "loss": 0.6614, "step": 13705 }, { "epoch": 2.2373780662013796, "grad_norm": 1.723831295967102, "learning_rate": 1.9198090467232235e-05, "loss": 0.4974, "step": 13706 }, { "epoch": 2.237541324843884, "grad_norm": 1.556442141532898, "learning_rate": 1.9197965951984618e-05, "loss": 0.5439, "step": 13707 }, { "epoch": 2.2377045834863885, "grad_norm": 1.8674726486206055, "learning_rate": 1.9197841427474652e-05, "loss": 0.5716, "step": 13708 }, { "epoch": 2.237867842128893, "grad_norm": 1.748146414756775, "learning_rate": 1.9197716893702458e-05, "loss": 0.6134, "step": 13709 }, { "epoch": 2.238031100771397, "grad_norm": 1.9865103960037231, "learning_rate": 1.919759235066817e-05, "loss": 0.5648, "step": 13710 }, { "epoch": 2.2381943594139013, "grad_norm": 1.5851380825042725, "learning_rate": 1.919746779837191e-05, "loss": 0.5418, "step": 13711 }, { "epoch": 2.2383576180564058, "grad_norm": 1.774580717086792, "learning_rate": 1.9197343236813798e-05, "loss": 0.5809, "step": 13712 }, { "epoch": 2.23852087669891, "grad_norm": 2.014620304107666, "learning_rate": 1.9197218665993965e-05, "loss": 0.5974, "step": 13713 }, { "epoch": 2.2386841353414146, "grad_norm": 1.7066744565963745, "learning_rate": 1.9197094085912536e-05, "loss": 0.5592, "step": 13714 }, { "epoch": 2.238847393983919, "grad_norm": 1.8475141525268555, "learning_rate": 1.9196969496569638e-05, "loss": 0.6696, "step": 13715 }, { "epoch": 2.2390106526264235, "grad_norm": 2.2379250526428223, "learning_rate": 1.9196844897965393e-05, "loss": 0.7083, "step": 13716 }, { "epoch": 2.239173911268928, "grad_norm": 1.96945059299469, "learning_rate": 1.9196720290099925e-05, "loss": 0.616, "step": 13717 }, { "epoch": 2.2393371699114324, "grad_norm": 1.854867696762085, "learning_rate": 1.919659567297336e-05, "loss": 0.677, "step": 13718 }, { "epoch": 2.2395004285539364, "grad_norm": 1.9322553873062134, "learning_rate": 1.9196471046585832e-05, "loss": 0.5433, "step": 13719 }, { "epoch": 2.239663687196441, "grad_norm": 1.7704932689666748, "learning_rate": 1.9196346410937455e-05, "loss": 0.4946, "step": 13720 }, { "epoch": 2.2398269458389453, "grad_norm": 2.099966526031494, "learning_rate": 1.9196221766028366e-05, "loss": 0.6185, "step": 13721 }, { "epoch": 2.2399902044814497, "grad_norm": 1.8084121942520142, "learning_rate": 1.919609711185868e-05, "loss": 0.661, "step": 13722 }, { "epoch": 2.240153463123954, "grad_norm": 1.846971869468689, "learning_rate": 1.9195972448428523e-05, "loss": 0.5587, "step": 13723 }, { "epoch": 2.2403167217664586, "grad_norm": 1.7566790580749512, "learning_rate": 1.919584777573803e-05, "loss": 0.6137, "step": 13724 }, { "epoch": 2.240479980408963, "grad_norm": 1.6030325889587402, "learning_rate": 1.9195723093787316e-05, "loss": 0.527, "step": 13725 }, { "epoch": 2.2406432390514675, "grad_norm": 2.250749111175537, "learning_rate": 1.9195598402576516e-05, "loss": 0.6198, "step": 13726 }, { "epoch": 2.2408064976939714, "grad_norm": 1.690442442893982, "learning_rate": 1.9195473702105748e-05, "loss": 0.5035, "step": 13727 }, { "epoch": 2.240969756336476, "grad_norm": 2.0565600395202637, "learning_rate": 1.919534899237514e-05, "loss": 0.7277, "step": 13728 }, { "epoch": 2.2411330149789803, "grad_norm": 1.8652559518814087, "learning_rate": 1.919522427338482e-05, "loss": 0.4829, "step": 13729 }, { "epoch": 2.2412962736214848, "grad_norm": 1.6995346546173096, "learning_rate": 1.9195099545134913e-05, "loss": 0.5189, "step": 13730 }, { "epoch": 2.241459532263989, "grad_norm": 2.6613330841064453, "learning_rate": 1.9194974807625543e-05, "loss": 0.7003, "step": 13731 }, { "epoch": 2.2416227909064936, "grad_norm": 2.0136265754699707, "learning_rate": 1.9194850060856832e-05, "loss": 0.6499, "step": 13732 }, { "epoch": 2.241786049548998, "grad_norm": 2.1207540035247803, "learning_rate": 1.919472530482891e-05, "loss": 0.6356, "step": 13733 }, { "epoch": 2.2419493081915025, "grad_norm": 1.6578316688537598, "learning_rate": 1.9194600539541906e-05, "loss": 0.4922, "step": 13734 }, { "epoch": 2.242112566834007, "grad_norm": 1.7077155113220215, "learning_rate": 1.919447576499594e-05, "loss": 0.6153, "step": 13735 }, { "epoch": 2.2422758254765114, "grad_norm": 2.079155445098877, "learning_rate": 1.9194350981191135e-05, "loss": 0.6113, "step": 13736 }, { "epoch": 2.2424390841190154, "grad_norm": 1.8367098569869995, "learning_rate": 1.9194226188127625e-05, "loss": 0.6027, "step": 13737 }, { "epoch": 2.24260234276152, "grad_norm": 2.134187698364258, "learning_rate": 1.919410138580553e-05, "loss": 0.594, "step": 13738 }, { "epoch": 2.2427656014040243, "grad_norm": 1.9266157150268555, "learning_rate": 1.919397657422498e-05, "loss": 0.5071, "step": 13739 }, { "epoch": 2.2429288600465287, "grad_norm": 1.9382504224777222, "learning_rate": 1.9193851753386095e-05, "loss": 0.661, "step": 13740 }, { "epoch": 2.243092118689033, "grad_norm": 1.9716973304748535, "learning_rate": 1.9193726923289006e-05, "loss": 0.6356, "step": 13741 }, { "epoch": 2.2432553773315376, "grad_norm": 1.9010920524597168, "learning_rate": 1.9193602083933834e-05, "loss": 0.7011, "step": 13742 }, { "epoch": 2.243418635974042, "grad_norm": 2.4176313877105713, "learning_rate": 1.919347723532071e-05, "loss": 0.7279, "step": 13743 }, { "epoch": 2.2435818946165464, "grad_norm": 1.9597762823104858, "learning_rate": 1.9193352377449757e-05, "loss": 0.591, "step": 13744 }, { "epoch": 2.2437451532590504, "grad_norm": 2.0583512783050537, "learning_rate": 1.9193227510321096e-05, "loss": 0.6118, "step": 13745 }, { "epoch": 2.243908411901555, "grad_norm": 1.846514344215393, "learning_rate": 1.919310263393486e-05, "loss": 0.5783, "step": 13746 }, { "epoch": 2.2440716705440593, "grad_norm": 1.6857624053955078, "learning_rate": 1.9192977748291174e-05, "loss": 0.5163, "step": 13747 }, { "epoch": 2.2442349291865638, "grad_norm": 1.8853702545166016, "learning_rate": 1.919285285339016e-05, "loss": 0.5414, "step": 13748 }, { "epoch": 2.244398187829068, "grad_norm": 1.7761893272399902, "learning_rate": 1.9192727949231945e-05, "loss": 0.5489, "step": 13749 }, { "epoch": 2.2445614464715726, "grad_norm": 1.9203208684921265, "learning_rate": 1.9192603035816657e-05, "loss": 0.5609, "step": 13750 }, { "epoch": 2.244724705114077, "grad_norm": 1.6524074077606201, "learning_rate": 1.919247811314442e-05, "loss": 0.5297, "step": 13751 }, { "epoch": 2.2448879637565815, "grad_norm": 1.7700055837631226, "learning_rate": 1.919235318121536e-05, "loss": 0.5477, "step": 13752 }, { "epoch": 2.245051222399086, "grad_norm": 1.578218936920166, "learning_rate": 1.9192228240029604e-05, "loss": 0.5708, "step": 13753 }, { "epoch": 2.24521448104159, "grad_norm": 1.9086763858795166, "learning_rate": 1.9192103289587273e-05, "loss": 0.523, "step": 13754 }, { "epoch": 2.2453777396840944, "grad_norm": 1.6069018840789795, "learning_rate": 1.91919783298885e-05, "loss": 0.4637, "step": 13755 }, { "epoch": 2.245540998326599, "grad_norm": 1.522386908531189, "learning_rate": 1.9191853360933403e-05, "loss": 0.4893, "step": 13756 }, { "epoch": 2.2457042569691033, "grad_norm": 1.5854510068893433, "learning_rate": 1.9191728382722114e-05, "loss": 0.5097, "step": 13757 }, { "epoch": 2.2458675156116077, "grad_norm": 1.8278493881225586, "learning_rate": 1.9191603395254758e-05, "loss": 0.623, "step": 13758 }, { "epoch": 2.246030774254112, "grad_norm": 1.8326702117919922, "learning_rate": 1.919147839853146e-05, "loss": 0.5889, "step": 13759 }, { "epoch": 2.2461940328966166, "grad_norm": 2.199416160583496, "learning_rate": 1.9191353392552346e-05, "loss": 0.7342, "step": 13760 }, { "epoch": 2.246357291539121, "grad_norm": 2.0428128242492676, "learning_rate": 1.9191228377317542e-05, "loss": 0.8464, "step": 13761 }, { "epoch": 2.2465205501816254, "grad_norm": 1.8300420045852661, "learning_rate": 1.919110335282717e-05, "loss": 0.5663, "step": 13762 }, { "epoch": 2.2466838088241294, "grad_norm": 1.7930339574813843, "learning_rate": 1.9190978319081363e-05, "loss": 0.5249, "step": 13763 }, { "epoch": 2.246847067466634, "grad_norm": 1.9573358297348022, "learning_rate": 1.9190853276080243e-05, "loss": 0.6438, "step": 13764 }, { "epoch": 2.2470103261091383, "grad_norm": 2.268261671066284, "learning_rate": 1.9190728223823932e-05, "loss": 0.7009, "step": 13765 }, { "epoch": 2.2471735847516427, "grad_norm": 1.532370686531067, "learning_rate": 1.9190603162312564e-05, "loss": 0.4785, "step": 13766 }, { "epoch": 2.247336843394147, "grad_norm": 2.237332582473755, "learning_rate": 1.9190478091546262e-05, "loss": 0.6256, "step": 13767 }, { "epoch": 2.2475001020366516, "grad_norm": 2.086373805999756, "learning_rate": 1.9190353011525147e-05, "loss": 0.6136, "step": 13768 }, { "epoch": 2.247663360679156, "grad_norm": 2.136364698410034, "learning_rate": 1.9190227922249353e-05, "loss": 0.6996, "step": 13769 }, { "epoch": 2.2478266193216605, "grad_norm": 1.9089447259902954, "learning_rate": 1.9190102823719e-05, "loss": 0.6719, "step": 13770 }, { "epoch": 2.247989877964165, "grad_norm": 1.6479437351226807, "learning_rate": 1.9189977715934214e-05, "loss": 0.5249, "step": 13771 }, { "epoch": 2.248153136606669, "grad_norm": 1.7013919353485107, "learning_rate": 1.9189852598895126e-05, "loss": 0.5759, "step": 13772 }, { "epoch": 2.2483163952491734, "grad_norm": 2.2088284492492676, "learning_rate": 1.9189727472601858e-05, "loss": 0.6854, "step": 13773 }, { "epoch": 2.248479653891678, "grad_norm": 1.5300652980804443, "learning_rate": 1.9189602337054537e-05, "loss": 0.4941, "step": 13774 }, { "epoch": 2.2486429125341822, "grad_norm": 2.220555067062378, "learning_rate": 1.9189477192253288e-05, "loss": 0.7048, "step": 13775 }, { "epoch": 2.2488061711766867, "grad_norm": 1.9535768032073975, "learning_rate": 1.9189352038198237e-05, "loss": 0.5705, "step": 13776 }, { "epoch": 2.248969429819191, "grad_norm": 2.086860179901123, "learning_rate": 1.9189226874889513e-05, "loss": 0.5631, "step": 13777 }, { "epoch": 2.2491326884616956, "grad_norm": 1.9180946350097656, "learning_rate": 1.918910170232724e-05, "loss": 0.6258, "step": 13778 }, { "epoch": 2.2492959471042, "grad_norm": 1.7438100576400757, "learning_rate": 1.9188976520511544e-05, "loss": 0.5196, "step": 13779 }, { "epoch": 2.249459205746704, "grad_norm": 1.5394387245178223, "learning_rate": 1.918885132944255e-05, "loss": 0.524, "step": 13780 }, { "epoch": 2.2496224643892084, "grad_norm": 1.583945870399475, "learning_rate": 1.9188726129120384e-05, "loss": 0.5357, "step": 13781 }, { "epoch": 2.249785723031713, "grad_norm": 1.5881071090698242, "learning_rate": 1.9188600919545176e-05, "loss": 0.4859, "step": 13782 }, { "epoch": 2.2499489816742173, "grad_norm": 1.777314305305481, "learning_rate": 1.9188475700717048e-05, "loss": 0.5029, "step": 13783 }, { "epoch": 2.2501122403167217, "grad_norm": 1.783778190612793, "learning_rate": 1.918835047263613e-05, "loss": 0.6128, "step": 13784 }, { "epoch": 2.250275498959226, "grad_norm": 1.4925121068954468, "learning_rate": 1.918822523530254e-05, "loss": 0.492, "step": 13785 }, { "epoch": 2.2504387576017306, "grad_norm": 2.04084849357605, "learning_rate": 1.9188099988716413e-05, "loss": 0.7082, "step": 13786 }, { "epoch": 2.250602016244235, "grad_norm": 2.0846080780029297, "learning_rate": 1.918797473287787e-05, "loss": 0.6181, "step": 13787 }, { "epoch": 2.2507652748867395, "grad_norm": 1.808350682258606, "learning_rate": 1.918784946778704e-05, "loss": 0.5681, "step": 13788 }, { "epoch": 2.250928533529244, "grad_norm": 1.5540213584899902, "learning_rate": 1.9187724193444048e-05, "loss": 0.4644, "step": 13789 }, { "epoch": 2.251091792171748, "grad_norm": 1.8823357820510864, "learning_rate": 1.9187598909849023e-05, "loss": 0.5541, "step": 13790 }, { "epoch": 2.2512550508142524, "grad_norm": 1.7802637815475464, "learning_rate": 1.9187473617002084e-05, "loss": 0.5994, "step": 13791 }, { "epoch": 2.251418309456757, "grad_norm": 1.6765565872192383, "learning_rate": 1.9187348314903363e-05, "loss": 0.4512, "step": 13792 }, { "epoch": 2.2515815680992612, "grad_norm": 1.6791355609893799, "learning_rate": 1.9187223003552986e-05, "loss": 0.5165, "step": 13793 }, { "epoch": 2.2517448267417657, "grad_norm": 1.7535674571990967, "learning_rate": 1.9187097682951078e-05, "loss": 0.6803, "step": 13794 }, { "epoch": 2.25190808538427, "grad_norm": 1.520835041999817, "learning_rate": 1.9186972353097764e-05, "loss": 0.5403, "step": 13795 }, { "epoch": 2.2520713440267746, "grad_norm": 2.1422929763793945, "learning_rate": 1.918684701399317e-05, "loss": 0.7642, "step": 13796 }, { "epoch": 2.252234602669279, "grad_norm": 2.0887372493743896, "learning_rate": 1.9186721665637424e-05, "loss": 0.668, "step": 13797 }, { "epoch": 2.252397861311783, "grad_norm": 1.7719815969467163, "learning_rate": 1.9186596308030652e-05, "loss": 0.577, "step": 13798 }, { "epoch": 2.2525611199542874, "grad_norm": 2.3719582557678223, "learning_rate": 1.918647094117298e-05, "loss": 0.6861, "step": 13799 }, { "epoch": 2.252724378596792, "grad_norm": 1.7096679210662842, "learning_rate": 1.918634556506454e-05, "loss": 0.5144, "step": 13800 }, { "epoch": 2.2528876372392963, "grad_norm": 1.9554489850997925, "learning_rate": 1.9186220179705444e-05, "loss": 0.6177, "step": 13801 }, { "epoch": 2.2530508958818007, "grad_norm": 1.6707007884979248, "learning_rate": 1.9186094785095827e-05, "loss": 0.5346, "step": 13802 }, { "epoch": 2.253214154524305, "grad_norm": 1.5249888896942139, "learning_rate": 1.9185969381235822e-05, "loss": 0.4965, "step": 13803 }, { "epoch": 2.2533774131668096, "grad_norm": 1.7888262271881104, "learning_rate": 1.9185843968125543e-05, "loss": 0.5457, "step": 13804 }, { "epoch": 2.253540671809314, "grad_norm": 1.7828142642974854, "learning_rate": 1.918571854576512e-05, "loss": 0.5668, "step": 13805 }, { "epoch": 2.2537039304518185, "grad_norm": 1.419290542602539, "learning_rate": 1.9185593114154683e-05, "loss": 0.5243, "step": 13806 }, { "epoch": 2.2538671890943225, "grad_norm": 1.5581082105636597, "learning_rate": 1.9185467673294358e-05, "loss": 0.5372, "step": 13807 }, { "epoch": 2.254030447736827, "grad_norm": 1.8153951168060303, "learning_rate": 1.918534222318427e-05, "loss": 0.5529, "step": 13808 }, { "epoch": 2.2541937063793314, "grad_norm": 2.0038979053497314, "learning_rate": 1.918521676382454e-05, "loss": 0.7703, "step": 13809 }, { "epoch": 2.254356965021836, "grad_norm": 1.8248510360717773, "learning_rate": 1.9185091295215305e-05, "loss": 0.5376, "step": 13810 }, { "epoch": 2.2545202236643402, "grad_norm": 1.948110818862915, "learning_rate": 1.918496581735668e-05, "loss": 0.5838, "step": 13811 }, { "epoch": 2.2546834823068447, "grad_norm": 1.3878388404846191, "learning_rate": 1.91848403302488e-05, "loss": 0.4536, "step": 13812 }, { "epoch": 2.254846740949349, "grad_norm": 1.7024873495101929, "learning_rate": 1.9184714833891788e-05, "loss": 0.5511, "step": 13813 }, { "epoch": 2.2550099995918536, "grad_norm": 1.5209946632385254, "learning_rate": 1.918458932828577e-05, "loss": 0.4905, "step": 13814 }, { "epoch": 2.2551732582343575, "grad_norm": 2.1684179306030273, "learning_rate": 1.9184463813430874e-05, "loss": 0.6477, "step": 13815 }, { "epoch": 2.255336516876862, "grad_norm": 1.8219887018203735, "learning_rate": 1.9184338289327223e-05, "loss": 0.4933, "step": 13816 }, { "epoch": 2.2554997755193664, "grad_norm": 2.1536450386047363, "learning_rate": 1.918421275597495e-05, "loss": 0.6975, "step": 13817 }, { "epoch": 2.255663034161871, "grad_norm": 1.6983513832092285, "learning_rate": 1.9184087213374175e-05, "loss": 0.541, "step": 13818 }, { "epoch": 2.2558262928043753, "grad_norm": 2.055955648422241, "learning_rate": 1.9183961661525025e-05, "loss": 0.6257, "step": 13819 }, { "epoch": 2.2559895514468797, "grad_norm": 1.859616994857788, "learning_rate": 1.9183836100427627e-05, "loss": 0.5175, "step": 13820 }, { "epoch": 2.256152810089384, "grad_norm": 1.765570878982544, "learning_rate": 1.918371053008211e-05, "loss": 0.5113, "step": 13821 }, { "epoch": 2.2563160687318886, "grad_norm": 1.5935189723968506, "learning_rate": 1.9183584950488603e-05, "loss": 0.468, "step": 13822 }, { "epoch": 2.256479327374393, "grad_norm": 2.0314278602600098, "learning_rate": 1.9183459361647223e-05, "loss": 0.7029, "step": 13823 }, { "epoch": 2.2566425860168975, "grad_norm": 1.4410511255264282, "learning_rate": 1.9183333763558104e-05, "loss": 0.4078, "step": 13824 }, { "epoch": 2.2568058446594015, "grad_norm": 1.612371802330017, "learning_rate": 1.918320815622137e-05, "loss": 0.4749, "step": 13825 }, { "epoch": 2.256969103301906, "grad_norm": 1.6539796590805054, "learning_rate": 1.918308253963715e-05, "loss": 0.6211, "step": 13826 }, { "epoch": 2.2571323619444104, "grad_norm": 1.6949870586395264, "learning_rate": 1.9182956913805566e-05, "loss": 0.5949, "step": 13827 }, { "epoch": 2.257295620586915, "grad_norm": 1.9051918983459473, "learning_rate": 1.9182831278726746e-05, "loss": 0.6761, "step": 13828 }, { "epoch": 2.2574588792294192, "grad_norm": 1.7296584844589233, "learning_rate": 1.918270563440082e-05, "loss": 0.5487, "step": 13829 }, { "epoch": 2.2576221378719237, "grad_norm": 1.65151846408844, "learning_rate": 1.9182579980827908e-05, "loss": 0.6251, "step": 13830 }, { "epoch": 2.257785396514428, "grad_norm": 1.6073336601257324, "learning_rate": 1.9182454318008144e-05, "loss": 0.5555, "step": 13831 }, { "epoch": 2.2579486551569325, "grad_norm": 1.6467797756195068, "learning_rate": 1.918232864594165e-05, "loss": 0.563, "step": 13832 }, { "epoch": 2.2581119137994365, "grad_norm": 1.6051732301712036, "learning_rate": 1.918220296462855e-05, "loss": 0.4862, "step": 13833 }, { "epoch": 2.258275172441941, "grad_norm": 1.9760619401931763, "learning_rate": 1.918207727406898e-05, "loss": 0.681, "step": 13834 }, { "epoch": 2.2584384310844454, "grad_norm": 1.5145838260650635, "learning_rate": 1.918195157426306e-05, "loss": 0.4378, "step": 13835 }, { "epoch": 2.25860168972695, "grad_norm": 1.745262622833252, "learning_rate": 1.9181825865210913e-05, "loss": 0.5508, "step": 13836 }, { "epoch": 2.2587649483694543, "grad_norm": 1.4948962926864624, "learning_rate": 1.918170014691267e-05, "loss": 0.51, "step": 13837 }, { "epoch": 2.2589282070119587, "grad_norm": 1.5989919900894165, "learning_rate": 1.9181574419368463e-05, "loss": 0.5212, "step": 13838 }, { "epoch": 2.259091465654463, "grad_norm": 1.9504706859588623, "learning_rate": 1.9181448682578408e-05, "loss": 0.6199, "step": 13839 }, { "epoch": 2.2592547242969676, "grad_norm": 1.9903374910354614, "learning_rate": 1.9181322936542638e-05, "loss": 0.6058, "step": 13840 }, { "epoch": 2.259417982939472, "grad_norm": 1.735977053642273, "learning_rate": 1.9181197181261277e-05, "loss": 0.5242, "step": 13841 }, { "epoch": 2.2595812415819765, "grad_norm": 1.8422589302062988, "learning_rate": 1.9181071416734453e-05, "loss": 0.5314, "step": 13842 }, { "epoch": 2.2597445002244805, "grad_norm": 1.844992995262146, "learning_rate": 1.9180945642962294e-05, "loss": 0.5738, "step": 13843 }, { "epoch": 2.259907758866985, "grad_norm": 1.797642469406128, "learning_rate": 1.9180819859944927e-05, "loss": 0.5951, "step": 13844 }, { "epoch": 2.2600710175094894, "grad_norm": 1.7090747356414795, "learning_rate": 1.9180694067682474e-05, "loss": 0.5398, "step": 13845 }, { "epoch": 2.260234276151994, "grad_norm": 1.628329873085022, "learning_rate": 1.9180568266175065e-05, "loss": 0.5362, "step": 13846 }, { "epoch": 2.2603975347944982, "grad_norm": 1.6464911699295044, "learning_rate": 1.9180442455422824e-05, "loss": 0.58, "step": 13847 }, { "epoch": 2.2605607934370027, "grad_norm": 1.969878911972046, "learning_rate": 1.9180316635425883e-05, "loss": 0.7173, "step": 13848 }, { "epoch": 2.260724052079507, "grad_norm": 1.8124635219573975, "learning_rate": 1.9180190806184366e-05, "loss": 0.525, "step": 13849 }, { "epoch": 2.260887310722011, "grad_norm": 2.1061315536499023, "learning_rate": 1.91800649676984e-05, "loss": 0.6079, "step": 13850 }, { "epoch": 2.2610505693645155, "grad_norm": 1.582314372062683, "learning_rate": 1.9179939119968105e-05, "loss": 0.5472, "step": 13851 }, { "epoch": 2.26121382800702, "grad_norm": 1.6782244443893433, "learning_rate": 1.9179813262993618e-05, "loss": 0.5281, "step": 13852 }, { "epoch": 2.2613770866495244, "grad_norm": 1.934603214263916, "learning_rate": 1.917968739677506e-05, "loss": 0.6356, "step": 13853 }, { "epoch": 2.261540345292029, "grad_norm": 1.730089783668518, "learning_rate": 1.9179561521312562e-05, "loss": 0.5705, "step": 13854 }, { "epoch": 2.2617036039345333, "grad_norm": 1.7121391296386719, "learning_rate": 1.9179435636606247e-05, "loss": 0.5205, "step": 13855 }, { "epoch": 2.2618668625770377, "grad_norm": 1.9675350189208984, "learning_rate": 1.917930974265624e-05, "loss": 0.5935, "step": 13856 }, { "epoch": 2.262030121219542, "grad_norm": 1.6442006826400757, "learning_rate": 1.9179183839462673e-05, "loss": 0.4458, "step": 13857 }, { "epoch": 2.2621933798620466, "grad_norm": 2.1272647380828857, "learning_rate": 1.917905792702567e-05, "loss": 0.7081, "step": 13858 }, { "epoch": 2.262356638504551, "grad_norm": 1.631751537322998, "learning_rate": 1.917893200534536e-05, "loss": 0.4702, "step": 13859 }, { "epoch": 2.262519897147055, "grad_norm": 1.8436928987503052, "learning_rate": 1.9178806074421866e-05, "loss": 0.5533, "step": 13860 }, { "epoch": 2.2626831557895595, "grad_norm": 1.3239208459854126, "learning_rate": 1.9178680134255314e-05, "loss": 0.5252, "step": 13861 }, { "epoch": 2.262846414432064, "grad_norm": 1.4567985534667969, "learning_rate": 1.917855418484584e-05, "loss": 0.4545, "step": 13862 }, { "epoch": 2.2630096730745684, "grad_norm": 1.751530647277832, "learning_rate": 1.9178428226193558e-05, "loss": 0.6543, "step": 13863 }, { "epoch": 2.263172931717073, "grad_norm": 1.664368748664856, "learning_rate": 1.9178302258298606e-05, "loss": 0.5076, "step": 13864 }, { "epoch": 2.2633361903595772, "grad_norm": 2.0118162631988525, "learning_rate": 1.9178176281161104e-05, "loss": 0.5549, "step": 13865 }, { "epoch": 2.2634994490020817, "grad_norm": 1.7365078926086426, "learning_rate": 1.917805029478118e-05, "loss": 0.5534, "step": 13866 }, { "epoch": 2.263662707644586, "grad_norm": 1.823733925819397, "learning_rate": 1.9177924299158963e-05, "loss": 0.5633, "step": 13867 }, { "epoch": 2.26382596628709, "grad_norm": 1.671911358833313, "learning_rate": 1.9177798294294576e-05, "loss": 0.5678, "step": 13868 }, { "epoch": 2.2639892249295945, "grad_norm": 1.678178310394287, "learning_rate": 1.917767228018815e-05, "loss": 0.5135, "step": 13869 }, { "epoch": 2.264152483572099, "grad_norm": 1.7610039710998535, "learning_rate": 1.9177546256839814e-05, "loss": 0.5588, "step": 13870 }, { "epoch": 2.2643157422146034, "grad_norm": 1.5994195938110352, "learning_rate": 1.9177420224249688e-05, "loss": 0.5346, "step": 13871 }, { "epoch": 2.264479000857108, "grad_norm": 1.4797704219818115, "learning_rate": 1.9177294182417904e-05, "loss": 0.5345, "step": 13872 }, { "epoch": 2.2646422594996123, "grad_norm": 1.9207110404968262, "learning_rate": 1.9177168131344587e-05, "loss": 0.6362, "step": 13873 }, { "epoch": 2.2648055181421167, "grad_norm": 1.6864339113235474, "learning_rate": 1.917704207102986e-05, "loss": 0.4922, "step": 13874 }, { "epoch": 2.264968776784621, "grad_norm": 2.0660033226013184, "learning_rate": 1.9176916001473857e-05, "loss": 0.753, "step": 13875 }, { "epoch": 2.2651320354271256, "grad_norm": 2.0036473274230957, "learning_rate": 1.9176789922676705e-05, "loss": 0.6404, "step": 13876 }, { "epoch": 2.26529529406963, "grad_norm": 1.9450055360794067, "learning_rate": 1.9176663834638525e-05, "loss": 0.6422, "step": 13877 }, { "epoch": 2.265458552712134, "grad_norm": 1.956730604171753, "learning_rate": 1.9176537737359446e-05, "loss": 0.5563, "step": 13878 }, { "epoch": 2.2656218113546385, "grad_norm": 1.7608140707015991, "learning_rate": 1.9176411630839597e-05, "loss": 0.6208, "step": 13879 }, { "epoch": 2.265785069997143, "grad_norm": 1.691562533378601, "learning_rate": 1.9176285515079102e-05, "loss": 0.6006, "step": 13880 }, { "epoch": 2.2659483286396473, "grad_norm": 1.8563393354415894, "learning_rate": 1.9176159390078095e-05, "loss": 0.5589, "step": 13881 }, { "epoch": 2.266111587282152, "grad_norm": 1.8398960828781128, "learning_rate": 1.9176033255836694e-05, "loss": 0.641, "step": 13882 }, { "epoch": 2.2662748459246562, "grad_norm": 1.6929503679275513, "learning_rate": 1.9175907112355034e-05, "loss": 0.4726, "step": 13883 }, { "epoch": 2.2664381045671607, "grad_norm": 1.6380404233932495, "learning_rate": 1.9175780959633234e-05, "loss": 0.4983, "step": 13884 }, { "epoch": 2.266601363209665, "grad_norm": 2.0752813816070557, "learning_rate": 1.9175654797671422e-05, "loss": 0.5857, "step": 13885 }, { "epoch": 2.266764621852169, "grad_norm": 1.9312282800674438, "learning_rate": 1.917552862646973e-05, "loss": 0.5801, "step": 13886 }, { "epoch": 2.2669278804946735, "grad_norm": 1.9264646768569946, "learning_rate": 1.917540244602829e-05, "loss": 0.5609, "step": 13887 }, { "epoch": 2.267091139137178, "grad_norm": 1.870976448059082, "learning_rate": 1.9175276256347217e-05, "loss": 0.6939, "step": 13888 }, { "epoch": 2.2672543977796824, "grad_norm": 1.9260129928588867, "learning_rate": 1.917515005742664e-05, "loss": 0.5571, "step": 13889 }, { "epoch": 2.267417656422187, "grad_norm": 1.7759521007537842, "learning_rate": 1.9175023849266697e-05, "loss": 0.5906, "step": 13890 }, { "epoch": 2.2675809150646913, "grad_norm": 2.107051134109497, "learning_rate": 1.91748976318675e-05, "loss": 0.5653, "step": 13891 }, { "epoch": 2.2677441737071957, "grad_norm": 1.763473391532898, "learning_rate": 1.9174771405229187e-05, "loss": 0.555, "step": 13892 }, { "epoch": 2.2679074323497, "grad_norm": 1.7075201272964478, "learning_rate": 1.9174645169351882e-05, "loss": 0.5686, "step": 13893 }, { "epoch": 2.2680706909922046, "grad_norm": 1.8023390769958496, "learning_rate": 1.917451892423571e-05, "loss": 0.5845, "step": 13894 }, { "epoch": 2.268233949634709, "grad_norm": 1.7866251468658447, "learning_rate": 1.9174392669880803e-05, "loss": 0.5897, "step": 13895 }, { "epoch": 2.268397208277213, "grad_norm": 1.7991306781768799, "learning_rate": 1.9174266406287282e-05, "loss": 0.6519, "step": 13896 }, { "epoch": 2.2685604669197175, "grad_norm": 1.4861586093902588, "learning_rate": 1.9174140133455278e-05, "loss": 0.4436, "step": 13897 }, { "epoch": 2.268723725562222, "grad_norm": 1.8733670711517334, "learning_rate": 1.917401385138492e-05, "loss": 0.7434, "step": 13898 }, { "epoch": 2.2688869842047263, "grad_norm": 1.8632951974868774, "learning_rate": 1.917388756007633e-05, "loss": 0.4958, "step": 13899 }, { "epoch": 2.269050242847231, "grad_norm": 1.8713417053222656, "learning_rate": 1.9173761259529634e-05, "loss": 0.5695, "step": 13900 }, { "epoch": 2.269213501489735, "grad_norm": 1.8278506994247437, "learning_rate": 1.9173634949744967e-05, "loss": 0.5632, "step": 13901 }, { "epoch": 2.2693767601322397, "grad_norm": 2.0684211254119873, "learning_rate": 1.9173508630722454e-05, "loss": 0.6358, "step": 13902 }, { "epoch": 2.2695400187747437, "grad_norm": 1.6291264295578003, "learning_rate": 1.9173382302462217e-05, "loss": 0.5608, "step": 13903 }, { "epoch": 2.269703277417248, "grad_norm": 1.6471129655838013, "learning_rate": 1.9173255964964384e-05, "loss": 0.5843, "step": 13904 }, { "epoch": 2.2698665360597525, "grad_norm": 1.525292158126831, "learning_rate": 1.917312961822909e-05, "loss": 0.5178, "step": 13905 }, { "epoch": 2.270029794702257, "grad_norm": 1.6771358251571655, "learning_rate": 1.9173003262256453e-05, "loss": 0.5718, "step": 13906 }, { "epoch": 2.2701930533447614, "grad_norm": 1.5734871625900269, "learning_rate": 1.9172876897046606e-05, "loss": 0.5007, "step": 13907 }, { "epoch": 2.270356311987266, "grad_norm": 1.906890630722046, "learning_rate": 1.9172750522599678e-05, "loss": 0.6257, "step": 13908 }, { "epoch": 2.2705195706297703, "grad_norm": 2.0448012351989746, "learning_rate": 1.9172624138915784e-05, "loss": 0.7494, "step": 13909 }, { "epoch": 2.2706828292722747, "grad_norm": 1.5969666242599487, "learning_rate": 1.9172497745995068e-05, "loss": 0.5192, "step": 13910 }, { "epoch": 2.270846087914779, "grad_norm": 1.8770338296890259, "learning_rate": 1.9172371343837643e-05, "loss": 0.702, "step": 13911 }, { "epoch": 2.2710093465572836, "grad_norm": 1.6171174049377441, "learning_rate": 1.9172244932443646e-05, "loss": 0.6126, "step": 13912 }, { "epoch": 2.2711726051997876, "grad_norm": 1.7274426221847534, "learning_rate": 1.9172118511813202e-05, "loss": 0.562, "step": 13913 }, { "epoch": 2.271335863842292, "grad_norm": 1.8385990858078003, "learning_rate": 1.9171992081946436e-05, "loss": 0.5759, "step": 13914 }, { "epoch": 2.2714991224847965, "grad_norm": 1.3460861444473267, "learning_rate": 1.9171865642843474e-05, "loss": 0.5354, "step": 13915 }, { "epoch": 2.271662381127301, "grad_norm": 2.1049082279205322, "learning_rate": 1.9171739194504448e-05, "loss": 0.579, "step": 13916 }, { "epoch": 2.2718256397698053, "grad_norm": 1.9142627716064453, "learning_rate": 1.9171612736929483e-05, "loss": 0.5955, "step": 13917 }, { "epoch": 2.2719888984123098, "grad_norm": 1.6016350984573364, "learning_rate": 1.9171486270118708e-05, "loss": 0.4039, "step": 13918 }, { "epoch": 2.272152157054814, "grad_norm": 1.8482050895690918, "learning_rate": 1.9171359794072245e-05, "loss": 0.5555, "step": 13919 }, { "epoch": 2.2723154156973187, "grad_norm": 1.7666816711425781, "learning_rate": 1.9171233308790225e-05, "loss": 0.6542, "step": 13920 }, { "epoch": 2.2724786743398226, "grad_norm": 1.8585255146026611, "learning_rate": 1.917110681427278e-05, "loss": 0.5775, "step": 13921 }, { "epoch": 2.272641932982327, "grad_norm": 1.8754884004592896, "learning_rate": 1.917098031052003e-05, "loss": 0.7501, "step": 13922 }, { "epoch": 2.2728051916248315, "grad_norm": 1.4371553659439087, "learning_rate": 1.9170853797532106e-05, "loss": 0.4502, "step": 13923 }, { "epoch": 2.272968450267336, "grad_norm": 1.476637363433838, "learning_rate": 1.9170727275309133e-05, "loss": 0.5945, "step": 13924 }, { "epoch": 2.2731317089098404, "grad_norm": 1.504349946975708, "learning_rate": 1.917060074385124e-05, "loss": 0.4571, "step": 13925 }, { "epoch": 2.273294967552345, "grad_norm": 1.3802858591079712, "learning_rate": 1.9170474203158556e-05, "loss": 0.4233, "step": 13926 }, { "epoch": 2.2734582261948493, "grad_norm": 1.9984899759292603, "learning_rate": 1.9170347653231206e-05, "loss": 0.6941, "step": 13927 }, { "epoch": 2.2736214848373537, "grad_norm": 1.4699420928955078, "learning_rate": 1.917022109406932e-05, "loss": 0.523, "step": 13928 }, { "epoch": 2.273784743479858, "grad_norm": 1.716710090637207, "learning_rate": 1.9170094525673023e-05, "loss": 0.5732, "step": 13929 }, { "epoch": 2.2739480021223626, "grad_norm": 2.2451701164245605, "learning_rate": 1.9169967948042444e-05, "loss": 0.6466, "step": 13930 }, { "epoch": 2.2741112607648666, "grad_norm": 1.795754075050354, "learning_rate": 1.9169841361177708e-05, "loss": 0.5759, "step": 13931 }, { "epoch": 2.274274519407371, "grad_norm": 1.625292181968689, "learning_rate": 1.9169714765078947e-05, "loss": 0.5369, "step": 13932 }, { "epoch": 2.2744377780498755, "grad_norm": 1.8310788869857788, "learning_rate": 1.916958815974628e-05, "loss": 0.6478, "step": 13933 }, { "epoch": 2.27460103669238, "grad_norm": 1.5734957456588745, "learning_rate": 1.9169461545179848e-05, "loss": 0.52, "step": 13934 }, { "epoch": 2.2747642953348843, "grad_norm": 1.4443728923797607, "learning_rate": 1.9169334921379766e-05, "loss": 0.4994, "step": 13935 }, { "epoch": 2.2749275539773888, "grad_norm": 1.7485495805740356, "learning_rate": 1.9169208288346168e-05, "loss": 0.5886, "step": 13936 }, { "epoch": 2.275090812619893, "grad_norm": 1.7580534219741821, "learning_rate": 1.9169081646079175e-05, "loss": 0.5276, "step": 13937 }, { "epoch": 2.275254071262397, "grad_norm": 1.6626437902450562, "learning_rate": 1.9168954994578924e-05, "loss": 0.5694, "step": 13938 }, { "epoch": 2.2754173299049016, "grad_norm": 1.5509529113769531, "learning_rate": 1.9168828333845536e-05, "loss": 0.475, "step": 13939 }, { "epoch": 2.275580588547406, "grad_norm": 1.7139511108398438, "learning_rate": 1.9168701663879143e-05, "loss": 0.4913, "step": 13940 }, { "epoch": 2.2757438471899105, "grad_norm": 1.8559212684631348, "learning_rate": 1.9168574984679864e-05, "loss": 0.527, "step": 13941 }, { "epoch": 2.275907105832415, "grad_norm": 1.9713969230651855, "learning_rate": 1.9168448296247834e-05, "loss": 0.568, "step": 13942 }, { "epoch": 2.2760703644749194, "grad_norm": 1.736005187034607, "learning_rate": 1.9168321598583183e-05, "loss": 0.6284, "step": 13943 }, { "epoch": 2.276233623117424, "grad_norm": 1.8689674139022827, "learning_rate": 1.9168194891686035e-05, "loss": 0.5551, "step": 13944 }, { "epoch": 2.2763968817599283, "grad_norm": 1.9291325807571411, "learning_rate": 1.9168068175556512e-05, "loss": 0.5484, "step": 13945 }, { "epoch": 2.2765601404024327, "grad_norm": 1.8394428491592407, "learning_rate": 1.916794145019475e-05, "loss": 0.5823, "step": 13946 }, { "epoch": 2.276723399044937, "grad_norm": 1.834821105003357, "learning_rate": 1.9167814715600872e-05, "loss": 0.6726, "step": 13947 }, { "epoch": 2.276886657687441, "grad_norm": 1.7654497623443604, "learning_rate": 1.916768797177501e-05, "loss": 0.5592, "step": 13948 }, { "epoch": 2.2770499163299456, "grad_norm": 1.7998294830322266, "learning_rate": 1.9167561218717283e-05, "loss": 0.5517, "step": 13949 }, { "epoch": 2.27721317497245, "grad_norm": 1.5590392351150513, "learning_rate": 1.916743445642783e-05, "loss": 0.4697, "step": 13950 }, { "epoch": 2.2773764336149545, "grad_norm": 1.7349083423614502, "learning_rate": 1.916730768490677e-05, "loss": 0.5409, "step": 13951 }, { "epoch": 2.277539692257459, "grad_norm": 2.072094202041626, "learning_rate": 1.9167180904154234e-05, "loss": 0.5964, "step": 13952 }, { "epoch": 2.2777029508999633, "grad_norm": 1.7526817321777344, "learning_rate": 1.916705411417035e-05, "loss": 0.5473, "step": 13953 }, { "epoch": 2.2778662095424678, "grad_norm": 1.4860246181488037, "learning_rate": 1.9166927314955244e-05, "loss": 0.495, "step": 13954 }, { "epoch": 2.278029468184972, "grad_norm": 1.520379662513733, "learning_rate": 1.9166800506509044e-05, "loss": 0.4581, "step": 13955 }, { "epoch": 2.278192726827476, "grad_norm": 2.184462785720825, "learning_rate": 1.9166673688831882e-05, "loss": 0.7387, "step": 13956 }, { "epoch": 2.2783559854699806, "grad_norm": 2.0433216094970703, "learning_rate": 1.9166546861923877e-05, "loss": 0.6148, "step": 13957 }, { "epoch": 2.278519244112485, "grad_norm": 1.6449540853500366, "learning_rate": 1.9166420025785165e-05, "loss": 0.5342, "step": 13958 }, { "epoch": 2.2786825027549895, "grad_norm": 2.091167449951172, "learning_rate": 1.916629318041587e-05, "loss": 0.686, "step": 13959 }, { "epoch": 2.278845761397494, "grad_norm": 2.1705434322357178, "learning_rate": 1.916616632581612e-05, "loss": 0.5623, "step": 13960 }, { "epoch": 2.2790090200399984, "grad_norm": 1.7093569040298462, "learning_rate": 1.9166039461986043e-05, "loss": 0.5136, "step": 13961 }, { "epoch": 2.279172278682503, "grad_norm": 1.945244312286377, "learning_rate": 1.9165912588925764e-05, "loss": 0.6018, "step": 13962 }, { "epoch": 2.2793355373250073, "grad_norm": 1.7293232679367065, "learning_rate": 1.9165785706635418e-05, "loss": 0.6074, "step": 13963 }, { "epoch": 2.2794987959675117, "grad_norm": 2.224622964859009, "learning_rate": 1.9165658815115123e-05, "loss": 0.54, "step": 13964 }, { "epoch": 2.279662054610016, "grad_norm": 1.6428642272949219, "learning_rate": 1.9165531914365015e-05, "loss": 0.5152, "step": 13965 }, { "epoch": 2.27982531325252, "grad_norm": 1.4884840250015259, "learning_rate": 1.9165405004385223e-05, "loss": 0.4862, "step": 13966 }, { "epoch": 2.2799885718950246, "grad_norm": 1.6254823207855225, "learning_rate": 1.9165278085175865e-05, "loss": 0.5543, "step": 13967 }, { "epoch": 2.280151830537529, "grad_norm": 1.8934619426727295, "learning_rate": 1.9165151156737077e-05, "loss": 0.6022, "step": 13968 }, { "epoch": 2.2803150891800334, "grad_norm": 2.0586204528808594, "learning_rate": 1.916502421906898e-05, "loss": 0.6284, "step": 13969 }, { "epoch": 2.280478347822538, "grad_norm": 1.8739511966705322, "learning_rate": 1.916489727217171e-05, "loss": 0.6291, "step": 13970 }, { "epoch": 2.2806416064650423, "grad_norm": 1.8632862567901611, "learning_rate": 1.9164770316045392e-05, "loss": 0.5492, "step": 13971 }, { "epoch": 2.2808048651075468, "grad_norm": 1.848002314567566, "learning_rate": 1.916464335069015e-05, "loss": 0.6224, "step": 13972 }, { "epoch": 2.280968123750051, "grad_norm": 2.01834774017334, "learning_rate": 1.9164516376106115e-05, "loss": 0.6815, "step": 13973 }, { "epoch": 2.281131382392555, "grad_norm": 2.079613447189331, "learning_rate": 1.9164389392293415e-05, "loss": 0.5925, "step": 13974 }, { "epoch": 2.2812946410350596, "grad_norm": 1.7629497051239014, "learning_rate": 1.9164262399252176e-05, "loss": 0.5652, "step": 13975 }, { "epoch": 2.281457899677564, "grad_norm": 1.8439525365829468, "learning_rate": 1.9164135396982527e-05, "loss": 0.5682, "step": 13976 }, { "epoch": 2.2816211583200685, "grad_norm": 1.7546031475067139, "learning_rate": 1.91640083854846e-05, "loss": 0.5225, "step": 13977 }, { "epoch": 2.281784416962573, "grad_norm": 1.9836018085479736, "learning_rate": 1.9163881364758516e-05, "loss": 0.5627, "step": 13978 }, { "epoch": 2.2819476756050774, "grad_norm": 1.817509412765503, "learning_rate": 1.9163754334804404e-05, "loss": 0.5638, "step": 13979 }, { "epoch": 2.282110934247582, "grad_norm": 1.97105073928833, "learning_rate": 1.9163627295622397e-05, "loss": 0.5787, "step": 13980 }, { "epoch": 2.2822741928900863, "grad_norm": 1.803890585899353, "learning_rate": 1.916350024721262e-05, "loss": 0.6098, "step": 13981 }, { "epoch": 2.2824374515325907, "grad_norm": 1.6055262088775635, "learning_rate": 1.91633731895752e-05, "loss": 0.5156, "step": 13982 }, { "epoch": 2.282600710175095, "grad_norm": 1.7024269104003906, "learning_rate": 1.9163246122710265e-05, "loss": 0.5403, "step": 13983 }, { "epoch": 2.282763968817599, "grad_norm": 1.7593562602996826, "learning_rate": 1.9163119046617944e-05, "loss": 0.6545, "step": 13984 }, { "epoch": 2.2829272274601036, "grad_norm": 1.645037055015564, "learning_rate": 1.9162991961298364e-05, "loss": 0.5592, "step": 13985 }, { "epoch": 2.283090486102608, "grad_norm": 2.214200019836426, "learning_rate": 1.916286486675165e-05, "loss": 0.7321, "step": 13986 }, { "epoch": 2.2832537447451124, "grad_norm": 1.9174823760986328, "learning_rate": 1.916273776297794e-05, "loss": 0.6998, "step": 13987 }, { "epoch": 2.283417003387617, "grad_norm": 1.563968539237976, "learning_rate": 1.9162610649977355e-05, "loss": 0.5357, "step": 13988 }, { "epoch": 2.2835802620301213, "grad_norm": 1.6708457469940186, "learning_rate": 1.916248352775002e-05, "loss": 0.5448, "step": 13989 }, { "epoch": 2.2837435206726258, "grad_norm": 1.9790898561477661, "learning_rate": 1.9162356396296068e-05, "loss": 0.635, "step": 13990 }, { "epoch": 2.2839067793151298, "grad_norm": 1.683519721031189, "learning_rate": 1.9162229255615624e-05, "loss": 0.5981, "step": 13991 }, { "epoch": 2.284070037957634, "grad_norm": 1.5720479488372803, "learning_rate": 1.916210210570882e-05, "loss": 0.5401, "step": 13992 }, { "epoch": 2.2842332966001386, "grad_norm": 1.8959178924560547, "learning_rate": 1.916197494657578e-05, "loss": 0.56, "step": 13993 }, { "epoch": 2.284396555242643, "grad_norm": 1.7567049264907837, "learning_rate": 1.9161847778216635e-05, "loss": 0.5012, "step": 13994 }, { "epoch": 2.2845598138851475, "grad_norm": 1.9192942380905151, "learning_rate": 1.916172060063151e-05, "loss": 0.6129, "step": 13995 }, { "epoch": 2.284723072527652, "grad_norm": 1.5325020551681519, "learning_rate": 1.9161593413820535e-05, "loss": 0.4445, "step": 13996 }, { "epoch": 2.2848863311701564, "grad_norm": 1.5968296527862549, "learning_rate": 1.916146621778384e-05, "loss": 0.4667, "step": 13997 }, { "epoch": 2.285049589812661, "grad_norm": 1.6320220232009888, "learning_rate": 1.9161339012521548e-05, "loss": 0.5763, "step": 13998 }, { "epoch": 2.2852128484551653, "grad_norm": 2.0445926189422607, "learning_rate": 1.916121179803379e-05, "loss": 0.6495, "step": 13999 }, { "epoch": 2.2853761070976697, "grad_norm": 1.9737379550933838, "learning_rate": 1.9161084574320696e-05, "loss": 0.6285, "step": 14000 }, { "epoch": 2.2855393657401737, "grad_norm": 2.0363075733184814, "learning_rate": 1.9160957341382393e-05, "loss": 0.7316, "step": 14001 }, { "epoch": 2.285702624382678, "grad_norm": 1.5856351852416992, "learning_rate": 1.9160830099219007e-05, "loss": 0.5161, "step": 14002 }, { "epoch": 2.2858658830251826, "grad_norm": 1.8207496404647827, "learning_rate": 1.916070284783067e-05, "loss": 0.5728, "step": 14003 }, { "epoch": 2.286029141667687, "grad_norm": 1.6594120264053345, "learning_rate": 1.9160575587217506e-05, "loss": 0.6058, "step": 14004 }, { "epoch": 2.2861924003101914, "grad_norm": 1.884810447692871, "learning_rate": 1.9160448317379642e-05, "loss": 0.6148, "step": 14005 }, { "epoch": 2.286355658952696, "grad_norm": 1.9922125339508057, "learning_rate": 1.916032103831721e-05, "loss": 0.656, "step": 14006 }, { "epoch": 2.2865189175952003, "grad_norm": 1.8830081224441528, "learning_rate": 1.9160193750030342e-05, "loss": 0.5808, "step": 14007 }, { "epoch": 2.2866821762377048, "grad_norm": 1.6651763916015625, "learning_rate": 1.9160066452519156e-05, "loss": 0.4672, "step": 14008 }, { "epoch": 2.2868454348802087, "grad_norm": 2.015568494796753, "learning_rate": 1.915993914578379e-05, "loss": 0.6941, "step": 14009 }, { "epoch": 2.287008693522713, "grad_norm": 1.7918131351470947, "learning_rate": 1.9159811829824364e-05, "loss": 0.6605, "step": 14010 }, { "epoch": 2.2871719521652176, "grad_norm": 1.711225986480713, "learning_rate": 1.915968450464101e-05, "loss": 0.6412, "step": 14011 }, { "epoch": 2.287335210807722, "grad_norm": 1.6190986633300781, "learning_rate": 1.9159557170233857e-05, "loss": 0.5695, "step": 14012 }, { "epoch": 2.2874984694502265, "grad_norm": 1.6358613967895508, "learning_rate": 1.9159429826603032e-05, "loss": 0.6199, "step": 14013 }, { "epoch": 2.287661728092731, "grad_norm": 1.7216846942901611, "learning_rate": 1.9159302473748665e-05, "loss": 0.5687, "step": 14014 }, { "epoch": 2.2878249867352354, "grad_norm": 1.7303502559661865, "learning_rate": 1.9159175111670882e-05, "loss": 0.514, "step": 14015 }, { "epoch": 2.28798824537774, "grad_norm": 1.743077039718628, "learning_rate": 1.915904774036981e-05, "loss": 0.4675, "step": 14016 }, { "epoch": 2.2881515040202443, "grad_norm": 1.7576959133148193, "learning_rate": 1.915892035984558e-05, "loss": 0.5728, "step": 14017 }, { "epoch": 2.2883147626627487, "grad_norm": 1.7040982246398926, "learning_rate": 1.9158792970098322e-05, "loss": 0.5756, "step": 14018 }, { "epoch": 2.2884780213052527, "grad_norm": 1.888571858406067, "learning_rate": 1.9158665571128164e-05, "loss": 0.6309, "step": 14019 }, { "epoch": 2.288641279947757, "grad_norm": 1.9732773303985596, "learning_rate": 1.9158538162935227e-05, "loss": 0.583, "step": 14020 }, { "epoch": 2.2888045385902616, "grad_norm": 1.9240121841430664, "learning_rate": 1.9158410745519647e-05, "loss": 0.6326, "step": 14021 }, { "epoch": 2.288967797232766, "grad_norm": 2.009688138961792, "learning_rate": 1.9158283318881548e-05, "loss": 0.6401, "step": 14022 }, { "epoch": 2.2891310558752704, "grad_norm": 1.5068720579147339, "learning_rate": 1.9158155883021062e-05, "loss": 0.4659, "step": 14023 }, { "epoch": 2.289294314517775, "grad_norm": 1.6102581024169922, "learning_rate": 1.9158028437938316e-05, "loss": 0.5174, "step": 14024 }, { "epoch": 2.2894575731602793, "grad_norm": 1.8191735744476318, "learning_rate": 1.9157900983633437e-05, "loss": 0.5618, "step": 14025 }, { "epoch": 2.2896208318027838, "grad_norm": 1.8497214317321777, "learning_rate": 1.9157773520106552e-05, "loss": 0.5995, "step": 14026 }, { "epoch": 2.2897840904452877, "grad_norm": 1.8703248500823975, "learning_rate": 1.9157646047357795e-05, "loss": 0.5425, "step": 14027 }, { "epoch": 2.289947349087792, "grad_norm": 1.9407916069030762, "learning_rate": 1.915751856538729e-05, "loss": 0.669, "step": 14028 }, { "epoch": 2.2901106077302966, "grad_norm": 1.5875822305679321, "learning_rate": 1.9157391074195163e-05, "loss": 0.4915, "step": 14029 }, { "epoch": 2.290273866372801, "grad_norm": 2.180553674697876, "learning_rate": 1.915726357378155e-05, "loss": 0.7204, "step": 14030 }, { "epoch": 2.2904371250153055, "grad_norm": 1.9645739793777466, "learning_rate": 1.9157136064146573e-05, "loss": 0.4869, "step": 14031 }, { "epoch": 2.29060038365781, "grad_norm": 1.8563637733459473, "learning_rate": 1.915700854529036e-05, "loss": 0.551, "step": 14032 }, { "epoch": 2.2907636423003144, "grad_norm": 1.72524094581604, "learning_rate": 1.9156881017213045e-05, "loss": 0.542, "step": 14033 }, { "epoch": 2.290926900942819, "grad_norm": 1.5695867538452148, "learning_rate": 1.915675347991475e-05, "loss": 0.5227, "step": 14034 }, { "epoch": 2.2910901595853232, "grad_norm": 1.8060929775238037, "learning_rate": 1.9156625933395614e-05, "loss": 0.6802, "step": 14035 }, { "epoch": 2.2912534182278272, "grad_norm": 1.7625160217285156, "learning_rate": 1.915649837765575e-05, "loss": 0.6382, "step": 14036 }, { "epoch": 2.2914166768703317, "grad_norm": 1.6379420757293701, "learning_rate": 1.91563708126953e-05, "loss": 0.5415, "step": 14037 }, { "epoch": 2.291579935512836, "grad_norm": 2.006383180618286, "learning_rate": 1.9156243238514384e-05, "loss": 0.6792, "step": 14038 }, { "epoch": 2.2917431941553406, "grad_norm": 1.5719441175460815, "learning_rate": 1.9156115655113136e-05, "loss": 0.5896, "step": 14039 }, { "epoch": 2.291906452797845, "grad_norm": 1.9235527515411377, "learning_rate": 1.915598806249168e-05, "loss": 0.6069, "step": 14040 }, { "epoch": 2.2920697114403494, "grad_norm": 1.7831084728240967, "learning_rate": 1.9155860460650145e-05, "loss": 0.5955, "step": 14041 }, { "epoch": 2.292232970082854, "grad_norm": 1.7408894300460815, "learning_rate": 1.9155732849588664e-05, "loss": 0.514, "step": 14042 }, { "epoch": 2.2923962287253583, "grad_norm": 2.004639148712158, "learning_rate": 1.915560522930736e-05, "loss": 0.758, "step": 14043 }, { "epoch": 2.2925594873678623, "grad_norm": 1.3885266780853271, "learning_rate": 1.9155477599806365e-05, "loss": 0.3964, "step": 14044 }, { "epoch": 2.2927227460103667, "grad_norm": 1.8664318323135376, "learning_rate": 1.915534996108581e-05, "loss": 0.6531, "step": 14045 }, { "epoch": 2.292886004652871, "grad_norm": 1.8279571533203125, "learning_rate": 1.9155222313145817e-05, "loss": 0.5377, "step": 14046 }, { "epoch": 2.2930492632953756, "grad_norm": 1.7790271043777466, "learning_rate": 1.9155094655986515e-05, "loss": 0.5529, "step": 14047 }, { "epoch": 2.29321252193788, "grad_norm": 1.754335641860962, "learning_rate": 1.9154966989608036e-05, "loss": 0.6506, "step": 14048 }, { "epoch": 2.2933757805803845, "grad_norm": 1.8760497570037842, "learning_rate": 1.915483931401051e-05, "loss": 0.7208, "step": 14049 }, { "epoch": 2.293539039222889, "grad_norm": 1.6028623580932617, "learning_rate": 1.9154711629194062e-05, "loss": 0.5117, "step": 14050 }, { "epoch": 2.2937022978653934, "grad_norm": 1.6003228425979614, "learning_rate": 1.9154583935158822e-05, "loss": 0.5233, "step": 14051 }, { "epoch": 2.293865556507898, "grad_norm": 1.939603328704834, "learning_rate": 1.915445623190492e-05, "loss": 0.6351, "step": 14052 }, { "epoch": 2.2940288151504022, "grad_norm": 1.49372136592865, "learning_rate": 1.9154328519432483e-05, "loss": 0.5051, "step": 14053 }, { "epoch": 2.2941920737929062, "grad_norm": 1.8941140174865723, "learning_rate": 1.9154200797741637e-05, "loss": 0.6131, "step": 14054 }, { "epoch": 2.2943553324354107, "grad_norm": 1.7595065832138062, "learning_rate": 1.9154073066832512e-05, "loss": 0.5542, "step": 14055 }, { "epoch": 2.294518591077915, "grad_norm": 1.6753801107406616, "learning_rate": 1.9153945326705242e-05, "loss": 0.4709, "step": 14056 }, { "epoch": 2.2946818497204196, "grad_norm": 1.8019970655441284, "learning_rate": 1.915381757735995e-05, "loss": 0.5084, "step": 14057 }, { "epoch": 2.294845108362924, "grad_norm": 1.8557394742965698, "learning_rate": 1.9153689818796764e-05, "loss": 0.578, "step": 14058 }, { "epoch": 2.2950083670054284, "grad_norm": 1.7790182828903198, "learning_rate": 1.9153562051015817e-05, "loss": 0.5592, "step": 14059 }, { "epoch": 2.295171625647933, "grad_norm": 1.8044377565383911, "learning_rate": 1.9153434274017234e-05, "loss": 0.5492, "step": 14060 }, { "epoch": 2.2953348842904373, "grad_norm": 1.6618716716766357, "learning_rate": 1.9153306487801146e-05, "loss": 0.5, "step": 14061 }, { "epoch": 2.2954981429329413, "grad_norm": 1.8789710998535156, "learning_rate": 1.9153178692367684e-05, "loss": 0.6739, "step": 14062 }, { "epoch": 2.2956614015754457, "grad_norm": 1.8143547773361206, "learning_rate": 1.9153050887716968e-05, "loss": 0.491, "step": 14063 }, { "epoch": 2.29582466021795, "grad_norm": 2.187600612640381, "learning_rate": 1.9152923073849134e-05, "loss": 0.6486, "step": 14064 }, { "epoch": 2.2959879188604546, "grad_norm": 1.7732521295547485, "learning_rate": 1.9152795250764308e-05, "loss": 0.5036, "step": 14065 }, { "epoch": 2.296151177502959, "grad_norm": 1.6295660734176636, "learning_rate": 1.915266741846262e-05, "loss": 0.5818, "step": 14066 }, { "epoch": 2.2963144361454635, "grad_norm": 1.7453280687332153, "learning_rate": 1.9152539576944203e-05, "loss": 0.5373, "step": 14067 }, { "epoch": 2.296477694787968, "grad_norm": 2.1421189308166504, "learning_rate": 1.9152411726209176e-05, "loss": 0.6456, "step": 14068 }, { "epoch": 2.2966409534304724, "grad_norm": 1.9632409811019897, "learning_rate": 1.9152283866257674e-05, "loss": 0.5898, "step": 14069 }, { "epoch": 2.296804212072977, "grad_norm": 1.945724606513977, "learning_rate": 1.9152155997089824e-05, "loss": 0.6625, "step": 14070 }, { "epoch": 2.2969674707154812, "grad_norm": 2.2041547298431396, "learning_rate": 1.9152028118705757e-05, "loss": 0.6348, "step": 14071 }, { "epoch": 2.2971307293579852, "grad_norm": 1.9027799367904663, "learning_rate": 1.9151900231105603e-05, "loss": 0.6368, "step": 14072 }, { "epoch": 2.2972939880004897, "grad_norm": 1.7713886499404907, "learning_rate": 1.9151772334289483e-05, "loss": 0.5514, "step": 14073 }, { "epoch": 2.297457246642994, "grad_norm": 1.8441851139068604, "learning_rate": 1.915164442825753e-05, "loss": 0.5982, "step": 14074 }, { "epoch": 2.2976205052854985, "grad_norm": 1.478020191192627, "learning_rate": 1.9151516513009877e-05, "loss": 0.4825, "step": 14075 }, { "epoch": 2.297783763928003, "grad_norm": 1.828572154045105, "learning_rate": 1.9151388588546645e-05, "loss": 0.6346, "step": 14076 }, { "epoch": 2.2979470225705074, "grad_norm": 2.098597288131714, "learning_rate": 1.9151260654867973e-05, "loss": 0.6596, "step": 14077 }, { "epoch": 2.298110281213012, "grad_norm": 1.7956889867782593, "learning_rate": 1.915113271197398e-05, "loss": 0.586, "step": 14078 }, { "epoch": 2.298273539855516, "grad_norm": 1.848307490348816, "learning_rate": 1.91510047598648e-05, "loss": 0.6178, "step": 14079 }, { "epoch": 2.2984367984980203, "grad_norm": 1.7283990383148193, "learning_rate": 1.915087679854056e-05, "loss": 0.5527, "step": 14080 }, { "epoch": 2.2986000571405247, "grad_norm": 1.5191999673843384, "learning_rate": 1.915074882800139e-05, "loss": 0.5149, "step": 14081 }, { "epoch": 2.298763315783029, "grad_norm": 1.6771363019943237, "learning_rate": 1.915062084824742e-05, "loss": 0.5611, "step": 14082 }, { "epoch": 2.2989265744255336, "grad_norm": 1.9180035591125488, "learning_rate": 1.9150492859278774e-05, "loss": 0.5653, "step": 14083 }, { "epoch": 2.299089833068038, "grad_norm": 1.9641889333724976, "learning_rate": 1.9150364861095582e-05, "loss": 0.6864, "step": 14084 }, { "epoch": 2.2992530917105425, "grad_norm": 1.5821303129196167, "learning_rate": 1.9150236853697982e-05, "loss": 0.5575, "step": 14085 }, { "epoch": 2.299416350353047, "grad_norm": 1.8300634622573853, "learning_rate": 1.9150108837086094e-05, "loss": 0.5832, "step": 14086 }, { "epoch": 2.2995796089955514, "grad_norm": 1.8990364074707031, "learning_rate": 1.9149980811260045e-05, "loss": 0.5506, "step": 14087 }, { "epoch": 2.299742867638056, "grad_norm": 1.5598406791687012, "learning_rate": 1.914985277621997e-05, "loss": 0.5713, "step": 14088 }, { "epoch": 2.29990612628056, "grad_norm": 2.1838178634643555, "learning_rate": 1.9149724731965994e-05, "loss": 0.5592, "step": 14089 }, { "epoch": 2.3000693849230642, "grad_norm": 1.5228079557418823, "learning_rate": 1.914959667849825e-05, "loss": 0.4385, "step": 14090 }, { "epoch": 2.3002326435655687, "grad_norm": 1.4911350011825562, "learning_rate": 1.9149468615816862e-05, "loss": 0.5544, "step": 14091 }, { "epoch": 2.300395902208073, "grad_norm": 2.0439836978912354, "learning_rate": 1.9149340543921962e-05, "loss": 0.687, "step": 14092 }, { "epoch": 2.3005591608505775, "grad_norm": 1.4747322797775269, "learning_rate": 1.9149212462813682e-05, "loss": 0.5479, "step": 14093 }, { "epoch": 2.300722419493082, "grad_norm": 1.6484969854354858, "learning_rate": 1.9149084372492146e-05, "loss": 0.4587, "step": 14094 }, { "epoch": 2.3008856781355864, "grad_norm": 1.674045443534851, "learning_rate": 1.9148956272957484e-05, "loss": 0.6373, "step": 14095 }, { "epoch": 2.301048936778091, "grad_norm": 1.902550220489502, "learning_rate": 1.9148828164209824e-05, "loss": 0.6373, "step": 14096 }, { "epoch": 2.301212195420595, "grad_norm": 2.0768749713897705, "learning_rate": 1.91487000462493e-05, "loss": 0.6566, "step": 14097 }, { "epoch": 2.3013754540630993, "grad_norm": 1.6310875415802002, "learning_rate": 1.9148571919076032e-05, "loss": 0.5571, "step": 14098 }, { "epoch": 2.3015387127056037, "grad_norm": 1.6493828296661377, "learning_rate": 1.914844378269016e-05, "loss": 0.4933, "step": 14099 }, { "epoch": 2.301701971348108, "grad_norm": 1.9574527740478516, "learning_rate": 1.9148315637091805e-05, "loss": 0.6504, "step": 14100 }, { "epoch": 2.3018652299906126, "grad_norm": 1.8924041986465454, "learning_rate": 1.9148187482281097e-05, "loss": 0.5752, "step": 14101 }, { "epoch": 2.302028488633117, "grad_norm": 1.6946848630905151, "learning_rate": 1.914805931825817e-05, "loss": 0.5395, "step": 14102 }, { "epoch": 2.3021917472756215, "grad_norm": 1.5627840757369995, "learning_rate": 1.914793114502315e-05, "loss": 0.4926, "step": 14103 }, { "epoch": 2.302355005918126, "grad_norm": 1.950657844543457, "learning_rate": 1.914780296257616e-05, "loss": 0.5717, "step": 14104 }, { "epoch": 2.3025182645606304, "grad_norm": 2.0718977451324463, "learning_rate": 1.9147674770917342e-05, "loss": 0.6216, "step": 14105 }, { "epoch": 2.302681523203135, "grad_norm": 1.5791618824005127, "learning_rate": 1.9147546570046812e-05, "loss": 0.4456, "step": 14106 }, { "epoch": 2.302844781845639, "grad_norm": 1.7787023782730103, "learning_rate": 1.9147418359964708e-05, "loss": 0.5692, "step": 14107 }, { "epoch": 2.3030080404881432, "grad_norm": 1.9981205463409424, "learning_rate": 1.9147290140671154e-05, "loss": 0.5548, "step": 14108 }, { "epoch": 2.3031712991306477, "grad_norm": 1.9247280359268188, "learning_rate": 1.9147161912166286e-05, "loss": 0.655, "step": 14109 }, { "epoch": 2.303334557773152, "grad_norm": 1.9564175605773926, "learning_rate": 1.9147033674450223e-05, "loss": 0.5562, "step": 14110 }, { "epoch": 2.3034978164156565, "grad_norm": 2.096853494644165, "learning_rate": 1.9146905427523104e-05, "loss": 0.687, "step": 14111 }, { "epoch": 2.303661075058161, "grad_norm": 2.0932681560516357, "learning_rate": 1.914677717138505e-05, "loss": 0.6163, "step": 14112 }, { "epoch": 2.3038243337006654, "grad_norm": 2.260066270828247, "learning_rate": 1.9146648906036197e-05, "loss": 0.638, "step": 14113 }, { "epoch": 2.30398759234317, "grad_norm": 1.5339609384536743, "learning_rate": 1.914652063147667e-05, "loss": 0.4889, "step": 14114 }, { "epoch": 2.304150850985674, "grad_norm": 1.5624340772628784, "learning_rate": 1.9146392347706596e-05, "loss": 0.516, "step": 14115 }, { "epoch": 2.3043141096281783, "grad_norm": 2.204514503479004, "learning_rate": 1.9146264054726112e-05, "loss": 0.6039, "step": 14116 }, { "epoch": 2.3044773682706827, "grad_norm": 1.8411991596221924, "learning_rate": 1.914613575253534e-05, "loss": 0.4855, "step": 14117 }, { "epoch": 2.304640626913187, "grad_norm": 1.8212238550186157, "learning_rate": 1.9146007441134412e-05, "loss": 0.5599, "step": 14118 }, { "epoch": 2.3048038855556916, "grad_norm": 1.7575026750564575, "learning_rate": 1.914587912052346e-05, "loss": 0.6144, "step": 14119 }, { "epoch": 2.304967144198196, "grad_norm": 1.8553175926208496, "learning_rate": 1.9145750790702606e-05, "loss": 0.6296, "step": 14120 }, { "epoch": 2.3051304028407005, "grad_norm": 2.024660587310791, "learning_rate": 1.9145622451671983e-05, "loss": 0.6517, "step": 14121 }, { "epoch": 2.305293661483205, "grad_norm": 2.256958484649658, "learning_rate": 1.914549410343172e-05, "loss": 0.6391, "step": 14122 }, { "epoch": 2.3054569201257094, "grad_norm": 1.6476469039916992, "learning_rate": 1.914536574598195e-05, "loss": 0.5352, "step": 14123 }, { "epoch": 2.305620178768214, "grad_norm": 1.9945690631866455, "learning_rate": 1.91452373793228e-05, "loss": 0.638, "step": 14124 }, { "epoch": 2.305783437410718, "grad_norm": 1.8745495080947876, "learning_rate": 1.9145109003454396e-05, "loss": 0.5366, "step": 14125 }, { "epoch": 2.3059466960532222, "grad_norm": 2.0358731746673584, "learning_rate": 1.9144980618376873e-05, "loss": 0.583, "step": 14126 }, { "epoch": 2.3061099546957267, "grad_norm": 1.4960285425186157, "learning_rate": 1.914485222409035e-05, "loss": 0.4974, "step": 14127 }, { "epoch": 2.306273213338231, "grad_norm": 1.9147897958755493, "learning_rate": 1.9144723820594968e-05, "loss": 0.5022, "step": 14128 }, { "epoch": 2.3064364719807355, "grad_norm": 1.941955327987671, "learning_rate": 1.9144595407890853e-05, "loss": 0.7099, "step": 14129 }, { "epoch": 2.30659973062324, "grad_norm": 1.6317088603973389, "learning_rate": 1.914446698597813e-05, "loss": 0.523, "step": 14130 }, { "epoch": 2.3067629892657444, "grad_norm": 1.5471748113632202, "learning_rate": 1.914433855485693e-05, "loss": 0.492, "step": 14131 }, { "epoch": 2.3069262479082484, "grad_norm": 1.6434290409088135, "learning_rate": 1.914421011452739e-05, "loss": 0.5822, "step": 14132 }, { "epoch": 2.307089506550753, "grad_norm": 1.5649094581604004, "learning_rate": 1.9144081664989626e-05, "loss": 0.5127, "step": 14133 }, { "epoch": 2.3072527651932573, "grad_norm": 1.8652970790863037, "learning_rate": 1.9143953206243778e-05, "loss": 0.5234, "step": 14134 }, { "epoch": 2.3074160238357617, "grad_norm": 1.956431269645691, "learning_rate": 1.914382473828997e-05, "loss": 0.5477, "step": 14135 }, { "epoch": 2.307579282478266, "grad_norm": 2.069138765335083, "learning_rate": 1.914369626112833e-05, "loss": 0.604, "step": 14136 }, { "epoch": 2.3077425411207706, "grad_norm": 1.891721248626709, "learning_rate": 1.9143567774758994e-05, "loss": 0.6642, "step": 14137 }, { "epoch": 2.307905799763275, "grad_norm": 1.613600254058838, "learning_rate": 1.9143439279182087e-05, "loss": 0.5552, "step": 14138 }, { "epoch": 2.3080690584057795, "grad_norm": 1.5516656637191772, "learning_rate": 1.914331077439774e-05, "loss": 0.5555, "step": 14139 }, { "epoch": 2.308232317048284, "grad_norm": 1.6903424263000488, "learning_rate": 1.914318226040608e-05, "loss": 0.4903, "step": 14140 }, { "epoch": 2.3083955756907883, "grad_norm": 1.584118127822876, "learning_rate": 1.914305373720724e-05, "loss": 0.54, "step": 14141 }, { "epoch": 2.3085588343332923, "grad_norm": 1.6930195093154907, "learning_rate": 1.9142925204801343e-05, "loss": 0.5386, "step": 14142 }, { "epoch": 2.308722092975797, "grad_norm": 1.9773590564727783, "learning_rate": 1.9142796663188527e-05, "loss": 0.6823, "step": 14143 }, { "epoch": 2.308885351618301, "grad_norm": 1.552107334136963, "learning_rate": 1.9142668112368917e-05, "loss": 0.5271, "step": 14144 }, { "epoch": 2.3090486102608057, "grad_norm": 1.7365341186523438, "learning_rate": 1.9142539552342638e-05, "loss": 0.4922, "step": 14145 }, { "epoch": 2.30921186890331, "grad_norm": 2.3511784076690674, "learning_rate": 1.9142410983109826e-05, "loss": 0.992, "step": 14146 }, { "epoch": 2.3093751275458145, "grad_norm": 1.538337230682373, "learning_rate": 1.9142282404670613e-05, "loss": 0.502, "step": 14147 }, { "epoch": 2.309538386188319, "grad_norm": 1.6977447271347046, "learning_rate": 1.914215381702512e-05, "loss": 0.5336, "step": 14148 }, { "epoch": 2.3097016448308234, "grad_norm": 1.9761089086532593, "learning_rate": 1.914202522017348e-05, "loss": 0.6506, "step": 14149 }, { "epoch": 2.3098649034733274, "grad_norm": 1.6668850183486938, "learning_rate": 1.9141896614115824e-05, "loss": 0.5473, "step": 14150 }, { "epoch": 2.310028162115832, "grad_norm": 1.6683109998703003, "learning_rate": 1.9141767998852283e-05, "loss": 0.5639, "step": 14151 }, { "epoch": 2.3101914207583363, "grad_norm": 1.8965120315551758, "learning_rate": 1.9141639374382982e-05, "loss": 0.6094, "step": 14152 }, { "epoch": 2.3103546794008407, "grad_norm": 1.781065821647644, "learning_rate": 1.9141510740708053e-05, "loss": 0.5865, "step": 14153 }, { "epoch": 2.310517938043345, "grad_norm": 1.341018795967102, "learning_rate": 1.9141382097827623e-05, "loss": 0.4409, "step": 14154 }, { "epoch": 2.3106811966858496, "grad_norm": 1.6383600234985352, "learning_rate": 1.9141253445741826e-05, "loss": 0.5083, "step": 14155 }, { "epoch": 2.310844455328354, "grad_norm": 1.5889445543289185, "learning_rate": 1.914112478445079e-05, "loss": 0.502, "step": 14156 }, { "epoch": 2.3110077139708585, "grad_norm": 1.8370862007141113, "learning_rate": 1.9140996113954643e-05, "loss": 0.6326, "step": 14157 }, { "epoch": 2.311170972613363, "grad_norm": 2.0889790058135986, "learning_rate": 1.9140867434253515e-05, "loss": 0.6836, "step": 14158 }, { "epoch": 2.3113342312558673, "grad_norm": 2.2921338081359863, "learning_rate": 1.9140738745347535e-05, "loss": 0.6075, "step": 14159 }, { "epoch": 2.3114974898983713, "grad_norm": 1.751992106437683, "learning_rate": 1.9140610047236834e-05, "loss": 0.513, "step": 14160 }, { "epoch": 2.3116607485408758, "grad_norm": 1.9034423828125, "learning_rate": 1.914048133992154e-05, "loss": 0.5331, "step": 14161 }, { "epoch": 2.31182400718338, "grad_norm": 1.8179794549942017, "learning_rate": 1.9140352623401785e-05, "loss": 0.6295, "step": 14162 }, { "epoch": 2.3119872658258847, "grad_norm": 1.812585473060608, "learning_rate": 1.9140223897677697e-05, "loss": 0.4853, "step": 14163 }, { "epoch": 2.312150524468389, "grad_norm": 1.7048133611679077, "learning_rate": 1.9140095162749408e-05, "loss": 0.524, "step": 14164 }, { "epoch": 2.3123137831108935, "grad_norm": 1.662611722946167, "learning_rate": 1.9139966418617045e-05, "loss": 0.5854, "step": 14165 }, { "epoch": 2.312477041753398, "grad_norm": 2.0646820068359375, "learning_rate": 1.9139837665280735e-05, "loss": 0.6356, "step": 14166 }, { "epoch": 2.312640300395902, "grad_norm": 1.9403454065322876, "learning_rate": 1.913970890274061e-05, "loss": 0.6402, "step": 14167 }, { "epoch": 2.3128035590384064, "grad_norm": 1.8528285026550293, "learning_rate": 1.9139580130996807e-05, "loss": 0.586, "step": 14168 }, { "epoch": 2.312966817680911, "grad_norm": 2.365898370742798, "learning_rate": 1.913945135004945e-05, "loss": 0.66, "step": 14169 }, { "epoch": 2.3131300763234153, "grad_norm": 1.7872637510299683, "learning_rate": 1.913932255989866e-05, "loss": 0.5508, "step": 14170 }, { "epoch": 2.3132933349659197, "grad_norm": 1.7277421951293945, "learning_rate": 1.913919376054458e-05, "loss": 0.5171, "step": 14171 }, { "epoch": 2.313456593608424, "grad_norm": 1.4254661798477173, "learning_rate": 1.9139064951987332e-05, "loss": 0.5041, "step": 14172 }, { "epoch": 2.3136198522509286, "grad_norm": 1.7253443002700806, "learning_rate": 1.9138936134227047e-05, "loss": 0.601, "step": 14173 }, { "epoch": 2.313783110893433, "grad_norm": 1.595473051071167, "learning_rate": 1.9138807307263858e-05, "loss": 0.4817, "step": 14174 }, { "epoch": 2.3139463695359375, "grad_norm": 2.0962610244750977, "learning_rate": 1.9138678471097892e-05, "loss": 0.6712, "step": 14175 }, { "epoch": 2.314109628178442, "grad_norm": 1.4534828662872314, "learning_rate": 1.913854962572928e-05, "loss": 0.4836, "step": 14176 }, { "epoch": 2.314272886820946, "grad_norm": 1.6335846185684204, "learning_rate": 1.913842077115815e-05, "loss": 0.5543, "step": 14177 }, { "epoch": 2.3144361454634503, "grad_norm": 1.7109801769256592, "learning_rate": 1.9138291907384632e-05, "loss": 0.4767, "step": 14178 }, { "epoch": 2.3145994041059548, "grad_norm": 1.7814111709594727, "learning_rate": 1.9138163034408858e-05, "loss": 0.5595, "step": 14179 }, { "epoch": 2.314762662748459, "grad_norm": 1.659364938735962, "learning_rate": 1.9138034152230952e-05, "loss": 0.5119, "step": 14180 }, { "epoch": 2.3149259213909636, "grad_norm": 1.566986083984375, "learning_rate": 1.9137905260851056e-05, "loss": 0.5273, "step": 14181 }, { "epoch": 2.315089180033468, "grad_norm": 2.0784876346588135, "learning_rate": 1.9137776360269285e-05, "loss": 0.7071, "step": 14182 }, { "epoch": 2.3152524386759725, "grad_norm": 1.762555718421936, "learning_rate": 1.9137647450485775e-05, "loss": 0.6498, "step": 14183 }, { "epoch": 2.315415697318477, "grad_norm": 1.6993416547775269, "learning_rate": 1.9137518531500662e-05, "loss": 0.5871, "step": 14184 }, { "epoch": 2.315578955960981, "grad_norm": 1.7905813455581665, "learning_rate": 1.913738960331407e-05, "loss": 0.6284, "step": 14185 }, { "epoch": 2.3157422146034854, "grad_norm": 1.7983088493347168, "learning_rate": 1.9137260665926123e-05, "loss": 0.5997, "step": 14186 }, { "epoch": 2.31590547324599, "grad_norm": 1.7449491024017334, "learning_rate": 1.9137131719336962e-05, "loss": 0.5514, "step": 14187 }, { "epoch": 2.3160687318884943, "grad_norm": 1.5621578693389893, "learning_rate": 1.913700276354671e-05, "loss": 0.4959, "step": 14188 }, { "epoch": 2.3162319905309987, "grad_norm": 1.7015128135681152, "learning_rate": 1.91368737985555e-05, "loss": 0.5658, "step": 14189 }, { "epoch": 2.316395249173503, "grad_norm": 1.746320366859436, "learning_rate": 1.913674482436346e-05, "loss": 0.5591, "step": 14190 }, { "epoch": 2.3165585078160076, "grad_norm": 1.8337692022323608, "learning_rate": 1.913661584097072e-05, "loss": 0.5585, "step": 14191 }, { "epoch": 2.316721766458512, "grad_norm": 1.8575688600540161, "learning_rate": 1.913648684837741e-05, "loss": 0.5424, "step": 14192 }, { "epoch": 2.3168850251010165, "grad_norm": 1.9168716669082642, "learning_rate": 1.9136357846583658e-05, "loss": 0.6102, "step": 14193 }, { "epoch": 2.317048283743521, "grad_norm": 1.627753496170044, "learning_rate": 1.91362288355896e-05, "loss": 0.5121, "step": 14194 }, { "epoch": 2.317211542386025, "grad_norm": 1.8297207355499268, "learning_rate": 1.913609981539536e-05, "loss": 0.5474, "step": 14195 }, { "epoch": 2.3173748010285293, "grad_norm": 1.5398311614990234, "learning_rate": 1.913597078600107e-05, "loss": 0.478, "step": 14196 }, { "epoch": 2.3175380596710338, "grad_norm": 1.639058232307434, "learning_rate": 1.9135841747406863e-05, "loss": 0.5408, "step": 14197 }, { "epoch": 2.317701318313538, "grad_norm": 1.6240248680114746, "learning_rate": 1.913571269961286e-05, "loss": 0.5214, "step": 14198 }, { "epoch": 2.3178645769560426, "grad_norm": 1.5397125482559204, "learning_rate": 1.91355836426192e-05, "loss": 0.5547, "step": 14199 }, { "epoch": 2.318027835598547, "grad_norm": 2.00486159324646, "learning_rate": 1.913545457642601e-05, "loss": 0.6436, "step": 14200 }, { "epoch": 2.3181910942410515, "grad_norm": 1.72153639793396, "learning_rate": 1.913532550103342e-05, "loss": 0.4965, "step": 14201 }, { "epoch": 2.318354352883556, "grad_norm": 1.600995659828186, "learning_rate": 1.9135196416441558e-05, "loss": 0.5547, "step": 14202 }, { "epoch": 2.31851761152606, "grad_norm": 1.9395190477371216, "learning_rate": 1.913506732265056e-05, "loss": 0.7194, "step": 14203 }, { "epoch": 2.3186808701685644, "grad_norm": 2.1096675395965576, "learning_rate": 1.9134938219660545e-05, "loss": 0.6371, "step": 14204 }, { "epoch": 2.318844128811069, "grad_norm": 1.8336228132247925, "learning_rate": 1.9134809107471652e-05, "loss": 0.5803, "step": 14205 }, { "epoch": 2.3190073874535733, "grad_norm": 2.0344910621643066, "learning_rate": 1.9134679986084008e-05, "loss": 0.6741, "step": 14206 }, { "epoch": 2.3191706460960777, "grad_norm": 1.856908917427063, "learning_rate": 1.9134550855497745e-05, "loss": 0.6134, "step": 14207 }, { "epoch": 2.319333904738582, "grad_norm": 1.5771139860153198, "learning_rate": 1.9134421715712993e-05, "loss": 0.4426, "step": 14208 }, { "epoch": 2.3194971633810866, "grad_norm": 1.9810947179794312, "learning_rate": 1.913429256672988e-05, "loss": 0.5809, "step": 14209 }, { "epoch": 2.319660422023591, "grad_norm": 1.7833067178726196, "learning_rate": 1.9134163408548537e-05, "loss": 0.5816, "step": 14210 }, { "epoch": 2.3198236806660955, "grad_norm": 1.6517349481582642, "learning_rate": 1.913403424116909e-05, "loss": 0.4788, "step": 14211 }, { "epoch": 2.3199869393086, "grad_norm": 1.6973838806152344, "learning_rate": 1.9133905064591677e-05, "loss": 0.5069, "step": 14212 }, { "epoch": 2.320150197951104, "grad_norm": 1.4741166830062866, "learning_rate": 1.9133775878816423e-05, "loss": 0.5158, "step": 14213 }, { "epoch": 2.3203134565936083, "grad_norm": 1.7736214399337769, "learning_rate": 1.913364668384346e-05, "loss": 0.5577, "step": 14214 }, { "epoch": 2.3204767152361128, "grad_norm": 1.7309151887893677, "learning_rate": 1.9133517479672917e-05, "loss": 0.5824, "step": 14215 }, { "epoch": 2.320639973878617, "grad_norm": 1.7922288179397583, "learning_rate": 1.9133388266304925e-05, "loss": 0.6659, "step": 14216 }, { "epoch": 2.3208032325211216, "grad_norm": 1.6768302917480469, "learning_rate": 1.9133259043739612e-05, "loss": 0.5371, "step": 14217 }, { "epoch": 2.320966491163626, "grad_norm": 2.1058316230773926, "learning_rate": 1.913312981197711e-05, "loss": 0.6223, "step": 14218 }, { "epoch": 2.3211297498061305, "grad_norm": 2.086566925048828, "learning_rate": 1.9133000571017546e-05, "loss": 0.6546, "step": 14219 }, { "epoch": 2.3212930084486345, "grad_norm": 1.7139840126037598, "learning_rate": 1.9132871320861056e-05, "loss": 0.6002, "step": 14220 }, { "epoch": 2.321456267091139, "grad_norm": 1.8418947458267212, "learning_rate": 1.9132742061507764e-05, "loss": 0.5489, "step": 14221 }, { "epoch": 2.3216195257336434, "grad_norm": 1.9892988204956055, "learning_rate": 1.9132612792957808e-05, "loss": 0.6459, "step": 14222 }, { "epoch": 2.321782784376148, "grad_norm": 1.4540761709213257, "learning_rate": 1.913248351521131e-05, "loss": 0.4882, "step": 14223 }, { "epoch": 2.3219460430186523, "grad_norm": 2.1280593872070312, "learning_rate": 1.9132354228268407e-05, "loss": 0.6558, "step": 14224 }, { "epoch": 2.3221093016611567, "grad_norm": 1.5609755516052246, "learning_rate": 1.913222493212922e-05, "loss": 0.4896, "step": 14225 }, { "epoch": 2.322272560303661, "grad_norm": 2.295138120651245, "learning_rate": 1.913209562679389e-05, "loss": 0.5907, "step": 14226 }, { "epoch": 2.3224358189461656, "grad_norm": 1.9804073572158813, "learning_rate": 1.9131966312262538e-05, "loss": 0.6443, "step": 14227 }, { "epoch": 2.32259907758867, "grad_norm": 1.8519036769866943, "learning_rate": 1.91318369885353e-05, "loss": 0.5636, "step": 14228 }, { "epoch": 2.3227623362311745, "grad_norm": 1.998543620109558, "learning_rate": 1.9131707655612305e-05, "loss": 0.6263, "step": 14229 }, { "epoch": 2.3229255948736784, "grad_norm": 1.9605021476745605, "learning_rate": 1.9131578313493685e-05, "loss": 0.6959, "step": 14230 }, { "epoch": 2.323088853516183, "grad_norm": 1.8227728605270386, "learning_rate": 1.9131448962179564e-05, "loss": 0.6289, "step": 14231 }, { "epoch": 2.3232521121586873, "grad_norm": 1.7512179613113403, "learning_rate": 1.9131319601670077e-05, "loss": 0.5964, "step": 14232 }, { "epoch": 2.3234153708011918, "grad_norm": 1.6004233360290527, "learning_rate": 1.9131190231965356e-05, "loss": 0.5552, "step": 14233 }, { "epoch": 2.323578629443696, "grad_norm": 1.7075024843215942, "learning_rate": 1.9131060853065527e-05, "loss": 0.4941, "step": 14234 }, { "epoch": 2.3237418880862006, "grad_norm": 1.756809115409851, "learning_rate": 1.9130931464970725e-05, "loss": 0.4692, "step": 14235 }, { "epoch": 2.323905146728705, "grad_norm": 2.066211223602295, "learning_rate": 1.9130802067681075e-05, "loss": 0.6146, "step": 14236 }, { "epoch": 2.3240684053712095, "grad_norm": 1.6379774808883667, "learning_rate": 1.913067266119671e-05, "loss": 0.5181, "step": 14237 }, { "epoch": 2.3242316640137135, "grad_norm": 1.8124704360961914, "learning_rate": 1.9130543245517762e-05, "loss": 0.4951, "step": 14238 }, { "epoch": 2.324394922656218, "grad_norm": 2.002039909362793, "learning_rate": 1.9130413820644355e-05, "loss": 0.5836, "step": 14239 }, { "epoch": 2.3245581812987224, "grad_norm": 1.8837475776672363, "learning_rate": 1.9130284386576627e-05, "loss": 0.5724, "step": 14240 }, { "epoch": 2.324721439941227, "grad_norm": 1.7485705614089966, "learning_rate": 1.9130154943314704e-05, "loss": 0.5909, "step": 14241 }, { "epoch": 2.3248846985837313, "grad_norm": 1.8324661254882812, "learning_rate": 1.9130025490858717e-05, "loss": 0.5851, "step": 14242 }, { "epoch": 2.3250479572262357, "grad_norm": 1.813321828842163, "learning_rate": 1.9129896029208796e-05, "loss": 0.6307, "step": 14243 }, { "epoch": 2.32521121586874, "grad_norm": 1.611656665802002, "learning_rate": 1.9129766558365076e-05, "loss": 0.5568, "step": 14244 }, { "epoch": 2.3253744745112446, "grad_norm": 2.147583246231079, "learning_rate": 1.912963707832768e-05, "loss": 0.7279, "step": 14245 }, { "epoch": 2.325537733153749, "grad_norm": 1.8664380311965942, "learning_rate": 1.9129507589096744e-05, "loss": 0.6078, "step": 14246 }, { "epoch": 2.3257009917962534, "grad_norm": 1.6883580684661865, "learning_rate": 1.9129378090672393e-05, "loss": 0.5709, "step": 14247 }, { "epoch": 2.3258642504387574, "grad_norm": 1.8360906839370728, "learning_rate": 1.912924858305476e-05, "loss": 0.5584, "step": 14248 }, { "epoch": 2.326027509081262, "grad_norm": 1.8159902095794678, "learning_rate": 1.9129119066243982e-05, "loss": 0.5408, "step": 14249 }, { "epoch": 2.3261907677237663, "grad_norm": 1.7340307235717773, "learning_rate": 1.9128989540240178e-05, "loss": 0.5493, "step": 14250 }, { "epoch": 2.3263540263662708, "grad_norm": 1.8884925842285156, "learning_rate": 1.9128860005043488e-05, "loss": 0.6084, "step": 14251 }, { "epoch": 2.326517285008775, "grad_norm": 1.6388918161392212, "learning_rate": 1.9128730460654038e-05, "loss": 0.5564, "step": 14252 }, { "epoch": 2.3266805436512796, "grad_norm": 2.0416810512542725, "learning_rate": 1.9128600907071956e-05, "loss": 0.6814, "step": 14253 }, { "epoch": 2.326843802293784, "grad_norm": 2.058668613433838, "learning_rate": 1.9128471344297377e-05, "loss": 0.6637, "step": 14254 }, { "epoch": 2.3270070609362885, "grad_norm": 1.6928290128707886, "learning_rate": 1.9128341772330428e-05, "loss": 0.5371, "step": 14255 }, { "epoch": 2.3271703195787925, "grad_norm": 2.1240878105163574, "learning_rate": 1.9128212191171244e-05, "loss": 0.6247, "step": 14256 }, { "epoch": 2.327333578221297, "grad_norm": 1.7627924680709839, "learning_rate": 1.9128082600819953e-05, "loss": 0.5704, "step": 14257 }, { "epoch": 2.3274968368638014, "grad_norm": 1.7222764492034912, "learning_rate": 1.9127953001276684e-05, "loss": 0.6094, "step": 14258 }, { "epoch": 2.327660095506306, "grad_norm": 1.4306830167770386, "learning_rate": 1.912782339254157e-05, "loss": 0.4233, "step": 14259 }, { "epoch": 2.3278233541488103, "grad_norm": 1.4827498197555542, "learning_rate": 1.9127693774614738e-05, "loss": 0.5053, "step": 14260 }, { "epoch": 2.3279866127913147, "grad_norm": 1.9849332571029663, "learning_rate": 1.912756414749632e-05, "loss": 0.626, "step": 14261 }, { "epoch": 2.328149871433819, "grad_norm": 1.6904468536376953, "learning_rate": 1.912743451118645e-05, "loss": 0.5314, "step": 14262 }, { "epoch": 2.3283131300763236, "grad_norm": 1.6485413312911987, "learning_rate": 1.9127304865685253e-05, "loss": 0.498, "step": 14263 }, { "epoch": 2.328476388718828, "grad_norm": 1.930696725845337, "learning_rate": 1.9127175210992865e-05, "loss": 0.6132, "step": 14264 }, { "epoch": 2.328639647361332, "grad_norm": 1.8501136302947998, "learning_rate": 1.9127045547109414e-05, "loss": 0.6081, "step": 14265 }, { "epoch": 2.3288029060038364, "grad_norm": 2.0912435054779053, "learning_rate": 1.912691587403503e-05, "loss": 0.7638, "step": 14266 }, { "epoch": 2.328966164646341, "grad_norm": 1.9085915088653564, "learning_rate": 1.9126786191769845e-05, "loss": 0.654, "step": 14267 }, { "epoch": 2.3291294232888453, "grad_norm": 1.686488151550293, "learning_rate": 1.9126656500313987e-05, "loss": 0.5002, "step": 14268 }, { "epoch": 2.3292926819313498, "grad_norm": 2.228661060333252, "learning_rate": 1.9126526799667588e-05, "loss": 0.592, "step": 14269 }, { "epoch": 2.329455940573854, "grad_norm": 2.001030445098877, "learning_rate": 1.912639708983078e-05, "loss": 0.567, "step": 14270 }, { "epoch": 2.3296191992163586, "grad_norm": 2.0792925357818604, "learning_rate": 1.9126267370803692e-05, "loss": 0.6462, "step": 14271 }, { "epoch": 2.329782457858863, "grad_norm": 1.6004130840301514, "learning_rate": 1.912613764258646e-05, "loss": 0.4556, "step": 14272 }, { "epoch": 2.329945716501367, "grad_norm": 1.4718183279037476, "learning_rate": 1.9126007905179203e-05, "loss": 0.4712, "step": 14273 }, { "epoch": 2.3301089751438715, "grad_norm": 1.7839319705963135, "learning_rate": 1.912587815858206e-05, "loss": 0.6048, "step": 14274 }, { "epoch": 2.330272233786376, "grad_norm": 1.712062954902649, "learning_rate": 1.912574840279516e-05, "loss": 0.5209, "step": 14275 }, { "epoch": 2.3304354924288804, "grad_norm": 1.8125019073486328, "learning_rate": 1.9125618637818636e-05, "loss": 0.5509, "step": 14276 }, { "epoch": 2.330598751071385, "grad_norm": 1.8435832262039185, "learning_rate": 1.9125488863652614e-05, "loss": 0.5736, "step": 14277 }, { "epoch": 2.3307620097138892, "grad_norm": 1.7678258419036865, "learning_rate": 1.912535908029723e-05, "loss": 0.5895, "step": 14278 }, { "epoch": 2.3309252683563937, "grad_norm": 1.8242915868759155, "learning_rate": 1.912522928775261e-05, "loss": 0.6025, "step": 14279 }, { "epoch": 2.331088526998898, "grad_norm": 1.5700633525848389, "learning_rate": 1.9125099486018887e-05, "loss": 0.5432, "step": 14280 }, { "epoch": 2.3312517856414026, "grad_norm": 1.8027968406677246, "learning_rate": 1.9124969675096188e-05, "loss": 0.5087, "step": 14281 }, { "epoch": 2.331415044283907, "grad_norm": 2.140265941619873, "learning_rate": 1.9124839854984652e-05, "loss": 0.5949, "step": 14282 }, { "epoch": 2.331578302926411, "grad_norm": 2.099541187286377, "learning_rate": 1.9124710025684405e-05, "loss": 0.7018, "step": 14283 }, { "epoch": 2.3317415615689154, "grad_norm": 1.5682073831558228, "learning_rate": 1.9124580187195574e-05, "loss": 0.4786, "step": 14284 }, { "epoch": 2.33190482021142, "grad_norm": 1.7691290378570557, "learning_rate": 1.9124450339518296e-05, "loss": 0.5646, "step": 14285 }, { "epoch": 2.3320680788539243, "grad_norm": 1.543060064315796, "learning_rate": 1.9124320482652696e-05, "loss": 0.5621, "step": 14286 }, { "epoch": 2.3322313374964287, "grad_norm": 1.9433298110961914, "learning_rate": 1.9124190616598906e-05, "loss": 0.53, "step": 14287 }, { "epoch": 2.332394596138933, "grad_norm": 1.4831067323684692, "learning_rate": 1.9124060741357065e-05, "loss": 0.4647, "step": 14288 }, { "epoch": 2.3325578547814376, "grad_norm": 1.80315363407135, "learning_rate": 1.9123930856927292e-05, "loss": 0.544, "step": 14289 }, { "epoch": 2.332721113423942, "grad_norm": 1.7740825414657593, "learning_rate": 1.9123800963309725e-05, "loss": 0.5292, "step": 14290 }, { "epoch": 2.332884372066446, "grad_norm": 1.4382270574569702, "learning_rate": 1.912367106050449e-05, "loss": 0.4602, "step": 14291 }, { "epoch": 2.3330476307089505, "grad_norm": 1.7135275602340698, "learning_rate": 1.9123541148511723e-05, "loss": 0.518, "step": 14292 }, { "epoch": 2.333210889351455, "grad_norm": 1.728021502494812, "learning_rate": 1.9123411227331555e-05, "loss": 0.4977, "step": 14293 }, { "epoch": 2.3333741479939594, "grad_norm": 1.7978817224502563, "learning_rate": 1.9123281296964112e-05, "loss": 0.5328, "step": 14294 }, { "epoch": 2.333537406636464, "grad_norm": 2.020028829574585, "learning_rate": 1.912315135740953e-05, "loss": 0.6713, "step": 14295 }, { "epoch": 2.3337006652789682, "grad_norm": 1.782649278640747, "learning_rate": 1.912302140866793e-05, "loss": 0.5856, "step": 14296 }, { "epoch": 2.3338639239214727, "grad_norm": 1.75487220287323, "learning_rate": 1.9122891450739453e-05, "loss": 0.5032, "step": 14297 }, { "epoch": 2.334027182563977, "grad_norm": 1.9601994752883911, "learning_rate": 1.912276148362423e-05, "loss": 0.6021, "step": 14298 }, { "epoch": 2.3341904412064816, "grad_norm": 2.075417995452881, "learning_rate": 1.9122631507322388e-05, "loss": 0.6338, "step": 14299 }, { "epoch": 2.334353699848986, "grad_norm": 1.8739588260650635, "learning_rate": 1.9122501521834052e-05, "loss": 0.4959, "step": 14300 }, { "epoch": 2.33451695849149, "grad_norm": 1.7790136337280273, "learning_rate": 1.9122371527159367e-05, "loss": 0.5995, "step": 14301 }, { "epoch": 2.3346802171339944, "grad_norm": 1.8788526058197021, "learning_rate": 1.9122241523298456e-05, "loss": 0.5384, "step": 14302 }, { "epoch": 2.334843475776499, "grad_norm": 1.642607569694519, "learning_rate": 1.9122111510251446e-05, "loss": 0.5308, "step": 14303 }, { "epoch": 2.3350067344190033, "grad_norm": 1.7622816562652588, "learning_rate": 1.9121981488018472e-05, "loss": 0.6074, "step": 14304 }, { "epoch": 2.3351699930615077, "grad_norm": 1.9012219905853271, "learning_rate": 1.912185145659967e-05, "loss": 0.5567, "step": 14305 }, { "epoch": 2.335333251704012, "grad_norm": 1.618207335472107, "learning_rate": 1.912172141599516e-05, "loss": 0.5222, "step": 14306 }, { "epoch": 2.3354965103465166, "grad_norm": 1.7620047330856323, "learning_rate": 1.9121591366205085e-05, "loss": 0.5918, "step": 14307 }, { "epoch": 2.3356597689890206, "grad_norm": 1.486535668373108, "learning_rate": 1.9121461307229566e-05, "loss": 0.5289, "step": 14308 }, { "epoch": 2.335823027631525, "grad_norm": 2.0269103050231934, "learning_rate": 1.9121331239068738e-05, "loss": 0.6905, "step": 14309 }, { "epoch": 2.3359862862740295, "grad_norm": 1.6623226404190063, "learning_rate": 1.9121201161722732e-05, "loss": 0.5609, "step": 14310 }, { "epoch": 2.336149544916534, "grad_norm": 1.5255972146987915, "learning_rate": 1.912107107519168e-05, "loss": 0.483, "step": 14311 }, { "epoch": 2.3363128035590384, "grad_norm": 1.5768585205078125, "learning_rate": 1.912094097947571e-05, "loss": 0.4628, "step": 14312 }, { "epoch": 2.336476062201543, "grad_norm": 1.6449605226516724, "learning_rate": 1.9120810874574958e-05, "loss": 0.5334, "step": 14313 }, { "epoch": 2.3366393208440472, "grad_norm": 1.7894138097763062, "learning_rate": 1.9120680760489548e-05, "loss": 0.6191, "step": 14314 }, { "epoch": 2.3368025794865517, "grad_norm": 2.2405810356140137, "learning_rate": 1.9120550637219618e-05, "loss": 0.661, "step": 14315 }, { "epoch": 2.336965838129056, "grad_norm": 1.530287265777588, "learning_rate": 1.9120420504765292e-05, "loss": 0.4335, "step": 14316 }, { "epoch": 2.3371290967715606, "grad_norm": 2.3999314308166504, "learning_rate": 1.912029036312671e-05, "loss": 0.6515, "step": 14317 }, { "epoch": 2.3372923554140645, "grad_norm": 1.6913529634475708, "learning_rate": 1.9120160212303995e-05, "loss": 0.5503, "step": 14318 }, { "epoch": 2.337455614056569, "grad_norm": 1.6279637813568115, "learning_rate": 1.9120030052297283e-05, "loss": 0.5677, "step": 14319 }, { "epoch": 2.3376188726990734, "grad_norm": 1.6507736444473267, "learning_rate": 1.9119899883106702e-05, "loss": 0.5793, "step": 14320 }, { "epoch": 2.337782131341578, "grad_norm": 1.957861304283142, "learning_rate": 1.9119769704732382e-05, "loss": 0.6089, "step": 14321 }, { "epoch": 2.3379453899840823, "grad_norm": 1.6451245546340942, "learning_rate": 1.911963951717446e-05, "loss": 0.5335, "step": 14322 }, { "epoch": 2.3381086486265867, "grad_norm": 2.011702537536621, "learning_rate": 1.9119509320433062e-05, "loss": 0.6383, "step": 14323 }, { "epoch": 2.338271907269091, "grad_norm": 1.5543642044067383, "learning_rate": 1.9119379114508322e-05, "loss": 0.5177, "step": 14324 }, { "epoch": 2.3384351659115956, "grad_norm": 1.8404370546340942, "learning_rate": 1.9119248899400366e-05, "loss": 0.6137, "step": 14325 }, { "epoch": 2.3385984245540996, "grad_norm": 2.0368075370788574, "learning_rate": 1.9119118675109332e-05, "loss": 0.559, "step": 14326 }, { "epoch": 2.338761683196604, "grad_norm": 2.5275943279266357, "learning_rate": 1.9118988441635347e-05, "loss": 0.6466, "step": 14327 }, { "epoch": 2.3389249418391085, "grad_norm": 2.061138153076172, "learning_rate": 1.9118858198978542e-05, "loss": 0.6275, "step": 14328 }, { "epoch": 2.339088200481613, "grad_norm": 1.6408828496932983, "learning_rate": 1.911872794713905e-05, "loss": 0.5534, "step": 14329 }, { "epoch": 2.3392514591241174, "grad_norm": 1.9304453134536743, "learning_rate": 1.9118597686117e-05, "loss": 0.62, "step": 14330 }, { "epoch": 2.339414717766622, "grad_norm": 1.8575464487075806, "learning_rate": 1.9118467415912526e-05, "loss": 0.6228, "step": 14331 }, { "epoch": 2.3395779764091262, "grad_norm": 2.1903560161590576, "learning_rate": 1.911833713652576e-05, "loss": 0.6428, "step": 14332 }, { "epoch": 2.3397412350516307, "grad_norm": 1.8867651224136353, "learning_rate": 1.911820684795683e-05, "loss": 0.6746, "step": 14333 }, { "epoch": 2.339904493694135, "grad_norm": 1.8304061889648438, "learning_rate": 1.9118076550205864e-05, "loss": 0.5753, "step": 14334 }, { "epoch": 2.3400677523366396, "grad_norm": 2.2735514640808105, "learning_rate": 1.9117946243273003e-05, "loss": 0.5904, "step": 14335 }, { "epoch": 2.3402310109791435, "grad_norm": 1.6983143091201782, "learning_rate": 1.9117815927158367e-05, "loss": 0.5738, "step": 14336 }, { "epoch": 2.340394269621648, "grad_norm": 1.6759073734283447, "learning_rate": 1.9117685601862094e-05, "loss": 0.6015, "step": 14337 }, { "epoch": 2.3405575282641524, "grad_norm": 2.011970043182373, "learning_rate": 1.911755526738432e-05, "loss": 0.6494, "step": 14338 }, { "epoch": 2.340720786906657, "grad_norm": 2.054267168045044, "learning_rate": 1.9117424923725164e-05, "loss": 0.6877, "step": 14339 }, { "epoch": 2.3408840455491613, "grad_norm": 2.092139959335327, "learning_rate": 1.9117294570884764e-05, "loss": 0.7091, "step": 14340 }, { "epoch": 2.3410473041916657, "grad_norm": 2.2219839096069336, "learning_rate": 1.9117164208863253e-05, "loss": 0.7297, "step": 14341 }, { "epoch": 2.34121056283417, "grad_norm": 1.974481225013733, "learning_rate": 1.911703383766076e-05, "loss": 0.5656, "step": 14342 }, { "epoch": 2.3413738214766746, "grad_norm": 1.9667754173278809, "learning_rate": 1.9116903457277413e-05, "loss": 0.5152, "step": 14343 }, { "epoch": 2.3415370801191786, "grad_norm": 2.1050865650177, "learning_rate": 1.911677306771335e-05, "loss": 0.5313, "step": 14344 }, { "epoch": 2.341700338761683, "grad_norm": 2.0704214572906494, "learning_rate": 1.91166426689687e-05, "loss": 0.7297, "step": 14345 }, { "epoch": 2.3418635974041875, "grad_norm": 1.6567240953445435, "learning_rate": 1.911651226104359e-05, "loss": 0.6524, "step": 14346 }, { "epoch": 2.342026856046692, "grad_norm": 1.7096531391143799, "learning_rate": 1.9116381843938153e-05, "loss": 0.5473, "step": 14347 }, { "epoch": 2.3421901146891964, "grad_norm": 1.4323474168777466, "learning_rate": 1.9116251417652527e-05, "loss": 0.4942, "step": 14348 }, { "epoch": 2.342353373331701, "grad_norm": 1.7105293273925781, "learning_rate": 1.9116120982186835e-05, "loss": 0.674, "step": 14349 }, { "epoch": 2.3425166319742052, "grad_norm": 1.4878153800964355, "learning_rate": 1.9115990537541217e-05, "loss": 0.4937, "step": 14350 }, { "epoch": 2.3426798906167097, "grad_norm": 1.825160026550293, "learning_rate": 1.911586008371579e-05, "loss": 0.5951, "step": 14351 }, { "epoch": 2.342843149259214, "grad_norm": 1.8508250713348389, "learning_rate": 1.91157296207107e-05, "loss": 0.5814, "step": 14352 }, { "epoch": 2.3430064079017185, "grad_norm": 1.5965656042099, "learning_rate": 1.9115599148526073e-05, "loss": 0.4927, "step": 14353 }, { "epoch": 2.3431696665442225, "grad_norm": 1.9318604469299316, "learning_rate": 1.9115468667162038e-05, "loss": 0.5331, "step": 14354 }, { "epoch": 2.343332925186727, "grad_norm": 1.62886381149292, "learning_rate": 1.911533817661873e-05, "loss": 0.5146, "step": 14355 }, { "epoch": 2.3434961838292314, "grad_norm": 1.7920275926589966, "learning_rate": 1.911520767689628e-05, "loss": 0.5501, "step": 14356 }, { "epoch": 2.343659442471736, "grad_norm": 1.8077408075332642, "learning_rate": 1.9115077167994812e-05, "loss": 0.5271, "step": 14357 }, { "epoch": 2.3438227011142403, "grad_norm": 1.6737130880355835, "learning_rate": 1.9114946649914467e-05, "loss": 0.519, "step": 14358 }, { "epoch": 2.3439859597567447, "grad_norm": 1.6862773895263672, "learning_rate": 1.9114816122655378e-05, "loss": 0.4691, "step": 14359 }, { "epoch": 2.344149218399249, "grad_norm": 1.6092745065689087, "learning_rate": 1.9114685586217666e-05, "loss": 0.5611, "step": 14360 }, { "epoch": 2.344312477041753, "grad_norm": 1.782630205154419, "learning_rate": 1.911455504060147e-05, "loss": 0.5303, "step": 14361 }, { "epoch": 2.3444757356842576, "grad_norm": 1.6904139518737793, "learning_rate": 1.911442448580692e-05, "loss": 0.4815, "step": 14362 }, { "epoch": 2.344638994326762, "grad_norm": 2.0363881587982178, "learning_rate": 1.9114293921834144e-05, "loss": 0.4933, "step": 14363 }, { "epoch": 2.3448022529692665, "grad_norm": 1.9386905431747437, "learning_rate": 1.9114163348683277e-05, "loss": 0.6176, "step": 14364 }, { "epoch": 2.344965511611771, "grad_norm": 1.7429319620132446, "learning_rate": 1.9114032766354453e-05, "loss": 0.5071, "step": 14365 }, { "epoch": 2.3451287702542754, "grad_norm": 1.5838344097137451, "learning_rate": 1.91139021748478e-05, "loss": 0.5488, "step": 14366 }, { "epoch": 2.34529202889678, "grad_norm": 2.0674376487731934, "learning_rate": 1.911377157416345e-05, "loss": 0.6119, "step": 14367 }, { "epoch": 2.3454552875392842, "grad_norm": 1.8318235874176025, "learning_rate": 1.911364096430153e-05, "loss": 0.5395, "step": 14368 }, { "epoch": 2.3456185461817887, "grad_norm": 1.494236707687378, "learning_rate": 1.9113510345262183e-05, "loss": 0.4589, "step": 14369 }, { "epoch": 2.345781804824293, "grad_norm": 1.6821712255477905, "learning_rate": 1.9113379717045528e-05, "loss": 0.5136, "step": 14370 }, { "epoch": 2.345945063466797, "grad_norm": 1.8862628936767578, "learning_rate": 1.9113249079651705e-05, "loss": 0.5677, "step": 14371 }, { "epoch": 2.3461083221093015, "grad_norm": 1.7608035802841187, "learning_rate": 1.911311843308084e-05, "loss": 0.5561, "step": 14372 }, { "epoch": 2.346271580751806, "grad_norm": 1.6609976291656494, "learning_rate": 1.911298777733307e-05, "loss": 0.5127, "step": 14373 }, { "epoch": 2.3464348393943104, "grad_norm": 1.4505976438522339, "learning_rate": 1.911285711240852e-05, "loss": 0.4316, "step": 14374 }, { "epoch": 2.346598098036815, "grad_norm": 1.6782935857772827, "learning_rate": 1.9112726438307328e-05, "loss": 0.5032, "step": 14375 }, { "epoch": 2.3467613566793193, "grad_norm": 1.8453587293624878, "learning_rate": 1.9112595755029625e-05, "loss": 0.5802, "step": 14376 }, { "epoch": 2.3469246153218237, "grad_norm": 1.7005547285079956, "learning_rate": 1.9112465062575536e-05, "loss": 0.6038, "step": 14377 }, { "epoch": 2.347087873964328, "grad_norm": 2.008389472961426, "learning_rate": 1.91123343609452e-05, "loss": 0.6202, "step": 14378 }, { "epoch": 2.347251132606832, "grad_norm": 1.6299771070480347, "learning_rate": 1.9112203650138744e-05, "loss": 0.5347, "step": 14379 }, { "epoch": 2.3474143912493366, "grad_norm": 1.9562451839447021, "learning_rate": 1.91120729301563e-05, "loss": 0.5753, "step": 14380 }, { "epoch": 2.347577649891841, "grad_norm": 1.6553243398666382, "learning_rate": 1.9111942200998005e-05, "loss": 0.5225, "step": 14381 }, { "epoch": 2.3477409085343455, "grad_norm": 1.9632201194763184, "learning_rate": 1.9111811462663987e-05, "loss": 0.7094, "step": 14382 }, { "epoch": 2.34790416717685, "grad_norm": 1.9656965732574463, "learning_rate": 1.9111680715154373e-05, "loss": 0.6056, "step": 14383 }, { "epoch": 2.3480674258193543, "grad_norm": 1.7770682573318481, "learning_rate": 1.9111549958469302e-05, "loss": 0.4968, "step": 14384 }, { "epoch": 2.348230684461859, "grad_norm": 1.6981698274612427, "learning_rate": 1.91114191926089e-05, "loss": 0.5233, "step": 14385 }, { "epoch": 2.3483939431043632, "grad_norm": 2.042553186416626, "learning_rate": 1.9111288417573302e-05, "loss": 0.6057, "step": 14386 }, { "epoch": 2.3485572017468677, "grad_norm": 1.9041385650634766, "learning_rate": 1.9111157633362642e-05, "loss": 0.6243, "step": 14387 }, { "epoch": 2.348720460389372, "grad_norm": 1.7111514806747437, "learning_rate": 1.9111026839977046e-05, "loss": 0.5369, "step": 14388 }, { "epoch": 2.348883719031876, "grad_norm": 1.933585286140442, "learning_rate": 1.911089603741665e-05, "loss": 0.6223, "step": 14389 }, { "epoch": 2.3490469776743805, "grad_norm": 1.7815806865692139, "learning_rate": 1.9110765225681582e-05, "loss": 0.5755, "step": 14390 }, { "epoch": 2.349210236316885, "grad_norm": 1.6987594366073608, "learning_rate": 1.9110634404771976e-05, "loss": 0.5437, "step": 14391 }, { "epoch": 2.3493734949593894, "grad_norm": 1.580695390701294, "learning_rate": 1.9110503574687963e-05, "loss": 0.5305, "step": 14392 }, { "epoch": 2.349536753601894, "grad_norm": 1.701859712600708, "learning_rate": 1.9110372735429678e-05, "loss": 0.5509, "step": 14393 }, { "epoch": 2.3497000122443983, "grad_norm": 1.5803920030593872, "learning_rate": 1.911024188699725e-05, "loss": 0.5826, "step": 14394 }, { "epoch": 2.3498632708869027, "grad_norm": 1.7201087474822998, "learning_rate": 1.911011102939081e-05, "loss": 0.5015, "step": 14395 }, { "epoch": 2.3500265295294067, "grad_norm": 1.5218333005905151, "learning_rate": 1.910998016261049e-05, "loss": 0.5102, "step": 14396 }, { "epoch": 2.350189788171911, "grad_norm": 1.6910895109176636, "learning_rate": 1.9109849286656422e-05, "loss": 0.5927, "step": 14397 }, { "epoch": 2.3503530468144156, "grad_norm": 1.8621644973754883, "learning_rate": 1.9109718401528742e-05, "loss": 0.6189, "step": 14398 }, { "epoch": 2.35051630545692, "grad_norm": 1.8819994926452637, "learning_rate": 1.9109587507227573e-05, "loss": 0.5461, "step": 14399 }, { "epoch": 2.3506795640994245, "grad_norm": 1.9560438394546509, "learning_rate": 1.910945660375305e-05, "loss": 0.6483, "step": 14400 }, { "epoch": 2.350842822741929, "grad_norm": 1.5872201919555664, "learning_rate": 1.9109325691105314e-05, "loss": 0.5485, "step": 14401 }, { "epoch": 2.3510060813844333, "grad_norm": 1.6689941883087158, "learning_rate": 1.9109194769284484e-05, "loss": 0.5875, "step": 14402 }, { "epoch": 2.351169340026938, "grad_norm": 1.8009836673736572, "learning_rate": 1.9109063838290702e-05, "loss": 0.651, "step": 14403 }, { "epoch": 2.351332598669442, "grad_norm": 1.8500860929489136, "learning_rate": 1.910893289812409e-05, "loss": 0.6987, "step": 14404 }, { "epoch": 2.3514958573119467, "grad_norm": 1.9255222082138062, "learning_rate": 1.9108801948784788e-05, "loss": 0.5873, "step": 14405 }, { "epoch": 2.3516591159544507, "grad_norm": 1.519940733909607, "learning_rate": 1.9108670990272924e-05, "loss": 0.5779, "step": 14406 }, { "epoch": 2.351822374596955, "grad_norm": 1.5802947282791138, "learning_rate": 1.9108540022588632e-05, "loss": 0.494, "step": 14407 }, { "epoch": 2.3519856332394595, "grad_norm": 2.043674945831299, "learning_rate": 1.910840904573204e-05, "loss": 0.5983, "step": 14408 }, { "epoch": 2.352148891881964, "grad_norm": 1.8385940790176392, "learning_rate": 1.910827805970328e-05, "loss": 0.6363, "step": 14409 }, { "epoch": 2.3523121505244684, "grad_norm": 1.7474274635314941, "learning_rate": 1.9108147064502494e-05, "loss": 0.6258, "step": 14410 }, { "epoch": 2.352475409166973, "grad_norm": 1.6245766878128052, "learning_rate": 1.9108016060129803e-05, "loss": 0.5469, "step": 14411 }, { "epoch": 2.3526386678094773, "grad_norm": 1.6365611553192139, "learning_rate": 1.910788504658534e-05, "loss": 0.5482, "step": 14412 }, { "epoch": 2.3528019264519817, "grad_norm": 1.7441145181655884, "learning_rate": 1.910775402386924e-05, "loss": 0.5945, "step": 14413 }, { "epoch": 2.3529651850944857, "grad_norm": 1.8089488744735718, "learning_rate": 1.9107622991981637e-05, "loss": 0.5811, "step": 14414 }, { "epoch": 2.35312844373699, "grad_norm": 1.6562858819961548, "learning_rate": 1.9107491950922658e-05, "loss": 0.534, "step": 14415 }, { "epoch": 2.3532917023794946, "grad_norm": 1.5324574708938599, "learning_rate": 1.9107360900692437e-05, "loss": 0.4788, "step": 14416 }, { "epoch": 2.353454961021999, "grad_norm": 1.6852049827575684, "learning_rate": 1.9107229841291106e-05, "loss": 0.5934, "step": 14417 }, { "epoch": 2.3536182196645035, "grad_norm": 1.945752501487732, "learning_rate": 1.9107098772718796e-05, "loss": 0.6156, "step": 14418 }, { "epoch": 2.353781478307008, "grad_norm": 1.561339259147644, "learning_rate": 1.910696769497564e-05, "loss": 0.5102, "step": 14419 }, { "epoch": 2.3539447369495123, "grad_norm": 1.7985044717788696, "learning_rate": 1.910683660806177e-05, "loss": 0.6483, "step": 14420 }, { "epoch": 2.3541079955920168, "grad_norm": 1.8315913677215576, "learning_rate": 1.910670551197732e-05, "loss": 0.6206, "step": 14421 }, { "epoch": 2.354271254234521, "grad_norm": 1.9382907152175903, "learning_rate": 1.910657440672242e-05, "loss": 0.575, "step": 14422 }, { "epoch": 2.3544345128770257, "grad_norm": 1.43495512008667, "learning_rate": 1.91064432922972e-05, "loss": 0.5026, "step": 14423 }, { "epoch": 2.3545977715195296, "grad_norm": 2.062392473220825, "learning_rate": 1.9106312168701797e-05, "loss": 0.6579, "step": 14424 }, { "epoch": 2.354761030162034, "grad_norm": 1.9551305770874023, "learning_rate": 1.9106181035936337e-05, "loss": 0.6234, "step": 14425 }, { "epoch": 2.3549242888045385, "grad_norm": 1.7322885990142822, "learning_rate": 1.9106049894000955e-05, "loss": 0.5475, "step": 14426 }, { "epoch": 2.355087547447043, "grad_norm": 1.791019320487976, "learning_rate": 1.9105918742895785e-05, "loss": 0.6421, "step": 14427 }, { "epoch": 2.3552508060895474, "grad_norm": 1.533620834350586, "learning_rate": 1.9105787582620957e-05, "loss": 0.5514, "step": 14428 }, { "epoch": 2.355414064732052, "grad_norm": 1.7709577083587646, "learning_rate": 1.9105656413176602e-05, "loss": 0.5494, "step": 14429 }, { "epoch": 2.3555773233745563, "grad_norm": 1.6765358448028564, "learning_rate": 1.9105525234562858e-05, "loss": 0.5618, "step": 14430 }, { "epoch": 2.3557405820170607, "grad_norm": 1.8906395435333252, "learning_rate": 1.9105394046779846e-05, "loss": 0.5855, "step": 14431 }, { "epoch": 2.3559038406595647, "grad_norm": 1.9154672622680664, "learning_rate": 1.910526284982771e-05, "loss": 0.6469, "step": 14432 }, { "epoch": 2.356067099302069, "grad_norm": 2.0492875576019287, "learning_rate": 1.910513164370657e-05, "loss": 0.6619, "step": 14433 }, { "epoch": 2.3562303579445736, "grad_norm": 2.400404930114746, "learning_rate": 1.910500042841657e-05, "loss": 0.7213, "step": 14434 }, { "epoch": 2.356393616587078, "grad_norm": 2.19319748878479, "learning_rate": 1.9104869203957835e-05, "loss": 0.7499, "step": 14435 }, { "epoch": 2.3565568752295825, "grad_norm": 1.7461258172988892, "learning_rate": 1.91047379703305e-05, "loss": 0.5462, "step": 14436 }, { "epoch": 2.356720133872087, "grad_norm": 1.8114628791809082, "learning_rate": 1.9104606727534698e-05, "loss": 0.5827, "step": 14437 }, { "epoch": 2.3568833925145913, "grad_norm": 2.1949691772460938, "learning_rate": 1.910447547557056e-05, "loss": 0.6456, "step": 14438 }, { "epoch": 2.3570466511570958, "grad_norm": 1.6933521032333374, "learning_rate": 1.910434421443821e-05, "loss": 0.5158, "step": 14439 }, { "epoch": 2.3572099097996, "grad_norm": 1.886391043663025, "learning_rate": 1.9104212944137796e-05, "loss": 0.5743, "step": 14440 }, { "epoch": 2.3573731684421046, "grad_norm": 1.5729193687438965, "learning_rate": 1.9104081664669437e-05, "loss": 0.51, "step": 14441 }, { "epoch": 2.3575364270846086, "grad_norm": 1.7811365127563477, "learning_rate": 1.9103950376033276e-05, "loss": 0.5074, "step": 14442 }, { "epoch": 2.357699685727113, "grad_norm": 1.8287787437438965, "learning_rate": 1.9103819078229432e-05, "loss": 0.6224, "step": 14443 }, { "epoch": 2.3578629443696175, "grad_norm": 1.912573218345642, "learning_rate": 1.910368777125805e-05, "loss": 0.5924, "step": 14444 }, { "epoch": 2.358026203012122, "grad_norm": 1.4859349727630615, "learning_rate": 1.9103556455119253e-05, "loss": 0.4939, "step": 14445 }, { "epoch": 2.3581894616546264, "grad_norm": 1.7044672966003418, "learning_rate": 1.910342512981318e-05, "loss": 0.5543, "step": 14446 }, { "epoch": 2.358352720297131, "grad_norm": 1.663388729095459, "learning_rate": 1.910329379533996e-05, "loss": 0.5162, "step": 14447 }, { "epoch": 2.3585159789396353, "grad_norm": 1.9986932277679443, "learning_rate": 1.9103162451699726e-05, "loss": 0.6826, "step": 14448 }, { "epoch": 2.3586792375821393, "grad_norm": 1.6204969882965088, "learning_rate": 1.910303109889261e-05, "loss": 0.5178, "step": 14449 }, { "epoch": 2.3588424962246437, "grad_norm": 1.6501067876815796, "learning_rate": 1.9102899736918742e-05, "loss": 0.5335, "step": 14450 }, { "epoch": 2.359005754867148, "grad_norm": 1.5919413566589355, "learning_rate": 1.9102768365778258e-05, "loss": 0.5026, "step": 14451 }, { "epoch": 2.3591690135096526, "grad_norm": 1.7240034341812134, "learning_rate": 1.9102636985471288e-05, "loss": 0.5139, "step": 14452 }, { "epoch": 2.359332272152157, "grad_norm": 1.749560832977295, "learning_rate": 1.9102505595997965e-05, "loss": 0.6168, "step": 14453 }, { "epoch": 2.3594955307946615, "grad_norm": 1.7278269529342651, "learning_rate": 1.9102374197358423e-05, "loss": 0.5518, "step": 14454 }, { "epoch": 2.359658789437166, "grad_norm": 2.265058994293213, "learning_rate": 1.910224278955279e-05, "loss": 0.736, "step": 14455 }, { "epoch": 2.3598220480796703, "grad_norm": 1.7528774738311768, "learning_rate": 1.91021113725812e-05, "loss": 0.5364, "step": 14456 }, { "epoch": 2.3599853067221748, "grad_norm": 1.5723092555999756, "learning_rate": 1.9101979946443787e-05, "loss": 0.5318, "step": 14457 }, { "epoch": 2.360148565364679, "grad_norm": 1.6260379552841187, "learning_rate": 1.9101848511140682e-05, "loss": 0.52, "step": 14458 }, { "epoch": 2.360311824007183, "grad_norm": 2.1655828952789307, "learning_rate": 1.9101717066672024e-05, "loss": 0.6565, "step": 14459 }, { "epoch": 2.3604750826496876, "grad_norm": 1.7303217649459839, "learning_rate": 1.9101585613037933e-05, "loss": 0.6312, "step": 14460 }, { "epoch": 2.360638341292192, "grad_norm": 2.1012842655181885, "learning_rate": 1.910145415023855e-05, "loss": 0.6315, "step": 14461 }, { "epoch": 2.3608015999346965, "grad_norm": 1.7669142484664917, "learning_rate": 1.9101322678274002e-05, "loss": 0.5596, "step": 14462 }, { "epoch": 2.360964858577201, "grad_norm": 1.7769362926483154, "learning_rate": 1.910119119714443e-05, "loss": 0.5343, "step": 14463 }, { "epoch": 2.3611281172197054, "grad_norm": 1.7003995180130005, "learning_rate": 1.9101059706849957e-05, "loss": 0.5863, "step": 14464 }, { "epoch": 2.36129137586221, "grad_norm": 1.8442423343658447, "learning_rate": 1.910092820739072e-05, "loss": 0.5672, "step": 14465 }, { "epoch": 2.3614546345047143, "grad_norm": 1.6677004098892212, "learning_rate": 1.9100796698766854e-05, "loss": 0.5233, "step": 14466 }, { "epoch": 2.3616178931472183, "grad_norm": 1.6248165369033813, "learning_rate": 1.9100665180978485e-05, "loss": 0.4656, "step": 14467 }, { "epoch": 2.3617811517897227, "grad_norm": 1.4707319736480713, "learning_rate": 1.9100533654025748e-05, "loss": 0.4492, "step": 14468 }, { "epoch": 2.361944410432227, "grad_norm": 1.9002872705459595, "learning_rate": 1.9100402117908775e-05, "loss": 0.5751, "step": 14469 }, { "epoch": 2.3621076690747316, "grad_norm": 1.5169641971588135, "learning_rate": 1.9100270572627705e-05, "loss": 0.535, "step": 14470 }, { "epoch": 2.362270927717236, "grad_norm": 2.03393292427063, "learning_rate": 1.910013901818266e-05, "loss": 0.6056, "step": 14471 }, { "epoch": 2.3624341863597405, "grad_norm": 2.0755615234375, "learning_rate": 1.910000745457378e-05, "loss": 0.6416, "step": 14472 }, { "epoch": 2.362597445002245, "grad_norm": 1.8453636169433594, "learning_rate": 1.909987588180119e-05, "loss": 0.5917, "step": 14473 }, { "epoch": 2.3627607036447493, "grad_norm": 1.7990072965621948, "learning_rate": 1.909974429986503e-05, "loss": 0.584, "step": 14474 }, { "epoch": 2.3629239622872538, "grad_norm": 1.7349976301193237, "learning_rate": 1.9099612708765432e-05, "loss": 0.5147, "step": 14475 }, { "epoch": 2.363087220929758, "grad_norm": 2.2915091514587402, "learning_rate": 1.9099481108502526e-05, "loss": 0.9908, "step": 14476 }, { "epoch": 2.363250479572262, "grad_norm": 1.7935649156570435, "learning_rate": 1.9099349499076443e-05, "loss": 0.5623, "step": 14477 }, { "epoch": 2.3634137382147666, "grad_norm": 2.100945472717285, "learning_rate": 1.9099217880487318e-05, "loss": 0.7619, "step": 14478 }, { "epoch": 2.363576996857271, "grad_norm": 1.7584258317947388, "learning_rate": 1.9099086252735285e-05, "loss": 0.496, "step": 14479 }, { "epoch": 2.3637402554997755, "grad_norm": 1.7087547779083252, "learning_rate": 1.9098954615820477e-05, "loss": 0.4775, "step": 14480 }, { "epoch": 2.36390351414228, "grad_norm": 2.043039321899414, "learning_rate": 1.9098822969743018e-05, "loss": 0.6354, "step": 14481 }, { "epoch": 2.3640667727847844, "grad_norm": 1.8453885316848755, "learning_rate": 1.909869131450305e-05, "loss": 0.6038, "step": 14482 }, { "epoch": 2.364230031427289, "grad_norm": 1.8179603815078735, "learning_rate": 1.9098559650100702e-05, "loss": 0.5857, "step": 14483 }, { "epoch": 2.3643932900697933, "grad_norm": 1.8247332572937012, "learning_rate": 1.9098427976536103e-05, "loss": 0.6672, "step": 14484 }, { "epoch": 2.3645565487122973, "grad_norm": 1.4729063510894775, "learning_rate": 1.9098296293809396e-05, "loss": 0.4669, "step": 14485 }, { "epoch": 2.3647198073548017, "grad_norm": 1.6245359182357788, "learning_rate": 1.9098164601920702e-05, "loss": 0.5561, "step": 14486 }, { "epoch": 2.364883065997306, "grad_norm": 1.9141895771026611, "learning_rate": 1.909803290087016e-05, "loss": 0.6332, "step": 14487 }, { "epoch": 2.3650463246398106, "grad_norm": 1.5456106662750244, "learning_rate": 1.9097901190657902e-05, "loss": 0.5045, "step": 14488 }, { "epoch": 2.365209583282315, "grad_norm": 1.813405990600586, "learning_rate": 1.909776947128406e-05, "loss": 0.529, "step": 14489 }, { "epoch": 2.3653728419248194, "grad_norm": 1.9589474201202393, "learning_rate": 1.9097637742748768e-05, "loss": 0.663, "step": 14490 }, { "epoch": 2.365536100567324, "grad_norm": 1.8977588415145874, "learning_rate": 1.9097506005052153e-05, "loss": 0.5374, "step": 14491 }, { "epoch": 2.3656993592098283, "grad_norm": 1.9282033443450928, "learning_rate": 1.9097374258194355e-05, "loss": 0.665, "step": 14492 }, { "epoch": 2.3658626178523328, "grad_norm": 1.8801733255386353, "learning_rate": 1.9097242502175503e-05, "loss": 0.512, "step": 14493 }, { "epoch": 2.3660258764948368, "grad_norm": 1.8180233240127563, "learning_rate": 1.909711073699573e-05, "loss": 0.6267, "step": 14494 }, { "epoch": 2.366189135137341, "grad_norm": 1.6442453861236572, "learning_rate": 1.9096978962655167e-05, "loss": 0.5371, "step": 14495 }, { "epoch": 2.3663523937798456, "grad_norm": 2.0334718227386475, "learning_rate": 1.909684717915395e-05, "loss": 0.5985, "step": 14496 }, { "epoch": 2.36651565242235, "grad_norm": 1.63556706905365, "learning_rate": 1.9096715386492214e-05, "loss": 0.5117, "step": 14497 }, { "epoch": 2.3666789110648545, "grad_norm": 2.0091745853424072, "learning_rate": 1.9096583584670082e-05, "loss": 0.6487, "step": 14498 }, { "epoch": 2.366842169707359, "grad_norm": 1.5969957113265991, "learning_rate": 1.9096451773687695e-05, "loss": 0.5736, "step": 14499 }, { "epoch": 2.3670054283498634, "grad_norm": 1.6788409948349, "learning_rate": 1.9096319953545186e-05, "loss": 0.6032, "step": 14500 }, { "epoch": 2.367168686992368, "grad_norm": 1.6106635332107544, "learning_rate": 1.9096188124242683e-05, "loss": 0.5147, "step": 14501 }, { "epoch": 2.367331945634872, "grad_norm": 1.7689419984817505, "learning_rate": 1.909605628578032e-05, "loss": 0.6423, "step": 14502 }, { "epoch": 2.3674952042773763, "grad_norm": 1.7424798011779785, "learning_rate": 1.9095924438158235e-05, "loss": 0.539, "step": 14503 }, { "epoch": 2.3676584629198807, "grad_norm": 1.6533763408660889, "learning_rate": 1.909579258137655e-05, "loss": 0.5102, "step": 14504 }, { "epoch": 2.367821721562385, "grad_norm": 1.7636202573776245, "learning_rate": 1.909566071543541e-05, "loss": 0.6133, "step": 14505 }, { "epoch": 2.3679849802048896, "grad_norm": 1.7142084836959839, "learning_rate": 1.909552884033494e-05, "loss": 0.587, "step": 14506 }, { "epoch": 2.368148238847394, "grad_norm": 1.8404573202133179, "learning_rate": 1.9095396956075276e-05, "loss": 0.4405, "step": 14507 }, { "epoch": 2.3683114974898984, "grad_norm": 1.5434514284133911, "learning_rate": 1.9095265062656546e-05, "loss": 0.5483, "step": 14508 }, { "epoch": 2.368474756132403, "grad_norm": 2.1177189350128174, "learning_rate": 1.909513316007889e-05, "loss": 0.6488, "step": 14509 }, { "epoch": 2.3686380147749073, "grad_norm": 1.8193331956863403, "learning_rate": 1.9095001248342436e-05, "loss": 0.6492, "step": 14510 }, { "epoch": 2.3688012734174118, "grad_norm": 1.6815204620361328, "learning_rate": 1.9094869327447316e-05, "loss": 0.4814, "step": 14511 }, { "epoch": 2.3689645320599158, "grad_norm": 1.64766263961792, "learning_rate": 1.909473739739367e-05, "loss": 0.5084, "step": 14512 }, { "epoch": 2.36912779070242, "grad_norm": 2.028972625732422, "learning_rate": 1.909460545818162e-05, "loss": 0.5884, "step": 14513 }, { "epoch": 2.3692910493449246, "grad_norm": 1.8911207914352417, "learning_rate": 1.909447350981131e-05, "loss": 0.5427, "step": 14514 }, { "epoch": 2.369454307987429, "grad_norm": 2.079075813293457, "learning_rate": 1.9094341552282866e-05, "loss": 0.6702, "step": 14515 }, { "epoch": 2.3696175666299335, "grad_norm": 1.9665350914001465, "learning_rate": 1.909420958559642e-05, "loss": 0.702, "step": 14516 }, { "epoch": 2.369780825272438, "grad_norm": 2.0632164478302, "learning_rate": 1.9094077609752108e-05, "loss": 0.6227, "step": 14517 }, { "epoch": 2.3699440839149424, "grad_norm": 1.4412235021591187, "learning_rate": 1.9093945624750065e-05, "loss": 0.4507, "step": 14518 }, { "epoch": 2.370107342557447, "grad_norm": 1.6485024690628052, "learning_rate": 1.9093813630590417e-05, "loss": 0.4806, "step": 14519 }, { "epoch": 2.370270601199951, "grad_norm": 1.6636178493499756, "learning_rate": 1.9093681627273306e-05, "loss": 0.5798, "step": 14520 }, { "epoch": 2.3704338598424552, "grad_norm": 1.6252580881118774, "learning_rate": 1.9093549614798858e-05, "loss": 0.5549, "step": 14521 }, { "epoch": 2.3705971184849597, "grad_norm": 1.8036388158798218, "learning_rate": 1.9093417593167207e-05, "loss": 0.5645, "step": 14522 }, { "epoch": 2.370760377127464, "grad_norm": 1.6162010431289673, "learning_rate": 1.9093285562378487e-05, "loss": 0.5239, "step": 14523 }, { "epoch": 2.3709236357699686, "grad_norm": 2.0794332027435303, "learning_rate": 1.9093153522432832e-05, "loss": 0.5933, "step": 14524 }, { "epoch": 2.371086894412473, "grad_norm": 1.7037785053253174, "learning_rate": 1.9093021473330372e-05, "loss": 0.5221, "step": 14525 }, { "epoch": 2.3712501530549774, "grad_norm": 1.7603610754013062, "learning_rate": 1.9092889415071245e-05, "loss": 0.5339, "step": 14526 }, { "epoch": 2.371413411697482, "grad_norm": 1.7453241348266602, "learning_rate": 1.909275734765558e-05, "loss": 0.6, "step": 14527 }, { "epoch": 2.3715766703399863, "grad_norm": 1.862191081047058, "learning_rate": 1.909262527108351e-05, "loss": 0.6564, "step": 14528 }, { "epoch": 2.3717399289824908, "grad_norm": 1.6965841054916382, "learning_rate": 1.9092493185355168e-05, "loss": 0.6068, "step": 14529 }, { "epoch": 2.3719031876249947, "grad_norm": 1.6654671430587769, "learning_rate": 1.9092361090470688e-05, "loss": 0.6282, "step": 14530 }, { "epoch": 2.372066446267499, "grad_norm": 1.637475609779358, "learning_rate": 1.9092228986430203e-05, "loss": 0.5804, "step": 14531 }, { "epoch": 2.3722297049100036, "grad_norm": 1.7522648572921753, "learning_rate": 1.9092096873233847e-05, "loss": 0.5775, "step": 14532 }, { "epoch": 2.372392963552508, "grad_norm": 1.7098793983459473, "learning_rate": 1.909196475088175e-05, "loss": 0.5717, "step": 14533 }, { "epoch": 2.3725562221950125, "grad_norm": 1.4197559356689453, "learning_rate": 1.9091832619374045e-05, "loss": 0.5261, "step": 14534 }, { "epoch": 2.372719480837517, "grad_norm": 1.7026376724243164, "learning_rate": 1.909170047871087e-05, "loss": 0.5689, "step": 14535 }, { "epoch": 2.3728827394800214, "grad_norm": 2.0012881755828857, "learning_rate": 1.9091568328892354e-05, "loss": 0.5652, "step": 14536 }, { "epoch": 2.3730459981225254, "grad_norm": 1.7350362539291382, "learning_rate": 1.9091436169918634e-05, "loss": 0.5033, "step": 14537 }, { "epoch": 2.37320925676503, "grad_norm": 1.8039628267288208, "learning_rate": 1.9091304001789837e-05, "loss": 0.5297, "step": 14538 }, { "epoch": 2.3733725154075342, "grad_norm": 1.4128865003585815, "learning_rate": 1.90911718245061e-05, "loss": 0.5091, "step": 14539 }, { "epoch": 2.3735357740500387, "grad_norm": 1.803348183631897, "learning_rate": 1.9091039638067555e-05, "loss": 0.5167, "step": 14540 }, { "epoch": 2.373699032692543, "grad_norm": 1.8317461013793945, "learning_rate": 1.9090907442474334e-05, "loss": 0.4822, "step": 14541 }, { "epoch": 2.3738622913350476, "grad_norm": 1.6216344833374023, "learning_rate": 1.9090775237726575e-05, "loss": 0.465, "step": 14542 }, { "epoch": 2.374025549977552, "grad_norm": 1.6102354526519775, "learning_rate": 1.909064302382441e-05, "loss": 0.5395, "step": 14543 }, { "epoch": 2.3741888086200564, "grad_norm": 1.7881611585617065, "learning_rate": 1.9090510800767964e-05, "loss": 0.5165, "step": 14544 }, { "epoch": 2.374352067262561, "grad_norm": 1.8531694412231445, "learning_rate": 1.9090378568557377e-05, "loss": 0.5406, "step": 14545 }, { "epoch": 2.3745153259050653, "grad_norm": 1.7482800483703613, "learning_rate": 1.9090246327192783e-05, "loss": 0.5487, "step": 14546 }, { "epoch": 2.3746785845475693, "grad_norm": 1.7140703201293945, "learning_rate": 1.909011407667431e-05, "loss": 0.5155, "step": 14547 }, { "epoch": 2.3748418431900737, "grad_norm": 1.9508895874023438, "learning_rate": 1.9089981817002102e-05, "loss": 0.6524, "step": 14548 }, { "epoch": 2.375005101832578, "grad_norm": 1.6928495168685913, "learning_rate": 1.9089849548176276e-05, "loss": 0.6571, "step": 14549 }, { "epoch": 2.3751683604750826, "grad_norm": 1.7684071063995361, "learning_rate": 1.9089717270196982e-05, "loss": 0.5536, "step": 14550 }, { "epoch": 2.375331619117587, "grad_norm": 1.759058952331543, "learning_rate": 1.908958498306434e-05, "loss": 0.5448, "step": 14551 }, { "epoch": 2.3754948777600915, "grad_norm": 1.5509284734725952, "learning_rate": 1.908945268677849e-05, "loss": 0.4936, "step": 14552 }, { "epoch": 2.375658136402596, "grad_norm": 1.928565502166748, "learning_rate": 1.908932038133956e-05, "loss": 0.612, "step": 14553 }, { "epoch": 2.3758213950451004, "grad_norm": 1.989676833152771, "learning_rate": 1.908918806674769e-05, "loss": 0.5859, "step": 14554 }, { "epoch": 2.3759846536876044, "grad_norm": 1.637134313583374, "learning_rate": 1.908905574300301e-05, "loss": 0.5531, "step": 14555 }, { "epoch": 2.376147912330109, "grad_norm": 1.8181407451629639, "learning_rate": 1.9088923410105654e-05, "loss": 0.6026, "step": 14556 }, { "epoch": 2.3763111709726132, "grad_norm": 1.7707496881484985, "learning_rate": 1.9088791068055755e-05, "loss": 0.5673, "step": 14557 }, { "epoch": 2.3764744296151177, "grad_norm": 1.8202983140945435, "learning_rate": 1.9088658716853444e-05, "loss": 0.5302, "step": 14558 }, { "epoch": 2.376637688257622, "grad_norm": 1.8425580263137817, "learning_rate": 1.9088526356498854e-05, "loss": 0.6265, "step": 14559 }, { "epoch": 2.3768009469001266, "grad_norm": 1.8676517009735107, "learning_rate": 1.9088393986992124e-05, "loss": 0.5433, "step": 14560 }, { "epoch": 2.376964205542631, "grad_norm": 1.8974515199661255, "learning_rate": 1.9088261608333382e-05, "loss": 0.5695, "step": 14561 }, { "epoch": 2.3771274641851354, "grad_norm": 1.7107239961624146, "learning_rate": 1.9088129220522765e-05, "loss": 0.5522, "step": 14562 }, { "epoch": 2.37729072282764, "grad_norm": 2.1135764122009277, "learning_rate": 1.9087996823560404e-05, "loss": 0.6722, "step": 14563 }, { "epoch": 2.3774539814701443, "grad_norm": 1.9616618156433105, "learning_rate": 1.9087864417446428e-05, "loss": 0.6052, "step": 14564 }, { "epoch": 2.3776172401126483, "grad_norm": 1.6884421110153198, "learning_rate": 1.9087732002180982e-05, "loss": 0.5562, "step": 14565 }, { "epoch": 2.3777804987551527, "grad_norm": 1.6799652576446533, "learning_rate": 1.9087599577764186e-05, "loss": 0.489, "step": 14566 }, { "epoch": 2.377943757397657, "grad_norm": 1.6229826211929321, "learning_rate": 1.9087467144196185e-05, "loss": 0.567, "step": 14567 }, { "epoch": 2.3781070160401616, "grad_norm": 1.6193634271621704, "learning_rate": 1.9087334701477104e-05, "loss": 0.5388, "step": 14568 }, { "epoch": 2.378270274682666, "grad_norm": 1.818680763244629, "learning_rate": 1.908720224960708e-05, "loss": 0.5694, "step": 14569 }, { "epoch": 2.3784335333251705, "grad_norm": 1.7210971117019653, "learning_rate": 1.9087069788586245e-05, "loss": 0.5807, "step": 14570 }, { "epoch": 2.378596791967675, "grad_norm": 2.1259872913360596, "learning_rate": 1.9086937318414735e-05, "loss": 0.7285, "step": 14571 }, { "epoch": 2.3787600506101794, "grad_norm": 1.6096590757369995, "learning_rate": 1.908680483909268e-05, "loss": 0.5941, "step": 14572 }, { "epoch": 2.3789233092526834, "grad_norm": 1.6167224645614624, "learning_rate": 1.9086672350620213e-05, "loss": 0.5888, "step": 14573 }, { "epoch": 2.379086567895188, "grad_norm": 1.708115577697754, "learning_rate": 1.908653985299747e-05, "loss": 0.5102, "step": 14574 }, { "epoch": 2.3792498265376922, "grad_norm": 2.078360080718994, "learning_rate": 1.908640734622459e-05, "loss": 0.7995, "step": 14575 }, { "epoch": 2.3794130851801967, "grad_norm": 1.5572773218154907, "learning_rate": 1.908627483030169e-05, "loss": 0.4823, "step": 14576 }, { "epoch": 2.379576343822701, "grad_norm": 2.022814989089966, "learning_rate": 1.908614230522892e-05, "loss": 0.6716, "step": 14577 }, { "epoch": 2.3797396024652056, "grad_norm": 1.779279112815857, "learning_rate": 1.9086009771006405e-05, "loss": 0.6936, "step": 14578 }, { "epoch": 2.37990286110771, "grad_norm": 1.8015373945236206, "learning_rate": 1.908587722763428e-05, "loss": 0.6473, "step": 14579 }, { "epoch": 2.3800661197502144, "grad_norm": 1.9395558834075928, "learning_rate": 1.9085744675112682e-05, "loss": 0.6401, "step": 14580 }, { "epoch": 2.380229378392719, "grad_norm": 1.5874874591827393, "learning_rate": 1.9085612113441742e-05, "loss": 0.5598, "step": 14581 }, { "epoch": 2.380392637035223, "grad_norm": 2.0969114303588867, "learning_rate": 1.9085479542621593e-05, "loss": 0.6516, "step": 14582 }, { "epoch": 2.3805558956777273, "grad_norm": 1.743098497390747, "learning_rate": 1.9085346962652366e-05, "loss": 0.6153, "step": 14583 }, { "epoch": 2.3807191543202317, "grad_norm": 1.717746615409851, "learning_rate": 1.9085214373534198e-05, "loss": 0.5523, "step": 14584 }, { "epoch": 2.380882412962736, "grad_norm": 1.7856336832046509, "learning_rate": 1.908508177526722e-05, "loss": 0.5338, "step": 14585 }, { "epoch": 2.3810456716052406, "grad_norm": 1.3245549201965332, "learning_rate": 1.9084949167851567e-05, "loss": 0.4829, "step": 14586 }, { "epoch": 2.381208930247745, "grad_norm": 1.897556185722351, "learning_rate": 1.9084816551287376e-05, "loss": 0.5764, "step": 14587 }, { "epoch": 2.3813721888902495, "grad_norm": 1.4993222951889038, "learning_rate": 1.9084683925574772e-05, "loss": 0.509, "step": 14588 }, { "epoch": 2.381535447532754, "grad_norm": 1.6357264518737793, "learning_rate": 1.90845512907139e-05, "loss": 0.5312, "step": 14589 }, { "epoch": 2.381698706175258, "grad_norm": 1.6717731952667236, "learning_rate": 1.9084418646704884e-05, "loss": 0.5115, "step": 14590 }, { "epoch": 2.3818619648177624, "grad_norm": 1.9612261056900024, "learning_rate": 1.908428599354786e-05, "loss": 0.5858, "step": 14591 }, { "epoch": 2.382025223460267, "grad_norm": 1.5940357446670532, "learning_rate": 1.908415333124296e-05, "loss": 0.5609, "step": 14592 }, { "epoch": 2.3821884821027712, "grad_norm": 1.9829597473144531, "learning_rate": 1.9084020659790328e-05, "loss": 0.5778, "step": 14593 }, { "epoch": 2.3823517407452757, "grad_norm": 1.8137401342391968, "learning_rate": 1.9083887979190084e-05, "loss": 0.6343, "step": 14594 }, { "epoch": 2.38251499938778, "grad_norm": 1.647303581237793, "learning_rate": 1.9083755289442368e-05, "loss": 0.5873, "step": 14595 }, { "epoch": 2.3826782580302845, "grad_norm": 1.4914467334747314, "learning_rate": 1.9083622590547313e-05, "loss": 0.4778, "step": 14596 }, { "epoch": 2.382841516672789, "grad_norm": 1.6948738098144531, "learning_rate": 1.9083489882505052e-05, "loss": 0.6132, "step": 14597 }, { "epoch": 2.3830047753152934, "grad_norm": 2.0593371391296387, "learning_rate": 1.908335716531572e-05, "loss": 0.6427, "step": 14598 }, { "epoch": 2.383168033957798, "grad_norm": 2.097874164581299, "learning_rate": 1.908322443897945e-05, "loss": 0.5341, "step": 14599 }, { "epoch": 2.383331292600302, "grad_norm": 1.9144996404647827, "learning_rate": 1.9083091703496373e-05, "loss": 0.5461, "step": 14600 }, { "epoch": 2.3834945512428063, "grad_norm": 2.0969812870025635, "learning_rate": 1.9082958958866628e-05, "loss": 0.5864, "step": 14601 }, { "epoch": 2.3836578098853107, "grad_norm": 1.8062586784362793, "learning_rate": 1.9082826205090343e-05, "loss": 0.6145, "step": 14602 }, { "epoch": 2.383821068527815, "grad_norm": 1.794722080230713, "learning_rate": 1.9082693442167658e-05, "loss": 0.5225, "step": 14603 }, { "epoch": 2.3839843271703196, "grad_norm": 1.8394451141357422, "learning_rate": 1.90825606700987e-05, "loss": 0.5762, "step": 14604 }, { "epoch": 2.384147585812824, "grad_norm": 1.8057453632354736, "learning_rate": 1.9082427888883604e-05, "loss": 0.6581, "step": 14605 }, { "epoch": 2.3843108444553285, "grad_norm": 1.522560715675354, "learning_rate": 1.9082295098522513e-05, "loss": 0.4934, "step": 14606 }, { "epoch": 2.384474103097833, "grad_norm": 1.5829105377197266, "learning_rate": 1.9082162299015547e-05, "loss": 0.503, "step": 14607 }, { "epoch": 2.384637361740337, "grad_norm": 2.348994731903076, "learning_rate": 1.9082029490362844e-05, "loss": 0.6501, "step": 14608 }, { "epoch": 2.3848006203828414, "grad_norm": 1.94192636013031, "learning_rate": 1.9081896672564547e-05, "loss": 0.5549, "step": 14609 }, { "epoch": 2.384963879025346, "grad_norm": 1.696832537651062, "learning_rate": 1.9081763845620777e-05, "loss": 0.517, "step": 14610 }, { "epoch": 2.3851271376678502, "grad_norm": 1.5302447080612183, "learning_rate": 1.9081631009531677e-05, "loss": 0.5225, "step": 14611 }, { "epoch": 2.3852903963103547, "grad_norm": 1.8243528604507446, "learning_rate": 1.9081498164297373e-05, "loss": 0.6125, "step": 14612 }, { "epoch": 2.385453654952859, "grad_norm": 1.79802668094635, "learning_rate": 1.9081365309918006e-05, "loss": 0.5831, "step": 14613 }, { "epoch": 2.3856169135953635, "grad_norm": 1.8712985515594482, "learning_rate": 1.9081232446393706e-05, "loss": 0.5618, "step": 14614 }, { "epoch": 2.385780172237868, "grad_norm": 1.8246898651123047, "learning_rate": 1.9081099573724607e-05, "loss": 0.5415, "step": 14615 }, { "epoch": 2.3859434308803724, "grad_norm": 1.6117380857467651, "learning_rate": 1.908096669191084e-05, "loss": 0.4432, "step": 14616 }, { "epoch": 2.386106689522877, "grad_norm": 1.6681281328201294, "learning_rate": 1.9080833800952545e-05, "loss": 0.5841, "step": 14617 }, { "epoch": 2.386269948165381, "grad_norm": 1.7787386178970337, "learning_rate": 1.9080700900849855e-05, "loss": 0.6334, "step": 14618 }, { "epoch": 2.3864332068078853, "grad_norm": 2.475806713104248, "learning_rate": 1.90805679916029e-05, "loss": 0.8241, "step": 14619 }, { "epoch": 2.3865964654503897, "grad_norm": 1.8686087131500244, "learning_rate": 1.9080435073211812e-05, "loss": 0.5138, "step": 14620 }, { "epoch": 2.386759724092894, "grad_norm": 2.107147216796875, "learning_rate": 1.908030214567673e-05, "loss": 0.7122, "step": 14621 }, { "epoch": 2.3869229827353986, "grad_norm": 1.6094859838485718, "learning_rate": 1.9080169208997786e-05, "loss": 0.4895, "step": 14622 }, { "epoch": 2.387086241377903, "grad_norm": 1.7905776500701904, "learning_rate": 1.9080036263175118e-05, "loss": 0.6505, "step": 14623 }, { "epoch": 2.3872495000204075, "grad_norm": 1.4778149127960205, "learning_rate": 1.907990330820885e-05, "loss": 0.5193, "step": 14624 }, { "epoch": 2.3874127586629115, "grad_norm": 1.8297946453094482, "learning_rate": 1.9079770344099126e-05, "loss": 0.5953, "step": 14625 }, { "epoch": 2.387576017305416, "grad_norm": 1.744399070739746, "learning_rate": 1.9079637370846075e-05, "loss": 0.6365, "step": 14626 }, { "epoch": 2.3877392759479203, "grad_norm": 1.756588101387024, "learning_rate": 1.907950438844983e-05, "loss": 0.4671, "step": 14627 }, { "epoch": 2.387902534590425, "grad_norm": 1.6498795747756958, "learning_rate": 1.9079371396910528e-05, "loss": 0.5224, "step": 14628 }, { "epoch": 2.3880657932329292, "grad_norm": 1.7140398025512695, "learning_rate": 1.90792383962283e-05, "loss": 0.5012, "step": 14629 }, { "epoch": 2.3882290518754337, "grad_norm": 2.0745060443878174, "learning_rate": 1.9079105386403283e-05, "loss": 0.6148, "step": 14630 }, { "epoch": 2.388392310517938, "grad_norm": 1.9971262216567993, "learning_rate": 1.9078972367435606e-05, "loss": 0.5599, "step": 14631 }, { "epoch": 2.3885555691604425, "grad_norm": 1.9585063457489014, "learning_rate": 1.907883933932541e-05, "loss": 0.6518, "step": 14632 }, { "epoch": 2.388718827802947, "grad_norm": 1.6268223524093628, "learning_rate": 1.9078706302072824e-05, "loss": 0.5663, "step": 14633 }, { "epoch": 2.3888820864454514, "grad_norm": 1.8286161422729492, "learning_rate": 1.9078573255677983e-05, "loss": 0.5789, "step": 14634 }, { "epoch": 2.3890453450879554, "grad_norm": 1.783400297164917, "learning_rate": 1.907844020014102e-05, "loss": 0.5839, "step": 14635 }, { "epoch": 2.38920860373046, "grad_norm": 1.869644284248352, "learning_rate": 1.9078307135462072e-05, "loss": 0.5291, "step": 14636 }, { "epoch": 2.3893718623729643, "grad_norm": 1.6742433309555054, "learning_rate": 1.907817406164127e-05, "loss": 0.5154, "step": 14637 }, { "epoch": 2.3895351210154687, "grad_norm": 1.7840083837509155, "learning_rate": 1.907804097867875e-05, "loss": 0.5395, "step": 14638 }, { "epoch": 2.389698379657973, "grad_norm": 1.7052687406539917, "learning_rate": 1.9077907886574646e-05, "loss": 0.5652, "step": 14639 }, { "epoch": 2.3898616383004776, "grad_norm": 2.168511152267456, "learning_rate": 1.907777478532909e-05, "loss": 0.559, "step": 14640 }, { "epoch": 2.390024896942982, "grad_norm": 1.8107311725616455, "learning_rate": 1.9077641674942216e-05, "loss": 0.6268, "step": 14641 }, { "epoch": 2.3901881555854865, "grad_norm": 1.5295219421386719, "learning_rate": 1.907750855541416e-05, "loss": 0.4994, "step": 14642 }, { "epoch": 2.3903514142279905, "grad_norm": 1.6677207946777344, "learning_rate": 1.9077375426745055e-05, "loss": 0.5429, "step": 14643 }, { "epoch": 2.390514672870495, "grad_norm": 1.7574526071548462, "learning_rate": 1.9077242288935036e-05, "loss": 0.5775, "step": 14644 }, { "epoch": 2.3906779315129993, "grad_norm": 1.4158931970596313, "learning_rate": 1.9077109141984235e-05, "loss": 0.4598, "step": 14645 }, { "epoch": 2.390841190155504, "grad_norm": 1.8125561475753784, "learning_rate": 1.907697598589279e-05, "loss": 0.5748, "step": 14646 }, { "epoch": 2.391004448798008, "grad_norm": 1.7812215089797974, "learning_rate": 1.9076842820660832e-05, "loss": 0.5778, "step": 14647 }, { "epoch": 2.3911677074405127, "grad_norm": 2.1403253078460693, "learning_rate": 1.9076709646288495e-05, "loss": 0.6951, "step": 14648 }, { "epoch": 2.391330966083017, "grad_norm": 1.9364113807678223, "learning_rate": 1.9076576462775913e-05, "loss": 0.5481, "step": 14649 }, { "epoch": 2.3914942247255215, "grad_norm": 1.9292993545532227, "learning_rate": 1.9076443270123222e-05, "loss": 0.6017, "step": 14650 }, { "epoch": 2.391657483368026, "grad_norm": 1.7391222715377808, "learning_rate": 1.9076310068330554e-05, "loss": 0.5621, "step": 14651 }, { "epoch": 2.3918207420105304, "grad_norm": 1.964460849761963, "learning_rate": 1.9076176857398045e-05, "loss": 0.6621, "step": 14652 }, { "epoch": 2.3919840006530344, "grad_norm": 1.7468537092208862, "learning_rate": 1.907604363732583e-05, "loss": 0.5074, "step": 14653 }, { "epoch": 2.392147259295539, "grad_norm": 1.6747630834579468, "learning_rate": 1.907591040811404e-05, "loss": 0.5047, "step": 14654 }, { "epoch": 2.3923105179380433, "grad_norm": 1.6960432529449463, "learning_rate": 1.907577716976281e-05, "loss": 0.4634, "step": 14655 }, { "epoch": 2.3924737765805477, "grad_norm": 1.7186912298202515, "learning_rate": 1.9075643922272277e-05, "loss": 0.5235, "step": 14656 }, { "epoch": 2.392637035223052, "grad_norm": 1.6262362003326416, "learning_rate": 1.9075510665642567e-05, "loss": 0.4903, "step": 14657 }, { "epoch": 2.3928002938655566, "grad_norm": 1.9649072885513306, "learning_rate": 1.9075377399873827e-05, "loss": 0.6927, "step": 14658 }, { "epoch": 2.392963552508061, "grad_norm": 2.14729380607605, "learning_rate": 1.907524412496618e-05, "loss": 0.7139, "step": 14659 }, { "epoch": 2.3931268111505655, "grad_norm": 1.8604710102081299, "learning_rate": 1.9075110840919765e-05, "loss": 0.5795, "step": 14660 }, { "epoch": 2.3932900697930695, "grad_norm": 1.7697038650512695, "learning_rate": 1.907497754773472e-05, "loss": 0.693, "step": 14661 }, { "epoch": 2.393453328435574, "grad_norm": 2.3853352069854736, "learning_rate": 1.907484424541117e-05, "loss": 0.7036, "step": 14662 }, { "epoch": 2.3936165870780783, "grad_norm": 1.9073824882507324, "learning_rate": 1.9074710933949257e-05, "loss": 0.5918, "step": 14663 }, { "epoch": 2.3937798457205828, "grad_norm": 1.7636950016021729, "learning_rate": 1.9074577613349113e-05, "loss": 0.5373, "step": 14664 }, { "epoch": 2.393943104363087, "grad_norm": 1.7081987857818604, "learning_rate": 1.907444428361087e-05, "loss": 0.6219, "step": 14665 }, { "epoch": 2.3941063630055917, "grad_norm": 1.7382813692092896, "learning_rate": 1.9074310944734663e-05, "loss": 0.5226, "step": 14666 }, { "epoch": 2.394269621648096, "grad_norm": 1.7222046852111816, "learning_rate": 1.907417759672063e-05, "loss": 0.5302, "step": 14667 }, { "epoch": 2.3944328802906005, "grad_norm": 1.750404953956604, "learning_rate": 1.9074044239568904e-05, "loss": 0.5986, "step": 14668 }, { "epoch": 2.394596138933105, "grad_norm": 1.6623457670211792, "learning_rate": 1.9073910873279613e-05, "loss": 0.5085, "step": 14669 }, { "epoch": 2.3947593975756094, "grad_norm": 2.331394672393799, "learning_rate": 1.90737774978529e-05, "loss": 0.7563, "step": 14670 }, { "epoch": 2.3949226562181134, "grad_norm": 1.7024861574172974, "learning_rate": 1.907364411328889e-05, "loss": 0.7184, "step": 14671 }, { "epoch": 2.395085914860618, "grad_norm": 1.8200258016586304, "learning_rate": 1.907351071958773e-05, "loss": 0.5787, "step": 14672 }, { "epoch": 2.3952491735031223, "grad_norm": 1.5663678646087646, "learning_rate": 1.9073377316749543e-05, "loss": 0.5011, "step": 14673 }, { "epoch": 2.3954124321456267, "grad_norm": 2.1348984241485596, "learning_rate": 1.9073243904774468e-05, "loss": 0.6318, "step": 14674 }, { "epoch": 2.395575690788131, "grad_norm": 1.499284029006958, "learning_rate": 1.907311048366264e-05, "loss": 0.4455, "step": 14675 }, { "epoch": 2.3957389494306356, "grad_norm": 1.3987929821014404, "learning_rate": 1.907297705341419e-05, "loss": 0.5374, "step": 14676 }, { "epoch": 2.39590220807314, "grad_norm": 1.3970788717269897, "learning_rate": 1.9072843614029256e-05, "loss": 0.5357, "step": 14677 }, { "epoch": 2.396065466715644, "grad_norm": 1.793351411819458, "learning_rate": 1.907271016550797e-05, "loss": 0.5541, "step": 14678 }, { "epoch": 2.3962287253581485, "grad_norm": 1.7402982711791992, "learning_rate": 1.9072576707850467e-05, "loss": 0.4924, "step": 14679 }, { "epoch": 2.396391984000653, "grad_norm": 2.1115381717681885, "learning_rate": 1.9072443241056884e-05, "loss": 0.686, "step": 14680 }, { "epoch": 2.3965552426431573, "grad_norm": 1.940934419631958, "learning_rate": 1.907230976512735e-05, "loss": 0.5689, "step": 14681 }, { "epoch": 2.3967185012856618, "grad_norm": 1.57938551902771, "learning_rate": 1.9072176280062006e-05, "loss": 0.5357, "step": 14682 }, { "epoch": 2.396881759928166, "grad_norm": 1.8515774011611938, "learning_rate": 1.907204278586098e-05, "loss": 0.6576, "step": 14683 }, { "epoch": 2.3970450185706706, "grad_norm": 1.6698349714279175, "learning_rate": 1.907190928252441e-05, "loss": 0.5405, "step": 14684 }, { "epoch": 2.397208277213175, "grad_norm": 2.2046680450439453, "learning_rate": 1.9071775770052432e-05, "loss": 0.6873, "step": 14685 }, { "epoch": 2.3973715358556795, "grad_norm": 1.9014753103256226, "learning_rate": 1.9071642248445176e-05, "loss": 0.6229, "step": 14686 }, { "epoch": 2.397534794498184, "grad_norm": 1.6532944440841675, "learning_rate": 1.9071508717702777e-05, "loss": 0.5228, "step": 14687 }, { "epoch": 2.397698053140688, "grad_norm": 1.8697896003723145, "learning_rate": 1.9071375177825375e-05, "loss": 0.5669, "step": 14688 }, { "epoch": 2.3978613117831924, "grad_norm": 1.5795090198516846, "learning_rate": 1.9071241628813096e-05, "loss": 0.4954, "step": 14689 }, { "epoch": 2.398024570425697, "grad_norm": 1.8542009592056274, "learning_rate": 1.907110807066608e-05, "loss": 0.6128, "step": 14690 }, { "epoch": 2.3981878290682013, "grad_norm": 1.5209999084472656, "learning_rate": 1.9070974503384464e-05, "loss": 0.5493, "step": 14691 }, { "epoch": 2.3983510877107057, "grad_norm": 1.774599552154541, "learning_rate": 1.9070840926968378e-05, "loss": 0.5764, "step": 14692 }, { "epoch": 2.39851434635321, "grad_norm": 1.7918599843978882, "learning_rate": 1.907070734141796e-05, "loss": 0.6262, "step": 14693 }, { "epoch": 2.3986776049957146, "grad_norm": 1.5264027118682861, "learning_rate": 1.907057374673334e-05, "loss": 0.4734, "step": 14694 }, { "epoch": 2.398840863638219, "grad_norm": 2.0934627056121826, "learning_rate": 1.907044014291465e-05, "loss": 0.7255, "step": 14695 }, { "epoch": 2.399004122280723, "grad_norm": 1.6206717491149902, "learning_rate": 1.9070306529962033e-05, "loss": 0.5541, "step": 14696 }, { "epoch": 2.3991673809232275, "grad_norm": 1.9858759641647339, "learning_rate": 1.9070172907875618e-05, "loss": 0.645, "step": 14697 }, { "epoch": 2.399330639565732, "grad_norm": 1.8839404582977295, "learning_rate": 1.9070039276655545e-05, "loss": 0.661, "step": 14698 }, { "epoch": 2.3994938982082363, "grad_norm": 1.7998151779174805, "learning_rate": 1.9069905636301943e-05, "loss": 0.6131, "step": 14699 }, { "epoch": 2.3996571568507408, "grad_norm": 1.6512155532836914, "learning_rate": 1.9069771986814948e-05, "loss": 0.5403, "step": 14700 }, { "epoch": 2.399820415493245, "grad_norm": 1.6819300651550293, "learning_rate": 1.9069638328194698e-05, "loss": 0.5892, "step": 14701 }, { "epoch": 2.3999836741357496, "grad_norm": 1.523626685142517, "learning_rate": 1.906950466044132e-05, "loss": 0.5304, "step": 14702 }, { "epoch": 2.400146932778254, "grad_norm": 1.759896993637085, "learning_rate": 1.9069370983554955e-05, "loss": 0.7048, "step": 14703 }, { "epoch": 2.4003101914207585, "grad_norm": 1.4748098850250244, "learning_rate": 1.9069237297535737e-05, "loss": 0.4646, "step": 14704 }, { "epoch": 2.400473450063263, "grad_norm": 1.749779462814331, "learning_rate": 1.9069103602383796e-05, "loss": 0.6186, "step": 14705 }, { "epoch": 2.400636708705767, "grad_norm": 1.6567038297653198, "learning_rate": 1.906896989809927e-05, "loss": 0.5227, "step": 14706 }, { "epoch": 2.4007999673482714, "grad_norm": 1.6152395009994507, "learning_rate": 1.90688361846823e-05, "loss": 0.6302, "step": 14707 }, { "epoch": 2.400963225990776, "grad_norm": 1.5224673748016357, "learning_rate": 1.9068702462133012e-05, "loss": 0.5862, "step": 14708 }, { "epoch": 2.4011264846332803, "grad_norm": 1.981552004814148, "learning_rate": 1.906856873045154e-05, "loss": 0.6635, "step": 14709 }, { "epoch": 2.4012897432757847, "grad_norm": 1.6401937007904053, "learning_rate": 1.9068434989638023e-05, "loss": 0.5192, "step": 14710 }, { "epoch": 2.401453001918289, "grad_norm": 1.7433654069900513, "learning_rate": 1.9068301239692595e-05, "loss": 0.5727, "step": 14711 }, { "epoch": 2.4016162605607936, "grad_norm": 1.8013827800750732, "learning_rate": 1.906816748061539e-05, "loss": 0.5871, "step": 14712 }, { "epoch": 2.4017795192032976, "grad_norm": 1.7176638841629028, "learning_rate": 1.906803371240654e-05, "loss": 0.5521, "step": 14713 }, { "epoch": 2.401942777845802, "grad_norm": 1.7259089946746826, "learning_rate": 1.9067899935066187e-05, "loss": 0.5915, "step": 14714 }, { "epoch": 2.4021060364883065, "grad_norm": 1.5743516683578491, "learning_rate": 1.906776614859446e-05, "loss": 0.4919, "step": 14715 }, { "epoch": 2.402269295130811, "grad_norm": 2.2616286277770996, "learning_rate": 1.906763235299149e-05, "loss": 0.706, "step": 14716 }, { "epoch": 2.4024325537733153, "grad_norm": 2.226349353790283, "learning_rate": 1.9067498548257425e-05, "loss": 0.7012, "step": 14717 }, { "epoch": 2.4025958124158198, "grad_norm": 2.07782244682312, "learning_rate": 1.9067364734392386e-05, "loss": 0.6988, "step": 14718 }, { "epoch": 2.402759071058324, "grad_norm": 1.454687476158142, "learning_rate": 1.9067230911396512e-05, "loss": 0.444, "step": 14719 }, { "epoch": 2.4029223297008286, "grad_norm": 2.0480470657348633, "learning_rate": 1.9067097079269942e-05, "loss": 0.6606, "step": 14720 }, { "epoch": 2.403085588343333, "grad_norm": 1.7591325044631958, "learning_rate": 1.9066963238012807e-05, "loss": 0.4727, "step": 14721 }, { "epoch": 2.4032488469858375, "grad_norm": 1.9034820795059204, "learning_rate": 1.9066829387625243e-05, "loss": 0.577, "step": 14722 }, { "epoch": 2.4034121056283415, "grad_norm": 1.6604607105255127, "learning_rate": 1.906669552810738e-05, "loss": 0.5901, "step": 14723 }, { "epoch": 2.403575364270846, "grad_norm": 1.922224998474121, "learning_rate": 1.9066561659459363e-05, "loss": 0.6552, "step": 14724 }, { "epoch": 2.4037386229133504, "grad_norm": 1.7556606531143188, "learning_rate": 1.9066427781681314e-05, "loss": 0.6136, "step": 14725 }, { "epoch": 2.403901881555855, "grad_norm": 1.7586984634399414, "learning_rate": 1.9066293894773383e-05, "loss": 0.5203, "step": 14726 }, { "epoch": 2.4040651401983593, "grad_norm": 1.5935351848602295, "learning_rate": 1.906615999873569e-05, "loss": 0.4635, "step": 14727 }, { "epoch": 2.4042283988408637, "grad_norm": 1.7105528116226196, "learning_rate": 1.906602609356838e-05, "loss": 0.5311, "step": 14728 }, { "epoch": 2.404391657483368, "grad_norm": 1.5983600616455078, "learning_rate": 1.906589217927158e-05, "loss": 0.4762, "step": 14729 }, { "epoch": 2.4045549161258726, "grad_norm": 1.4015930891036987, "learning_rate": 1.9065758255845432e-05, "loss": 0.4643, "step": 14730 }, { "epoch": 2.4047181747683766, "grad_norm": 1.5947763919830322, "learning_rate": 1.906562432329007e-05, "loss": 0.5042, "step": 14731 }, { "epoch": 2.404881433410881, "grad_norm": 1.8998064994812012, "learning_rate": 1.9065490381605624e-05, "loss": 0.5755, "step": 14732 }, { "epoch": 2.4050446920533854, "grad_norm": 1.704728126525879, "learning_rate": 1.906535643079223e-05, "loss": 0.4731, "step": 14733 }, { "epoch": 2.40520795069589, "grad_norm": 1.8300803899765015, "learning_rate": 1.9065222470850025e-05, "loss": 0.63, "step": 14734 }, { "epoch": 2.4053712093383943, "grad_norm": 1.6360726356506348, "learning_rate": 1.9065088501779145e-05, "loss": 0.5193, "step": 14735 }, { "epoch": 2.4055344679808988, "grad_norm": 1.6334954500198364, "learning_rate": 1.906495452357972e-05, "loss": 0.5124, "step": 14736 }, { "epoch": 2.405697726623403, "grad_norm": 1.6755024194717407, "learning_rate": 1.9064820536251892e-05, "loss": 0.4698, "step": 14737 }, { "epoch": 2.4058609852659076, "grad_norm": 1.803622841835022, "learning_rate": 1.9064686539795794e-05, "loss": 0.5628, "step": 14738 }, { "epoch": 2.406024243908412, "grad_norm": 1.3860976696014404, "learning_rate": 1.9064552534211556e-05, "loss": 0.4295, "step": 14739 }, { "epoch": 2.4061875025509165, "grad_norm": 2.0035171508789062, "learning_rate": 1.9064418519499316e-05, "loss": 0.6024, "step": 14740 }, { "epoch": 2.4063507611934205, "grad_norm": 1.7655789852142334, "learning_rate": 1.9064284495659208e-05, "loss": 0.5239, "step": 14741 }, { "epoch": 2.406514019835925, "grad_norm": 1.8731989860534668, "learning_rate": 1.906415046269137e-05, "loss": 0.5741, "step": 14742 }, { "epoch": 2.4066772784784294, "grad_norm": 1.5570639371871948, "learning_rate": 1.9064016420595934e-05, "loss": 0.521, "step": 14743 }, { "epoch": 2.406840537120934, "grad_norm": 1.9177916049957275, "learning_rate": 1.9063882369373036e-05, "loss": 0.6063, "step": 14744 }, { "epoch": 2.4070037957634383, "grad_norm": 1.9931893348693848, "learning_rate": 1.906374830902281e-05, "loss": 0.6578, "step": 14745 }, { "epoch": 2.4071670544059427, "grad_norm": 2.3861536979675293, "learning_rate": 1.9063614239545393e-05, "loss": 0.4748, "step": 14746 }, { "epoch": 2.407330313048447, "grad_norm": 2.101872444152832, "learning_rate": 1.906348016094092e-05, "loss": 0.632, "step": 14747 }, { "epoch": 2.4074935716909516, "grad_norm": 1.6305328607559204, "learning_rate": 1.9063346073209522e-05, "loss": 0.475, "step": 14748 }, { "epoch": 2.4076568303334556, "grad_norm": 1.6323062181472778, "learning_rate": 1.906321197635134e-05, "loss": 0.5701, "step": 14749 }, { "epoch": 2.40782008897596, "grad_norm": 2.069180727005005, "learning_rate": 1.9063077870366504e-05, "loss": 0.5457, "step": 14750 }, { "epoch": 2.4079833476184644, "grad_norm": 1.7335231304168701, "learning_rate": 1.9062943755255146e-05, "loss": 0.6336, "step": 14751 }, { "epoch": 2.408146606260969, "grad_norm": 1.6940103769302368, "learning_rate": 1.9062809631017412e-05, "loss": 0.5197, "step": 14752 }, { "epoch": 2.4083098649034733, "grad_norm": 1.7385491132736206, "learning_rate": 1.9062675497653433e-05, "loss": 0.545, "step": 14753 }, { "epoch": 2.4084731235459778, "grad_norm": 1.8014689683914185, "learning_rate": 1.906254135516334e-05, "loss": 0.6122, "step": 14754 }, { "epoch": 2.408636382188482, "grad_norm": 1.9219131469726562, "learning_rate": 1.9062407203547267e-05, "loss": 0.6147, "step": 14755 }, { "epoch": 2.4087996408309866, "grad_norm": 1.8238698244094849, "learning_rate": 1.9062273042805354e-05, "loss": 0.6405, "step": 14756 }, { "epoch": 2.408962899473491, "grad_norm": 1.8655375242233276, "learning_rate": 1.9062138872937738e-05, "loss": 0.5127, "step": 14757 }, { "epoch": 2.4091261581159955, "grad_norm": 1.5455180406570435, "learning_rate": 1.9062004693944548e-05, "loss": 0.5114, "step": 14758 }, { "epoch": 2.4092894167584995, "grad_norm": 2.050560235977173, "learning_rate": 1.906187050582592e-05, "loss": 0.6829, "step": 14759 }, { "epoch": 2.409452675401004, "grad_norm": 1.7802586555480957, "learning_rate": 1.9061736308581996e-05, "loss": 0.545, "step": 14760 }, { "epoch": 2.4096159340435084, "grad_norm": 1.9317597150802612, "learning_rate": 1.9061602102212898e-05, "loss": 0.643, "step": 14761 }, { "epoch": 2.409779192686013, "grad_norm": 1.6242008209228516, "learning_rate": 1.9061467886718775e-05, "loss": 0.5194, "step": 14762 }, { "epoch": 2.4099424513285173, "grad_norm": 2.0996603965759277, "learning_rate": 1.9061333662099756e-05, "loss": 0.652, "step": 14763 }, { "epoch": 2.4101057099710217, "grad_norm": 2.262483596801758, "learning_rate": 1.9061199428355973e-05, "loss": 0.7058, "step": 14764 }, { "epoch": 2.410268968613526, "grad_norm": 1.7398741245269775, "learning_rate": 1.9061065185487568e-05, "loss": 0.5245, "step": 14765 }, { "epoch": 2.41043222725603, "grad_norm": 1.7028193473815918, "learning_rate": 1.9060930933494673e-05, "loss": 0.4912, "step": 14766 }, { "epoch": 2.4105954858985346, "grad_norm": 1.6147454977035522, "learning_rate": 1.906079667237742e-05, "loss": 0.4913, "step": 14767 }, { "epoch": 2.410758744541039, "grad_norm": 2.5599560737609863, "learning_rate": 1.906066240213595e-05, "loss": 0.6997, "step": 14768 }, { "epoch": 2.4109220031835434, "grad_norm": 2.267305374145508, "learning_rate": 1.9060528122770393e-05, "loss": 0.6033, "step": 14769 }, { "epoch": 2.411085261826048, "grad_norm": 1.720969796180725, "learning_rate": 1.906039383428089e-05, "loss": 0.5541, "step": 14770 }, { "epoch": 2.4112485204685523, "grad_norm": 1.8384428024291992, "learning_rate": 1.906025953666757e-05, "loss": 0.5723, "step": 14771 }, { "epoch": 2.4114117791110568, "grad_norm": 1.7477805614471436, "learning_rate": 1.9060125229930572e-05, "loss": 0.5779, "step": 14772 }, { "epoch": 2.411575037753561, "grad_norm": 1.7447000741958618, "learning_rate": 1.9059990914070025e-05, "loss": 0.5695, "step": 14773 }, { "epoch": 2.4117382963960656, "grad_norm": 1.604271650314331, "learning_rate": 1.9059856589086075e-05, "loss": 0.5674, "step": 14774 }, { "epoch": 2.41190155503857, "grad_norm": 1.7622084617614746, "learning_rate": 1.9059722254978852e-05, "loss": 0.5518, "step": 14775 }, { "epoch": 2.412064813681074, "grad_norm": 1.586521863937378, "learning_rate": 1.9059587911748488e-05, "loss": 0.5407, "step": 14776 }, { "epoch": 2.4122280723235785, "grad_norm": 1.6929539442062378, "learning_rate": 1.9059453559395128e-05, "loss": 0.5309, "step": 14777 }, { "epoch": 2.412391330966083, "grad_norm": 2.2214932441711426, "learning_rate": 1.9059319197918895e-05, "loss": 0.8017, "step": 14778 }, { "epoch": 2.4125545896085874, "grad_norm": 1.7591204643249512, "learning_rate": 1.905918482731993e-05, "loss": 0.5062, "step": 14779 }, { "epoch": 2.412717848251092, "grad_norm": 1.8807835578918457, "learning_rate": 1.905905044759837e-05, "loss": 0.7022, "step": 14780 }, { "epoch": 2.4128811068935963, "grad_norm": 1.6877732276916504, "learning_rate": 1.9058916058754347e-05, "loss": 0.5284, "step": 14781 }, { "epoch": 2.4130443655361007, "grad_norm": 1.597824215888977, "learning_rate": 1.9058781660788003e-05, "loss": 0.5613, "step": 14782 }, { "epoch": 2.413207624178605, "grad_norm": 1.8140716552734375, "learning_rate": 1.9058647253699462e-05, "loss": 0.5816, "step": 14783 }, { "epoch": 2.413370882821109, "grad_norm": 1.638429045677185, "learning_rate": 1.9058512837488868e-05, "loss": 0.5773, "step": 14784 }, { "epoch": 2.4135341414636136, "grad_norm": 1.5304937362670898, "learning_rate": 1.9058378412156353e-05, "loss": 0.4968, "step": 14785 }, { "epoch": 2.413697400106118, "grad_norm": 1.8113245964050293, "learning_rate": 1.9058243977702054e-05, "loss": 0.5965, "step": 14786 }, { "epoch": 2.4138606587486224, "grad_norm": 1.7824804782867432, "learning_rate": 1.9058109534126106e-05, "loss": 0.5699, "step": 14787 }, { "epoch": 2.414023917391127, "grad_norm": 1.809175968170166, "learning_rate": 1.9057975081428646e-05, "loss": 0.5645, "step": 14788 }, { "epoch": 2.4141871760336313, "grad_norm": 1.492437720298767, "learning_rate": 1.9057840619609804e-05, "loss": 0.4711, "step": 14789 }, { "epoch": 2.4143504346761357, "grad_norm": 1.4843319654464722, "learning_rate": 1.905770614866972e-05, "loss": 0.4885, "step": 14790 }, { "epoch": 2.41451369331864, "grad_norm": 1.9791122674942017, "learning_rate": 1.905757166860853e-05, "loss": 0.6397, "step": 14791 }, { "epoch": 2.4146769519611446, "grad_norm": 1.8094377517700195, "learning_rate": 1.9057437179426365e-05, "loss": 0.5596, "step": 14792 }, { "epoch": 2.414840210603649, "grad_norm": 1.8248212337493896, "learning_rate": 1.9057302681123367e-05, "loss": 0.5714, "step": 14793 }, { "epoch": 2.415003469246153, "grad_norm": 1.7966430187225342, "learning_rate": 1.9057168173699664e-05, "loss": 0.6608, "step": 14794 }, { "epoch": 2.4151667278886575, "grad_norm": 1.5687203407287598, "learning_rate": 1.90570336571554e-05, "loss": 0.5298, "step": 14795 }, { "epoch": 2.415329986531162, "grad_norm": 2.063647508621216, "learning_rate": 1.90568991314907e-05, "loss": 0.656, "step": 14796 }, { "epoch": 2.4154932451736664, "grad_norm": 1.8128690719604492, "learning_rate": 1.9056764596705704e-05, "loss": 0.546, "step": 14797 }, { "epoch": 2.415656503816171, "grad_norm": 1.4784066677093506, "learning_rate": 1.9056630052800553e-05, "loss": 0.4318, "step": 14798 }, { "epoch": 2.4158197624586752, "grad_norm": 1.7448246479034424, "learning_rate": 1.9056495499775374e-05, "loss": 0.5502, "step": 14799 }, { "epoch": 2.4159830211011797, "grad_norm": 1.9303172826766968, "learning_rate": 1.905636093763031e-05, "loss": 0.5205, "step": 14800 }, { "epoch": 2.416146279743684, "grad_norm": 1.5416854619979858, "learning_rate": 1.905622636636549e-05, "loss": 0.5259, "step": 14801 }, { "epoch": 2.416309538386188, "grad_norm": 2.1024954319000244, "learning_rate": 1.9056091785981056e-05, "loss": 0.6584, "step": 14802 }, { "epoch": 2.4164727970286926, "grad_norm": 1.7921574115753174, "learning_rate": 1.9055957196477137e-05, "loss": 0.6165, "step": 14803 }, { "epoch": 2.416636055671197, "grad_norm": 2.083899736404419, "learning_rate": 1.9055822597853874e-05, "loss": 0.6694, "step": 14804 }, { "epoch": 2.4167993143137014, "grad_norm": 1.9700617790222168, "learning_rate": 1.9055687990111397e-05, "loss": 0.6902, "step": 14805 }, { "epoch": 2.416962572956206, "grad_norm": 1.5287399291992188, "learning_rate": 1.9055553373249848e-05, "loss": 0.5092, "step": 14806 }, { "epoch": 2.4171258315987103, "grad_norm": 1.9218041896820068, "learning_rate": 1.9055418747269356e-05, "loss": 0.5973, "step": 14807 }, { "epoch": 2.4172890902412147, "grad_norm": 1.8346599340438843, "learning_rate": 1.9055284112170062e-05, "loss": 0.4647, "step": 14808 }, { "epoch": 2.417452348883719, "grad_norm": 1.8453583717346191, "learning_rate": 1.9055149467952097e-05, "loss": 0.5577, "step": 14809 }, { "epoch": 2.4176156075262236, "grad_norm": 1.9742813110351562, "learning_rate": 1.90550148146156e-05, "loss": 0.5864, "step": 14810 }, { "epoch": 2.4177788661687276, "grad_norm": 1.7055878639221191, "learning_rate": 1.9054880152160705e-05, "loss": 0.5656, "step": 14811 }, { "epoch": 2.417942124811232, "grad_norm": 2.0273289680480957, "learning_rate": 1.905474548058755e-05, "loss": 0.6992, "step": 14812 }, { "epoch": 2.4181053834537365, "grad_norm": 1.8495516777038574, "learning_rate": 1.9054610799896268e-05, "loss": 0.6063, "step": 14813 }, { "epoch": 2.418268642096241, "grad_norm": 2.204801082611084, "learning_rate": 1.9054476110086995e-05, "loss": 0.5926, "step": 14814 }, { "epoch": 2.4184319007387454, "grad_norm": 1.861804485321045, "learning_rate": 1.9054341411159866e-05, "loss": 0.6655, "step": 14815 }, { "epoch": 2.41859515938125, "grad_norm": 1.7758255004882812, "learning_rate": 1.905420670311502e-05, "loss": 0.5918, "step": 14816 }, { "epoch": 2.4187584180237542, "grad_norm": 1.5058903694152832, "learning_rate": 1.9054071985952587e-05, "loss": 0.4629, "step": 14817 }, { "epoch": 2.4189216766662587, "grad_norm": 1.7889235019683838, "learning_rate": 1.9053937259672707e-05, "loss": 0.5187, "step": 14818 }, { "epoch": 2.4190849353087627, "grad_norm": 1.6276010274887085, "learning_rate": 1.9053802524275514e-05, "loss": 0.6057, "step": 14819 }, { "epoch": 2.419248193951267, "grad_norm": 1.5704641342163086, "learning_rate": 1.9053667779761147e-05, "loss": 0.6078, "step": 14820 }, { "epoch": 2.4194114525937715, "grad_norm": 1.8271715641021729, "learning_rate": 1.9053533026129737e-05, "loss": 0.5453, "step": 14821 }, { "epoch": 2.419574711236276, "grad_norm": 1.6790430545806885, "learning_rate": 1.9053398263381423e-05, "loss": 0.589, "step": 14822 }, { "epoch": 2.4197379698787804, "grad_norm": 1.749173879623413, "learning_rate": 1.9053263491516338e-05, "loss": 0.4726, "step": 14823 }, { "epoch": 2.419901228521285, "grad_norm": 1.710088849067688, "learning_rate": 1.905312871053462e-05, "loss": 0.549, "step": 14824 }, { "epoch": 2.4200644871637893, "grad_norm": 2.0475499629974365, "learning_rate": 1.9052993920436402e-05, "loss": 0.7673, "step": 14825 }, { "epoch": 2.4202277458062937, "grad_norm": 1.6885813474655151, "learning_rate": 1.9052859121221822e-05, "loss": 0.5865, "step": 14826 }, { "epoch": 2.420391004448798, "grad_norm": 1.534018635749817, "learning_rate": 1.9052724312891017e-05, "loss": 0.5714, "step": 14827 }, { "epoch": 2.4205542630913026, "grad_norm": 2.4394185543060303, "learning_rate": 1.905258949544412e-05, "loss": 0.5351, "step": 14828 }, { "epoch": 2.4207175217338066, "grad_norm": 1.395139455795288, "learning_rate": 1.905245466888127e-05, "loss": 0.4477, "step": 14829 }, { "epoch": 2.420880780376311, "grad_norm": 1.9183334112167358, "learning_rate": 1.9052319833202596e-05, "loss": 0.6233, "step": 14830 }, { "epoch": 2.4210440390188155, "grad_norm": 1.765745759010315, "learning_rate": 1.905218498840824e-05, "loss": 0.5411, "step": 14831 }, { "epoch": 2.42120729766132, "grad_norm": 1.7402782440185547, "learning_rate": 1.905205013449834e-05, "loss": 0.5483, "step": 14832 }, { "epoch": 2.4213705563038244, "grad_norm": 1.7447539567947388, "learning_rate": 1.9051915271473024e-05, "loss": 0.5911, "step": 14833 }, { "epoch": 2.421533814946329, "grad_norm": 1.9866255521774292, "learning_rate": 1.9051780399332427e-05, "loss": 0.6231, "step": 14834 }, { "epoch": 2.4216970735888332, "grad_norm": 1.777695894241333, "learning_rate": 1.9051645518076696e-05, "loss": 0.5308, "step": 14835 }, { "epoch": 2.4218603322313377, "grad_norm": 1.5730823278427124, "learning_rate": 1.905151062770596e-05, "loss": 0.5256, "step": 14836 }, { "epoch": 2.4220235908738417, "grad_norm": 1.7182598114013672, "learning_rate": 1.9051375728220358e-05, "loss": 0.5752, "step": 14837 }, { "epoch": 2.422186849516346, "grad_norm": 2.0204169750213623, "learning_rate": 1.9051240819620018e-05, "loss": 0.7307, "step": 14838 }, { "epoch": 2.4223501081588505, "grad_norm": 1.667989730834961, "learning_rate": 1.905110590190508e-05, "loss": 0.5753, "step": 14839 }, { "epoch": 2.422513366801355, "grad_norm": 1.7466654777526855, "learning_rate": 1.9050970975075685e-05, "loss": 0.5848, "step": 14840 }, { "epoch": 2.4226766254438594, "grad_norm": 1.8040430545806885, "learning_rate": 1.9050836039131962e-05, "loss": 0.5613, "step": 14841 }, { "epoch": 2.422839884086364, "grad_norm": 1.7952693700790405, "learning_rate": 1.905070109407405e-05, "loss": 0.5722, "step": 14842 }, { "epoch": 2.4230031427288683, "grad_norm": 1.8103914260864258, "learning_rate": 1.9050566139902088e-05, "loss": 0.5457, "step": 14843 }, { "epoch": 2.4231664013713727, "grad_norm": 2.134258985519409, "learning_rate": 1.9050431176616203e-05, "loss": 0.6451, "step": 14844 }, { "epoch": 2.423329660013877, "grad_norm": 1.7964415550231934, "learning_rate": 1.905029620421654e-05, "loss": 0.5329, "step": 14845 }, { "epoch": 2.4234929186563816, "grad_norm": 1.764803171157837, "learning_rate": 1.9050161222703227e-05, "loss": 0.6333, "step": 14846 }, { "epoch": 2.4236561772988856, "grad_norm": 1.7330634593963623, "learning_rate": 1.905002623207641e-05, "loss": 0.5338, "step": 14847 }, { "epoch": 2.42381943594139, "grad_norm": 2.092863082885742, "learning_rate": 1.9049891232336212e-05, "loss": 0.62, "step": 14848 }, { "epoch": 2.4239826945838945, "grad_norm": 1.8405756950378418, "learning_rate": 1.9049756223482777e-05, "loss": 0.6262, "step": 14849 }, { "epoch": 2.424145953226399, "grad_norm": 1.886425256729126, "learning_rate": 1.9049621205516243e-05, "loss": 0.6508, "step": 14850 }, { "epoch": 2.4243092118689034, "grad_norm": 1.5993865728378296, "learning_rate": 1.9049486178436743e-05, "loss": 0.5966, "step": 14851 }, { "epoch": 2.424472470511408, "grad_norm": 1.4431703090667725, "learning_rate": 1.904935114224441e-05, "loss": 0.5062, "step": 14852 }, { "epoch": 2.4246357291539122, "grad_norm": 1.6261473894119263, "learning_rate": 1.9049216096939388e-05, "loss": 0.5105, "step": 14853 }, { "epoch": 2.4247989877964162, "grad_norm": 1.8378595113754272, "learning_rate": 1.9049081042521804e-05, "loss": 0.5607, "step": 14854 }, { "epoch": 2.4249622464389207, "grad_norm": 1.5016839504241943, "learning_rate": 1.9048945978991796e-05, "loss": 0.4837, "step": 14855 }, { "epoch": 2.425125505081425, "grad_norm": 1.5996778011322021, "learning_rate": 1.9048810906349506e-05, "loss": 0.5098, "step": 14856 }, { "epoch": 2.4252887637239295, "grad_norm": 1.747170090675354, "learning_rate": 1.9048675824595066e-05, "loss": 0.5879, "step": 14857 }, { "epoch": 2.425452022366434, "grad_norm": 1.7887680530548096, "learning_rate": 1.9048540733728608e-05, "loss": 0.5943, "step": 14858 }, { "epoch": 2.4256152810089384, "grad_norm": 1.8542648553848267, "learning_rate": 1.9048405633750274e-05, "loss": 0.5916, "step": 14859 }, { "epoch": 2.425778539651443, "grad_norm": 2.09249210357666, "learning_rate": 1.9048270524660197e-05, "loss": 0.5938, "step": 14860 }, { "epoch": 2.4259417982939473, "grad_norm": 2.090928316116333, "learning_rate": 1.9048135406458515e-05, "loss": 0.6596, "step": 14861 }, { "epoch": 2.4261050569364517, "grad_norm": 1.7580206394195557, "learning_rate": 1.9048000279145364e-05, "loss": 0.5455, "step": 14862 }, { "epoch": 2.426268315578956, "grad_norm": 1.9046690464019775, "learning_rate": 1.9047865142720876e-05, "loss": 0.5714, "step": 14863 }, { "epoch": 2.42643157422146, "grad_norm": 2.2106893062591553, "learning_rate": 1.904772999718519e-05, "loss": 0.6831, "step": 14864 }, { "epoch": 2.4265948328639646, "grad_norm": 1.7953835725784302, "learning_rate": 1.9047594842538445e-05, "loss": 0.5246, "step": 14865 }, { "epoch": 2.426758091506469, "grad_norm": 1.7825596332550049, "learning_rate": 1.9047459678780774e-05, "loss": 0.5379, "step": 14866 }, { "epoch": 2.4269213501489735, "grad_norm": 1.8099297285079956, "learning_rate": 1.9047324505912314e-05, "loss": 0.514, "step": 14867 }, { "epoch": 2.427084608791478, "grad_norm": 1.9682555198669434, "learning_rate": 1.9047189323933198e-05, "loss": 0.6363, "step": 14868 }, { "epoch": 2.4272478674339824, "grad_norm": 1.9289470911026, "learning_rate": 1.904705413284357e-05, "loss": 0.6305, "step": 14869 }, { "epoch": 2.427411126076487, "grad_norm": 1.7994563579559326, "learning_rate": 1.9046918932643555e-05, "loss": 0.5964, "step": 14870 }, { "epoch": 2.4275743847189912, "grad_norm": 2.031937599182129, "learning_rate": 1.9046783723333298e-05, "loss": 0.5701, "step": 14871 }, { "epoch": 2.4277376433614952, "grad_norm": 1.6080982685089111, "learning_rate": 1.904664850491293e-05, "loss": 0.5139, "step": 14872 }, { "epoch": 2.4279009020039997, "grad_norm": 1.848858118057251, "learning_rate": 1.9046513277382592e-05, "loss": 0.5961, "step": 14873 }, { "epoch": 2.428064160646504, "grad_norm": 1.8274022340774536, "learning_rate": 1.9046378040742418e-05, "loss": 0.5792, "step": 14874 }, { "epoch": 2.4282274192890085, "grad_norm": 1.9679005146026611, "learning_rate": 1.904624279499254e-05, "loss": 0.5385, "step": 14875 }, { "epoch": 2.428390677931513, "grad_norm": 2.0087180137634277, "learning_rate": 1.90461075401331e-05, "loss": 0.5514, "step": 14876 }, { "epoch": 2.4285539365740174, "grad_norm": 1.6504930257797241, "learning_rate": 1.904597227616423e-05, "loss": 0.5953, "step": 14877 }, { "epoch": 2.428717195216522, "grad_norm": 2.1415722370147705, "learning_rate": 1.9045837003086074e-05, "loss": 0.596, "step": 14878 }, { "epoch": 2.4288804538590263, "grad_norm": 1.9976701736450195, "learning_rate": 1.9045701720898756e-05, "loss": 0.5605, "step": 14879 }, { "epoch": 2.4290437125015307, "grad_norm": 1.840915322303772, "learning_rate": 1.9045566429602425e-05, "loss": 0.6397, "step": 14880 }, { "epoch": 2.429206971144035, "grad_norm": 2.1169936656951904, "learning_rate": 1.9045431129197207e-05, "loss": 0.5956, "step": 14881 }, { "epoch": 2.429370229786539, "grad_norm": 2.191061496734619, "learning_rate": 1.904529581968324e-05, "loss": 0.6675, "step": 14882 }, { "epoch": 2.4295334884290436, "grad_norm": 1.7287367582321167, "learning_rate": 1.9045160501060665e-05, "loss": 0.4869, "step": 14883 }, { "epoch": 2.429696747071548, "grad_norm": 1.813077449798584, "learning_rate": 1.904502517332962e-05, "loss": 0.5146, "step": 14884 }, { "epoch": 2.4298600057140525, "grad_norm": 2.002856731414795, "learning_rate": 1.904488983649023e-05, "loss": 0.5986, "step": 14885 }, { "epoch": 2.430023264356557, "grad_norm": 1.5827481746673584, "learning_rate": 1.9044754490542643e-05, "loss": 0.5903, "step": 14886 }, { "epoch": 2.4301865229990613, "grad_norm": 1.7373671531677246, "learning_rate": 1.904461913548699e-05, "loss": 0.5271, "step": 14887 }, { "epoch": 2.430349781641566, "grad_norm": 1.7727181911468506, "learning_rate": 1.9044483771323408e-05, "loss": 0.5905, "step": 14888 }, { "epoch": 2.4305130402840702, "grad_norm": 1.6190342903137207, "learning_rate": 1.9044348398052032e-05, "loss": 0.5697, "step": 14889 }, { "epoch": 2.430676298926574, "grad_norm": 1.8190252780914307, "learning_rate": 1.9044213015672998e-05, "loss": 0.5705, "step": 14890 }, { "epoch": 2.4308395575690787, "grad_norm": 1.8055258989334106, "learning_rate": 1.904407762418645e-05, "loss": 0.6185, "step": 14891 }, { "epoch": 2.431002816211583, "grad_norm": 1.4095808267593384, "learning_rate": 1.904394222359251e-05, "loss": 0.4806, "step": 14892 }, { "epoch": 2.4311660748540875, "grad_norm": 1.7190310955047607, "learning_rate": 1.904380681389133e-05, "loss": 0.6455, "step": 14893 }, { "epoch": 2.431329333496592, "grad_norm": 1.48473060131073, "learning_rate": 1.9043671395083034e-05, "loss": 0.5749, "step": 14894 }, { "epoch": 2.4314925921390964, "grad_norm": 1.797882318496704, "learning_rate": 1.9043535967167766e-05, "loss": 0.5967, "step": 14895 }, { "epoch": 2.431655850781601, "grad_norm": 1.6980783939361572, "learning_rate": 1.9043400530145658e-05, "loss": 0.523, "step": 14896 }, { "epoch": 2.4318191094241053, "grad_norm": 1.9749912023544312, "learning_rate": 1.9043265084016848e-05, "loss": 0.6604, "step": 14897 }, { "epoch": 2.4319823680666097, "grad_norm": 1.783868432044983, "learning_rate": 1.9043129628781474e-05, "loss": 0.5797, "step": 14898 }, { "epoch": 2.432145626709114, "grad_norm": 1.9679243564605713, "learning_rate": 1.904299416443967e-05, "loss": 0.6334, "step": 14899 }, { "epoch": 2.432308885351618, "grad_norm": 2.1388704776763916, "learning_rate": 1.9042858690991574e-05, "loss": 0.6447, "step": 14900 }, { "epoch": 2.4324721439941226, "grad_norm": 1.7700965404510498, "learning_rate": 1.9042723208437318e-05, "loss": 0.6459, "step": 14901 }, { "epoch": 2.432635402636627, "grad_norm": 1.8787976503372192, "learning_rate": 1.9042587716777048e-05, "loss": 0.5728, "step": 14902 }, { "epoch": 2.4327986612791315, "grad_norm": 2.136075496673584, "learning_rate": 1.9042452216010893e-05, "loss": 0.627, "step": 14903 }, { "epoch": 2.432961919921636, "grad_norm": 1.864426851272583, "learning_rate": 1.9042316706138987e-05, "loss": 0.6036, "step": 14904 }, { "epoch": 2.4331251785641403, "grad_norm": 1.6448875665664673, "learning_rate": 1.9042181187161474e-05, "loss": 0.5031, "step": 14905 }, { "epoch": 2.433288437206645, "grad_norm": 1.9625319242477417, "learning_rate": 1.904204565907849e-05, "loss": 0.5304, "step": 14906 }, { "epoch": 2.4334516958491488, "grad_norm": 2.187103271484375, "learning_rate": 1.904191012189016e-05, "loss": 0.8059, "step": 14907 }, { "epoch": 2.433614954491653, "grad_norm": 1.5603947639465332, "learning_rate": 1.9041774575596635e-05, "loss": 0.4557, "step": 14908 }, { "epoch": 2.4337782131341577, "grad_norm": 2.010899782180786, "learning_rate": 1.9041639020198044e-05, "loss": 0.5878, "step": 14909 }, { "epoch": 2.433941471776662, "grad_norm": 2.4543817043304443, "learning_rate": 1.9041503455694522e-05, "loss": 0.6061, "step": 14910 }, { "epoch": 2.4341047304191665, "grad_norm": 1.7556078433990479, "learning_rate": 1.9041367882086213e-05, "loss": 0.6362, "step": 14911 }, { "epoch": 2.434267989061671, "grad_norm": 1.9327290058135986, "learning_rate": 1.9041232299373245e-05, "loss": 0.6683, "step": 14912 }, { "epoch": 2.4344312477041754, "grad_norm": 1.7654426097869873, "learning_rate": 1.9041096707555764e-05, "loss": 0.6395, "step": 14913 }, { "epoch": 2.43459450634668, "grad_norm": 1.897874355316162, "learning_rate": 1.90409611066339e-05, "loss": 0.6443, "step": 14914 }, { "epoch": 2.4347577649891843, "grad_norm": 1.5646510124206543, "learning_rate": 1.9040825496607788e-05, "loss": 0.5478, "step": 14915 }, { "epoch": 2.4349210236316887, "grad_norm": 1.668532371520996, "learning_rate": 1.9040689877477567e-05, "loss": 0.5525, "step": 14916 }, { "epoch": 2.4350842822741927, "grad_norm": 1.7304482460021973, "learning_rate": 1.904055424924337e-05, "loss": 0.5968, "step": 14917 }, { "epoch": 2.435247540916697, "grad_norm": 1.4615495204925537, "learning_rate": 1.9040418611905343e-05, "loss": 0.4702, "step": 14918 }, { "epoch": 2.4354107995592016, "grad_norm": 1.5160919427871704, "learning_rate": 1.9040282965463615e-05, "loss": 0.4733, "step": 14919 }, { "epoch": 2.435574058201706, "grad_norm": 1.9439952373504639, "learning_rate": 1.9040147309918326e-05, "loss": 0.5766, "step": 14920 }, { "epoch": 2.4357373168442105, "grad_norm": 1.8062443733215332, "learning_rate": 1.904001164526961e-05, "loss": 0.5706, "step": 14921 }, { "epoch": 2.435900575486715, "grad_norm": 2.034292697906494, "learning_rate": 1.9039875971517604e-05, "loss": 0.6815, "step": 14922 }, { "epoch": 2.4360638341292193, "grad_norm": 1.5407143831253052, "learning_rate": 1.9039740288662445e-05, "loss": 0.4941, "step": 14923 }, { "epoch": 2.436227092771724, "grad_norm": 1.4836732149124146, "learning_rate": 1.903960459670427e-05, "loss": 0.5115, "step": 14924 }, { "epoch": 2.4363903514142278, "grad_norm": 1.9302396774291992, "learning_rate": 1.9039468895643215e-05, "loss": 0.7025, "step": 14925 }, { "epoch": 2.436553610056732, "grad_norm": 1.770150065422058, "learning_rate": 1.903933318547942e-05, "loss": 0.5931, "step": 14926 }, { "epoch": 2.4367168686992366, "grad_norm": 1.4910515546798706, "learning_rate": 1.9039197466213017e-05, "loss": 0.4523, "step": 14927 }, { "epoch": 2.436880127341741, "grad_norm": 1.749051809310913, "learning_rate": 1.9039061737844145e-05, "loss": 0.4911, "step": 14928 }, { "epoch": 2.4370433859842455, "grad_norm": 1.8939744234085083, "learning_rate": 1.9038926000372938e-05, "loss": 0.5764, "step": 14929 }, { "epoch": 2.43720664462675, "grad_norm": 1.5359153747558594, "learning_rate": 1.903879025379954e-05, "loss": 0.5389, "step": 14930 }, { "epoch": 2.4373699032692544, "grad_norm": 1.6750926971435547, "learning_rate": 1.903865449812408e-05, "loss": 0.5044, "step": 14931 }, { "epoch": 2.437533161911759, "grad_norm": 1.5458006858825684, "learning_rate": 1.9038518733346696e-05, "loss": 0.4845, "step": 14932 }, { "epoch": 2.4376964205542633, "grad_norm": 1.6655356884002686, "learning_rate": 1.9038382959467526e-05, "loss": 0.4993, "step": 14933 }, { "epoch": 2.4378596791967677, "grad_norm": 1.591012716293335, "learning_rate": 1.9038247176486706e-05, "loss": 0.5029, "step": 14934 }, { "epoch": 2.4380229378392717, "grad_norm": 1.887017846107483, "learning_rate": 1.9038111384404375e-05, "loss": 0.5149, "step": 14935 }, { "epoch": 2.438186196481776, "grad_norm": 1.9391728639602661, "learning_rate": 1.9037975583220668e-05, "loss": 0.6618, "step": 14936 }, { "epoch": 2.4383494551242806, "grad_norm": 1.9103147983551025, "learning_rate": 1.903783977293572e-05, "loss": 0.5757, "step": 14937 }, { "epoch": 2.438512713766785, "grad_norm": 1.8828321695327759, "learning_rate": 1.9037703953549675e-05, "loss": 0.6099, "step": 14938 }, { "epoch": 2.4386759724092895, "grad_norm": 2.043605089187622, "learning_rate": 1.903756812506266e-05, "loss": 0.6691, "step": 14939 }, { "epoch": 2.438839231051794, "grad_norm": 2.107741355895996, "learning_rate": 1.903743228747482e-05, "loss": 0.5656, "step": 14940 }, { "epoch": 2.4390024896942983, "grad_norm": 1.573472499847412, "learning_rate": 1.9037296440786287e-05, "loss": 0.5112, "step": 14941 }, { "epoch": 2.4391657483368023, "grad_norm": 1.7890576124191284, "learning_rate": 1.9037160584997195e-05, "loss": 0.5929, "step": 14942 }, { "epoch": 2.4393290069793068, "grad_norm": 1.8422499895095825, "learning_rate": 1.903702472010769e-05, "loss": 0.6321, "step": 14943 }, { "epoch": 2.439492265621811, "grad_norm": 1.359668254852295, "learning_rate": 1.90368888461179e-05, "loss": 0.4669, "step": 14944 }, { "epoch": 2.4396555242643156, "grad_norm": 1.8932963609695435, "learning_rate": 1.903675296302797e-05, "loss": 0.6175, "step": 14945 }, { "epoch": 2.43981878290682, "grad_norm": 1.9303944110870361, "learning_rate": 1.9036617070838026e-05, "loss": 0.5268, "step": 14946 }, { "epoch": 2.4399820415493245, "grad_norm": 1.621153712272644, "learning_rate": 1.9036481169548215e-05, "loss": 0.4906, "step": 14947 }, { "epoch": 2.440145300191829, "grad_norm": 1.9538962841033936, "learning_rate": 1.9036345259158667e-05, "loss": 0.6298, "step": 14948 }, { "epoch": 2.4403085588343334, "grad_norm": 1.7463122606277466, "learning_rate": 1.9036209339669523e-05, "loss": 0.6015, "step": 14949 }, { "epoch": 2.440471817476838, "grad_norm": 1.76893949508667, "learning_rate": 1.9036073411080917e-05, "loss": 0.5653, "step": 14950 }, { "epoch": 2.4406350761193423, "grad_norm": 1.826669692993164, "learning_rate": 1.9035937473392992e-05, "loss": 0.5307, "step": 14951 }, { "epoch": 2.4407983347618463, "grad_norm": 1.7388789653778076, "learning_rate": 1.9035801526605876e-05, "loss": 0.5117, "step": 14952 }, { "epoch": 2.4409615934043507, "grad_norm": 1.8112494945526123, "learning_rate": 1.9035665570719713e-05, "loss": 0.5061, "step": 14953 }, { "epoch": 2.441124852046855, "grad_norm": 1.9270402193069458, "learning_rate": 1.9035529605734637e-05, "loss": 0.615, "step": 14954 }, { "epoch": 2.4412881106893596, "grad_norm": 2.0388882160186768, "learning_rate": 1.9035393631650783e-05, "loss": 0.5641, "step": 14955 }, { "epoch": 2.441451369331864, "grad_norm": 1.9731343984603882, "learning_rate": 1.903525764846829e-05, "loss": 0.6547, "step": 14956 }, { "epoch": 2.4416146279743685, "grad_norm": 1.829236626625061, "learning_rate": 1.9035121656187297e-05, "loss": 0.5669, "step": 14957 }, { "epoch": 2.441777886616873, "grad_norm": 1.9590879678726196, "learning_rate": 1.903498565480794e-05, "loss": 0.6146, "step": 14958 }, { "epoch": 2.4419411452593773, "grad_norm": 1.857578992843628, "learning_rate": 1.903484964433035e-05, "loss": 0.5316, "step": 14959 }, { "epoch": 2.4421044039018813, "grad_norm": 2.309530735015869, "learning_rate": 1.903471362475467e-05, "loss": 0.6668, "step": 14960 }, { "epoch": 2.4422676625443858, "grad_norm": 1.5466243028640747, "learning_rate": 1.903457759608104e-05, "loss": 0.4598, "step": 14961 }, { "epoch": 2.44243092118689, "grad_norm": 1.8385168313980103, "learning_rate": 1.9034441558309588e-05, "loss": 0.5117, "step": 14962 }, { "epoch": 2.4425941798293946, "grad_norm": 1.7189327478408813, "learning_rate": 1.9034305511440457e-05, "loss": 0.5935, "step": 14963 }, { "epoch": 2.442757438471899, "grad_norm": 1.6266891956329346, "learning_rate": 1.9034169455473787e-05, "loss": 0.5592, "step": 14964 }, { "epoch": 2.4429206971144035, "grad_norm": 1.8783230781555176, "learning_rate": 1.9034033390409704e-05, "loss": 0.5342, "step": 14965 }, { "epoch": 2.443083955756908, "grad_norm": 1.6742109060287476, "learning_rate": 1.9033897316248356e-05, "loss": 0.5205, "step": 14966 }, { "epoch": 2.4432472143994124, "grad_norm": 2.421536922454834, "learning_rate": 1.9033761232989877e-05, "loss": 0.607, "step": 14967 }, { "epoch": 2.443410473041917, "grad_norm": 1.8752896785736084, "learning_rate": 1.90336251406344e-05, "loss": 0.5878, "step": 14968 }, { "epoch": 2.4435737316844213, "grad_norm": 1.5289314985275269, "learning_rate": 1.9033489039182063e-05, "loss": 0.4939, "step": 14969 }, { "epoch": 2.4437369903269253, "grad_norm": 1.9914402961730957, "learning_rate": 1.903335292863301e-05, "loss": 0.5923, "step": 14970 }, { "epoch": 2.4439002489694297, "grad_norm": 1.845375657081604, "learning_rate": 1.903321680898737e-05, "loss": 0.5593, "step": 14971 }, { "epoch": 2.444063507611934, "grad_norm": 1.656806230545044, "learning_rate": 1.9033080680245283e-05, "loss": 0.5411, "step": 14972 }, { "epoch": 2.4442267662544386, "grad_norm": 1.857143521308899, "learning_rate": 1.9032944542406884e-05, "loss": 0.5469, "step": 14973 }, { "epoch": 2.444390024896943, "grad_norm": 1.881013035774231, "learning_rate": 1.903280839547232e-05, "loss": 0.5027, "step": 14974 }, { "epoch": 2.4445532835394475, "grad_norm": 1.9495882987976074, "learning_rate": 1.9032672239441715e-05, "loss": 0.6646, "step": 14975 }, { "epoch": 2.444716542181952, "grad_norm": 1.4398552179336548, "learning_rate": 1.903253607431521e-05, "loss": 0.4165, "step": 14976 }, { "epoch": 2.4448798008244563, "grad_norm": 1.7296417951583862, "learning_rate": 1.9032399900092946e-05, "loss": 0.6142, "step": 14977 }, { "epoch": 2.4450430594669603, "grad_norm": 1.6350985765457153, "learning_rate": 1.9032263716775058e-05, "loss": 0.5574, "step": 14978 }, { "epoch": 2.4452063181094648, "grad_norm": 1.6692779064178467, "learning_rate": 1.903212752436168e-05, "loss": 0.4892, "step": 14979 }, { "epoch": 2.445369576751969, "grad_norm": 2.1317837238311768, "learning_rate": 1.9031991322852956e-05, "loss": 0.5617, "step": 14980 }, { "epoch": 2.4455328353944736, "grad_norm": 1.774700403213501, "learning_rate": 1.9031855112249016e-05, "loss": 0.5384, "step": 14981 }, { "epoch": 2.445696094036978, "grad_norm": 1.7266255617141724, "learning_rate": 1.9031718892550003e-05, "loss": 0.5124, "step": 14982 }, { "epoch": 2.4458593526794825, "grad_norm": 2.062485933303833, "learning_rate": 1.9031582663756048e-05, "loss": 0.6594, "step": 14983 }, { "epoch": 2.446022611321987, "grad_norm": 2.1809816360473633, "learning_rate": 1.9031446425867296e-05, "loss": 0.7766, "step": 14984 }, { "epoch": 2.4461858699644914, "grad_norm": 1.9755696058273315, "learning_rate": 1.9031310178883874e-05, "loss": 0.5768, "step": 14985 }, { "epoch": 2.446349128606996, "grad_norm": 1.7174519300460815, "learning_rate": 1.903117392280593e-05, "loss": 0.527, "step": 14986 }, { "epoch": 2.4465123872495003, "grad_norm": 1.7275046110153198, "learning_rate": 1.9031037657633594e-05, "loss": 0.4424, "step": 14987 }, { "epoch": 2.4466756458920043, "grad_norm": 1.404295563697815, "learning_rate": 1.9030901383367007e-05, "loss": 0.4538, "step": 14988 }, { "epoch": 2.4468389045345087, "grad_norm": 1.7492846250534058, "learning_rate": 1.9030765100006302e-05, "loss": 0.6055, "step": 14989 }, { "epoch": 2.447002163177013, "grad_norm": 1.8098983764648438, "learning_rate": 1.903062880755162e-05, "loss": 0.6113, "step": 14990 }, { "epoch": 2.4471654218195176, "grad_norm": 2.1297707557678223, "learning_rate": 1.90304925060031e-05, "loss": 0.6535, "step": 14991 }, { "epoch": 2.447328680462022, "grad_norm": 2.0867292881011963, "learning_rate": 1.9030356195360875e-05, "loss": 0.6314, "step": 14992 }, { "epoch": 2.4474919391045264, "grad_norm": 1.5578144788742065, "learning_rate": 1.903021987562508e-05, "loss": 0.5092, "step": 14993 }, { "epoch": 2.447655197747031, "grad_norm": 1.9064526557922363, "learning_rate": 1.903008354679586e-05, "loss": 0.6382, "step": 14994 }, { "epoch": 2.447818456389535, "grad_norm": 2.0602874755859375, "learning_rate": 1.902994720887335e-05, "loss": 0.6535, "step": 14995 }, { "epoch": 2.4479817150320393, "grad_norm": 1.568360686302185, "learning_rate": 1.902981086185768e-05, "loss": 0.6401, "step": 14996 }, { "epoch": 2.4481449736745438, "grad_norm": 1.6843199729919434, "learning_rate": 1.9029674505748998e-05, "loss": 0.5393, "step": 14997 }, { "epoch": 2.448308232317048, "grad_norm": 1.482163667678833, "learning_rate": 1.9029538140547434e-05, "loss": 0.4759, "step": 14998 }, { "epoch": 2.4484714909595526, "grad_norm": 1.6809319257736206, "learning_rate": 1.9029401766253127e-05, "loss": 0.492, "step": 14999 }, { "epoch": 2.448634749602057, "grad_norm": 1.6218905448913574, "learning_rate": 1.9029265382866216e-05, "loss": 0.4917, "step": 15000 }, { "epoch": 2.4487980082445615, "grad_norm": 1.687637209892273, "learning_rate": 1.9029128990386833e-05, "loss": 0.5065, "step": 15001 }, { "epoch": 2.448961266887066, "grad_norm": 2.033961057662964, "learning_rate": 1.9028992588815124e-05, "loss": 0.6625, "step": 15002 }, { "epoch": 2.4491245255295704, "grad_norm": 1.5695745944976807, "learning_rate": 1.9028856178151222e-05, "loss": 0.4937, "step": 15003 }, { "epoch": 2.449287784172075, "grad_norm": 1.7977287769317627, "learning_rate": 1.902871975839526e-05, "loss": 0.5313, "step": 15004 }, { "epoch": 2.449451042814579, "grad_norm": 1.871834397315979, "learning_rate": 1.9028583329547383e-05, "loss": 0.5812, "step": 15005 }, { "epoch": 2.4496143014570833, "grad_norm": 1.6361526250839233, "learning_rate": 1.9028446891607726e-05, "loss": 0.591, "step": 15006 }, { "epoch": 2.4497775600995877, "grad_norm": 1.6358771324157715, "learning_rate": 1.9028310444576423e-05, "loss": 0.5985, "step": 15007 }, { "epoch": 2.449940818742092, "grad_norm": 1.9050759077072144, "learning_rate": 1.9028173988453617e-05, "loss": 0.6309, "step": 15008 }, { "epoch": 2.4501040773845966, "grad_norm": 1.615654706954956, "learning_rate": 1.9028037523239437e-05, "loss": 0.5583, "step": 15009 }, { "epoch": 2.450267336027101, "grad_norm": 1.4890351295471191, "learning_rate": 1.902790104893403e-05, "loss": 0.5137, "step": 15010 }, { "epoch": 2.4504305946696054, "grad_norm": 2.1243813037872314, "learning_rate": 1.9027764565537525e-05, "loss": 0.6493, "step": 15011 }, { "epoch": 2.45059385331211, "grad_norm": 1.8206160068511963, "learning_rate": 1.9027628073050067e-05, "loss": 0.6266, "step": 15012 }, { "epoch": 2.450757111954614, "grad_norm": 1.9584447145462036, "learning_rate": 1.902749157147179e-05, "loss": 0.5887, "step": 15013 }, { "epoch": 2.4509203705971183, "grad_norm": 1.7082204818725586, "learning_rate": 1.902735506080283e-05, "loss": 0.5551, "step": 15014 }, { "epoch": 2.4510836292396228, "grad_norm": 1.491708517074585, "learning_rate": 1.9027218541043327e-05, "loss": 0.4827, "step": 15015 }, { "epoch": 2.451246887882127, "grad_norm": 1.8928724527359009, "learning_rate": 1.9027082012193416e-05, "loss": 0.5443, "step": 15016 }, { "epoch": 2.4514101465246316, "grad_norm": 1.6438833475112915, "learning_rate": 1.9026945474253234e-05, "loss": 0.6198, "step": 15017 }, { "epoch": 2.451573405167136, "grad_norm": 1.7827366590499878, "learning_rate": 1.9026808927222923e-05, "loss": 0.6596, "step": 15018 }, { "epoch": 2.4517366638096405, "grad_norm": 1.6143018007278442, "learning_rate": 1.902667237110262e-05, "loss": 0.5806, "step": 15019 }, { "epoch": 2.451899922452145, "grad_norm": 1.8547422885894775, "learning_rate": 1.9026535805892456e-05, "loss": 0.5297, "step": 15020 }, { "epoch": 2.4520631810946494, "grad_norm": 1.5370044708251953, "learning_rate": 1.9026399231592572e-05, "loss": 0.5152, "step": 15021 }, { "epoch": 2.452226439737154, "grad_norm": 1.8321579694747925, "learning_rate": 1.902626264820311e-05, "loss": 0.5173, "step": 15022 }, { "epoch": 2.452389698379658, "grad_norm": 1.5255614519119263, "learning_rate": 1.9026126055724202e-05, "loss": 0.5068, "step": 15023 }, { "epoch": 2.4525529570221622, "grad_norm": 1.5533872842788696, "learning_rate": 1.902598945415599e-05, "loss": 0.5514, "step": 15024 }, { "epoch": 2.4527162156646667, "grad_norm": 1.9138580560684204, "learning_rate": 1.902585284349861e-05, "loss": 0.6053, "step": 15025 }, { "epoch": 2.452879474307171, "grad_norm": 2.018965244293213, "learning_rate": 1.9025716223752193e-05, "loss": 0.6203, "step": 15026 }, { "epoch": 2.4530427329496756, "grad_norm": 1.786868929862976, "learning_rate": 1.9025579594916886e-05, "loss": 0.5624, "step": 15027 }, { "epoch": 2.45320599159218, "grad_norm": 1.641600489616394, "learning_rate": 1.902544295699282e-05, "loss": 0.5317, "step": 15028 }, { "epoch": 2.4533692502346844, "grad_norm": 1.9949982166290283, "learning_rate": 1.902530630998014e-05, "loss": 0.5588, "step": 15029 }, { "epoch": 2.453532508877189, "grad_norm": 1.7500920295715332, "learning_rate": 1.9025169653878973e-05, "loss": 0.6008, "step": 15030 }, { "epoch": 2.453695767519693, "grad_norm": 2.1440834999084473, "learning_rate": 1.9025032988689466e-05, "loss": 0.6528, "step": 15031 }, { "epoch": 2.4538590261621973, "grad_norm": 1.8911616802215576, "learning_rate": 1.9024896314411753e-05, "loss": 0.5395, "step": 15032 }, { "epoch": 2.4540222848047017, "grad_norm": 1.6172834634780884, "learning_rate": 1.9024759631045972e-05, "loss": 0.5412, "step": 15033 }, { "epoch": 2.454185543447206, "grad_norm": 1.9009448289871216, "learning_rate": 1.902462293859226e-05, "loss": 0.5641, "step": 15034 }, { "epoch": 2.4543488020897106, "grad_norm": 1.596114993095398, "learning_rate": 1.9024486237050755e-05, "loss": 0.4583, "step": 15035 }, { "epoch": 2.454512060732215, "grad_norm": 1.8259263038635254, "learning_rate": 1.9024349526421596e-05, "loss": 0.587, "step": 15036 }, { "epoch": 2.4546753193747195, "grad_norm": 2.1422815322875977, "learning_rate": 1.902421280670492e-05, "loss": 0.5244, "step": 15037 }, { "epoch": 2.454838578017224, "grad_norm": 1.8045785427093506, "learning_rate": 1.902407607790086e-05, "loss": 0.5979, "step": 15038 }, { "epoch": 2.4550018366597284, "grad_norm": 1.9214386940002441, "learning_rate": 1.9023939340009558e-05, "loss": 0.5193, "step": 15039 }, { "epoch": 2.4551650953022324, "grad_norm": 1.7562785148620605, "learning_rate": 1.9023802593031156e-05, "loss": 0.5341, "step": 15040 }, { "epoch": 2.455328353944737, "grad_norm": 1.5503790378570557, "learning_rate": 1.9023665836965784e-05, "loss": 0.5894, "step": 15041 }, { "epoch": 2.4554916125872412, "grad_norm": 2.0818519592285156, "learning_rate": 1.9023529071813582e-05, "loss": 0.53, "step": 15042 }, { "epoch": 2.4556548712297457, "grad_norm": 2.168790578842163, "learning_rate": 1.902339229757469e-05, "loss": 0.6292, "step": 15043 }, { "epoch": 2.45581812987225, "grad_norm": 1.5458272695541382, "learning_rate": 1.902325551424925e-05, "loss": 0.4547, "step": 15044 }, { "epoch": 2.4559813885147546, "grad_norm": 2.122361183166504, "learning_rate": 1.9023118721837385e-05, "loss": 0.6254, "step": 15045 }, { "epoch": 2.456144647157259, "grad_norm": 1.8119949102401733, "learning_rate": 1.9022981920339246e-05, "loss": 0.5938, "step": 15046 }, { "epoch": 2.4563079057997634, "grad_norm": 1.815977692604065, "learning_rate": 1.9022845109754965e-05, "loss": 0.4857, "step": 15047 }, { "epoch": 2.4564711644422674, "grad_norm": 1.611280083656311, "learning_rate": 1.9022708290084683e-05, "loss": 0.4708, "step": 15048 }, { "epoch": 2.456634423084772, "grad_norm": 2.2484607696533203, "learning_rate": 1.9022571461328536e-05, "loss": 0.6244, "step": 15049 }, { "epoch": 2.4567976817272763, "grad_norm": 1.7807224988937378, "learning_rate": 1.902243462348666e-05, "loss": 0.5949, "step": 15050 }, { "epoch": 2.4569609403697807, "grad_norm": 1.8739080429077148, "learning_rate": 1.90222977765592e-05, "loss": 0.5853, "step": 15051 }, { "epoch": 2.457124199012285, "grad_norm": 1.6931546926498413, "learning_rate": 1.9022160920546282e-05, "loss": 0.609, "step": 15052 }, { "epoch": 2.4572874576547896, "grad_norm": 1.7945717573165894, "learning_rate": 1.9022024055448055e-05, "loss": 0.6308, "step": 15053 }, { "epoch": 2.457450716297294, "grad_norm": 1.745080828666687, "learning_rate": 1.902188718126465e-05, "loss": 0.5564, "step": 15054 }, { "epoch": 2.4576139749397985, "grad_norm": 1.9419431686401367, "learning_rate": 1.9021750297996207e-05, "loss": 0.6249, "step": 15055 }, { "epoch": 2.457777233582303, "grad_norm": 1.8568979501724243, "learning_rate": 1.9021613405642865e-05, "loss": 0.6111, "step": 15056 }, { "epoch": 2.4579404922248074, "grad_norm": 1.5583370923995972, "learning_rate": 1.902147650420476e-05, "loss": 0.4446, "step": 15057 }, { "epoch": 2.4581037508673114, "grad_norm": 1.7760329246520996, "learning_rate": 1.902133959368203e-05, "loss": 0.5152, "step": 15058 }, { "epoch": 2.458267009509816, "grad_norm": 2.060270071029663, "learning_rate": 1.9021202674074812e-05, "loss": 0.6859, "step": 15059 }, { "epoch": 2.4584302681523202, "grad_norm": 2.0132858753204346, "learning_rate": 1.9021065745383248e-05, "loss": 0.6735, "step": 15060 }, { "epoch": 2.4585935267948247, "grad_norm": 1.5735855102539062, "learning_rate": 1.9020928807607473e-05, "loss": 0.5342, "step": 15061 }, { "epoch": 2.458756785437329, "grad_norm": 1.746121883392334, "learning_rate": 1.9020791860747625e-05, "loss": 0.4461, "step": 15062 }, { "epoch": 2.4589200440798336, "grad_norm": 2.1104133129119873, "learning_rate": 1.902065490480384e-05, "loss": 0.6795, "step": 15063 }, { "epoch": 2.459083302722338, "grad_norm": 2.395312547683716, "learning_rate": 1.902051793977626e-05, "loss": 0.7587, "step": 15064 }, { "epoch": 2.4592465613648424, "grad_norm": 1.2433826923370361, "learning_rate": 1.902038096566502e-05, "loss": 0.4197, "step": 15065 }, { "epoch": 2.4594098200073464, "grad_norm": 2.1077568531036377, "learning_rate": 1.9020243982470262e-05, "loss": 0.6682, "step": 15066 }, { "epoch": 2.459573078649851, "grad_norm": 1.9799845218658447, "learning_rate": 1.9020106990192114e-05, "loss": 0.6173, "step": 15067 }, { "epoch": 2.4597363372923553, "grad_norm": 1.9480339288711548, "learning_rate": 1.9019969988830728e-05, "loss": 0.5817, "step": 15068 }, { "epoch": 2.4598995959348597, "grad_norm": 2.01764178276062, "learning_rate": 1.9019832978386227e-05, "loss": 0.6017, "step": 15069 }, { "epoch": 2.460062854577364, "grad_norm": 1.6369704008102417, "learning_rate": 1.9019695958858762e-05, "loss": 0.5295, "step": 15070 }, { "epoch": 2.4602261132198686, "grad_norm": 1.9070568084716797, "learning_rate": 1.9019558930248464e-05, "loss": 0.4758, "step": 15071 }, { "epoch": 2.460389371862373, "grad_norm": 1.7836257219314575, "learning_rate": 1.9019421892555473e-05, "loss": 0.5111, "step": 15072 }, { "epoch": 2.4605526305048775, "grad_norm": 1.7310724258422852, "learning_rate": 1.9019284845779927e-05, "loss": 0.5942, "step": 15073 }, { "epoch": 2.460715889147382, "grad_norm": 2.0403265953063965, "learning_rate": 1.9019147789921965e-05, "loss": 0.6915, "step": 15074 }, { "epoch": 2.4608791477898864, "grad_norm": 2.352276086807251, "learning_rate": 1.9019010724981716e-05, "loss": 0.737, "step": 15075 }, { "epoch": 2.4610424064323904, "grad_norm": 1.679337978363037, "learning_rate": 1.9018873650959333e-05, "loss": 0.4336, "step": 15076 }, { "epoch": 2.461205665074895, "grad_norm": 2.0560288429260254, "learning_rate": 1.9018736567854943e-05, "loss": 0.7122, "step": 15077 }, { "epoch": 2.4613689237173992, "grad_norm": 1.8166484832763672, "learning_rate": 1.901859947566869e-05, "loss": 0.6747, "step": 15078 }, { "epoch": 2.4615321823599037, "grad_norm": 1.947636604309082, "learning_rate": 1.901846237440071e-05, "loss": 0.5478, "step": 15079 }, { "epoch": 2.461695441002408, "grad_norm": 1.780179500579834, "learning_rate": 1.901832526405114e-05, "loss": 0.5968, "step": 15080 }, { "epoch": 2.4618586996449126, "grad_norm": 1.8683199882507324, "learning_rate": 1.901818814462012e-05, "loss": 0.6298, "step": 15081 }, { "epoch": 2.462021958287417, "grad_norm": 1.784684658050537, "learning_rate": 1.9018051016107784e-05, "loss": 0.6407, "step": 15082 }, { "epoch": 2.462185216929921, "grad_norm": 1.8813763856887817, "learning_rate": 1.9017913878514274e-05, "loss": 0.6266, "step": 15083 }, { "epoch": 2.4623484755724254, "grad_norm": 1.8458768129348755, "learning_rate": 1.9017776731839726e-05, "loss": 0.6466, "step": 15084 }, { "epoch": 2.46251173421493, "grad_norm": 2.0297114849090576, "learning_rate": 1.901763957608428e-05, "loss": 0.6608, "step": 15085 }, { "epoch": 2.4626749928574343, "grad_norm": 1.710324764251709, "learning_rate": 1.9017502411248076e-05, "loss": 0.5125, "step": 15086 }, { "epoch": 2.4628382514999387, "grad_norm": 1.6096725463867188, "learning_rate": 1.9017365237331245e-05, "loss": 0.5301, "step": 15087 }, { "epoch": 2.463001510142443, "grad_norm": 1.76421320438385, "learning_rate": 1.9017228054333936e-05, "loss": 0.5355, "step": 15088 }, { "epoch": 2.4631647687849476, "grad_norm": 1.6556580066680908, "learning_rate": 1.9017090862256275e-05, "loss": 0.5233, "step": 15089 }, { "epoch": 2.463328027427452, "grad_norm": 1.7011058330535889, "learning_rate": 1.901695366109841e-05, "loss": 0.4933, "step": 15090 }, { "epoch": 2.4634912860699565, "grad_norm": 1.8047064542770386, "learning_rate": 1.9016816450860474e-05, "loss": 0.5453, "step": 15091 }, { "epoch": 2.463654544712461, "grad_norm": 1.6429277658462524, "learning_rate": 1.9016679231542602e-05, "loss": 0.5101, "step": 15092 }, { "epoch": 2.463817803354965, "grad_norm": 1.814264178276062, "learning_rate": 1.9016542003144943e-05, "loss": 0.611, "step": 15093 }, { "epoch": 2.4639810619974694, "grad_norm": 1.573014736175537, "learning_rate": 1.9016404765667624e-05, "loss": 0.4746, "step": 15094 }, { "epoch": 2.464144320639974, "grad_norm": 1.8303940296173096, "learning_rate": 1.901626751911079e-05, "loss": 0.6112, "step": 15095 }, { "epoch": 2.4643075792824782, "grad_norm": 1.7938103675842285, "learning_rate": 1.9016130263474573e-05, "loss": 0.613, "step": 15096 }, { "epoch": 2.4644708379249827, "grad_norm": 1.808402419090271, "learning_rate": 1.901599299875912e-05, "loss": 0.5745, "step": 15097 }, { "epoch": 2.464634096567487, "grad_norm": 1.6637253761291504, "learning_rate": 1.901585572496456e-05, "loss": 0.6278, "step": 15098 }, { "epoch": 2.4647973552099915, "grad_norm": 1.875270128250122, "learning_rate": 1.901571844209104e-05, "loss": 0.5814, "step": 15099 }, { "epoch": 2.464960613852496, "grad_norm": 1.7218828201293945, "learning_rate": 1.9015581150138693e-05, "loss": 0.5247, "step": 15100 }, { "epoch": 2.465123872495, "grad_norm": 1.9707529544830322, "learning_rate": 1.9015443849107655e-05, "loss": 0.6169, "step": 15101 }, { "epoch": 2.4652871311375044, "grad_norm": 1.5089385509490967, "learning_rate": 1.901530653899807e-05, "loss": 0.4602, "step": 15102 }, { "epoch": 2.465450389780009, "grad_norm": 1.6014118194580078, "learning_rate": 1.9015169219810073e-05, "loss": 0.4803, "step": 15103 }, { "epoch": 2.4656136484225133, "grad_norm": 1.8710399866104126, "learning_rate": 1.9015031891543805e-05, "loss": 0.6258, "step": 15104 }, { "epoch": 2.4657769070650177, "grad_norm": 2.395296335220337, "learning_rate": 1.90148945541994e-05, "loss": 0.4889, "step": 15105 }, { "epoch": 2.465940165707522, "grad_norm": 1.8598685264587402, "learning_rate": 1.9014757207776998e-05, "loss": 0.6156, "step": 15106 }, { "epoch": 2.4661034243500266, "grad_norm": 2.0251171588897705, "learning_rate": 1.901461985227674e-05, "loss": 0.7113, "step": 15107 }, { "epoch": 2.466266682992531, "grad_norm": 1.9036893844604492, "learning_rate": 1.9014482487698762e-05, "loss": 0.6309, "step": 15108 }, { "epoch": 2.4664299416350355, "grad_norm": 1.8295382261276245, "learning_rate": 1.9014345114043203e-05, "loss": 0.6087, "step": 15109 }, { "epoch": 2.46659320027754, "grad_norm": 1.6802847385406494, "learning_rate": 1.9014207731310202e-05, "loss": 0.5688, "step": 15110 }, { "epoch": 2.466756458920044, "grad_norm": 1.7458312511444092, "learning_rate": 1.901407033949989e-05, "loss": 0.4958, "step": 15111 }, { "epoch": 2.4669197175625484, "grad_norm": 2.0647404193878174, "learning_rate": 1.9013932938612417e-05, "loss": 0.6858, "step": 15112 }, { "epoch": 2.467082976205053, "grad_norm": 1.7774913311004639, "learning_rate": 1.9013795528647913e-05, "loss": 0.6144, "step": 15113 }, { "epoch": 2.4672462348475572, "grad_norm": 1.5004442930221558, "learning_rate": 1.9013658109606523e-05, "loss": 0.4796, "step": 15114 }, { "epoch": 2.4674094934900617, "grad_norm": 1.7838020324707031, "learning_rate": 1.901352068148838e-05, "loss": 0.5532, "step": 15115 }, { "epoch": 2.467572752132566, "grad_norm": 1.670720100402832, "learning_rate": 1.9013383244293623e-05, "loss": 0.5051, "step": 15116 }, { "epoch": 2.4677360107750705, "grad_norm": 1.9448769092559814, "learning_rate": 1.901324579802239e-05, "loss": 0.6159, "step": 15117 }, { "epoch": 2.467899269417575, "grad_norm": 1.898565411567688, "learning_rate": 1.9013108342674824e-05, "loss": 0.6134, "step": 15118 }, { "epoch": 2.468062528060079, "grad_norm": 1.9035621881484985, "learning_rate": 1.9012970878251062e-05, "loss": 0.5897, "step": 15119 }, { "epoch": 2.4682257867025834, "grad_norm": 1.769464135169983, "learning_rate": 1.9012833404751237e-05, "loss": 0.511, "step": 15120 }, { "epoch": 2.468389045345088, "grad_norm": 1.6830134391784668, "learning_rate": 1.901269592217549e-05, "loss": 0.5636, "step": 15121 }, { "epoch": 2.4685523039875923, "grad_norm": 1.6824496984481812, "learning_rate": 1.9012558430523964e-05, "loss": 0.6103, "step": 15122 }, { "epoch": 2.4687155626300967, "grad_norm": 1.6571964025497437, "learning_rate": 1.901242092979679e-05, "loss": 0.4837, "step": 15123 }, { "epoch": 2.468878821272601, "grad_norm": 1.5567094087600708, "learning_rate": 1.9012283419994115e-05, "loss": 0.478, "step": 15124 }, { "epoch": 2.4690420799151056, "grad_norm": 1.8253682851791382, "learning_rate": 1.9012145901116072e-05, "loss": 0.5947, "step": 15125 }, { "epoch": 2.46920533855761, "grad_norm": 1.6572198867797852, "learning_rate": 1.9012008373162796e-05, "loss": 0.5015, "step": 15126 }, { "epoch": 2.4693685972001145, "grad_norm": 1.8171839714050293, "learning_rate": 1.9011870836134437e-05, "loss": 0.5597, "step": 15127 }, { "epoch": 2.469531855842619, "grad_norm": 1.7802621126174927, "learning_rate": 1.901173329003112e-05, "loss": 0.5633, "step": 15128 }, { "epoch": 2.469695114485123, "grad_norm": 1.8197767734527588, "learning_rate": 1.9011595734852997e-05, "loss": 0.5921, "step": 15129 }, { "epoch": 2.4698583731276273, "grad_norm": 1.727607011795044, "learning_rate": 1.9011458170600195e-05, "loss": 0.4911, "step": 15130 }, { "epoch": 2.470021631770132, "grad_norm": 1.8526906967163086, "learning_rate": 1.9011320597272855e-05, "loss": 0.6218, "step": 15131 }, { "epoch": 2.4701848904126362, "grad_norm": 1.9520831108093262, "learning_rate": 1.9011183014871122e-05, "loss": 0.5625, "step": 15132 }, { "epoch": 2.4703481490551407, "grad_norm": 1.952811598777771, "learning_rate": 1.9011045423395126e-05, "loss": 0.6225, "step": 15133 }, { "epoch": 2.470511407697645, "grad_norm": 1.8954098224639893, "learning_rate": 1.9010907822845014e-05, "loss": 0.6284, "step": 15134 }, { "epoch": 2.4706746663401495, "grad_norm": 2.031651258468628, "learning_rate": 1.9010770213220916e-05, "loss": 0.7121, "step": 15135 }, { "epoch": 2.4708379249826535, "grad_norm": 1.8550819158554077, "learning_rate": 1.9010632594522978e-05, "loss": 0.5702, "step": 15136 }, { "epoch": 2.471001183625158, "grad_norm": 1.689990758895874, "learning_rate": 1.9010494966751334e-05, "loss": 0.5347, "step": 15137 }, { "epoch": 2.4711644422676624, "grad_norm": 1.8603922128677368, "learning_rate": 1.9010357329906125e-05, "loss": 0.5533, "step": 15138 }, { "epoch": 2.471327700910167, "grad_norm": 1.677438497543335, "learning_rate": 1.901021968398749e-05, "loss": 0.5382, "step": 15139 }, { "epoch": 2.4714909595526713, "grad_norm": 1.8486850261688232, "learning_rate": 1.901008202899556e-05, "loss": 0.5667, "step": 15140 }, { "epoch": 2.4716542181951757, "grad_norm": 1.883485198020935, "learning_rate": 1.9009944364930484e-05, "loss": 0.6736, "step": 15141 }, { "epoch": 2.47181747683768, "grad_norm": 1.6772841215133667, "learning_rate": 1.90098066917924e-05, "loss": 0.522, "step": 15142 }, { "epoch": 2.4719807354801846, "grad_norm": 1.6935551166534424, "learning_rate": 1.9009669009581437e-05, "loss": 0.5793, "step": 15143 }, { "epoch": 2.472143994122689, "grad_norm": 1.7868235111236572, "learning_rate": 1.900953131829774e-05, "loss": 0.4982, "step": 15144 }, { "epoch": 2.4723072527651935, "grad_norm": 2.2121779918670654, "learning_rate": 1.900939361794145e-05, "loss": 0.5456, "step": 15145 }, { "epoch": 2.4724705114076975, "grad_norm": 1.795838475227356, "learning_rate": 1.9009255908512704e-05, "loss": 0.7068, "step": 15146 }, { "epoch": 2.472633770050202, "grad_norm": 1.62517511844635, "learning_rate": 1.9009118190011638e-05, "loss": 0.57, "step": 15147 }, { "epoch": 2.4727970286927063, "grad_norm": 1.8500266075134277, "learning_rate": 1.900898046243839e-05, "loss": 0.5699, "step": 15148 }, { "epoch": 2.472960287335211, "grad_norm": 1.863038420677185, "learning_rate": 1.9008842725793105e-05, "loss": 0.6256, "step": 15149 }, { "epoch": 2.473123545977715, "grad_norm": 1.8703656196594238, "learning_rate": 1.9008704980075915e-05, "loss": 0.6495, "step": 15150 }, { "epoch": 2.4732868046202197, "grad_norm": 1.809712529182434, "learning_rate": 1.900856722528696e-05, "loss": 0.6218, "step": 15151 }, { "epoch": 2.473450063262724, "grad_norm": 1.9566702842712402, "learning_rate": 1.9008429461426384e-05, "loss": 0.56, "step": 15152 }, { "epoch": 2.4736133219052285, "grad_norm": 1.5516674518585205, "learning_rate": 1.9008291688494323e-05, "loss": 0.5836, "step": 15153 }, { "epoch": 2.4737765805477325, "grad_norm": 1.9827086925506592, "learning_rate": 1.9008153906490913e-05, "loss": 0.6463, "step": 15154 }, { "epoch": 2.473939839190237, "grad_norm": 1.7164757251739502, "learning_rate": 1.900801611541629e-05, "loss": 0.4705, "step": 15155 }, { "epoch": 2.4741030978327414, "grad_norm": 1.7821171283721924, "learning_rate": 1.9007878315270604e-05, "loss": 0.478, "step": 15156 }, { "epoch": 2.474266356475246, "grad_norm": 2.3209540843963623, "learning_rate": 1.9007740506053983e-05, "loss": 0.6818, "step": 15157 }, { "epoch": 2.4744296151177503, "grad_norm": 1.5763410329818726, "learning_rate": 1.900760268776657e-05, "loss": 0.5306, "step": 15158 }, { "epoch": 2.4745928737602547, "grad_norm": 2.298036813735962, "learning_rate": 1.9007464860408504e-05, "loss": 0.6126, "step": 15159 }, { "epoch": 2.474756132402759, "grad_norm": 2.6756398677825928, "learning_rate": 1.9007327023979924e-05, "loss": 0.6266, "step": 15160 }, { "epoch": 2.4749193910452636, "grad_norm": 1.3603636026382446, "learning_rate": 1.900718917848097e-05, "loss": 0.4576, "step": 15161 }, { "epoch": 2.475082649687768, "grad_norm": 1.667629361152649, "learning_rate": 1.900705132391177e-05, "loss": 0.5269, "step": 15162 }, { "epoch": 2.4752459083302725, "grad_norm": 1.5204596519470215, "learning_rate": 1.900691346027248e-05, "loss": 0.475, "step": 15163 }, { "epoch": 2.4754091669727765, "grad_norm": 1.5564417839050293, "learning_rate": 1.900677558756323e-05, "loss": 0.5074, "step": 15164 }, { "epoch": 2.475572425615281, "grad_norm": 1.821802020072937, "learning_rate": 1.9006637705784155e-05, "loss": 0.5144, "step": 15165 }, { "epoch": 2.4757356842577853, "grad_norm": 2.4843497276306152, "learning_rate": 1.9006499814935405e-05, "loss": 0.6783, "step": 15166 }, { "epoch": 2.47589894290029, "grad_norm": 1.717063546180725, "learning_rate": 1.9006361915017107e-05, "loss": 0.4608, "step": 15167 }, { "epoch": 2.476062201542794, "grad_norm": 1.9928948879241943, "learning_rate": 1.9006224006029404e-05, "loss": 0.662, "step": 15168 }, { "epoch": 2.4762254601852987, "grad_norm": 2.0073435306549072, "learning_rate": 1.9006086087972438e-05, "loss": 0.4893, "step": 15169 }, { "epoch": 2.476388718827803, "grad_norm": 2.0309934616088867, "learning_rate": 1.9005948160846347e-05, "loss": 0.5092, "step": 15170 }, { "epoch": 2.476551977470307, "grad_norm": 1.8023977279663086, "learning_rate": 1.9005810224651265e-05, "loss": 0.5427, "step": 15171 }, { "epoch": 2.4767152361128115, "grad_norm": 1.6927844285964966, "learning_rate": 1.900567227938734e-05, "loss": 0.486, "step": 15172 }, { "epoch": 2.476878494755316, "grad_norm": 1.8854275941848755, "learning_rate": 1.9005534325054703e-05, "loss": 0.5394, "step": 15173 }, { "epoch": 2.4770417533978204, "grad_norm": 1.5753405094146729, "learning_rate": 1.9005396361653492e-05, "loss": 0.5319, "step": 15174 }, { "epoch": 2.477205012040325, "grad_norm": 1.9788743257522583, "learning_rate": 1.900525838918385e-05, "loss": 0.6757, "step": 15175 }, { "epoch": 2.4773682706828293, "grad_norm": 1.8647326231002808, "learning_rate": 1.900512040764592e-05, "loss": 0.5689, "step": 15176 }, { "epoch": 2.4775315293253337, "grad_norm": 1.7844104766845703, "learning_rate": 1.9004982417039832e-05, "loss": 0.6098, "step": 15177 }, { "epoch": 2.477694787967838, "grad_norm": 2.419398069381714, "learning_rate": 1.900484441736573e-05, "loss": 0.6842, "step": 15178 }, { "epoch": 2.4778580466103426, "grad_norm": 2.2698049545288086, "learning_rate": 1.900470640862375e-05, "loss": 0.6117, "step": 15179 }, { "epoch": 2.478021305252847, "grad_norm": 1.9199270009994507, "learning_rate": 1.900456839081404e-05, "loss": 0.5222, "step": 15180 }, { "epoch": 2.478184563895351, "grad_norm": 1.8950188159942627, "learning_rate": 1.9004430363936724e-05, "loss": 0.5419, "step": 15181 }, { "epoch": 2.4783478225378555, "grad_norm": 1.6892343759536743, "learning_rate": 1.9004292327991952e-05, "loss": 0.5608, "step": 15182 }, { "epoch": 2.47851108118036, "grad_norm": 1.599321961402893, "learning_rate": 1.900415428297986e-05, "loss": 0.5418, "step": 15183 }, { "epoch": 2.4786743398228643, "grad_norm": 1.99397611618042, "learning_rate": 1.9004016228900588e-05, "loss": 0.6084, "step": 15184 }, { "epoch": 2.4788375984653688, "grad_norm": 1.6892507076263428, "learning_rate": 1.9003878165754274e-05, "loss": 0.6074, "step": 15185 }, { "epoch": 2.479000857107873, "grad_norm": 1.6449456214904785, "learning_rate": 1.9003740093541055e-05, "loss": 0.5475, "step": 15186 }, { "epoch": 2.4791641157503777, "grad_norm": 2.0181963443756104, "learning_rate": 1.9003602012261074e-05, "loss": 0.6803, "step": 15187 }, { "epoch": 2.479327374392882, "grad_norm": 1.7729406356811523, "learning_rate": 1.900346392191447e-05, "loss": 0.575, "step": 15188 }, { "epoch": 2.479490633035386, "grad_norm": 2.0336103439331055, "learning_rate": 1.900332582250138e-05, "loss": 0.6859, "step": 15189 }, { "epoch": 2.4796538916778905, "grad_norm": 1.8749920129776, "learning_rate": 1.9003187714021936e-05, "loss": 0.5831, "step": 15190 }, { "epoch": 2.479817150320395, "grad_norm": 1.7840384244918823, "learning_rate": 1.900304959647629e-05, "loss": 0.5962, "step": 15191 }, { "epoch": 2.4799804089628994, "grad_norm": 1.561258316040039, "learning_rate": 1.9002911469864576e-05, "loss": 0.5054, "step": 15192 }, { "epoch": 2.480143667605404, "grad_norm": 1.976454496383667, "learning_rate": 1.900277333418693e-05, "loss": 0.7426, "step": 15193 }, { "epoch": 2.4803069262479083, "grad_norm": 1.9106090068817139, "learning_rate": 1.90026351894435e-05, "loss": 0.6512, "step": 15194 }, { "epoch": 2.4804701848904127, "grad_norm": 1.602236270904541, "learning_rate": 1.900249703563441e-05, "loss": 0.581, "step": 15195 }, { "epoch": 2.480633443532917, "grad_norm": 1.7835651636123657, "learning_rate": 1.9002358872759813e-05, "loss": 0.5008, "step": 15196 }, { "epoch": 2.4807967021754216, "grad_norm": 1.5464705228805542, "learning_rate": 1.900222070081984e-05, "loss": 0.4333, "step": 15197 }, { "epoch": 2.480959960817926, "grad_norm": 2.0875637531280518, "learning_rate": 1.9002082519814636e-05, "loss": 0.5532, "step": 15198 }, { "epoch": 2.48112321946043, "grad_norm": 1.9468439817428589, "learning_rate": 1.9001944329744335e-05, "loss": 0.5839, "step": 15199 }, { "epoch": 2.4812864781029345, "grad_norm": 1.879669189453125, "learning_rate": 1.900180613060908e-05, "loss": 0.5882, "step": 15200 }, { "epoch": 2.481449736745439, "grad_norm": 1.751906394958496, "learning_rate": 1.9001667922409008e-05, "loss": 0.5233, "step": 15201 }, { "epoch": 2.4816129953879433, "grad_norm": 1.9696546792984009, "learning_rate": 1.900152970514426e-05, "loss": 0.5081, "step": 15202 }, { "epoch": 2.4817762540304478, "grad_norm": 1.6874808073043823, "learning_rate": 1.900139147881497e-05, "loss": 0.5831, "step": 15203 }, { "epoch": 2.481939512672952, "grad_norm": 1.6555726528167725, "learning_rate": 1.9001253243421286e-05, "loss": 0.537, "step": 15204 }, { "epoch": 2.4821027713154566, "grad_norm": 1.921563982963562, "learning_rate": 1.900111499896334e-05, "loss": 0.5775, "step": 15205 }, { "epoch": 2.482266029957961, "grad_norm": 1.6779253482818604, "learning_rate": 1.9000976745441277e-05, "loss": 0.6254, "step": 15206 }, { "epoch": 2.482429288600465, "grad_norm": 1.539422631263733, "learning_rate": 1.9000838482855228e-05, "loss": 0.5699, "step": 15207 }, { "epoch": 2.4825925472429695, "grad_norm": 1.6548807621002197, "learning_rate": 1.9000700211205337e-05, "loss": 0.5166, "step": 15208 }, { "epoch": 2.482755805885474, "grad_norm": 1.6303443908691406, "learning_rate": 1.9000561930491746e-05, "loss": 0.5526, "step": 15209 }, { "epoch": 2.4829190645279784, "grad_norm": 2.0284645557403564, "learning_rate": 1.9000423640714595e-05, "loss": 0.5512, "step": 15210 }, { "epoch": 2.483082323170483, "grad_norm": 1.4915478229522705, "learning_rate": 1.9000285341874014e-05, "loss": 0.4477, "step": 15211 }, { "epoch": 2.4832455818129873, "grad_norm": 1.590324878692627, "learning_rate": 1.9000147033970148e-05, "loss": 0.5298, "step": 15212 }, { "epoch": 2.4834088404554917, "grad_norm": 1.695208191871643, "learning_rate": 1.9000008717003137e-05, "loss": 0.515, "step": 15213 }, { "epoch": 2.483572099097996, "grad_norm": 1.4615570306777954, "learning_rate": 1.899987039097312e-05, "loss": 0.4946, "step": 15214 }, { "epoch": 2.4837353577405006, "grad_norm": 1.7002546787261963, "learning_rate": 1.899973205588024e-05, "loss": 0.5536, "step": 15215 }, { "epoch": 2.483898616383005, "grad_norm": 1.7983248233795166, "learning_rate": 1.8999593711724626e-05, "loss": 0.5377, "step": 15216 }, { "epoch": 2.484061875025509, "grad_norm": 1.9844273328781128, "learning_rate": 1.8999455358506427e-05, "loss": 0.6496, "step": 15217 }, { "epoch": 2.4842251336680135, "grad_norm": 1.8883588314056396, "learning_rate": 1.899931699622578e-05, "loss": 0.6048, "step": 15218 }, { "epoch": 2.484388392310518, "grad_norm": 1.7291942834854126, "learning_rate": 1.8999178624882818e-05, "loss": 0.5524, "step": 15219 }, { "epoch": 2.4845516509530223, "grad_norm": 1.7158102989196777, "learning_rate": 1.899904024447769e-05, "loss": 0.5377, "step": 15220 }, { "epoch": 2.4847149095955268, "grad_norm": 2.1454694271087646, "learning_rate": 1.899890185501053e-05, "loss": 0.6982, "step": 15221 }, { "epoch": 2.484878168238031, "grad_norm": 1.630250096321106, "learning_rate": 1.899876345648148e-05, "loss": 0.5719, "step": 15222 }, { "epoch": 2.4850414268805356, "grad_norm": 1.9126834869384766, "learning_rate": 1.8998625048890674e-05, "loss": 0.6892, "step": 15223 }, { "epoch": 2.4852046855230396, "grad_norm": 1.623711347579956, "learning_rate": 1.8998486632238256e-05, "loss": 0.5224, "step": 15224 }, { "epoch": 2.485367944165544, "grad_norm": 1.9641931056976318, "learning_rate": 1.8998348206524365e-05, "loss": 0.6337, "step": 15225 }, { "epoch": 2.4855312028080485, "grad_norm": 1.6714107990264893, "learning_rate": 1.899820977174914e-05, "loss": 0.4543, "step": 15226 }, { "epoch": 2.485694461450553, "grad_norm": 2.3408260345458984, "learning_rate": 1.899807132791272e-05, "loss": 0.7726, "step": 15227 }, { "epoch": 2.4858577200930574, "grad_norm": 1.7402749061584473, "learning_rate": 1.899793287501524e-05, "loss": 0.5057, "step": 15228 }, { "epoch": 2.486020978735562, "grad_norm": 1.8257228136062622, "learning_rate": 1.899779441305685e-05, "loss": 0.5099, "step": 15229 }, { "epoch": 2.4861842373780663, "grad_norm": 1.818774700164795, "learning_rate": 1.899765594203768e-05, "loss": 0.5561, "step": 15230 }, { "epoch": 2.4863474960205707, "grad_norm": 2.010359764099121, "learning_rate": 1.8997517461957877e-05, "loss": 0.6246, "step": 15231 }, { "epoch": 2.486510754663075, "grad_norm": 1.5944195985794067, "learning_rate": 1.8997378972817572e-05, "loss": 0.5231, "step": 15232 }, { "epoch": 2.4866740133055796, "grad_norm": 1.987410545349121, "learning_rate": 1.8997240474616912e-05, "loss": 0.7832, "step": 15233 }, { "epoch": 2.4868372719480836, "grad_norm": 1.8529878854751587, "learning_rate": 1.899710196735603e-05, "loss": 0.6229, "step": 15234 }, { "epoch": 2.487000530590588, "grad_norm": 1.608444333076477, "learning_rate": 1.899696345103507e-05, "loss": 0.4981, "step": 15235 }, { "epoch": 2.4871637892330924, "grad_norm": 1.7475303411483765, "learning_rate": 1.8996824925654172e-05, "loss": 0.5922, "step": 15236 }, { "epoch": 2.487327047875597, "grad_norm": 1.798302173614502, "learning_rate": 1.8996686391213472e-05, "loss": 0.5363, "step": 15237 }, { "epoch": 2.4874903065181013, "grad_norm": 2.215925693511963, "learning_rate": 1.899654784771311e-05, "loss": 0.6162, "step": 15238 }, { "epoch": 2.4876535651606058, "grad_norm": 1.8501368761062622, "learning_rate": 1.899640929515323e-05, "loss": 0.5736, "step": 15239 }, { "epoch": 2.48781682380311, "grad_norm": 1.763005018234253, "learning_rate": 1.8996270733533965e-05, "loss": 0.6036, "step": 15240 }, { "epoch": 2.4879800824456146, "grad_norm": 1.5302706956863403, "learning_rate": 1.899613216285546e-05, "loss": 0.5074, "step": 15241 }, { "epoch": 2.4881433410881186, "grad_norm": 1.887856364250183, "learning_rate": 1.899599358311785e-05, "loss": 0.615, "step": 15242 }, { "epoch": 2.488306599730623, "grad_norm": 2.036198616027832, "learning_rate": 1.899585499432128e-05, "loss": 0.631, "step": 15243 }, { "epoch": 2.4884698583731275, "grad_norm": 1.6444265842437744, "learning_rate": 1.8995716396465886e-05, "loss": 0.5222, "step": 15244 }, { "epoch": 2.488633117015632, "grad_norm": 1.5952168703079224, "learning_rate": 1.8995577789551806e-05, "loss": 0.4757, "step": 15245 }, { "epoch": 2.4887963756581364, "grad_norm": 1.681412935256958, "learning_rate": 1.8995439173579183e-05, "loss": 0.5971, "step": 15246 }, { "epoch": 2.488959634300641, "grad_norm": 1.9150434732437134, "learning_rate": 1.8995300548548153e-05, "loss": 0.622, "step": 15247 }, { "epoch": 2.4891228929431453, "grad_norm": 1.6267591714859009, "learning_rate": 1.8995161914458858e-05, "loss": 0.5587, "step": 15248 }, { "epoch": 2.4892861515856497, "grad_norm": 1.592461347579956, "learning_rate": 1.899502327131144e-05, "loss": 0.4647, "step": 15249 }, { "epoch": 2.489449410228154, "grad_norm": 2.1651246547698975, "learning_rate": 1.8994884619106034e-05, "loss": 0.503, "step": 15250 }, { "epoch": 2.4896126688706586, "grad_norm": 1.6857830286026, "learning_rate": 1.8994745957842783e-05, "loss": 0.5857, "step": 15251 }, { "epoch": 2.4897759275131626, "grad_norm": 2.2691988945007324, "learning_rate": 1.8994607287521822e-05, "loss": 0.7148, "step": 15252 }, { "epoch": 2.489939186155667, "grad_norm": 1.685960054397583, "learning_rate": 1.8994468608143295e-05, "loss": 0.5004, "step": 15253 }, { "epoch": 2.4901024447981714, "grad_norm": 2.007960557937622, "learning_rate": 1.8994329919707342e-05, "loss": 0.5989, "step": 15254 }, { "epoch": 2.490265703440676, "grad_norm": 1.7944884300231934, "learning_rate": 1.89941912222141e-05, "loss": 0.6293, "step": 15255 }, { "epoch": 2.4904289620831803, "grad_norm": 1.9970473051071167, "learning_rate": 1.899405251566371e-05, "loss": 0.571, "step": 15256 }, { "epoch": 2.4905922207256848, "grad_norm": 1.4883344173431396, "learning_rate": 1.8993913800056314e-05, "loss": 0.468, "step": 15257 }, { "epoch": 2.490755479368189, "grad_norm": 2.1223599910736084, "learning_rate": 1.8993775075392047e-05, "loss": 0.7051, "step": 15258 }, { "epoch": 2.4909187380106936, "grad_norm": 1.7278823852539062, "learning_rate": 1.899363634167105e-05, "loss": 0.5523, "step": 15259 }, { "epoch": 2.4910819966531976, "grad_norm": 1.8149518966674805, "learning_rate": 1.8993497598893465e-05, "loss": 0.5478, "step": 15260 }, { "epoch": 2.491245255295702, "grad_norm": 1.7363868951797485, "learning_rate": 1.899335884705943e-05, "loss": 0.4921, "step": 15261 }, { "epoch": 2.4914085139382065, "grad_norm": 1.8275532722473145, "learning_rate": 1.8993220086169083e-05, "loss": 0.5882, "step": 15262 }, { "epoch": 2.491571772580711, "grad_norm": 1.6526087522506714, "learning_rate": 1.899308131622257e-05, "loss": 0.5515, "step": 15263 }, { "epoch": 2.4917350312232154, "grad_norm": 1.7846477031707764, "learning_rate": 1.8992942537220026e-05, "loss": 0.6172, "step": 15264 }, { "epoch": 2.49189828986572, "grad_norm": 1.6031692028045654, "learning_rate": 1.8992803749161587e-05, "loss": 0.5059, "step": 15265 }, { "epoch": 2.4920615485082243, "grad_norm": 1.8497732877731323, "learning_rate": 1.89926649520474e-05, "loss": 0.6631, "step": 15266 }, { "epoch": 2.4922248071507287, "grad_norm": 1.8849421739578247, "learning_rate": 1.8992526145877603e-05, "loss": 0.6304, "step": 15267 }, { "epoch": 2.492388065793233, "grad_norm": 1.8591523170471191, "learning_rate": 1.899238733065233e-05, "loss": 0.5659, "step": 15268 }, { "epoch": 2.492551324435737, "grad_norm": 1.6644717454910278, "learning_rate": 1.899224850637173e-05, "loss": 0.4635, "step": 15269 }, { "epoch": 2.4927145830782416, "grad_norm": 1.6555181741714478, "learning_rate": 1.8992109673035936e-05, "loss": 0.6116, "step": 15270 }, { "epoch": 2.492877841720746, "grad_norm": 1.6697585582733154, "learning_rate": 1.899197083064509e-05, "loss": 0.5205, "step": 15271 }, { "epoch": 2.4930411003632504, "grad_norm": 1.789363145828247, "learning_rate": 1.8991831979199335e-05, "loss": 0.5597, "step": 15272 }, { "epoch": 2.493204359005755, "grad_norm": 2.005833148956299, "learning_rate": 1.8991693118698803e-05, "loss": 0.5309, "step": 15273 }, { "epoch": 2.4933676176482593, "grad_norm": 1.5895578861236572, "learning_rate": 1.899155424914364e-05, "loss": 0.5387, "step": 15274 }, { "epoch": 2.4935308762907638, "grad_norm": 1.6257562637329102, "learning_rate": 1.8991415370533984e-05, "loss": 0.4927, "step": 15275 }, { "epoch": 2.493694134933268, "grad_norm": 1.803202748298645, "learning_rate": 1.8991276482869976e-05, "loss": 0.5953, "step": 15276 }, { "epoch": 2.493857393575772, "grad_norm": 1.58625328540802, "learning_rate": 1.8991137586151753e-05, "loss": 0.4788, "step": 15277 }, { "epoch": 2.4940206522182766, "grad_norm": 2.1425929069519043, "learning_rate": 1.8990998680379458e-05, "loss": 0.6907, "step": 15278 }, { "epoch": 2.494183910860781, "grad_norm": 1.7461820840835571, "learning_rate": 1.899085976555323e-05, "loss": 0.6068, "step": 15279 }, { "epoch": 2.4943471695032855, "grad_norm": 1.4905539751052856, "learning_rate": 1.8990720841673206e-05, "loss": 0.4801, "step": 15280 }, { "epoch": 2.49451042814579, "grad_norm": 1.4298346042633057, "learning_rate": 1.8990581908739533e-05, "loss": 0.4701, "step": 15281 }, { "epoch": 2.4946736867882944, "grad_norm": 1.7368241548538208, "learning_rate": 1.8990442966752345e-05, "loss": 0.5527, "step": 15282 }, { "epoch": 2.494836945430799, "grad_norm": 1.7311266660690308, "learning_rate": 1.899030401571178e-05, "loss": 0.5393, "step": 15283 }, { "epoch": 2.4950002040733033, "grad_norm": 2.0342414379119873, "learning_rate": 1.8990165055617987e-05, "loss": 0.7085, "step": 15284 }, { "epoch": 2.4951634627158077, "grad_norm": 1.289905309677124, "learning_rate": 1.8990026086471097e-05, "loss": 0.4247, "step": 15285 }, { "epoch": 2.495326721358312, "grad_norm": 1.7010374069213867, "learning_rate": 1.8989887108271255e-05, "loss": 0.5278, "step": 15286 }, { "epoch": 2.495489980000816, "grad_norm": 1.576651930809021, "learning_rate": 1.8989748121018596e-05, "loss": 0.4877, "step": 15287 }, { "epoch": 2.4956532386433206, "grad_norm": 4.0991716384887695, "learning_rate": 1.8989609124713265e-05, "loss": 0.5511, "step": 15288 }, { "epoch": 2.495816497285825, "grad_norm": 1.574345350265503, "learning_rate": 1.89894701193554e-05, "loss": 0.4738, "step": 15289 }, { "epoch": 2.4959797559283294, "grad_norm": 1.840479850769043, "learning_rate": 1.8989331104945135e-05, "loss": 0.5494, "step": 15290 }, { "epoch": 2.496143014570834, "grad_norm": 1.7220674753189087, "learning_rate": 1.8989192081482627e-05, "loss": 0.5814, "step": 15291 }, { "epoch": 2.4963062732133383, "grad_norm": 1.5821717977523804, "learning_rate": 1.8989053048967997e-05, "loss": 0.5492, "step": 15292 }, { "epoch": 2.4964695318558427, "grad_norm": 1.813076376914978, "learning_rate": 1.8988914007401393e-05, "loss": 0.4876, "step": 15293 }, { "epoch": 2.496632790498347, "grad_norm": 1.4857866764068604, "learning_rate": 1.898877495678296e-05, "loss": 0.4909, "step": 15294 }, { "epoch": 2.496796049140851, "grad_norm": 1.3186414241790771, "learning_rate": 1.898863589711283e-05, "loss": 0.4719, "step": 15295 }, { "epoch": 2.4969593077833556, "grad_norm": 2.0170066356658936, "learning_rate": 1.8988496828391147e-05, "loss": 0.5521, "step": 15296 }, { "epoch": 2.49712256642586, "grad_norm": 1.9184571504592896, "learning_rate": 1.8988357750618048e-05, "loss": 0.5693, "step": 15297 }, { "epoch": 2.4972858250683645, "grad_norm": 1.8366007804870605, "learning_rate": 1.8988218663793678e-05, "loss": 0.5973, "step": 15298 }, { "epoch": 2.497449083710869, "grad_norm": 1.6250534057617188, "learning_rate": 1.898807956791817e-05, "loss": 0.4282, "step": 15299 }, { "epoch": 2.4976123423533734, "grad_norm": 2.224246025085449, "learning_rate": 1.8987940462991673e-05, "loss": 0.6863, "step": 15300 }, { "epoch": 2.497775600995878, "grad_norm": 1.6314862966537476, "learning_rate": 1.898780134901432e-05, "loss": 0.5315, "step": 15301 }, { "epoch": 2.4979388596383822, "grad_norm": 1.4989501237869263, "learning_rate": 1.8987662225986253e-05, "loss": 0.4616, "step": 15302 }, { "epoch": 2.4981021182808867, "grad_norm": 2.0461878776550293, "learning_rate": 1.8987523093907612e-05, "loss": 0.6376, "step": 15303 }, { "epoch": 2.498265376923391, "grad_norm": 1.9393723011016846, "learning_rate": 1.898738395277854e-05, "loss": 0.679, "step": 15304 }, { "epoch": 2.498428635565895, "grad_norm": 2.1441428661346436, "learning_rate": 1.8987244802599175e-05, "loss": 0.5527, "step": 15305 }, { "epoch": 2.4985918942083996, "grad_norm": 2.1467645168304443, "learning_rate": 1.8987105643369652e-05, "loss": 0.762, "step": 15306 }, { "epoch": 2.498755152850904, "grad_norm": 1.7199220657348633, "learning_rate": 1.898696647509012e-05, "loss": 0.5999, "step": 15307 }, { "epoch": 2.4989184114934084, "grad_norm": 1.9264482259750366, "learning_rate": 1.8986827297760714e-05, "loss": 0.6411, "step": 15308 }, { "epoch": 2.499081670135913, "grad_norm": 1.6712530851364136, "learning_rate": 1.8986688111381576e-05, "loss": 0.5775, "step": 15309 }, { "epoch": 2.4992449287784173, "grad_norm": 1.4681607484817505, "learning_rate": 1.8986548915952843e-05, "loss": 0.4458, "step": 15310 }, { "epoch": 2.4994081874209217, "grad_norm": 1.9930959939956665, "learning_rate": 1.8986409711474665e-05, "loss": 0.5564, "step": 15311 }, { "epoch": 2.4995714460634257, "grad_norm": 1.7959718704223633, "learning_rate": 1.898627049794717e-05, "loss": 0.5402, "step": 15312 }, { "epoch": 2.49973470470593, "grad_norm": 1.711512804031372, "learning_rate": 1.8986131275370502e-05, "loss": 0.5727, "step": 15313 }, { "epoch": 2.4998979633484346, "grad_norm": 1.5735743045806885, "learning_rate": 1.8985992043744802e-05, "loss": 0.5444, "step": 15314 }, { "epoch": 2.500061221990939, "grad_norm": 1.7886338233947754, "learning_rate": 1.8985852803070212e-05, "loss": 0.567, "step": 15315 }, { "epoch": 2.5002244806334435, "grad_norm": 1.5958024263381958, "learning_rate": 1.8985713553346867e-05, "loss": 0.5049, "step": 15316 }, { "epoch": 2.500387739275948, "grad_norm": 1.9063758850097656, "learning_rate": 1.8985574294574917e-05, "loss": 0.553, "step": 15317 }, { "epoch": 2.5005509979184524, "grad_norm": 1.4029306173324585, "learning_rate": 1.898543502675449e-05, "loss": 0.4095, "step": 15318 }, { "epoch": 2.500714256560957, "grad_norm": 1.8453274965286255, "learning_rate": 1.8985295749885738e-05, "loss": 0.6256, "step": 15319 }, { "epoch": 2.5008775152034612, "grad_norm": 1.4727449417114258, "learning_rate": 1.898515646396879e-05, "loss": 0.5519, "step": 15320 }, { "epoch": 2.5010407738459657, "grad_norm": 2.1756129264831543, "learning_rate": 1.8985017169003797e-05, "loss": 0.7058, "step": 15321 }, { "epoch": 2.50120403248847, "grad_norm": 1.8086318969726562, "learning_rate": 1.8984877864990888e-05, "loss": 0.4992, "step": 15322 }, { "epoch": 2.501367291130974, "grad_norm": 1.976905345916748, "learning_rate": 1.8984738551930214e-05, "loss": 0.6073, "step": 15323 }, { "epoch": 2.5015305497734786, "grad_norm": 1.7100523710250854, "learning_rate": 1.898459922982191e-05, "loss": 0.4957, "step": 15324 }, { "epoch": 2.501693808415983, "grad_norm": 2.042928695678711, "learning_rate": 1.8984459898666116e-05, "loss": 0.6483, "step": 15325 }, { "epoch": 2.5018570670584874, "grad_norm": 2.088162422180176, "learning_rate": 1.8984320558462976e-05, "loss": 0.6257, "step": 15326 }, { "epoch": 2.502020325700992, "grad_norm": 1.781080961227417, "learning_rate": 1.8984181209212623e-05, "loss": 0.5795, "step": 15327 }, { "epoch": 2.5021835843434963, "grad_norm": 2.4284236431121826, "learning_rate": 1.8984041850915206e-05, "loss": 0.687, "step": 15328 }, { "epoch": 2.5023468429860003, "grad_norm": 1.6588951349258423, "learning_rate": 1.898390248357086e-05, "loss": 0.4768, "step": 15329 }, { "epoch": 2.5025101016285047, "grad_norm": 1.8674017190933228, "learning_rate": 1.8983763107179726e-05, "loss": 0.6167, "step": 15330 }, { "epoch": 2.502673360271009, "grad_norm": 1.909936547279358, "learning_rate": 1.8983623721741943e-05, "loss": 0.5807, "step": 15331 }, { "epoch": 2.5028366189135136, "grad_norm": 1.4796059131622314, "learning_rate": 1.898348432725766e-05, "loss": 0.5099, "step": 15332 }, { "epoch": 2.502999877556018, "grad_norm": 1.7317863702774048, "learning_rate": 1.8983344923727002e-05, "loss": 0.5628, "step": 15333 }, { "epoch": 2.5031631361985225, "grad_norm": 1.5917835235595703, "learning_rate": 1.898320551115012e-05, "loss": 0.5213, "step": 15334 }, { "epoch": 2.503326394841027, "grad_norm": 1.8672573566436768, "learning_rate": 1.898306608952716e-05, "loss": 0.5514, "step": 15335 }, { "epoch": 2.5034896534835314, "grad_norm": 2.1263020038604736, "learning_rate": 1.8982926658858248e-05, "loss": 0.5521, "step": 15336 }, { "epoch": 2.503652912126036, "grad_norm": 1.7098876237869263, "learning_rate": 1.8982787219143532e-05, "loss": 0.5029, "step": 15337 }, { "epoch": 2.5038161707685402, "grad_norm": 1.8353744745254517, "learning_rate": 1.8982647770383152e-05, "loss": 0.5267, "step": 15338 }, { "epoch": 2.5039794294110447, "grad_norm": 2.0243725776672363, "learning_rate": 1.8982508312577247e-05, "loss": 0.7436, "step": 15339 }, { "epoch": 2.5041426880535487, "grad_norm": 1.8983629941940308, "learning_rate": 1.898236884572596e-05, "loss": 0.5658, "step": 15340 }, { "epoch": 2.504305946696053, "grad_norm": 1.69605553150177, "learning_rate": 1.898222936982943e-05, "loss": 0.5125, "step": 15341 }, { "epoch": 2.5044692053385575, "grad_norm": 1.5911725759506226, "learning_rate": 1.8982089884887797e-05, "loss": 0.4691, "step": 15342 }, { "epoch": 2.504632463981062, "grad_norm": 1.9559563398361206, "learning_rate": 1.89819503909012e-05, "loss": 0.6164, "step": 15343 }, { "epoch": 2.5047957226235664, "grad_norm": 1.741892695426941, "learning_rate": 1.8981810887869784e-05, "loss": 0.5469, "step": 15344 }, { "epoch": 2.504958981266071, "grad_norm": 1.7450881004333496, "learning_rate": 1.8981671375793688e-05, "loss": 0.5759, "step": 15345 }, { "epoch": 2.5051222399085753, "grad_norm": 1.718074917793274, "learning_rate": 1.8981531854673048e-05, "loss": 0.5642, "step": 15346 }, { "epoch": 2.5052854985510793, "grad_norm": 1.8150519132614136, "learning_rate": 1.898139232450801e-05, "loss": 0.554, "step": 15347 }, { "epoch": 2.5054487571935837, "grad_norm": 1.5116610527038574, "learning_rate": 1.8981252785298712e-05, "loss": 0.4169, "step": 15348 }, { "epoch": 2.505612015836088, "grad_norm": 1.7404770851135254, "learning_rate": 1.8981113237045297e-05, "loss": 0.5721, "step": 15349 }, { "epoch": 2.5057752744785926, "grad_norm": 2.4761667251586914, "learning_rate": 1.8980973679747897e-05, "loss": 0.6561, "step": 15350 }, { "epoch": 2.505938533121097, "grad_norm": 1.6084812879562378, "learning_rate": 1.8980834113406666e-05, "loss": 0.4764, "step": 15351 }, { "epoch": 2.5061017917636015, "grad_norm": 1.7925325632095337, "learning_rate": 1.8980694538021735e-05, "loss": 0.6335, "step": 15352 }, { "epoch": 2.506265050406106, "grad_norm": 1.8174947500228882, "learning_rate": 1.8980554953593243e-05, "loss": 0.5407, "step": 15353 }, { "epoch": 2.5064283090486104, "grad_norm": 2.1591715812683105, "learning_rate": 1.898041536012134e-05, "loss": 0.6912, "step": 15354 }, { "epoch": 2.506591567691115, "grad_norm": 1.8550106287002563, "learning_rate": 1.8980275757606157e-05, "loss": 0.5265, "step": 15355 }, { "epoch": 2.5067548263336192, "grad_norm": 1.8440481424331665, "learning_rate": 1.8980136146047843e-05, "loss": 0.6039, "step": 15356 }, { "epoch": 2.5069180849761237, "grad_norm": 1.8630213737487793, "learning_rate": 1.897999652544653e-05, "loss": 0.5567, "step": 15357 }, { "epoch": 2.5070813436186277, "grad_norm": 1.8569464683532715, "learning_rate": 1.8979856895802364e-05, "loss": 0.6151, "step": 15358 }, { "epoch": 2.507244602261132, "grad_norm": 1.953184962272644, "learning_rate": 1.8979717257115483e-05, "loss": 0.5485, "step": 15359 }, { "epoch": 2.5074078609036365, "grad_norm": 1.8059511184692383, "learning_rate": 1.8979577609386033e-05, "loss": 0.6601, "step": 15360 }, { "epoch": 2.507571119546141, "grad_norm": 2.0469164848327637, "learning_rate": 1.8979437952614152e-05, "loss": 0.5971, "step": 15361 }, { "epoch": 2.5077343781886454, "grad_norm": 2.0267255306243896, "learning_rate": 1.8979298286799973e-05, "loss": 0.5241, "step": 15362 }, { "epoch": 2.50789763683115, "grad_norm": 1.7859468460083008, "learning_rate": 1.8979158611943647e-05, "loss": 0.5398, "step": 15363 }, { "epoch": 2.5080608954736543, "grad_norm": 1.7014647722244263, "learning_rate": 1.897901892804531e-05, "loss": 0.5599, "step": 15364 }, { "epoch": 2.5082241541161583, "grad_norm": 1.8629844188690186, "learning_rate": 1.8978879235105102e-05, "loss": 0.523, "step": 15365 }, { "epoch": 2.5083874127586627, "grad_norm": 1.5623042583465576, "learning_rate": 1.897873953312317e-05, "loss": 0.559, "step": 15366 }, { "epoch": 2.508550671401167, "grad_norm": 1.8575050830841064, "learning_rate": 1.8978599822099647e-05, "loss": 0.5539, "step": 15367 }, { "epoch": 2.5087139300436716, "grad_norm": 1.934207797050476, "learning_rate": 1.897846010203467e-05, "loss": 0.6537, "step": 15368 }, { "epoch": 2.508877188686176, "grad_norm": 2.8371829986572266, "learning_rate": 1.8978320372928395e-05, "loss": 0.7919, "step": 15369 }, { "epoch": 2.5090404473286805, "grad_norm": 1.6814024448394775, "learning_rate": 1.897818063478095e-05, "loss": 0.5888, "step": 15370 }, { "epoch": 2.509203705971185, "grad_norm": 1.7262859344482422, "learning_rate": 1.897804088759248e-05, "loss": 0.5028, "step": 15371 }, { "epoch": 2.5093669646136894, "grad_norm": 1.9233598709106445, "learning_rate": 1.8977901131363124e-05, "loss": 0.7353, "step": 15372 }, { "epoch": 2.509530223256194, "grad_norm": 1.8921241760253906, "learning_rate": 1.8977761366093027e-05, "loss": 0.6059, "step": 15373 }, { "epoch": 2.5096934818986982, "grad_norm": 1.6143262386322021, "learning_rate": 1.8977621591782323e-05, "loss": 0.6245, "step": 15374 }, { "epoch": 2.5098567405412027, "grad_norm": 1.716952919960022, "learning_rate": 1.8977481808431156e-05, "loss": 0.5108, "step": 15375 }, { "epoch": 2.5100199991837067, "grad_norm": 1.907759666442871, "learning_rate": 1.897734201603967e-05, "loss": 0.5285, "step": 15376 }, { "epoch": 2.510183257826211, "grad_norm": 1.6511770486831665, "learning_rate": 1.8977202214608002e-05, "loss": 0.5196, "step": 15377 }, { "epoch": 2.5103465164687155, "grad_norm": 1.6389825344085693, "learning_rate": 1.8977062404136296e-05, "loss": 0.559, "step": 15378 }, { "epoch": 2.51050977511122, "grad_norm": 1.9579825401306152, "learning_rate": 1.8976922584624686e-05, "loss": 0.5964, "step": 15379 }, { "epoch": 2.5106730337537244, "grad_norm": 1.9952925443649292, "learning_rate": 1.8976782756073323e-05, "loss": 0.6116, "step": 15380 }, { "epoch": 2.510836292396229, "grad_norm": 1.7638075351715088, "learning_rate": 1.8976642918482337e-05, "loss": 0.6135, "step": 15381 }, { "epoch": 2.510999551038733, "grad_norm": 1.7285293340682983, "learning_rate": 1.8976503071851876e-05, "loss": 0.5496, "step": 15382 }, { "epoch": 2.5111628096812373, "grad_norm": 2.0081753730773926, "learning_rate": 1.8976363216182075e-05, "loss": 0.6473, "step": 15383 }, { "epoch": 2.5113260683237417, "grad_norm": 1.5133947134017944, "learning_rate": 1.8976223351473085e-05, "loss": 0.5066, "step": 15384 }, { "epoch": 2.511489326966246, "grad_norm": 1.7987388372421265, "learning_rate": 1.8976083477725037e-05, "loss": 0.6885, "step": 15385 }, { "epoch": 2.5116525856087506, "grad_norm": 1.9150711297988892, "learning_rate": 1.8975943594938077e-05, "loss": 0.7141, "step": 15386 }, { "epoch": 2.511815844251255, "grad_norm": 1.8509345054626465, "learning_rate": 1.8975803703112343e-05, "loss": 0.609, "step": 15387 }, { "epoch": 2.5119791028937595, "grad_norm": 1.4671988487243652, "learning_rate": 1.8975663802247978e-05, "loss": 0.5441, "step": 15388 }, { "epoch": 2.512142361536264, "grad_norm": 1.507286787033081, "learning_rate": 1.897552389234512e-05, "loss": 0.4773, "step": 15389 }, { "epoch": 2.5123056201787684, "grad_norm": 1.7129830121994019, "learning_rate": 1.8975383973403915e-05, "loss": 0.4857, "step": 15390 }, { "epoch": 2.512468878821273, "grad_norm": 1.5951813459396362, "learning_rate": 1.8975244045424498e-05, "loss": 0.6345, "step": 15391 }, { "epoch": 2.5126321374637772, "grad_norm": 1.9841580390930176, "learning_rate": 1.8975104108407012e-05, "loss": 0.7037, "step": 15392 }, { "epoch": 2.512795396106281, "grad_norm": 1.7159720659255981, "learning_rate": 1.89749641623516e-05, "loss": 0.503, "step": 15393 }, { "epoch": 2.5129586547487857, "grad_norm": 1.8520512580871582, "learning_rate": 1.8974824207258404e-05, "loss": 0.5101, "step": 15394 }, { "epoch": 2.51312191339129, "grad_norm": 1.7864210605621338, "learning_rate": 1.8974684243127556e-05, "loss": 0.666, "step": 15395 }, { "epoch": 2.5132851720337945, "grad_norm": 1.454616904258728, "learning_rate": 1.897454426995921e-05, "loss": 0.486, "step": 15396 }, { "epoch": 2.513448430676299, "grad_norm": 1.5657826662063599, "learning_rate": 1.8974404287753498e-05, "loss": 0.5622, "step": 15397 }, { "epoch": 2.5136116893188034, "grad_norm": 1.9600003957748413, "learning_rate": 1.897426429651056e-05, "loss": 0.6929, "step": 15398 }, { "epoch": 2.513774947961308, "grad_norm": 2.154644012451172, "learning_rate": 1.8974124296230543e-05, "loss": 0.6483, "step": 15399 }, { "epoch": 2.513938206603812, "grad_norm": 1.8486080169677734, "learning_rate": 1.8973984286913584e-05, "loss": 0.5765, "step": 15400 }, { "epoch": 2.5141014652463163, "grad_norm": 1.4991530179977417, "learning_rate": 1.8973844268559827e-05, "loss": 0.5031, "step": 15401 }, { "epoch": 2.5142647238888207, "grad_norm": 1.601968765258789, "learning_rate": 1.897370424116941e-05, "loss": 0.5304, "step": 15402 }, { "epoch": 2.514427982531325, "grad_norm": 1.7877416610717773, "learning_rate": 1.897356420474248e-05, "loss": 0.5772, "step": 15403 }, { "epoch": 2.5145912411738296, "grad_norm": 1.8593254089355469, "learning_rate": 1.8973424159279168e-05, "loss": 0.4647, "step": 15404 }, { "epoch": 2.514754499816334, "grad_norm": 1.4628194570541382, "learning_rate": 1.897328410477962e-05, "loss": 0.4336, "step": 15405 }, { "epoch": 2.5149177584588385, "grad_norm": 1.9044932126998901, "learning_rate": 1.897314404124398e-05, "loss": 0.5589, "step": 15406 }, { "epoch": 2.515081017101343, "grad_norm": 1.550415277481079, "learning_rate": 1.8973003968672382e-05, "loss": 0.5043, "step": 15407 }, { "epoch": 2.5152442757438473, "grad_norm": 1.948367714881897, "learning_rate": 1.8972863887064976e-05, "loss": 0.4931, "step": 15408 }, { "epoch": 2.515407534386352, "grad_norm": 1.710620641708374, "learning_rate": 1.8972723796421894e-05, "loss": 0.5084, "step": 15409 }, { "epoch": 2.5155707930288562, "grad_norm": 1.8493553400039673, "learning_rate": 1.8972583696743284e-05, "loss": 0.6129, "step": 15410 }, { "epoch": 2.51573405167136, "grad_norm": 1.5985106229782104, "learning_rate": 1.8972443588029285e-05, "loss": 0.5095, "step": 15411 }, { "epoch": 2.5158973103138647, "grad_norm": 1.7483115196228027, "learning_rate": 1.8972303470280037e-05, "loss": 0.5175, "step": 15412 }, { "epoch": 2.516060568956369, "grad_norm": 1.9234007596969604, "learning_rate": 1.8972163343495685e-05, "loss": 0.5876, "step": 15413 }, { "epoch": 2.5162238275988735, "grad_norm": 1.8653463125228882, "learning_rate": 1.897202320767636e-05, "loss": 0.5263, "step": 15414 }, { "epoch": 2.516387086241378, "grad_norm": 2.034271001815796, "learning_rate": 1.897188306282222e-05, "loss": 0.6845, "step": 15415 }, { "epoch": 2.5165503448838824, "grad_norm": 2.0700554847717285, "learning_rate": 1.8971742908933388e-05, "loss": 0.6745, "step": 15416 }, { "epoch": 2.516713603526387, "grad_norm": 1.6086506843566895, "learning_rate": 1.8971602746010016e-05, "loss": 0.5404, "step": 15417 }, { "epoch": 2.516876862168891, "grad_norm": 1.7305883169174194, "learning_rate": 1.8971462574052242e-05, "loss": 0.5603, "step": 15418 }, { "epoch": 2.5170401208113953, "grad_norm": 1.7362991571426392, "learning_rate": 1.8971322393060207e-05, "loss": 0.4697, "step": 15419 }, { "epoch": 2.5172033794538997, "grad_norm": 2.006256580352783, "learning_rate": 1.8971182203034055e-05, "loss": 0.574, "step": 15420 }, { "epoch": 2.517366638096404, "grad_norm": 1.6268655061721802, "learning_rate": 1.8971042003973923e-05, "loss": 0.4831, "step": 15421 }, { "epoch": 2.5175298967389086, "grad_norm": 2.052827835083008, "learning_rate": 1.897090179587995e-05, "loss": 0.6425, "step": 15422 }, { "epoch": 2.517693155381413, "grad_norm": 1.8361598253250122, "learning_rate": 1.8970761578752288e-05, "loss": 0.6706, "step": 15423 }, { "epoch": 2.5178564140239175, "grad_norm": 2.0285630226135254, "learning_rate": 1.897062135259107e-05, "loss": 0.674, "step": 15424 }, { "epoch": 2.518019672666422, "grad_norm": 1.724624752998352, "learning_rate": 1.8970481117396438e-05, "loss": 0.4434, "step": 15425 }, { "epoch": 2.5181829313089263, "grad_norm": 2.0234262943267822, "learning_rate": 1.897034087316853e-05, "loss": 0.5974, "step": 15426 }, { "epoch": 2.518346189951431, "grad_norm": 2.1521477699279785, "learning_rate": 1.8970200619907497e-05, "loss": 0.6329, "step": 15427 }, { "epoch": 2.518509448593935, "grad_norm": 2.5792434215545654, "learning_rate": 1.8970060357613472e-05, "loss": 0.6436, "step": 15428 }, { "epoch": 2.518672707236439, "grad_norm": 1.5217803716659546, "learning_rate": 1.89699200862866e-05, "loss": 0.4623, "step": 15429 }, { "epoch": 2.5188359658789437, "grad_norm": 1.6253303289413452, "learning_rate": 1.896977980592702e-05, "loss": 0.5392, "step": 15430 }, { "epoch": 2.518999224521448, "grad_norm": 1.8891643285751343, "learning_rate": 1.8969639516534873e-05, "loss": 0.5863, "step": 15431 }, { "epoch": 2.5191624831639525, "grad_norm": 1.9075859785079956, "learning_rate": 1.8969499218110302e-05, "loss": 0.5881, "step": 15432 }, { "epoch": 2.519325741806457, "grad_norm": 1.914431095123291, "learning_rate": 1.896935891065345e-05, "loss": 0.5814, "step": 15433 }, { "epoch": 2.5194890004489614, "grad_norm": 1.6938759088516235, "learning_rate": 1.896921859416445e-05, "loss": 0.5835, "step": 15434 }, { "epoch": 2.5196522590914654, "grad_norm": 1.9179179668426514, "learning_rate": 1.8969078268643453e-05, "loss": 0.5383, "step": 15435 }, { "epoch": 2.51981551773397, "grad_norm": 2.0753955841064453, "learning_rate": 1.8968937934090596e-05, "loss": 0.5461, "step": 15436 }, { "epoch": 2.5199787763764743, "grad_norm": 1.4770301580429077, "learning_rate": 1.8968797590506023e-05, "loss": 0.5085, "step": 15437 }, { "epoch": 2.5201420350189787, "grad_norm": 1.8185725212097168, "learning_rate": 1.896865723788987e-05, "loss": 0.5712, "step": 15438 }, { "epoch": 2.520305293661483, "grad_norm": 1.9534437656402588, "learning_rate": 1.8968516876242282e-05, "loss": 0.5489, "step": 15439 }, { "epoch": 2.5204685523039876, "grad_norm": 1.800718069076538, "learning_rate": 1.8968376505563402e-05, "loss": 0.5623, "step": 15440 }, { "epoch": 2.520631810946492, "grad_norm": 1.7886732816696167, "learning_rate": 1.896823612585337e-05, "loss": 0.5806, "step": 15441 }, { "epoch": 2.5207950695889965, "grad_norm": 3.0370326042175293, "learning_rate": 1.8968095737112325e-05, "loss": 0.52, "step": 15442 }, { "epoch": 2.520958328231501, "grad_norm": 1.6140779256820679, "learning_rate": 1.896795533934041e-05, "loss": 0.5862, "step": 15443 }, { "epoch": 2.5211215868740053, "grad_norm": 1.8379931449890137, "learning_rate": 1.8967814932537765e-05, "loss": 0.5331, "step": 15444 }, { "epoch": 2.5212848455165098, "grad_norm": 1.5922820568084717, "learning_rate": 1.896767451670453e-05, "loss": 0.5421, "step": 15445 }, { "epoch": 2.5214481041590138, "grad_norm": 1.796104073524475, "learning_rate": 1.896753409184086e-05, "loss": 0.5935, "step": 15446 }, { "epoch": 2.521611362801518, "grad_norm": 1.5361642837524414, "learning_rate": 1.8967393657946877e-05, "loss": 0.553, "step": 15447 }, { "epoch": 2.5217746214440226, "grad_norm": 1.5535531044006348, "learning_rate": 1.896725321502273e-05, "loss": 0.5761, "step": 15448 }, { "epoch": 2.521937880086527, "grad_norm": 2.1837503910064697, "learning_rate": 1.8967112763068566e-05, "loss": 0.5466, "step": 15449 }, { "epoch": 2.5221011387290315, "grad_norm": 1.7846993207931519, "learning_rate": 1.8966972302084516e-05, "loss": 0.4383, "step": 15450 }, { "epoch": 2.522264397371536, "grad_norm": 1.4909768104553223, "learning_rate": 1.8966831832070735e-05, "loss": 0.5374, "step": 15451 }, { "epoch": 2.5224276560140404, "grad_norm": 1.7795583009719849, "learning_rate": 1.8966691353027352e-05, "loss": 0.528, "step": 15452 }, { "epoch": 2.5225909146565444, "grad_norm": 1.8623428344726562, "learning_rate": 1.8966550864954513e-05, "loss": 0.6029, "step": 15453 }, { "epoch": 2.522754173299049, "grad_norm": 1.6299068927764893, "learning_rate": 1.896641036785236e-05, "loss": 0.5059, "step": 15454 }, { "epoch": 2.5229174319415533, "grad_norm": 1.6741482019424438, "learning_rate": 1.8966269861721037e-05, "loss": 0.4828, "step": 15455 }, { "epoch": 2.5230806905840577, "grad_norm": 1.8065836429595947, "learning_rate": 1.896612934656068e-05, "loss": 0.5465, "step": 15456 }, { "epoch": 2.523243949226562, "grad_norm": 2.06701922416687, "learning_rate": 1.8965988822371432e-05, "loss": 0.6215, "step": 15457 }, { "epoch": 2.5234072078690666, "grad_norm": 2.1135871410369873, "learning_rate": 1.8965848289153438e-05, "loss": 0.7192, "step": 15458 }, { "epoch": 2.523570466511571, "grad_norm": 1.7681896686553955, "learning_rate": 1.8965707746906835e-05, "loss": 0.525, "step": 15459 }, { "epoch": 2.5237337251540755, "grad_norm": 1.8735759258270264, "learning_rate": 1.8965567195631766e-05, "loss": 0.5356, "step": 15460 }, { "epoch": 2.52389698379658, "grad_norm": 1.756192922592163, "learning_rate": 1.8965426635328376e-05, "loss": 0.517, "step": 15461 }, { "epoch": 2.5240602424390843, "grad_norm": 1.905102252960205, "learning_rate": 1.8965286065996804e-05, "loss": 0.5389, "step": 15462 }, { "epoch": 2.5242235010815888, "grad_norm": 1.9529906511306763, "learning_rate": 1.896514548763719e-05, "loss": 0.7071, "step": 15463 }, { "epoch": 2.5243867597240928, "grad_norm": 1.8024744987487793, "learning_rate": 1.8965004900249676e-05, "loss": 0.5975, "step": 15464 }, { "epoch": 2.524550018366597, "grad_norm": 1.928567886352539, "learning_rate": 1.8964864303834408e-05, "loss": 0.6711, "step": 15465 }, { "epoch": 2.5247132770091016, "grad_norm": 1.501198172569275, "learning_rate": 1.896472369839152e-05, "loss": 0.5708, "step": 15466 }, { "epoch": 2.524876535651606, "grad_norm": 1.8302795886993408, "learning_rate": 1.896458308392116e-05, "loss": 0.546, "step": 15467 }, { "epoch": 2.5250397942941105, "grad_norm": 1.9496045112609863, "learning_rate": 1.8964442460423463e-05, "loss": 0.5482, "step": 15468 }, { "epoch": 2.525203052936615, "grad_norm": 1.6023030281066895, "learning_rate": 1.896430182789858e-05, "loss": 0.4724, "step": 15469 }, { "epoch": 2.525366311579119, "grad_norm": 1.7500455379486084, "learning_rate": 1.896416118634665e-05, "loss": 0.4877, "step": 15470 }, { "epoch": 2.5255295702216234, "grad_norm": 1.707309603691101, "learning_rate": 1.8964020535767805e-05, "loss": 0.509, "step": 15471 }, { "epoch": 2.525692828864128, "grad_norm": 1.752016305923462, "learning_rate": 1.8963879876162196e-05, "loss": 0.5319, "step": 15472 }, { "epoch": 2.5258560875066323, "grad_norm": 1.6784148216247559, "learning_rate": 1.8963739207529963e-05, "loss": 0.4793, "step": 15473 }, { "epoch": 2.5260193461491367, "grad_norm": 2.0695927143096924, "learning_rate": 1.8963598529871245e-05, "loss": 0.6274, "step": 15474 }, { "epoch": 2.526182604791641, "grad_norm": 1.80709707736969, "learning_rate": 1.896345784318619e-05, "loss": 0.5136, "step": 15475 }, { "epoch": 2.5263458634341456, "grad_norm": 1.9935286045074463, "learning_rate": 1.896331714747493e-05, "loss": 0.5119, "step": 15476 }, { "epoch": 2.52650912207665, "grad_norm": 1.9305263757705688, "learning_rate": 1.8963176442737616e-05, "loss": 0.5051, "step": 15477 }, { "epoch": 2.5266723807191545, "grad_norm": 1.718499779701233, "learning_rate": 1.8963035728974386e-05, "loss": 0.6092, "step": 15478 }, { "epoch": 2.526835639361659, "grad_norm": 1.955952763557434, "learning_rate": 1.8962895006185377e-05, "loss": 0.6377, "step": 15479 }, { "epoch": 2.5269988980041633, "grad_norm": 1.9952985048294067, "learning_rate": 1.896275427437074e-05, "loss": 0.5392, "step": 15480 }, { "epoch": 2.5271621566466673, "grad_norm": 1.9005556106567383, "learning_rate": 1.896261353353061e-05, "loss": 0.6465, "step": 15481 }, { "epoch": 2.5273254152891718, "grad_norm": 1.6104600429534912, "learning_rate": 1.896247278366513e-05, "loss": 0.4685, "step": 15482 }, { "epoch": 2.527488673931676, "grad_norm": 1.663163185119629, "learning_rate": 1.8962332024774444e-05, "loss": 0.5659, "step": 15483 }, { "epoch": 2.5276519325741806, "grad_norm": 1.6410837173461914, "learning_rate": 1.8962191256858687e-05, "loss": 0.5055, "step": 15484 }, { "epoch": 2.527815191216685, "grad_norm": 1.904684066772461, "learning_rate": 1.896205047991801e-05, "loss": 0.6281, "step": 15485 }, { "epoch": 2.5279784498591895, "grad_norm": 1.7176446914672852, "learning_rate": 1.8961909693952552e-05, "loss": 0.5319, "step": 15486 }, { "epoch": 2.528141708501694, "grad_norm": 1.823876976966858, "learning_rate": 1.8961768898962448e-05, "loss": 0.595, "step": 15487 }, { "epoch": 2.528304967144198, "grad_norm": 2.0251004695892334, "learning_rate": 1.896162809494785e-05, "loss": 0.6194, "step": 15488 }, { "epoch": 2.5284682257867024, "grad_norm": 1.6831303834915161, "learning_rate": 1.896148728190889e-05, "loss": 0.5433, "step": 15489 }, { "epoch": 2.528631484429207, "grad_norm": 1.906517744064331, "learning_rate": 1.8961346459845724e-05, "loss": 0.6347, "step": 15490 }, { "epoch": 2.5287947430717113, "grad_norm": 1.420283555984497, "learning_rate": 1.8961205628758477e-05, "loss": 0.5645, "step": 15491 }, { "epoch": 2.5289580017142157, "grad_norm": 1.9725983142852783, "learning_rate": 1.89610647886473e-05, "loss": 0.6512, "step": 15492 }, { "epoch": 2.52912126035672, "grad_norm": 1.9901896715164185, "learning_rate": 1.896092393951233e-05, "loss": 0.579, "step": 15493 }, { "epoch": 2.5292845189992246, "grad_norm": 1.5632842779159546, "learning_rate": 1.8960783081353716e-05, "loss": 0.4979, "step": 15494 }, { "epoch": 2.529447777641729, "grad_norm": 1.9501749277114868, "learning_rate": 1.8960642214171594e-05, "loss": 0.6494, "step": 15495 }, { "epoch": 2.5296110362842334, "grad_norm": 1.4787616729736328, "learning_rate": 1.8960501337966107e-05, "loss": 0.5249, "step": 15496 }, { "epoch": 2.529774294926738, "grad_norm": 1.8069329261779785, "learning_rate": 1.8960360452737402e-05, "loss": 0.4489, "step": 15497 }, { "epoch": 2.5299375535692423, "grad_norm": 2.054622173309326, "learning_rate": 1.896021955848561e-05, "loss": 0.7116, "step": 15498 }, { "epoch": 2.5301008122117463, "grad_norm": 2.12978196144104, "learning_rate": 1.8960078655210886e-05, "loss": 0.6414, "step": 15499 }, { "epoch": 2.5302640708542508, "grad_norm": 2.1424694061279297, "learning_rate": 1.895993774291336e-05, "loss": 0.6949, "step": 15500 }, { "epoch": 2.530427329496755, "grad_norm": 1.7027549743652344, "learning_rate": 1.895979682159318e-05, "loss": 0.599, "step": 15501 }, { "epoch": 2.5305905881392596, "grad_norm": 1.7298223972320557, "learning_rate": 1.8959655891250487e-05, "loss": 0.5505, "step": 15502 }, { "epoch": 2.530753846781764, "grad_norm": 1.855724811553955, "learning_rate": 1.8959514951885426e-05, "loss": 0.6658, "step": 15503 }, { "epoch": 2.5309171054242685, "grad_norm": 1.6589604616165161, "learning_rate": 1.8959374003498133e-05, "loss": 0.4898, "step": 15504 }, { "epoch": 2.531080364066773, "grad_norm": 1.9370160102844238, "learning_rate": 1.8959233046088753e-05, "loss": 0.59, "step": 15505 }, { "epoch": 2.531243622709277, "grad_norm": 1.7273080348968506, "learning_rate": 1.8959092079657426e-05, "loss": 0.4995, "step": 15506 }, { "epoch": 2.5314068813517814, "grad_norm": 1.750929594039917, "learning_rate": 1.89589511042043e-05, "loss": 0.5758, "step": 15507 }, { "epoch": 2.531570139994286, "grad_norm": 1.652711033821106, "learning_rate": 1.895881011972951e-05, "loss": 0.4901, "step": 15508 }, { "epoch": 2.5317333986367903, "grad_norm": 1.6591339111328125, "learning_rate": 1.89586691262332e-05, "loss": 0.5729, "step": 15509 }, { "epoch": 2.5318966572792947, "grad_norm": 1.7995052337646484, "learning_rate": 1.8958528123715513e-05, "loss": 0.4821, "step": 15510 }, { "epoch": 2.532059915921799, "grad_norm": 1.6878199577331543, "learning_rate": 1.895838711217659e-05, "loss": 0.5776, "step": 15511 }, { "epoch": 2.5322231745643036, "grad_norm": 1.5728720426559448, "learning_rate": 1.8958246091616574e-05, "loss": 0.4677, "step": 15512 }, { "epoch": 2.532386433206808, "grad_norm": 1.6997599601745605, "learning_rate": 1.895810506203561e-05, "loss": 0.5363, "step": 15513 }, { "epoch": 2.5325496918493124, "grad_norm": 1.8433016538619995, "learning_rate": 1.8957964023433833e-05, "loss": 0.592, "step": 15514 }, { "epoch": 2.532712950491817, "grad_norm": 1.5629569292068481, "learning_rate": 1.8957822975811388e-05, "loss": 0.4375, "step": 15515 }, { "epoch": 2.5328762091343213, "grad_norm": 1.7639583349227905, "learning_rate": 1.895768191916842e-05, "loss": 0.5267, "step": 15516 }, { "epoch": 2.5330394677768253, "grad_norm": 1.476737141609192, "learning_rate": 1.8957540853505067e-05, "loss": 0.5005, "step": 15517 }, { "epoch": 2.5332027264193298, "grad_norm": 1.859265923500061, "learning_rate": 1.8957399778821472e-05, "loss": 0.5351, "step": 15518 }, { "epoch": 2.533365985061834, "grad_norm": 1.825221061706543, "learning_rate": 1.895725869511778e-05, "loss": 0.5414, "step": 15519 }, { "epoch": 2.5335292437043386, "grad_norm": 1.7404991388320923, "learning_rate": 1.895711760239413e-05, "loss": 0.5549, "step": 15520 }, { "epoch": 2.533692502346843, "grad_norm": 1.8609822988510132, "learning_rate": 1.8956976500650664e-05, "loss": 0.5512, "step": 15521 }, { "epoch": 2.5338557609893475, "grad_norm": 2.158123731613159, "learning_rate": 1.8956835389887528e-05, "loss": 0.5479, "step": 15522 }, { "epoch": 2.5340190196318515, "grad_norm": 1.8159396648406982, "learning_rate": 1.895669427010486e-05, "loss": 0.6136, "step": 15523 }, { "epoch": 2.534182278274356, "grad_norm": 1.5652967691421509, "learning_rate": 1.89565531413028e-05, "loss": 0.4917, "step": 15524 }, { "epoch": 2.5343455369168604, "grad_norm": 1.7781562805175781, "learning_rate": 1.89564120034815e-05, "loss": 0.5603, "step": 15525 }, { "epoch": 2.534508795559365, "grad_norm": 1.5345760583877563, "learning_rate": 1.8956270856641087e-05, "loss": 0.5152, "step": 15526 }, { "epoch": 2.5346720542018693, "grad_norm": 1.6790302991867065, "learning_rate": 1.8956129700781717e-05, "loss": 0.4991, "step": 15527 }, { "epoch": 2.5348353128443737, "grad_norm": 1.9713993072509766, "learning_rate": 1.8955988535903526e-05, "loss": 0.6142, "step": 15528 }, { "epoch": 2.534998571486878, "grad_norm": 2.530395746231079, "learning_rate": 1.8955847362006653e-05, "loss": 0.5279, "step": 15529 }, { "epoch": 2.5351618301293826, "grad_norm": 1.7072526216506958, "learning_rate": 1.8955706179091248e-05, "loss": 0.526, "step": 15530 }, { "epoch": 2.535325088771887, "grad_norm": 1.8654084205627441, "learning_rate": 1.895556498715745e-05, "loss": 0.5038, "step": 15531 }, { "epoch": 2.5354883474143914, "grad_norm": 1.7975820302963257, "learning_rate": 1.8955423786205398e-05, "loss": 0.5622, "step": 15532 }, { "epoch": 2.535651606056896, "grad_norm": 1.7557883262634277, "learning_rate": 1.895528257623524e-05, "loss": 0.5618, "step": 15533 }, { "epoch": 2.5358148646994, "grad_norm": 1.7216062545776367, "learning_rate": 1.8955141357247113e-05, "loss": 0.5782, "step": 15534 }, { "epoch": 2.5359781233419043, "grad_norm": 1.8765580654144287, "learning_rate": 1.895500012924116e-05, "loss": 0.6088, "step": 15535 }, { "epoch": 2.5361413819844087, "grad_norm": 1.914957880973816, "learning_rate": 1.8954858892217522e-05, "loss": 0.6056, "step": 15536 }, { "epoch": 2.536304640626913, "grad_norm": 1.5626541376113892, "learning_rate": 1.895471764617635e-05, "loss": 0.4733, "step": 15537 }, { "epoch": 2.5364678992694176, "grad_norm": 1.852906584739685, "learning_rate": 1.8954576391117772e-05, "loss": 0.5996, "step": 15538 }, { "epoch": 2.536631157911922, "grad_norm": 1.8698798418045044, "learning_rate": 1.8954435127041947e-05, "loss": 0.5192, "step": 15539 }, { "epoch": 2.5367944165544265, "grad_norm": 1.8380168676376343, "learning_rate": 1.8954293853949002e-05, "loss": 0.5901, "step": 15540 }, { "epoch": 2.5369576751969305, "grad_norm": 1.6121821403503418, "learning_rate": 1.8954152571839082e-05, "loss": 0.4956, "step": 15541 }, { "epoch": 2.537120933839435, "grad_norm": 1.842129111289978, "learning_rate": 1.895401128071234e-05, "loss": 0.5084, "step": 15542 }, { "epoch": 2.5372841924819394, "grad_norm": 1.707373857498169, "learning_rate": 1.8953869980568908e-05, "loss": 0.6056, "step": 15543 }, { "epoch": 2.537447451124444, "grad_norm": 1.626772403717041, "learning_rate": 1.8953728671408934e-05, "loss": 0.537, "step": 15544 }, { "epoch": 2.5376107097669482, "grad_norm": 1.9726788997650146, "learning_rate": 1.8953587353232555e-05, "loss": 0.6284, "step": 15545 }, { "epoch": 2.5377739684094527, "grad_norm": 1.9556183815002441, "learning_rate": 1.8953446026039913e-05, "loss": 0.6291, "step": 15546 }, { "epoch": 2.537937227051957, "grad_norm": 1.956395149230957, "learning_rate": 1.895330468983116e-05, "loss": 0.7261, "step": 15547 }, { "epoch": 2.5381004856944616, "grad_norm": 1.6748870611190796, "learning_rate": 1.8953163344606426e-05, "loss": 0.6116, "step": 15548 }, { "epoch": 2.538263744336966, "grad_norm": 1.7852253913879395, "learning_rate": 1.8953021990365864e-05, "loss": 0.6092, "step": 15549 }, { "epoch": 2.5384270029794704, "grad_norm": 1.5583792924880981, "learning_rate": 1.8952880627109606e-05, "loss": 0.551, "step": 15550 }, { "epoch": 2.538590261621975, "grad_norm": 1.6606791019439697, "learning_rate": 1.8952739254837803e-05, "loss": 0.563, "step": 15551 }, { "epoch": 2.538753520264479, "grad_norm": 1.7670856714248657, "learning_rate": 1.8952597873550595e-05, "loss": 0.593, "step": 15552 }, { "epoch": 2.5389167789069833, "grad_norm": 1.5748131275177002, "learning_rate": 1.8952456483248117e-05, "loss": 0.5256, "step": 15553 }, { "epoch": 2.5390800375494877, "grad_norm": 1.585320234298706, "learning_rate": 1.8952315083930522e-05, "loss": 0.5012, "step": 15554 }, { "epoch": 2.539243296191992, "grad_norm": 1.7997065782546997, "learning_rate": 1.895217367559795e-05, "loss": 0.6637, "step": 15555 }, { "epoch": 2.5394065548344966, "grad_norm": 1.8043428659439087, "learning_rate": 1.895203225825054e-05, "loss": 0.5643, "step": 15556 }, { "epoch": 2.539569813477001, "grad_norm": 2.0066006183624268, "learning_rate": 1.8951890831888432e-05, "loss": 0.622, "step": 15557 }, { "epoch": 2.539733072119505, "grad_norm": 2.0161194801330566, "learning_rate": 1.8951749396511777e-05, "loss": 0.5959, "step": 15558 }, { "epoch": 2.5398963307620095, "grad_norm": 1.3443061113357544, "learning_rate": 1.8951607952120712e-05, "loss": 0.4416, "step": 15559 }, { "epoch": 2.540059589404514, "grad_norm": 1.6278437376022339, "learning_rate": 1.8951466498715378e-05, "loss": 0.5545, "step": 15560 }, { "epoch": 2.5402228480470184, "grad_norm": 1.6980572938919067, "learning_rate": 1.8951325036295922e-05, "loss": 0.5355, "step": 15561 }, { "epoch": 2.540386106689523, "grad_norm": 1.6848294734954834, "learning_rate": 1.8951183564862484e-05, "loss": 0.5526, "step": 15562 }, { "epoch": 2.5405493653320272, "grad_norm": 1.9395794868469238, "learning_rate": 1.8951042084415205e-05, "loss": 0.4949, "step": 15563 }, { "epoch": 2.5407126239745317, "grad_norm": 1.9922313690185547, "learning_rate": 1.8950900594954226e-05, "loss": 0.6193, "step": 15564 }, { "epoch": 2.540875882617036, "grad_norm": 1.7556859254837036, "learning_rate": 1.8950759096479698e-05, "loss": 0.5547, "step": 15565 }, { "epoch": 2.5410391412595406, "grad_norm": 1.9881080389022827, "learning_rate": 1.8950617588991757e-05, "loss": 0.6136, "step": 15566 }, { "epoch": 2.541202399902045, "grad_norm": 1.8095468282699585, "learning_rate": 1.8950476072490545e-05, "loss": 0.5569, "step": 15567 }, { "epoch": 2.5413656585445494, "grad_norm": 1.8556568622589111, "learning_rate": 1.8950334546976208e-05, "loss": 0.5566, "step": 15568 }, { "epoch": 2.5415289171870534, "grad_norm": 1.5052355527877808, "learning_rate": 1.895019301244888e-05, "loss": 0.4917, "step": 15569 }, { "epoch": 2.541692175829558, "grad_norm": 1.7763417959213257, "learning_rate": 1.8950051468908718e-05, "loss": 0.5765, "step": 15570 }, { "epoch": 2.5418554344720623, "grad_norm": 1.7401331663131714, "learning_rate": 1.8949909916355853e-05, "loss": 0.5738, "step": 15571 }, { "epoch": 2.5420186931145667, "grad_norm": 1.7547327280044556, "learning_rate": 1.894976835479043e-05, "loss": 0.5577, "step": 15572 }, { "epoch": 2.542181951757071, "grad_norm": 1.6878411769866943, "learning_rate": 1.89496267842126e-05, "loss": 0.5177, "step": 15573 }, { "epoch": 2.5423452103995756, "grad_norm": 1.9020100831985474, "learning_rate": 1.894948520462249e-05, "loss": 0.6596, "step": 15574 }, { "epoch": 2.54250846904208, "grad_norm": 2.1679320335388184, "learning_rate": 1.894934361602025e-05, "loss": 0.6023, "step": 15575 }, { "epoch": 2.542671727684584, "grad_norm": 1.7715985774993896, "learning_rate": 1.894920201840603e-05, "loss": 0.5435, "step": 15576 }, { "epoch": 2.5428349863270885, "grad_norm": 1.6473362445831299, "learning_rate": 1.894906041177996e-05, "loss": 0.5801, "step": 15577 }, { "epoch": 2.542998244969593, "grad_norm": 1.842539668083191, "learning_rate": 1.894891879614219e-05, "loss": 0.5775, "step": 15578 }, { "epoch": 2.5431615036120974, "grad_norm": 1.7441186904907227, "learning_rate": 1.8948777171492863e-05, "loss": 0.617, "step": 15579 }, { "epoch": 2.543324762254602, "grad_norm": 1.5394890308380127, "learning_rate": 1.894863553783212e-05, "loss": 0.4797, "step": 15580 }, { "epoch": 2.5434880208971062, "grad_norm": 1.685868501663208, "learning_rate": 1.8948493895160103e-05, "loss": 0.4872, "step": 15581 }, { "epoch": 2.5436512795396107, "grad_norm": 2.037733316421509, "learning_rate": 1.8948352243476953e-05, "loss": 0.6049, "step": 15582 }, { "epoch": 2.543814538182115, "grad_norm": 2.066713333129883, "learning_rate": 1.8948210582782818e-05, "loss": 0.6357, "step": 15583 }, { "epoch": 2.5439777968246196, "grad_norm": 2.1460177898406982, "learning_rate": 1.8948068913077832e-05, "loss": 0.5165, "step": 15584 }, { "epoch": 2.544141055467124, "grad_norm": 1.7273250818252563, "learning_rate": 1.894792723436215e-05, "loss": 0.5814, "step": 15585 }, { "epoch": 2.5443043141096284, "grad_norm": 1.7541208267211914, "learning_rate": 1.8947785546635905e-05, "loss": 0.5993, "step": 15586 }, { "epoch": 2.5444675727521324, "grad_norm": 1.8623584508895874, "learning_rate": 1.894764384989924e-05, "loss": 0.6044, "step": 15587 }, { "epoch": 2.544630831394637, "grad_norm": 1.8277976512908936, "learning_rate": 1.89475021441523e-05, "loss": 0.5522, "step": 15588 }, { "epoch": 2.5447940900371413, "grad_norm": 1.9964991807937622, "learning_rate": 1.894736042939523e-05, "loss": 0.5847, "step": 15589 }, { "epoch": 2.5449573486796457, "grad_norm": 1.558369755744934, "learning_rate": 1.894721870562817e-05, "loss": 0.5862, "step": 15590 }, { "epoch": 2.54512060732215, "grad_norm": 1.9518016576766968, "learning_rate": 1.8947076972851263e-05, "loss": 0.504, "step": 15591 }, { "epoch": 2.5452838659646546, "grad_norm": 1.5962103605270386, "learning_rate": 1.894693523106465e-05, "loss": 0.5302, "step": 15592 }, { "epoch": 2.545447124607159, "grad_norm": 1.7716047763824463, "learning_rate": 1.8946793480268474e-05, "loss": 0.4306, "step": 15593 }, { "epoch": 2.545610383249663, "grad_norm": 1.7575047016143799, "learning_rate": 1.8946651720462886e-05, "loss": 0.5178, "step": 15594 }, { "epoch": 2.5457736418921675, "grad_norm": 2.043139934539795, "learning_rate": 1.8946509951648018e-05, "loss": 0.705, "step": 15595 }, { "epoch": 2.545936900534672, "grad_norm": 1.8266173601150513, "learning_rate": 1.8946368173824017e-05, "loss": 0.6087, "step": 15596 }, { "epoch": 2.5461001591771764, "grad_norm": 1.7963411808013916, "learning_rate": 1.8946226386991027e-05, "loss": 0.5569, "step": 15597 }, { "epoch": 2.546263417819681, "grad_norm": 2.164484739303589, "learning_rate": 1.8946084591149187e-05, "loss": 0.663, "step": 15598 }, { "epoch": 2.5464266764621852, "grad_norm": 1.7659807205200195, "learning_rate": 1.8945942786298644e-05, "loss": 0.5304, "step": 15599 }, { "epoch": 2.5465899351046897, "grad_norm": 1.973142147064209, "learning_rate": 1.894580097243954e-05, "loss": 0.5616, "step": 15600 }, { "epoch": 2.546753193747194, "grad_norm": 1.531906008720398, "learning_rate": 1.8945659149572014e-05, "loss": 0.4796, "step": 15601 }, { "epoch": 2.5469164523896985, "grad_norm": 1.6793532371520996, "learning_rate": 1.8945517317696214e-05, "loss": 0.4864, "step": 15602 }, { "epoch": 2.547079711032203, "grad_norm": 1.5959155559539795, "learning_rate": 1.8945375476812276e-05, "loss": 0.4431, "step": 15603 }, { "epoch": 2.5472429696747074, "grad_norm": 2.0457799434661865, "learning_rate": 1.8945233626920353e-05, "loss": 0.5376, "step": 15604 }, { "epoch": 2.5474062283172114, "grad_norm": 1.5931330919265747, "learning_rate": 1.8945091768020577e-05, "loss": 0.5122, "step": 15605 }, { "epoch": 2.547569486959716, "grad_norm": 1.6402108669281006, "learning_rate": 1.89449499001131e-05, "loss": 0.4923, "step": 15606 }, { "epoch": 2.5477327456022203, "grad_norm": 1.8780555725097656, "learning_rate": 1.894480802319806e-05, "loss": 0.6072, "step": 15607 }, { "epoch": 2.5478960042447247, "grad_norm": 2.0728824138641357, "learning_rate": 1.89446661372756e-05, "loss": 1.2433, "step": 15608 }, { "epoch": 2.548059262887229, "grad_norm": 1.8120286464691162, "learning_rate": 1.8944524242345864e-05, "loss": 0.535, "step": 15609 }, { "epoch": 2.5482225215297336, "grad_norm": 1.6752923727035522, "learning_rate": 1.8944382338408994e-05, "loss": 0.4965, "step": 15610 }, { "epoch": 2.5483857801722376, "grad_norm": 1.6241793632507324, "learning_rate": 1.8944240425465134e-05, "loss": 0.6201, "step": 15611 }, { "epoch": 2.548549038814742, "grad_norm": 1.8830856084823608, "learning_rate": 1.8944098503514424e-05, "loss": 0.5452, "step": 15612 }, { "epoch": 2.5487122974572465, "grad_norm": 1.4826581478118896, "learning_rate": 1.8943956572557013e-05, "loss": 0.4514, "step": 15613 }, { "epoch": 2.548875556099751, "grad_norm": 1.3465677499771118, "learning_rate": 1.894381463259304e-05, "loss": 0.4039, "step": 15614 }, { "epoch": 2.5490388147422554, "grad_norm": 2.042473316192627, "learning_rate": 1.8943672683622646e-05, "loss": 0.5517, "step": 15615 }, { "epoch": 2.54920207338476, "grad_norm": 1.9467122554779053, "learning_rate": 1.8943530725645975e-05, "loss": 0.6433, "step": 15616 }, { "epoch": 2.5493653320272642, "grad_norm": 1.8233556747436523, "learning_rate": 1.894338875866317e-05, "loss": 0.6141, "step": 15617 }, { "epoch": 2.5495285906697687, "grad_norm": 2.173415184020996, "learning_rate": 1.894324678267438e-05, "loss": 0.7738, "step": 15618 }, { "epoch": 2.549691849312273, "grad_norm": 1.7372469902038574, "learning_rate": 1.894310479767974e-05, "loss": 0.5739, "step": 15619 }, { "epoch": 2.5498551079547775, "grad_norm": 1.7950176000595093, "learning_rate": 1.8942962803679393e-05, "loss": 0.5903, "step": 15620 }, { "epoch": 2.550018366597282, "grad_norm": 1.7515422105789185, "learning_rate": 1.8942820800673488e-05, "loss": 0.5195, "step": 15621 }, { "epoch": 2.550181625239786, "grad_norm": 1.5905182361602783, "learning_rate": 1.8942678788662166e-05, "loss": 0.4967, "step": 15622 }, { "epoch": 2.5503448838822904, "grad_norm": 1.6982676982879639, "learning_rate": 1.8942536767645566e-05, "loss": 0.5904, "step": 15623 }, { "epoch": 2.550508142524795, "grad_norm": 1.6693389415740967, "learning_rate": 1.8942394737623836e-05, "loss": 0.4844, "step": 15624 }, { "epoch": 2.5506714011672993, "grad_norm": 1.9208927154541016, "learning_rate": 1.8942252698597113e-05, "loss": 0.7133, "step": 15625 }, { "epoch": 2.5508346598098037, "grad_norm": 2.012354850769043, "learning_rate": 1.894211065056555e-05, "loss": 0.5379, "step": 15626 }, { "epoch": 2.550997918452308, "grad_norm": 1.8031601905822754, "learning_rate": 1.894196859352928e-05, "loss": 0.5064, "step": 15627 }, { "epoch": 2.5511611770948126, "grad_norm": 1.643278956413269, "learning_rate": 1.894182652748845e-05, "loss": 0.5472, "step": 15628 }, { "epoch": 2.5513244357373166, "grad_norm": 1.4583779573440552, "learning_rate": 1.8941684452443203e-05, "loss": 0.5828, "step": 15629 }, { "epoch": 2.551487694379821, "grad_norm": 1.809288740158081, "learning_rate": 1.8941542368393683e-05, "loss": 0.591, "step": 15630 }, { "epoch": 2.5516509530223255, "grad_norm": 1.8054394721984863, "learning_rate": 1.8941400275340034e-05, "loss": 0.6059, "step": 15631 }, { "epoch": 2.55181421166483, "grad_norm": 1.4790408611297607, "learning_rate": 1.894125817328239e-05, "loss": 0.4831, "step": 15632 }, { "epoch": 2.5519774703073344, "grad_norm": 1.5264103412628174, "learning_rate": 1.894111606222091e-05, "loss": 0.4468, "step": 15633 }, { "epoch": 2.552140728949839, "grad_norm": 1.927931308746338, "learning_rate": 1.8940973942155726e-05, "loss": 0.6412, "step": 15634 }, { "epoch": 2.5523039875923432, "grad_norm": 1.6289501190185547, "learning_rate": 1.8940831813086982e-05, "loss": 0.6249, "step": 15635 }, { "epoch": 2.5524672462348477, "grad_norm": 1.8751503229141235, "learning_rate": 1.8940689675014826e-05, "loss": 0.6228, "step": 15636 }, { "epoch": 2.552630504877352, "grad_norm": 1.7840107679367065, "learning_rate": 1.8940547527939395e-05, "loss": 0.6338, "step": 15637 }, { "epoch": 2.5527937635198565, "grad_norm": 2.008552074432373, "learning_rate": 1.8940405371860836e-05, "loss": 0.6181, "step": 15638 }, { "epoch": 2.552957022162361, "grad_norm": 1.4462943077087402, "learning_rate": 1.894026320677929e-05, "loss": 0.4949, "step": 15639 }, { "epoch": 2.553120280804865, "grad_norm": 1.5256214141845703, "learning_rate": 1.8940121032694902e-05, "loss": 0.4637, "step": 15640 }, { "epoch": 2.5532835394473694, "grad_norm": 1.7905299663543701, "learning_rate": 1.8939978849607814e-05, "loss": 0.578, "step": 15641 }, { "epoch": 2.553446798089874, "grad_norm": 1.6936005353927612, "learning_rate": 1.8939836657518168e-05, "loss": 0.4902, "step": 15642 }, { "epoch": 2.5536100567323783, "grad_norm": 2.2653205394744873, "learning_rate": 1.893969445642611e-05, "loss": 0.5904, "step": 15643 }, { "epoch": 2.5537733153748827, "grad_norm": 1.896876573562622, "learning_rate": 1.8939552246331783e-05, "loss": 0.6364, "step": 15644 }, { "epoch": 2.553936574017387, "grad_norm": 1.8304800987243652, "learning_rate": 1.8939410027235332e-05, "loss": 0.6611, "step": 15645 }, { "epoch": 2.554099832659891, "grad_norm": 2.0894553661346436, "learning_rate": 1.8939267799136896e-05, "loss": 0.6249, "step": 15646 }, { "epoch": 2.5542630913023956, "grad_norm": 1.9280693531036377, "learning_rate": 1.8939125562036618e-05, "loss": 0.5937, "step": 15647 }, { "epoch": 2.5544263499449, "grad_norm": 1.822906732559204, "learning_rate": 1.8938983315934646e-05, "loss": 0.5671, "step": 15648 }, { "epoch": 2.5545896085874045, "grad_norm": 2.0976510047912598, "learning_rate": 1.8938841060831116e-05, "loss": 0.7772, "step": 15649 }, { "epoch": 2.554752867229909, "grad_norm": 1.9113601446151733, "learning_rate": 1.8938698796726177e-05, "loss": 0.5702, "step": 15650 }, { "epoch": 2.5549161258724133, "grad_norm": 1.9257571697235107, "learning_rate": 1.8938556523619967e-05, "loss": 0.6007, "step": 15651 }, { "epoch": 2.555079384514918, "grad_norm": 1.712743878364563, "learning_rate": 1.893841424151264e-05, "loss": 0.566, "step": 15652 }, { "epoch": 2.5552426431574222, "grad_norm": 1.7775379419326782, "learning_rate": 1.893827195040433e-05, "loss": 0.5243, "step": 15653 }, { "epoch": 2.5554059017999267, "grad_norm": 1.8614720106124878, "learning_rate": 1.893812965029518e-05, "loss": 0.6168, "step": 15654 }, { "epoch": 2.555569160442431, "grad_norm": 1.7139643430709839, "learning_rate": 1.8937987341185337e-05, "loss": 0.5732, "step": 15655 }, { "epoch": 2.5557324190849355, "grad_norm": 1.6032263040542603, "learning_rate": 1.8937845023074943e-05, "loss": 0.5256, "step": 15656 }, { "epoch": 2.5558956777274395, "grad_norm": 1.7292115688323975, "learning_rate": 1.8937702695964142e-05, "loss": 0.5963, "step": 15657 }, { "epoch": 2.556058936369944, "grad_norm": 1.4581458568572998, "learning_rate": 1.8937560359853074e-05, "loss": 0.4983, "step": 15658 }, { "epoch": 2.5562221950124484, "grad_norm": 1.5375405550003052, "learning_rate": 1.8937418014741892e-05, "loss": 0.4256, "step": 15659 }, { "epoch": 2.556385453654953, "grad_norm": 1.7295705080032349, "learning_rate": 1.8937275660630727e-05, "loss": 0.5194, "step": 15660 }, { "epoch": 2.5565487122974573, "grad_norm": 1.4985508918762207, "learning_rate": 1.893713329751973e-05, "loss": 0.4739, "step": 15661 }, { "epoch": 2.5567119709399617, "grad_norm": 1.9848827123641968, "learning_rate": 1.893699092540904e-05, "loss": 0.6396, "step": 15662 }, { "epoch": 2.556875229582466, "grad_norm": 1.4446439743041992, "learning_rate": 1.8936848544298804e-05, "loss": 0.458, "step": 15663 }, { "epoch": 2.55703848822497, "grad_norm": 1.621735692024231, "learning_rate": 1.8936706154189162e-05, "loss": 0.551, "step": 15664 }, { "epoch": 2.5572017468674746, "grad_norm": 1.9785785675048828, "learning_rate": 1.8936563755080263e-05, "loss": 0.5974, "step": 15665 }, { "epoch": 2.557365005509979, "grad_norm": 1.8769173622131348, "learning_rate": 1.8936421346972242e-05, "loss": 0.561, "step": 15666 }, { "epoch": 2.5575282641524835, "grad_norm": 1.7787165641784668, "learning_rate": 1.8936278929865248e-05, "loss": 0.5403, "step": 15667 }, { "epoch": 2.557691522794988, "grad_norm": 1.7975636720657349, "learning_rate": 1.8936136503759427e-05, "loss": 0.5953, "step": 15668 }, { "epoch": 2.5578547814374923, "grad_norm": 1.7871112823486328, "learning_rate": 1.8935994068654917e-05, "loss": 0.6264, "step": 15669 }, { "epoch": 2.558018040079997, "grad_norm": 2.076077938079834, "learning_rate": 1.893585162455186e-05, "loss": 0.7105, "step": 15670 }, { "epoch": 2.558181298722501, "grad_norm": 1.8031333684921265, "learning_rate": 1.893570917145041e-05, "loss": 0.5939, "step": 15671 }, { "epoch": 2.5583445573650057, "grad_norm": 1.9196693897247314, "learning_rate": 1.8935566709350695e-05, "loss": 0.6698, "step": 15672 }, { "epoch": 2.55850781600751, "grad_norm": 2.058677911758423, "learning_rate": 1.8935424238252872e-05, "loss": 0.7123, "step": 15673 }, { "epoch": 2.5586710746500145, "grad_norm": 1.5348143577575684, "learning_rate": 1.893528175815708e-05, "loss": 0.4883, "step": 15674 }, { "epoch": 2.5588343332925185, "grad_norm": 1.6589624881744385, "learning_rate": 1.893513926906346e-05, "loss": 0.5548, "step": 15675 }, { "epoch": 2.558997591935023, "grad_norm": 1.827877402305603, "learning_rate": 1.8934996770972157e-05, "loss": 0.703, "step": 15676 }, { "epoch": 2.5591608505775274, "grad_norm": 1.819804310798645, "learning_rate": 1.8934854263883314e-05, "loss": 0.5253, "step": 15677 }, { "epoch": 2.559324109220032, "grad_norm": 2.0691587924957275, "learning_rate": 1.8934711747797075e-05, "loss": 0.6284, "step": 15678 }, { "epoch": 2.5594873678625363, "grad_norm": 1.7142539024353027, "learning_rate": 1.8934569222713583e-05, "loss": 0.5859, "step": 15679 }, { "epoch": 2.5596506265050407, "grad_norm": 2.1480462551116943, "learning_rate": 1.8934426688632986e-05, "loss": 0.6399, "step": 15680 }, { "epoch": 2.559813885147545, "grad_norm": 1.5304440259933472, "learning_rate": 1.8934284145555418e-05, "loss": 0.5315, "step": 15681 }, { "epoch": 2.559977143790049, "grad_norm": 1.8432364463806152, "learning_rate": 1.8934141593481032e-05, "loss": 0.6141, "step": 15682 }, { "epoch": 2.5601404024325536, "grad_norm": 1.966802954673767, "learning_rate": 1.893399903240997e-05, "loss": 0.5964, "step": 15683 }, { "epoch": 2.560303661075058, "grad_norm": 1.6444041728973389, "learning_rate": 1.8933856462342368e-05, "loss": 0.5788, "step": 15684 }, { "epoch": 2.5604669197175625, "grad_norm": 1.7112160921096802, "learning_rate": 1.893371388327838e-05, "loss": 0.6076, "step": 15685 }, { "epoch": 2.560630178360067, "grad_norm": 1.9970309734344482, "learning_rate": 1.893357129521814e-05, "loss": 0.6931, "step": 15686 }, { "epoch": 2.5607934370025713, "grad_norm": 1.9249969720840454, "learning_rate": 1.8933428698161798e-05, "loss": 0.6374, "step": 15687 }, { "epoch": 2.5609566956450758, "grad_norm": 1.7529265880584717, "learning_rate": 1.8933286092109492e-05, "loss": 0.5781, "step": 15688 }, { "epoch": 2.56111995428758, "grad_norm": 1.374541997909546, "learning_rate": 1.8933143477061374e-05, "loss": 0.486, "step": 15689 }, { "epoch": 2.5612832129300847, "grad_norm": 1.8761184215545654, "learning_rate": 1.8933000853017584e-05, "loss": 0.6065, "step": 15690 }, { "epoch": 2.561446471572589, "grad_norm": 1.9811145067214966, "learning_rate": 1.893285821997826e-05, "loss": 0.6722, "step": 15691 }, { "epoch": 2.5616097302150935, "grad_norm": 2.0062708854675293, "learning_rate": 1.8932715577943553e-05, "loss": 0.7309, "step": 15692 }, { "epoch": 2.5617729888575975, "grad_norm": 1.9147820472717285, "learning_rate": 1.8932572926913605e-05, "loss": 0.5837, "step": 15693 }, { "epoch": 2.561936247500102, "grad_norm": 1.5340783596038818, "learning_rate": 1.8932430266888556e-05, "loss": 0.5541, "step": 15694 }, { "epoch": 2.5620995061426064, "grad_norm": 1.5542755126953125, "learning_rate": 1.893228759786855e-05, "loss": 0.5202, "step": 15695 }, { "epoch": 2.562262764785111, "grad_norm": 1.6196397542953491, "learning_rate": 1.893214491985374e-05, "loss": 0.4854, "step": 15696 }, { "epoch": 2.5624260234276153, "grad_norm": 1.7390884160995483, "learning_rate": 1.8932002232844254e-05, "loss": 0.567, "step": 15697 }, { "epoch": 2.5625892820701197, "grad_norm": 1.4687983989715576, "learning_rate": 1.8931859536840248e-05, "loss": 0.4662, "step": 15698 }, { "epoch": 2.5627525407126237, "grad_norm": 1.6067798137664795, "learning_rate": 1.8931716831841863e-05, "loss": 0.5649, "step": 15699 }, { "epoch": 2.562915799355128, "grad_norm": 1.681220531463623, "learning_rate": 1.893157411784924e-05, "loss": 0.519, "step": 15700 }, { "epoch": 2.5630790579976326, "grad_norm": 1.347490906715393, "learning_rate": 1.8931431394862526e-05, "loss": 0.4217, "step": 15701 }, { "epoch": 2.563242316640137, "grad_norm": 2.174295663833618, "learning_rate": 1.8931288662881862e-05, "loss": 0.6446, "step": 15702 }, { "epoch": 2.5634055752826415, "grad_norm": 1.7396668195724487, "learning_rate": 1.893114592190739e-05, "loss": 0.5685, "step": 15703 }, { "epoch": 2.563568833925146, "grad_norm": 1.9732019901275635, "learning_rate": 1.893100317193926e-05, "loss": 0.6087, "step": 15704 }, { "epoch": 2.5637320925676503, "grad_norm": 1.8301602602005005, "learning_rate": 1.893086041297761e-05, "loss": 0.6439, "step": 15705 }, { "epoch": 2.5638953512101548, "grad_norm": 1.9886420965194702, "learning_rate": 1.8930717645022584e-05, "loss": 0.6108, "step": 15706 }, { "epoch": 2.564058609852659, "grad_norm": 1.9181303977966309, "learning_rate": 1.8930574868074333e-05, "loss": 0.6773, "step": 15707 }, { "epoch": 2.5642218684951636, "grad_norm": 1.5719108581542969, "learning_rate": 1.8930432082132992e-05, "loss": 0.5201, "step": 15708 }, { "epoch": 2.564385127137668, "grad_norm": 1.9036204814910889, "learning_rate": 1.8930289287198712e-05, "loss": 0.6377, "step": 15709 }, { "epoch": 2.564548385780172, "grad_norm": 1.7010380029678345, "learning_rate": 1.893014648327163e-05, "loss": 0.5555, "step": 15710 }, { "epoch": 2.5647116444226765, "grad_norm": 1.7698684930801392, "learning_rate": 1.893000367035189e-05, "loss": 0.6181, "step": 15711 }, { "epoch": 2.564874903065181, "grad_norm": 1.8683429956436157, "learning_rate": 1.8929860848439642e-05, "loss": 0.5407, "step": 15712 }, { "epoch": 2.5650381617076854, "grad_norm": 1.6565359830856323, "learning_rate": 1.8929718017535027e-05, "loss": 0.4555, "step": 15713 }, { "epoch": 2.56520142035019, "grad_norm": 1.6794636249542236, "learning_rate": 1.8929575177638185e-05, "loss": 0.5679, "step": 15714 }, { "epoch": 2.5653646789926943, "grad_norm": 1.561374545097351, "learning_rate": 1.892943232874927e-05, "loss": 0.5297, "step": 15715 }, { "epoch": 2.5655279376351987, "grad_norm": 1.7824444770812988, "learning_rate": 1.8929289470868412e-05, "loss": 0.5657, "step": 15716 }, { "epoch": 2.5656911962777027, "grad_norm": 1.5982351303100586, "learning_rate": 1.8929146603995766e-05, "loss": 0.5162, "step": 15717 }, { "epoch": 2.565854454920207, "grad_norm": 1.9045556783676147, "learning_rate": 1.892900372813147e-05, "loss": 0.5869, "step": 15718 }, { "epoch": 2.5660177135627116, "grad_norm": 1.7640644311904907, "learning_rate": 1.892886084327567e-05, "loss": 0.5462, "step": 15719 }, { "epoch": 2.566180972205216, "grad_norm": 1.9124782085418701, "learning_rate": 1.8928717949428508e-05, "loss": 0.6403, "step": 15720 }, { "epoch": 2.5663442308477205, "grad_norm": 1.7458771467208862, "learning_rate": 1.892857504659013e-05, "loss": 0.4842, "step": 15721 }, { "epoch": 2.566507489490225, "grad_norm": 1.74147367477417, "learning_rate": 1.8928432134760683e-05, "loss": 0.6693, "step": 15722 }, { "epoch": 2.5666707481327293, "grad_norm": 1.7702559232711792, "learning_rate": 1.8928289213940302e-05, "loss": 0.6016, "step": 15723 }, { "epoch": 2.5668340067752338, "grad_norm": 1.966869831085205, "learning_rate": 1.892814628412914e-05, "loss": 0.7009, "step": 15724 }, { "epoch": 2.566997265417738, "grad_norm": 2.0722568035125732, "learning_rate": 1.8928003345327334e-05, "loss": 0.6248, "step": 15725 }, { "epoch": 2.5671605240602426, "grad_norm": 1.7482815980911255, "learning_rate": 1.8927860397535035e-05, "loss": 0.5573, "step": 15726 }, { "epoch": 2.567323782702747, "grad_norm": 1.6302883625030518, "learning_rate": 1.892771744075238e-05, "loss": 0.5371, "step": 15727 }, { "epoch": 2.567487041345251, "grad_norm": 1.843042254447937, "learning_rate": 1.892757447497952e-05, "loss": 0.6029, "step": 15728 }, { "epoch": 2.5676502999877555, "grad_norm": 1.5761293172836304, "learning_rate": 1.8927431500216587e-05, "loss": 0.5286, "step": 15729 }, { "epoch": 2.56781355863026, "grad_norm": 1.8380481004714966, "learning_rate": 1.8927288516463738e-05, "loss": 0.6033, "step": 15730 }, { "epoch": 2.5679768172727644, "grad_norm": 1.7294728755950928, "learning_rate": 1.8927145523721112e-05, "loss": 0.5382, "step": 15731 }, { "epoch": 2.568140075915269, "grad_norm": 1.7725814580917358, "learning_rate": 1.892700252198885e-05, "loss": 0.575, "step": 15732 }, { "epoch": 2.5683033345577733, "grad_norm": 1.9153327941894531, "learning_rate": 1.8926859511267103e-05, "loss": 0.5352, "step": 15733 }, { "epoch": 2.5684665932002777, "grad_norm": 1.6355431079864502, "learning_rate": 1.8926716491556004e-05, "loss": 0.5071, "step": 15734 }, { "epoch": 2.5686298518427817, "grad_norm": 2.0125465393066406, "learning_rate": 1.892657346285571e-05, "loss": 0.5367, "step": 15735 }, { "epoch": 2.568793110485286, "grad_norm": 2.0137040615081787, "learning_rate": 1.892643042516636e-05, "loss": 0.6779, "step": 15736 }, { "epoch": 2.5689563691277906, "grad_norm": 2.2872300148010254, "learning_rate": 1.892628737848809e-05, "loss": 0.7026, "step": 15737 }, { "epoch": 2.569119627770295, "grad_norm": 1.7693305015563965, "learning_rate": 1.892614432282106e-05, "loss": 0.591, "step": 15738 }, { "epoch": 2.5692828864127994, "grad_norm": 1.7878426313400269, "learning_rate": 1.8926001258165397e-05, "loss": 0.6154, "step": 15739 }, { "epoch": 2.569446145055304, "grad_norm": 1.8393193483352661, "learning_rate": 1.892585818452126e-05, "loss": 0.5839, "step": 15740 }, { "epoch": 2.5696094036978083, "grad_norm": 1.7133667469024658, "learning_rate": 1.892571510188878e-05, "loss": 0.5093, "step": 15741 }, { "epoch": 2.5697726623403128, "grad_norm": 1.9416930675506592, "learning_rate": 1.892557201026811e-05, "loss": 0.665, "step": 15742 }, { "epoch": 2.569935920982817, "grad_norm": 1.9800057411193848, "learning_rate": 1.892542890965939e-05, "loss": 0.5711, "step": 15743 }, { "epoch": 2.5700991796253216, "grad_norm": 2.033428907394409, "learning_rate": 1.8925285800062763e-05, "loss": 0.5595, "step": 15744 }, { "epoch": 2.570262438267826, "grad_norm": 1.861674189567566, "learning_rate": 1.892514268147838e-05, "loss": 0.5523, "step": 15745 }, { "epoch": 2.57042569691033, "grad_norm": 1.7300775051116943, "learning_rate": 1.892499955390638e-05, "loss": 0.5899, "step": 15746 }, { "epoch": 2.5705889555528345, "grad_norm": 1.8629488945007324, "learning_rate": 1.892485641734691e-05, "loss": 0.5597, "step": 15747 }, { "epoch": 2.570752214195339, "grad_norm": 1.7867040634155273, "learning_rate": 1.8924713271800107e-05, "loss": 0.5897, "step": 15748 }, { "epoch": 2.5709154728378434, "grad_norm": 1.5962499380111694, "learning_rate": 1.8924570117266124e-05, "loss": 0.5598, "step": 15749 }, { "epoch": 2.571078731480348, "grad_norm": 1.5919663906097412, "learning_rate": 1.89244269537451e-05, "loss": 0.5145, "step": 15750 }, { "epoch": 2.5712419901228523, "grad_norm": 1.7783151865005493, "learning_rate": 1.892428378123718e-05, "loss": 0.5371, "step": 15751 }, { "epoch": 2.5714052487653563, "grad_norm": 1.3907052278518677, "learning_rate": 1.8924140599742512e-05, "loss": 0.4358, "step": 15752 }, { "epoch": 2.5715685074078607, "grad_norm": 2.055863618850708, "learning_rate": 1.892399740926123e-05, "loss": 0.5541, "step": 15753 }, { "epoch": 2.571731766050365, "grad_norm": 1.675504207611084, "learning_rate": 1.8923854209793487e-05, "loss": 0.5513, "step": 15754 }, { "epoch": 2.5718950246928696, "grad_norm": 2.0403356552124023, "learning_rate": 1.892371100133943e-05, "loss": 0.6035, "step": 15755 }, { "epoch": 2.572058283335374, "grad_norm": 1.5648068189620972, "learning_rate": 1.8923567783899193e-05, "loss": 0.6304, "step": 15756 }, { "epoch": 2.5722215419778784, "grad_norm": 1.6164612770080566, "learning_rate": 1.892342455747293e-05, "loss": 0.547, "step": 15757 }, { "epoch": 2.572384800620383, "grad_norm": 1.578420639038086, "learning_rate": 1.892328132206078e-05, "loss": 0.5086, "step": 15758 }, { "epoch": 2.5725480592628873, "grad_norm": 1.7190542221069336, "learning_rate": 1.8923138077662885e-05, "loss": 0.4716, "step": 15759 }, { "epoch": 2.5727113179053918, "grad_norm": 1.7485283613204956, "learning_rate": 1.8922994824279394e-05, "loss": 0.5989, "step": 15760 }, { "epoch": 2.572874576547896, "grad_norm": 1.7738200426101685, "learning_rate": 1.892285156191045e-05, "loss": 0.565, "step": 15761 }, { "epoch": 2.5730378351904006, "grad_norm": 1.8992691040039062, "learning_rate": 1.8922708290556197e-05, "loss": 0.5937, "step": 15762 }, { "epoch": 2.5732010938329046, "grad_norm": 1.6902225017547607, "learning_rate": 1.8922565010216778e-05, "loss": 0.4885, "step": 15763 }, { "epoch": 2.573364352475409, "grad_norm": 1.6345921754837036, "learning_rate": 1.8922421720892338e-05, "loss": 0.5224, "step": 15764 }, { "epoch": 2.5735276111179135, "grad_norm": 1.5695724487304688, "learning_rate": 1.8922278422583026e-05, "loss": 0.5419, "step": 15765 }, { "epoch": 2.573690869760418, "grad_norm": 1.8224223852157593, "learning_rate": 1.8922135115288976e-05, "loss": 0.4616, "step": 15766 }, { "epoch": 2.5738541284029224, "grad_norm": 1.7406085729599, "learning_rate": 1.892199179901034e-05, "loss": 0.5693, "step": 15767 }, { "epoch": 2.574017387045427, "grad_norm": 2.2137234210968018, "learning_rate": 1.8921848473747262e-05, "loss": 0.6735, "step": 15768 }, { "epoch": 2.5741806456879313, "grad_norm": 2.037755012512207, "learning_rate": 1.8921705139499885e-05, "loss": 0.5338, "step": 15769 }, { "epoch": 2.5743439043304353, "grad_norm": 2.189572334289551, "learning_rate": 1.8921561796268354e-05, "loss": 0.5991, "step": 15770 }, { "epoch": 2.5745071629729397, "grad_norm": 1.9346760511398315, "learning_rate": 1.8921418444052812e-05, "loss": 0.6172, "step": 15771 }, { "epoch": 2.574670421615444, "grad_norm": 2.050011396408081, "learning_rate": 1.89212750828534e-05, "loss": 0.6562, "step": 15772 }, { "epoch": 2.5748336802579486, "grad_norm": 1.7066612243652344, "learning_rate": 1.892113171267027e-05, "loss": 0.5855, "step": 15773 }, { "epoch": 2.574996938900453, "grad_norm": 1.7254384756088257, "learning_rate": 1.8920988333503564e-05, "loss": 0.4837, "step": 15774 }, { "epoch": 2.5751601975429574, "grad_norm": 1.6464064121246338, "learning_rate": 1.8920844945353425e-05, "loss": 0.5361, "step": 15775 }, { "epoch": 2.575323456185462, "grad_norm": 1.5578243732452393, "learning_rate": 1.8920701548219997e-05, "loss": 0.4661, "step": 15776 }, { "epoch": 2.5754867148279663, "grad_norm": 2.0118672847747803, "learning_rate": 1.8920558142103426e-05, "loss": 0.6217, "step": 15777 }, { "epoch": 2.5756499734704708, "grad_norm": 1.501422643661499, "learning_rate": 1.892041472700385e-05, "loss": 0.5418, "step": 15778 }, { "epoch": 2.575813232112975, "grad_norm": 1.822941780090332, "learning_rate": 1.892027130292142e-05, "loss": 0.572, "step": 15779 }, { "epoch": 2.5759764907554796, "grad_norm": 1.895145297050476, "learning_rate": 1.8920127869856283e-05, "loss": 0.6293, "step": 15780 }, { "epoch": 2.5761397493979836, "grad_norm": 1.561397671699524, "learning_rate": 1.891998442780858e-05, "loss": 0.4845, "step": 15781 }, { "epoch": 2.576303008040488, "grad_norm": 1.6171908378601074, "learning_rate": 1.8919840976778453e-05, "loss": 0.5607, "step": 15782 }, { "epoch": 2.5764662666829925, "grad_norm": 2.2037105560302734, "learning_rate": 1.8919697516766046e-05, "loss": 0.6366, "step": 15783 }, { "epoch": 2.576629525325497, "grad_norm": 1.674755573272705, "learning_rate": 1.8919554047771508e-05, "loss": 0.5069, "step": 15784 }, { "epoch": 2.5767927839680014, "grad_norm": 1.725561261177063, "learning_rate": 1.8919410569794982e-05, "loss": 0.547, "step": 15785 }, { "epoch": 2.576956042610506, "grad_norm": 1.6767146587371826, "learning_rate": 1.8919267082836614e-05, "loss": 0.5358, "step": 15786 }, { "epoch": 2.57711930125301, "grad_norm": 1.635693073272705, "learning_rate": 1.8919123586896545e-05, "loss": 0.4985, "step": 15787 }, { "epoch": 2.5772825598955142, "grad_norm": 2.0647835731506348, "learning_rate": 1.8918980081974922e-05, "loss": 0.6539, "step": 15788 }, { "epoch": 2.5774458185380187, "grad_norm": 1.804109811782837, "learning_rate": 1.8918836568071883e-05, "loss": 0.6401, "step": 15789 }, { "epoch": 2.577609077180523, "grad_norm": 1.7453705072402954, "learning_rate": 1.891869304518758e-05, "loss": 0.5421, "step": 15790 }, { "epoch": 2.5777723358230276, "grad_norm": 1.8223216533660889, "learning_rate": 1.891854951332216e-05, "loss": 0.5115, "step": 15791 }, { "epoch": 2.577935594465532, "grad_norm": 1.7414997816085815, "learning_rate": 1.8918405972475757e-05, "loss": 0.4422, "step": 15792 }, { "epoch": 2.5780988531080364, "grad_norm": 1.6820363998413086, "learning_rate": 1.8918262422648527e-05, "loss": 0.5098, "step": 15793 }, { "epoch": 2.578262111750541, "grad_norm": 1.5713824033737183, "learning_rate": 1.8918118863840605e-05, "loss": 0.574, "step": 15794 }, { "epoch": 2.5784253703930453, "grad_norm": 1.8226797580718994, "learning_rate": 1.8917975296052143e-05, "loss": 0.6404, "step": 15795 }, { "epoch": 2.5785886290355498, "grad_norm": 1.5766535997390747, "learning_rate": 1.891783171928328e-05, "loss": 0.4927, "step": 15796 }, { "epoch": 2.578751887678054, "grad_norm": 1.5046097040176392, "learning_rate": 1.8917688133534162e-05, "loss": 0.4988, "step": 15797 }, { "epoch": 2.578915146320558, "grad_norm": 1.7505607604980469, "learning_rate": 1.891754453880494e-05, "loss": 0.5072, "step": 15798 }, { "epoch": 2.5790784049630626, "grad_norm": 1.5969103574752808, "learning_rate": 1.8917400935095744e-05, "loss": 0.4862, "step": 15799 }, { "epoch": 2.579241663605567, "grad_norm": 2.0600175857543945, "learning_rate": 1.8917257322406735e-05, "loss": 0.6632, "step": 15800 }, { "epoch": 2.5794049222480715, "grad_norm": 1.8838386535644531, "learning_rate": 1.8917113700738046e-05, "loss": 0.5599, "step": 15801 }, { "epoch": 2.579568180890576, "grad_norm": 2.030688524246216, "learning_rate": 1.8916970070089828e-05, "loss": 0.62, "step": 15802 }, { "epoch": 2.5797314395330804, "grad_norm": 2.0646302700042725, "learning_rate": 1.8916826430462224e-05, "loss": 0.6633, "step": 15803 }, { "epoch": 2.579894698175585, "grad_norm": 1.6400400400161743, "learning_rate": 1.8916682781855377e-05, "loss": 0.5426, "step": 15804 }, { "epoch": 2.580057956818089, "grad_norm": 2.012605667114258, "learning_rate": 1.8916539124269435e-05, "loss": 0.535, "step": 15805 }, { "epoch": 2.5802212154605932, "grad_norm": 1.821671724319458, "learning_rate": 1.8916395457704536e-05, "loss": 0.8128, "step": 15806 }, { "epoch": 2.5803844741030977, "grad_norm": 1.6671395301818848, "learning_rate": 1.891625178216083e-05, "loss": 0.4903, "step": 15807 }, { "epoch": 2.580547732745602, "grad_norm": 1.6975494623184204, "learning_rate": 1.8916108097638462e-05, "loss": 0.5366, "step": 15808 }, { "epoch": 2.5807109913881066, "grad_norm": 1.6413053274154663, "learning_rate": 1.8915964404137577e-05, "loss": 0.5377, "step": 15809 }, { "epoch": 2.580874250030611, "grad_norm": 1.5170762538909912, "learning_rate": 1.891582070165832e-05, "loss": 0.4717, "step": 15810 }, { "epoch": 2.5810375086731154, "grad_norm": 1.4555954933166504, "learning_rate": 1.891567699020083e-05, "loss": 0.5238, "step": 15811 }, { "epoch": 2.58120076731562, "grad_norm": 1.7267125844955444, "learning_rate": 1.8915533269765258e-05, "loss": 0.5178, "step": 15812 }, { "epoch": 2.5813640259581243, "grad_norm": 1.5560095310211182, "learning_rate": 1.891538954035174e-05, "loss": 0.5476, "step": 15813 }, { "epoch": 2.5815272846006287, "grad_norm": 1.7520489692687988, "learning_rate": 1.8915245801960435e-05, "loss": 0.5535, "step": 15814 }, { "epoch": 2.581690543243133, "grad_norm": 1.7954952716827393, "learning_rate": 1.891510205459148e-05, "loss": 0.585, "step": 15815 }, { "epoch": 2.581853801885637, "grad_norm": 1.7255955934524536, "learning_rate": 1.8914958298245013e-05, "loss": 0.5262, "step": 15816 }, { "epoch": 2.5820170605281416, "grad_norm": 1.8265472650527954, "learning_rate": 1.891481453292119e-05, "loss": 0.5358, "step": 15817 }, { "epoch": 2.582180319170646, "grad_norm": 1.7701958417892456, "learning_rate": 1.891467075862015e-05, "loss": 0.5671, "step": 15818 }, { "epoch": 2.5823435778131505, "grad_norm": 1.921331524848938, "learning_rate": 1.891452697534204e-05, "loss": 0.6144, "step": 15819 }, { "epoch": 2.582506836455655, "grad_norm": 1.575853943824768, "learning_rate": 1.8914383183087e-05, "loss": 0.4378, "step": 15820 }, { "epoch": 2.5826700950981594, "grad_norm": 1.6944061517715454, "learning_rate": 1.8914239381855184e-05, "loss": 0.5377, "step": 15821 }, { "epoch": 2.582833353740664, "grad_norm": 1.879197597503662, "learning_rate": 1.8914095571646726e-05, "loss": 0.6653, "step": 15822 }, { "epoch": 2.582996612383168, "grad_norm": 1.566739797592163, "learning_rate": 1.8913951752461778e-05, "loss": 0.4407, "step": 15823 }, { "epoch": 2.5831598710256722, "grad_norm": 2.046302556991577, "learning_rate": 1.8913807924300486e-05, "loss": 0.6234, "step": 15824 }, { "epoch": 2.5833231296681767, "grad_norm": 1.9719228744506836, "learning_rate": 1.891366408716299e-05, "loss": 0.6525, "step": 15825 }, { "epoch": 2.583486388310681, "grad_norm": 1.6049559116363525, "learning_rate": 1.8913520241049435e-05, "loss": 0.5155, "step": 15826 }, { "epoch": 2.5836496469531856, "grad_norm": 1.7825756072998047, "learning_rate": 1.8913376385959966e-05, "loss": 0.5804, "step": 15827 }, { "epoch": 2.58381290559569, "grad_norm": 1.9748889207839966, "learning_rate": 1.8913232521894734e-05, "loss": 0.5435, "step": 15828 }, { "epoch": 2.5839761642381944, "grad_norm": 1.9985871315002441, "learning_rate": 1.891308864885388e-05, "loss": 0.6174, "step": 15829 }, { "epoch": 2.584139422880699, "grad_norm": 1.878721833229065, "learning_rate": 1.8912944766837542e-05, "loss": 0.5449, "step": 15830 }, { "epoch": 2.5843026815232033, "grad_norm": 1.6558092832565308, "learning_rate": 1.8912800875845874e-05, "loss": 0.5568, "step": 15831 }, { "epoch": 2.5844659401657077, "grad_norm": 1.6641840934753418, "learning_rate": 1.891265697587902e-05, "loss": 0.5293, "step": 15832 }, { "epoch": 2.584629198808212, "grad_norm": 1.6958576440811157, "learning_rate": 1.891251306693712e-05, "loss": 0.4627, "step": 15833 }, { "epoch": 2.584792457450716, "grad_norm": 1.9580596685409546, "learning_rate": 1.8912369149020323e-05, "loss": 0.6556, "step": 15834 }, { "epoch": 2.5849557160932206, "grad_norm": 1.9817659854888916, "learning_rate": 1.891222522212877e-05, "loss": 0.6182, "step": 15835 }, { "epoch": 2.585118974735725, "grad_norm": 1.73812735080719, "learning_rate": 1.891208128626261e-05, "loss": 0.5295, "step": 15836 }, { "epoch": 2.5852822333782295, "grad_norm": 1.6987968683242798, "learning_rate": 1.891193734142199e-05, "loss": 0.484, "step": 15837 }, { "epoch": 2.585445492020734, "grad_norm": 1.8195581436157227, "learning_rate": 1.8911793387607045e-05, "loss": 0.5954, "step": 15838 }, { "epoch": 2.5856087506632384, "grad_norm": 1.6944156885147095, "learning_rate": 1.8911649424817934e-05, "loss": 0.5161, "step": 15839 }, { "epoch": 2.5857720093057424, "grad_norm": 1.5771658420562744, "learning_rate": 1.891150545305479e-05, "loss": 0.5152, "step": 15840 }, { "epoch": 2.585935267948247, "grad_norm": 1.7848204374313354, "learning_rate": 1.8911361472317762e-05, "loss": 0.5362, "step": 15841 }, { "epoch": 2.5860985265907512, "grad_norm": 2.2011070251464844, "learning_rate": 1.8911217482606996e-05, "loss": 0.6322, "step": 15842 }, { "epoch": 2.5862617852332557, "grad_norm": 1.771855115890503, "learning_rate": 1.8911073483922634e-05, "loss": 0.6122, "step": 15843 }, { "epoch": 2.58642504387576, "grad_norm": 1.7070363759994507, "learning_rate": 1.891092947626483e-05, "loss": 0.5577, "step": 15844 }, { "epoch": 2.5865883025182645, "grad_norm": 1.6505470275878906, "learning_rate": 1.891078545963372e-05, "loss": 0.5344, "step": 15845 }, { "epoch": 2.586751561160769, "grad_norm": 1.484018325805664, "learning_rate": 1.8910641434029447e-05, "loss": 0.511, "step": 15846 }, { "epoch": 2.5869148198032734, "grad_norm": 1.7940258979797363, "learning_rate": 1.8910497399452162e-05, "loss": 0.5868, "step": 15847 }, { "epoch": 2.587078078445778, "grad_norm": 1.9618513584136963, "learning_rate": 1.891035335590201e-05, "loss": 0.5922, "step": 15848 }, { "epoch": 2.5872413370882823, "grad_norm": 1.543418049812317, "learning_rate": 1.8910209303379133e-05, "loss": 0.5329, "step": 15849 }, { "epoch": 2.5874045957307867, "grad_norm": 1.6834832429885864, "learning_rate": 1.891006524188368e-05, "loss": 0.5519, "step": 15850 }, { "epoch": 2.5875678543732907, "grad_norm": 1.712114691734314, "learning_rate": 1.8909921171415793e-05, "loss": 0.5092, "step": 15851 }, { "epoch": 2.587731113015795, "grad_norm": 1.7542380094528198, "learning_rate": 1.8909777091975615e-05, "loss": 0.6018, "step": 15852 }, { "epoch": 2.5878943716582996, "grad_norm": 1.6798354387283325, "learning_rate": 1.8909633003563298e-05, "loss": 0.5403, "step": 15853 }, { "epoch": 2.588057630300804, "grad_norm": 1.9015182256698608, "learning_rate": 1.890948890617898e-05, "loss": 0.5781, "step": 15854 }, { "epoch": 2.5882208889433085, "grad_norm": 1.5162180662155151, "learning_rate": 1.890934479982281e-05, "loss": 0.4704, "step": 15855 }, { "epoch": 2.588384147585813, "grad_norm": 1.6700900793075562, "learning_rate": 1.8909200684494933e-05, "loss": 0.526, "step": 15856 }, { "epoch": 2.5885474062283174, "grad_norm": 1.513726830482483, "learning_rate": 1.890905656019549e-05, "loss": 0.4144, "step": 15857 }, { "epoch": 2.5887106648708214, "grad_norm": 1.5931607484817505, "learning_rate": 1.8908912426924633e-05, "loss": 0.4991, "step": 15858 }, { "epoch": 2.588873923513326, "grad_norm": 1.903397798538208, "learning_rate": 1.8908768284682505e-05, "loss": 0.6478, "step": 15859 }, { "epoch": 2.5890371821558302, "grad_norm": 1.803954005241394, "learning_rate": 1.8908624133469248e-05, "loss": 0.6067, "step": 15860 }, { "epoch": 2.5892004407983347, "grad_norm": 1.8710887432098389, "learning_rate": 1.8908479973285007e-05, "loss": 0.647, "step": 15861 }, { "epoch": 2.589363699440839, "grad_norm": 1.6811472177505493, "learning_rate": 1.8908335804129928e-05, "loss": 0.4872, "step": 15862 }, { "epoch": 2.5895269580833435, "grad_norm": 1.5404911041259766, "learning_rate": 1.890819162600416e-05, "loss": 0.4736, "step": 15863 }, { "epoch": 2.589690216725848, "grad_norm": 1.798147439956665, "learning_rate": 1.8908047438907843e-05, "loss": 0.5745, "step": 15864 }, { "epoch": 2.5898534753683524, "grad_norm": 1.382952332496643, "learning_rate": 1.8907903242841124e-05, "loss": 0.4495, "step": 15865 }, { "epoch": 2.590016734010857, "grad_norm": 1.7136355638504028, "learning_rate": 1.8907759037804154e-05, "loss": 0.5679, "step": 15866 }, { "epoch": 2.5901799926533613, "grad_norm": 2.2534217834472656, "learning_rate": 1.8907614823797068e-05, "loss": 0.6794, "step": 15867 }, { "epoch": 2.5903432512958657, "grad_norm": 1.7388484477996826, "learning_rate": 1.890747060082002e-05, "loss": 0.5581, "step": 15868 }, { "epoch": 2.5905065099383697, "grad_norm": 1.9411038160324097, "learning_rate": 1.890732636887315e-05, "loss": 0.5773, "step": 15869 }, { "epoch": 2.590669768580874, "grad_norm": 1.9532725811004639, "learning_rate": 1.8907182127956604e-05, "loss": 0.6277, "step": 15870 }, { "epoch": 2.5908330272233786, "grad_norm": 1.7046955823898315, "learning_rate": 1.890703787807053e-05, "loss": 0.5807, "step": 15871 }, { "epoch": 2.590996285865883, "grad_norm": 1.6634621620178223, "learning_rate": 1.890689361921507e-05, "loss": 0.5699, "step": 15872 }, { "epoch": 2.5911595445083875, "grad_norm": 1.79916250705719, "learning_rate": 1.890674935139037e-05, "loss": 0.5174, "step": 15873 }, { "epoch": 2.591322803150892, "grad_norm": 1.8918606042861938, "learning_rate": 1.8906605074596575e-05, "loss": 0.6279, "step": 15874 }, { "epoch": 2.591486061793396, "grad_norm": 1.6218488216400146, "learning_rate": 1.890646078883383e-05, "loss": 0.5551, "step": 15875 }, { "epoch": 2.5916493204359003, "grad_norm": 2.553119659423828, "learning_rate": 1.8906316494102283e-05, "loss": 0.86, "step": 15876 }, { "epoch": 2.591812579078405, "grad_norm": 1.9147651195526123, "learning_rate": 1.890617219040208e-05, "loss": 0.6002, "step": 15877 }, { "epoch": 2.5919758377209092, "grad_norm": 2.0101771354675293, "learning_rate": 1.890602787773336e-05, "loss": 0.5656, "step": 15878 }, { "epoch": 2.5921390963634137, "grad_norm": 2.0387187004089355, "learning_rate": 1.8905883556096274e-05, "loss": 0.6124, "step": 15879 }, { "epoch": 2.592302355005918, "grad_norm": 1.7414919137954712, "learning_rate": 1.8905739225490966e-05, "loss": 0.5055, "step": 15880 }, { "epoch": 2.5924656136484225, "grad_norm": 1.8036211729049683, "learning_rate": 1.890559488591758e-05, "loss": 0.6259, "step": 15881 }, { "epoch": 2.592628872290927, "grad_norm": 1.829856514930725, "learning_rate": 1.8905450537376266e-05, "loss": 0.4774, "step": 15882 }, { "epoch": 2.5927921309334314, "grad_norm": 1.883961796760559, "learning_rate": 1.890530617986716e-05, "loss": 0.5315, "step": 15883 }, { "epoch": 2.592955389575936, "grad_norm": 1.7925584316253662, "learning_rate": 1.8905161813390415e-05, "loss": 0.4918, "step": 15884 }, { "epoch": 2.5931186482184403, "grad_norm": 2.084967613220215, "learning_rate": 1.8905017437946177e-05, "loss": 0.6604, "step": 15885 }, { "epoch": 2.5932819068609443, "grad_norm": 1.6382275819778442, "learning_rate": 1.8904873053534585e-05, "loss": 0.5518, "step": 15886 }, { "epoch": 2.5934451655034487, "grad_norm": 1.723232388496399, "learning_rate": 1.890472866015579e-05, "loss": 0.5846, "step": 15887 }, { "epoch": 2.593608424145953, "grad_norm": 1.9139111042022705, "learning_rate": 1.8904584257809936e-05, "loss": 0.5708, "step": 15888 }, { "epoch": 2.5937716827884576, "grad_norm": 1.7365732192993164, "learning_rate": 1.890443984649717e-05, "loss": 0.6446, "step": 15889 }, { "epoch": 2.593934941430962, "grad_norm": 1.9514857530593872, "learning_rate": 1.8904295426217632e-05, "loss": 0.5815, "step": 15890 }, { "epoch": 2.5940982000734665, "grad_norm": 1.9302114248275757, "learning_rate": 1.890415099697147e-05, "loss": 0.653, "step": 15891 }, { "epoch": 2.594261458715971, "grad_norm": 2.1644363403320312, "learning_rate": 1.8904006558758833e-05, "loss": 0.6152, "step": 15892 }, { "epoch": 2.594424717358475, "grad_norm": 2.203773021697998, "learning_rate": 1.8903862111579863e-05, "loss": 0.6157, "step": 15893 }, { "epoch": 2.5945879760009793, "grad_norm": 1.5776147842407227, "learning_rate": 1.8903717655434708e-05, "loss": 0.503, "step": 15894 }, { "epoch": 2.594751234643484, "grad_norm": 1.9565842151641846, "learning_rate": 1.8903573190323508e-05, "loss": 0.6873, "step": 15895 }, { "epoch": 2.594914493285988, "grad_norm": 1.9383671283721924, "learning_rate": 1.8903428716246414e-05, "loss": 0.5725, "step": 15896 }, { "epoch": 2.5950777519284927, "grad_norm": 1.7007063627243042, "learning_rate": 1.890328423320357e-05, "loss": 0.6438, "step": 15897 }, { "epoch": 2.595241010570997, "grad_norm": 1.829313039779663, "learning_rate": 1.8903139741195122e-05, "loss": 0.5856, "step": 15898 }, { "epoch": 2.5954042692135015, "grad_norm": 1.7949849367141724, "learning_rate": 1.8902995240221215e-05, "loss": 0.5748, "step": 15899 }, { "epoch": 2.595567527856006, "grad_norm": 1.934328317642212, "learning_rate": 1.8902850730281993e-05, "loss": 0.544, "step": 15900 }, { "epoch": 2.5957307864985104, "grad_norm": 1.6825941801071167, "learning_rate": 1.89027062113776e-05, "loss": 0.583, "step": 15901 }, { "epoch": 2.595894045141015, "grad_norm": 1.6888014078140259, "learning_rate": 1.8902561683508186e-05, "loss": 0.56, "step": 15902 }, { "epoch": 2.5960573037835193, "grad_norm": 1.4530231952667236, "learning_rate": 1.89024171466739e-05, "loss": 0.4513, "step": 15903 }, { "epoch": 2.5962205624260233, "grad_norm": 1.7298177480697632, "learning_rate": 1.8902272600874877e-05, "loss": 0.6552, "step": 15904 }, { "epoch": 2.5963838210685277, "grad_norm": 1.6382168531417847, "learning_rate": 1.8902128046111267e-05, "loss": 0.5871, "step": 15905 }, { "epoch": 2.596547079711032, "grad_norm": 1.8850725889205933, "learning_rate": 1.8901983482383217e-05, "loss": 0.5957, "step": 15906 }, { "epoch": 2.5967103383535366, "grad_norm": 2.123091697692871, "learning_rate": 1.8901838909690874e-05, "loss": 0.7189, "step": 15907 }, { "epoch": 2.596873596996041, "grad_norm": 1.7301981449127197, "learning_rate": 1.890169432803438e-05, "loss": 0.5927, "step": 15908 }, { "epoch": 2.5970368556385455, "grad_norm": 2.0424020290374756, "learning_rate": 1.8901549737413883e-05, "loss": 0.6074, "step": 15909 }, { "epoch": 2.59720011428105, "grad_norm": 1.440181016921997, "learning_rate": 1.890140513782953e-05, "loss": 0.4932, "step": 15910 }, { "epoch": 2.597363372923554, "grad_norm": 1.7189973592758179, "learning_rate": 1.890126052928146e-05, "loss": 0.5087, "step": 15911 }, { "epoch": 2.5975266315660583, "grad_norm": 1.9165375232696533, "learning_rate": 1.8901115911769824e-05, "loss": 0.6578, "step": 15912 }, { "epoch": 2.597689890208563, "grad_norm": 1.7366178035736084, "learning_rate": 1.890097128529477e-05, "loss": 0.5166, "step": 15913 }, { "epoch": 2.597853148851067, "grad_norm": 1.8777168989181519, "learning_rate": 1.8900826649856435e-05, "loss": 0.5918, "step": 15914 }, { "epoch": 2.5980164074935717, "grad_norm": 1.648166537284851, "learning_rate": 1.8900682005454974e-05, "loss": 0.5175, "step": 15915 }, { "epoch": 2.598179666136076, "grad_norm": 1.4606493711471558, "learning_rate": 1.8900537352090523e-05, "loss": 0.4997, "step": 15916 }, { "epoch": 2.5983429247785805, "grad_norm": 1.9237103462219238, "learning_rate": 1.890039268976324e-05, "loss": 0.6462, "step": 15917 }, { "epoch": 2.598506183421085, "grad_norm": 1.84309983253479, "learning_rate": 1.890024801847326e-05, "loss": 0.5394, "step": 15918 }, { "epoch": 2.5986694420635894, "grad_norm": 1.6959056854248047, "learning_rate": 1.8900103338220737e-05, "loss": 0.5698, "step": 15919 }, { "epoch": 2.598832700706094, "grad_norm": 1.764296054840088, "learning_rate": 1.889995864900581e-05, "loss": 0.5898, "step": 15920 }, { "epoch": 2.5989959593485983, "grad_norm": 1.6734576225280762, "learning_rate": 1.8899813950828626e-05, "loss": 0.5124, "step": 15921 }, { "epoch": 2.5991592179911023, "grad_norm": 1.6070983409881592, "learning_rate": 1.889966924368933e-05, "loss": 0.5523, "step": 15922 }, { "epoch": 2.5993224766336067, "grad_norm": 1.7419391870498657, "learning_rate": 1.889952452758807e-05, "loss": 0.6179, "step": 15923 }, { "epoch": 2.599485735276111, "grad_norm": 2.025076150894165, "learning_rate": 1.8899379802524994e-05, "loss": 0.5504, "step": 15924 }, { "epoch": 2.5996489939186156, "grad_norm": 1.7113566398620605, "learning_rate": 1.8899235068500245e-05, "loss": 0.615, "step": 15925 }, { "epoch": 2.59981225256112, "grad_norm": 1.3928203582763672, "learning_rate": 1.8899090325513968e-05, "loss": 0.4944, "step": 15926 }, { "epoch": 2.5999755112036245, "grad_norm": 1.744742751121521, "learning_rate": 1.8898945573566306e-05, "loss": 0.5388, "step": 15927 }, { "epoch": 2.6001387698461285, "grad_norm": 1.4217644929885864, "learning_rate": 1.8898800812657412e-05, "loss": 0.5056, "step": 15928 }, { "epoch": 2.600302028488633, "grad_norm": 2.317075490951538, "learning_rate": 1.8898656042787428e-05, "loss": 0.6882, "step": 15929 }, { "epoch": 2.6004652871311373, "grad_norm": 1.8306225538253784, "learning_rate": 1.88985112639565e-05, "loss": 0.5602, "step": 15930 }, { "epoch": 2.6006285457736418, "grad_norm": 1.4872678518295288, "learning_rate": 1.8898366476164768e-05, "loss": 0.5519, "step": 15931 }, { "epoch": 2.600791804416146, "grad_norm": 2.0384857654571533, "learning_rate": 1.889822167941239e-05, "loss": 0.5188, "step": 15932 }, { "epoch": 2.6009550630586507, "grad_norm": 1.783871054649353, "learning_rate": 1.88980768736995e-05, "loss": 0.5524, "step": 15933 }, { "epoch": 2.601118321701155, "grad_norm": 1.6538704633712769, "learning_rate": 1.8897932059026255e-05, "loss": 0.5456, "step": 15934 }, { "epoch": 2.6012815803436595, "grad_norm": 1.7779598236083984, "learning_rate": 1.889778723539279e-05, "loss": 0.6299, "step": 15935 }, { "epoch": 2.601444838986164, "grad_norm": 1.7048213481903076, "learning_rate": 1.8897642402799256e-05, "loss": 0.5981, "step": 15936 }, { "epoch": 2.6016080976286684, "grad_norm": 1.8793658018112183, "learning_rate": 1.8897497561245798e-05, "loss": 0.5167, "step": 15937 }, { "epoch": 2.601771356271173, "grad_norm": 1.89106285572052, "learning_rate": 1.8897352710732564e-05, "loss": 0.6911, "step": 15938 }, { "epoch": 2.601934614913677, "grad_norm": 1.621195912361145, "learning_rate": 1.8897207851259698e-05, "loss": 0.5344, "step": 15939 }, { "epoch": 2.6020978735561813, "grad_norm": 2.2734928131103516, "learning_rate": 1.8897062982827347e-05, "loss": 0.688, "step": 15940 }, { "epoch": 2.6022611321986857, "grad_norm": 1.7665585279464722, "learning_rate": 1.8896918105435654e-05, "loss": 0.5693, "step": 15941 }, { "epoch": 2.60242439084119, "grad_norm": 1.818091630935669, "learning_rate": 1.8896773219084768e-05, "loss": 0.5532, "step": 15942 }, { "epoch": 2.6025876494836946, "grad_norm": 1.6954485177993774, "learning_rate": 1.8896628323774832e-05, "loss": 0.5275, "step": 15943 }, { "epoch": 2.602750908126199, "grad_norm": 1.8419591188430786, "learning_rate": 1.8896483419505994e-05, "loss": 0.5672, "step": 15944 }, { "epoch": 2.6029141667687035, "grad_norm": 1.5640428066253662, "learning_rate": 1.88963385062784e-05, "loss": 0.5541, "step": 15945 }, { "epoch": 2.6030774254112075, "grad_norm": 2.0099897384643555, "learning_rate": 1.8896193584092197e-05, "loss": 0.6267, "step": 15946 }, { "epoch": 2.603240684053712, "grad_norm": 1.71855628490448, "learning_rate": 1.8896048652947526e-05, "loss": 0.5864, "step": 15947 }, { "epoch": 2.6034039426962163, "grad_norm": 1.6069624423980713, "learning_rate": 1.8895903712844542e-05, "loss": 0.5484, "step": 15948 }, { "epoch": 2.6035672013387208, "grad_norm": 1.8283424377441406, "learning_rate": 1.8895758763783383e-05, "loss": 0.5475, "step": 15949 }, { "epoch": 2.603730459981225, "grad_norm": 2.099742889404297, "learning_rate": 1.8895613805764196e-05, "loss": 0.6749, "step": 15950 }, { "epoch": 2.6038937186237296, "grad_norm": 1.3237226009368896, "learning_rate": 1.8895468838787127e-05, "loss": 0.4767, "step": 15951 }, { "epoch": 2.604056977266234, "grad_norm": 1.611938714981079, "learning_rate": 1.8895323862852322e-05, "loss": 0.5198, "step": 15952 }, { "epoch": 2.6042202359087385, "grad_norm": 1.5646326541900635, "learning_rate": 1.8895178877959934e-05, "loss": 0.4804, "step": 15953 }, { "epoch": 2.604383494551243, "grad_norm": 1.726051688194275, "learning_rate": 1.88950338841101e-05, "loss": 0.5261, "step": 15954 }, { "epoch": 2.6045467531937474, "grad_norm": 1.7808393239974976, "learning_rate": 1.889488888130297e-05, "loss": 0.5571, "step": 15955 }, { "epoch": 2.604710011836252, "grad_norm": 1.513533115386963, "learning_rate": 1.889474386953869e-05, "loss": 0.4762, "step": 15956 }, { "epoch": 2.604873270478756, "grad_norm": 1.9746071100234985, "learning_rate": 1.8894598848817403e-05, "loss": 0.6804, "step": 15957 }, { "epoch": 2.6050365291212603, "grad_norm": 1.8099535703659058, "learning_rate": 1.889445381913926e-05, "loss": 0.5938, "step": 15958 }, { "epoch": 2.6051997877637647, "grad_norm": 1.952694296836853, "learning_rate": 1.88943087805044e-05, "loss": 0.5928, "step": 15959 }, { "epoch": 2.605363046406269, "grad_norm": 1.9216512441635132, "learning_rate": 1.889416373291298e-05, "loss": 0.5502, "step": 15960 }, { "epoch": 2.6055263050487736, "grad_norm": 1.5623756647109985, "learning_rate": 1.8894018676365134e-05, "loss": 0.4726, "step": 15961 }, { "epoch": 2.605689563691278, "grad_norm": 1.5827032327651978, "learning_rate": 1.8893873610861013e-05, "loss": 0.577, "step": 15962 }, { "epoch": 2.6058528223337825, "grad_norm": 1.7254403829574585, "learning_rate": 1.8893728536400766e-05, "loss": 0.5333, "step": 15963 }, { "epoch": 2.6060160809762865, "grad_norm": 1.6910972595214844, "learning_rate": 1.8893583452984535e-05, "loss": 0.5862, "step": 15964 }, { "epoch": 2.606179339618791, "grad_norm": 1.5573484897613525, "learning_rate": 1.889343836061247e-05, "loss": 0.4901, "step": 15965 }, { "epoch": 2.6063425982612953, "grad_norm": 1.8216878175735474, "learning_rate": 1.8893293259284715e-05, "loss": 0.5334, "step": 15966 }, { "epoch": 2.6065058569037998, "grad_norm": 1.7856532335281372, "learning_rate": 1.8893148149001415e-05, "loss": 0.5492, "step": 15967 }, { "epoch": 2.606669115546304, "grad_norm": 1.9734309911727905, "learning_rate": 1.8893003029762717e-05, "loss": 0.6394, "step": 15968 }, { "epoch": 2.6068323741888086, "grad_norm": 2.0858616828918457, "learning_rate": 1.889285790156877e-05, "loss": 0.6078, "step": 15969 }, { "epoch": 2.606995632831313, "grad_norm": 2.152456760406494, "learning_rate": 1.8892712764419716e-05, "loss": 0.6943, "step": 15970 }, { "epoch": 2.6071588914738175, "grad_norm": 1.6914440393447876, "learning_rate": 1.88925676183157e-05, "loss": 0.5183, "step": 15971 }, { "epoch": 2.607322150116322, "grad_norm": 1.440643548965454, "learning_rate": 1.8892422463256873e-05, "loss": 0.3959, "step": 15972 }, { "epoch": 2.6074854087588264, "grad_norm": 1.933098554611206, "learning_rate": 1.889227729924338e-05, "loss": 0.6145, "step": 15973 }, { "epoch": 2.607648667401331, "grad_norm": 1.8300108909606934, "learning_rate": 1.8892132126275366e-05, "loss": 0.5894, "step": 15974 }, { "epoch": 2.607811926043835, "grad_norm": 1.7827945947647095, "learning_rate": 1.8891986944352974e-05, "loss": 0.5237, "step": 15975 }, { "epoch": 2.6079751846863393, "grad_norm": 2.116607189178467, "learning_rate": 1.8891841753476356e-05, "loss": 0.7397, "step": 15976 }, { "epoch": 2.6081384433288437, "grad_norm": 1.7372130155563354, "learning_rate": 1.8891696553645654e-05, "loss": 0.5921, "step": 15977 }, { "epoch": 2.608301701971348, "grad_norm": 1.721508264541626, "learning_rate": 1.8891551344861018e-05, "loss": 0.6362, "step": 15978 }, { "epoch": 2.6084649606138526, "grad_norm": 1.8527566194534302, "learning_rate": 1.8891406127122593e-05, "loss": 0.6013, "step": 15979 }, { "epoch": 2.608628219256357, "grad_norm": 1.9926713705062866, "learning_rate": 1.8891260900430518e-05, "loss": 0.6905, "step": 15980 }, { "epoch": 2.608791477898861, "grad_norm": 1.7163844108581543, "learning_rate": 1.8891115664784953e-05, "loss": 0.6407, "step": 15981 }, { "epoch": 2.6089547365413654, "grad_norm": 1.8728090524673462, "learning_rate": 1.8890970420186035e-05, "loss": 0.617, "step": 15982 }, { "epoch": 2.60911799518387, "grad_norm": 2.281320095062256, "learning_rate": 1.889082516663391e-05, "loss": 0.7085, "step": 15983 }, { "epoch": 2.6092812538263743, "grad_norm": 1.5436761379241943, "learning_rate": 1.889067990412873e-05, "loss": 0.5235, "step": 15984 }, { "epoch": 2.6094445124688788, "grad_norm": 1.6117883920669556, "learning_rate": 1.889053463267063e-05, "loss": 0.5525, "step": 15985 }, { "epoch": 2.609607771111383, "grad_norm": 1.634171485900879, "learning_rate": 1.8890389352259774e-05, "loss": 0.518, "step": 15986 }, { "epoch": 2.6097710297538876, "grad_norm": 1.7762027978897095, "learning_rate": 1.8890244062896294e-05, "loss": 0.6243, "step": 15987 }, { "epoch": 2.609934288396392, "grad_norm": 1.839897632598877, "learning_rate": 1.889009876458034e-05, "loss": 0.6744, "step": 15988 }, { "epoch": 2.6100975470388965, "grad_norm": 1.4397517442703247, "learning_rate": 1.8889953457312057e-05, "loss": 0.505, "step": 15989 }, { "epoch": 2.610260805681401, "grad_norm": 1.5810065269470215, "learning_rate": 1.8889808141091598e-05, "loss": 0.5364, "step": 15990 }, { "epoch": 2.6104240643239054, "grad_norm": 1.7853951454162598, "learning_rate": 1.8889662815919102e-05, "loss": 0.6085, "step": 15991 }, { "epoch": 2.6105873229664094, "grad_norm": 1.9415342807769775, "learning_rate": 1.8889517481794718e-05, "loss": 0.6611, "step": 15992 }, { "epoch": 2.610750581608914, "grad_norm": 1.524259090423584, "learning_rate": 1.888937213871859e-05, "loss": 0.5362, "step": 15993 }, { "epoch": 2.6109138402514183, "grad_norm": 1.7251135110855103, "learning_rate": 1.888922678669087e-05, "loss": 0.5772, "step": 15994 }, { "epoch": 2.6110770988939227, "grad_norm": 1.9065886735916138, "learning_rate": 1.8889081425711698e-05, "loss": 0.6064, "step": 15995 }, { "epoch": 2.611240357536427, "grad_norm": 1.726266860961914, "learning_rate": 1.8888936055781226e-05, "loss": 0.5668, "step": 15996 }, { "epoch": 2.6114036161789316, "grad_norm": 1.6911771297454834, "learning_rate": 1.8888790676899597e-05, "loss": 0.5419, "step": 15997 }, { "epoch": 2.611566874821436, "grad_norm": 1.5842859745025635, "learning_rate": 1.8888645289066958e-05, "loss": 0.5437, "step": 15998 }, { "epoch": 2.61173013346394, "grad_norm": 1.443833827972412, "learning_rate": 1.8888499892283458e-05, "loss": 0.4901, "step": 15999 }, { "epoch": 2.6118933921064444, "grad_norm": 1.9202293157577515, "learning_rate": 1.8888354486549238e-05, "loss": 0.73, "step": 16000 }, { "epoch": 2.612056650748949, "grad_norm": 2.20485782623291, "learning_rate": 1.8888209071864448e-05, "loss": 0.7008, "step": 16001 }, { "epoch": 2.6122199093914533, "grad_norm": 1.5755879878997803, "learning_rate": 1.8888063648229234e-05, "loss": 0.4696, "step": 16002 }, { "epoch": 2.6123831680339578, "grad_norm": 1.660231590270996, "learning_rate": 1.8887918215643738e-05, "loss": 0.479, "step": 16003 }, { "epoch": 2.612546426676462, "grad_norm": 1.9225854873657227, "learning_rate": 1.8887772774108116e-05, "loss": 0.674, "step": 16004 }, { "epoch": 2.6127096853189666, "grad_norm": 1.6600604057312012, "learning_rate": 1.888762732362251e-05, "loss": 0.5113, "step": 16005 }, { "epoch": 2.612872943961471, "grad_norm": 1.730082392692566, "learning_rate": 1.888748186418706e-05, "loss": 0.5264, "step": 16006 }, { "epoch": 2.6130362026039755, "grad_norm": 1.6567931175231934, "learning_rate": 1.888733639580192e-05, "loss": 0.5475, "step": 16007 }, { "epoch": 2.61319946124648, "grad_norm": 1.7413051128387451, "learning_rate": 1.888719091846724e-05, "loss": 0.6082, "step": 16008 }, { "epoch": 2.6133627198889844, "grad_norm": 1.8411896228790283, "learning_rate": 1.8887045432183156e-05, "loss": 0.5347, "step": 16009 }, { "epoch": 2.6135259785314884, "grad_norm": 1.6246861219406128, "learning_rate": 1.8886899936949822e-05, "loss": 0.5789, "step": 16010 }, { "epoch": 2.613689237173993, "grad_norm": 1.7005807161331177, "learning_rate": 1.8886754432767382e-05, "loss": 0.5952, "step": 16011 }, { "epoch": 2.6138524958164973, "grad_norm": 1.8778045177459717, "learning_rate": 1.888660891963598e-05, "loss": 0.6395, "step": 16012 }, { "epoch": 2.6140157544590017, "grad_norm": 1.590225100517273, "learning_rate": 1.888646339755577e-05, "loss": 0.4592, "step": 16013 }, { "epoch": 2.614179013101506, "grad_norm": 1.8885276317596436, "learning_rate": 1.8886317866526888e-05, "loss": 0.5128, "step": 16014 }, { "epoch": 2.6143422717440106, "grad_norm": 1.7249761819839478, "learning_rate": 1.888617232654949e-05, "loss": 0.5781, "step": 16015 }, { "epoch": 2.6145055303865146, "grad_norm": 1.9317954778671265, "learning_rate": 1.888602677762372e-05, "loss": 0.6543, "step": 16016 }, { "epoch": 2.614668789029019, "grad_norm": 1.8166687488555908, "learning_rate": 1.888588121974972e-05, "loss": 0.5995, "step": 16017 }, { "epoch": 2.6148320476715234, "grad_norm": 1.4822840690612793, "learning_rate": 1.888573565292764e-05, "loss": 0.5089, "step": 16018 }, { "epoch": 2.614995306314028, "grad_norm": 1.746072769165039, "learning_rate": 1.8885590077157627e-05, "loss": 0.576, "step": 16019 }, { "epoch": 2.6151585649565323, "grad_norm": 1.7825593948364258, "learning_rate": 1.888544449243983e-05, "loss": 0.5585, "step": 16020 }, { "epoch": 2.6153218235990368, "grad_norm": 1.7522176504135132, "learning_rate": 1.888529889877439e-05, "loss": 0.509, "step": 16021 }, { "epoch": 2.615485082241541, "grad_norm": 1.8622236251831055, "learning_rate": 1.8885153296161456e-05, "loss": 0.4859, "step": 16022 }, { "epoch": 2.6156483408840456, "grad_norm": 1.5628907680511475, "learning_rate": 1.8885007684601175e-05, "loss": 0.5187, "step": 16023 }, { "epoch": 2.61581159952655, "grad_norm": 1.306572675704956, "learning_rate": 1.8884862064093698e-05, "loss": 0.4316, "step": 16024 }, { "epoch": 2.6159748581690545, "grad_norm": 2.157076358795166, "learning_rate": 1.888471643463916e-05, "loss": 0.6938, "step": 16025 }, { "epoch": 2.616138116811559, "grad_norm": 1.960085153579712, "learning_rate": 1.888457079623772e-05, "loss": 0.5651, "step": 16026 }, { "epoch": 2.616301375454063, "grad_norm": 1.660498023033142, "learning_rate": 1.8884425148889517e-05, "loss": 0.5688, "step": 16027 }, { "epoch": 2.6164646340965674, "grad_norm": 1.741895079612732, "learning_rate": 1.8884279492594705e-05, "loss": 0.5044, "step": 16028 }, { "epoch": 2.616627892739072, "grad_norm": 1.8037664890289307, "learning_rate": 1.888413382735342e-05, "loss": 0.5938, "step": 16029 }, { "epoch": 2.6167911513815763, "grad_norm": 2.1374611854553223, "learning_rate": 1.8883988153165815e-05, "loss": 0.5723, "step": 16030 }, { "epoch": 2.6169544100240807, "grad_norm": 1.8800091743469238, "learning_rate": 1.8883842470032042e-05, "loss": 0.5698, "step": 16031 }, { "epoch": 2.617117668666585, "grad_norm": 2.136476755142212, "learning_rate": 1.8883696777952236e-05, "loss": 0.6524, "step": 16032 }, { "epoch": 2.6172809273090896, "grad_norm": 1.968754529953003, "learning_rate": 1.8883551076926552e-05, "loss": 0.535, "step": 16033 }, { "epoch": 2.6174441859515936, "grad_norm": 1.8754411935806274, "learning_rate": 1.8883405366955134e-05, "loss": 0.578, "step": 16034 }, { "epoch": 2.617607444594098, "grad_norm": 1.8510242700576782, "learning_rate": 1.888325964803813e-05, "loss": 0.574, "step": 16035 }, { "epoch": 2.6177707032366024, "grad_norm": 1.6279468536376953, "learning_rate": 1.8883113920175687e-05, "loss": 0.5077, "step": 16036 }, { "epoch": 2.617933961879107, "grad_norm": 1.721198320388794, "learning_rate": 1.888296818336795e-05, "loss": 0.5512, "step": 16037 }, { "epoch": 2.6180972205216113, "grad_norm": 1.9198247194290161, "learning_rate": 1.888282243761506e-05, "loss": 0.6996, "step": 16038 }, { "epoch": 2.6182604791641158, "grad_norm": 1.865804672241211, "learning_rate": 1.888267668291718e-05, "loss": 0.5449, "step": 16039 }, { "epoch": 2.61842373780662, "grad_norm": 1.718827247619629, "learning_rate": 1.8882530919274442e-05, "loss": 0.525, "step": 16040 }, { "epoch": 2.6185869964491246, "grad_norm": 1.5678411722183228, "learning_rate": 1.8882385146686997e-05, "loss": 0.5419, "step": 16041 }, { "epoch": 2.618750255091629, "grad_norm": 1.8673676252365112, "learning_rate": 1.8882239365154996e-05, "loss": 0.5889, "step": 16042 }, { "epoch": 2.6189135137341335, "grad_norm": 1.54221773147583, "learning_rate": 1.888209357467858e-05, "loss": 0.5131, "step": 16043 }, { "epoch": 2.619076772376638, "grad_norm": 1.9522465467453003, "learning_rate": 1.8881947775257898e-05, "loss": 0.6681, "step": 16044 }, { "epoch": 2.619240031019142, "grad_norm": 1.5264219045639038, "learning_rate": 1.8881801966893095e-05, "loss": 0.5317, "step": 16045 }, { "epoch": 2.6194032896616464, "grad_norm": 1.9213430881500244, "learning_rate": 1.8881656149584323e-05, "loss": 0.5448, "step": 16046 }, { "epoch": 2.619566548304151, "grad_norm": 2.132582664489746, "learning_rate": 1.8881510323331723e-05, "loss": 0.7566, "step": 16047 }, { "epoch": 2.6197298069466552, "grad_norm": 2.031709909439087, "learning_rate": 1.8881364488135448e-05, "loss": 0.5826, "step": 16048 }, { "epoch": 2.6198930655891597, "grad_norm": 1.8390910625457764, "learning_rate": 1.8881218643995637e-05, "loss": 0.6175, "step": 16049 }, { "epoch": 2.620056324231664, "grad_norm": 1.7144988775253296, "learning_rate": 1.8881072790912445e-05, "loss": 0.5647, "step": 16050 }, { "epoch": 2.6202195828741686, "grad_norm": 1.6184475421905518, "learning_rate": 1.8880926928886012e-05, "loss": 0.4858, "step": 16051 }, { "epoch": 2.6203828415166726, "grad_norm": 2.1351308822631836, "learning_rate": 1.888078105791649e-05, "loss": 0.6459, "step": 16052 }, { "epoch": 2.620546100159177, "grad_norm": 1.8463491201400757, "learning_rate": 1.8880635178004024e-05, "loss": 0.6501, "step": 16053 }, { "epoch": 2.6207093588016814, "grad_norm": 1.6715669631958008, "learning_rate": 1.888048928914876e-05, "loss": 0.5513, "step": 16054 }, { "epoch": 2.620872617444186, "grad_norm": 1.9554920196533203, "learning_rate": 1.8880343391350845e-05, "loss": 0.7249, "step": 16055 }, { "epoch": 2.6210358760866903, "grad_norm": 1.59914231300354, "learning_rate": 1.8880197484610427e-05, "loss": 0.5215, "step": 16056 }, { "epoch": 2.6211991347291947, "grad_norm": 1.7899322509765625, "learning_rate": 1.8880051568927655e-05, "loss": 0.6089, "step": 16057 }, { "epoch": 2.621362393371699, "grad_norm": 1.810563087463379, "learning_rate": 1.887990564430267e-05, "loss": 0.6498, "step": 16058 }, { "epoch": 2.6215256520142036, "grad_norm": 1.8868913650512695, "learning_rate": 1.8879759710735625e-05, "loss": 0.6287, "step": 16059 }, { "epoch": 2.621688910656708, "grad_norm": 1.8164985179901123, "learning_rate": 1.8879613768226662e-05, "loss": 0.621, "step": 16060 }, { "epoch": 2.6218521692992125, "grad_norm": 1.9097576141357422, "learning_rate": 1.887946781677593e-05, "loss": 0.5875, "step": 16061 }, { "epoch": 2.622015427941717, "grad_norm": 1.8151062726974487, "learning_rate": 1.887932185638358e-05, "loss": 0.5762, "step": 16062 }, { "epoch": 2.622178686584221, "grad_norm": 1.9798498153686523, "learning_rate": 1.887917588704975e-05, "loss": 0.6161, "step": 16063 }, { "epoch": 2.6223419452267254, "grad_norm": 1.6471327543258667, "learning_rate": 1.8879029908774594e-05, "loss": 0.5381, "step": 16064 }, { "epoch": 2.62250520386923, "grad_norm": 1.6532760858535767, "learning_rate": 1.887888392155826e-05, "loss": 0.5, "step": 16065 }, { "epoch": 2.6226684625117342, "grad_norm": 1.4578909873962402, "learning_rate": 1.887873792540089e-05, "loss": 0.4605, "step": 16066 }, { "epoch": 2.6228317211542387, "grad_norm": 1.70945143699646, "learning_rate": 1.8878591920302637e-05, "loss": 0.5323, "step": 16067 }, { "epoch": 2.622994979796743, "grad_norm": 1.795084834098816, "learning_rate": 1.887844590626364e-05, "loss": 0.6355, "step": 16068 }, { "epoch": 2.623158238439247, "grad_norm": 1.9677430391311646, "learning_rate": 1.887829988328405e-05, "loss": 0.5515, "step": 16069 }, { "epoch": 2.6233214970817516, "grad_norm": 1.7874765396118164, "learning_rate": 1.8878153851364013e-05, "loss": 0.5875, "step": 16070 }, { "epoch": 2.623484755724256, "grad_norm": 1.8282233476638794, "learning_rate": 1.8878007810503683e-05, "loss": 0.6815, "step": 16071 }, { "epoch": 2.6236480143667604, "grad_norm": 1.4319413900375366, "learning_rate": 1.8877861760703198e-05, "loss": 0.5353, "step": 16072 }, { "epoch": 2.623811273009265, "grad_norm": 1.6155452728271484, "learning_rate": 1.887771570196271e-05, "loss": 0.5258, "step": 16073 }, { "epoch": 2.6239745316517693, "grad_norm": 1.999325156211853, "learning_rate": 1.8877569634282363e-05, "loss": 0.6271, "step": 16074 }, { "epoch": 2.6241377902942737, "grad_norm": 1.917514443397522, "learning_rate": 1.8877423557662307e-05, "loss": 0.5758, "step": 16075 }, { "epoch": 2.624301048936778, "grad_norm": 1.7548868656158447, "learning_rate": 1.8877277472102687e-05, "loss": 0.5443, "step": 16076 }, { "epoch": 2.6244643075792826, "grad_norm": 1.596685528755188, "learning_rate": 1.887713137760365e-05, "loss": 0.5489, "step": 16077 }, { "epoch": 2.624627566221787, "grad_norm": 1.501184344291687, "learning_rate": 1.8876985274165345e-05, "loss": 0.4903, "step": 16078 }, { "epoch": 2.6247908248642915, "grad_norm": 2.070462226867676, "learning_rate": 1.887683916178792e-05, "loss": 0.594, "step": 16079 }, { "epoch": 2.6249540835067955, "grad_norm": 1.7820243835449219, "learning_rate": 1.8876693040471518e-05, "loss": 0.5281, "step": 16080 }, { "epoch": 2.6251173421493, "grad_norm": 2.274573802947998, "learning_rate": 1.887654691021629e-05, "loss": 0.6145, "step": 16081 }, { "epoch": 2.6252806007918044, "grad_norm": 1.9335205554962158, "learning_rate": 1.887640077102238e-05, "loss": 0.5572, "step": 16082 }, { "epoch": 2.625443859434309, "grad_norm": 1.6394728422164917, "learning_rate": 1.887625462288994e-05, "loss": 0.5724, "step": 16083 }, { "epoch": 2.6256071180768132, "grad_norm": 1.9937756061553955, "learning_rate": 1.887610846581911e-05, "loss": 0.6163, "step": 16084 }, { "epoch": 2.6257703767193177, "grad_norm": 1.7527790069580078, "learning_rate": 1.8875962299810042e-05, "loss": 0.507, "step": 16085 }, { "epoch": 2.625933635361822, "grad_norm": 1.701773762702942, "learning_rate": 1.8875816124862885e-05, "loss": 0.5451, "step": 16086 }, { "epoch": 2.626096894004326, "grad_norm": 1.7611727714538574, "learning_rate": 1.887566994097778e-05, "loss": 0.5088, "step": 16087 }, { "epoch": 2.6262601526468305, "grad_norm": 1.3634651899337769, "learning_rate": 1.887552374815488e-05, "loss": 0.4625, "step": 16088 }, { "epoch": 2.626423411289335, "grad_norm": 1.5296481847763062, "learning_rate": 1.887537754639433e-05, "loss": 0.4768, "step": 16089 }, { "epoch": 2.6265866699318394, "grad_norm": 1.5896655321121216, "learning_rate": 1.8875231335696277e-05, "loss": 0.5251, "step": 16090 }, { "epoch": 2.626749928574344, "grad_norm": 1.8863362073898315, "learning_rate": 1.8875085116060865e-05, "loss": 0.5552, "step": 16091 }, { "epoch": 2.6269131872168483, "grad_norm": 1.7911053895950317, "learning_rate": 1.887493888748825e-05, "loss": 0.625, "step": 16092 }, { "epoch": 2.6270764458593527, "grad_norm": 1.4910928010940552, "learning_rate": 1.887479264997857e-05, "loss": 0.459, "step": 16093 }, { "epoch": 2.627239704501857, "grad_norm": 1.7951254844665527, "learning_rate": 1.8874646403531978e-05, "loss": 0.6646, "step": 16094 }, { "epoch": 2.6274029631443616, "grad_norm": 1.73624849319458, "learning_rate": 1.8874500148148617e-05, "loss": 0.535, "step": 16095 }, { "epoch": 2.627566221786866, "grad_norm": 1.7552388906478882, "learning_rate": 1.8874353883828643e-05, "loss": 0.5179, "step": 16096 }, { "epoch": 2.6277294804293705, "grad_norm": 1.8437241315841675, "learning_rate": 1.887420761057219e-05, "loss": 0.5855, "step": 16097 }, { "epoch": 2.6278927390718745, "grad_norm": 1.7313461303710938, "learning_rate": 1.8874061328379416e-05, "loss": 0.6025, "step": 16098 }, { "epoch": 2.628055997714379, "grad_norm": 1.8763130903244019, "learning_rate": 1.8873915037250463e-05, "loss": 0.5781, "step": 16099 }, { "epoch": 2.6282192563568834, "grad_norm": 1.7636576890945435, "learning_rate": 1.887376873718548e-05, "loss": 0.5688, "step": 16100 }, { "epoch": 2.628382514999388, "grad_norm": 1.8299024105072021, "learning_rate": 1.8873622428184616e-05, "loss": 0.5631, "step": 16101 }, { "epoch": 2.6285457736418922, "grad_norm": 1.7506914138793945, "learning_rate": 1.8873476110248015e-05, "loss": 0.5106, "step": 16102 }, { "epoch": 2.6287090322843967, "grad_norm": 1.6863343715667725, "learning_rate": 1.8873329783375823e-05, "loss": 0.5548, "step": 16103 }, { "epoch": 2.6288722909269007, "grad_norm": 1.896637201309204, "learning_rate": 1.8873183447568195e-05, "loss": 0.6055, "step": 16104 }, { "epoch": 2.629035549569405, "grad_norm": 1.4419078826904297, "learning_rate": 1.8873037102825275e-05, "loss": 0.4617, "step": 16105 }, { "epoch": 2.6291988082119095, "grad_norm": 2.1398956775665283, "learning_rate": 1.8872890749147204e-05, "loss": 0.655, "step": 16106 }, { "epoch": 2.629362066854414, "grad_norm": 2.2492048740386963, "learning_rate": 1.8872744386534138e-05, "loss": 0.5584, "step": 16107 }, { "epoch": 2.6295253254969184, "grad_norm": 1.6752861738204956, "learning_rate": 1.8872598014986216e-05, "loss": 0.5328, "step": 16108 }, { "epoch": 2.629688584139423, "grad_norm": 1.6917341947555542, "learning_rate": 1.8872451634503594e-05, "loss": 0.5753, "step": 16109 }, { "epoch": 2.6298518427819273, "grad_norm": 2.009004592895508, "learning_rate": 1.8872305245086414e-05, "loss": 0.5951, "step": 16110 }, { "epoch": 2.6300151014244317, "grad_norm": 1.724295973777771, "learning_rate": 1.8872158846734826e-05, "loss": 0.6016, "step": 16111 }, { "epoch": 2.630178360066936, "grad_norm": 1.5764682292938232, "learning_rate": 1.8872012439448978e-05, "loss": 0.4724, "step": 16112 }, { "epoch": 2.6303416187094406, "grad_norm": 1.9193719625473022, "learning_rate": 1.8871866023229013e-05, "loss": 0.5805, "step": 16113 }, { "epoch": 2.630504877351945, "grad_norm": 1.9446511268615723, "learning_rate": 1.8871719598075083e-05, "loss": 0.638, "step": 16114 }, { "epoch": 2.630668135994449, "grad_norm": 1.6109675168991089, "learning_rate": 1.8871573163987334e-05, "loss": 0.5152, "step": 16115 }, { "epoch": 2.6308313946369535, "grad_norm": 1.8202100992202759, "learning_rate": 1.8871426720965915e-05, "loss": 0.637, "step": 16116 }, { "epoch": 2.630994653279458, "grad_norm": 1.6964645385742188, "learning_rate": 1.8871280269010964e-05, "loss": 0.5944, "step": 16117 }, { "epoch": 2.6311579119219624, "grad_norm": 1.760259985923767, "learning_rate": 1.8871133808122642e-05, "loss": 0.5621, "step": 16118 }, { "epoch": 2.631321170564467, "grad_norm": 1.672680139541626, "learning_rate": 1.887098733830109e-05, "loss": 0.5452, "step": 16119 }, { "epoch": 2.6314844292069712, "grad_norm": 1.8114062547683716, "learning_rate": 1.8870840859546455e-05, "loss": 0.4889, "step": 16120 }, { "epoch": 2.6316476878494757, "grad_norm": 1.7800145149230957, "learning_rate": 1.8870694371858888e-05, "loss": 0.6913, "step": 16121 }, { "epoch": 2.6318109464919797, "grad_norm": 1.6153721809387207, "learning_rate": 1.887054787523853e-05, "loss": 0.5644, "step": 16122 }, { "epoch": 2.631974205134484, "grad_norm": 1.689859390258789, "learning_rate": 1.8870401369685535e-05, "loss": 0.5183, "step": 16123 }, { "epoch": 2.6321374637769885, "grad_norm": 1.8619061708450317, "learning_rate": 1.887025485520005e-05, "loss": 0.5424, "step": 16124 }, { "epoch": 2.632300722419493, "grad_norm": 1.7084953784942627, "learning_rate": 1.887010833178222e-05, "loss": 0.5882, "step": 16125 }, { "epoch": 2.6324639810619974, "grad_norm": 1.764589786529541, "learning_rate": 1.886996179943219e-05, "loss": 0.62, "step": 16126 }, { "epoch": 2.632627239704502, "grad_norm": 2.0343589782714844, "learning_rate": 1.8869815258150114e-05, "loss": 0.6857, "step": 16127 }, { "epoch": 2.6327904983470063, "grad_norm": 1.246065616607666, "learning_rate": 1.8869668707936137e-05, "loss": 0.3738, "step": 16128 }, { "epoch": 2.6329537569895107, "grad_norm": 1.7290823459625244, "learning_rate": 1.8869522148790404e-05, "loss": 0.5381, "step": 16129 }, { "epoch": 2.633117015632015, "grad_norm": 1.8423837423324585, "learning_rate": 1.8869375580713064e-05, "loss": 0.6456, "step": 16130 }, { "epoch": 2.6332802742745196, "grad_norm": 1.8904454708099365, "learning_rate": 1.8869229003704266e-05, "loss": 0.6426, "step": 16131 }, { "epoch": 2.633443532917024, "grad_norm": 1.729357123374939, "learning_rate": 1.8869082417764154e-05, "loss": 0.5179, "step": 16132 }, { "epoch": 2.633606791559528, "grad_norm": 1.7497581243515015, "learning_rate": 1.8868935822892885e-05, "loss": 0.6118, "step": 16133 }, { "epoch": 2.6337700502020325, "grad_norm": 1.662306785583496, "learning_rate": 1.8868789219090596e-05, "loss": 0.5083, "step": 16134 }, { "epoch": 2.633933308844537, "grad_norm": 2.0248637199401855, "learning_rate": 1.886864260635744e-05, "loss": 0.7194, "step": 16135 }, { "epoch": 2.6340965674870414, "grad_norm": 1.7927619218826294, "learning_rate": 1.886849598469356e-05, "loss": 0.655, "step": 16136 }, { "epoch": 2.634259826129546, "grad_norm": 1.553969144821167, "learning_rate": 1.886834935409911e-05, "loss": 0.5198, "step": 16137 }, { "epoch": 2.6344230847720502, "grad_norm": 1.825742483139038, "learning_rate": 1.8868202714574232e-05, "loss": 0.5596, "step": 16138 }, { "epoch": 2.6345863434145547, "grad_norm": 1.827074408531189, "learning_rate": 1.886805606611908e-05, "loss": 0.5739, "step": 16139 }, { "epoch": 2.6347496020570587, "grad_norm": 1.9935625791549683, "learning_rate": 1.8867909408733794e-05, "loss": 0.6928, "step": 16140 }, { "epoch": 2.634912860699563, "grad_norm": 1.7250573635101318, "learning_rate": 1.886776274241853e-05, "loss": 0.5739, "step": 16141 }, { "epoch": 2.6350761193420675, "grad_norm": 1.816119909286499, "learning_rate": 1.8867616067173425e-05, "loss": 0.5935, "step": 16142 }, { "epoch": 2.635239377984572, "grad_norm": 1.7341365814208984, "learning_rate": 1.8867469382998636e-05, "loss": 0.4416, "step": 16143 }, { "epoch": 2.6354026366270764, "grad_norm": 2.303443431854248, "learning_rate": 1.8867322689894307e-05, "loss": 0.6315, "step": 16144 }, { "epoch": 2.635565895269581, "grad_norm": 2.281344175338745, "learning_rate": 1.886717598786059e-05, "loss": 0.7293, "step": 16145 }, { "epoch": 2.6357291539120853, "grad_norm": 1.8779067993164062, "learning_rate": 1.8867029276897625e-05, "loss": 0.5825, "step": 16146 }, { "epoch": 2.6358924125545897, "grad_norm": 1.6128805875778198, "learning_rate": 1.8866882557005567e-05, "loss": 0.5189, "step": 16147 }, { "epoch": 2.636055671197094, "grad_norm": 1.7921674251556396, "learning_rate": 1.886673582818456e-05, "loss": 0.514, "step": 16148 }, { "epoch": 2.6362189298395986, "grad_norm": 1.586660385131836, "learning_rate": 1.886658909043475e-05, "loss": 0.5449, "step": 16149 }, { "epoch": 2.636382188482103, "grad_norm": 1.739637851715088, "learning_rate": 1.8866442343756288e-05, "loss": 0.5927, "step": 16150 }, { "epoch": 2.636545447124607, "grad_norm": 1.9291633367538452, "learning_rate": 1.8866295588149323e-05, "loss": 0.6249, "step": 16151 }, { "epoch": 2.6367087057671115, "grad_norm": 1.7400315999984741, "learning_rate": 1.8866148823613998e-05, "loss": 0.5373, "step": 16152 }, { "epoch": 2.636871964409616, "grad_norm": 2.225931167602539, "learning_rate": 1.8866002050150463e-05, "loss": 0.596, "step": 16153 }, { "epoch": 2.6370352230521203, "grad_norm": 2.1829609870910645, "learning_rate": 1.886585526775887e-05, "loss": 0.7269, "step": 16154 }, { "epoch": 2.637198481694625, "grad_norm": 1.969670057296753, "learning_rate": 1.8865708476439363e-05, "loss": 0.5938, "step": 16155 }, { "epoch": 2.6373617403371292, "grad_norm": 1.846751093864441, "learning_rate": 1.8865561676192085e-05, "loss": 0.5662, "step": 16156 }, { "epoch": 2.637524998979633, "grad_norm": 2.0010201930999756, "learning_rate": 1.8865414867017194e-05, "loss": 0.6319, "step": 16157 }, { "epoch": 2.6376882576221377, "grad_norm": 1.7987765073776245, "learning_rate": 1.8865268048914828e-05, "loss": 0.6771, "step": 16158 }, { "epoch": 2.637851516264642, "grad_norm": 2.124239683151245, "learning_rate": 1.8865121221885143e-05, "loss": 0.62, "step": 16159 }, { "epoch": 2.6380147749071465, "grad_norm": 1.831083059310913, "learning_rate": 1.8864974385928284e-05, "loss": 0.5243, "step": 16160 }, { "epoch": 2.638178033549651, "grad_norm": 1.8009288311004639, "learning_rate": 1.8864827541044396e-05, "loss": 0.5931, "step": 16161 }, { "epoch": 2.6383412921921554, "grad_norm": 1.7230037450790405, "learning_rate": 1.8864680687233627e-05, "loss": 0.5868, "step": 16162 }, { "epoch": 2.63850455083466, "grad_norm": 1.6651865243911743, "learning_rate": 1.886453382449613e-05, "loss": 0.5005, "step": 16163 }, { "epoch": 2.6386678094771643, "grad_norm": 1.809056043624878, "learning_rate": 1.886438695283205e-05, "loss": 0.5999, "step": 16164 }, { "epoch": 2.6388310681196687, "grad_norm": 1.9765888452529907, "learning_rate": 1.8864240072241533e-05, "loss": 0.6539, "step": 16165 }, { "epoch": 2.638994326762173, "grad_norm": 1.644740104675293, "learning_rate": 1.886409318272473e-05, "loss": 0.4967, "step": 16166 }, { "epoch": 2.6391575854046776, "grad_norm": 1.8078258037567139, "learning_rate": 1.8863946284281787e-05, "loss": 0.5985, "step": 16167 }, { "epoch": 2.6393208440471816, "grad_norm": 2.0044445991516113, "learning_rate": 1.886379937691285e-05, "loss": 0.609, "step": 16168 }, { "epoch": 2.639484102689686, "grad_norm": 1.7423810958862305, "learning_rate": 1.886365246061807e-05, "loss": 0.5789, "step": 16169 }, { "epoch": 2.6396473613321905, "grad_norm": 1.798461675643921, "learning_rate": 1.8863505535397597e-05, "loss": 0.6076, "step": 16170 }, { "epoch": 2.639810619974695, "grad_norm": 1.9436919689178467, "learning_rate": 1.8863358601251577e-05, "loss": 0.6663, "step": 16171 }, { "epoch": 2.6399738786171993, "grad_norm": 1.5544646978378296, "learning_rate": 1.8863211658180154e-05, "loss": 0.5383, "step": 16172 }, { "epoch": 2.640137137259704, "grad_norm": 1.7842367887496948, "learning_rate": 1.886306470618348e-05, "loss": 0.5444, "step": 16173 }, { "epoch": 2.640300395902208, "grad_norm": 1.795445442199707, "learning_rate": 1.8862917745261703e-05, "loss": 0.5532, "step": 16174 }, { "epoch": 2.640463654544712, "grad_norm": 1.665205478668213, "learning_rate": 1.8862770775414973e-05, "loss": 0.5093, "step": 16175 }, { "epoch": 2.6406269131872167, "grad_norm": 1.816694736480713, "learning_rate": 1.8862623796643432e-05, "loss": 0.56, "step": 16176 }, { "epoch": 2.640790171829721, "grad_norm": 2.035830020904541, "learning_rate": 1.886247680894723e-05, "loss": 0.7155, "step": 16177 }, { "epoch": 2.6409534304722255, "grad_norm": 1.5601911544799805, "learning_rate": 1.8862329812326518e-05, "loss": 0.5756, "step": 16178 }, { "epoch": 2.64111668911473, "grad_norm": 1.8580330610275269, "learning_rate": 1.8862182806781438e-05, "loss": 0.5196, "step": 16179 }, { "epoch": 2.6412799477572344, "grad_norm": 1.6392097473144531, "learning_rate": 1.8862035792312148e-05, "loss": 0.5782, "step": 16180 }, { "epoch": 2.641443206399739, "grad_norm": 1.7013221979141235, "learning_rate": 1.886188876891879e-05, "loss": 0.5522, "step": 16181 }, { "epoch": 2.6416064650422433, "grad_norm": 1.7511489391326904, "learning_rate": 1.886174173660151e-05, "loss": 0.4866, "step": 16182 }, { "epoch": 2.6417697236847477, "grad_norm": 1.7694509029388428, "learning_rate": 1.886159469536046e-05, "loss": 0.5365, "step": 16183 }, { "epoch": 2.641932982327252, "grad_norm": 1.851187825202942, "learning_rate": 1.8861447645195784e-05, "loss": 0.5835, "step": 16184 }, { "epoch": 2.6420962409697566, "grad_norm": 2.531167507171631, "learning_rate": 1.8861300586107635e-05, "loss": 0.7188, "step": 16185 }, { "epoch": 2.6422594996122606, "grad_norm": 2.0404052734375, "learning_rate": 1.886115351809616e-05, "loss": 0.6644, "step": 16186 }, { "epoch": 2.642422758254765, "grad_norm": 1.6433522701263428, "learning_rate": 1.8861006441161502e-05, "loss": 0.5687, "step": 16187 }, { "epoch": 2.6425860168972695, "grad_norm": 1.792381763458252, "learning_rate": 1.8860859355303815e-05, "loss": 0.5123, "step": 16188 }, { "epoch": 2.642749275539774, "grad_norm": 1.9141826629638672, "learning_rate": 1.8860712260523245e-05, "loss": 0.6684, "step": 16189 }, { "epoch": 2.6429125341822783, "grad_norm": 1.4881551265716553, "learning_rate": 1.8860565156819935e-05, "loss": 0.4516, "step": 16190 }, { "epoch": 2.6430757928247828, "grad_norm": 1.9429618120193481, "learning_rate": 1.8860418044194048e-05, "loss": 0.6601, "step": 16191 }, { "epoch": 2.643239051467287, "grad_norm": 1.9214049577713013, "learning_rate": 1.8860270922645716e-05, "loss": 0.6405, "step": 16192 }, { "epoch": 2.643402310109791, "grad_norm": 1.4729013442993164, "learning_rate": 1.8860123792175094e-05, "loss": 0.4605, "step": 16193 }, { "epoch": 2.6435655687522956, "grad_norm": 1.8745107650756836, "learning_rate": 1.885997665278233e-05, "loss": 0.6432, "step": 16194 }, { "epoch": 2.6437288273948, "grad_norm": 1.6613126993179321, "learning_rate": 1.8859829504467573e-05, "loss": 0.6369, "step": 16195 }, { "epoch": 2.6438920860373045, "grad_norm": 1.7476122379302979, "learning_rate": 1.8859682347230968e-05, "loss": 0.5818, "step": 16196 }, { "epoch": 2.644055344679809, "grad_norm": 1.8865282535552979, "learning_rate": 1.885953518107267e-05, "loss": 0.6126, "step": 16197 }, { "epoch": 2.6442186033223134, "grad_norm": 1.722013235092163, "learning_rate": 1.8859388005992817e-05, "loss": 0.575, "step": 16198 }, { "epoch": 2.644381861964818, "grad_norm": 2.230564594268799, "learning_rate": 1.8859240821991563e-05, "loss": 0.6261, "step": 16199 }, { "epoch": 2.6445451206073223, "grad_norm": 1.7928712368011475, "learning_rate": 1.8859093629069057e-05, "loss": 0.608, "step": 16200 }, { "epoch": 2.6447083792498267, "grad_norm": 1.8802642822265625, "learning_rate": 1.8858946427225447e-05, "loss": 0.5181, "step": 16201 }, { "epoch": 2.644871637892331, "grad_norm": 1.7463246583938599, "learning_rate": 1.8858799216460883e-05, "loss": 0.5898, "step": 16202 }, { "epoch": 2.6450348965348356, "grad_norm": 1.863582730293274, "learning_rate": 1.8858651996775506e-05, "loss": 0.5035, "step": 16203 }, { "epoch": 2.6451981551773396, "grad_norm": 1.9661266803741455, "learning_rate": 1.8858504768169467e-05, "loss": 0.6467, "step": 16204 }, { "epoch": 2.645361413819844, "grad_norm": 1.652485966682434, "learning_rate": 1.885835753064292e-05, "loss": 0.4884, "step": 16205 }, { "epoch": 2.6455246724623485, "grad_norm": 1.7477139234542847, "learning_rate": 1.885821028419601e-05, "loss": 0.5954, "step": 16206 }, { "epoch": 2.645687931104853, "grad_norm": 2.143411874771118, "learning_rate": 1.8858063028828882e-05, "loss": 0.8235, "step": 16207 }, { "epoch": 2.6458511897473573, "grad_norm": 1.5023454427719116, "learning_rate": 1.8857915764541688e-05, "loss": 0.5487, "step": 16208 }, { "epoch": 2.6460144483898618, "grad_norm": 1.6875169277191162, "learning_rate": 1.8857768491334573e-05, "loss": 0.5038, "step": 16209 }, { "epoch": 2.6461777070323658, "grad_norm": 1.9951764345169067, "learning_rate": 1.8857621209207693e-05, "loss": 0.6007, "step": 16210 }, { "epoch": 2.64634096567487, "grad_norm": 1.3140172958374023, "learning_rate": 1.8857473918161187e-05, "loss": 0.4011, "step": 16211 }, { "epoch": 2.6465042243173746, "grad_norm": 1.5986403226852417, "learning_rate": 1.8857326618195207e-05, "loss": 0.6061, "step": 16212 }, { "epoch": 2.646667482959879, "grad_norm": 1.9875656366348267, "learning_rate": 1.8857179309309902e-05, "loss": 0.5583, "step": 16213 }, { "epoch": 2.6468307416023835, "grad_norm": 1.7265067100524902, "learning_rate": 1.885703199150542e-05, "loss": 0.5153, "step": 16214 }, { "epoch": 2.646994000244888, "grad_norm": 1.6552222967147827, "learning_rate": 1.8856884664781908e-05, "loss": 0.5433, "step": 16215 }, { "epoch": 2.6471572588873924, "grad_norm": 1.86235511302948, "learning_rate": 1.8856737329139517e-05, "loss": 0.587, "step": 16216 }, { "epoch": 2.647320517529897, "grad_norm": 1.6712238788604736, "learning_rate": 1.8856589984578394e-05, "loss": 0.5225, "step": 16217 }, { "epoch": 2.6474837761724013, "grad_norm": 1.5294249057769775, "learning_rate": 1.8856442631098685e-05, "loss": 0.5374, "step": 16218 }, { "epoch": 2.6476470348149057, "grad_norm": 1.8831952810287476, "learning_rate": 1.8856295268700542e-05, "loss": 0.5528, "step": 16219 }, { "epoch": 2.64781029345741, "grad_norm": 1.9194308519363403, "learning_rate": 1.8856147897384112e-05, "loss": 0.533, "step": 16220 }, { "epoch": 2.647973552099914, "grad_norm": 1.6087665557861328, "learning_rate": 1.8856000517149546e-05, "loss": 0.4849, "step": 16221 }, { "epoch": 2.6481368107424186, "grad_norm": 1.7546992301940918, "learning_rate": 1.8855853127996987e-05, "loss": 0.5249, "step": 16222 }, { "epoch": 2.648300069384923, "grad_norm": 1.6486681699752808, "learning_rate": 1.8855705729926583e-05, "loss": 0.5105, "step": 16223 }, { "epoch": 2.6484633280274275, "grad_norm": 1.7884869575500488, "learning_rate": 1.8855558322938492e-05, "loss": 0.558, "step": 16224 }, { "epoch": 2.648626586669932, "grad_norm": 1.7307721376419067, "learning_rate": 1.8855410907032854e-05, "loss": 0.5633, "step": 16225 }, { "epoch": 2.6487898453124363, "grad_norm": 1.789711356163025, "learning_rate": 1.8855263482209817e-05, "loss": 0.439, "step": 16226 }, { "epoch": 2.6489531039549408, "grad_norm": 1.885918378829956, "learning_rate": 1.885511604846953e-05, "loss": 0.6275, "step": 16227 }, { "epoch": 2.6491163625974448, "grad_norm": 1.5928608179092407, "learning_rate": 1.885496860581215e-05, "loss": 0.5639, "step": 16228 }, { "epoch": 2.649279621239949, "grad_norm": 1.3987754583358765, "learning_rate": 1.885482115423782e-05, "loss": 0.4712, "step": 16229 }, { "epoch": 2.6494428798824536, "grad_norm": 1.635408639907837, "learning_rate": 1.8854673693746683e-05, "loss": 0.529, "step": 16230 }, { "epoch": 2.649606138524958, "grad_norm": 1.9827603101730347, "learning_rate": 1.885452622433889e-05, "loss": 0.6958, "step": 16231 }, { "epoch": 2.6497693971674625, "grad_norm": 1.8621461391448975, "learning_rate": 1.8854378746014595e-05, "loss": 0.5694, "step": 16232 }, { "epoch": 2.649932655809967, "grad_norm": 1.4278231859207153, "learning_rate": 1.8854231258773944e-05, "loss": 0.5078, "step": 16233 }, { "epoch": 2.6500959144524714, "grad_norm": 1.6745015382766724, "learning_rate": 1.885408376261708e-05, "loss": 0.4491, "step": 16234 }, { "epoch": 2.650259173094976, "grad_norm": 1.8383516073226929, "learning_rate": 1.885393625754416e-05, "loss": 0.5309, "step": 16235 }, { "epoch": 2.6504224317374803, "grad_norm": 1.7920525074005127, "learning_rate": 1.885378874355533e-05, "loss": 0.5321, "step": 16236 }, { "epoch": 2.6505856903799847, "grad_norm": 1.893619179725647, "learning_rate": 1.885364122065073e-05, "loss": 0.6308, "step": 16237 }, { "epoch": 2.650748949022489, "grad_norm": 1.8555587530136108, "learning_rate": 1.8853493688830523e-05, "loss": 0.5851, "step": 16238 }, { "epoch": 2.650912207664993, "grad_norm": 2.1000163555145264, "learning_rate": 1.8853346148094848e-05, "loss": 1.1442, "step": 16239 }, { "epoch": 2.6510754663074976, "grad_norm": 2.2729551792144775, "learning_rate": 1.885319859844385e-05, "loss": 0.6307, "step": 16240 }, { "epoch": 2.651238724950002, "grad_norm": 2.029829502105713, "learning_rate": 1.885305103987769e-05, "loss": 0.6656, "step": 16241 }, { "epoch": 2.6514019835925065, "grad_norm": 1.7489749193191528, "learning_rate": 1.8852903472396507e-05, "loss": 0.5407, "step": 16242 }, { "epoch": 2.651565242235011, "grad_norm": 1.7285611629486084, "learning_rate": 1.8852755896000458e-05, "loss": 0.5766, "step": 16243 }, { "epoch": 2.6517285008775153, "grad_norm": 2.9301564693450928, "learning_rate": 1.885260831068968e-05, "loss": 0.6094, "step": 16244 }, { "epoch": 2.6518917595200193, "grad_norm": 1.8312351703643799, "learning_rate": 1.885246071646433e-05, "loss": 0.6549, "step": 16245 }, { "epoch": 2.6520550181625238, "grad_norm": 1.7664475440979004, "learning_rate": 1.8852313113324553e-05, "loss": 0.6417, "step": 16246 }, { "epoch": 2.652218276805028, "grad_norm": 1.6098604202270508, "learning_rate": 1.88521655012705e-05, "loss": 0.5551, "step": 16247 }, { "epoch": 2.6523815354475326, "grad_norm": 1.5812747478485107, "learning_rate": 1.885201788030232e-05, "loss": 0.4848, "step": 16248 }, { "epoch": 2.652544794090037, "grad_norm": 1.7860987186431885, "learning_rate": 1.8851870250420157e-05, "loss": 0.6558, "step": 16249 }, { "epoch": 2.6527080527325415, "grad_norm": 1.6866869926452637, "learning_rate": 1.8851722611624166e-05, "loss": 0.5354, "step": 16250 }, { "epoch": 2.652871311375046, "grad_norm": 1.724359154701233, "learning_rate": 1.8851574963914495e-05, "loss": 0.6485, "step": 16251 }, { "epoch": 2.6530345700175504, "grad_norm": 1.6165668964385986, "learning_rate": 1.8851427307291286e-05, "loss": 0.5707, "step": 16252 }, { "epoch": 2.653197828660055, "grad_norm": 1.714489221572876, "learning_rate": 1.885127964175469e-05, "loss": 0.5052, "step": 16253 }, { "epoch": 2.6533610873025593, "grad_norm": 1.3358865976333618, "learning_rate": 1.8851131967304864e-05, "loss": 0.4177, "step": 16254 }, { "epoch": 2.6535243459450637, "grad_norm": 1.8005118370056152, "learning_rate": 1.8850984283941947e-05, "loss": 0.6258, "step": 16255 }, { "epoch": 2.6536876045875677, "grad_norm": 1.8048179149627686, "learning_rate": 1.8850836591666094e-05, "loss": 0.5419, "step": 16256 }, { "epoch": 2.653850863230072, "grad_norm": 1.573318362236023, "learning_rate": 1.8850688890477446e-05, "loss": 0.4881, "step": 16257 }, { "epoch": 2.6540141218725766, "grad_norm": 1.639529824256897, "learning_rate": 1.885054118037616e-05, "loss": 0.5152, "step": 16258 }, { "epoch": 2.654177380515081, "grad_norm": 1.6184738874435425, "learning_rate": 1.885039346136238e-05, "loss": 0.5439, "step": 16259 }, { "epoch": 2.6543406391575854, "grad_norm": 2.0479931831359863, "learning_rate": 1.8850245733436255e-05, "loss": 0.5914, "step": 16260 }, { "epoch": 2.65450389780009, "grad_norm": 1.4357616901397705, "learning_rate": 1.885009799659794e-05, "loss": 0.4732, "step": 16261 }, { "epoch": 2.6546671564425943, "grad_norm": 1.6171537637710571, "learning_rate": 1.8849950250847575e-05, "loss": 0.5625, "step": 16262 }, { "epoch": 2.6548304150850983, "grad_norm": 1.7594616413116455, "learning_rate": 1.884980249618531e-05, "loss": 0.581, "step": 16263 }, { "epoch": 2.6549936737276028, "grad_norm": 1.5942631959915161, "learning_rate": 1.88496547326113e-05, "loss": 0.5962, "step": 16264 }, { "epoch": 2.655156932370107, "grad_norm": 1.7833468914031982, "learning_rate": 1.884950696012569e-05, "loss": 0.5352, "step": 16265 }, { "epoch": 2.6553201910126116, "grad_norm": 1.8331431150436401, "learning_rate": 1.8849359178728628e-05, "loss": 0.5857, "step": 16266 }, { "epoch": 2.655483449655116, "grad_norm": 1.709747076034546, "learning_rate": 1.8849211388420262e-05, "loss": 0.5066, "step": 16267 }, { "epoch": 2.6556467082976205, "grad_norm": 1.4912408590316772, "learning_rate": 1.8849063589200744e-05, "loss": 0.4892, "step": 16268 }, { "epoch": 2.655809966940125, "grad_norm": 1.5374032258987427, "learning_rate": 1.8848915781070222e-05, "loss": 0.5589, "step": 16269 }, { "epoch": 2.6559732255826294, "grad_norm": 2.048097610473633, "learning_rate": 1.8848767964028846e-05, "loss": 0.5935, "step": 16270 }, { "epoch": 2.656136484225134, "grad_norm": 1.604047417640686, "learning_rate": 1.8848620138076758e-05, "loss": 0.5658, "step": 16271 }, { "epoch": 2.6562997428676383, "grad_norm": 1.857582688331604, "learning_rate": 1.8848472303214113e-05, "loss": 0.6611, "step": 16272 }, { "epoch": 2.6564630015101427, "grad_norm": 1.8161566257476807, "learning_rate": 1.884832445944106e-05, "loss": 0.5649, "step": 16273 }, { "epoch": 2.6566262601526467, "grad_norm": 1.669310450553894, "learning_rate": 1.8848176606757745e-05, "loss": 0.6082, "step": 16274 }, { "epoch": 2.656789518795151, "grad_norm": 2.02079176902771, "learning_rate": 1.8848028745164323e-05, "loss": 0.7283, "step": 16275 }, { "epoch": 2.6569527774376556, "grad_norm": 1.805809497833252, "learning_rate": 1.8847880874660934e-05, "loss": 0.5378, "step": 16276 }, { "epoch": 2.65711603608016, "grad_norm": 1.876278042793274, "learning_rate": 1.8847732995247735e-05, "loss": 0.6316, "step": 16277 }, { "epoch": 2.6572792947226644, "grad_norm": 1.9347690343856812, "learning_rate": 1.884758510692487e-05, "loss": 0.662, "step": 16278 }, { "epoch": 2.657442553365169, "grad_norm": 1.5389243364334106, "learning_rate": 1.8847437209692486e-05, "loss": 0.4492, "step": 16279 }, { "epoch": 2.6576058120076733, "grad_norm": 1.623598575592041, "learning_rate": 1.8847289303550738e-05, "loss": 0.6392, "step": 16280 }, { "epoch": 2.6577690706501773, "grad_norm": 1.7707563638687134, "learning_rate": 1.8847141388499772e-05, "loss": 0.479, "step": 16281 }, { "epoch": 2.6579323292926818, "grad_norm": 1.6646294593811035, "learning_rate": 1.8846993464539735e-05, "loss": 0.5562, "step": 16282 }, { "epoch": 2.658095587935186, "grad_norm": 1.5497221946716309, "learning_rate": 1.884684553167078e-05, "loss": 0.505, "step": 16283 }, { "epoch": 2.6582588465776906, "grad_norm": 1.5549051761627197, "learning_rate": 1.8846697589893052e-05, "loss": 0.5114, "step": 16284 }, { "epoch": 2.658422105220195, "grad_norm": 1.5753626823425293, "learning_rate": 1.8846549639206702e-05, "loss": 0.582, "step": 16285 }, { "epoch": 2.6585853638626995, "grad_norm": 1.6876548528671265, "learning_rate": 1.884640167961188e-05, "loss": 0.6231, "step": 16286 }, { "epoch": 2.658748622505204, "grad_norm": 1.7835813760757446, "learning_rate": 1.8846253711108734e-05, "loss": 0.6401, "step": 16287 }, { "epoch": 2.6589118811477084, "grad_norm": 2.055309295654297, "learning_rate": 1.8846105733697414e-05, "loss": 0.6458, "step": 16288 }, { "epoch": 2.659075139790213, "grad_norm": 1.672776222229004, "learning_rate": 1.8845957747378065e-05, "loss": 0.5901, "step": 16289 }, { "epoch": 2.6592383984327173, "grad_norm": 1.7826297283172607, "learning_rate": 1.884580975215084e-05, "loss": 0.521, "step": 16290 }, { "epoch": 2.6594016570752217, "grad_norm": 1.903480887413025, "learning_rate": 1.8845661748015888e-05, "loss": 0.6205, "step": 16291 }, { "epoch": 2.6595649157177257, "grad_norm": 2.035736560821533, "learning_rate": 1.8845513734973355e-05, "loss": 0.6213, "step": 16292 }, { "epoch": 2.65972817436023, "grad_norm": 1.8266675472259521, "learning_rate": 1.8845365713023396e-05, "loss": 0.5025, "step": 16293 }, { "epoch": 2.6598914330027346, "grad_norm": 1.570272445678711, "learning_rate": 1.8845217682166153e-05, "loss": 0.5163, "step": 16294 }, { "epoch": 2.660054691645239, "grad_norm": 1.885453224182129, "learning_rate": 1.8845069642401777e-05, "loss": 0.5661, "step": 16295 }, { "epoch": 2.6602179502877434, "grad_norm": 1.5214983224868774, "learning_rate": 1.8844921593730418e-05, "loss": 0.479, "step": 16296 }, { "epoch": 2.660381208930248, "grad_norm": 1.9252363443374634, "learning_rate": 1.884477353615223e-05, "loss": 0.6086, "step": 16297 }, { "epoch": 2.660544467572752, "grad_norm": 2.2613422870635986, "learning_rate": 1.8844625469667353e-05, "loss": 0.6426, "step": 16298 }, { "epoch": 2.6607077262152563, "grad_norm": 1.803234577178955, "learning_rate": 1.884447739427594e-05, "loss": 0.5315, "step": 16299 }, { "epoch": 2.6608709848577607, "grad_norm": 1.7807650566101074, "learning_rate": 1.8844329309978146e-05, "loss": 0.6508, "step": 16300 }, { "epoch": 2.661034243500265, "grad_norm": 1.4943678379058838, "learning_rate": 1.884418121677411e-05, "loss": 0.5016, "step": 16301 }, { "epoch": 2.6611975021427696, "grad_norm": 1.9605904817581177, "learning_rate": 1.8844033114663987e-05, "loss": 0.6572, "step": 16302 }, { "epoch": 2.661360760785274, "grad_norm": 1.72105872631073, "learning_rate": 1.8843885003647923e-05, "loss": 0.5905, "step": 16303 }, { "epoch": 2.6615240194277785, "grad_norm": 1.8847185373306274, "learning_rate": 1.8843736883726075e-05, "loss": 0.6238, "step": 16304 }, { "epoch": 2.661687278070283, "grad_norm": 1.846256971359253, "learning_rate": 1.884358875489858e-05, "loss": 0.5676, "step": 16305 }, { "epoch": 2.6618505367127874, "grad_norm": 1.717538595199585, "learning_rate": 1.8843440617165596e-05, "loss": 0.5632, "step": 16306 }, { "epoch": 2.662013795355292, "grad_norm": 1.641369104385376, "learning_rate": 1.884329247052727e-05, "loss": 0.5648, "step": 16307 }, { "epoch": 2.6621770539977962, "grad_norm": 1.630169153213501, "learning_rate": 1.8843144314983753e-05, "loss": 0.6239, "step": 16308 }, { "epoch": 2.6623403126403002, "grad_norm": 1.8606915473937988, "learning_rate": 1.8842996150535188e-05, "loss": 0.5993, "step": 16309 }, { "epoch": 2.6625035712828047, "grad_norm": 1.7253917455673218, "learning_rate": 1.8842847977181732e-05, "loss": 0.5894, "step": 16310 }, { "epoch": 2.662666829925309, "grad_norm": 1.6378947496414185, "learning_rate": 1.8842699794923523e-05, "loss": 0.5157, "step": 16311 }, { "epoch": 2.6628300885678136, "grad_norm": 1.9302153587341309, "learning_rate": 1.8842551603760725e-05, "loss": 0.5701, "step": 16312 }, { "epoch": 2.662993347210318, "grad_norm": 1.6023072004318237, "learning_rate": 1.8842403403693476e-05, "loss": 0.4996, "step": 16313 }, { "epoch": 2.6631566058528224, "grad_norm": 1.376382827758789, "learning_rate": 1.8842255194721932e-05, "loss": 0.4813, "step": 16314 }, { "epoch": 2.663319864495327, "grad_norm": 1.5550614595413208, "learning_rate": 1.884210697684624e-05, "loss": 0.5146, "step": 16315 }, { "epoch": 2.663483123137831, "grad_norm": 1.8244661092758179, "learning_rate": 1.8841958750066545e-05, "loss": 0.6391, "step": 16316 }, { "epoch": 2.6636463817803353, "grad_norm": 1.5459095239639282, "learning_rate": 1.8841810514383004e-05, "loss": 0.5313, "step": 16317 }, { "epoch": 2.6638096404228397, "grad_norm": 1.9217784404754639, "learning_rate": 1.8841662269795758e-05, "loss": 0.6477, "step": 16318 }, { "epoch": 2.663972899065344, "grad_norm": 2.1114747524261475, "learning_rate": 1.884151401630496e-05, "loss": 0.6372, "step": 16319 }, { "epoch": 2.6641361577078486, "grad_norm": 1.9109761714935303, "learning_rate": 1.8841365753910765e-05, "loss": 0.5919, "step": 16320 }, { "epoch": 2.664299416350353, "grad_norm": 1.7391923666000366, "learning_rate": 1.8841217482613313e-05, "loss": 0.6371, "step": 16321 }, { "epoch": 2.6644626749928575, "grad_norm": 1.9180046319961548, "learning_rate": 1.884106920241276e-05, "loss": 0.5316, "step": 16322 }, { "epoch": 2.664625933635362, "grad_norm": 1.6787445545196533, "learning_rate": 1.884092091330925e-05, "loss": 0.5097, "step": 16323 }, { "epoch": 2.6647891922778664, "grad_norm": 1.766187310218811, "learning_rate": 1.8840772615302935e-05, "loss": 0.559, "step": 16324 }, { "epoch": 2.664952450920371, "grad_norm": 2.1056883335113525, "learning_rate": 1.8840624308393965e-05, "loss": 0.5294, "step": 16325 }, { "epoch": 2.6651157095628752, "grad_norm": 1.7649235725402832, "learning_rate": 1.884047599258249e-05, "loss": 0.5074, "step": 16326 }, { "epoch": 2.6652789682053792, "grad_norm": 1.5243574380874634, "learning_rate": 1.8840327667868657e-05, "loss": 0.581, "step": 16327 }, { "epoch": 2.6654422268478837, "grad_norm": 1.3903610706329346, "learning_rate": 1.8840179334252617e-05, "loss": 0.5232, "step": 16328 }, { "epoch": 2.665605485490388, "grad_norm": 1.9203052520751953, "learning_rate": 1.8840030991734518e-05, "loss": 0.6296, "step": 16329 }, { "epoch": 2.6657687441328926, "grad_norm": 1.6550929546356201, "learning_rate": 1.8839882640314512e-05, "loss": 0.5643, "step": 16330 }, { "epoch": 2.665932002775397, "grad_norm": 1.4799842834472656, "learning_rate": 1.883973427999274e-05, "loss": 0.4881, "step": 16331 }, { "epoch": 2.6660952614179014, "grad_norm": 1.6732831001281738, "learning_rate": 1.8839585910769365e-05, "loss": 0.5598, "step": 16332 }, { "epoch": 2.6662585200604054, "grad_norm": 1.7194199562072754, "learning_rate": 1.883943753264453e-05, "loss": 0.4957, "step": 16333 }, { "epoch": 2.66642177870291, "grad_norm": 1.6308283805847168, "learning_rate": 1.8839289145618378e-05, "loss": 0.545, "step": 16334 }, { "epoch": 2.6665850373454143, "grad_norm": 1.8110735416412354, "learning_rate": 1.8839140749691064e-05, "loss": 0.5534, "step": 16335 }, { "epoch": 2.6667482959879187, "grad_norm": 1.539918303489685, "learning_rate": 1.8838992344862744e-05, "loss": 0.5283, "step": 16336 }, { "epoch": 2.666911554630423, "grad_norm": 1.7568938732147217, "learning_rate": 1.8838843931133555e-05, "loss": 0.576, "step": 16337 }, { "epoch": 2.6670748132729276, "grad_norm": 1.9397773742675781, "learning_rate": 1.8838695508503656e-05, "loss": 0.4885, "step": 16338 }, { "epoch": 2.667238071915432, "grad_norm": 1.9094289541244507, "learning_rate": 1.8838547076973192e-05, "loss": 0.7238, "step": 16339 }, { "epoch": 2.6674013305579365, "grad_norm": 1.950736165046692, "learning_rate": 1.8838398636542316e-05, "loss": 0.5922, "step": 16340 }, { "epoch": 2.667564589200441, "grad_norm": 1.8103185892105103, "learning_rate": 1.883825018721117e-05, "loss": 0.5568, "step": 16341 }, { "epoch": 2.6677278478429454, "grad_norm": 1.8735820055007935, "learning_rate": 1.8838101728979913e-05, "loss": 0.6055, "step": 16342 }, { "epoch": 2.66789110648545, "grad_norm": 1.6604368686676025, "learning_rate": 1.883795326184869e-05, "loss": 0.5426, "step": 16343 }, { "epoch": 2.668054365127954, "grad_norm": 1.6365835666656494, "learning_rate": 1.8837804785817644e-05, "loss": 0.5054, "step": 16344 }, { "epoch": 2.6682176237704582, "grad_norm": 1.7236286401748657, "learning_rate": 1.8837656300886937e-05, "loss": 0.5484, "step": 16345 }, { "epoch": 2.6683808824129627, "grad_norm": 1.8289629220962524, "learning_rate": 1.883750780705671e-05, "loss": 0.4917, "step": 16346 }, { "epoch": 2.668544141055467, "grad_norm": 1.9440494775772095, "learning_rate": 1.8837359304327115e-05, "loss": 0.6301, "step": 16347 }, { "epoch": 2.6687073996979715, "grad_norm": 1.7163053750991821, "learning_rate": 1.8837210792698305e-05, "loss": 0.5432, "step": 16348 }, { "epoch": 2.668870658340476, "grad_norm": 2.062157154083252, "learning_rate": 1.8837062272170418e-05, "loss": 0.6044, "step": 16349 }, { "epoch": 2.6690339169829804, "grad_norm": 1.9886558055877686, "learning_rate": 1.883691374274362e-05, "loss": 0.5685, "step": 16350 }, { "epoch": 2.6691971756254844, "grad_norm": 1.8802244663238525, "learning_rate": 1.883676520441805e-05, "loss": 0.5886, "step": 16351 }, { "epoch": 2.669360434267989, "grad_norm": 1.789820909500122, "learning_rate": 1.8836616657193855e-05, "loss": 0.6346, "step": 16352 }, { "epoch": 2.6695236929104933, "grad_norm": 1.5130960941314697, "learning_rate": 1.8836468101071194e-05, "loss": 0.5325, "step": 16353 }, { "epoch": 2.6696869515529977, "grad_norm": 1.7319607734680176, "learning_rate": 1.883631953605021e-05, "loss": 0.4436, "step": 16354 }, { "epoch": 2.669850210195502, "grad_norm": 1.9163321256637573, "learning_rate": 1.8836170962131056e-05, "loss": 0.5967, "step": 16355 }, { "epoch": 2.6700134688380066, "grad_norm": 2.0167734622955322, "learning_rate": 1.8836022379313884e-05, "loss": 0.6176, "step": 16356 }, { "epoch": 2.670176727480511, "grad_norm": 1.5765957832336426, "learning_rate": 1.8835873787598834e-05, "loss": 0.5518, "step": 16357 }, { "epoch": 2.6703399861230155, "grad_norm": 1.7647868394851685, "learning_rate": 1.8835725186986062e-05, "loss": 0.5673, "step": 16358 }, { "epoch": 2.67050324476552, "grad_norm": 1.7314445972442627, "learning_rate": 1.8835576577475717e-05, "loss": 0.539, "step": 16359 }, { "epoch": 2.6706665034080244, "grad_norm": 1.651344656944275, "learning_rate": 1.8835427959067952e-05, "loss": 0.6652, "step": 16360 }, { "epoch": 2.670829762050529, "grad_norm": 2.3495843410491943, "learning_rate": 1.883527933176291e-05, "loss": 0.7014, "step": 16361 }, { "epoch": 2.670993020693033, "grad_norm": 1.8624143600463867, "learning_rate": 1.8835130695560746e-05, "loss": 0.5578, "step": 16362 }, { "epoch": 2.6711562793355372, "grad_norm": 1.9127405881881714, "learning_rate": 1.8834982050461608e-05, "loss": 0.4842, "step": 16363 }, { "epoch": 2.6713195379780417, "grad_norm": 1.7331244945526123, "learning_rate": 1.8834833396465645e-05, "loss": 0.5711, "step": 16364 }, { "epoch": 2.671482796620546, "grad_norm": 1.5312727689743042, "learning_rate": 1.8834684733573007e-05, "loss": 0.5238, "step": 16365 }, { "epoch": 2.6716460552630505, "grad_norm": 1.526473045349121, "learning_rate": 1.8834536061783843e-05, "loss": 0.5668, "step": 16366 }, { "epoch": 2.671809313905555, "grad_norm": 2.219524621963501, "learning_rate": 1.8834387381098302e-05, "loss": 0.6601, "step": 16367 }, { "epoch": 2.6719725725480594, "grad_norm": 2.108445167541504, "learning_rate": 1.8834238691516537e-05, "loss": 0.6909, "step": 16368 }, { "epoch": 2.6721358311905634, "grad_norm": 1.6152878999710083, "learning_rate": 1.8834089993038696e-05, "loss": 0.5854, "step": 16369 }, { "epoch": 2.672299089833068, "grad_norm": 1.5773333311080933, "learning_rate": 1.883394128566493e-05, "loss": 0.5028, "step": 16370 }, { "epoch": 2.6724623484755723, "grad_norm": 2.3028788566589355, "learning_rate": 1.8833792569395385e-05, "loss": 0.5882, "step": 16371 }, { "epoch": 2.6726256071180767, "grad_norm": 1.9720379114151, "learning_rate": 1.8833643844230217e-05, "loss": 0.6176, "step": 16372 }, { "epoch": 2.672788865760581, "grad_norm": 1.9768034219741821, "learning_rate": 1.883349511016957e-05, "loss": 0.6645, "step": 16373 }, { "epoch": 2.6729521244030856, "grad_norm": 1.636730432510376, "learning_rate": 1.8833346367213595e-05, "loss": 0.6023, "step": 16374 }, { "epoch": 2.67311538304559, "grad_norm": 1.9114065170288086, "learning_rate": 1.8833197615362443e-05, "loss": 0.5746, "step": 16375 }, { "epoch": 2.6732786416880945, "grad_norm": 1.5204781293869019, "learning_rate": 1.8833048854616263e-05, "loss": 0.5572, "step": 16376 }, { "epoch": 2.673441900330599, "grad_norm": 1.6491953134536743, "learning_rate": 1.8832900084975203e-05, "loss": 0.5639, "step": 16377 }, { "epoch": 2.6736051589731034, "grad_norm": 2.0114426612854004, "learning_rate": 1.883275130643942e-05, "loss": 0.7387, "step": 16378 }, { "epoch": 2.673768417615608, "grad_norm": 1.8034809827804565, "learning_rate": 1.8832602519009056e-05, "loss": 0.5551, "step": 16379 }, { "epoch": 2.673931676258112, "grad_norm": 1.7541903257369995, "learning_rate": 1.8832453722684263e-05, "loss": 0.551, "step": 16380 }, { "epoch": 2.6740949349006162, "grad_norm": 1.9957820177078247, "learning_rate": 1.883230491746519e-05, "loss": 0.6291, "step": 16381 }, { "epoch": 2.6742581935431207, "grad_norm": 1.6463096141815186, "learning_rate": 1.8832156103351994e-05, "loss": 0.5892, "step": 16382 }, { "epoch": 2.674421452185625, "grad_norm": 2.114961862564087, "learning_rate": 1.8832007280344813e-05, "loss": 0.6794, "step": 16383 }, { "epoch": 2.6745847108281295, "grad_norm": 1.7838817834854126, "learning_rate": 1.8831858448443806e-05, "loss": 0.6838, "step": 16384 }, { "epoch": 2.674747969470634, "grad_norm": 1.6413896083831787, "learning_rate": 1.883170960764912e-05, "loss": 0.5618, "step": 16385 }, { "epoch": 2.674911228113138, "grad_norm": 2.0213186740875244, "learning_rate": 1.8831560757960906e-05, "loss": 0.6263, "step": 16386 }, { "epoch": 2.6750744867556424, "grad_norm": 1.7649832963943481, "learning_rate": 1.883141189937931e-05, "loss": 0.6743, "step": 16387 }, { "epoch": 2.675237745398147, "grad_norm": 1.4128681421279907, "learning_rate": 1.8831263031904485e-05, "loss": 0.5038, "step": 16388 }, { "epoch": 2.6754010040406513, "grad_norm": 1.7522263526916504, "learning_rate": 1.883111415553658e-05, "loss": 0.5897, "step": 16389 }, { "epoch": 2.6755642626831557, "grad_norm": 1.7551922798156738, "learning_rate": 1.8830965270275746e-05, "loss": 0.6048, "step": 16390 }, { "epoch": 2.67572752132566, "grad_norm": 2.0264551639556885, "learning_rate": 1.8830816376122134e-05, "loss": 0.5616, "step": 16391 }, { "epoch": 2.6758907799681646, "grad_norm": 1.5114728212356567, "learning_rate": 1.8830667473075892e-05, "loss": 0.4735, "step": 16392 }, { "epoch": 2.676054038610669, "grad_norm": 1.6736599206924438, "learning_rate": 1.883051856113717e-05, "loss": 0.5702, "step": 16393 }, { "epoch": 2.6762172972531735, "grad_norm": 2.0058517456054688, "learning_rate": 1.8830369640306117e-05, "loss": 0.6281, "step": 16394 }, { "epoch": 2.676380555895678, "grad_norm": 1.7630877494812012, "learning_rate": 1.883022071058288e-05, "loss": 0.6108, "step": 16395 }, { "epoch": 2.6765438145381824, "grad_norm": 1.886971116065979, "learning_rate": 1.883007177196762e-05, "loss": 0.5812, "step": 16396 }, { "epoch": 2.6767070731806863, "grad_norm": 1.9686473608016968, "learning_rate": 1.882992282446048e-05, "loss": 0.6015, "step": 16397 }, { "epoch": 2.676870331823191, "grad_norm": 1.6692005395889282, "learning_rate": 1.8829773868061604e-05, "loss": 0.5463, "step": 16398 }, { "epoch": 2.6770335904656952, "grad_norm": 1.9452427625656128, "learning_rate": 1.8829624902771153e-05, "loss": 0.7179, "step": 16399 }, { "epoch": 2.6771968491081997, "grad_norm": 1.4966535568237305, "learning_rate": 1.8829475928589272e-05, "loss": 0.4509, "step": 16400 }, { "epoch": 2.677360107750704, "grad_norm": 1.7980726957321167, "learning_rate": 1.882932694551611e-05, "loss": 0.6445, "step": 16401 }, { "epoch": 2.6775233663932085, "grad_norm": 1.9372528791427612, "learning_rate": 1.882917795355182e-05, "loss": 0.6071, "step": 16402 }, { "epoch": 2.677686625035713, "grad_norm": 1.7761136293411255, "learning_rate": 1.8829028952696545e-05, "loss": 0.5456, "step": 16403 }, { "epoch": 2.677849883678217, "grad_norm": 2.028926372528076, "learning_rate": 1.8828879942950444e-05, "loss": 0.5935, "step": 16404 }, { "epoch": 2.6780131423207214, "grad_norm": 1.7885468006134033, "learning_rate": 1.8828730924313662e-05, "loss": 0.5598, "step": 16405 }, { "epoch": 2.678176400963226, "grad_norm": 1.5184855461120605, "learning_rate": 1.8828581896786347e-05, "loss": 0.5006, "step": 16406 }, { "epoch": 2.6783396596057303, "grad_norm": 1.7889596223831177, "learning_rate": 1.882843286036866e-05, "loss": 0.5949, "step": 16407 }, { "epoch": 2.6785029182482347, "grad_norm": 1.5437264442443848, "learning_rate": 1.882828381506074e-05, "loss": 0.5261, "step": 16408 }, { "epoch": 2.678666176890739, "grad_norm": 1.5310728549957275, "learning_rate": 1.882813476086274e-05, "loss": 0.553, "step": 16409 }, { "epoch": 2.6788294355332436, "grad_norm": 1.9613131284713745, "learning_rate": 1.882798569777481e-05, "loss": 0.5911, "step": 16410 }, { "epoch": 2.678992694175748, "grad_norm": 1.9702357053756714, "learning_rate": 1.88278366257971e-05, "loss": 0.4882, "step": 16411 }, { "epoch": 2.6791559528182525, "grad_norm": 1.707716941833496, "learning_rate": 1.8827687544929763e-05, "loss": 0.5353, "step": 16412 }, { "epoch": 2.679319211460757, "grad_norm": 2.0635123252868652, "learning_rate": 1.8827538455172947e-05, "loss": 0.7019, "step": 16413 }, { "epoch": 2.6794824701032613, "grad_norm": 1.6630780696868896, "learning_rate": 1.88273893565268e-05, "loss": 0.5177, "step": 16414 }, { "epoch": 2.6796457287457653, "grad_norm": 1.9748984575271606, "learning_rate": 1.882724024899148e-05, "loss": 0.5502, "step": 16415 }, { "epoch": 2.67980898738827, "grad_norm": 1.7943633794784546, "learning_rate": 1.8827091132567124e-05, "loss": 0.6065, "step": 16416 }, { "epoch": 2.679972246030774, "grad_norm": 2.2598495483398438, "learning_rate": 1.8826942007253894e-05, "loss": 0.7273, "step": 16417 }, { "epoch": 2.6801355046732787, "grad_norm": 1.8314197063446045, "learning_rate": 1.8826792873051935e-05, "loss": 0.597, "step": 16418 }, { "epoch": 2.680298763315783, "grad_norm": 1.4663925170898438, "learning_rate": 1.8826643729961394e-05, "loss": 0.4544, "step": 16419 }, { "epoch": 2.6804620219582875, "grad_norm": 2.2930233478546143, "learning_rate": 1.8826494577982432e-05, "loss": 0.6963, "step": 16420 }, { "epoch": 2.680625280600792, "grad_norm": 1.8547337055206299, "learning_rate": 1.8826345417115188e-05, "loss": 0.6509, "step": 16421 }, { "epoch": 2.680788539243296, "grad_norm": 1.3991401195526123, "learning_rate": 1.882619624735982e-05, "loss": 0.4293, "step": 16422 }, { "epoch": 2.6809517978858004, "grad_norm": 1.5716923475265503, "learning_rate": 1.882604706871647e-05, "loss": 0.5636, "step": 16423 }, { "epoch": 2.681115056528305, "grad_norm": 1.9680172204971313, "learning_rate": 1.8825897881185296e-05, "loss": 0.5836, "step": 16424 }, { "epoch": 2.6812783151708093, "grad_norm": 1.5834296941757202, "learning_rate": 1.8825748684766442e-05, "loss": 0.4677, "step": 16425 }, { "epoch": 2.6814415738133137, "grad_norm": 1.818985104560852, "learning_rate": 1.8825599479460064e-05, "loss": 0.6159, "step": 16426 }, { "epoch": 2.681604832455818, "grad_norm": 1.916646122932434, "learning_rate": 1.882545026526631e-05, "loss": 0.5339, "step": 16427 }, { "epoch": 2.6817680910983226, "grad_norm": 1.809349536895752, "learning_rate": 1.8825301042185328e-05, "loss": 0.5584, "step": 16428 }, { "epoch": 2.681931349740827, "grad_norm": 1.521785020828247, "learning_rate": 1.8825151810217273e-05, "loss": 0.5263, "step": 16429 }, { "epoch": 2.6820946083833315, "grad_norm": 1.7768723964691162, "learning_rate": 1.882500256936229e-05, "loss": 0.6229, "step": 16430 }, { "epoch": 2.682257867025836, "grad_norm": 1.848021149635315, "learning_rate": 1.8824853319620532e-05, "loss": 0.5617, "step": 16431 }, { "epoch": 2.6824211256683403, "grad_norm": 1.4697195291519165, "learning_rate": 1.882470406099215e-05, "loss": 0.4573, "step": 16432 }, { "epoch": 2.6825843843108443, "grad_norm": 1.5814824104309082, "learning_rate": 1.8824554793477294e-05, "loss": 0.6007, "step": 16433 }, { "epoch": 2.6827476429533488, "grad_norm": 1.5659681558609009, "learning_rate": 1.882440551707611e-05, "loss": 0.4893, "step": 16434 }, { "epoch": 2.682910901595853, "grad_norm": 1.6011825799942017, "learning_rate": 1.8824256231788755e-05, "loss": 0.5045, "step": 16435 }, { "epoch": 2.6830741602383577, "grad_norm": 1.7143847942352295, "learning_rate": 1.8824106937615377e-05, "loss": 0.504, "step": 16436 }, { "epoch": 2.683237418880862, "grad_norm": 1.6532143354415894, "learning_rate": 1.8823957634556125e-05, "loss": 0.4922, "step": 16437 }, { "epoch": 2.6834006775233665, "grad_norm": 1.6793723106384277, "learning_rate": 1.882380832261115e-05, "loss": 0.5157, "step": 16438 }, { "epoch": 2.6835639361658705, "grad_norm": 1.5882152318954468, "learning_rate": 1.88236590017806e-05, "loss": 0.5313, "step": 16439 }, { "epoch": 2.683727194808375, "grad_norm": 2.365739583969116, "learning_rate": 1.882350967206463e-05, "loss": 0.652, "step": 16440 }, { "epoch": 2.6838904534508794, "grad_norm": 1.7916666269302368, "learning_rate": 1.8823360333463387e-05, "loss": 0.6072, "step": 16441 }, { "epoch": 2.684053712093384, "grad_norm": 1.6151963472366333, "learning_rate": 1.8823210985977024e-05, "loss": 0.5249, "step": 16442 }, { "epoch": 2.6842169707358883, "grad_norm": 1.4359116554260254, "learning_rate": 1.882306162960569e-05, "loss": 0.4668, "step": 16443 }, { "epoch": 2.6843802293783927, "grad_norm": 1.6685482263565063, "learning_rate": 1.8822912264349535e-05, "loss": 0.6116, "step": 16444 }, { "epoch": 2.684543488020897, "grad_norm": 1.4240553379058838, "learning_rate": 1.8822762890208712e-05, "loss": 0.5196, "step": 16445 }, { "epoch": 2.6847067466634016, "grad_norm": 2.044994354248047, "learning_rate": 1.8822613507183363e-05, "loss": 0.6244, "step": 16446 }, { "epoch": 2.684870005305906, "grad_norm": 1.5365080833435059, "learning_rate": 1.8822464115273645e-05, "loss": 0.5001, "step": 16447 }, { "epoch": 2.6850332639484105, "grad_norm": 1.5827518701553345, "learning_rate": 1.8822314714479714e-05, "loss": 0.4836, "step": 16448 }, { "epoch": 2.685196522590915, "grad_norm": 1.6679178476333618, "learning_rate": 1.882216530480171e-05, "loss": 0.5897, "step": 16449 }, { "epoch": 2.685359781233419, "grad_norm": 1.7004810571670532, "learning_rate": 1.882201588623979e-05, "loss": 0.6024, "step": 16450 }, { "epoch": 2.6855230398759233, "grad_norm": 1.6526628732681274, "learning_rate": 1.88218664587941e-05, "loss": 0.5092, "step": 16451 }, { "epoch": 2.6856862985184278, "grad_norm": 1.6409322023391724, "learning_rate": 1.8821717022464794e-05, "loss": 0.5299, "step": 16452 }, { "epoch": 2.685849557160932, "grad_norm": 1.5565569400787354, "learning_rate": 1.882156757725202e-05, "loss": 0.5748, "step": 16453 }, { "epoch": 2.6860128158034366, "grad_norm": 1.7933627367019653, "learning_rate": 1.8821418123155936e-05, "loss": 0.5654, "step": 16454 }, { "epoch": 2.686176074445941, "grad_norm": 1.9940145015716553, "learning_rate": 1.882126866017668e-05, "loss": 0.5634, "step": 16455 }, { "epoch": 2.6863393330884455, "grad_norm": 1.8357791900634766, "learning_rate": 1.8821119188314408e-05, "loss": 0.617, "step": 16456 }, { "epoch": 2.6865025917309495, "grad_norm": 1.6583068370819092, "learning_rate": 1.8820969707569278e-05, "loss": 0.5335, "step": 16457 }, { "epoch": 2.686665850373454, "grad_norm": 2.2446439266204834, "learning_rate": 1.8820820217941427e-05, "loss": 0.6181, "step": 16458 }, { "epoch": 2.6868291090159584, "grad_norm": 1.8129420280456543, "learning_rate": 1.8820670719431017e-05, "loss": 0.6143, "step": 16459 }, { "epoch": 2.686992367658463, "grad_norm": 1.750885009765625, "learning_rate": 1.882052121203819e-05, "loss": 0.6359, "step": 16460 }, { "epoch": 2.6871556263009673, "grad_norm": 1.7610363960266113, "learning_rate": 1.8820371695763103e-05, "loss": 0.5386, "step": 16461 }, { "epoch": 2.6873188849434717, "grad_norm": 1.8827223777770996, "learning_rate": 1.8820222170605903e-05, "loss": 0.6642, "step": 16462 }, { "epoch": 2.687482143585976, "grad_norm": 2.0353565216064453, "learning_rate": 1.882007263656674e-05, "loss": 0.6577, "step": 16463 }, { "epoch": 2.6876454022284806, "grad_norm": 2.02829909324646, "learning_rate": 1.8819923093645773e-05, "loss": 0.6335, "step": 16464 }, { "epoch": 2.687808660870985, "grad_norm": 1.4245282411575317, "learning_rate": 1.8819773541843136e-05, "loss": 0.4546, "step": 16465 }, { "epoch": 2.6879719195134895, "grad_norm": 1.769622564315796, "learning_rate": 1.8819623981158996e-05, "loss": 0.5955, "step": 16466 }, { "epoch": 2.688135178155994, "grad_norm": 1.7487099170684814, "learning_rate": 1.8819474411593496e-05, "loss": 0.5685, "step": 16467 }, { "epoch": 2.688298436798498, "grad_norm": 1.7942384481430054, "learning_rate": 1.8819324833146788e-05, "loss": 0.6007, "step": 16468 }, { "epoch": 2.6884616954410023, "grad_norm": 1.7670613527297974, "learning_rate": 1.881917524581902e-05, "loss": 0.6037, "step": 16469 }, { "epoch": 2.6886249540835068, "grad_norm": 1.563623070716858, "learning_rate": 1.8819025649610346e-05, "loss": 0.5186, "step": 16470 }, { "epoch": 2.688788212726011, "grad_norm": 1.5182452201843262, "learning_rate": 1.8818876044520914e-05, "loss": 0.5143, "step": 16471 }, { "epoch": 2.6889514713685156, "grad_norm": 2.2427406311035156, "learning_rate": 1.881872643055088e-05, "loss": 0.6879, "step": 16472 }, { "epoch": 2.68911473001102, "grad_norm": 1.8487319946289062, "learning_rate": 1.8818576807700387e-05, "loss": 0.5463, "step": 16473 }, { "epoch": 2.689277988653524, "grad_norm": 1.7995370626449585, "learning_rate": 1.881842717596959e-05, "loss": 0.5885, "step": 16474 }, { "epoch": 2.6894412472960285, "grad_norm": 1.601436972618103, "learning_rate": 1.881827753535864e-05, "loss": 0.5312, "step": 16475 }, { "epoch": 2.689604505938533, "grad_norm": 1.5976933240890503, "learning_rate": 1.881812788586769e-05, "loss": 0.4825, "step": 16476 }, { "epoch": 2.6897677645810374, "grad_norm": 1.3869500160217285, "learning_rate": 1.8817978227496883e-05, "loss": 0.4844, "step": 16477 }, { "epoch": 2.689931023223542, "grad_norm": 1.9042530059814453, "learning_rate": 1.8817828560246376e-05, "loss": 0.5218, "step": 16478 }, { "epoch": 2.6900942818660463, "grad_norm": 1.628491997718811, "learning_rate": 1.8817678884116318e-05, "loss": 0.5454, "step": 16479 }, { "epoch": 2.6902575405085507, "grad_norm": 1.6562044620513916, "learning_rate": 1.8817529199106858e-05, "loss": 0.5306, "step": 16480 }, { "epoch": 2.690420799151055, "grad_norm": 1.854171633720398, "learning_rate": 1.881737950521815e-05, "loss": 0.66, "step": 16481 }, { "epoch": 2.6905840577935596, "grad_norm": 1.4614008665084839, "learning_rate": 1.8817229802450347e-05, "loss": 0.4772, "step": 16482 }, { "epoch": 2.690747316436064, "grad_norm": 1.879970669746399, "learning_rate": 1.881708009080359e-05, "loss": 0.5882, "step": 16483 }, { "epoch": 2.6909105750785685, "grad_norm": 1.737597107887268, "learning_rate": 1.881693037027804e-05, "loss": 0.5556, "step": 16484 }, { "epoch": 2.6910738337210725, "grad_norm": 2.020453453063965, "learning_rate": 1.881678064087384e-05, "loss": 0.6428, "step": 16485 }, { "epoch": 2.691237092363577, "grad_norm": 2.0557048320770264, "learning_rate": 1.8816630902591143e-05, "loss": 0.6984, "step": 16486 }, { "epoch": 2.6914003510060813, "grad_norm": 1.9158618450164795, "learning_rate": 1.8816481155430105e-05, "loss": 0.5671, "step": 16487 }, { "epoch": 2.6915636096485858, "grad_norm": 1.7840900421142578, "learning_rate": 1.881633139939087e-05, "loss": 0.5578, "step": 16488 }, { "epoch": 2.69172686829109, "grad_norm": 1.990853190422058, "learning_rate": 1.8816181634473593e-05, "loss": 0.5474, "step": 16489 }, { "epoch": 2.6918901269335946, "grad_norm": 1.889896273612976, "learning_rate": 1.8816031860678423e-05, "loss": 0.578, "step": 16490 }, { "epoch": 2.692053385576099, "grad_norm": 1.517315149307251, "learning_rate": 1.8815882078005515e-05, "loss": 0.4624, "step": 16491 }, { "epoch": 2.692216644218603, "grad_norm": 1.7731789350509644, "learning_rate": 1.881573228645501e-05, "loss": 0.5488, "step": 16492 }, { "epoch": 2.6923799028611075, "grad_norm": 1.7199431657791138, "learning_rate": 1.881558248602707e-05, "loss": 0.5441, "step": 16493 }, { "epoch": 2.692543161503612, "grad_norm": 1.9129719734191895, "learning_rate": 1.8815432676721835e-05, "loss": 0.658, "step": 16494 }, { "epoch": 2.6927064201461164, "grad_norm": 1.8507459163665771, "learning_rate": 1.8815282858539466e-05, "loss": 0.6596, "step": 16495 }, { "epoch": 2.692869678788621, "grad_norm": 1.8156758546829224, "learning_rate": 1.8815133031480107e-05, "loss": 0.5235, "step": 16496 }, { "epoch": 2.6930329374311253, "grad_norm": 1.791508674621582, "learning_rate": 1.8814983195543918e-05, "loss": 0.6647, "step": 16497 }, { "epoch": 2.6931961960736297, "grad_norm": 2.2884979248046875, "learning_rate": 1.8814833350731036e-05, "loss": 0.5802, "step": 16498 }, { "epoch": 2.693359454716134, "grad_norm": 2.0803933143615723, "learning_rate": 1.8814683497041622e-05, "loss": 0.5026, "step": 16499 }, { "epoch": 2.6935227133586386, "grad_norm": 1.817204236984253, "learning_rate": 1.881453363447582e-05, "loss": 0.6821, "step": 16500 }, { "epoch": 2.693685972001143, "grad_norm": 1.7782094478607178, "learning_rate": 1.881438376303379e-05, "loss": 0.4712, "step": 16501 }, { "epoch": 2.6938492306436475, "grad_norm": 1.5835224390029907, "learning_rate": 1.8814233882715678e-05, "loss": 0.5595, "step": 16502 }, { "epoch": 2.6940124892861514, "grad_norm": 1.5017518997192383, "learning_rate": 1.881408399352163e-05, "loss": 0.5296, "step": 16503 }, { "epoch": 2.694175747928656, "grad_norm": 1.5261878967285156, "learning_rate": 1.8813934095451807e-05, "loss": 0.5171, "step": 16504 }, { "epoch": 2.6943390065711603, "grad_norm": 1.526418685913086, "learning_rate": 1.881378418850635e-05, "loss": 0.4374, "step": 16505 }, { "epoch": 2.6945022652136648, "grad_norm": 1.4413286447525024, "learning_rate": 1.881363427268542e-05, "loss": 0.4282, "step": 16506 }, { "epoch": 2.694665523856169, "grad_norm": 1.694679617881775, "learning_rate": 1.881348434798916e-05, "loss": 0.5875, "step": 16507 }, { "epoch": 2.6948287824986736, "grad_norm": 1.9033890962600708, "learning_rate": 1.881333441441772e-05, "loss": 0.6268, "step": 16508 }, { "epoch": 2.694992041141178, "grad_norm": 1.3697019815444946, "learning_rate": 1.881318447197126e-05, "loss": 0.4468, "step": 16509 }, { "epoch": 2.695155299783682, "grad_norm": 1.806424617767334, "learning_rate": 1.8813034520649923e-05, "loss": 0.6355, "step": 16510 }, { "epoch": 2.6953185584261865, "grad_norm": 1.8098347187042236, "learning_rate": 1.8812884560453865e-05, "loss": 0.5381, "step": 16511 }, { "epoch": 2.695481817068691, "grad_norm": 1.7840319871902466, "learning_rate": 1.8812734591383232e-05, "loss": 0.5588, "step": 16512 }, { "epoch": 2.6956450757111954, "grad_norm": 1.5228395462036133, "learning_rate": 1.8812584613438177e-05, "loss": 0.4827, "step": 16513 }, { "epoch": 2.6958083343537, "grad_norm": 1.6747251749038696, "learning_rate": 1.8812434626618853e-05, "loss": 0.5112, "step": 16514 }, { "epoch": 2.6959715929962043, "grad_norm": 1.563176155090332, "learning_rate": 1.881228463092541e-05, "loss": 0.4793, "step": 16515 }, { "epoch": 2.6961348516387087, "grad_norm": 1.8001735210418701, "learning_rate": 1.8812134626358e-05, "loss": 0.6773, "step": 16516 }, { "epoch": 2.696298110281213, "grad_norm": 2.195585012435913, "learning_rate": 1.881198461291677e-05, "loss": 0.6791, "step": 16517 }, { "epoch": 2.6964613689237176, "grad_norm": 1.9948112964630127, "learning_rate": 1.8811834590601872e-05, "loss": 0.5377, "step": 16518 }, { "epoch": 2.696624627566222, "grad_norm": 1.5578739643096924, "learning_rate": 1.8811684559413465e-05, "loss": 0.4841, "step": 16519 }, { "epoch": 2.6967878862087264, "grad_norm": 1.7769899368286133, "learning_rate": 1.881153451935169e-05, "loss": 0.6268, "step": 16520 }, { "epoch": 2.6969511448512304, "grad_norm": 1.8941421508789062, "learning_rate": 1.8811384470416705e-05, "loss": 0.6025, "step": 16521 }, { "epoch": 2.697114403493735, "grad_norm": 1.4604666233062744, "learning_rate": 1.8811234412608654e-05, "loss": 0.482, "step": 16522 }, { "epoch": 2.6972776621362393, "grad_norm": 1.617811679840088, "learning_rate": 1.8811084345927696e-05, "loss": 0.4978, "step": 16523 }, { "epoch": 2.6974409207787438, "grad_norm": 1.727844476699829, "learning_rate": 1.8810934270373977e-05, "loss": 0.5606, "step": 16524 }, { "epoch": 2.697604179421248, "grad_norm": 1.5739357471466064, "learning_rate": 1.8810784185947648e-05, "loss": 0.4965, "step": 16525 }, { "epoch": 2.6977674380637526, "grad_norm": 1.7645779848098755, "learning_rate": 1.8810634092648862e-05, "loss": 0.532, "step": 16526 }, { "epoch": 2.6979306967062566, "grad_norm": 2.123354196548462, "learning_rate": 1.8810483990477773e-05, "loss": 0.543, "step": 16527 }, { "epoch": 2.698093955348761, "grad_norm": 1.8157551288604736, "learning_rate": 1.8810333879434524e-05, "loss": 0.6277, "step": 16528 }, { "epoch": 2.6982572139912655, "grad_norm": 2.0122199058532715, "learning_rate": 1.8810183759519277e-05, "loss": 0.5866, "step": 16529 }, { "epoch": 2.69842047263377, "grad_norm": 1.9084804058074951, "learning_rate": 1.8810033630732172e-05, "loss": 0.5862, "step": 16530 }, { "epoch": 2.6985837312762744, "grad_norm": 1.8074448108673096, "learning_rate": 1.880988349307337e-05, "loss": 0.5745, "step": 16531 }, { "epoch": 2.698746989918779, "grad_norm": 1.8627989292144775, "learning_rate": 1.8809733346543013e-05, "loss": 0.5792, "step": 16532 }, { "epoch": 2.6989102485612833, "grad_norm": 2.193448305130005, "learning_rate": 1.8809583191141262e-05, "loss": 0.745, "step": 16533 }, { "epoch": 2.6990735072037877, "grad_norm": 1.813879370689392, "learning_rate": 1.8809433026868258e-05, "loss": 0.6248, "step": 16534 }, { "epoch": 2.699236765846292, "grad_norm": 1.8677297830581665, "learning_rate": 1.880928285372416e-05, "loss": 0.6189, "step": 16535 }, { "epoch": 2.6994000244887966, "grad_norm": 1.7241209745407104, "learning_rate": 1.8809132671709114e-05, "loss": 0.5318, "step": 16536 }, { "epoch": 2.699563283131301, "grad_norm": 1.7797558307647705, "learning_rate": 1.8808982480823277e-05, "loss": 0.5321, "step": 16537 }, { "epoch": 2.699726541773805, "grad_norm": 1.8045668601989746, "learning_rate": 1.8808832281066795e-05, "loss": 0.6273, "step": 16538 }, { "epoch": 2.6998898004163094, "grad_norm": 1.7744349241256714, "learning_rate": 1.8808682072439822e-05, "loss": 0.5851, "step": 16539 }, { "epoch": 2.700053059058814, "grad_norm": 1.7285329103469849, "learning_rate": 1.880853185494251e-05, "loss": 0.632, "step": 16540 }, { "epoch": 2.7002163177013183, "grad_norm": 2.214113712310791, "learning_rate": 1.8808381628575008e-05, "loss": 0.6095, "step": 16541 }, { "epoch": 2.7003795763438228, "grad_norm": 1.640531063079834, "learning_rate": 1.8808231393337464e-05, "loss": 0.5605, "step": 16542 }, { "epoch": 2.700542834986327, "grad_norm": 1.5705642700195312, "learning_rate": 1.8808081149230036e-05, "loss": 0.498, "step": 16543 }, { "epoch": 2.7007060936288316, "grad_norm": 1.8231168985366821, "learning_rate": 1.8807930896252875e-05, "loss": 0.5351, "step": 16544 }, { "epoch": 2.7008693522713356, "grad_norm": 1.7822043895721436, "learning_rate": 1.8807780634406127e-05, "loss": 0.5773, "step": 16545 }, { "epoch": 2.70103261091384, "grad_norm": 1.804272174835205, "learning_rate": 1.880763036368995e-05, "loss": 0.5759, "step": 16546 }, { "epoch": 2.7011958695563445, "grad_norm": 1.7340136766433716, "learning_rate": 1.8807480084104484e-05, "loss": 0.5341, "step": 16547 }, { "epoch": 2.701359128198849, "grad_norm": 1.7237608432769775, "learning_rate": 1.8807329795649897e-05, "loss": 0.6114, "step": 16548 }, { "epoch": 2.7015223868413534, "grad_norm": 1.5833439826965332, "learning_rate": 1.8807179498326323e-05, "loss": 0.539, "step": 16549 }, { "epoch": 2.701685645483858, "grad_norm": 1.5692659616470337, "learning_rate": 1.8807029192133927e-05, "loss": 0.489, "step": 16550 }, { "epoch": 2.7018489041263622, "grad_norm": 1.9736303091049194, "learning_rate": 1.8806878877072856e-05, "loss": 0.6699, "step": 16551 }, { "epoch": 2.7020121627688667, "grad_norm": 1.5361465215682983, "learning_rate": 1.8806728553143256e-05, "loss": 0.516, "step": 16552 }, { "epoch": 2.702175421411371, "grad_norm": 1.657161831855774, "learning_rate": 1.8806578220345284e-05, "loss": 0.5394, "step": 16553 }, { "epoch": 2.7023386800538756, "grad_norm": 2.107928991317749, "learning_rate": 1.880642787867909e-05, "loss": 0.5911, "step": 16554 }, { "epoch": 2.70250193869638, "grad_norm": 1.5459269285202026, "learning_rate": 1.8806277528144826e-05, "loss": 0.4879, "step": 16555 }, { "epoch": 2.702665197338884, "grad_norm": 1.8966012001037598, "learning_rate": 1.8806127168742644e-05, "loss": 0.521, "step": 16556 }, { "epoch": 2.7028284559813884, "grad_norm": 2.0177032947540283, "learning_rate": 1.8805976800472695e-05, "loss": 0.6944, "step": 16557 }, { "epoch": 2.702991714623893, "grad_norm": 1.823409080505371, "learning_rate": 1.8805826423335127e-05, "loss": 0.5546, "step": 16558 }, { "epoch": 2.7031549732663973, "grad_norm": 2.233720302581787, "learning_rate": 1.8805676037330093e-05, "loss": 0.572, "step": 16559 }, { "epoch": 2.7033182319089017, "grad_norm": 2.1746251583099365, "learning_rate": 1.880552564245775e-05, "loss": 0.8631, "step": 16560 }, { "epoch": 2.703481490551406, "grad_norm": 1.6702942848205566, "learning_rate": 1.880537523871824e-05, "loss": 0.5932, "step": 16561 }, { "epoch": 2.70364474919391, "grad_norm": 1.9586342573165894, "learning_rate": 1.8805224826111725e-05, "loss": 0.6639, "step": 16562 }, { "epoch": 2.7038080078364146, "grad_norm": 2.095334053039551, "learning_rate": 1.8805074404638345e-05, "loss": 0.6709, "step": 16563 }, { "epoch": 2.703971266478919, "grad_norm": 1.852708339691162, "learning_rate": 1.8804923974298265e-05, "loss": 0.5594, "step": 16564 }, { "epoch": 2.7041345251214235, "grad_norm": 1.661108374595642, "learning_rate": 1.880477353509162e-05, "loss": 0.4876, "step": 16565 }, { "epoch": 2.704297783763928, "grad_norm": 1.7343050241470337, "learning_rate": 1.8804623087018577e-05, "loss": 0.6197, "step": 16566 }, { "epoch": 2.7044610424064324, "grad_norm": 1.691110610961914, "learning_rate": 1.8804472630079277e-05, "loss": 0.613, "step": 16567 }, { "epoch": 2.704624301048937, "grad_norm": 1.8345650434494019, "learning_rate": 1.8804322164273877e-05, "loss": 0.6519, "step": 16568 }, { "epoch": 2.7047875596914412, "grad_norm": 1.3634482622146606, "learning_rate": 1.8804171689602525e-05, "loss": 0.4572, "step": 16569 }, { "epoch": 2.7049508183339457, "grad_norm": 2.1294054985046387, "learning_rate": 1.8804021206065378e-05, "loss": 0.6143, "step": 16570 }, { "epoch": 2.70511407697645, "grad_norm": 1.7674267292022705, "learning_rate": 1.880387071366258e-05, "loss": 0.5275, "step": 16571 }, { "epoch": 2.7052773356189546, "grad_norm": 1.4791922569274902, "learning_rate": 1.8803720212394293e-05, "loss": 0.4905, "step": 16572 }, { "epoch": 2.7054405942614586, "grad_norm": 1.7082043886184692, "learning_rate": 1.8803569702260657e-05, "loss": 0.5182, "step": 16573 }, { "epoch": 2.705603852903963, "grad_norm": 1.9373130798339844, "learning_rate": 1.8803419183261828e-05, "loss": 0.5749, "step": 16574 }, { "epoch": 2.7057671115464674, "grad_norm": 1.6909286975860596, "learning_rate": 1.8803268655397963e-05, "loss": 0.6454, "step": 16575 }, { "epoch": 2.705930370188972, "grad_norm": 1.77646803855896, "learning_rate": 1.8803118118669203e-05, "loss": 0.5989, "step": 16576 }, { "epoch": 2.7060936288314763, "grad_norm": 1.8345304727554321, "learning_rate": 1.880296757307571e-05, "loss": 0.4991, "step": 16577 }, { "epoch": 2.7062568874739807, "grad_norm": 1.7071967124938965, "learning_rate": 1.8802817018617627e-05, "loss": 0.6025, "step": 16578 }, { "epoch": 2.706420146116485, "grad_norm": 1.9636720418930054, "learning_rate": 1.8802666455295113e-05, "loss": 0.845, "step": 16579 }, { "epoch": 2.706583404758989, "grad_norm": 1.5531482696533203, "learning_rate": 1.8802515883108314e-05, "loss": 0.4535, "step": 16580 }, { "epoch": 2.7067466634014936, "grad_norm": 2.162168264389038, "learning_rate": 1.8802365302057386e-05, "loss": 0.6311, "step": 16581 }, { "epoch": 2.706909922043998, "grad_norm": 2.0841856002807617, "learning_rate": 1.8802214712142475e-05, "loss": 0.6327, "step": 16582 }, { "epoch": 2.7070731806865025, "grad_norm": 1.8149399757385254, "learning_rate": 1.8802064113363738e-05, "loss": 0.5667, "step": 16583 }, { "epoch": 2.707236439329007, "grad_norm": 1.9262553453445435, "learning_rate": 1.8801913505721325e-05, "loss": 0.5071, "step": 16584 }, { "epoch": 2.7073996979715114, "grad_norm": 1.4386838674545288, "learning_rate": 1.880176288921539e-05, "loss": 0.5018, "step": 16585 }, { "epoch": 2.707562956614016, "grad_norm": 1.5238865613937378, "learning_rate": 1.880161226384608e-05, "loss": 0.5378, "step": 16586 }, { "epoch": 2.7077262152565202, "grad_norm": 1.634042501449585, "learning_rate": 1.8801461629613548e-05, "loss": 0.4689, "step": 16587 }, { "epoch": 2.7078894738990247, "grad_norm": 1.6673656702041626, "learning_rate": 1.8801310986517945e-05, "loss": 0.6194, "step": 16588 }, { "epoch": 2.708052732541529, "grad_norm": 1.5786864757537842, "learning_rate": 1.8801160334559426e-05, "loss": 0.6182, "step": 16589 }, { "epoch": 2.7082159911840336, "grad_norm": 1.3529037237167358, "learning_rate": 1.880100967373814e-05, "loss": 0.4933, "step": 16590 }, { "epoch": 2.7083792498265375, "grad_norm": 1.7782104015350342, "learning_rate": 1.8800859004054238e-05, "loss": 0.6349, "step": 16591 }, { "epoch": 2.708542508469042, "grad_norm": 1.918752670288086, "learning_rate": 1.880070832550788e-05, "loss": 0.6142, "step": 16592 }, { "epoch": 2.7087057671115464, "grad_norm": 1.9540215730667114, "learning_rate": 1.8800557638099203e-05, "loss": 0.6851, "step": 16593 }, { "epoch": 2.708869025754051, "grad_norm": 1.85176420211792, "learning_rate": 1.8800406941828372e-05, "loss": 0.6341, "step": 16594 }, { "epoch": 2.7090322843965553, "grad_norm": 1.8179359436035156, "learning_rate": 1.8800256236695534e-05, "loss": 0.5486, "step": 16595 }, { "epoch": 2.7091955430390597, "grad_norm": 1.8988083600997925, "learning_rate": 1.8800105522700837e-05, "loss": 0.5393, "step": 16596 }, { "epoch": 2.709358801681564, "grad_norm": 1.580170750617981, "learning_rate": 1.8799954799844437e-05, "loss": 0.4873, "step": 16597 }, { "epoch": 2.709522060324068, "grad_norm": 2.1389758586883545, "learning_rate": 1.8799804068126487e-05, "loss": 0.711, "step": 16598 }, { "epoch": 2.7096853189665726, "grad_norm": 1.977710485458374, "learning_rate": 1.8799653327547133e-05, "loss": 0.6161, "step": 16599 }, { "epoch": 2.709848577609077, "grad_norm": 1.7854108810424805, "learning_rate": 1.8799502578106533e-05, "loss": 0.5118, "step": 16600 }, { "epoch": 2.7100118362515815, "grad_norm": 1.479286551475525, "learning_rate": 1.8799351819804837e-05, "loss": 0.4876, "step": 16601 }, { "epoch": 2.710175094894086, "grad_norm": 1.6649599075317383, "learning_rate": 1.8799201052642194e-05, "loss": 0.4743, "step": 16602 }, { "epoch": 2.7103383535365904, "grad_norm": 1.775039792060852, "learning_rate": 1.879905027661876e-05, "loss": 0.5546, "step": 16603 }, { "epoch": 2.710501612179095, "grad_norm": 1.8469204902648926, "learning_rate": 1.8798899491734682e-05, "loss": 0.6379, "step": 16604 }, { "epoch": 2.7106648708215992, "grad_norm": 1.8928533792495728, "learning_rate": 1.8798748697990115e-05, "loss": 0.5627, "step": 16605 }, { "epoch": 2.7108281294641037, "grad_norm": 1.8749570846557617, "learning_rate": 1.879859789538521e-05, "loss": 0.5143, "step": 16606 }, { "epoch": 2.710991388106608, "grad_norm": 1.9530638456344604, "learning_rate": 1.8798447083920123e-05, "loss": 0.6604, "step": 16607 }, { "epoch": 2.7111546467491126, "grad_norm": 2.0698938369750977, "learning_rate": 1.8798296263594998e-05, "loss": 0.6282, "step": 16608 }, { "epoch": 2.7113179053916165, "grad_norm": 1.7837302684783936, "learning_rate": 1.879814543440999e-05, "loss": 0.6364, "step": 16609 }, { "epoch": 2.711481164034121, "grad_norm": 2.018670082092285, "learning_rate": 1.8797994596365258e-05, "loss": 0.5643, "step": 16610 }, { "epoch": 2.7116444226766254, "grad_norm": 1.6347397565841675, "learning_rate": 1.879784374946094e-05, "loss": 0.4858, "step": 16611 }, { "epoch": 2.71180768131913, "grad_norm": 1.7204593420028687, "learning_rate": 1.8797692893697205e-05, "loss": 0.5143, "step": 16612 }, { "epoch": 2.7119709399616343, "grad_norm": 1.865268588066101, "learning_rate": 1.8797542029074188e-05, "loss": 0.5639, "step": 16613 }, { "epoch": 2.7121341986041387, "grad_norm": 1.8346552848815918, "learning_rate": 1.8797391155592054e-05, "loss": 0.6255, "step": 16614 }, { "epoch": 2.7122974572466427, "grad_norm": 1.6340967416763306, "learning_rate": 1.8797240273250945e-05, "loss": 0.5238, "step": 16615 }, { "epoch": 2.712460715889147, "grad_norm": 1.6979809999465942, "learning_rate": 1.8797089382051016e-05, "loss": 0.5662, "step": 16616 }, { "epoch": 2.7126239745316516, "grad_norm": 1.4810415506362915, "learning_rate": 1.8796938481992427e-05, "loss": 0.4577, "step": 16617 }, { "epoch": 2.712787233174156, "grad_norm": 1.8998157978057861, "learning_rate": 1.8796787573075316e-05, "loss": 0.5308, "step": 16618 }, { "epoch": 2.7129504918166605, "grad_norm": 2.0942726135253906, "learning_rate": 1.8796636655299847e-05, "loss": 0.5444, "step": 16619 }, { "epoch": 2.713113750459165, "grad_norm": 1.8948887586593628, "learning_rate": 1.879648572866617e-05, "loss": 0.5389, "step": 16620 }, { "epoch": 2.7132770091016694, "grad_norm": 1.6946784257888794, "learning_rate": 1.8796334793174426e-05, "loss": 0.5761, "step": 16621 }, { "epoch": 2.713440267744174, "grad_norm": 1.5329535007476807, "learning_rate": 1.879618384882478e-05, "loss": 0.5013, "step": 16622 }, { "epoch": 2.7136035263866782, "grad_norm": 1.8743880987167358, "learning_rate": 1.8796032895617377e-05, "loss": 0.6285, "step": 16623 }, { "epoch": 2.7137667850291827, "grad_norm": 1.8764699697494507, "learning_rate": 1.8795881933552374e-05, "loss": 0.5677, "step": 16624 }, { "epoch": 2.713930043671687, "grad_norm": 1.971200942993164, "learning_rate": 1.8795730962629918e-05, "loss": 0.632, "step": 16625 }, { "epoch": 2.714093302314191, "grad_norm": 1.7965399026870728, "learning_rate": 1.8795579982850167e-05, "loss": 0.5302, "step": 16626 }, { "epoch": 2.7142565609566955, "grad_norm": 2.2528023719787598, "learning_rate": 1.8795428994213266e-05, "loss": 0.6555, "step": 16627 }, { "epoch": 2.7144198195992, "grad_norm": 1.658044695854187, "learning_rate": 1.879527799671937e-05, "loss": 0.5568, "step": 16628 }, { "epoch": 2.7145830782417044, "grad_norm": 2.045572519302368, "learning_rate": 1.8795126990368632e-05, "loss": 0.5551, "step": 16629 }, { "epoch": 2.714746336884209, "grad_norm": 1.8214598894119263, "learning_rate": 1.8794975975161206e-05, "loss": 0.5636, "step": 16630 }, { "epoch": 2.7149095955267133, "grad_norm": 1.425578236579895, "learning_rate": 1.8794824951097237e-05, "loss": 0.4438, "step": 16631 }, { "epoch": 2.7150728541692177, "grad_norm": 1.7139400243759155, "learning_rate": 1.8794673918176882e-05, "loss": 0.6041, "step": 16632 }, { "epoch": 2.7152361128117217, "grad_norm": 2.133733034133911, "learning_rate": 1.8794522876400296e-05, "loss": 0.6604, "step": 16633 }, { "epoch": 2.715399371454226, "grad_norm": 1.8545811176300049, "learning_rate": 1.8794371825767624e-05, "loss": 0.5866, "step": 16634 }, { "epoch": 2.7155626300967306, "grad_norm": 1.8968883752822876, "learning_rate": 1.8794220766279027e-05, "loss": 0.6723, "step": 16635 }, { "epoch": 2.715725888739235, "grad_norm": 1.88693106174469, "learning_rate": 1.879406969793465e-05, "loss": 0.6755, "step": 16636 }, { "epoch": 2.7158891473817395, "grad_norm": 1.441852331161499, "learning_rate": 1.8793918620734643e-05, "loss": 0.497, "step": 16637 }, { "epoch": 2.716052406024244, "grad_norm": 1.3071404695510864, "learning_rate": 1.879376753467917e-05, "loss": 0.4534, "step": 16638 }, { "epoch": 2.7162156646667484, "grad_norm": 1.850710391998291, "learning_rate": 1.879361643976837e-05, "loss": 0.7048, "step": 16639 }, { "epoch": 2.716378923309253, "grad_norm": 1.9601540565490723, "learning_rate": 1.87934653360024e-05, "loss": 0.6449, "step": 16640 }, { "epoch": 2.7165421819517572, "grad_norm": 1.572615146636963, "learning_rate": 1.8793314223381416e-05, "loss": 0.5101, "step": 16641 }, { "epoch": 2.7167054405942617, "grad_norm": 1.519018292427063, "learning_rate": 1.8793163101905562e-05, "loss": 0.5297, "step": 16642 }, { "epoch": 2.716868699236766, "grad_norm": 1.9437828063964844, "learning_rate": 1.8793011971575e-05, "loss": 0.65, "step": 16643 }, { "epoch": 2.71703195787927, "grad_norm": 1.7062525749206543, "learning_rate": 1.8792860832389877e-05, "loss": 0.4476, "step": 16644 }, { "epoch": 2.7171952165217745, "grad_norm": 1.7000830173492432, "learning_rate": 1.8792709684350344e-05, "loss": 0.574, "step": 16645 }, { "epoch": 2.717358475164279, "grad_norm": 1.6324061155319214, "learning_rate": 1.8792558527456556e-05, "loss": 0.5573, "step": 16646 }, { "epoch": 2.7175217338067834, "grad_norm": 1.7867839336395264, "learning_rate": 1.879240736170866e-05, "loss": 0.5573, "step": 16647 }, { "epoch": 2.717684992449288, "grad_norm": 2.056640863418579, "learning_rate": 1.8792256187106816e-05, "loss": 0.6017, "step": 16648 }, { "epoch": 2.7178482510917923, "grad_norm": 1.9215099811553955, "learning_rate": 1.8792105003651172e-05, "loss": 0.6105, "step": 16649 }, { "epoch": 2.7180115097342963, "grad_norm": 1.9160763025283813, "learning_rate": 1.879195381134188e-05, "loss": 0.6402, "step": 16650 }, { "epoch": 2.7181747683768007, "grad_norm": 1.4280481338500977, "learning_rate": 1.879180261017909e-05, "loss": 0.4932, "step": 16651 }, { "epoch": 2.718338027019305, "grad_norm": 1.6068733930587769, "learning_rate": 1.879165140016296e-05, "loss": 0.5188, "step": 16652 }, { "epoch": 2.7185012856618096, "grad_norm": 1.447710633277893, "learning_rate": 1.879150018129364e-05, "loss": 0.4978, "step": 16653 }, { "epoch": 2.718664544304314, "grad_norm": 1.8120697736740112, "learning_rate": 1.879134895357128e-05, "loss": 0.5563, "step": 16654 }, { "epoch": 2.7188278029468185, "grad_norm": 2.2914133071899414, "learning_rate": 1.8791197716996038e-05, "loss": 0.5367, "step": 16655 }, { "epoch": 2.718991061589323, "grad_norm": 1.7524776458740234, "learning_rate": 1.8791046471568056e-05, "loss": 0.5722, "step": 16656 }, { "epoch": 2.7191543202318273, "grad_norm": 1.7793126106262207, "learning_rate": 1.8790895217287498e-05, "loss": 0.5813, "step": 16657 }, { "epoch": 2.719317578874332, "grad_norm": 1.802713394165039, "learning_rate": 1.8790743954154508e-05, "loss": 0.4965, "step": 16658 }, { "epoch": 2.7194808375168362, "grad_norm": 1.9872863292694092, "learning_rate": 1.879059268216924e-05, "loss": 0.6009, "step": 16659 }, { "epoch": 2.7196440961593407, "grad_norm": 2.0681357383728027, "learning_rate": 1.8790441401331848e-05, "loss": 0.629, "step": 16660 }, { "epoch": 2.7198073548018447, "grad_norm": 1.749673843383789, "learning_rate": 1.8790290111642484e-05, "loss": 0.5015, "step": 16661 }, { "epoch": 2.719970613444349, "grad_norm": 1.8369203805923462, "learning_rate": 1.87901388131013e-05, "loss": 0.6317, "step": 16662 }, { "epoch": 2.7201338720868535, "grad_norm": 1.3952044248580933, "learning_rate": 1.878998750570845e-05, "loss": 0.4118, "step": 16663 }, { "epoch": 2.720297130729358, "grad_norm": 1.8993173837661743, "learning_rate": 1.878983618946409e-05, "loss": 0.5701, "step": 16664 }, { "epoch": 2.7204603893718624, "grad_norm": 1.7989118099212646, "learning_rate": 1.8789684864368358e-05, "loss": 0.5316, "step": 16665 }, { "epoch": 2.720623648014367, "grad_norm": 1.7507249116897583, "learning_rate": 1.878953353042142e-05, "loss": 0.485, "step": 16666 }, { "epoch": 2.7207869066568713, "grad_norm": 1.9456791877746582, "learning_rate": 1.8789382187623423e-05, "loss": 0.6038, "step": 16667 }, { "epoch": 2.7209501652993753, "grad_norm": 1.4723124504089355, "learning_rate": 1.878923083597452e-05, "loss": 0.4495, "step": 16668 }, { "epoch": 2.7211134239418797, "grad_norm": 1.9290400743484497, "learning_rate": 1.8789079475474866e-05, "loss": 0.649, "step": 16669 }, { "epoch": 2.721276682584384, "grad_norm": 1.994280219078064, "learning_rate": 1.8788928106124608e-05, "loss": 0.6313, "step": 16670 }, { "epoch": 2.7214399412268886, "grad_norm": 2.1381192207336426, "learning_rate": 1.8788776727923906e-05, "loss": 0.6639, "step": 16671 }, { "epoch": 2.721603199869393, "grad_norm": 1.627790093421936, "learning_rate": 1.8788625340872906e-05, "loss": 0.6853, "step": 16672 }, { "epoch": 2.7217664585118975, "grad_norm": 1.889522671699524, "learning_rate": 1.8788473944971763e-05, "loss": 0.5948, "step": 16673 }, { "epoch": 2.721929717154402, "grad_norm": 1.3115930557250977, "learning_rate": 1.8788322540220627e-05, "loss": 0.417, "step": 16674 }, { "epoch": 2.7220929757969063, "grad_norm": 1.9176661968231201, "learning_rate": 1.8788171126619653e-05, "loss": 0.6588, "step": 16675 }, { "epoch": 2.722256234439411, "grad_norm": 1.703707218170166, "learning_rate": 1.8788019704168995e-05, "loss": 0.5996, "step": 16676 }, { "epoch": 2.722419493081915, "grad_norm": 1.491767406463623, "learning_rate": 1.8787868272868804e-05, "loss": 0.5016, "step": 16677 }, { "epoch": 2.7225827517244197, "grad_norm": 1.7939825057983398, "learning_rate": 1.8787716832719232e-05, "loss": 0.635, "step": 16678 }, { "epoch": 2.7227460103669237, "grad_norm": 1.7195117473602295, "learning_rate": 1.878756538372043e-05, "loss": 0.5065, "step": 16679 }, { "epoch": 2.722909269009428, "grad_norm": 1.9369292259216309, "learning_rate": 1.878741392587255e-05, "loss": 0.7323, "step": 16680 }, { "epoch": 2.7230725276519325, "grad_norm": 1.4369068145751953, "learning_rate": 1.878726245917575e-05, "loss": 0.4396, "step": 16681 }, { "epoch": 2.723235786294437, "grad_norm": 1.8907595872879028, "learning_rate": 1.878711098363018e-05, "loss": 0.51, "step": 16682 }, { "epoch": 2.7233990449369414, "grad_norm": 1.8821135759353638, "learning_rate": 1.8786959499235987e-05, "loss": 0.575, "step": 16683 }, { "epoch": 2.723562303579446, "grad_norm": 1.588097095489502, "learning_rate": 1.878680800599333e-05, "loss": 0.4821, "step": 16684 }, { "epoch": 2.7237255622219503, "grad_norm": 1.920490026473999, "learning_rate": 1.878665650390236e-05, "loss": 0.5614, "step": 16685 }, { "epoch": 2.7238888208644543, "grad_norm": 1.8171836137771606, "learning_rate": 1.878650499296323e-05, "loss": 0.6028, "step": 16686 }, { "epoch": 2.7240520795069587, "grad_norm": 1.9502956867218018, "learning_rate": 1.8786353473176093e-05, "loss": 0.6246, "step": 16687 }, { "epoch": 2.724215338149463, "grad_norm": 1.5336875915527344, "learning_rate": 1.8786201944541096e-05, "loss": 0.5273, "step": 16688 }, { "epoch": 2.7243785967919676, "grad_norm": 1.7241657972335815, "learning_rate": 1.87860504070584e-05, "loss": 0.5382, "step": 16689 }, { "epoch": 2.724541855434472, "grad_norm": 2.3924672603607178, "learning_rate": 1.8785898860728152e-05, "loss": 0.8048, "step": 16690 }, { "epoch": 2.7247051140769765, "grad_norm": 1.622147798538208, "learning_rate": 1.8785747305550507e-05, "loss": 0.5711, "step": 16691 }, { "epoch": 2.724868372719481, "grad_norm": 2.042023181915283, "learning_rate": 1.8785595741525613e-05, "loss": 0.622, "step": 16692 }, { "epoch": 2.7250316313619853, "grad_norm": 1.5831685066223145, "learning_rate": 1.878544416865363e-05, "loss": 0.479, "step": 16693 }, { "epoch": 2.72519489000449, "grad_norm": 1.8269283771514893, "learning_rate": 1.8785292586934705e-05, "loss": 0.4946, "step": 16694 }, { "epoch": 2.725358148646994, "grad_norm": 1.5305780172348022, "learning_rate": 1.8785140996368996e-05, "loss": 0.5501, "step": 16695 }, { "epoch": 2.7255214072894987, "grad_norm": 1.732620120048523, "learning_rate": 1.878498939695665e-05, "loss": 0.5566, "step": 16696 }, { "epoch": 2.7256846659320026, "grad_norm": 1.7710344791412354, "learning_rate": 1.8784837788697823e-05, "loss": 0.5024, "step": 16697 }, { "epoch": 2.725847924574507, "grad_norm": 1.466112732887268, "learning_rate": 1.8784686171592668e-05, "loss": 0.4514, "step": 16698 }, { "epoch": 2.7260111832170115, "grad_norm": 1.9642612934112549, "learning_rate": 1.8784534545641334e-05, "loss": 0.6457, "step": 16699 }, { "epoch": 2.726174441859516, "grad_norm": 1.8240896463394165, "learning_rate": 1.8784382910843978e-05, "loss": 0.5198, "step": 16700 }, { "epoch": 2.7263377005020204, "grad_norm": 1.9138877391815186, "learning_rate": 1.8784231267200748e-05, "loss": 0.5609, "step": 16701 }, { "epoch": 2.726500959144525, "grad_norm": 1.8895182609558105, "learning_rate": 1.8784079614711798e-05, "loss": 0.6141, "step": 16702 }, { "epoch": 2.726664217787029, "grad_norm": 1.8375157117843628, "learning_rate": 1.8783927953377285e-05, "loss": 0.5583, "step": 16703 }, { "epoch": 2.7268274764295333, "grad_norm": 1.9944090843200684, "learning_rate": 1.878377628319736e-05, "loss": 0.5868, "step": 16704 }, { "epoch": 2.7269907350720377, "grad_norm": 2.024186849594116, "learning_rate": 1.8783624604172175e-05, "loss": 0.6246, "step": 16705 }, { "epoch": 2.727153993714542, "grad_norm": 1.732542872428894, "learning_rate": 1.878347291630188e-05, "loss": 0.6237, "step": 16706 }, { "epoch": 2.7273172523570466, "grad_norm": 1.546504020690918, "learning_rate": 1.878332121958663e-05, "loss": 0.4664, "step": 16707 }, { "epoch": 2.727480510999551, "grad_norm": 1.6097654104232788, "learning_rate": 1.878316951402658e-05, "loss": 0.5469, "step": 16708 }, { "epoch": 2.7276437696420555, "grad_norm": 1.458191156387329, "learning_rate": 1.878301779962188e-05, "loss": 0.4594, "step": 16709 }, { "epoch": 2.72780702828456, "grad_norm": 1.7755744457244873, "learning_rate": 1.8782866076372685e-05, "loss": 0.5836, "step": 16710 }, { "epoch": 2.7279702869270643, "grad_norm": 1.8162027597427368, "learning_rate": 1.878271434427914e-05, "loss": 0.5578, "step": 16711 }, { "epoch": 2.7281335455695688, "grad_norm": 1.7298256158828735, "learning_rate": 1.878256260334141e-05, "loss": 0.5701, "step": 16712 }, { "epoch": 2.728296804212073, "grad_norm": 1.408016562461853, "learning_rate": 1.878241085355964e-05, "loss": 0.4861, "step": 16713 }, { "epoch": 2.728460062854577, "grad_norm": 1.7586253881454468, "learning_rate": 1.8782259094933985e-05, "loss": 0.5908, "step": 16714 }, { "epoch": 2.7286233214970816, "grad_norm": 1.6779379844665527, "learning_rate": 1.8782107327464594e-05, "loss": 0.5349, "step": 16715 }, { "epoch": 2.728786580139586, "grad_norm": 1.6519625186920166, "learning_rate": 1.8781955551151628e-05, "loss": 0.4597, "step": 16716 }, { "epoch": 2.7289498387820905, "grad_norm": 1.4247581958770752, "learning_rate": 1.8781803765995234e-05, "loss": 0.5584, "step": 16717 }, { "epoch": 2.729113097424595, "grad_norm": 2.082822799682617, "learning_rate": 1.878165197199556e-05, "loss": 0.8922, "step": 16718 }, { "epoch": 2.7292763560670994, "grad_norm": 2.524829387664795, "learning_rate": 1.8781500169152774e-05, "loss": 0.5864, "step": 16719 }, { "epoch": 2.729439614709604, "grad_norm": 1.9011597633361816, "learning_rate": 1.8781348357467017e-05, "loss": 0.5581, "step": 16720 }, { "epoch": 2.729602873352108, "grad_norm": 1.8007005453109741, "learning_rate": 1.878119653693844e-05, "loss": 0.5467, "step": 16721 }, { "epoch": 2.7297661319946123, "grad_norm": 1.3887399435043335, "learning_rate": 1.8781044707567203e-05, "loss": 0.4357, "step": 16722 }, { "epoch": 2.7299293906371167, "grad_norm": 1.507433533668518, "learning_rate": 1.8780892869353458e-05, "loss": 0.4686, "step": 16723 }, { "epoch": 2.730092649279621, "grad_norm": 1.9193627834320068, "learning_rate": 1.8780741022297353e-05, "loss": 0.5365, "step": 16724 }, { "epoch": 2.7302559079221256, "grad_norm": 1.6746186017990112, "learning_rate": 1.8780589166399047e-05, "loss": 0.5456, "step": 16725 }, { "epoch": 2.73041916656463, "grad_norm": 1.877157211303711, "learning_rate": 1.878043730165869e-05, "loss": 0.6339, "step": 16726 }, { "epoch": 2.7305824252071345, "grad_norm": 2.0553653240203857, "learning_rate": 1.8780285428076434e-05, "loss": 0.6164, "step": 16727 }, { "epoch": 2.730745683849639, "grad_norm": 2.1768383979797363, "learning_rate": 1.878013354565243e-05, "loss": 0.6683, "step": 16728 }, { "epoch": 2.7309089424921433, "grad_norm": 1.7574193477630615, "learning_rate": 1.877998165438684e-05, "loss": 0.5537, "step": 16729 }, { "epoch": 2.7310722011346478, "grad_norm": 1.8012056350708008, "learning_rate": 1.8779829754279806e-05, "loss": 0.5602, "step": 16730 }, { "epoch": 2.731235459777152, "grad_norm": 1.911354422569275, "learning_rate": 1.8779677845331488e-05, "loss": 0.6451, "step": 16731 }, { "epoch": 2.731398718419656, "grad_norm": 1.9095596075057983, "learning_rate": 1.8779525927542037e-05, "loss": 0.6009, "step": 16732 }, { "epoch": 2.7315619770621606, "grad_norm": 1.8988440036773682, "learning_rate": 1.8779374000911605e-05, "loss": 0.5955, "step": 16733 }, { "epoch": 2.731725235704665, "grad_norm": 1.9598146677017212, "learning_rate": 1.8779222065440344e-05, "loss": 0.6229, "step": 16734 }, { "epoch": 2.7318884943471695, "grad_norm": 1.763124942779541, "learning_rate": 1.877907012112841e-05, "loss": 0.5493, "step": 16735 }, { "epoch": 2.732051752989674, "grad_norm": 1.951460361480713, "learning_rate": 1.8778918167975958e-05, "loss": 0.6425, "step": 16736 }, { "epoch": 2.7322150116321784, "grad_norm": 2.147158622741699, "learning_rate": 1.8778766205983133e-05, "loss": 0.7509, "step": 16737 }, { "epoch": 2.732378270274683, "grad_norm": 1.8510369062423706, "learning_rate": 1.8778614235150094e-05, "loss": 0.606, "step": 16738 }, { "epoch": 2.732541528917187, "grad_norm": 1.7507165670394897, "learning_rate": 1.8778462255476995e-05, "loss": 0.6535, "step": 16739 }, { "epoch": 2.7327047875596913, "grad_norm": 1.7132530212402344, "learning_rate": 1.8778310266963985e-05, "loss": 0.6151, "step": 16740 }, { "epoch": 2.7328680462021957, "grad_norm": 1.9740732908248901, "learning_rate": 1.877815826961122e-05, "loss": 0.6662, "step": 16741 }, { "epoch": 2.7330313048447, "grad_norm": 1.7233861684799194, "learning_rate": 1.877800626341885e-05, "loss": 0.5857, "step": 16742 }, { "epoch": 2.7331945634872046, "grad_norm": 1.5028626918792725, "learning_rate": 1.877785424838703e-05, "loss": 0.4955, "step": 16743 }, { "epoch": 2.733357822129709, "grad_norm": 1.602030634880066, "learning_rate": 1.8777702224515915e-05, "loss": 0.5553, "step": 16744 }, { "epoch": 2.7335210807722135, "grad_norm": 1.7345693111419678, "learning_rate": 1.8777550191805652e-05, "loss": 0.513, "step": 16745 }, { "epoch": 2.733684339414718, "grad_norm": 1.8383170366287231, "learning_rate": 1.8777398150256406e-05, "loss": 0.6283, "step": 16746 }, { "epoch": 2.7338475980572223, "grad_norm": 1.5996431112289429, "learning_rate": 1.8777246099868315e-05, "loss": 0.5206, "step": 16747 }, { "epoch": 2.7340108566997268, "grad_norm": 1.6426550149917603, "learning_rate": 1.8777094040641542e-05, "loss": 0.498, "step": 16748 }, { "epoch": 2.734174115342231, "grad_norm": 1.6450637578964233, "learning_rate": 1.8776941972576242e-05, "loss": 0.5218, "step": 16749 }, { "epoch": 2.734337373984735, "grad_norm": 1.64905846118927, "learning_rate": 1.8776789895672557e-05, "loss": 0.5135, "step": 16750 }, { "epoch": 2.7345006326272396, "grad_norm": 1.8968874216079712, "learning_rate": 1.877663780993065e-05, "loss": 0.6455, "step": 16751 }, { "epoch": 2.734663891269744, "grad_norm": 2.083348035812378, "learning_rate": 1.8776485715350672e-05, "loss": 0.5825, "step": 16752 }, { "epoch": 2.7348271499122485, "grad_norm": 2.069751024246216, "learning_rate": 1.8776333611932775e-05, "loss": 0.6877, "step": 16753 }, { "epoch": 2.734990408554753, "grad_norm": 1.7509840726852417, "learning_rate": 1.877618149967711e-05, "loss": 0.5187, "step": 16754 }, { "epoch": 2.7351536671972574, "grad_norm": 1.5698353052139282, "learning_rate": 1.8776029378583836e-05, "loss": 0.5395, "step": 16755 }, { "epoch": 2.7353169258397614, "grad_norm": 1.9295275211334229, "learning_rate": 1.87758772486531e-05, "loss": 0.5725, "step": 16756 }, { "epoch": 2.735480184482266, "grad_norm": 1.5096006393432617, "learning_rate": 1.8775725109885058e-05, "loss": 0.4812, "step": 16757 }, { "epoch": 2.7356434431247703, "grad_norm": 1.7118198871612549, "learning_rate": 1.877557296227986e-05, "loss": 0.5771, "step": 16758 }, { "epoch": 2.7358067017672747, "grad_norm": 1.5132182836532593, "learning_rate": 1.877542080583767e-05, "loss": 0.5337, "step": 16759 }, { "epoch": 2.735969960409779, "grad_norm": 1.5363600254058838, "learning_rate": 1.877526864055863e-05, "loss": 0.4846, "step": 16760 }, { "epoch": 2.7361332190522836, "grad_norm": 1.838297963142395, "learning_rate": 1.8775116466442897e-05, "loss": 0.526, "step": 16761 }, { "epoch": 2.736296477694788, "grad_norm": 1.6352523565292358, "learning_rate": 1.8774964283490622e-05, "loss": 0.5389, "step": 16762 }, { "epoch": 2.7364597363372924, "grad_norm": 1.6770631074905396, "learning_rate": 1.877481209170196e-05, "loss": 0.4954, "step": 16763 }, { "epoch": 2.736622994979797, "grad_norm": 1.5595831871032715, "learning_rate": 1.877465989107707e-05, "loss": 0.4965, "step": 16764 }, { "epoch": 2.7367862536223013, "grad_norm": 1.7029294967651367, "learning_rate": 1.8774507681616093e-05, "loss": 0.5287, "step": 16765 }, { "epoch": 2.7369495122648058, "grad_norm": 1.8645366430282593, "learning_rate": 1.8774355463319193e-05, "loss": 0.6893, "step": 16766 }, { "epoch": 2.7371127709073098, "grad_norm": 1.6478272676467896, "learning_rate": 1.877420323618652e-05, "loss": 0.554, "step": 16767 }, { "epoch": 2.737276029549814, "grad_norm": 1.992116093635559, "learning_rate": 1.8774051000218226e-05, "loss": 0.5876, "step": 16768 }, { "epoch": 2.7374392881923186, "grad_norm": 1.8322604894638062, "learning_rate": 1.8773898755414463e-05, "loss": 0.5641, "step": 16769 }, { "epoch": 2.737602546834823, "grad_norm": 1.4089696407318115, "learning_rate": 1.8773746501775388e-05, "loss": 0.3949, "step": 16770 }, { "epoch": 2.7377658054773275, "grad_norm": 1.9402270317077637, "learning_rate": 1.8773594239301152e-05, "loss": 0.6209, "step": 16771 }, { "epoch": 2.737929064119832, "grad_norm": 2.4936766624450684, "learning_rate": 1.877344196799191e-05, "loss": 0.5362, "step": 16772 }, { "epoch": 2.7380923227623364, "grad_norm": 1.7907745838165283, "learning_rate": 1.8773289687847815e-05, "loss": 0.6112, "step": 16773 }, { "epoch": 2.7382555814048404, "grad_norm": 1.9813156127929688, "learning_rate": 1.8773137398869017e-05, "loss": 0.5451, "step": 16774 }, { "epoch": 2.738418840047345, "grad_norm": 1.6885403394699097, "learning_rate": 1.8772985101055673e-05, "loss": 0.52, "step": 16775 }, { "epoch": 2.7385820986898493, "grad_norm": 1.7506283521652222, "learning_rate": 1.8772832794407934e-05, "loss": 0.5701, "step": 16776 }, { "epoch": 2.7387453573323537, "grad_norm": 1.6005529165267944, "learning_rate": 1.8772680478925956e-05, "loss": 0.5207, "step": 16777 }, { "epoch": 2.738908615974858, "grad_norm": 2.098233699798584, "learning_rate": 1.877252815460989e-05, "loss": 0.6144, "step": 16778 }, { "epoch": 2.7390718746173626, "grad_norm": 1.5514872074127197, "learning_rate": 1.8772375821459893e-05, "loss": 0.5328, "step": 16779 }, { "epoch": 2.739235133259867, "grad_norm": 2.032209634780884, "learning_rate": 1.8772223479476114e-05, "loss": 0.6277, "step": 16780 }, { "epoch": 2.7393983919023714, "grad_norm": 1.6552964448928833, "learning_rate": 1.8772071128658708e-05, "loss": 0.6522, "step": 16781 }, { "epoch": 2.739561650544876, "grad_norm": 1.6934640407562256, "learning_rate": 1.877191876900783e-05, "loss": 0.5072, "step": 16782 }, { "epoch": 2.7397249091873803, "grad_norm": 1.8199266195297241, "learning_rate": 1.877176640052363e-05, "loss": 0.5343, "step": 16783 }, { "epoch": 2.7398881678298848, "grad_norm": 1.571297287940979, "learning_rate": 1.8771614023206267e-05, "loss": 0.5129, "step": 16784 }, { "epoch": 2.7400514264723888, "grad_norm": 2.4696574211120605, "learning_rate": 1.877146163705589e-05, "loss": 0.6305, "step": 16785 }, { "epoch": 2.740214685114893, "grad_norm": 1.9648605585098267, "learning_rate": 1.8771309242072654e-05, "loss": 0.6302, "step": 16786 }, { "epoch": 2.7403779437573976, "grad_norm": 1.6896098852157593, "learning_rate": 1.8771156838256707e-05, "loss": 0.557, "step": 16787 }, { "epoch": 2.740541202399902, "grad_norm": 1.9716390371322632, "learning_rate": 1.8771004425608213e-05, "loss": 0.6711, "step": 16788 }, { "epoch": 2.7407044610424065, "grad_norm": 2.393695592880249, "learning_rate": 1.877085200412732e-05, "loss": 0.6095, "step": 16789 }, { "epoch": 2.740867719684911, "grad_norm": 1.632895827293396, "learning_rate": 1.8770699573814176e-05, "loss": 0.5757, "step": 16790 }, { "epoch": 2.741030978327415, "grad_norm": 1.917678952217102, "learning_rate": 1.8770547134668943e-05, "loss": 0.5184, "step": 16791 }, { "epoch": 2.7411942369699194, "grad_norm": 1.729008674621582, "learning_rate": 1.8770394686691775e-05, "loss": 0.5548, "step": 16792 }, { "epoch": 2.741357495612424, "grad_norm": 1.8719189167022705, "learning_rate": 1.8770242229882814e-05, "loss": 0.4971, "step": 16793 }, { "epoch": 2.7415207542549282, "grad_norm": 1.6740864515304565, "learning_rate": 1.8770089764242224e-05, "loss": 0.5417, "step": 16794 }, { "epoch": 2.7416840128974327, "grad_norm": 1.6230462789535522, "learning_rate": 1.876993728977016e-05, "loss": 0.5379, "step": 16795 }, { "epoch": 2.741847271539937, "grad_norm": 1.8141647577285767, "learning_rate": 1.8769784806466768e-05, "loss": 0.6516, "step": 16796 }, { "epoch": 2.7420105301824416, "grad_norm": 2.278895616531372, "learning_rate": 1.8769632314332207e-05, "loss": 0.6947, "step": 16797 }, { "epoch": 2.742173788824946, "grad_norm": 1.9790170192718506, "learning_rate": 1.8769479813366626e-05, "loss": 0.5816, "step": 16798 }, { "epoch": 2.7423370474674504, "grad_norm": 1.7881243228912354, "learning_rate": 1.8769327303570185e-05, "loss": 0.6053, "step": 16799 }, { "epoch": 2.742500306109955, "grad_norm": 1.897326946258545, "learning_rate": 1.8769174784943032e-05, "loss": 0.5671, "step": 16800 }, { "epoch": 2.7426635647524593, "grad_norm": 1.8113373517990112, "learning_rate": 1.876902225748532e-05, "loss": 0.4837, "step": 16801 }, { "epoch": 2.7428268233949633, "grad_norm": 1.5825766324996948, "learning_rate": 1.876886972119721e-05, "loss": 0.5429, "step": 16802 }, { "epoch": 2.7429900820374677, "grad_norm": 1.944901704788208, "learning_rate": 1.8768717176078846e-05, "loss": 0.6478, "step": 16803 }, { "epoch": 2.743153340679972, "grad_norm": 1.6745465993881226, "learning_rate": 1.876856462213039e-05, "loss": 0.5718, "step": 16804 }, { "epoch": 2.7433165993224766, "grad_norm": 1.5432240962982178, "learning_rate": 1.8768412059351986e-05, "loss": 0.5526, "step": 16805 }, { "epoch": 2.743479857964981, "grad_norm": 2.0743541717529297, "learning_rate": 1.8768259487743796e-05, "loss": 0.697, "step": 16806 }, { "epoch": 2.7436431166074855, "grad_norm": 1.7069040536880493, "learning_rate": 1.8768106907305973e-05, "loss": 0.5564, "step": 16807 }, { "epoch": 2.74380637524999, "grad_norm": 1.960318922996521, "learning_rate": 1.8767954318038667e-05, "loss": 0.6121, "step": 16808 }, { "epoch": 2.743969633892494, "grad_norm": 1.8554294109344482, "learning_rate": 1.8767801719942033e-05, "loss": 0.5951, "step": 16809 }, { "epoch": 2.7441328925349984, "grad_norm": 1.765335202217102, "learning_rate": 1.8767649113016225e-05, "loss": 0.5592, "step": 16810 }, { "epoch": 2.744296151177503, "grad_norm": 1.6094262599945068, "learning_rate": 1.8767496497261397e-05, "loss": 0.4411, "step": 16811 }, { "epoch": 2.7444594098200072, "grad_norm": 1.9136241674423218, "learning_rate": 1.8767343872677703e-05, "loss": 0.6591, "step": 16812 }, { "epoch": 2.7446226684625117, "grad_norm": 1.9376603364944458, "learning_rate": 1.8767191239265297e-05, "loss": 0.6051, "step": 16813 }, { "epoch": 2.744785927105016, "grad_norm": 1.4612798690795898, "learning_rate": 1.876703859702433e-05, "loss": 0.5468, "step": 16814 }, { "epoch": 2.7449491857475206, "grad_norm": 2.2661523818969727, "learning_rate": 1.8766885945954958e-05, "loss": 0.7061, "step": 16815 }, { "epoch": 2.745112444390025, "grad_norm": 1.6905848979949951, "learning_rate": 1.8766733286057332e-05, "loss": 0.598, "step": 16816 }, { "epoch": 2.7452757030325294, "grad_norm": 1.8901417255401611, "learning_rate": 1.8766580617331608e-05, "loss": 0.6787, "step": 16817 }, { "epoch": 2.745438961675034, "grad_norm": 1.3594547510147095, "learning_rate": 1.8766427939777943e-05, "loss": 0.4933, "step": 16818 }, { "epoch": 2.7456022203175383, "grad_norm": 1.2098275423049927, "learning_rate": 1.8766275253396488e-05, "loss": 0.4076, "step": 16819 }, { "epoch": 2.7457654789600423, "grad_norm": 1.7732212543487549, "learning_rate": 1.8766122558187394e-05, "loss": 0.7026, "step": 16820 }, { "epoch": 2.7459287376025467, "grad_norm": 1.5285594463348389, "learning_rate": 1.8765969854150817e-05, "loss": 0.5351, "step": 16821 }, { "epoch": 2.746091996245051, "grad_norm": 1.7328625917434692, "learning_rate": 1.8765817141286907e-05, "loss": 0.6164, "step": 16822 }, { "epoch": 2.7462552548875556, "grad_norm": 1.8076311349868774, "learning_rate": 1.876566441959583e-05, "loss": 0.5931, "step": 16823 }, { "epoch": 2.74641851353006, "grad_norm": 2.3309121131896973, "learning_rate": 1.8765511689077723e-05, "loss": 0.592, "step": 16824 }, { "epoch": 2.7465817721725645, "grad_norm": 2.0019733905792236, "learning_rate": 1.876535894973275e-05, "loss": 0.6443, "step": 16825 }, { "epoch": 2.746745030815069, "grad_norm": 1.7664947509765625, "learning_rate": 1.876520620156107e-05, "loss": 0.5447, "step": 16826 }, { "epoch": 2.746908289457573, "grad_norm": 1.6529799699783325, "learning_rate": 1.8765053444562822e-05, "loss": 0.5613, "step": 16827 }, { "epoch": 2.7470715481000774, "grad_norm": 1.8328337669372559, "learning_rate": 1.876490067873817e-05, "loss": 0.6489, "step": 16828 }, { "epoch": 2.747234806742582, "grad_norm": 1.515157699584961, "learning_rate": 1.8764747904087262e-05, "loss": 0.5273, "step": 16829 }, { "epoch": 2.7473980653850862, "grad_norm": 2.0224263668060303, "learning_rate": 1.8764595120610258e-05, "loss": 0.6283, "step": 16830 }, { "epoch": 2.7475613240275907, "grad_norm": 1.8425991535186768, "learning_rate": 1.8764442328307312e-05, "loss": 0.6339, "step": 16831 }, { "epoch": 2.747724582670095, "grad_norm": 1.9475247859954834, "learning_rate": 1.876428952717857e-05, "loss": 0.6193, "step": 16832 }, { "epoch": 2.7478878413125996, "grad_norm": 1.8596323728561401, "learning_rate": 1.8764136717224197e-05, "loss": 0.5551, "step": 16833 }, { "epoch": 2.748051099955104, "grad_norm": 1.417097806930542, "learning_rate": 1.8763983898444332e-05, "loss": 0.4611, "step": 16834 }, { "epoch": 2.7482143585976084, "grad_norm": 1.6716039180755615, "learning_rate": 1.8763831070839144e-05, "loss": 0.5426, "step": 16835 }, { "epoch": 2.748377617240113, "grad_norm": 1.7773022651672363, "learning_rate": 1.8763678234408776e-05, "loss": 0.5088, "step": 16836 }, { "epoch": 2.7485408758826173, "grad_norm": 1.9376754760742188, "learning_rate": 1.876352538915339e-05, "loss": 0.6252, "step": 16837 }, { "epoch": 2.7487041345251213, "grad_norm": 1.9052051305770874, "learning_rate": 1.8763372535073136e-05, "loss": 0.6604, "step": 16838 }, { "epoch": 2.7488673931676257, "grad_norm": 1.6814298629760742, "learning_rate": 1.8763219672168164e-05, "loss": 0.6237, "step": 16839 }, { "epoch": 2.74903065181013, "grad_norm": 1.789160132408142, "learning_rate": 1.8763066800438638e-05, "loss": 0.5277, "step": 16840 }, { "epoch": 2.7491939104526346, "grad_norm": 1.9426923990249634, "learning_rate": 1.87629139198847e-05, "loss": 0.5578, "step": 16841 }, { "epoch": 2.749357169095139, "grad_norm": 1.9338479042053223, "learning_rate": 1.8762761030506514e-05, "loss": 0.5236, "step": 16842 }, { "epoch": 2.7495204277376435, "grad_norm": 1.8518275022506714, "learning_rate": 1.876260813230423e-05, "loss": 0.5908, "step": 16843 }, { "epoch": 2.7496836863801475, "grad_norm": 2.14155912399292, "learning_rate": 1.8762455225278003e-05, "loss": 0.6113, "step": 16844 }, { "epoch": 2.749846945022652, "grad_norm": 1.7078956365585327, "learning_rate": 1.8762302309427983e-05, "loss": 0.6079, "step": 16845 }, { "epoch": 2.7500102036651564, "grad_norm": 1.999302864074707, "learning_rate": 1.876214938475433e-05, "loss": 0.6705, "step": 16846 }, { "epoch": 2.750173462307661, "grad_norm": 1.9432547092437744, "learning_rate": 1.8761996451257192e-05, "loss": 0.6325, "step": 16847 }, { "epoch": 2.7503367209501652, "grad_norm": 1.8753087520599365, "learning_rate": 1.876184350893673e-05, "loss": 0.5627, "step": 16848 }, { "epoch": 2.7504999795926697, "grad_norm": 2.045257091522217, "learning_rate": 1.876169055779309e-05, "loss": 0.5809, "step": 16849 }, { "epoch": 2.750663238235174, "grad_norm": 1.702256679534912, "learning_rate": 1.8761537597826426e-05, "loss": 0.5033, "step": 16850 }, { "epoch": 2.7508264968776786, "grad_norm": 1.5831900835037231, "learning_rate": 1.87613846290369e-05, "loss": 0.5716, "step": 16851 }, { "epoch": 2.750989755520183, "grad_norm": 1.8476853370666504, "learning_rate": 1.8761231651424668e-05, "loss": 0.5678, "step": 16852 }, { "epoch": 2.7511530141626874, "grad_norm": 1.6984878778457642, "learning_rate": 1.8761078664989872e-05, "loss": 0.5655, "step": 16853 }, { "epoch": 2.751316272805192, "grad_norm": 1.846522569656372, "learning_rate": 1.8760925669732672e-05, "loss": 0.5436, "step": 16854 }, { "epoch": 2.751479531447696, "grad_norm": 1.9222733974456787, "learning_rate": 1.876077266565322e-05, "loss": 0.5139, "step": 16855 }, { "epoch": 2.7516427900902003, "grad_norm": 2.0479063987731934, "learning_rate": 1.8760619652751677e-05, "loss": 0.6918, "step": 16856 }, { "epoch": 2.7518060487327047, "grad_norm": 1.9014701843261719, "learning_rate": 1.876046663102819e-05, "loss": 0.6567, "step": 16857 }, { "epoch": 2.751969307375209, "grad_norm": 1.5004875659942627, "learning_rate": 1.8760313600482916e-05, "loss": 0.4818, "step": 16858 }, { "epoch": 2.7521325660177136, "grad_norm": 1.6105849742889404, "learning_rate": 1.8760160561116008e-05, "loss": 0.4964, "step": 16859 }, { "epoch": 2.752295824660218, "grad_norm": 1.8664335012435913, "learning_rate": 1.8760007512927624e-05, "loss": 0.5978, "step": 16860 }, { "epoch": 2.7524590833027225, "grad_norm": 1.8365596532821655, "learning_rate": 1.875985445591791e-05, "loss": 0.5759, "step": 16861 }, { "epoch": 2.7526223419452265, "grad_norm": 1.594326138496399, "learning_rate": 1.8759701390087026e-05, "loss": 0.47, "step": 16862 }, { "epoch": 2.752785600587731, "grad_norm": 1.9349603652954102, "learning_rate": 1.8759548315435124e-05, "loss": 0.6154, "step": 16863 }, { "epoch": 2.7529488592302354, "grad_norm": 1.6826462745666504, "learning_rate": 1.8759395231962366e-05, "loss": 0.5161, "step": 16864 }, { "epoch": 2.75311211787274, "grad_norm": 1.9525145292282104, "learning_rate": 1.8759242139668892e-05, "loss": 0.6068, "step": 16865 }, { "epoch": 2.7532753765152442, "grad_norm": 2.0448999404907227, "learning_rate": 1.8759089038554864e-05, "loss": 0.4898, "step": 16866 }, { "epoch": 2.7534386351577487, "grad_norm": 1.835203766822815, "learning_rate": 1.875893592862044e-05, "loss": 0.567, "step": 16867 }, { "epoch": 2.753601893800253, "grad_norm": 1.6607319116592407, "learning_rate": 1.8758782809865766e-05, "loss": 0.4964, "step": 16868 }, { "epoch": 2.7537651524427575, "grad_norm": 1.672486424446106, "learning_rate": 1.8758629682291002e-05, "loss": 0.5313, "step": 16869 }, { "epoch": 2.753928411085262, "grad_norm": 1.9060086011886597, "learning_rate": 1.87584765458963e-05, "loss": 0.6721, "step": 16870 }, { "epoch": 2.7540916697277664, "grad_norm": 1.7637985944747925, "learning_rate": 1.8758323400681813e-05, "loss": 0.5677, "step": 16871 }, { "epoch": 2.754254928370271, "grad_norm": 1.7966690063476562, "learning_rate": 1.87581702466477e-05, "loss": 0.5428, "step": 16872 }, { "epoch": 2.754418187012775, "grad_norm": 2.068167209625244, "learning_rate": 1.875801708379411e-05, "loss": 0.5976, "step": 16873 }, { "epoch": 2.7545814456552793, "grad_norm": 1.5054138898849487, "learning_rate": 1.8757863912121196e-05, "loss": 0.4965, "step": 16874 }, { "epoch": 2.7547447042977837, "grad_norm": 1.5841615200042725, "learning_rate": 1.875771073162912e-05, "loss": 0.4663, "step": 16875 }, { "epoch": 2.754907962940288, "grad_norm": 1.6457993984222412, "learning_rate": 1.875755754231803e-05, "loss": 0.5876, "step": 16876 }, { "epoch": 2.7550712215827926, "grad_norm": 1.9504085779190063, "learning_rate": 1.8757404344188083e-05, "loss": 0.5744, "step": 16877 }, { "epoch": 2.755234480225297, "grad_norm": 1.7165213823318481, "learning_rate": 1.8757251137239426e-05, "loss": 0.5207, "step": 16878 }, { "epoch": 2.755397738867801, "grad_norm": 1.7890019416809082, "learning_rate": 1.8757097921472228e-05, "loss": 0.6171, "step": 16879 }, { "epoch": 2.7555609975103055, "grad_norm": 2.007624626159668, "learning_rate": 1.875694469688663e-05, "loss": 0.6573, "step": 16880 }, { "epoch": 2.75572425615281, "grad_norm": 1.8155338764190674, "learning_rate": 1.875679146348279e-05, "loss": 0.5984, "step": 16881 }, { "epoch": 2.7558875147953144, "grad_norm": 1.9199455976486206, "learning_rate": 1.8756638221260866e-05, "loss": 0.6746, "step": 16882 }, { "epoch": 2.756050773437819, "grad_norm": 1.8992630243301392, "learning_rate": 1.875648497022101e-05, "loss": 0.6742, "step": 16883 }, { "epoch": 2.7562140320803232, "grad_norm": 1.6756820678710938, "learning_rate": 1.8756331710363375e-05, "loss": 0.5035, "step": 16884 }, { "epoch": 2.7563772907228277, "grad_norm": 1.7867639064788818, "learning_rate": 1.8756178441688117e-05, "loss": 0.555, "step": 16885 }, { "epoch": 2.756540549365332, "grad_norm": 1.5557234287261963, "learning_rate": 1.8756025164195385e-05, "loss": 0.5158, "step": 16886 }, { "epoch": 2.7567038080078365, "grad_norm": 1.9616869688034058, "learning_rate": 1.8755871877885345e-05, "loss": 0.6255, "step": 16887 }, { "epoch": 2.756867066650341, "grad_norm": 2.1582016944885254, "learning_rate": 1.875571858275814e-05, "loss": 0.7274, "step": 16888 }, { "epoch": 2.7570303252928454, "grad_norm": 1.8098737001419067, "learning_rate": 1.875556527881393e-05, "loss": 0.7092, "step": 16889 }, { "epoch": 2.7571935839353494, "grad_norm": 1.7838102579116821, "learning_rate": 1.8755411966052867e-05, "loss": 0.5013, "step": 16890 }, { "epoch": 2.757356842577854, "grad_norm": 1.5255036354064941, "learning_rate": 1.8755258644475106e-05, "loss": 0.5188, "step": 16891 }, { "epoch": 2.7575201012203583, "grad_norm": 1.5052714347839355, "learning_rate": 1.8755105314080803e-05, "loss": 0.4541, "step": 16892 }, { "epoch": 2.7576833598628627, "grad_norm": 2.1033787727355957, "learning_rate": 1.8754951974870113e-05, "loss": 0.698, "step": 16893 }, { "epoch": 2.757846618505367, "grad_norm": 1.7871756553649902, "learning_rate": 1.875479862684319e-05, "loss": 0.5934, "step": 16894 }, { "epoch": 2.7580098771478716, "grad_norm": 2.0921781063079834, "learning_rate": 1.875464527000018e-05, "loss": 0.6287, "step": 16895 }, { "epoch": 2.758173135790376, "grad_norm": 1.814234733581543, "learning_rate": 1.875449190434125e-05, "loss": 0.5955, "step": 16896 }, { "epoch": 2.75833639443288, "grad_norm": 1.800386905670166, "learning_rate": 1.8754338529866547e-05, "loss": 0.5795, "step": 16897 }, { "epoch": 2.7584996530753845, "grad_norm": 1.914287805557251, "learning_rate": 1.8754185146576226e-05, "loss": 0.5994, "step": 16898 }, { "epoch": 2.758662911717889, "grad_norm": 1.8128507137298584, "learning_rate": 1.8754031754470446e-05, "loss": 0.5569, "step": 16899 }, { "epoch": 2.7588261703603933, "grad_norm": 1.9445407390594482, "learning_rate": 1.8753878353549357e-05, "loss": 0.6656, "step": 16900 }, { "epoch": 2.758989429002898, "grad_norm": 1.5113492012023926, "learning_rate": 1.8753724943813118e-05, "loss": 0.4717, "step": 16901 }, { "epoch": 2.7591526876454022, "grad_norm": 1.8402085304260254, "learning_rate": 1.8753571525261875e-05, "loss": 0.5583, "step": 16902 }, { "epoch": 2.7593159462879067, "grad_norm": 1.9338228702545166, "learning_rate": 1.8753418097895794e-05, "loss": 0.6869, "step": 16903 }, { "epoch": 2.759479204930411, "grad_norm": 1.645857572555542, "learning_rate": 1.8753264661715017e-05, "loss": 0.59, "step": 16904 }, { "epoch": 2.7596424635729155, "grad_norm": 1.68617582321167, "learning_rate": 1.8753111216719707e-05, "loss": 0.6373, "step": 16905 }, { "epoch": 2.75980572221542, "grad_norm": 1.7602789402008057, "learning_rate": 1.8752957762910016e-05, "loss": 0.6192, "step": 16906 }, { "epoch": 2.7599689808579244, "grad_norm": 1.7922712564468384, "learning_rate": 1.87528043002861e-05, "loss": 0.6352, "step": 16907 }, { "epoch": 2.7601322395004284, "grad_norm": 2.0188441276550293, "learning_rate": 1.8752650828848113e-05, "loss": 0.7232, "step": 16908 }, { "epoch": 2.760295498142933, "grad_norm": 1.8489463329315186, "learning_rate": 1.8752497348596205e-05, "loss": 0.6326, "step": 16909 }, { "epoch": 2.7604587567854373, "grad_norm": 1.7247189283370972, "learning_rate": 1.875234385953054e-05, "loss": 0.5075, "step": 16910 }, { "epoch": 2.7606220154279417, "grad_norm": 2.0305428504943848, "learning_rate": 1.8752190361651263e-05, "loss": 0.6302, "step": 16911 }, { "epoch": 2.760785274070446, "grad_norm": 1.8630255460739136, "learning_rate": 1.8752036854958534e-05, "loss": 0.5643, "step": 16912 }, { "epoch": 2.7609485327129506, "grad_norm": 1.9256080389022827, "learning_rate": 1.875188333945251e-05, "loss": 0.6233, "step": 16913 }, { "epoch": 2.761111791355455, "grad_norm": 2.1069793701171875, "learning_rate": 1.8751729815133335e-05, "loss": 0.5763, "step": 16914 }, { "epoch": 2.761275049997959, "grad_norm": 1.69350266456604, "learning_rate": 1.8751576282001174e-05, "loss": 0.5775, "step": 16915 }, { "epoch": 2.7614383086404635, "grad_norm": 2.007568359375, "learning_rate": 1.8751422740056177e-05, "loss": 0.6677, "step": 16916 }, { "epoch": 2.761601567282968, "grad_norm": 1.5601775646209717, "learning_rate": 1.87512691892985e-05, "loss": 0.5225, "step": 16917 }, { "epoch": 2.7617648259254723, "grad_norm": 1.7741409540176392, "learning_rate": 1.8751115629728296e-05, "loss": 0.5656, "step": 16918 }, { "epoch": 2.761928084567977, "grad_norm": 1.7115882635116577, "learning_rate": 1.8750962061345724e-05, "loss": 0.552, "step": 16919 }, { "epoch": 2.762091343210481, "grad_norm": 1.8240242004394531, "learning_rate": 1.8750808484150936e-05, "loss": 0.514, "step": 16920 }, { "epoch": 2.7622546018529857, "grad_norm": 1.8704540729522705, "learning_rate": 1.875065489814408e-05, "loss": 0.6002, "step": 16921 }, { "epoch": 2.76241786049549, "grad_norm": 1.90280020236969, "learning_rate": 1.8750501303325323e-05, "loss": 0.5628, "step": 16922 }, { "epoch": 2.7625811191379945, "grad_norm": 1.679436206817627, "learning_rate": 1.875034769969481e-05, "loss": 0.5531, "step": 16923 }, { "epoch": 2.762744377780499, "grad_norm": 1.394688606262207, "learning_rate": 1.8750194087252704e-05, "loss": 0.4696, "step": 16924 }, { "epoch": 2.7629076364230034, "grad_norm": 1.52779221534729, "learning_rate": 1.8750040465999148e-05, "loss": 0.5005, "step": 16925 }, { "epoch": 2.7630708950655074, "grad_norm": 2.225602865219116, "learning_rate": 1.8749886835934308e-05, "loss": 0.6775, "step": 16926 }, { "epoch": 2.763234153708012, "grad_norm": 1.5751527547836304, "learning_rate": 1.8749733197058334e-05, "loss": 0.5114, "step": 16927 }, { "epoch": 2.7633974123505163, "grad_norm": 1.6829917430877686, "learning_rate": 1.874957954937138e-05, "loss": 0.6078, "step": 16928 }, { "epoch": 2.7635606709930207, "grad_norm": 1.8106110095977783, "learning_rate": 1.87494258928736e-05, "loss": 0.6205, "step": 16929 }, { "epoch": 2.763723929635525, "grad_norm": 1.9297690391540527, "learning_rate": 1.8749272227565153e-05, "loss": 0.6594, "step": 16930 }, { "epoch": 2.7638871882780296, "grad_norm": 1.6832027435302734, "learning_rate": 1.874911855344619e-05, "loss": 0.5847, "step": 16931 }, { "epoch": 2.7640504469205336, "grad_norm": 1.7857534885406494, "learning_rate": 1.874896487051687e-05, "loss": 0.5538, "step": 16932 }, { "epoch": 2.764213705563038, "grad_norm": 1.753980040550232, "learning_rate": 1.874881117877734e-05, "loss": 0.4916, "step": 16933 }, { "epoch": 2.7643769642055425, "grad_norm": 1.8775554895401, "learning_rate": 1.874865747822776e-05, "loss": 0.5821, "step": 16934 }, { "epoch": 2.764540222848047, "grad_norm": 2.005479574203491, "learning_rate": 1.8748503768868286e-05, "loss": 0.6104, "step": 16935 }, { "epoch": 2.7647034814905513, "grad_norm": 1.531949520111084, "learning_rate": 1.8748350050699074e-05, "loss": 0.5238, "step": 16936 }, { "epoch": 2.764866740133056, "grad_norm": 1.9429813623428345, "learning_rate": 1.8748196323720272e-05, "loss": 0.6242, "step": 16937 }, { "epoch": 2.76502999877556, "grad_norm": 2.273709297180176, "learning_rate": 1.874804258793204e-05, "loss": 0.5909, "step": 16938 }, { "epoch": 2.7651932574180647, "grad_norm": 1.535709023475647, "learning_rate": 1.8747888843334528e-05, "loss": 0.4223, "step": 16939 }, { "epoch": 2.765356516060569, "grad_norm": 1.8722307682037354, "learning_rate": 1.8747735089927898e-05, "loss": 0.5313, "step": 16940 }, { "epoch": 2.7655197747030735, "grad_norm": 1.79629385471344, "learning_rate": 1.8747581327712302e-05, "loss": 0.5732, "step": 16941 }, { "epoch": 2.765683033345578, "grad_norm": 1.6924244165420532, "learning_rate": 1.8747427556687888e-05, "loss": 0.6017, "step": 16942 }, { "epoch": 2.765846291988082, "grad_norm": 1.3711211681365967, "learning_rate": 1.8747273776854823e-05, "loss": 0.4404, "step": 16943 }, { "epoch": 2.7660095506305864, "grad_norm": 1.361436367034912, "learning_rate": 1.8747119988213252e-05, "loss": 0.4441, "step": 16944 }, { "epoch": 2.766172809273091, "grad_norm": 2.1270649433135986, "learning_rate": 1.8746966190763335e-05, "loss": 0.6017, "step": 16945 }, { "epoch": 2.7663360679155953, "grad_norm": 1.7463562488555908, "learning_rate": 1.8746812384505227e-05, "loss": 0.6078, "step": 16946 }, { "epoch": 2.7664993265580997, "grad_norm": 1.4082316160202026, "learning_rate": 1.874665856943908e-05, "loss": 0.442, "step": 16947 }, { "epoch": 2.766662585200604, "grad_norm": 1.8391592502593994, "learning_rate": 1.874650474556505e-05, "loss": 0.6308, "step": 16948 }, { "epoch": 2.7668258438431086, "grad_norm": 1.8903462886810303, "learning_rate": 1.874635091288329e-05, "loss": 0.6096, "step": 16949 }, { "epoch": 2.7669891024856126, "grad_norm": 1.5629572868347168, "learning_rate": 1.874619707139396e-05, "loss": 0.5136, "step": 16950 }, { "epoch": 2.767152361128117, "grad_norm": 1.8885281085968018, "learning_rate": 1.874604322109721e-05, "loss": 0.6094, "step": 16951 }, { "epoch": 2.7673156197706215, "grad_norm": 1.883802056312561, "learning_rate": 1.8745889361993197e-05, "loss": 0.6684, "step": 16952 }, { "epoch": 2.767478878413126, "grad_norm": 1.8270710706710815, "learning_rate": 1.874573549408208e-05, "loss": 0.5847, "step": 16953 }, { "epoch": 2.7676421370556303, "grad_norm": 1.7092247009277344, "learning_rate": 1.8745581617364003e-05, "loss": 0.5272, "step": 16954 }, { "epoch": 2.7678053956981348, "grad_norm": 1.7039164304733276, "learning_rate": 1.874542773183913e-05, "loss": 0.5858, "step": 16955 }, { "epoch": 2.767968654340639, "grad_norm": 2.011967182159424, "learning_rate": 1.8745273837507612e-05, "loss": 0.6049, "step": 16956 }, { "epoch": 2.7681319129831436, "grad_norm": 1.7507271766662598, "learning_rate": 1.874511993436961e-05, "loss": 0.6013, "step": 16957 }, { "epoch": 2.768295171625648, "grad_norm": 1.7641527652740479, "learning_rate": 1.874496602242527e-05, "loss": 0.6079, "step": 16958 }, { "epoch": 2.7684584302681525, "grad_norm": 1.7927541732788086, "learning_rate": 1.8744812101674755e-05, "loss": 0.465, "step": 16959 }, { "epoch": 2.768621688910657, "grad_norm": 1.8434326648712158, "learning_rate": 1.8744658172118215e-05, "loss": 0.5949, "step": 16960 }, { "epoch": 2.768784947553161, "grad_norm": 1.7073403596878052, "learning_rate": 1.874450423375581e-05, "loss": 0.5581, "step": 16961 }, { "epoch": 2.7689482061956654, "grad_norm": 2.008556842803955, "learning_rate": 1.8744350286587685e-05, "loss": 0.6232, "step": 16962 }, { "epoch": 2.76911146483817, "grad_norm": 2.1378302574157715, "learning_rate": 1.8744196330614003e-05, "loss": 0.7641, "step": 16963 }, { "epoch": 2.7692747234806743, "grad_norm": 1.867311716079712, "learning_rate": 1.8744042365834923e-05, "loss": 0.6203, "step": 16964 }, { "epoch": 2.7694379821231787, "grad_norm": 1.5785242319107056, "learning_rate": 1.874388839225059e-05, "loss": 0.556, "step": 16965 }, { "epoch": 2.769601240765683, "grad_norm": 1.8768928050994873, "learning_rate": 1.8743734409861165e-05, "loss": 0.6068, "step": 16966 }, { "epoch": 2.7697644994081876, "grad_norm": 1.7226799726486206, "learning_rate": 1.87435804186668e-05, "loss": 0.5626, "step": 16967 }, { "epoch": 2.7699277580506916, "grad_norm": 1.5295146703720093, "learning_rate": 1.8743426418667652e-05, "loss": 0.5435, "step": 16968 }, { "epoch": 2.770091016693196, "grad_norm": 1.7757264375686646, "learning_rate": 1.874327240986388e-05, "loss": 0.5592, "step": 16969 }, { "epoch": 2.7702542753357005, "grad_norm": 1.5258785486221313, "learning_rate": 1.874311839225563e-05, "loss": 0.5153, "step": 16970 }, { "epoch": 2.770417533978205, "grad_norm": 1.4512251615524292, "learning_rate": 1.8742964365843066e-05, "loss": 0.4998, "step": 16971 }, { "epoch": 2.7705807926207093, "grad_norm": 1.7135977745056152, "learning_rate": 1.8742810330626338e-05, "loss": 0.6472, "step": 16972 }, { "epoch": 2.7707440512632138, "grad_norm": 1.8380926847457886, "learning_rate": 1.87426562866056e-05, "loss": 0.6191, "step": 16973 }, { "epoch": 2.770907309905718, "grad_norm": 1.5992674827575684, "learning_rate": 1.8742502233781013e-05, "loss": 0.5958, "step": 16974 }, { "epoch": 2.7710705685482226, "grad_norm": 2.077838659286499, "learning_rate": 1.8742348172152728e-05, "loss": 0.5966, "step": 16975 }, { "epoch": 2.771233827190727, "grad_norm": 1.5644733905792236, "learning_rate": 1.87421941017209e-05, "loss": 0.5105, "step": 16976 }, { "epoch": 2.7713970858332315, "grad_norm": 1.7718591690063477, "learning_rate": 1.8742040022485683e-05, "loss": 0.5661, "step": 16977 }, { "epoch": 2.771560344475736, "grad_norm": 2.096832513809204, "learning_rate": 1.8741885934447235e-05, "loss": 0.7577, "step": 16978 }, { "epoch": 2.77172360311824, "grad_norm": 1.4819700717926025, "learning_rate": 1.8741731837605713e-05, "loss": 0.5233, "step": 16979 }, { "epoch": 2.7718868617607444, "grad_norm": 1.7022075653076172, "learning_rate": 1.8741577731961266e-05, "loss": 0.5784, "step": 16980 }, { "epoch": 2.772050120403249, "grad_norm": 1.6016932725906372, "learning_rate": 1.8741423617514055e-05, "loss": 0.5713, "step": 16981 }, { "epoch": 2.7722133790457533, "grad_norm": 1.6289968490600586, "learning_rate": 1.8741269494264228e-05, "loss": 0.5279, "step": 16982 }, { "epoch": 2.7723766376882577, "grad_norm": 1.9414409399032593, "learning_rate": 1.874111536221195e-05, "loss": 0.6329, "step": 16983 }, { "epoch": 2.772539896330762, "grad_norm": 1.9337849617004395, "learning_rate": 1.874096122135737e-05, "loss": 0.6237, "step": 16984 }, { "epoch": 2.772703154973266, "grad_norm": 1.6559940576553345, "learning_rate": 1.8740807071700642e-05, "loss": 0.6515, "step": 16985 }, { "epoch": 2.7728664136157706, "grad_norm": 2.0726354122161865, "learning_rate": 1.874065291324193e-05, "loss": 0.7788, "step": 16986 }, { "epoch": 2.773029672258275, "grad_norm": 1.705725073814392, "learning_rate": 1.8740498745981374e-05, "loss": 0.4955, "step": 16987 }, { "epoch": 2.7731929309007795, "grad_norm": 1.7953778505325317, "learning_rate": 1.8740344569919144e-05, "loss": 0.5691, "step": 16988 }, { "epoch": 2.773356189543284, "grad_norm": 1.7822595834732056, "learning_rate": 1.8740190385055384e-05, "loss": 0.628, "step": 16989 }, { "epoch": 2.7735194481857883, "grad_norm": 1.4764539003372192, "learning_rate": 1.874003619139026e-05, "loss": 0.5521, "step": 16990 }, { "epoch": 2.7736827068282928, "grad_norm": 1.725106120109558, "learning_rate": 1.8739881988923923e-05, "loss": 0.6145, "step": 16991 }, { "epoch": 2.773845965470797, "grad_norm": 1.8168293237686157, "learning_rate": 1.873972777765652e-05, "loss": 0.5614, "step": 16992 }, { "epoch": 2.7740092241133016, "grad_norm": 1.907143473625183, "learning_rate": 1.873957355758822e-05, "loss": 0.5186, "step": 16993 }, { "epoch": 2.774172482755806, "grad_norm": 1.8508812189102173, "learning_rate": 1.873941932871917e-05, "loss": 0.5601, "step": 16994 }, { "epoch": 2.7743357413983105, "grad_norm": 1.9566173553466797, "learning_rate": 1.8739265091049526e-05, "loss": 0.5614, "step": 16995 }, { "epoch": 2.7744990000408145, "grad_norm": 1.7837294340133667, "learning_rate": 1.8739110844579444e-05, "loss": 0.6121, "step": 16996 }, { "epoch": 2.774662258683319, "grad_norm": 1.5795108079910278, "learning_rate": 1.8738956589309083e-05, "loss": 0.5244, "step": 16997 }, { "epoch": 2.7748255173258234, "grad_norm": 1.720319390296936, "learning_rate": 1.873880232523859e-05, "loss": 0.5599, "step": 16998 }, { "epoch": 2.774988775968328, "grad_norm": 1.6609724760055542, "learning_rate": 1.873864805236813e-05, "loss": 0.5471, "step": 16999 }, { "epoch": 2.7751520346108323, "grad_norm": 1.7408483028411865, "learning_rate": 1.873849377069785e-05, "loss": 0.4542, "step": 17000 }, { "epoch": 2.7753152932533367, "grad_norm": 1.9549657106399536, "learning_rate": 1.8738339480227914e-05, "loss": 0.5408, "step": 17001 }, { "epoch": 2.775478551895841, "grad_norm": 2.4929585456848145, "learning_rate": 1.8738185180958467e-05, "loss": 0.6834, "step": 17002 }, { "epoch": 2.775641810538345, "grad_norm": 1.3876250982284546, "learning_rate": 1.8738030872889673e-05, "loss": 0.4875, "step": 17003 }, { "epoch": 2.7758050691808496, "grad_norm": 1.7589631080627441, "learning_rate": 1.8737876556021683e-05, "loss": 0.6145, "step": 17004 }, { "epoch": 2.775968327823354, "grad_norm": 1.6112018823623657, "learning_rate": 1.8737722230354654e-05, "loss": 0.5336, "step": 17005 }, { "epoch": 2.7761315864658584, "grad_norm": 2.0593907833099365, "learning_rate": 1.8737567895888742e-05, "loss": 0.6309, "step": 17006 }, { "epoch": 2.776294845108363, "grad_norm": 2.0985512733459473, "learning_rate": 1.87374135526241e-05, "loss": 0.7184, "step": 17007 }, { "epoch": 2.7764581037508673, "grad_norm": 1.8007304668426514, "learning_rate": 1.8737259200560884e-05, "loss": 0.5451, "step": 17008 }, { "epoch": 2.7766213623933718, "grad_norm": 1.6653670072555542, "learning_rate": 1.8737104839699253e-05, "loss": 0.5857, "step": 17009 }, { "epoch": 2.776784621035876, "grad_norm": 1.586082100868225, "learning_rate": 1.8736950470039355e-05, "loss": 0.5328, "step": 17010 }, { "epoch": 2.7769478796783806, "grad_norm": 1.762214183807373, "learning_rate": 1.8736796091581357e-05, "loss": 0.5489, "step": 17011 }, { "epoch": 2.777111138320885, "grad_norm": 1.9369316101074219, "learning_rate": 1.8736641704325404e-05, "loss": 0.6414, "step": 17012 }, { "epoch": 2.7772743969633895, "grad_norm": 2.3751633167266846, "learning_rate": 1.8736487308271655e-05, "loss": 0.7247, "step": 17013 }, { "epoch": 2.7774376556058935, "grad_norm": 1.6939313411712646, "learning_rate": 1.8736332903420266e-05, "loss": 0.5603, "step": 17014 }, { "epoch": 2.777600914248398, "grad_norm": 1.9070448875427246, "learning_rate": 1.8736178489771393e-05, "loss": 0.5707, "step": 17015 }, { "epoch": 2.7777641728909024, "grad_norm": 1.7823611497879028, "learning_rate": 1.8736024067325188e-05, "loss": 0.5066, "step": 17016 }, { "epoch": 2.777927431533407, "grad_norm": 1.689481496810913, "learning_rate": 1.873586963608181e-05, "loss": 0.5182, "step": 17017 }, { "epoch": 2.7780906901759113, "grad_norm": 1.8869339227676392, "learning_rate": 1.8735715196041414e-05, "loss": 0.6062, "step": 17018 }, { "epoch": 2.7782539488184157, "grad_norm": 1.6360747814178467, "learning_rate": 1.8735560747204156e-05, "loss": 0.4913, "step": 17019 }, { "epoch": 2.7784172074609197, "grad_norm": 1.7881152629852295, "learning_rate": 1.8735406289570193e-05, "loss": 0.6256, "step": 17020 }, { "epoch": 2.778580466103424, "grad_norm": 1.7792365550994873, "learning_rate": 1.8735251823139672e-05, "loss": 0.5059, "step": 17021 }, { "epoch": 2.7787437247459286, "grad_norm": 2.0360183715820312, "learning_rate": 1.8735097347912757e-05, "loss": 0.703, "step": 17022 }, { "epoch": 2.778906983388433, "grad_norm": 1.7216336727142334, "learning_rate": 1.8734942863889605e-05, "loss": 0.5431, "step": 17023 }, { "epoch": 2.7790702420309374, "grad_norm": 1.6142094135284424, "learning_rate": 1.8734788371070365e-05, "loss": 0.5498, "step": 17024 }, { "epoch": 2.779233500673442, "grad_norm": 1.9985442161560059, "learning_rate": 1.8734633869455196e-05, "loss": 0.6746, "step": 17025 }, { "epoch": 2.7793967593159463, "grad_norm": 1.6376078128814697, "learning_rate": 1.8734479359044253e-05, "loss": 0.5469, "step": 17026 }, { "epoch": 2.7795600179584508, "grad_norm": 1.4445065259933472, "learning_rate": 1.873432483983769e-05, "loss": 0.4502, "step": 17027 }, { "epoch": 2.779723276600955, "grad_norm": 1.7250950336456299, "learning_rate": 1.8734170311835663e-05, "loss": 0.5663, "step": 17028 }, { "epoch": 2.7798865352434596, "grad_norm": 1.860705018043518, "learning_rate": 1.8734015775038333e-05, "loss": 0.577, "step": 17029 }, { "epoch": 2.780049793885964, "grad_norm": 2.1266579627990723, "learning_rate": 1.873386122944585e-05, "loss": 0.5481, "step": 17030 }, { "epoch": 2.780213052528468, "grad_norm": 1.8674778938293457, "learning_rate": 1.873370667505837e-05, "loss": 0.5041, "step": 17031 }, { "epoch": 2.7803763111709725, "grad_norm": 1.5213466882705688, "learning_rate": 1.873355211187605e-05, "loss": 0.5444, "step": 17032 }, { "epoch": 2.780539569813477, "grad_norm": 1.6364349126815796, "learning_rate": 1.8733397539899046e-05, "loss": 0.5316, "step": 17033 }, { "epoch": 2.7807028284559814, "grad_norm": 1.8161762952804565, "learning_rate": 1.8733242959127512e-05, "loss": 0.6229, "step": 17034 }, { "epoch": 2.780866087098486, "grad_norm": 1.9668946266174316, "learning_rate": 1.8733088369561606e-05, "loss": 0.489, "step": 17035 }, { "epoch": 2.7810293457409903, "grad_norm": 1.5849436521530151, "learning_rate": 1.873293377120148e-05, "loss": 0.4852, "step": 17036 }, { "epoch": 2.7811926043834947, "grad_norm": 2.0606863498687744, "learning_rate": 1.8732779164047294e-05, "loss": 0.5733, "step": 17037 }, { "epoch": 2.7813558630259987, "grad_norm": 1.7260112762451172, "learning_rate": 1.8732624548099204e-05, "loss": 0.563, "step": 17038 }, { "epoch": 2.781519121668503, "grad_norm": 1.7873455286026, "learning_rate": 1.873246992335736e-05, "loss": 0.5471, "step": 17039 }, { "epoch": 2.7816823803110076, "grad_norm": 1.614669919013977, "learning_rate": 1.873231528982192e-05, "loss": 0.4987, "step": 17040 }, { "epoch": 2.781845638953512, "grad_norm": 1.6999400854110718, "learning_rate": 1.8732160647493046e-05, "loss": 0.5705, "step": 17041 }, { "epoch": 2.7820088975960164, "grad_norm": 1.3909984827041626, "learning_rate": 1.8732005996370883e-05, "loss": 0.4141, "step": 17042 }, { "epoch": 2.782172156238521, "grad_norm": 1.9530187845230103, "learning_rate": 1.8731851336455597e-05, "loss": 0.6404, "step": 17043 }, { "epoch": 2.7823354148810253, "grad_norm": 1.9728834629058838, "learning_rate": 1.8731696667747336e-05, "loss": 0.5424, "step": 17044 }, { "epoch": 2.7824986735235298, "grad_norm": 2.137962579727173, "learning_rate": 1.8731541990246256e-05, "loss": 0.701, "step": 17045 }, { "epoch": 2.782661932166034, "grad_norm": 1.5402780771255493, "learning_rate": 1.873138730395252e-05, "loss": 0.4745, "step": 17046 }, { "epoch": 2.7828251908085386, "grad_norm": 1.7344380617141724, "learning_rate": 1.873123260886628e-05, "loss": 0.5688, "step": 17047 }, { "epoch": 2.782988449451043, "grad_norm": 2.059652328491211, "learning_rate": 1.8731077904987688e-05, "loss": 0.6012, "step": 17048 }, { "epoch": 2.783151708093547, "grad_norm": 2.09590744972229, "learning_rate": 1.8730923192316903e-05, "loss": 0.6186, "step": 17049 }, { "epoch": 2.7833149667360515, "grad_norm": 1.9133718013763428, "learning_rate": 1.8730768470854085e-05, "loss": 0.5351, "step": 17050 }, { "epoch": 2.783478225378556, "grad_norm": 1.7149291038513184, "learning_rate": 1.873061374059938e-05, "loss": 0.5734, "step": 17051 }, { "epoch": 2.7836414840210604, "grad_norm": 2.2559893131256104, "learning_rate": 1.873045900155295e-05, "loss": 0.7068, "step": 17052 }, { "epoch": 2.783804742663565, "grad_norm": 1.9016553163528442, "learning_rate": 1.8730304253714954e-05, "loss": 0.6271, "step": 17053 }, { "epoch": 2.7839680013060693, "grad_norm": 1.6053084135055542, "learning_rate": 1.873014949708554e-05, "loss": 0.5697, "step": 17054 }, { "epoch": 2.7841312599485737, "grad_norm": 1.8142805099487305, "learning_rate": 1.872999473166487e-05, "loss": 0.532, "step": 17055 }, { "epoch": 2.7842945185910777, "grad_norm": 1.6667978763580322, "learning_rate": 1.87298399574531e-05, "loss": 0.574, "step": 17056 }, { "epoch": 2.784457777233582, "grad_norm": 1.54232656955719, "learning_rate": 1.872968517445038e-05, "loss": 0.5214, "step": 17057 }, { "epoch": 2.7846210358760866, "grad_norm": 1.6067293882369995, "learning_rate": 1.8729530382656868e-05, "loss": 0.5584, "step": 17058 }, { "epoch": 2.784784294518591, "grad_norm": 1.6169523000717163, "learning_rate": 1.8729375582072722e-05, "loss": 0.5651, "step": 17059 }, { "epoch": 2.7849475531610954, "grad_norm": 1.9359793663024902, "learning_rate": 1.8729220772698096e-05, "loss": 0.4096, "step": 17060 }, { "epoch": 2.7851108118036, "grad_norm": 1.6039377450942993, "learning_rate": 1.8729065954533152e-05, "loss": 0.502, "step": 17061 }, { "epoch": 2.7852740704461043, "grad_norm": 1.6492377519607544, "learning_rate": 1.872891112757804e-05, "loss": 0.5062, "step": 17062 }, { "epoch": 2.7854373290886087, "grad_norm": 1.6289323568344116, "learning_rate": 1.8728756291832912e-05, "loss": 0.5624, "step": 17063 }, { "epoch": 2.785600587731113, "grad_norm": 1.7960495948791504, "learning_rate": 1.872860144729793e-05, "loss": 0.6016, "step": 17064 }, { "epoch": 2.7857638463736176, "grad_norm": 1.5653235912322998, "learning_rate": 1.872844659397325e-05, "loss": 0.4977, "step": 17065 }, { "epoch": 2.785927105016122, "grad_norm": 1.8588969707489014, "learning_rate": 1.8728291731859028e-05, "loss": 0.6093, "step": 17066 }, { "epoch": 2.786090363658626, "grad_norm": 1.6995714902877808, "learning_rate": 1.8728136860955417e-05, "loss": 0.5664, "step": 17067 }, { "epoch": 2.7862536223011305, "grad_norm": 2.384856700897217, "learning_rate": 1.8727981981262576e-05, "loss": 0.9222, "step": 17068 }, { "epoch": 2.786416880943635, "grad_norm": 2.0632901191711426, "learning_rate": 1.8727827092780656e-05, "loss": 0.5646, "step": 17069 }, { "epoch": 2.7865801395861394, "grad_norm": 1.7999776601791382, "learning_rate": 1.872767219550982e-05, "loss": 0.5127, "step": 17070 }, { "epoch": 2.786743398228644, "grad_norm": 1.620798110961914, "learning_rate": 1.872751728945022e-05, "loss": 0.6278, "step": 17071 }, { "epoch": 2.7869066568711482, "grad_norm": 1.822139024734497, "learning_rate": 1.872736237460201e-05, "loss": 0.5738, "step": 17072 }, { "epoch": 2.7870699155136522, "grad_norm": 1.8186088800430298, "learning_rate": 1.872720745096535e-05, "loss": 0.5764, "step": 17073 }, { "epoch": 2.7872331741561567, "grad_norm": 1.8539931774139404, "learning_rate": 1.8727052518540395e-05, "loss": 0.6174, "step": 17074 }, { "epoch": 2.787396432798661, "grad_norm": 1.7007077932357788, "learning_rate": 1.87268975773273e-05, "loss": 0.5026, "step": 17075 }, { "epoch": 2.7875596914411656, "grad_norm": 1.686411738395691, "learning_rate": 1.872674262732622e-05, "loss": 0.5571, "step": 17076 }, { "epoch": 2.78772295008367, "grad_norm": 1.556025505065918, "learning_rate": 1.8726587668537314e-05, "loss": 0.5077, "step": 17077 }, { "epoch": 2.7878862087261744, "grad_norm": 1.6237934827804565, "learning_rate": 1.8726432700960738e-05, "loss": 0.5509, "step": 17078 }, { "epoch": 2.788049467368679, "grad_norm": 1.8861494064331055, "learning_rate": 1.8726277724596643e-05, "loss": 0.5755, "step": 17079 }, { "epoch": 2.7882127260111833, "grad_norm": 1.9063094854354858, "learning_rate": 1.872612273944519e-05, "loss": 0.5177, "step": 17080 }, { "epoch": 2.7883759846536877, "grad_norm": 1.7446725368499756, "learning_rate": 1.8725967745506538e-05, "loss": 0.5418, "step": 17081 }, { "epoch": 2.788539243296192, "grad_norm": 2.0573136806488037, "learning_rate": 1.8725812742780832e-05, "loss": 0.6485, "step": 17082 }, { "epoch": 2.7887025019386966, "grad_norm": 1.8344477415084839, "learning_rate": 1.8725657731268238e-05, "loss": 0.4873, "step": 17083 }, { "epoch": 2.7888657605812006, "grad_norm": 1.7149672508239746, "learning_rate": 1.872550271096891e-05, "loss": 0.5654, "step": 17084 }, { "epoch": 2.789029019223705, "grad_norm": 1.780392050743103, "learning_rate": 1.8725347681883e-05, "loss": 0.5549, "step": 17085 }, { "epoch": 2.7891922778662095, "grad_norm": 1.728710412979126, "learning_rate": 1.872519264401067e-05, "loss": 0.5184, "step": 17086 }, { "epoch": 2.789355536508714, "grad_norm": 1.779109239578247, "learning_rate": 1.8725037597352075e-05, "loss": 0.5754, "step": 17087 }, { "epoch": 2.7895187951512184, "grad_norm": 1.7444349527359009, "learning_rate": 1.8724882541907365e-05, "loss": 0.6128, "step": 17088 }, { "epoch": 2.789682053793723, "grad_norm": 1.7429448366165161, "learning_rate": 1.8724727477676703e-05, "loss": 0.5194, "step": 17089 }, { "epoch": 2.7898453124362272, "grad_norm": 1.955499291419983, "learning_rate": 1.8724572404660243e-05, "loss": 0.5169, "step": 17090 }, { "epoch": 2.7900085710787312, "grad_norm": 1.8044217824935913, "learning_rate": 1.8724417322858137e-05, "loss": 0.6319, "step": 17091 }, { "epoch": 2.7901718297212357, "grad_norm": 2.0699164867401123, "learning_rate": 1.872426223227055e-05, "loss": 0.5836, "step": 17092 }, { "epoch": 2.79033508836374, "grad_norm": 1.8543236255645752, "learning_rate": 1.872410713289763e-05, "loss": 0.6447, "step": 17093 }, { "epoch": 2.7904983470062446, "grad_norm": 1.9564565420150757, "learning_rate": 1.8723952024739536e-05, "loss": 0.5402, "step": 17094 }, { "epoch": 2.790661605648749, "grad_norm": 1.7094992399215698, "learning_rate": 1.872379690779643e-05, "loss": 0.6222, "step": 17095 }, { "epoch": 2.7908248642912534, "grad_norm": 1.8307757377624512, "learning_rate": 1.872364178206846e-05, "loss": 0.5281, "step": 17096 }, { "epoch": 2.790988122933758, "grad_norm": 1.814497709274292, "learning_rate": 1.8723486647555783e-05, "loss": 0.6575, "step": 17097 }, { "epoch": 2.7911513815762623, "grad_norm": 2.0067806243896484, "learning_rate": 1.8723331504258557e-05, "loss": 0.6343, "step": 17098 }, { "epoch": 2.7913146402187667, "grad_norm": 2.057681083679199, "learning_rate": 1.8723176352176945e-05, "loss": 0.5795, "step": 17099 }, { "epoch": 2.791477898861271, "grad_norm": 1.8357090950012207, "learning_rate": 1.872302119131109e-05, "loss": 0.6084, "step": 17100 }, { "epoch": 2.7916411575037756, "grad_norm": 1.8608717918395996, "learning_rate": 1.8722866021661157e-05, "loss": 0.5463, "step": 17101 }, { "epoch": 2.7918044161462796, "grad_norm": 2.0890164375305176, "learning_rate": 1.87227108432273e-05, "loss": 0.7032, "step": 17102 }, { "epoch": 2.791967674788784, "grad_norm": 1.7082842588424683, "learning_rate": 1.8722555656009677e-05, "loss": 0.5671, "step": 17103 }, { "epoch": 2.7921309334312885, "grad_norm": 2.0605380535125732, "learning_rate": 1.8722400460008437e-05, "loss": 0.7509, "step": 17104 }, { "epoch": 2.792294192073793, "grad_norm": 1.736609697341919, "learning_rate": 1.8722245255223748e-05, "loss": 0.5872, "step": 17105 }, { "epoch": 2.7924574507162974, "grad_norm": 1.7840811014175415, "learning_rate": 1.872209004165576e-05, "loss": 0.6096, "step": 17106 }, { "epoch": 2.792620709358802, "grad_norm": 2.0116987228393555, "learning_rate": 1.8721934819304627e-05, "loss": 0.7399, "step": 17107 }, { "epoch": 2.792783968001306, "grad_norm": 1.9538835287094116, "learning_rate": 1.872177958817051e-05, "loss": 0.6343, "step": 17108 }, { "epoch": 2.7929472266438102, "grad_norm": 1.5346919298171997, "learning_rate": 1.872162434825356e-05, "loss": 0.4795, "step": 17109 }, { "epoch": 2.7931104852863147, "grad_norm": 1.6239686012268066, "learning_rate": 1.8721469099553943e-05, "loss": 0.4918, "step": 17110 }, { "epoch": 2.793273743928819, "grad_norm": 1.5280585289001465, "learning_rate": 1.8721313842071803e-05, "loss": 0.574, "step": 17111 }, { "epoch": 2.7934370025713235, "grad_norm": 1.464654803276062, "learning_rate": 1.8721158575807307e-05, "loss": 0.5057, "step": 17112 }, { "epoch": 2.793600261213828, "grad_norm": 1.8277825117111206, "learning_rate": 1.87210033007606e-05, "loss": 0.5502, "step": 17113 }, { "epoch": 2.7937635198563324, "grad_norm": 1.8506476879119873, "learning_rate": 1.872084801693185e-05, "loss": 0.618, "step": 17114 }, { "epoch": 2.793926778498837, "grad_norm": 1.645798921585083, "learning_rate": 1.8720692724321207e-05, "loss": 0.6144, "step": 17115 }, { "epoch": 2.7940900371413413, "grad_norm": 1.7858378887176514, "learning_rate": 1.8720537422928832e-05, "loss": 0.6424, "step": 17116 }, { "epoch": 2.7942532957838457, "grad_norm": 1.481689453125, "learning_rate": 1.8720382112754873e-05, "loss": 0.4384, "step": 17117 }, { "epoch": 2.79441655442635, "grad_norm": 1.6285743713378906, "learning_rate": 1.8720226793799493e-05, "loss": 0.4709, "step": 17118 }, { "epoch": 2.794579813068854, "grad_norm": 1.6571630239486694, "learning_rate": 1.8720071466062852e-05, "loss": 0.5534, "step": 17119 }, { "epoch": 2.7947430717113586, "grad_norm": 1.4141291379928589, "learning_rate": 1.8719916129545094e-05, "loss": 0.447, "step": 17120 }, { "epoch": 2.794906330353863, "grad_norm": 1.3394076824188232, "learning_rate": 1.8719760784246387e-05, "loss": 0.4554, "step": 17121 }, { "epoch": 2.7950695889963675, "grad_norm": 1.8764984607696533, "learning_rate": 1.8719605430166878e-05, "loss": 0.6091, "step": 17122 }, { "epoch": 2.795232847638872, "grad_norm": 2.231739044189453, "learning_rate": 1.8719450067306735e-05, "loss": 0.7727, "step": 17123 }, { "epoch": 2.7953961062813764, "grad_norm": 1.5209869146347046, "learning_rate": 1.8719294695666105e-05, "loss": 0.5474, "step": 17124 }, { "epoch": 2.795559364923881, "grad_norm": 1.4742927551269531, "learning_rate": 1.8719139315245146e-05, "loss": 0.5118, "step": 17125 }, { "epoch": 2.795722623566385, "grad_norm": 1.7178586721420288, "learning_rate": 1.871898392604402e-05, "loss": 0.4972, "step": 17126 }, { "epoch": 2.7958858822088892, "grad_norm": 1.903619647026062, "learning_rate": 1.8718828528062878e-05, "loss": 0.5818, "step": 17127 }, { "epoch": 2.7960491408513937, "grad_norm": 1.9767827987670898, "learning_rate": 1.871867312130188e-05, "loss": 0.6215, "step": 17128 }, { "epoch": 2.796212399493898, "grad_norm": 1.5967093706130981, "learning_rate": 1.8718517705761177e-05, "loss": 0.5628, "step": 17129 }, { "epoch": 2.7963756581364025, "grad_norm": 1.785354733467102, "learning_rate": 1.871836228144093e-05, "loss": 0.5652, "step": 17130 }, { "epoch": 2.796538916778907, "grad_norm": 1.7153159379959106, "learning_rate": 1.8718206848341298e-05, "loss": 0.5984, "step": 17131 }, { "epoch": 2.7967021754214114, "grad_norm": 1.9811500310897827, "learning_rate": 1.8718051406462426e-05, "loss": 0.5033, "step": 17132 }, { "epoch": 2.796865434063916, "grad_norm": 1.675065279006958, "learning_rate": 1.8717895955804482e-05, "loss": 0.555, "step": 17133 }, { "epoch": 2.7970286927064203, "grad_norm": 1.811787486076355, "learning_rate": 1.8717740496367624e-05, "loss": 0.5971, "step": 17134 }, { "epoch": 2.7971919513489247, "grad_norm": 1.789440631866455, "learning_rate": 1.8717585028152e-05, "loss": 0.5585, "step": 17135 }, { "epoch": 2.797355209991429, "grad_norm": 1.4940167665481567, "learning_rate": 1.871742955115777e-05, "loss": 0.4836, "step": 17136 }, { "epoch": 2.797518468633933, "grad_norm": 1.6896947622299194, "learning_rate": 1.8717274065385092e-05, "loss": 0.5759, "step": 17137 }, { "epoch": 2.7976817272764376, "grad_norm": 1.5029852390289307, "learning_rate": 1.8717118570834118e-05, "loss": 0.5062, "step": 17138 }, { "epoch": 2.797844985918942, "grad_norm": 1.517225980758667, "learning_rate": 1.8716963067505012e-05, "loss": 0.4995, "step": 17139 }, { "epoch": 2.7980082445614465, "grad_norm": 1.822387456893921, "learning_rate": 1.8716807555397924e-05, "loss": 0.7399, "step": 17140 }, { "epoch": 2.798171503203951, "grad_norm": 1.838546872138977, "learning_rate": 1.8716652034513013e-05, "loss": 0.5645, "step": 17141 }, { "epoch": 2.7983347618464554, "grad_norm": 1.6739639043807983, "learning_rate": 1.8716496504850436e-05, "loss": 0.5677, "step": 17142 }, { "epoch": 2.79849802048896, "grad_norm": 1.655452013015747, "learning_rate": 1.871634096641035e-05, "loss": 0.5197, "step": 17143 }, { "epoch": 2.798661279131464, "grad_norm": 2.1170830726623535, "learning_rate": 1.871618541919291e-05, "loss": 0.7637, "step": 17144 }, { "epoch": 2.7988245377739682, "grad_norm": 1.7444794178009033, "learning_rate": 1.8716029863198274e-05, "loss": 0.5315, "step": 17145 }, { "epoch": 2.7989877964164727, "grad_norm": 1.647229552268982, "learning_rate": 1.87158742984266e-05, "loss": 0.455, "step": 17146 }, { "epoch": 2.799151055058977, "grad_norm": 1.9116860628128052, "learning_rate": 1.8715718724878044e-05, "loss": 0.674, "step": 17147 }, { "epoch": 2.7993143137014815, "grad_norm": 1.8898736238479614, "learning_rate": 1.8715563142552758e-05, "loss": 0.5633, "step": 17148 }, { "epoch": 2.799477572343986, "grad_norm": 1.8704588413238525, "learning_rate": 1.8715407551450903e-05, "loss": 0.6057, "step": 17149 }, { "epoch": 2.7996408309864904, "grad_norm": 1.6999191045761108, "learning_rate": 1.8715251951572635e-05, "loss": 0.5372, "step": 17150 }, { "epoch": 2.799804089628995, "grad_norm": 1.6348469257354736, "learning_rate": 1.871509634291811e-05, "loss": 0.5297, "step": 17151 }, { "epoch": 2.7999673482714993, "grad_norm": 1.91736900806427, "learning_rate": 1.8714940725487486e-05, "loss": 0.5542, "step": 17152 }, { "epoch": 2.8001306069140037, "grad_norm": 1.776286244392395, "learning_rate": 1.871478509928092e-05, "loss": 0.4571, "step": 17153 }, { "epoch": 2.800293865556508, "grad_norm": 2.1735870838165283, "learning_rate": 1.8714629464298567e-05, "loss": 0.7549, "step": 17154 }, { "epoch": 2.800457124199012, "grad_norm": 1.7920817136764526, "learning_rate": 1.8714473820540584e-05, "loss": 0.5412, "step": 17155 }, { "epoch": 2.8006203828415166, "grad_norm": 1.8092846870422363, "learning_rate": 1.871431816800713e-05, "loss": 0.5587, "step": 17156 }, { "epoch": 2.800783641484021, "grad_norm": 1.7333770990371704, "learning_rate": 1.8714162506698355e-05, "loss": 0.5478, "step": 17157 }, { "epoch": 2.8009469001265255, "grad_norm": 2.304982900619507, "learning_rate": 1.8714006836614426e-05, "loss": 0.7371, "step": 17158 }, { "epoch": 2.80111015876903, "grad_norm": 1.5867565870285034, "learning_rate": 1.8713851157755495e-05, "loss": 0.5156, "step": 17159 }, { "epoch": 2.8012734174115343, "grad_norm": 1.616225004196167, "learning_rate": 1.8713695470121714e-05, "loss": 0.3999, "step": 17160 }, { "epoch": 2.8014366760540383, "grad_norm": 2.1913020610809326, "learning_rate": 1.8713539773713246e-05, "loss": 0.6375, "step": 17161 }, { "epoch": 2.801599934696543, "grad_norm": 1.5897860527038574, "learning_rate": 1.8713384068530243e-05, "loss": 0.4895, "step": 17162 }, { "epoch": 2.801763193339047, "grad_norm": 1.4825299978256226, "learning_rate": 1.8713228354572866e-05, "loss": 0.4742, "step": 17163 }, { "epoch": 2.8019264519815517, "grad_norm": 1.9240847826004028, "learning_rate": 1.8713072631841275e-05, "loss": 0.7003, "step": 17164 }, { "epoch": 2.802089710624056, "grad_norm": 1.6458756923675537, "learning_rate": 1.8712916900335618e-05, "loss": 0.4961, "step": 17165 }, { "epoch": 2.8022529692665605, "grad_norm": 1.6858245134353638, "learning_rate": 1.8712761160056055e-05, "loss": 0.5507, "step": 17166 }, { "epoch": 2.802416227909065, "grad_norm": 1.6827044486999512, "learning_rate": 1.8712605411002746e-05, "loss": 0.5317, "step": 17167 }, { "epoch": 2.8025794865515694, "grad_norm": 1.9226188659667969, "learning_rate": 1.8712449653175845e-05, "loss": 0.5778, "step": 17168 }, { "epoch": 2.802742745194074, "grad_norm": 1.6174261569976807, "learning_rate": 1.871229388657551e-05, "loss": 0.4903, "step": 17169 }, { "epoch": 2.8029060038365783, "grad_norm": 1.9690009355545044, "learning_rate": 1.8712138111201898e-05, "loss": 0.5587, "step": 17170 }, { "epoch": 2.8030692624790827, "grad_norm": 2.0121724605560303, "learning_rate": 1.8711982327055163e-05, "loss": 0.648, "step": 17171 }, { "epoch": 2.8032325211215867, "grad_norm": 1.773256778717041, "learning_rate": 1.8711826534135463e-05, "loss": 0.5565, "step": 17172 }, { "epoch": 2.803395779764091, "grad_norm": 1.8513621091842651, "learning_rate": 1.871167073244296e-05, "loss": 0.597, "step": 17173 }, { "epoch": 2.8035590384065956, "grad_norm": 2.258924722671509, "learning_rate": 1.8711514921977806e-05, "loss": 0.6837, "step": 17174 }, { "epoch": 2.8037222970491, "grad_norm": 1.616266131401062, "learning_rate": 1.8711359102740156e-05, "loss": 0.5134, "step": 17175 }, { "epoch": 2.8038855556916045, "grad_norm": 2.142273187637329, "learning_rate": 1.8711203274730172e-05, "loss": 0.6675, "step": 17176 }, { "epoch": 2.804048814334109, "grad_norm": 1.697885513305664, "learning_rate": 1.871104743794801e-05, "loss": 0.6194, "step": 17177 }, { "epoch": 2.8042120729766133, "grad_norm": 2.2029380798339844, "learning_rate": 1.8710891592393823e-05, "loss": 0.6885, "step": 17178 }, { "epoch": 2.8043753316191173, "grad_norm": 1.9925802946090698, "learning_rate": 1.871073573806777e-05, "loss": 0.7104, "step": 17179 }, { "epoch": 2.8045385902616218, "grad_norm": 1.9414154291152954, "learning_rate": 1.871057987497001e-05, "loss": 0.726, "step": 17180 }, { "epoch": 2.804701848904126, "grad_norm": 1.7174521684646606, "learning_rate": 1.8710424003100698e-05, "loss": 0.5749, "step": 17181 }, { "epoch": 2.8048651075466307, "grad_norm": 1.6031368970870972, "learning_rate": 1.871026812245999e-05, "loss": 0.5501, "step": 17182 }, { "epoch": 2.805028366189135, "grad_norm": 1.9977513551712036, "learning_rate": 1.871011223304805e-05, "loss": 0.5638, "step": 17183 }, { "epoch": 2.8051916248316395, "grad_norm": 1.5023610591888428, "learning_rate": 1.870995633486502e-05, "loss": 0.5202, "step": 17184 }, { "epoch": 2.805354883474144, "grad_norm": 1.6566226482391357, "learning_rate": 1.8709800427911072e-05, "loss": 0.509, "step": 17185 }, { "epoch": 2.8055181421166484, "grad_norm": 1.5050160884857178, "learning_rate": 1.8709644512186358e-05, "loss": 0.589, "step": 17186 }, { "epoch": 2.805681400759153, "grad_norm": 2.4857699871063232, "learning_rate": 1.870948858769103e-05, "loss": 0.6379, "step": 17187 }, { "epoch": 2.8058446594016573, "grad_norm": 1.5097664594650269, "learning_rate": 1.8709332654425253e-05, "loss": 0.5236, "step": 17188 }, { "epoch": 2.8060079180441617, "grad_norm": 1.9204481840133667, "learning_rate": 1.8709176712389178e-05, "loss": 0.551, "step": 17189 }, { "epoch": 2.8061711766866657, "grad_norm": 1.6182008981704712, "learning_rate": 1.8709020761582967e-05, "loss": 0.4532, "step": 17190 }, { "epoch": 2.80633443532917, "grad_norm": 1.5383261442184448, "learning_rate": 1.8708864802006774e-05, "loss": 0.4872, "step": 17191 }, { "epoch": 2.8064976939716746, "grad_norm": 1.7319843769073486, "learning_rate": 1.8708708833660755e-05, "loss": 0.6197, "step": 17192 }, { "epoch": 2.806660952614179, "grad_norm": 1.7208744287490845, "learning_rate": 1.870855285654507e-05, "loss": 0.5299, "step": 17193 }, { "epoch": 2.8068242112566835, "grad_norm": 1.6972123384475708, "learning_rate": 1.8708396870659872e-05, "loss": 0.4846, "step": 17194 }, { "epoch": 2.806987469899188, "grad_norm": 1.3941377401351929, "learning_rate": 1.8708240876005324e-05, "loss": 0.4706, "step": 17195 }, { "epoch": 2.8071507285416923, "grad_norm": 1.69076669216156, "learning_rate": 1.870808487258158e-05, "loss": 0.5484, "step": 17196 }, { "epoch": 2.8073139871841963, "grad_norm": 1.6400057077407837, "learning_rate": 1.870792886038879e-05, "loss": 0.6573, "step": 17197 }, { "epoch": 2.8074772458267008, "grad_norm": 1.5634244680404663, "learning_rate": 1.8707772839427124e-05, "loss": 0.4815, "step": 17198 }, { "epoch": 2.807640504469205, "grad_norm": 1.7634202241897583, "learning_rate": 1.8707616809696735e-05, "loss": 0.6148, "step": 17199 }, { "epoch": 2.8078037631117096, "grad_norm": 1.8735967874526978, "learning_rate": 1.8707460771197773e-05, "loss": 0.6037, "step": 17200 }, { "epoch": 2.807967021754214, "grad_norm": 1.9749094247817993, "learning_rate": 1.8707304723930404e-05, "loss": 0.6898, "step": 17201 }, { "epoch": 2.8081302803967185, "grad_norm": 1.5848931074142456, "learning_rate": 1.870714866789478e-05, "loss": 0.4809, "step": 17202 }, { "epoch": 2.808293539039223, "grad_norm": 1.6882058382034302, "learning_rate": 1.8706992603091057e-05, "loss": 0.5379, "step": 17203 }, { "epoch": 2.8084567976817274, "grad_norm": 1.6271591186523438, "learning_rate": 1.8706836529519398e-05, "loss": 0.5051, "step": 17204 }, { "epoch": 2.808620056324232, "grad_norm": 1.7973142862319946, "learning_rate": 1.8706680447179955e-05, "loss": 0.5146, "step": 17205 }, { "epoch": 2.8087833149667363, "grad_norm": 1.6381945610046387, "learning_rate": 1.870652435607289e-05, "loss": 0.5068, "step": 17206 }, { "epoch": 2.8089465736092407, "grad_norm": 1.9906976222991943, "learning_rate": 1.8706368256198356e-05, "loss": 0.6096, "step": 17207 }, { "epoch": 2.8091098322517447, "grad_norm": 2.055434226989746, "learning_rate": 1.870621214755651e-05, "loss": 0.5372, "step": 17208 }, { "epoch": 2.809273090894249, "grad_norm": 1.377748727798462, "learning_rate": 1.8706056030147512e-05, "loss": 0.4449, "step": 17209 }, { "epoch": 2.8094363495367536, "grad_norm": 2.1051223278045654, "learning_rate": 1.870589990397152e-05, "loss": 0.605, "step": 17210 }, { "epoch": 2.809599608179258, "grad_norm": 2.3544068336486816, "learning_rate": 1.8705743769028687e-05, "loss": 0.7284, "step": 17211 }, { "epoch": 2.8097628668217625, "grad_norm": 2.2355659008026123, "learning_rate": 1.8705587625319167e-05, "loss": 0.7012, "step": 17212 }, { "epoch": 2.809926125464267, "grad_norm": 1.9192676544189453, "learning_rate": 1.870543147284313e-05, "loss": 0.581, "step": 17213 }, { "epoch": 2.810089384106771, "grad_norm": 1.6217750310897827, "learning_rate": 1.8705275311600724e-05, "loss": 0.5392, "step": 17214 }, { "epoch": 2.8102526427492753, "grad_norm": 1.7613965272903442, "learning_rate": 1.8705119141592108e-05, "loss": 0.5949, "step": 17215 }, { "epoch": 2.8104159013917798, "grad_norm": 1.7473149299621582, "learning_rate": 1.870496296281744e-05, "loss": 0.5289, "step": 17216 }, { "epoch": 2.810579160034284, "grad_norm": 1.8169668912887573, "learning_rate": 1.8704806775276874e-05, "loss": 0.6406, "step": 17217 }, { "epoch": 2.8107424186767886, "grad_norm": 1.5891401767730713, "learning_rate": 1.870465057897057e-05, "loss": 0.4285, "step": 17218 }, { "epoch": 2.810905677319293, "grad_norm": 1.9422376155853271, "learning_rate": 1.8704494373898687e-05, "loss": 0.6147, "step": 17219 }, { "epoch": 2.8110689359617975, "grad_norm": 1.9877514839172363, "learning_rate": 1.870433816006138e-05, "loss": 0.592, "step": 17220 }, { "epoch": 2.811232194604302, "grad_norm": 1.6401351690292358, "learning_rate": 1.8704181937458808e-05, "loss": 0.6111, "step": 17221 }, { "epoch": 2.8113954532468064, "grad_norm": 1.542031168937683, "learning_rate": 1.8704025706091124e-05, "loss": 0.5188, "step": 17222 }, { "epoch": 2.811558711889311, "grad_norm": 1.5012637376785278, "learning_rate": 1.870386946595849e-05, "loss": 0.5066, "step": 17223 }, { "epoch": 2.8117219705318153, "grad_norm": 1.4282277822494507, "learning_rate": 1.8703713217061064e-05, "loss": 0.477, "step": 17224 }, { "epoch": 2.8118852291743193, "grad_norm": 1.7379660606384277, "learning_rate": 1.8703556959398998e-05, "loss": 0.5908, "step": 17225 }, { "epoch": 2.8120484878168237, "grad_norm": 1.5535730123519897, "learning_rate": 1.8703400692972455e-05, "loss": 0.4807, "step": 17226 }, { "epoch": 2.812211746459328, "grad_norm": 2.0971555709838867, "learning_rate": 1.8703244417781587e-05, "loss": 0.629, "step": 17227 }, { "epoch": 2.8123750051018326, "grad_norm": 1.9601941108703613, "learning_rate": 1.8703088133826558e-05, "loss": 0.5598, "step": 17228 }, { "epoch": 2.812538263744337, "grad_norm": 1.6379350423812866, "learning_rate": 1.870293184110752e-05, "loss": 0.529, "step": 17229 }, { "epoch": 2.8127015223868415, "grad_norm": 1.5349886417388916, "learning_rate": 1.8702775539624628e-05, "loss": 0.5102, "step": 17230 }, { "epoch": 2.812864781029346, "grad_norm": 1.7630665302276611, "learning_rate": 1.8702619229378048e-05, "loss": 0.546, "step": 17231 }, { "epoch": 2.81302803967185, "grad_norm": 1.6848056316375732, "learning_rate": 1.870246291036793e-05, "loss": 0.516, "step": 17232 }, { "epoch": 2.8131912983143543, "grad_norm": 1.9701974391937256, "learning_rate": 1.8702306582594433e-05, "loss": 0.6317, "step": 17233 }, { "epoch": 2.8133545569568588, "grad_norm": 1.8394863605499268, "learning_rate": 1.870215024605772e-05, "loss": 0.555, "step": 17234 }, { "epoch": 2.813517815599363, "grad_norm": 1.7940844297409058, "learning_rate": 1.8701993900757942e-05, "loss": 0.556, "step": 17235 }, { "epoch": 2.8136810742418676, "grad_norm": 1.4964839220046997, "learning_rate": 1.870183754669526e-05, "loss": 0.4725, "step": 17236 }, { "epoch": 2.813844332884372, "grad_norm": 1.9736336469650269, "learning_rate": 1.8701681183869825e-05, "loss": 0.5773, "step": 17237 }, { "epoch": 2.8140075915268765, "grad_norm": 1.6926928758621216, "learning_rate": 1.8701524812281804e-05, "loss": 0.5413, "step": 17238 }, { "epoch": 2.814170850169381, "grad_norm": 1.8435646295547485, "learning_rate": 1.870136843193135e-05, "loss": 0.5857, "step": 17239 }, { "epoch": 2.8143341088118854, "grad_norm": 1.6880252361297607, "learning_rate": 1.8701212042818616e-05, "loss": 0.4825, "step": 17240 }, { "epoch": 2.81449736745439, "grad_norm": 1.8600562810897827, "learning_rate": 1.8701055644943768e-05, "loss": 0.6058, "step": 17241 }, { "epoch": 2.8146606260968943, "grad_norm": 1.5524762868881226, "learning_rate": 1.8700899238306956e-05, "loss": 0.5135, "step": 17242 }, { "epoch": 2.8148238847393983, "grad_norm": 1.9206198453903198, "learning_rate": 1.8700742822908345e-05, "loss": 0.5699, "step": 17243 }, { "epoch": 2.8149871433819027, "grad_norm": 1.8925408124923706, "learning_rate": 1.8700586398748085e-05, "loss": 0.6105, "step": 17244 }, { "epoch": 2.815150402024407, "grad_norm": 1.8709620237350464, "learning_rate": 1.8700429965826337e-05, "loss": 0.5302, "step": 17245 }, { "epoch": 2.8153136606669116, "grad_norm": 1.6788350343704224, "learning_rate": 1.870027352414326e-05, "loss": 0.5345, "step": 17246 }, { "epoch": 2.815476919309416, "grad_norm": 1.9488186836242676, "learning_rate": 1.870011707369901e-05, "loss": 0.619, "step": 17247 }, { "epoch": 2.8156401779519205, "grad_norm": 1.7203797101974487, "learning_rate": 1.869996061449374e-05, "loss": 0.5373, "step": 17248 }, { "epoch": 2.8158034365944244, "grad_norm": 1.9176677465438843, "learning_rate": 1.8699804146527617e-05, "loss": 0.5035, "step": 17249 }, { "epoch": 2.815966695236929, "grad_norm": 1.9909449815750122, "learning_rate": 1.869964766980079e-05, "loss": 0.5778, "step": 17250 }, { "epoch": 2.8161299538794333, "grad_norm": 1.7532309293746948, "learning_rate": 1.8699491184313422e-05, "loss": 0.5043, "step": 17251 }, { "epoch": 2.8162932125219378, "grad_norm": 2.1183066368103027, "learning_rate": 1.8699334690065672e-05, "loss": 0.5595, "step": 17252 }, { "epoch": 2.816456471164442, "grad_norm": 1.8930306434631348, "learning_rate": 1.869917818705769e-05, "loss": 0.5782, "step": 17253 }, { "epoch": 2.8166197298069466, "grad_norm": 1.609371304512024, "learning_rate": 1.869902167528964e-05, "loss": 0.5401, "step": 17254 }, { "epoch": 2.816782988449451, "grad_norm": 1.9157899618148804, "learning_rate": 1.8698865154761673e-05, "loss": 0.6099, "step": 17255 }, { "epoch": 2.8169462470919555, "grad_norm": 1.6352628469467163, "learning_rate": 1.8698708625473956e-05, "loss": 0.5308, "step": 17256 }, { "epoch": 2.81710950573446, "grad_norm": 1.9747720956802368, "learning_rate": 1.869855208742664e-05, "loss": 0.588, "step": 17257 }, { "epoch": 2.8172727643769644, "grad_norm": 1.4987632036209106, "learning_rate": 1.8698395540619883e-05, "loss": 0.4975, "step": 17258 }, { "epoch": 2.817436023019469, "grad_norm": 1.613078236579895, "learning_rate": 1.8698238985053846e-05, "loss": 0.5238, "step": 17259 }, { "epoch": 2.817599281661973, "grad_norm": 1.5811842679977417, "learning_rate": 1.8698082420728685e-05, "loss": 0.5388, "step": 17260 }, { "epoch": 2.8177625403044773, "grad_norm": 1.8937411308288574, "learning_rate": 1.8697925847644557e-05, "loss": 0.5406, "step": 17261 }, { "epoch": 2.8179257989469817, "grad_norm": 1.880462646484375, "learning_rate": 1.8697769265801616e-05, "loss": 0.5855, "step": 17262 }, { "epoch": 2.818089057589486, "grad_norm": 2.0337398052215576, "learning_rate": 1.8697612675200027e-05, "loss": 0.5994, "step": 17263 }, { "epoch": 2.8182523162319906, "grad_norm": 1.9251532554626465, "learning_rate": 1.8697456075839947e-05, "loss": 0.5425, "step": 17264 }, { "epoch": 2.818415574874495, "grad_norm": 1.8267496824264526, "learning_rate": 1.869729946772153e-05, "loss": 0.5301, "step": 17265 }, { "epoch": 2.8185788335169994, "grad_norm": 1.497531771659851, "learning_rate": 1.869714285084493e-05, "loss": 0.5046, "step": 17266 }, { "epoch": 2.8187420921595034, "grad_norm": 1.7988173961639404, "learning_rate": 1.8696986225210315e-05, "loss": 0.6142, "step": 17267 }, { "epoch": 2.818905350802008, "grad_norm": 2.074739456176758, "learning_rate": 1.8696829590817833e-05, "loss": 0.6618, "step": 17268 }, { "epoch": 2.8190686094445123, "grad_norm": 1.7981610298156738, "learning_rate": 1.8696672947667648e-05, "loss": 0.5993, "step": 17269 }, { "epoch": 2.8192318680870168, "grad_norm": 1.7672735452651978, "learning_rate": 1.8696516295759914e-05, "loss": 0.5863, "step": 17270 }, { "epoch": 2.819395126729521, "grad_norm": 1.9021672010421753, "learning_rate": 1.869635963509479e-05, "loss": 0.6537, "step": 17271 }, { "epoch": 2.8195583853720256, "grad_norm": 1.8504589796066284, "learning_rate": 1.8696202965672435e-05, "loss": 0.6168, "step": 17272 }, { "epoch": 2.81972164401453, "grad_norm": 1.7229737043380737, "learning_rate": 1.869604628749301e-05, "loss": 0.6462, "step": 17273 }, { "epoch": 2.8198849026570345, "grad_norm": 1.9509785175323486, "learning_rate": 1.8695889600556665e-05, "loss": 0.6163, "step": 17274 }, { "epoch": 2.820048161299539, "grad_norm": 2.0354652404785156, "learning_rate": 1.869573290486356e-05, "loss": 0.6104, "step": 17275 }, { "epoch": 2.8202114199420434, "grad_norm": 1.6404008865356445, "learning_rate": 1.869557620041386e-05, "loss": 0.6008, "step": 17276 }, { "epoch": 2.820374678584548, "grad_norm": 1.6229885816574097, "learning_rate": 1.8695419487207713e-05, "loss": 0.524, "step": 17277 }, { "epoch": 2.820537937227052, "grad_norm": 1.7044950723648071, "learning_rate": 1.869526276524528e-05, "loss": 0.5732, "step": 17278 }, { "epoch": 2.8207011958695563, "grad_norm": 1.6687562465667725, "learning_rate": 1.869510603452672e-05, "loss": 0.5452, "step": 17279 }, { "epoch": 2.8208644545120607, "grad_norm": 1.6968309879302979, "learning_rate": 1.869494929505219e-05, "loss": 0.5012, "step": 17280 }, { "epoch": 2.821027713154565, "grad_norm": 1.8802205324172974, "learning_rate": 1.8694792546821852e-05, "loss": 0.5975, "step": 17281 }, { "epoch": 2.8211909717970696, "grad_norm": 1.5958646535873413, "learning_rate": 1.8694635789835856e-05, "loss": 0.5162, "step": 17282 }, { "epoch": 2.821354230439574, "grad_norm": 1.7507811784744263, "learning_rate": 1.869447902409437e-05, "loss": 0.4816, "step": 17283 }, { "epoch": 2.8215174890820784, "grad_norm": 2.0087223052978516, "learning_rate": 1.8694322249597538e-05, "loss": 0.6325, "step": 17284 }, { "epoch": 2.8216807477245824, "grad_norm": 1.9035451412200928, "learning_rate": 1.869416546634553e-05, "loss": 0.6042, "step": 17285 }, { "epoch": 2.821844006367087, "grad_norm": 1.5695096254348755, "learning_rate": 1.86940086743385e-05, "loss": 0.4772, "step": 17286 }, { "epoch": 2.8220072650095913, "grad_norm": 1.8475184440612793, "learning_rate": 1.8693851873576605e-05, "loss": 0.5506, "step": 17287 }, { "epoch": 2.8221705236520958, "grad_norm": 1.7161576747894287, "learning_rate": 1.8693695064060004e-05, "loss": 0.5508, "step": 17288 }, { "epoch": 2.8223337822946, "grad_norm": 1.456768274307251, "learning_rate": 1.8693538245788853e-05, "loss": 0.5322, "step": 17289 }, { "epoch": 2.8224970409371046, "grad_norm": 1.8262561559677124, "learning_rate": 1.8693381418763312e-05, "loss": 0.5345, "step": 17290 }, { "epoch": 2.822660299579609, "grad_norm": 1.8271857500076294, "learning_rate": 1.869322458298354e-05, "loss": 0.5737, "step": 17291 }, { "epoch": 2.8228235582221135, "grad_norm": 1.9797736406326294, "learning_rate": 1.8693067738449692e-05, "loss": 0.6836, "step": 17292 }, { "epoch": 2.822986816864618, "grad_norm": 1.5971038341522217, "learning_rate": 1.8692910885161925e-05, "loss": 0.5506, "step": 17293 }, { "epoch": 2.8231500755071224, "grad_norm": 1.6488807201385498, "learning_rate": 1.86927540231204e-05, "loss": 0.5509, "step": 17294 }, { "epoch": 2.823313334149627, "grad_norm": 1.6878246068954468, "learning_rate": 1.8692597152325275e-05, "loss": 0.5592, "step": 17295 }, { "epoch": 2.823476592792131, "grad_norm": 1.6148552894592285, "learning_rate": 1.8692440272776705e-05, "loss": 0.5445, "step": 17296 }, { "epoch": 2.8236398514346353, "grad_norm": 1.648364782333374, "learning_rate": 1.8692283384474856e-05, "loss": 0.5099, "step": 17297 }, { "epoch": 2.8238031100771397, "grad_norm": 1.793453574180603, "learning_rate": 1.8692126487419876e-05, "loss": 0.5706, "step": 17298 }, { "epoch": 2.823966368719644, "grad_norm": 2.2620513439178467, "learning_rate": 1.8691969581611922e-05, "loss": 0.6555, "step": 17299 }, { "epoch": 2.8241296273621486, "grad_norm": 2.074971914291382, "learning_rate": 1.8691812667051164e-05, "loss": 0.6315, "step": 17300 }, { "epoch": 2.824292886004653, "grad_norm": 1.8155781030654907, "learning_rate": 1.869165574373775e-05, "loss": 0.583, "step": 17301 }, { "epoch": 2.824456144647157, "grad_norm": 1.484980583190918, "learning_rate": 1.869149881167184e-05, "loss": 0.4651, "step": 17302 }, { "epoch": 2.8246194032896614, "grad_norm": 1.712096929550171, "learning_rate": 1.8691341870853598e-05, "loss": 0.5567, "step": 17303 }, { "epoch": 2.824782661932166, "grad_norm": 1.8923858404159546, "learning_rate": 1.8691184921283172e-05, "loss": 0.5617, "step": 17304 }, { "epoch": 2.8249459205746703, "grad_norm": 1.701290488243103, "learning_rate": 1.8691027962960727e-05, "loss": 0.5175, "step": 17305 }, { "epoch": 2.8251091792171747, "grad_norm": 1.4980497360229492, "learning_rate": 1.8690870995886417e-05, "loss": 0.4396, "step": 17306 }, { "epoch": 2.825272437859679, "grad_norm": 1.5317814350128174, "learning_rate": 1.8690714020060402e-05, "loss": 0.5027, "step": 17307 }, { "epoch": 2.8254356965021836, "grad_norm": 2.0754806995391846, "learning_rate": 1.8690557035482843e-05, "loss": 0.7177, "step": 17308 }, { "epoch": 2.825598955144688, "grad_norm": 1.9141489267349243, "learning_rate": 1.8690400042153895e-05, "loss": 0.6023, "step": 17309 }, { "epoch": 2.8257622137871925, "grad_norm": 1.7760319709777832, "learning_rate": 1.8690243040073715e-05, "loss": 0.5827, "step": 17310 }, { "epoch": 2.825925472429697, "grad_norm": 1.6024688482284546, "learning_rate": 1.869008602924246e-05, "loss": 0.5342, "step": 17311 }, { "epoch": 2.8260887310722014, "grad_norm": 1.8162543773651123, "learning_rate": 1.86899290096603e-05, "loss": 0.5916, "step": 17312 }, { "epoch": 2.8262519897147054, "grad_norm": 1.550423502922058, "learning_rate": 1.8689771981327377e-05, "loss": 0.546, "step": 17313 }, { "epoch": 2.82641524835721, "grad_norm": 2.0149052143096924, "learning_rate": 1.8689614944243855e-05, "loss": 0.5946, "step": 17314 }, { "epoch": 2.8265785069997142, "grad_norm": 1.6024562120437622, "learning_rate": 1.8689457898409897e-05, "loss": 0.5705, "step": 17315 }, { "epoch": 2.8267417656422187, "grad_norm": 1.7897839546203613, "learning_rate": 1.8689300843825654e-05, "loss": 0.5865, "step": 17316 }, { "epoch": 2.826905024284723, "grad_norm": 1.6873918771743774, "learning_rate": 1.8689143780491287e-05, "loss": 0.5232, "step": 17317 }, { "epoch": 2.8270682829272276, "grad_norm": 1.8137292861938477, "learning_rate": 1.8688986708406958e-05, "loss": 0.5364, "step": 17318 }, { "epoch": 2.827231541569732, "grad_norm": 1.51260244846344, "learning_rate": 1.868882962757282e-05, "loss": 0.5956, "step": 17319 }, { "epoch": 2.827394800212236, "grad_norm": 2.0289108753204346, "learning_rate": 1.8688672537989032e-05, "loss": 0.6131, "step": 17320 }, { "epoch": 2.8275580588547404, "grad_norm": 1.393983006477356, "learning_rate": 1.868851543965575e-05, "loss": 0.4733, "step": 17321 }, { "epoch": 2.827721317497245, "grad_norm": 1.6007542610168457, "learning_rate": 1.8688358332573142e-05, "loss": 0.5224, "step": 17322 }, { "epoch": 2.8278845761397493, "grad_norm": 1.9269564151763916, "learning_rate": 1.8688201216741357e-05, "loss": 0.5624, "step": 17323 }, { "epoch": 2.8280478347822537, "grad_norm": 1.8556692600250244, "learning_rate": 1.8688044092160554e-05, "loss": 0.6026, "step": 17324 }, { "epoch": 2.828211093424758, "grad_norm": 1.9639966487884521, "learning_rate": 1.8687886958830894e-05, "loss": 0.4986, "step": 17325 }, { "epoch": 2.8283743520672626, "grad_norm": 1.833815574645996, "learning_rate": 1.8687729816752536e-05, "loss": 0.5826, "step": 17326 }, { "epoch": 2.828537610709767, "grad_norm": 1.7176768779754639, "learning_rate": 1.868757266592563e-05, "loss": 0.5888, "step": 17327 }, { "epoch": 2.8287008693522715, "grad_norm": 1.6722604036331177, "learning_rate": 1.8687415506350347e-05, "loss": 0.5354, "step": 17328 }, { "epoch": 2.828864127994776, "grad_norm": 1.935530185699463, "learning_rate": 1.868725833802684e-05, "loss": 0.7398, "step": 17329 }, { "epoch": 2.8290273866372804, "grad_norm": 1.9576176404953003, "learning_rate": 1.868710116095526e-05, "loss": 0.5482, "step": 17330 }, { "epoch": 2.8291906452797844, "grad_norm": 1.6977697610855103, "learning_rate": 1.8686943975135774e-05, "loss": 0.5549, "step": 17331 }, { "epoch": 2.829353903922289, "grad_norm": 1.880038857460022, "learning_rate": 1.868678678056854e-05, "loss": 0.6695, "step": 17332 }, { "epoch": 2.8295171625647932, "grad_norm": 1.537307858467102, "learning_rate": 1.8686629577253713e-05, "loss": 0.4679, "step": 17333 }, { "epoch": 2.8296804212072977, "grad_norm": 1.343293309211731, "learning_rate": 1.868647236519145e-05, "loss": 0.4562, "step": 17334 }, { "epoch": 2.829843679849802, "grad_norm": 1.4677590131759644, "learning_rate": 1.8686315144381914e-05, "loss": 0.5497, "step": 17335 }, { "epoch": 2.8300069384923066, "grad_norm": 1.6494159698486328, "learning_rate": 1.868615791482526e-05, "loss": 0.5568, "step": 17336 }, { "epoch": 2.8301701971348106, "grad_norm": 2.12111496925354, "learning_rate": 1.8686000676521648e-05, "loss": 0.6442, "step": 17337 }, { "epoch": 2.830333455777315, "grad_norm": 2.138129234313965, "learning_rate": 1.8685843429471235e-05, "loss": 0.7177, "step": 17338 }, { "epoch": 2.8304967144198194, "grad_norm": 2.2274515628814697, "learning_rate": 1.868568617367418e-05, "loss": 0.6653, "step": 17339 }, { "epoch": 2.830659973062324, "grad_norm": 2.2251148223876953, "learning_rate": 1.8685528909130643e-05, "loss": 0.6858, "step": 17340 }, { "epoch": 2.8308232317048283, "grad_norm": 1.7982655763626099, "learning_rate": 1.868537163584078e-05, "loss": 0.6309, "step": 17341 }, { "epoch": 2.8309864903473327, "grad_norm": 1.6273930072784424, "learning_rate": 1.8685214353804748e-05, "loss": 0.4908, "step": 17342 }, { "epoch": 2.831149748989837, "grad_norm": 1.8796451091766357, "learning_rate": 1.8685057063022708e-05, "loss": 0.6342, "step": 17343 }, { "epoch": 2.8313130076323416, "grad_norm": 1.6495277881622314, "learning_rate": 1.8684899763494816e-05, "loss": 0.5511, "step": 17344 }, { "epoch": 2.831476266274846, "grad_norm": 1.8027392625808716, "learning_rate": 1.8684742455221238e-05, "loss": 0.6454, "step": 17345 }, { "epoch": 2.8316395249173505, "grad_norm": 1.4123716354370117, "learning_rate": 1.8684585138202122e-05, "loss": 0.4695, "step": 17346 }, { "epoch": 2.831802783559855, "grad_norm": 1.8869311809539795, "learning_rate": 1.8684427812437632e-05, "loss": 0.6065, "step": 17347 }, { "epoch": 2.831966042202359, "grad_norm": 1.5599076747894287, "learning_rate": 1.8684270477927927e-05, "loss": 0.4726, "step": 17348 }, { "epoch": 2.8321293008448634, "grad_norm": 1.7237598896026611, "learning_rate": 1.868411313467316e-05, "loss": 0.5791, "step": 17349 }, { "epoch": 2.832292559487368, "grad_norm": 1.591324806213379, "learning_rate": 1.8683955782673496e-05, "loss": 0.4902, "step": 17350 }, { "epoch": 2.8324558181298722, "grad_norm": 1.777327060699463, "learning_rate": 1.8683798421929093e-05, "loss": 0.556, "step": 17351 }, { "epoch": 2.8326190767723767, "grad_norm": 1.5322059392929077, "learning_rate": 1.8683641052440105e-05, "loss": 0.5479, "step": 17352 }, { "epoch": 2.832782335414881, "grad_norm": 1.9605932235717773, "learning_rate": 1.868348367420669e-05, "loss": 0.602, "step": 17353 }, { "epoch": 2.8329455940573856, "grad_norm": 2.215287685394287, "learning_rate": 1.8683326287229017e-05, "loss": 0.8022, "step": 17354 }, { "epoch": 2.8331088526998895, "grad_norm": 1.7239046096801758, "learning_rate": 1.8683168891507232e-05, "loss": 0.5746, "step": 17355 }, { "epoch": 2.833272111342394, "grad_norm": 2.1312901973724365, "learning_rate": 1.86830114870415e-05, "loss": 0.5981, "step": 17356 }, { "epoch": 2.8334353699848984, "grad_norm": 1.6060967445373535, "learning_rate": 1.8682854073831974e-05, "loss": 0.5592, "step": 17357 }, { "epoch": 2.833598628627403, "grad_norm": 2.1366775035858154, "learning_rate": 1.868269665187882e-05, "loss": 0.6748, "step": 17358 }, { "epoch": 2.8337618872699073, "grad_norm": 1.7305536270141602, "learning_rate": 1.868253922118219e-05, "loss": 0.5244, "step": 17359 }, { "epoch": 2.8339251459124117, "grad_norm": 1.7873256206512451, "learning_rate": 1.8682381781742246e-05, "loss": 0.6359, "step": 17360 }, { "epoch": 2.834088404554916, "grad_norm": 1.8258682489395142, "learning_rate": 1.8682224333559146e-05, "loss": 0.6322, "step": 17361 }, { "epoch": 2.8342516631974206, "grad_norm": 1.987223505973816, "learning_rate": 1.8682066876633048e-05, "loss": 0.5879, "step": 17362 }, { "epoch": 2.834414921839925, "grad_norm": 1.4359761476516724, "learning_rate": 1.868190941096411e-05, "loss": 0.5234, "step": 17363 }, { "epoch": 2.8345781804824295, "grad_norm": 1.655376672744751, "learning_rate": 1.8681751936552496e-05, "loss": 0.5049, "step": 17364 }, { "epoch": 2.834741439124934, "grad_norm": 1.418330192565918, "learning_rate": 1.8681594453398354e-05, "loss": 0.5243, "step": 17365 }, { "epoch": 2.834904697767438, "grad_norm": 1.6683681011199951, "learning_rate": 1.8681436961501853e-05, "loss": 0.5321, "step": 17366 }, { "epoch": 2.8350679564099424, "grad_norm": 1.9677988290786743, "learning_rate": 1.8681279460863145e-05, "loss": 0.6173, "step": 17367 }, { "epoch": 2.835231215052447, "grad_norm": 1.9433727264404297, "learning_rate": 1.8681121951482397e-05, "loss": 0.5813, "step": 17368 }, { "epoch": 2.8353944736949512, "grad_norm": 1.6319072246551514, "learning_rate": 1.8680964433359753e-05, "loss": 0.5052, "step": 17369 }, { "epoch": 2.8355577323374557, "grad_norm": 1.9237794876098633, "learning_rate": 1.8680806906495384e-05, "loss": 0.5363, "step": 17370 }, { "epoch": 2.83572099097996, "grad_norm": 1.8750869035720825, "learning_rate": 1.8680649370889442e-05, "loss": 0.6142, "step": 17371 }, { "epoch": 2.8358842496224645, "grad_norm": 1.906185507774353, "learning_rate": 1.8680491826542093e-05, "loss": 0.5762, "step": 17372 }, { "epoch": 2.8360475082649685, "grad_norm": 1.9801242351531982, "learning_rate": 1.868033427345349e-05, "loss": 0.7093, "step": 17373 }, { "epoch": 2.836210766907473, "grad_norm": 2.0190465450286865, "learning_rate": 1.868017671162379e-05, "loss": 0.5502, "step": 17374 }, { "epoch": 2.8363740255499774, "grad_norm": 1.9609578847885132, "learning_rate": 1.8680019141053157e-05, "loss": 0.6459, "step": 17375 }, { "epoch": 2.836537284192482, "grad_norm": 1.7368839979171753, "learning_rate": 1.8679861561741745e-05, "loss": 0.5822, "step": 17376 }, { "epoch": 2.8367005428349863, "grad_norm": 1.8037288188934326, "learning_rate": 1.8679703973689714e-05, "loss": 0.6085, "step": 17377 }, { "epoch": 2.8368638014774907, "grad_norm": 1.735209584236145, "learning_rate": 1.8679546376897226e-05, "loss": 0.5528, "step": 17378 }, { "epoch": 2.837027060119995, "grad_norm": 1.6232365369796753, "learning_rate": 1.8679388771364438e-05, "loss": 0.5215, "step": 17379 }, { "epoch": 2.8371903187624996, "grad_norm": 1.6668144464492798, "learning_rate": 1.8679231157091507e-05, "loss": 0.5751, "step": 17380 }, { "epoch": 2.837353577405004, "grad_norm": 1.928308129310608, "learning_rate": 1.8679073534078588e-05, "loss": 0.5364, "step": 17381 }, { "epoch": 2.8375168360475085, "grad_norm": 1.5737284421920776, "learning_rate": 1.8678915902325848e-05, "loss": 0.4611, "step": 17382 }, { "epoch": 2.837680094690013, "grad_norm": 1.7641549110412598, "learning_rate": 1.8678758261833442e-05, "loss": 0.5171, "step": 17383 }, { "epoch": 2.837843353332517, "grad_norm": 1.588366150856018, "learning_rate": 1.867860061260153e-05, "loss": 0.5185, "step": 17384 }, { "epoch": 2.8380066119750214, "grad_norm": 1.7705422639846802, "learning_rate": 1.8678442954630267e-05, "loss": 0.5633, "step": 17385 }, { "epoch": 2.838169870617526, "grad_norm": 1.7404228448867798, "learning_rate": 1.8678285287919813e-05, "loss": 0.4903, "step": 17386 }, { "epoch": 2.8383331292600302, "grad_norm": 1.700900912284851, "learning_rate": 1.8678127612470334e-05, "loss": 0.5739, "step": 17387 }, { "epoch": 2.8384963879025347, "grad_norm": 1.7212839126586914, "learning_rate": 1.867796992828198e-05, "loss": 0.5926, "step": 17388 }, { "epoch": 2.838659646545039, "grad_norm": 1.9364198446273804, "learning_rate": 1.867781223535491e-05, "loss": 0.5376, "step": 17389 }, { "epoch": 2.838822905187543, "grad_norm": 2.1636710166931152, "learning_rate": 1.8677654533689287e-05, "loss": 0.6149, "step": 17390 }, { "epoch": 2.8389861638300475, "grad_norm": 1.5275121927261353, "learning_rate": 1.8677496823285265e-05, "loss": 0.5104, "step": 17391 }, { "epoch": 2.839149422472552, "grad_norm": 1.6003931760787964, "learning_rate": 1.867733910414301e-05, "loss": 0.4403, "step": 17392 }, { "epoch": 2.8393126811150564, "grad_norm": 1.9879560470581055, "learning_rate": 1.867718137626268e-05, "loss": 0.6583, "step": 17393 }, { "epoch": 2.839475939757561, "grad_norm": 2.1571993827819824, "learning_rate": 1.8677023639644422e-05, "loss": 0.6243, "step": 17394 }, { "epoch": 2.8396391984000653, "grad_norm": 1.4449546337127686, "learning_rate": 1.867686589428841e-05, "loss": 0.511, "step": 17395 }, { "epoch": 2.8398024570425697, "grad_norm": 1.694981575012207, "learning_rate": 1.8676708140194794e-05, "loss": 0.554, "step": 17396 }, { "epoch": 2.839965715685074, "grad_norm": 1.7250173091888428, "learning_rate": 1.867655037736374e-05, "loss": 0.5579, "step": 17397 }, { "epoch": 2.8401289743275786, "grad_norm": 1.85979425907135, "learning_rate": 1.8676392605795393e-05, "loss": 0.5294, "step": 17398 }, { "epoch": 2.840292232970083, "grad_norm": 1.8543678522109985, "learning_rate": 1.8676234825489923e-05, "loss": 0.6311, "step": 17399 }, { "epoch": 2.8404554916125875, "grad_norm": 1.8117822408676147, "learning_rate": 1.867607703644749e-05, "loss": 0.5866, "step": 17400 }, { "epoch": 2.8406187502550915, "grad_norm": 1.9270445108413696, "learning_rate": 1.867591923866825e-05, "loss": 0.6441, "step": 17401 }, { "epoch": 2.840782008897596, "grad_norm": 1.664080023765564, "learning_rate": 1.8675761432152362e-05, "loss": 0.5101, "step": 17402 }, { "epoch": 2.8409452675401003, "grad_norm": 1.8976043462753296, "learning_rate": 1.8675603616899983e-05, "loss": 0.6161, "step": 17403 }, { "epoch": 2.841108526182605, "grad_norm": 1.9277417659759521, "learning_rate": 1.8675445792911276e-05, "loss": 0.588, "step": 17404 }, { "epoch": 2.8412717848251092, "grad_norm": 1.645962119102478, "learning_rate": 1.8675287960186393e-05, "loss": 0.514, "step": 17405 }, { "epoch": 2.8414350434676137, "grad_norm": 1.787660002708435, "learning_rate": 1.86751301187255e-05, "loss": 0.5516, "step": 17406 }, { "epoch": 2.841598302110118, "grad_norm": 1.8849565982818604, "learning_rate": 1.8674972268528753e-05, "loss": 0.6826, "step": 17407 }, { "epoch": 2.841761560752622, "grad_norm": 1.5399669408798218, "learning_rate": 1.8674814409596315e-05, "loss": 0.4394, "step": 17408 }, { "epoch": 2.8419248193951265, "grad_norm": 1.6384273767471313, "learning_rate": 1.8674656541928334e-05, "loss": 0.5397, "step": 17409 }, { "epoch": 2.842088078037631, "grad_norm": 1.8995320796966553, "learning_rate": 1.867449866552498e-05, "loss": 0.5777, "step": 17410 }, { "epoch": 2.8422513366801354, "grad_norm": 1.6528632640838623, "learning_rate": 1.8674340780386407e-05, "loss": 0.5792, "step": 17411 }, { "epoch": 2.84241459532264, "grad_norm": 2.0069093704223633, "learning_rate": 1.8674182886512776e-05, "loss": 0.7137, "step": 17412 }, { "epoch": 2.8425778539651443, "grad_norm": 2.015998363494873, "learning_rate": 1.8674024983904246e-05, "loss": 0.627, "step": 17413 }, { "epoch": 2.8427411126076487, "grad_norm": 1.7591241598129272, "learning_rate": 1.8673867072560973e-05, "loss": 0.5666, "step": 17414 }, { "epoch": 2.842904371250153, "grad_norm": 1.5675101280212402, "learning_rate": 1.867370915248312e-05, "loss": 0.4807, "step": 17415 }, { "epoch": 2.8430676298926576, "grad_norm": 2.0212364196777344, "learning_rate": 1.8673551223670843e-05, "loss": 0.6933, "step": 17416 }, { "epoch": 2.843230888535162, "grad_norm": 2.004387140274048, "learning_rate": 1.86733932861243e-05, "loss": 0.6939, "step": 17417 }, { "epoch": 2.8433941471776665, "grad_norm": 1.5747078657150269, "learning_rate": 1.8673235339843657e-05, "loss": 0.53, "step": 17418 }, { "epoch": 2.8435574058201705, "grad_norm": 1.8123526573181152, "learning_rate": 1.8673077384829067e-05, "loss": 0.6437, "step": 17419 }, { "epoch": 2.843720664462675, "grad_norm": 1.7192234992980957, "learning_rate": 1.867291942108069e-05, "loss": 0.5295, "step": 17420 }, { "epoch": 2.8438839231051793, "grad_norm": 1.7792750597000122, "learning_rate": 1.8672761448598683e-05, "loss": 0.5714, "step": 17421 }, { "epoch": 2.844047181747684, "grad_norm": 1.9339157342910767, "learning_rate": 1.8672603467383212e-05, "loss": 0.4951, "step": 17422 }, { "epoch": 2.844210440390188, "grad_norm": 1.7594730854034424, "learning_rate": 1.8672445477434428e-05, "loss": 0.5997, "step": 17423 }, { "epoch": 2.8443736990326927, "grad_norm": 1.863518238067627, "learning_rate": 1.8672287478752494e-05, "loss": 0.5586, "step": 17424 }, { "epoch": 2.8445369576751967, "grad_norm": 1.8950573205947876, "learning_rate": 1.8672129471337568e-05, "loss": 0.6036, "step": 17425 }, { "epoch": 2.844700216317701, "grad_norm": 1.7747693061828613, "learning_rate": 1.867197145518981e-05, "loss": 0.5618, "step": 17426 }, { "epoch": 2.8448634749602055, "grad_norm": 2.0241665840148926, "learning_rate": 1.8671813430309385e-05, "loss": 0.6268, "step": 17427 }, { "epoch": 2.84502673360271, "grad_norm": 1.4320847988128662, "learning_rate": 1.867165539669644e-05, "loss": 0.5063, "step": 17428 }, { "epoch": 2.8451899922452144, "grad_norm": 1.6522828340530396, "learning_rate": 1.8671497354351142e-05, "loss": 0.5439, "step": 17429 }, { "epoch": 2.845353250887719, "grad_norm": 1.6280933618545532, "learning_rate": 1.8671339303273647e-05, "loss": 0.4953, "step": 17430 }, { "epoch": 2.8455165095302233, "grad_norm": 1.5003137588500977, "learning_rate": 1.867118124346412e-05, "loss": 0.484, "step": 17431 }, { "epoch": 2.8456797681727277, "grad_norm": 1.9186122417449951, "learning_rate": 1.867102317492271e-05, "loss": 0.662, "step": 17432 }, { "epoch": 2.845843026815232, "grad_norm": 1.7311291694641113, "learning_rate": 1.8670865097649585e-05, "loss": 0.5083, "step": 17433 }, { "epoch": 2.8460062854577366, "grad_norm": 1.72804594039917, "learning_rate": 1.86707070116449e-05, "loss": 0.5171, "step": 17434 }, { "epoch": 2.846169544100241, "grad_norm": 1.773207426071167, "learning_rate": 1.8670548916908817e-05, "loss": 0.5484, "step": 17435 }, { "epoch": 2.8463328027427455, "grad_norm": 1.7377712726593018, "learning_rate": 1.8670390813441494e-05, "loss": 0.5337, "step": 17436 }, { "epoch": 2.8464960613852495, "grad_norm": 2.0627620220184326, "learning_rate": 1.867023270124309e-05, "loss": 0.5933, "step": 17437 }, { "epoch": 2.846659320027754, "grad_norm": 1.6090749502182007, "learning_rate": 1.8670074580313763e-05, "loss": 0.4719, "step": 17438 }, { "epoch": 2.8468225786702583, "grad_norm": 1.9800165891647339, "learning_rate": 1.866991645065367e-05, "loss": 0.558, "step": 17439 }, { "epoch": 2.846985837312763, "grad_norm": 1.7464150190353394, "learning_rate": 1.866975831226298e-05, "loss": 0.5441, "step": 17440 }, { "epoch": 2.847149095955267, "grad_norm": 1.6675916910171509, "learning_rate": 1.866960016514184e-05, "loss": 0.4991, "step": 17441 }, { "epoch": 2.8473123545977717, "grad_norm": 2.1955721378326416, "learning_rate": 1.8669442009290416e-05, "loss": 0.6648, "step": 17442 }, { "epoch": 2.8474756132402756, "grad_norm": 1.6307456493377686, "learning_rate": 1.8669283844708866e-05, "loss": 0.523, "step": 17443 }, { "epoch": 2.84763887188278, "grad_norm": 2.0129706859588623, "learning_rate": 1.866912567139735e-05, "loss": 0.6666, "step": 17444 }, { "epoch": 2.8478021305252845, "grad_norm": 1.8485057353973389, "learning_rate": 1.866896748935603e-05, "loss": 0.5605, "step": 17445 }, { "epoch": 2.847965389167789, "grad_norm": 1.8940174579620361, "learning_rate": 1.866880929858506e-05, "loss": 0.5711, "step": 17446 }, { "epoch": 2.8481286478102934, "grad_norm": 1.8853673934936523, "learning_rate": 1.8668651099084602e-05, "loss": 0.5983, "step": 17447 }, { "epoch": 2.848291906452798, "grad_norm": 1.8383829593658447, "learning_rate": 1.866849289085481e-05, "loss": 0.5288, "step": 17448 }, { "epoch": 2.8484551650953023, "grad_norm": 1.6661547422409058, "learning_rate": 1.866833467389585e-05, "loss": 0.5, "step": 17449 }, { "epoch": 2.8486184237378067, "grad_norm": 2.011430501937866, "learning_rate": 1.8668176448207883e-05, "loss": 0.6637, "step": 17450 }, { "epoch": 2.848781682380311, "grad_norm": 1.3845654726028442, "learning_rate": 1.8668018213791062e-05, "loss": 0.4378, "step": 17451 }, { "epoch": 2.8489449410228156, "grad_norm": 1.6427555084228516, "learning_rate": 1.8667859970645547e-05, "loss": 0.5432, "step": 17452 }, { "epoch": 2.84910819966532, "grad_norm": 2.185577154159546, "learning_rate": 1.8667701718771502e-05, "loss": 0.6113, "step": 17453 }, { "epoch": 2.849271458307824, "grad_norm": 1.5195000171661377, "learning_rate": 1.8667543458169084e-05, "loss": 0.5803, "step": 17454 }, { "epoch": 2.8494347169503285, "grad_norm": 2.003730535507202, "learning_rate": 1.8667385188838453e-05, "loss": 0.6801, "step": 17455 }, { "epoch": 2.849597975592833, "grad_norm": 2.300915241241455, "learning_rate": 1.8667226910779767e-05, "loss": 0.6326, "step": 17456 }, { "epoch": 2.8497612342353373, "grad_norm": 1.5472261905670166, "learning_rate": 1.8667068623993182e-05, "loss": 0.6965, "step": 17457 }, { "epoch": 2.8499244928778418, "grad_norm": 1.9418416023254395, "learning_rate": 1.8666910328478862e-05, "loss": 0.575, "step": 17458 }, { "epoch": 2.850087751520346, "grad_norm": 1.6450414657592773, "learning_rate": 1.8666752024236968e-05, "loss": 0.5569, "step": 17459 }, { "epoch": 2.8502510101628507, "grad_norm": 1.922398328781128, "learning_rate": 1.8666593711267656e-05, "loss": 0.6325, "step": 17460 }, { "epoch": 2.8504142688053546, "grad_norm": 1.3919157981872559, "learning_rate": 1.8666435389571087e-05, "loss": 0.4254, "step": 17461 }, { "epoch": 2.850577527447859, "grad_norm": 1.784253478050232, "learning_rate": 1.8666277059147418e-05, "loss": 0.6469, "step": 17462 }, { "epoch": 2.8507407860903635, "grad_norm": 1.956870198249817, "learning_rate": 1.8666118719996813e-05, "loss": 0.5544, "step": 17463 }, { "epoch": 2.850904044732868, "grad_norm": 2.0529632568359375, "learning_rate": 1.8665960372119427e-05, "loss": 0.7675, "step": 17464 }, { "epoch": 2.8510673033753724, "grad_norm": 2.0510621070861816, "learning_rate": 1.8665802015515423e-05, "loss": 0.5761, "step": 17465 }, { "epoch": 2.851230562017877, "grad_norm": 1.7224055528640747, "learning_rate": 1.8665643650184954e-05, "loss": 0.5832, "step": 17466 }, { "epoch": 2.8513938206603813, "grad_norm": 2.0949854850769043, "learning_rate": 1.866548527612819e-05, "loss": 0.6459, "step": 17467 }, { "epoch": 2.8515570793028857, "grad_norm": 2.1213436126708984, "learning_rate": 1.866532689334528e-05, "loss": 0.7041, "step": 17468 }, { "epoch": 2.85172033794539, "grad_norm": 1.7411916255950928, "learning_rate": 1.866516850183639e-05, "loss": 0.6601, "step": 17469 }, { "epoch": 2.8518835965878946, "grad_norm": 1.9790385961532593, "learning_rate": 1.866501010160168e-05, "loss": 0.5977, "step": 17470 }, { "epoch": 2.852046855230399, "grad_norm": 1.84042489528656, "learning_rate": 1.8664851692641305e-05, "loss": 0.5726, "step": 17471 }, { "epoch": 2.852210113872903, "grad_norm": 1.8415707349777222, "learning_rate": 1.8664693274955428e-05, "loss": 0.5649, "step": 17472 }, { "epoch": 2.8523733725154075, "grad_norm": 1.610642671585083, "learning_rate": 1.8664534848544203e-05, "loss": 0.545, "step": 17473 }, { "epoch": 2.852536631157912, "grad_norm": 1.9735251665115356, "learning_rate": 1.8664376413407798e-05, "loss": 0.6371, "step": 17474 }, { "epoch": 2.8526998898004163, "grad_norm": 1.6927485466003418, "learning_rate": 1.8664217969546366e-05, "loss": 0.5617, "step": 17475 }, { "epoch": 2.8528631484429208, "grad_norm": 1.5354328155517578, "learning_rate": 1.866405951696007e-05, "loss": 0.4784, "step": 17476 }, { "epoch": 2.853026407085425, "grad_norm": 1.9953933954238892, "learning_rate": 1.8663901055649067e-05, "loss": 0.6408, "step": 17477 }, { "epoch": 2.853189665727929, "grad_norm": 1.8826286792755127, "learning_rate": 1.866374258561352e-05, "loss": 0.6654, "step": 17478 }, { "epoch": 2.8533529243704336, "grad_norm": 1.6042603254318237, "learning_rate": 1.8663584106853584e-05, "loss": 0.5381, "step": 17479 }, { "epoch": 2.853516183012938, "grad_norm": 1.732203722000122, "learning_rate": 1.8663425619369424e-05, "loss": 0.5959, "step": 17480 }, { "epoch": 2.8536794416554425, "grad_norm": 1.9894624948501587, "learning_rate": 1.8663267123161194e-05, "loss": 0.513, "step": 17481 }, { "epoch": 2.853842700297947, "grad_norm": 1.67531418800354, "learning_rate": 1.866310861822906e-05, "loss": 0.5622, "step": 17482 }, { "epoch": 2.8540059589404514, "grad_norm": 1.5791893005371094, "learning_rate": 1.8662950104573174e-05, "loss": 0.4944, "step": 17483 }, { "epoch": 2.854169217582956, "grad_norm": 2.155879020690918, "learning_rate": 1.86627915821937e-05, "loss": 0.6619, "step": 17484 }, { "epoch": 2.8543324762254603, "grad_norm": 1.9815895557403564, "learning_rate": 1.8662633051090798e-05, "loss": 0.6129, "step": 17485 }, { "epoch": 2.8544957348679647, "grad_norm": 1.4639344215393066, "learning_rate": 1.8662474511264627e-05, "loss": 0.4944, "step": 17486 }, { "epoch": 2.854658993510469, "grad_norm": 1.8373457193374634, "learning_rate": 1.8662315962715347e-05, "loss": 0.6026, "step": 17487 }, { "epoch": 2.8548222521529736, "grad_norm": 1.7456088066101074, "learning_rate": 1.8662157405443116e-05, "loss": 0.5829, "step": 17488 }, { "epoch": 2.8549855107954776, "grad_norm": 1.827806830406189, "learning_rate": 1.8661998839448096e-05, "loss": 0.5329, "step": 17489 }, { "epoch": 2.855148769437982, "grad_norm": 1.616477370262146, "learning_rate": 1.8661840264730445e-05, "loss": 0.5581, "step": 17490 }, { "epoch": 2.8553120280804865, "grad_norm": 1.620615005493164, "learning_rate": 1.8661681681290323e-05, "loss": 0.5168, "step": 17491 }, { "epoch": 2.855475286722991, "grad_norm": 1.754102110862732, "learning_rate": 1.866152308912789e-05, "loss": 0.5461, "step": 17492 }, { "epoch": 2.8556385453654953, "grad_norm": 1.7308567762374878, "learning_rate": 1.8661364488243305e-05, "loss": 0.5739, "step": 17493 }, { "epoch": 2.8558018040079998, "grad_norm": 1.6517187356948853, "learning_rate": 1.8661205878636726e-05, "loss": 0.4752, "step": 17494 }, { "epoch": 2.855965062650504, "grad_norm": 1.7421560287475586, "learning_rate": 1.866104726030832e-05, "loss": 0.5761, "step": 17495 }, { "epoch": 2.856128321293008, "grad_norm": 1.6477164030075073, "learning_rate": 1.8660888633258237e-05, "loss": 0.5157, "step": 17496 }, { "epoch": 2.8562915799355126, "grad_norm": 2.123060941696167, "learning_rate": 1.8660729997486648e-05, "loss": 0.6828, "step": 17497 }, { "epoch": 2.856454838578017, "grad_norm": 1.7502100467681885, "learning_rate": 1.86605713529937e-05, "loss": 0.6177, "step": 17498 }, { "epoch": 2.8566180972205215, "grad_norm": 1.6819252967834473, "learning_rate": 1.866041269977956e-05, "loss": 0.6148, "step": 17499 }, { "epoch": 2.856781355863026, "grad_norm": 1.6932228803634644, "learning_rate": 1.866025403784439e-05, "loss": 0.3907, "step": 17500 }, { "epoch": 2.8569446145055304, "grad_norm": 1.842013955116272, "learning_rate": 1.8660095367188343e-05, "loss": 0.5749, "step": 17501 }, { "epoch": 2.857107873148035, "grad_norm": 2.0424630641937256, "learning_rate": 1.8659936687811583e-05, "loss": 0.6381, "step": 17502 }, { "epoch": 2.8572711317905393, "grad_norm": 1.8658137321472168, "learning_rate": 1.865977799971427e-05, "loss": 0.582, "step": 17503 }, { "epoch": 2.8574343904330437, "grad_norm": 1.716103434562683, "learning_rate": 1.865961930289656e-05, "loss": 0.5881, "step": 17504 }, { "epoch": 2.857597649075548, "grad_norm": 1.4804948568344116, "learning_rate": 1.865946059735862e-05, "loss": 0.4856, "step": 17505 }, { "epoch": 2.8577609077180526, "grad_norm": 1.758158564567566, "learning_rate": 1.86593018831006e-05, "loss": 0.5857, "step": 17506 }, { "epoch": 2.8579241663605566, "grad_norm": 1.6069107055664062, "learning_rate": 1.865914316012267e-05, "loss": 0.6033, "step": 17507 }, { "epoch": 2.858087425003061, "grad_norm": 1.6949272155761719, "learning_rate": 1.8658984428424984e-05, "loss": 0.4893, "step": 17508 }, { "epoch": 2.8582506836455654, "grad_norm": 1.8007099628448486, "learning_rate": 1.8658825688007702e-05, "loss": 0.6568, "step": 17509 }, { "epoch": 2.85841394228807, "grad_norm": 1.657939076423645, "learning_rate": 1.8658666938870985e-05, "loss": 0.5483, "step": 17510 }, { "epoch": 2.8585772009305743, "grad_norm": 1.9038469791412354, "learning_rate": 1.8658508181014996e-05, "loss": 0.618, "step": 17511 }, { "epoch": 2.8587404595730788, "grad_norm": 1.7034354209899902, "learning_rate": 1.8658349414439887e-05, "loss": 0.5781, "step": 17512 }, { "epoch": 2.858903718215583, "grad_norm": 1.9488487243652344, "learning_rate": 1.8658190639145827e-05, "loss": 0.5316, "step": 17513 }, { "epoch": 2.859066976858087, "grad_norm": 2.0149948596954346, "learning_rate": 1.8658031855132965e-05, "loss": 0.5645, "step": 17514 }, { "epoch": 2.8592302355005916, "grad_norm": 1.8149365186691284, "learning_rate": 1.8657873062401472e-05, "loss": 0.5878, "step": 17515 }, { "epoch": 2.859393494143096, "grad_norm": 1.7306607961654663, "learning_rate": 1.8657714260951502e-05, "loss": 0.608, "step": 17516 }, { "epoch": 2.8595567527856005, "grad_norm": 1.5607361793518066, "learning_rate": 1.865755545078322e-05, "loss": 0.531, "step": 17517 }, { "epoch": 2.859720011428105, "grad_norm": 1.9952592849731445, "learning_rate": 1.8657396631896775e-05, "loss": 0.5939, "step": 17518 }, { "epoch": 2.8598832700706094, "grad_norm": 1.93032705783844, "learning_rate": 1.8657237804292335e-05, "loss": 0.6348, "step": 17519 }, { "epoch": 2.860046528713114, "grad_norm": 1.628165602684021, "learning_rate": 1.8657078967970063e-05, "loss": 0.5479, "step": 17520 }, { "epoch": 2.8602097873556183, "grad_norm": 1.8972247838974, "learning_rate": 1.865692012293011e-05, "loss": 0.6577, "step": 17521 }, { "epoch": 2.8603730459981227, "grad_norm": 1.8272473812103271, "learning_rate": 1.8656761269172645e-05, "loss": 0.6382, "step": 17522 }, { "epoch": 2.860536304640627, "grad_norm": 1.6367708444595337, "learning_rate": 1.865660240669782e-05, "loss": 0.5295, "step": 17523 }, { "epoch": 2.8606995632831316, "grad_norm": 1.4242279529571533, "learning_rate": 1.86564435355058e-05, "loss": 0.4989, "step": 17524 }, { "epoch": 2.8608628219256356, "grad_norm": 1.5849908590316772, "learning_rate": 1.8656284655596744e-05, "loss": 0.5418, "step": 17525 }, { "epoch": 2.86102608056814, "grad_norm": 1.596906065940857, "learning_rate": 1.8656125766970815e-05, "loss": 0.4781, "step": 17526 }, { "epoch": 2.8611893392106444, "grad_norm": 1.9862418174743652, "learning_rate": 1.8655966869628167e-05, "loss": 0.6936, "step": 17527 }, { "epoch": 2.861352597853149, "grad_norm": 1.6952341794967651, "learning_rate": 1.865580796356896e-05, "loss": 0.5362, "step": 17528 }, { "epoch": 2.8615158564956533, "grad_norm": 1.8117387294769287, "learning_rate": 1.865564904879336e-05, "loss": 0.6141, "step": 17529 }, { "epoch": 2.8616791151381578, "grad_norm": 1.7546583414077759, "learning_rate": 1.8655490125301523e-05, "loss": 0.5842, "step": 17530 }, { "epoch": 2.8618423737806618, "grad_norm": 1.698388934135437, "learning_rate": 1.8655331193093607e-05, "loss": 0.524, "step": 17531 }, { "epoch": 2.862005632423166, "grad_norm": 1.8024442195892334, "learning_rate": 1.8655172252169776e-05, "loss": 0.657, "step": 17532 }, { "epoch": 2.8621688910656706, "grad_norm": 1.6800333261489868, "learning_rate": 1.8655013302530193e-05, "loss": 0.5904, "step": 17533 }, { "epoch": 2.862332149708175, "grad_norm": 1.5582960844039917, "learning_rate": 1.865485434417501e-05, "loss": 0.5344, "step": 17534 }, { "epoch": 2.8624954083506795, "grad_norm": 1.8022692203521729, "learning_rate": 1.865469537710439e-05, "loss": 0.6043, "step": 17535 }, { "epoch": 2.862658666993184, "grad_norm": 1.6592050790786743, "learning_rate": 1.8654536401318495e-05, "loss": 0.489, "step": 17536 }, { "epoch": 2.8628219256356884, "grad_norm": 1.4314780235290527, "learning_rate": 1.8654377416817486e-05, "loss": 0.4792, "step": 17537 }, { "epoch": 2.862985184278193, "grad_norm": 1.4500106573104858, "learning_rate": 1.865421842360152e-05, "loss": 0.4921, "step": 17538 }, { "epoch": 2.8631484429206973, "grad_norm": 1.9490898847579956, "learning_rate": 1.865405942167076e-05, "loss": 0.6414, "step": 17539 }, { "epoch": 2.8633117015632017, "grad_norm": 2.2474560737609863, "learning_rate": 1.8653900411025365e-05, "loss": 0.8462, "step": 17540 }, { "epoch": 2.863474960205706, "grad_norm": 1.816515564918518, "learning_rate": 1.865374139166549e-05, "loss": 0.5417, "step": 17541 }, { "epoch": 2.86363821884821, "grad_norm": 1.8097059726715088, "learning_rate": 1.8653582363591303e-05, "loss": 0.5553, "step": 17542 }, { "epoch": 2.8638014774907146, "grad_norm": 1.8746459484100342, "learning_rate": 1.865342332680296e-05, "loss": 0.6117, "step": 17543 }, { "epoch": 2.863964736133219, "grad_norm": 1.5990581512451172, "learning_rate": 1.8653264281300622e-05, "loss": 0.5709, "step": 17544 }, { "epoch": 2.8641279947757234, "grad_norm": 1.3300626277923584, "learning_rate": 1.8653105227084447e-05, "loss": 0.4405, "step": 17545 }, { "epoch": 2.864291253418228, "grad_norm": 1.8248804807662964, "learning_rate": 1.86529461641546e-05, "loss": 0.5835, "step": 17546 }, { "epoch": 2.8644545120607323, "grad_norm": 1.4382096529006958, "learning_rate": 1.865278709251124e-05, "loss": 0.4535, "step": 17547 }, { "epoch": 2.8646177707032368, "grad_norm": 1.7208261489868164, "learning_rate": 1.8652628012154525e-05, "loss": 0.6345, "step": 17548 }, { "epoch": 2.8647810293457407, "grad_norm": 1.6773741245269775, "learning_rate": 1.865246892308461e-05, "loss": 0.5144, "step": 17549 }, { "epoch": 2.864944287988245, "grad_norm": 1.7468316555023193, "learning_rate": 1.865230982530167e-05, "loss": 0.6554, "step": 17550 }, { "epoch": 2.8651075466307496, "grad_norm": 1.5242927074432373, "learning_rate": 1.865215071880585e-05, "loss": 0.4758, "step": 17551 }, { "epoch": 2.865270805273254, "grad_norm": 1.5083783864974976, "learning_rate": 1.865199160359732e-05, "loss": 0.526, "step": 17552 }, { "epoch": 2.8654340639157585, "grad_norm": 1.577842116355896, "learning_rate": 1.8651832479676236e-05, "loss": 0.5423, "step": 17553 }, { "epoch": 2.865597322558263, "grad_norm": 1.6997284889221191, "learning_rate": 1.8651673347042757e-05, "loss": 0.5465, "step": 17554 }, { "epoch": 2.8657605812007674, "grad_norm": 1.5696581602096558, "learning_rate": 1.8651514205697046e-05, "loss": 0.4627, "step": 17555 }, { "epoch": 2.865923839843272, "grad_norm": 1.7012516260147095, "learning_rate": 1.8651355055639263e-05, "loss": 0.4834, "step": 17556 }, { "epoch": 2.8660870984857763, "grad_norm": 1.5914578437805176, "learning_rate": 1.865119589686957e-05, "loss": 0.5102, "step": 17557 }, { "epoch": 2.8662503571282807, "grad_norm": 2.114042043685913, "learning_rate": 1.865103672938812e-05, "loss": 0.6213, "step": 17558 }, { "epoch": 2.866413615770785, "grad_norm": 2.059462785720825, "learning_rate": 1.8650877553195085e-05, "loss": 0.6161, "step": 17559 }, { "epoch": 2.866576874413289, "grad_norm": 1.84483003616333, "learning_rate": 1.8650718368290613e-05, "loss": 0.636, "step": 17560 }, { "epoch": 2.8667401330557936, "grad_norm": 2.01324462890625, "learning_rate": 1.865055917467487e-05, "loss": 0.5591, "step": 17561 }, { "epoch": 2.866903391698298, "grad_norm": 1.8647021055221558, "learning_rate": 1.865039997234802e-05, "loss": 0.5371, "step": 17562 }, { "epoch": 2.8670666503408024, "grad_norm": 1.8802692890167236, "learning_rate": 1.8650240761310217e-05, "loss": 0.6158, "step": 17563 }, { "epoch": 2.867229908983307, "grad_norm": 1.8047963380813599, "learning_rate": 1.8650081541561622e-05, "loss": 0.6092, "step": 17564 }, { "epoch": 2.8673931676258113, "grad_norm": 2.3199312686920166, "learning_rate": 1.86499223131024e-05, "loss": 0.7841, "step": 17565 }, { "epoch": 2.8675564262683153, "grad_norm": 1.8640300035476685, "learning_rate": 1.864976307593271e-05, "loss": 0.6003, "step": 17566 }, { "epoch": 2.8677196849108197, "grad_norm": 1.6161607503890991, "learning_rate": 1.8649603830052708e-05, "loss": 0.4905, "step": 17567 }, { "epoch": 2.867882943553324, "grad_norm": 1.6727256774902344, "learning_rate": 1.8649444575462558e-05, "loss": 0.4968, "step": 17568 }, { "epoch": 2.8680462021958286, "grad_norm": 1.6471205949783325, "learning_rate": 1.864928531216242e-05, "loss": 0.4968, "step": 17569 }, { "epoch": 2.868209460838333, "grad_norm": 1.935549259185791, "learning_rate": 1.8649126040152454e-05, "loss": 0.6276, "step": 17570 }, { "epoch": 2.8683727194808375, "grad_norm": 1.6889914274215698, "learning_rate": 1.8648966759432818e-05, "loss": 0.5076, "step": 17571 }, { "epoch": 2.868535978123342, "grad_norm": 1.7923798561096191, "learning_rate": 1.8648807470003677e-05, "loss": 0.5293, "step": 17572 }, { "epoch": 2.8686992367658464, "grad_norm": 2.0820131301879883, "learning_rate": 1.8648648171865188e-05, "loss": 0.5377, "step": 17573 }, { "epoch": 2.868862495408351, "grad_norm": 1.7815223932266235, "learning_rate": 1.8648488865017517e-05, "loss": 0.6062, "step": 17574 }, { "epoch": 2.8690257540508552, "grad_norm": 2.004448652267456, "learning_rate": 1.864832954946081e-05, "loss": 0.5053, "step": 17575 }, { "epoch": 2.8691890126933597, "grad_norm": 1.7800742387771606, "learning_rate": 1.864817022519525e-05, "loss": 0.6172, "step": 17576 }, { "epoch": 2.8693522713358637, "grad_norm": 1.7194077968597412, "learning_rate": 1.864801089222098e-05, "loss": 0.5352, "step": 17577 }, { "epoch": 2.869515529978368, "grad_norm": 1.6917822360992432, "learning_rate": 1.8647851550538162e-05, "loss": 0.6193, "step": 17578 }, { "epoch": 2.8696787886208726, "grad_norm": 1.6163334846496582, "learning_rate": 1.8647692200146963e-05, "loss": 0.5251, "step": 17579 }, { "epoch": 2.869842047263377, "grad_norm": 1.7542035579681396, "learning_rate": 1.8647532841047536e-05, "loss": 0.556, "step": 17580 }, { "epoch": 2.8700053059058814, "grad_norm": 1.7970563173294067, "learning_rate": 1.8647373473240052e-05, "loss": 0.5309, "step": 17581 }, { "epoch": 2.870168564548386, "grad_norm": 1.4615005254745483, "learning_rate": 1.8647214096724662e-05, "loss": 0.4686, "step": 17582 }, { "epoch": 2.8703318231908903, "grad_norm": 1.7775514125823975, "learning_rate": 1.8647054711501527e-05, "loss": 0.5241, "step": 17583 }, { "epoch": 2.8704950818333943, "grad_norm": 1.611187219619751, "learning_rate": 1.8646895317570815e-05, "loss": 0.5597, "step": 17584 }, { "epoch": 2.8706583404758987, "grad_norm": 1.8891314268112183, "learning_rate": 1.864673591493268e-05, "loss": 0.6259, "step": 17585 }, { "epoch": 2.870821599118403, "grad_norm": 1.3723366260528564, "learning_rate": 1.8646576503587284e-05, "loss": 0.5028, "step": 17586 }, { "epoch": 2.8709848577609076, "grad_norm": 1.7788639068603516, "learning_rate": 1.8646417083534787e-05, "loss": 0.5962, "step": 17587 }, { "epoch": 2.871148116403412, "grad_norm": 1.9227019548416138, "learning_rate": 1.864625765477535e-05, "loss": 0.6189, "step": 17588 }, { "epoch": 2.8713113750459165, "grad_norm": 1.703065276145935, "learning_rate": 1.8646098217309137e-05, "loss": 0.4898, "step": 17589 }, { "epoch": 2.871474633688421, "grad_norm": 1.681036353111267, "learning_rate": 1.8645938771136303e-05, "loss": 0.5607, "step": 17590 }, { "epoch": 2.8716378923309254, "grad_norm": 1.6883313655853271, "learning_rate": 1.864577931625701e-05, "loss": 0.5561, "step": 17591 }, { "epoch": 2.87180115097343, "grad_norm": 1.5422677993774414, "learning_rate": 1.864561985267142e-05, "loss": 0.4847, "step": 17592 }, { "epoch": 2.8719644096159342, "grad_norm": 1.4655989408493042, "learning_rate": 1.8645460380379695e-05, "loss": 0.4964, "step": 17593 }, { "epoch": 2.8721276682584387, "grad_norm": 1.9326354265213013, "learning_rate": 1.864530089938199e-05, "loss": 0.6187, "step": 17594 }, { "epoch": 2.8722909269009427, "grad_norm": 1.7947176694869995, "learning_rate": 1.8645141409678477e-05, "loss": 0.4636, "step": 17595 }, { "epoch": 2.872454185543447, "grad_norm": 1.9411805868148804, "learning_rate": 1.8644981911269303e-05, "loss": 0.6111, "step": 17596 }, { "epoch": 2.8726174441859516, "grad_norm": 1.568211555480957, "learning_rate": 1.8644822404154634e-05, "loss": 0.4971, "step": 17597 }, { "epoch": 2.872780702828456, "grad_norm": 2.163645029067993, "learning_rate": 1.8644662888334634e-05, "loss": 0.7066, "step": 17598 }, { "epoch": 2.8729439614709604, "grad_norm": 1.703851580619812, "learning_rate": 1.8644503363809456e-05, "loss": 0.5285, "step": 17599 }, { "epoch": 2.873107220113465, "grad_norm": 2.012359857559204, "learning_rate": 1.864434383057927e-05, "loss": 0.6198, "step": 17600 }, { "epoch": 2.8732704787559693, "grad_norm": 1.4841735363006592, "learning_rate": 1.864418428864423e-05, "loss": 0.5174, "step": 17601 }, { "epoch": 2.8734337373984733, "grad_norm": 1.7246414422988892, "learning_rate": 1.8644024738004498e-05, "loss": 0.5154, "step": 17602 }, { "epoch": 2.8735969960409777, "grad_norm": 2.029541254043579, "learning_rate": 1.8643865178660235e-05, "loss": 0.6633, "step": 17603 }, { "epoch": 2.873760254683482, "grad_norm": 1.8386707305908203, "learning_rate": 1.8643705610611607e-05, "loss": 0.5895, "step": 17604 }, { "epoch": 2.8739235133259866, "grad_norm": 1.473022699356079, "learning_rate": 1.8643546033858764e-05, "loss": 0.4513, "step": 17605 }, { "epoch": 2.874086771968491, "grad_norm": 1.6123994588851929, "learning_rate": 1.8643386448401878e-05, "loss": 0.4875, "step": 17606 }, { "epoch": 2.8742500306109955, "grad_norm": 2.146212100982666, "learning_rate": 1.8643226854241102e-05, "loss": 0.6185, "step": 17607 }, { "epoch": 2.8744132892535, "grad_norm": 1.9847946166992188, "learning_rate": 1.8643067251376598e-05, "loss": 0.645, "step": 17608 }, { "epoch": 2.8745765478960044, "grad_norm": 1.6543450355529785, "learning_rate": 1.8642907639808525e-05, "loss": 0.4809, "step": 17609 }, { "epoch": 2.874739806538509, "grad_norm": 1.9411708116531372, "learning_rate": 1.864274801953705e-05, "loss": 0.5918, "step": 17610 }, { "epoch": 2.8749030651810132, "grad_norm": 2.1923749446868896, "learning_rate": 1.8642588390562325e-05, "loss": 0.6027, "step": 17611 }, { "epoch": 2.8750663238235177, "grad_norm": 1.6383713483810425, "learning_rate": 1.864242875288452e-05, "loss": 0.5417, "step": 17612 }, { "epoch": 2.8752295824660217, "grad_norm": 1.5123146772384644, "learning_rate": 1.8642269106503793e-05, "loss": 0.4899, "step": 17613 }, { "epoch": 2.875392841108526, "grad_norm": 1.8014166355133057, "learning_rate": 1.86421094514203e-05, "loss": 0.545, "step": 17614 }, { "epoch": 2.8755560997510305, "grad_norm": 1.7251265048980713, "learning_rate": 1.864194978763421e-05, "loss": 0.5634, "step": 17615 }, { "epoch": 2.875719358393535, "grad_norm": 1.7460079193115234, "learning_rate": 1.8641790115145673e-05, "loss": 0.576, "step": 17616 }, { "epoch": 2.8758826170360394, "grad_norm": 2.12522554397583, "learning_rate": 1.8641630433954857e-05, "loss": 0.757, "step": 17617 }, { "epoch": 2.876045875678544, "grad_norm": 1.5534641742706299, "learning_rate": 1.8641470744061926e-05, "loss": 0.5338, "step": 17618 }, { "epoch": 2.876209134321048, "grad_norm": 1.3147077560424805, "learning_rate": 1.864131104546703e-05, "loss": 0.4378, "step": 17619 }, { "epoch": 2.8763723929635523, "grad_norm": 1.6701453924179077, "learning_rate": 1.864115133817034e-05, "loss": 0.6037, "step": 17620 }, { "epoch": 2.8765356516060567, "grad_norm": 1.8124932050704956, "learning_rate": 1.864099162217201e-05, "loss": 0.4974, "step": 17621 }, { "epoch": 2.876698910248561, "grad_norm": 2.1269047260284424, "learning_rate": 1.864083189747221e-05, "loss": 0.6311, "step": 17622 }, { "epoch": 2.8768621688910656, "grad_norm": 1.794930100440979, "learning_rate": 1.8640672164071088e-05, "loss": 0.5275, "step": 17623 }, { "epoch": 2.87702542753357, "grad_norm": 1.965524435043335, "learning_rate": 1.8640512421968814e-05, "loss": 0.7345, "step": 17624 }, { "epoch": 2.8771886861760745, "grad_norm": 1.849182367324829, "learning_rate": 1.8640352671165544e-05, "loss": 0.6587, "step": 17625 }, { "epoch": 2.877351944818579, "grad_norm": 1.9332880973815918, "learning_rate": 1.8640192911661442e-05, "loss": 0.6841, "step": 17626 }, { "epoch": 2.8775152034610834, "grad_norm": 2.4250407218933105, "learning_rate": 1.864003314345667e-05, "loss": 0.5202, "step": 17627 }, { "epoch": 2.877678462103588, "grad_norm": 1.6894888877868652, "learning_rate": 1.8639873366551384e-05, "loss": 0.4336, "step": 17628 }, { "epoch": 2.8778417207460922, "grad_norm": 2.165438175201416, "learning_rate": 1.8639713580945752e-05, "loss": 0.679, "step": 17629 }, { "epoch": 2.8780049793885962, "grad_norm": 1.720058560371399, "learning_rate": 1.8639553786639927e-05, "loss": 0.5485, "step": 17630 }, { "epoch": 2.8781682380311007, "grad_norm": 1.6554187536239624, "learning_rate": 1.8639393983634077e-05, "loss": 0.5957, "step": 17631 }, { "epoch": 2.878331496673605, "grad_norm": 1.7799054384231567, "learning_rate": 1.8639234171928355e-05, "loss": 0.5486, "step": 17632 }, { "epoch": 2.8784947553161095, "grad_norm": 1.8246937990188599, "learning_rate": 1.8639074351522927e-05, "loss": 0.6062, "step": 17633 }, { "epoch": 2.878658013958614, "grad_norm": 1.7002384662628174, "learning_rate": 1.8638914522417955e-05, "loss": 0.5042, "step": 17634 }, { "epoch": 2.8788212726011184, "grad_norm": 1.3789575099945068, "learning_rate": 1.86387546846136e-05, "loss": 0.4431, "step": 17635 }, { "epoch": 2.878984531243623, "grad_norm": 1.8110694885253906, "learning_rate": 1.8638594838110023e-05, "loss": 0.5452, "step": 17636 }, { "epoch": 2.879147789886127, "grad_norm": 2.0096914768218994, "learning_rate": 1.863843498290738e-05, "loss": 0.6245, "step": 17637 }, { "epoch": 2.8793110485286313, "grad_norm": 1.7623430490493774, "learning_rate": 1.8638275119005834e-05, "loss": 0.5282, "step": 17638 }, { "epoch": 2.8794743071711357, "grad_norm": 1.8232334852218628, "learning_rate": 1.8638115246405548e-05, "loss": 0.6149, "step": 17639 }, { "epoch": 2.87963756581364, "grad_norm": 1.647824764251709, "learning_rate": 1.8637955365106685e-05, "loss": 0.5711, "step": 17640 }, { "epoch": 2.8798008244561446, "grad_norm": 1.6714307069778442, "learning_rate": 1.86377954751094e-05, "loss": 0.5639, "step": 17641 }, { "epoch": 2.879964083098649, "grad_norm": 1.7891960144042969, "learning_rate": 1.8637635576413857e-05, "loss": 0.5967, "step": 17642 }, { "epoch": 2.8801273417411535, "grad_norm": 1.8879578113555908, "learning_rate": 1.863747566902022e-05, "loss": 0.5723, "step": 17643 }, { "epoch": 2.880290600383658, "grad_norm": 2.963064193725586, "learning_rate": 1.8637315752928647e-05, "loss": 0.5239, "step": 17644 }, { "epoch": 2.8804538590261624, "grad_norm": 2.192307233810425, "learning_rate": 1.8637155828139297e-05, "loss": 0.6599, "step": 17645 }, { "epoch": 2.880617117668667, "grad_norm": 1.437577247619629, "learning_rate": 1.8636995894652338e-05, "loss": 0.5162, "step": 17646 }, { "epoch": 2.8807803763111712, "grad_norm": 2.0516817569732666, "learning_rate": 1.863683595246792e-05, "loss": 0.5713, "step": 17647 }, { "epoch": 2.8809436349536752, "grad_norm": 1.9263041019439697, "learning_rate": 1.8636676001586217e-05, "loss": 0.6355, "step": 17648 }, { "epoch": 2.8811068935961797, "grad_norm": 1.6058074235916138, "learning_rate": 1.863651604200738e-05, "loss": 0.5382, "step": 17649 }, { "epoch": 2.881270152238684, "grad_norm": 1.447536587715149, "learning_rate": 1.863635607373157e-05, "loss": 0.4426, "step": 17650 }, { "epoch": 2.8814334108811885, "grad_norm": 1.8798907995224, "learning_rate": 1.8636196096758957e-05, "loss": 0.6299, "step": 17651 }, { "epoch": 2.881596669523693, "grad_norm": 2.0809919834136963, "learning_rate": 1.8636036111089697e-05, "loss": 0.5507, "step": 17652 }, { "epoch": 2.8817599281661974, "grad_norm": 1.619510293006897, "learning_rate": 1.863587611672395e-05, "loss": 0.5003, "step": 17653 }, { "epoch": 2.8819231868087014, "grad_norm": 1.9441224336624146, "learning_rate": 1.8635716113661876e-05, "loss": 0.6438, "step": 17654 }, { "epoch": 2.882086445451206, "grad_norm": 1.6985387802124023, "learning_rate": 1.863555610190364e-05, "loss": 0.6099, "step": 17655 }, { "epoch": 2.8822497040937103, "grad_norm": 1.517426609992981, "learning_rate": 1.86353960814494e-05, "loss": 0.5653, "step": 17656 }, { "epoch": 2.8824129627362147, "grad_norm": 1.7353768348693848, "learning_rate": 1.863523605229932e-05, "loss": 0.5119, "step": 17657 }, { "epoch": 2.882576221378719, "grad_norm": 1.882068395614624, "learning_rate": 1.863507601445356e-05, "loss": 0.5962, "step": 17658 }, { "epoch": 2.8827394800212236, "grad_norm": 1.4893999099731445, "learning_rate": 1.8634915967912278e-05, "loss": 0.509, "step": 17659 }, { "epoch": 2.882902738663728, "grad_norm": 1.578779935836792, "learning_rate": 1.863475591267564e-05, "loss": 0.4792, "step": 17660 }, { "epoch": 2.8830659973062325, "grad_norm": 1.6673457622528076, "learning_rate": 1.8634595848743804e-05, "loss": 0.5812, "step": 17661 }, { "epoch": 2.883229255948737, "grad_norm": 1.8076609373092651, "learning_rate": 1.8634435776116935e-05, "loss": 0.5854, "step": 17662 }, { "epoch": 2.8833925145912414, "grad_norm": 1.9446676969528198, "learning_rate": 1.863427569479519e-05, "loss": 0.5839, "step": 17663 }, { "epoch": 2.883555773233746, "grad_norm": 1.836538314819336, "learning_rate": 1.863411560477873e-05, "loss": 0.4936, "step": 17664 }, { "epoch": 2.88371903187625, "grad_norm": 1.7473241090774536, "learning_rate": 1.8633955506067717e-05, "loss": 0.5428, "step": 17665 }, { "epoch": 2.883882290518754, "grad_norm": 1.9030282497406006, "learning_rate": 1.8633795398662315e-05, "loss": 0.5532, "step": 17666 }, { "epoch": 2.8840455491612587, "grad_norm": 1.853783369064331, "learning_rate": 1.8633635282562683e-05, "loss": 0.5741, "step": 17667 }, { "epoch": 2.884208807803763, "grad_norm": 1.7717732191085815, "learning_rate": 1.8633475157768986e-05, "loss": 0.5668, "step": 17668 }, { "epoch": 2.8843720664462675, "grad_norm": 1.691122055053711, "learning_rate": 1.8633315024281376e-05, "loss": 0.5644, "step": 17669 }, { "epoch": 2.884535325088772, "grad_norm": 1.674294352531433, "learning_rate": 1.8633154882100024e-05, "loss": 0.5883, "step": 17670 }, { "epoch": 2.8846985837312764, "grad_norm": 1.6296192407608032, "learning_rate": 1.8632994731225084e-05, "loss": 0.5413, "step": 17671 }, { "epoch": 2.8848618423737804, "grad_norm": 1.7110458612442017, "learning_rate": 1.8632834571656727e-05, "loss": 0.5481, "step": 17672 }, { "epoch": 2.885025101016285, "grad_norm": 1.889492392539978, "learning_rate": 1.8632674403395102e-05, "loss": 0.5384, "step": 17673 }, { "epoch": 2.8851883596587893, "grad_norm": 1.556234359741211, "learning_rate": 1.8632514226440377e-05, "loss": 0.5246, "step": 17674 }, { "epoch": 2.8853516183012937, "grad_norm": 1.763159155845642, "learning_rate": 1.8632354040792716e-05, "loss": 0.5985, "step": 17675 }, { "epoch": 2.885514876943798, "grad_norm": 1.8138935565948486, "learning_rate": 1.863219384645227e-05, "loss": 0.5705, "step": 17676 }, { "epoch": 2.8856781355863026, "grad_norm": 1.7421720027923584, "learning_rate": 1.863203364341921e-05, "loss": 0.5802, "step": 17677 }, { "epoch": 2.885841394228807, "grad_norm": 1.678167462348938, "learning_rate": 1.86318734316937e-05, "loss": 0.4673, "step": 17678 }, { "epoch": 2.8860046528713115, "grad_norm": 1.6231141090393066, "learning_rate": 1.863171321127589e-05, "loss": 0.5437, "step": 17679 }, { "epoch": 2.886167911513816, "grad_norm": 1.7522189617156982, "learning_rate": 1.8631552982165946e-05, "loss": 0.4901, "step": 17680 }, { "epoch": 2.8863311701563203, "grad_norm": 1.8749809265136719, "learning_rate": 1.8631392744364034e-05, "loss": 0.5836, "step": 17681 }, { "epoch": 2.886494428798825, "grad_norm": 1.7159922122955322, "learning_rate": 1.863123249787031e-05, "loss": 0.5992, "step": 17682 }, { "epoch": 2.886657687441329, "grad_norm": 1.900062918663025, "learning_rate": 1.863107224268494e-05, "loss": 0.5491, "step": 17683 }, { "epoch": 2.886820946083833, "grad_norm": 1.775831699371338, "learning_rate": 1.863091197880808e-05, "loss": 0.5851, "step": 17684 }, { "epoch": 2.8869842047263377, "grad_norm": 1.7001153230667114, "learning_rate": 1.8630751706239894e-05, "loss": 0.6041, "step": 17685 }, { "epoch": 2.887147463368842, "grad_norm": 1.7428109645843506, "learning_rate": 1.8630591424980542e-05, "loss": 0.5864, "step": 17686 }, { "epoch": 2.8873107220113465, "grad_norm": 1.6521086692810059, "learning_rate": 1.863043113503019e-05, "loss": 0.5472, "step": 17687 }, { "epoch": 2.887473980653851, "grad_norm": 1.3973227739334106, "learning_rate": 1.8630270836388994e-05, "loss": 0.505, "step": 17688 }, { "epoch": 2.8876372392963554, "grad_norm": 1.7875254154205322, "learning_rate": 1.8630110529057118e-05, "loss": 0.605, "step": 17689 }, { "epoch": 2.8878004979388594, "grad_norm": 1.7325220108032227, "learning_rate": 1.862995021303472e-05, "loss": 0.5034, "step": 17690 }, { "epoch": 2.887963756581364, "grad_norm": 1.377974033355713, "learning_rate": 1.862978988832197e-05, "loss": 0.4334, "step": 17691 }, { "epoch": 2.8881270152238683, "grad_norm": 1.6927976608276367, "learning_rate": 1.8629629554919024e-05, "loss": 0.5382, "step": 17692 }, { "epoch": 2.8882902738663727, "grad_norm": 1.6076256036758423, "learning_rate": 1.8629469212826037e-05, "loss": 0.5347, "step": 17693 }, { "epoch": 2.888453532508877, "grad_norm": 1.9172288179397583, "learning_rate": 1.862930886204318e-05, "loss": 0.5881, "step": 17694 }, { "epoch": 2.8886167911513816, "grad_norm": 1.923653483390808, "learning_rate": 1.8629148502570613e-05, "loss": 0.4944, "step": 17695 }, { "epoch": 2.888780049793886, "grad_norm": 1.6875557899475098, "learning_rate": 1.8628988134408494e-05, "loss": 0.5376, "step": 17696 }, { "epoch": 2.8889433084363905, "grad_norm": 1.9606821537017822, "learning_rate": 1.8628827757556985e-05, "loss": 0.5411, "step": 17697 }, { "epoch": 2.889106567078895, "grad_norm": 2.1482462882995605, "learning_rate": 1.862866737201625e-05, "loss": 0.597, "step": 17698 }, { "epoch": 2.8892698257213993, "grad_norm": 2.1258907318115234, "learning_rate": 1.862850697778645e-05, "loss": 0.6809, "step": 17699 }, { "epoch": 2.889433084363904, "grad_norm": 2.0534894466400146, "learning_rate": 1.8628346574867748e-05, "loss": 0.6572, "step": 17700 }, { "epoch": 2.8895963430064078, "grad_norm": 1.606369972229004, "learning_rate": 1.86281861632603e-05, "loss": 0.5138, "step": 17701 }, { "epoch": 2.889759601648912, "grad_norm": 1.9984064102172852, "learning_rate": 1.862802574296427e-05, "loss": 0.6654, "step": 17702 }, { "epoch": 2.8899228602914167, "grad_norm": 2.041107416152954, "learning_rate": 1.8627865313979822e-05, "loss": 0.56, "step": 17703 }, { "epoch": 2.890086118933921, "grad_norm": 1.9027751684188843, "learning_rate": 1.8627704876307116e-05, "loss": 0.5438, "step": 17704 }, { "epoch": 2.8902493775764255, "grad_norm": 1.8078527450561523, "learning_rate": 1.8627544429946312e-05, "loss": 0.5399, "step": 17705 }, { "epoch": 2.89041263621893, "grad_norm": 1.8444437980651855, "learning_rate": 1.8627383974897576e-05, "loss": 0.6477, "step": 17706 }, { "epoch": 2.890575894861434, "grad_norm": 1.5590691566467285, "learning_rate": 1.8627223511161063e-05, "loss": 0.5031, "step": 17707 }, { "epoch": 2.8907391535039384, "grad_norm": 1.8037335872650146, "learning_rate": 1.862706303873694e-05, "loss": 0.6616, "step": 17708 }, { "epoch": 2.890902412146443, "grad_norm": 2.1408469676971436, "learning_rate": 1.862690255762537e-05, "loss": 0.6798, "step": 17709 }, { "epoch": 2.8910656707889473, "grad_norm": 1.9356060028076172, "learning_rate": 1.8626742067826506e-05, "loss": 0.602, "step": 17710 }, { "epoch": 2.8912289294314517, "grad_norm": 1.5979702472686768, "learning_rate": 1.8626581569340517e-05, "loss": 0.5092, "step": 17711 }, { "epoch": 2.891392188073956, "grad_norm": 1.755308747291565, "learning_rate": 1.8626421062167562e-05, "loss": 0.5769, "step": 17712 }, { "epoch": 2.8915554467164606, "grad_norm": 4.770992279052734, "learning_rate": 1.8626260546307806e-05, "loss": 0.569, "step": 17713 }, { "epoch": 2.891718705358965, "grad_norm": 1.9905551671981812, "learning_rate": 1.8626100021761406e-05, "loss": 0.6763, "step": 17714 }, { "epoch": 2.8918819640014695, "grad_norm": 1.5221818685531616, "learning_rate": 1.8625939488528523e-05, "loss": 0.6304, "step": 17715 }, { "epoch": 2.892045222643974, "grad_norm": 1.5903034210205078, "learning_rate": 1.8625778946609324e-05, "loss": 0.5203, "step": 17716 }, { "epoch": 2.8922084812864783, "grad_norm": 1.6642882823944092, "learning_rate": 1.8625618396003972e-05, "loss": 0.4773, "step": 17717 }, { "epoch": 2.8923717399289823, "grad_norm": 1.7729309797286987, "learning_rate": 1.862545783671262e-05, "loss": 0.5629, "step": 17718 }, { "epoch": 2.8925349985714868, "grad_norm": 2.0539376735687256, "learning_rate": 1.8625297268735434e-05, "loss": 0.6527, "step": 17719 }, { "epoch": 2.892698257213991, "grad_norm": 1.753027081489563, "learning_rate": 1.8625136692072577e-05, "loss": 0.6177, "step": 17720 }, { "epoch": 2.8928615158564956, "grad_norm": 2.245826244354248, "learning_rate": 1.862497610672421e-05, "loss": 0.6956, "step": 17721 }, { "epoch": 2.893024774499, "grad_norm": 1.6018072366714478, "learning_rate": 1.8624815512690492e-05, "loss": 0.5798, "step": 17722 }, { "epoch": 2.8931880331415045, "grad_norm": 1.6475608348846436, "learning_rate": 1.862465490997159e-05, "loss": 0.5919, "step": 17723 }, { "epoch": 2.893351291784009, "grad_norm": 1.9945629835128784, "learning_rate": 1.862449429856766e-05, "loss": 0.6015, "step": 17724 }, { "epoch": 2.893514550426513, "grad_norm": 1.9120267629623413, "learning_rate": 1.8624333678478867e-05, "loss": 0.5391, "step": 17725 }, { "epoch": 2.8936778090690174, "grad_norm": 1.7489694356918335, "learning_rate": 1.8624173049705374e-05, "loss": 0.5834, "step": 17726 }, { "epoch": 2.893841067711522, "grad_norm": 1.737308144569397, "learning_rate": 1.862401241224734e-05, "loss": 0.4917, "step": 17727 }, { "epoch": 2.8940043263540263, "grad_norm": 1.523258924484253, "learning_rate": 1.8623851766104928e-05, "loss": 0.4855, "step": 17728 }, { "epoch": 2.8941675849965307, "grad_norm": 1.5458741188049316, "learning_rate": 1.86236911112783e-05, "loss": 0.5464, "step": 17729 }, { "epoch": 2.894330843639035, "grad_norm": 1.9457720518112183, "learning_rate": 1.8623530447767617e-05, "loss": 0.5481, "step": 17730 }, { "epoch": 2.8944941022815396, "grad_norm": 1.8386073112487793, "learning_rate": 1.862336977557304e-05, "loss": 0.569, "step": 17731 }, { "epoch": 2.894657360924044, "grad_norm": 1.4955552816390991, "learning_rate": 1.8623209094694736e-05, "loss": 0.4671, "step": 17732 }, { "epoch": 2.8948206195665485, "grad_norm": 2.082251787185669, "learning_rate": 1.8623048405132856e-05, "loss": 0.5329, "step": 17733 }, { "epoch": 2.894983878209053, "grad_norm": 1.9564498662948608, "learning_rate": 1.8622887706887574e-05, "loss": 0.6455, "step": 17734 }, { "epoch": 2.8951471368515573, "grad_norm": 1.6800943613052368, "learning_rate": 1.8622726999959045e-05, "loss": 0.5891, "step": 17735 }, { "epoch": 2.8953103954940613, "grad_norm": 1.9633570909500122, "learning_rate": 1.8622566284347433e-05, "loss": 0.5795, "step": 17736 }, { "epoch": 2.8954736541365658, "grad_norm": 1.5250705480575562, "learning_rate": 1.8622405560052896e-05, "loss": 0.4851, "step": 17737 }, { "epoch": 2.89563691277907, "grad_norm": 1.9209930896759033, "learning_rate": 1.86222448270756e-05, "loss": 0.6167, "step": 17738 }, { "epoch": 2.8958001714215746, "grad_norm": 1.7274808883666992, "learning_rate": 1.862208408541571e-05, "loss": 0.5873, "step": 17739 }, { "epoch": 2.895963430064079, "grad_norm": 1.9656296968460083, "learning_rate": 1.8621923335073378e-05, "loss": 0.5861, "step": 17740 }, { "epoch": 2.8961266887065835, "grad_norm": 1.5797642469406128, "learning_rate": 1.8621762576048775e-05, "loss": 0.6379, "step": 17741 }, { "epoch": 2.896289947349088, "grad_norm": 2.1134722232818604, "learning_rate": 1.862160180834206e-05, "loss": 0.7104, "step": 17742 }, { "epoch": 2.896453205991592, "grad_norm": 1.6758716106414795, "learning_rate": 1.862144103195339e-05, "loss": 0.5438, "step": 17743 }, { "epoch": 2.8966164646340964, "grad_norm": 2.0017921924591064, "learning_rate": 1.862128024688293e-05, "loss": 0.6798, "step": 17744 }, { "epoch": 2.896779723276601, "grad_norm": 1.6614614725112915, "learning_rate": 1.862111945313085e-05, "loss": 0.5051, "step": 17745 }, { "epoch": 2.8969429819191053, "grad_norm": 1.5223561525344849, "learning_rate": 1.8620958650697302e-05, "loss": 0.504, "step": 17746 }, { "epoch": 2.8971062405616097, "grad_norm": 1.59871244430542, "learning_rate": 1.8620797839582447e-05, "loss": 0.5238, "step": 17747 }, { "epoch": 2.897269499204114, "grad_norm": 1.645944356918335, "learning_rate": 1.8620637019786456e-05, "loss": 0.5571, "step": 17748 }, { "epoch": 2.8974327578466186, "grad_norm": 1.8247694969177246, "learning_rate": 1.8620476191309486e-05, "loss": 0.6611, "step": 17749 }, { "epoch": 2.897596016489123, "grad_norm": 1.7871205806732178, "learning_rate": 1.8620315354151695e-05, "loss": 0.473, "step": 17750 }, { "epoch": 2.8977592751316275, "grad_norm": 1.4744175672531128, "learning_rate": 1.862015450831325e-05, "loss": 0.4579, "step": 17751 }, { "epoch": 2.897922533774132, "grad_norm": 1.8065730333328247, "learning_rate": 1.8619993653794312e-05, "loss": 0.5721, "step": 17752 }, { "epoch": 2.8980857924166363, "grad_norm": 2.013733386993408, "learning_rate": 1.8619832790595045e-05, "loss": 0.5852, "step": 17753 }, { "epoch": 2.8982490510591403, "grad_norm": 1.8639310598373413, "learning_rate": 1.8619671918715605e-05, "loss": 0.6922, "step": 17754 }, { "epoch": 2.8984123097016448, "grad_norm": 1.576262354850769, "learning_rate": 1.8619511038156158e-05, "loss": 0.5014, "step": 17755 }, { "epoch": 2.898575568344149, "grad_norm": 1.6058295965194702, "learning_rate": 1.861935014891687e-05, "loss": 0.6141, "step": 17756 }, { "epoch": 2.8987388269866536, "grad_norm": 1.9900966882705688, "learning_rate": 1.8619189250997895e-05, "loss": 0.6287, "step": 17757 }, { "epoch": 2.898902085629158, "grad_norm": 1.9293241500854492, "learning_rate": 1.8619028344399397e-05, "loss": 0.5906, "step": 17758 }, { "epoch": 2.8990653442716625, "grad_norm": 1.4846386909484863, "learning_rate": 1.8618867429121543e-05, "loss": 0.4792, "step": 17759 }, { "epoch": 2.8992286029141665, "grad_norm": 1.2799679040908813, "learning_rate": 1.861870650516449e-05, "loss": 0.4613, "step": 17760 }, { "epoch": 2.899391861556671, "grad_norm": 1.4729598760604858, "learning_rate": 1.8618545572528405e-05, "loss": 0.541, "step": 17761 }, { "epoch": 2.8995551201991754, "grad_norm": 1.5994806289672852, "learning_rate": 1.8618384631213446e-05, "loss": 0.5127, "step": 17762 }, { "epoch": 2.89971837884168, "grad_norm": 1.960179090499878, "learning_rate": 1.861822368121977e-05, "loss": 0.629, "step": 17763 }, { "epoch": 2.8998816374841843, "grad_norm": 2.0977401733398438, "learning_rate": 1.861806272254755e-05, "loss": 0.5883, "step": 17764 }, { "epoch": 2.9000448961266887, "grad_norm": 1.8194873332977295, "learning_rate": 1.8617901755196947e-05, "loss": 0.5983, "step": 17765 }, { "epoch": 2.900208154769193, "grad_norm": 1.57864511013031, "learning_rate": 1.8617740779168114e-05, "loss": 0.5437, "step": 17766 }, { "epoch": 2.9003714134116976, "grad_norm": 1.9332852363586426, "learning_rate": 1.861757979446122e-05, "loss": 0.7716, "step": 17767 }, { "epoch": 2.900534672054202, "grad_norm": 1.50779128074646, "learning_rate": 1.8617418801076424e-05, "loss": 0.4807, "step": 17768 }, { "epoch": 2.9006979306967065, "grad_norm": 1.7065582275390625, "learning_rate": 1.861725779901389e-05, "loss": 0.4919, "step": 17769 }, { "epoch": 2.900861189339211, "grad_norm": 2.0082521438598633, "learning_rate": 1.8617096788273778e-05, "loss": 0.5676, "step": 17770 }, { "epoch": 2.901024447981715, "grad_norm": 1.5769697427749634, "learning_rate": 1.8616935768856255e-05, "loss": 0.5039, "step": 17771 }, { "epoch": 2.9011877066242193, "grad_norm": 1.821488380432129, "learning_rate": 1.861677474076148e-05, "loss": 0.5767, "step": 17772 }, { "epoch": 2.9013509652667238, "grad_norm": 2.0135397911071777, "learning_rate": 1.861661370398961e-05, "loss": 0.713, "step": 17773 }, { "epoch": 2.901514223909228, "grad_norm": 1.7651093006134033, "learning_rate": 1.8616452658540817e-05, "loss": 0.4902, "step": 17774 }, { "epoch": 2.9016774825517326, "grad_norm": 1.6385222673416138, "learning_rate": 1.861629160441526e-05, "loss": 0.5747, "step": 17775 }, { "epoch": 2.901840741194237, "grad_norm": 2.0526561737060547, "learning_rate": 1.8616130541613095e-05, "loss": 0.6435, "step": 17776 }, { "epoch": 2.9020039998367415, "grad_norm": 1.7237627506256104, "learning_rate": 1.861596947013449e-05, "loss": 0.5383, "step": 17777 }, { "epoch": 2.9021672584792455, "grad_norm": 2.245058059692383, "learning_rate": 1.861580838997961e-05, "loss": 0.7003, "step": 17778 }, { "epoch": 2.90233051712175, "grad_norm": 1.9089449644088745, "learning_rate": 1.861564730114861e-05, "loss": 0.6323, "step": 17779 }, { "epoch": 2.9024937757642544, "grad_norm": 1.781760573387146, "learning_rate": 1.8615486203641655e-05, "loss": 0.5843, "step": 17780 }, { "epoch": 2.902657034406759, "grad_norm": 1.9279301166534424, "learning_rate": 1.861532509745891e-05, "loss": 0.5614, "step": 17781 }, { "epoch": 2.9028202930492633, "grad_norm": 1.452147126197815, "learning_rate": 1.8615163982600533e-05, "loss": 0.4492, "step": 17782 }, { "epoch": 2.9029835516917677, "grad_norm": 1.9928741455078125, "learning_rate": 1.8615002859066686e-05, "loss": 0.651, "step": 17783 }, { "epoch": 2.903146810334272, "grad_norm": 1.8142743110656738, "learning_rate": 1.8614841726857538e-05, "loss": 0.5263, "step": 17784 }, { "epoch": 2.9033100689767766, "grad_norm": 1.7655153274536133, "learning_rate": 1.8614680585973244e-05, "loss": 0.5634, "step": 17785 }, { "epoch": 2.903473327619281, "grad_norm": 1.7965879440307617, "learning_rate": 1.8614519436413968e-05, "loss": 0.5544, "step": 17786 }, { "epoch": 2.9036365862617854, "grad_norm": 1.8769704103469849, "learning_rate": 1.8614358278179878e-05, "loss": 0.6152, "step": 17787 }, { "epoch": 2.90379984490429, "grad_norm": 1.6414679288864136, "learning_rate": 1.8614197111271127e-05, "loss": 0.54, "step": 17788 }, { "epoch": 2.903963103546794, "grad_norm": 2.1783061027526855, "learning_rate": 1.8614035935687882e-05, "loss": 0.6508, "step": 17789 }, { "epoch": 2.9041263621892983, "grad_norm": 1.7575427293777466, "learning_rate": 1.8613874751430304e-05, "loss": 0.5582, "step": 17790 }, { "epoch": 2.9042896208318028, "grad_norm": 2.085942506790161, "learning_rate": 1.861371355849856e-05, "loss": 0.6602, "step": 17791 }, { "epoch": 2.904452879474307, "grad_norm": 1.4218968152999878, "learning_rate": 1.861355235689281e-05, "loss": 0.474, "step": 17792 }, { "epoch": 2.9046161381168116, "grad_norm": 1.9140852689743042, "learning_rate": 1.861339114661321e-05, "loss": 0.6796, "step": 17793 }, { "epoch": 2.904779396759316, "grad_norm": 2.048856019973755, "learning_rate": 1.861322992765993e-05, "loss": 0.6875, "step": 17794 }, { "epoch": 2.90494265540182, "grad_norm": 1.7481664419174194, "learning_rate": 1.861306870003313e-05, "loss": 0.5182, "step": 17795 }, { "epoch": 2.9051059140443245, "grad_norm": 1.760698676109314, "learning_rate": 1.8612907463732973e-05, "loss": 0.5451, "step": 17796 }, { "epoch": 2.905269172686829, "grad_norm": 1.713754415512085, "learning_rate": 1.861274621875962e-05, "loss": 0.5662, "step": 17797 }, { "epoch": 2.9054324313293334, "grad_norm": 1.8880982398986816, "learning_rate": 1.861258496511323e-05, "loss": 0.4966, "step": 17798 }, { "epoch": 2.905595689971838, "grad_norm": 1.7429906129837036, "learning_rate": 1.8612423702793974e-05, "loss": 0.5173, "step": 17799 }, { "epoch": 2.9057589486143423, "grad_norm": 1.5395079851150513, "learning_rate": 1.861226243180201e-05, "loss": 0.5679, "step": 17800 }, { "epoch": 2.9059222072568467, "grad_norm": 2.0469460487365723, "learning_rate": 1.8612101152137495e-05, "loss": 0.5671, "step": 17801 }, { "epoch": 2.906085465899351, "grad_norm": 1.520018219947815, "learning_rate": 1.86119398638006e-05, "loss": 0.5258, "step": 17802 }, { "epoch": 2.9062487245418556, "grad_norm": 1.7438335418701172, "learning_rate": 1.8611778566791483e-05, "loss": 0.6385, "step": 17803 }, { "epoch": 2.90641198318436, "grad_norm": 1.69405198097229, "learning_rate": 1.8611617261110306e-05, "loss": 0.4847, "step": 17804 }, { "epoch": 2.9065752418268644, "grad_norm": 2.163175106048584, "learning_rate": 1.8611455946757236e-05, "loss": 0.7049, "step": 17805 }, { "epoch": 2.9067385004693684, "grad_norm": 2.064250946044922, "learning_rate": 1.861129462373243e-05, "loss": 0.5854, "step": 17806 }, { "epoch": 2.906901759111873, "grad_norm": 1.7847532033920288, "learning_rate": 1.861113329203605e-05, "loss": 0.5038, "step": 17807 }, { "epoch": 2.9070650177543773, "grad_norm": 1.8696763515472412, "learning_rate": 1.8610971951668265e-05, "loss": 0.589, "step": 17808 }, { "epoch": 2.9072282763968817, "grad_norm": 2.001246690750122, "learning_rate": 1.8610810602629233e-05, "loss": 0.6272, "step": 17809 }, { "epoch": 2.907391535039386, "grad_norm": 1.5194973945617676, "learning_rate": 1.8610649244919114e-05, "loss": 0.5316, "step": 17810 }, { "epoch": 2.9075547936818906, "grad_norm": 1.7623050212860107, "learning_rate": 1.861048787853808e-05, "loss": 0.5045, "step": 17811 }, { "epoch": 2.907718052324395, "grad_norm": 1.9318989515304565, "learning_rate": 1.861032650348628e-05, "loss": 0.6392, "step": 17812 }, { "epoch": 2.907881310966899, "grad_norm": 1.8209224939346313, "learning_rate": 1.8610165119763887e-05, "loss": 0.5798, "step": 17813 }, { "epoch": 2.9080445696094035, "grad_norm": 1.6228828430175781, "learning_rate": 1.861000372737106e-05, "loss": 0.5053, "step": 17814 }, { "epoch": 2.908207828251908, "grad_norm": 1.9301124811172485, "learning_rate": 1.8609842326307958e-05, "loss": 0.5389, "step": 17815 }, { "epoch": 2.9083710868944124, "grad_norm": 1.9047948122024536, "learning_rate": 1.8609680916574753e-05, "loss": 0.6502, "step": 17816 }, { "epoch": 2.908534345536917, "grad_norm": 1.8809958696365356, "learning_rate": 1.8609519498171597e-05, "loss": 0.5529, "step": 17817 }, { "epoch": 2.9086976041794212, "grad_norm": 2.090946674346924, "learning_rate": 1.8609358071098658e-05, "loss": 0.6526, "step": 17818 }, { "epoch": 2.9088608628219257, "grad_norm": 1.687947154045105, "learning_rate": 1.86091966353561e-05, "loss": 0.5428, "step": 17819 }, { "epoch": 2.90902412146443, "grad_norm": 1.7916088104248047, "learning_rate": 1.860903519094408e-05, "loss": 0.5875, "step": 17820 }, { "epoch": 2.9091873801069346, "grad_norm": 1.8782737255096436, "learning_rate": 1.8608873737862767e-05, "loss": 0.5784, "step": 17821 }, { "epoch": 2.909350638749439, "grad_norm": 2.060441255569458, "learning_rate": 1.8608712276112317e-05, "loss": 0.6317, "step": 17822 }, { "epoch": 2.9095138973919434, "grad_norm": 1.7932711839675903, "learning_rate": 1.86085508056929e-05, "loss": 0.6074, "step": 17823 }, { "epoch": 2.9096771560344474, "grad_norm": 1.8903188705444336, "learning_rate": 1.860838932660467e-05, "loss": 0.5922, "step": 17824 }, { "epoch": 2.909840414676952, "grad_norm": 2.0575807094573975, "learning_rate": 1.86082278388478e-05, "loss": 0.6445, "step": 17825 }, { "epoch": 2.9100036733194563, "grad_norm": 1.8959766626358032, "learning_rate": 1.8608066342422443e-05, "loss": 0.6189, "step": 17826 }, { "epoch": 2.9101669319619607, "grad_norm": 1.5961552858352661, "learning_rate": 1.8607904837328765e-05, "loss": 0.4831, "step": 17827 }, { "epoch": 2.910330190604465, "grad_norm": 1.5772615671157837, "learning_rate": 1.860774332356693e-05, "loss": 0.4707, "step": 17828 }, { "epoch": 2.9104934492469696, "grad_norm": 2.126033306121826, "learning_rate": 1.86075818011371e-05, "loss": 0.6519, "step": 17829 }, { "epoch": 2.910656707889474, "grad_norm": 2.1368894577026367, "learning_rate": 1.860742027003944e-05, "loss": 0.8552, "step": 17830 }, { "epoch": 2.910819966531978, "grad_norm": 1.7530128955841064, "learning_rate": 1.8607258730274106e-05, "loss": 0.5154, "step": 17831 }, { "epoch": 2.9109832251744825, "grad_norm": 1.7587404251098633, "learning_rate": 1.8607097181841265e-05, "loss": 0.5631, "step": 17832 }, { "epoch": 2.911146483816987, "grad_norm": 1.6685981750488281, "learning_rate": 1.8606935624741082e-05, "loss": 0.5161, "step": 17833 }, { "epoch": 2.9113097424594914, "grad_norm": 1.8164883852005005, "learning_rate": 1.8606774058973715e-05, "loss": 0.6677, "step": 17834 }, { "epoch": 2.911473001101996, "grad_norm": 1.7784061431884766, "learning_rate": 1.860661248453933e-05, "loss": 0.5501, "step": 17835 }, { "epoch": 2.9116362597445002, "grad_norm": 1.8102604150772095, "learning_rate": 1.860645090143809e-05, "loss": 0.6518, "step": 17836 }, { "epoch": 2.9117995183870047, "grad_norm": 1.4981985092163086, "learning_rate": 1.8606289309670155e-05, "loss": 0.4396, "step": 17837 }, { "epoch": 2.911962777029509, "grad_norm": 1.5951236486434937, "learning_rate": 1.8606127709235684e-05, "loss": 0.4779, "step": 17838 }, { "epoch": 2.9121260356720136, "grad_norm": 1.7296514511108398, "learning_rate": 1.860596610013485e-05, "loss": 0.6179, "step": 17839 }, { "epoch": 2.912289294314518, "grad_norm": 1.887043833732605, "learning_rate": 1.860580448236781e-05, "loss": 0.5534, "step": 17840 }, { "epoch": 2.9124525529570224, "grad_norm": 1.630205750465393, "learning_rate": 1.8605642855934727e-05, "loss": 0.5271, "step": 17841 }, { "epoch": 2.9126158115995264, "grad_norm": 2.0895328521728516, "learning_rate": 1.8605481220835765e-05, "loss": 0.6507, "step": 17842 }, { "epoch": 2.912779070242031, "grad_norm": 1.5169239044189453, "learning_rate": 1.860531957707108e-05, "loss": 0.499, "step": 17843 }, { "epoch": 2.9129423288845353, "grad_norm": 1.606407880783081, "learning_rate": 1.8605157924640845e-05, "loss": 0.496, "step": 17844 }, { "epoch": 2.9131055875270397, "grad_norm": 1.734812617301941, "learning_rate": 1.860499626354522e-05, "loss": 0.5629, "step": 17845 }, { "epoch": 2.913268846169544, "grad_norm": 1.682987093925476, "learning_rate": 1.860483459378436e-05, "loss": 0.5435, "step": 17846 }, { "epoch": 2.9134321048120486, "grad_norm": 1.9356775283813477, "learning_rate": 1.8604672915358438e-05, "loss": 0.5537, "step": 17847 }, { "epoch": 2.9135953634545526, "grad_norm": 1.9208205938339233, "learning_rate": 1.8604511228267615e-05, "loss": 0.5946, "step": 17848 }, { "epoch": 2.913758622097057, "grad_norm": 1.682381272315979, "learning_rate": 1.8604349532512048e-05, "loss": 0.4915, "step": 17849 }, { "epoch": 2.9139218807395615, "grad_norm": 1.750856637954712, "learning_rate": 1.8604187828091906e-05, "loss": 0.515, "step": 17850 }, { "epoch": 2.914085139382066, "grad_norm": 1.7758454084396362, "learning_rate": 1.8604026115007346e-05, "loss": 0.6066, "step": 17851 }, { "epoch": 2.9142483980245704, "grad_norm": 2.3196723461151123, "learning_rate": 1.8603864393258534e-05, "loss": 0.6044, "step": 17852 }, { "epoch": 2.914411656667075, "grad_norm": 1.8440107107162476, "learning_rate": 1.8603702662845634e-05, "loss": 0.6408, "step": 17853 }, { "epoch": 2.9145749153095792, "grad_norm": 1.9932106733322144, "learning_rate": 1.860354092376881e-05, "loss": 0.5884, "step": 17854 }, { "epoch": 2.9147381739520837, "grad_norm": 1.6793954372406006, "learning_rate": 1.860337917602822e-05, "loss": 0.485, "step": 17855 }, { "epoch": 2.914901432594588, "grad_norm": 1.557876706123352, "learning_rate": 1.860321741962403e-05, "loss": 0.4831, "step": 17856 }, { "epoch": 2.9150646912370926, "grad_norm": 2.3762307167053223, "learning_rate": 1.86030556545564e-05, "loss": 0.6682, "step": 17857 }, { "epoch": 2.915227949879597, "grad_norm": 1.608048439025879, "learning_rate": 1.8602893880825498e-05, "loss": 0.5423, "step": 17858 }, { "epoch": 2.915391208522101, "grad_norm": 1.9455102682113647, "learning_rate": 1.8602732098431488e-05, "loss": 0.5327, "step": 17859 }, { "epoch": 2.9155544671646054, "grad_norm": 1.6257545948028564, "learning_rate": 1.8602570307374523e-05, "loss": 0.534, "step": 17860 }, { "epoch": 2.91571772580711, "grad_norm": 1.6405078172683716, "learning_rate": 1.8602408507654772e-05, "loss": 0.6237, "step": 17861 }, { "epoch": 2.9158809844496143, "grad_norm": 1.5228326320648193, "learning_rate": 1.86022466992724e-05, "loss": 0.4651, "step": 17862 }, { "epoch": 2.9160442430921187, "grad_norm": 1.7370457649230957, "learning_rate": 1.8602084882227568e-05, "loss": 0.5598, "step": 17863 }, { "epoch": 2.916207501734623, "grad_norm": 1.5862135887145996, "learning_rate": 1.8601923056520437e-05, "loss": 0.5375, "step": 17864 }, { "epoch": 2.9163707603771276, "grad_norm": 1.6465702056884766, "learning_rate": 1.8601761222151174e-05, "loss": 0.5697, "step": 17865 }, { "epoch": 2.9165340190196316, "grad_norm": 1.7884713411331177, "learning_rate": 1.860159937911994e-05, "loss": 0.5668, "step": 17866 }, { "epoch": 2.916697277662136, "grad_norm": 1.7640323638916016, "learning_rate": 1.8601437527426897e-05, "loss": 0.5043, "step": 17867 }, { "epoch": 2.9168605363046405, "grad_norm": 1.7849290370941162, "learning_rate": 1.8601275667072205e-05, "loss": 0.5543, "step": 17868 }, { "epoch": 2.917023794947145, "grad_norm": 1.8355060815811157, "learning_rate": 1.8601113798056035e-05, "loss": 0.6097, "step": 17869 }, { "epoch": 2.9171870535896494, "grad_norm": 1.8711811304092407, "learning_rate": 1.8600951920378546e-05, "loss": 0.7097, "step": 17870 }, { "epoch": 2.917350312232154, "grad_norm": 1.8175523281097412, "learning_rate": 1.8600790034039895e-05, "loss": 0.6085, "step": 17871 }, { "epoch": 2.9175135708746582, "grad_norm": 1.5256733894348145, "learning_rate": 1.8600628139040258e-05, "loss": 0.5092, "step": 17872 }, { "epoch": 2.9176768295171627, "grad_norm": 1.6591397523880005, "learning_rate": 1.8600466235379786e-05, "loss": 0.4625, "step": 17873 }, { "epoch": 2.917840088159667, "grad_norm": 1.8574957847595215, "learning_rate": 1.860030432305865e-05, "loss": 0.6631, "step": 17874 }, { "epoch": 2.9180033468021715, "grad_norm": 1.4704838991165161, "learning_rate": 1.8600142402077006e-05, "loss": 0.5011, "step": 17875 }, { "epoch": 2.918166605444676, "grad_norm": 1.302383542060852, "learning_rate": 1.8599980472435025e-05, "loss": 0.464, "step": 17876 }, { "epoch": 2.91832986408718, "grad_norm": 1.7129923105239868, "learning_rate": 1.8599818534132865e-05, "loss": 0.5799, "step": 17877 }, { "epoch": 2.9184931227296844, "grad_norm": 1.7768864631652832, "learning_rate": 1.8599656587170687e-05, "loss": 0.6283, "step": 17878 }, { "epoch": 2.918656381372189, "grad_norm": 1.7070026397705078, "learning_rate": 1.859949463154866e-05, "loss": 0.5783, "step": 17879 }, { "epoch": 2.9188196400146933, "grad_norm": 1.6146975755691528, "learning_rate": 1.8599332667266942e-05, "loss": 0.586, "step": 17880 }, { "epoch": 2.9189828986571977, "grad_norm": 1.8681390285491943, "learning_rate": 1.8599170694325698e-05, "loss": 0.6175, "step": 17881 }, { "epoch": 2.919146157299702, "grad_norm": 1.8025680780410767, "learning_rate": 1.8599008712725094e-05, "loss": 0.6021, "step": 17882 }, { "epoch": 2.919309415942206, "grad_norm": 1.4294878244400024, "learning_rate": 1.859884672246529e-05, "loss": 0.4391, "step": 17883 }, { "epoch": 2.9194726745847106, "grad_norm": 2.021430730819702, "learning_rate": 1.8598684723546448e-05, "loss": 0.5947, "step": 17884 }, { "epoch": 2.919635933227215, "grad_norm": 1.6895673274993896, "learning_rate": 1.8598522715968736e-05, "loss": 0.5434, "step": 17885 }, { "epoch": 2.9197991918697195, "grad_norm": 1.4866735935211182, "learning_rate": 1.859836069973231e-05, "loss": 0.4737, "step": 17886 }, { "epoch": 2.919962450512224, "grad_norm": 1.8909248113632202, "learning_rate": 1.859819867483734e-05, "loss": 0.5587, "step": 17887 }, { "epoch": 2.9201257091547284, "grad_norm": 1.6870230436325073, "learning_rate": 1.8598036641283985e-05, "loss": 0.542, "step": 17888 }, { "epoch": 2.920288967797233, "grad_norm": 1.384941577911377, "learning_rate": 1.8597874599072413e-05, "loss": 0.5075, "step": 17889 }, { "epoch": 2.9204522264397372, "grad_norm": 1.696475863456726, "learning_rate": 1.859771254820278e-05, "loss": 0.4768, "step": 17890 }, { "epoch": 2.9206154850822417, "grad_norm": 1.8641002178192139, "learning_rate": 1.8597550488675252e-05, "loss": 0.6883, "step": 17891 }, { "epoch": 2.920778743724746, "grad_norm": 1.954168438911438, "learning_rate": 1.859738842048999e-05, "loss": 0.5905, "step": 17892 }, { "epoch": 2.9209420023672505, "grad_norm": 1.6769558191299438, "learning_rate": 1.8597226343647165e-05, "loss": 0.5562, "step": 17893 }, { "epoch": 2.9211052610097545, "grad_norm": 1.664737582206726, "learning_rate": 1.8597064258146935e-05, "loss": 0.4768, "step": 17894 }, { "epoch": 2.921268519652259, "grad_norm": 2.2146146297454834, "learning_rate": 1.859690216398946e-05, "loss": 0.6291, "step": 17895 }, { "epoch": 2.9214317782947634, "grad_norm": 1.862608551979065, "learning_rate": 1.8596740061174912e-05, "loss": 0.5417, "step": 17896 }, { "epoch": 2.921595036937268, "grad_norm": 1.954742193222046, "learning_rate": 1.859657794970345e-05, "loss": 0.7383, "step": 17897 }, { "epoch": 2.9217582955797723, "grad_norm": 2.046867609024048, "learning_rate": 1.859641582957523e-05, "loss": 0.6492, "step": 17898 }, { "epoch": 2.9219215542222767, "grad_norm": 1.6088579893112183, "learning_rate": 1.8596253700790427e-05, "loss": 0.4839, "step": 17899 }, { "epoch": 2.922084812864781, "grad_norm": 1.9849224090576172, "learning_rate": 1.859609156334919e-05, "loss": 0.609, "step": 17900 }, { "epoch": 2.922248071507285, "grad_norm": 1.8670803308486938, "learning_rate": 1.85959294172517e-05, "loss": 0.6352, "step": 17901 }, { "epoch": 2.9224113301497896, "grad_norm": 1.8991568088531494, "learning_rate": 1.859576726249811e-05, "loss": 0.5374, "step": 17902 }, { "epoch": 2.922574588792294, "grad_norm": 2.0115537643432617, "learning_rate": 1.8595605099088583e-05, "loss": 0.5519, "step": 17903 }, { "epoch": 2.9227378474347985, "grad_norm": 1.7305188179016113, "learning_rate": 1.8595442927023283e-05, "loss": 0.6648, "step": 17904 }, { "epoch": 2.922901106077303, "grad_norm": 1.62247633934021, "learning_rate": 1.8595280746302376e-05, "loss": 0.4908, "step": 17905 }, { "epoch": 2.9230643647198074, "grad_norm": 1.596406102180481, "learning_rate": 1.8595118556926023e-05, "loss": 0.5113, "step": 17906 }, { "epoch": 2.923227623362312, "grad_norm": 1.992498517036438, "learning_rate": 1.859495635889439e-05, "loss": 0.5376, "step": 17907 }, { "epoch": 2.9233908820048162, "grad_norm": 1.7793515920639038, "learning_rate": 1.8594794152207637e-05, "loss": 0.5525, "step": 17908 }, { "epoch": 2.9235541406473207, "grad_norm": 1.8079369068145752, "learning_rate": 1.8594631936865926e-05, "loss": 0.611, "step": 17909 }, { "epoch": 2.923717399289825, "grad_norm": 1.638043761253357, "learning_rate": 1.8594469712869425e-05, "loss": 0.5575, "step": 17910 }, { "epoch": 2.9238806579323295, "grad_norm": 1.5246232748031616, "learning_rate": 1.8594307480218297e-05, "loss": 0.4679, "step": 17911 }, { "epoch": 2.9240439165748335, "grad_norm": 1.7906066179275513, "learning_rate": 1.85941452389127e-05, "loss": 0.5215, "step": 17912 }, { "epoch": 2.924207175217338, "grad_norm": 2.0487053394317627, "learning_rate": 1.8593982988952802e-05, "loss": 0.6439, "step": 17913 }, { "epoch": 2.9243704338598424, "grad_norm": 1.7352861166000366, "learning_rate": 1.8593820730338766e-05, "loss": 0.5309, "step": 17914 }, { "epoch": 2.924533692502347, "grad_norm": 1.704026699066162, "learning_rate": 1.8593658463070757e-05, "loss": 0.5457, "step": 17915 }, { "epoch": 2.9246969511448513, "grad_norm": 1.8454428911209106, "learning_rate": 1.859349618714893e-05, "loss": 0.6157, "step": 17916 }, { "epoch": 2.9248602097873557, "grad_norm": 1.523018717765808, "learning_rate": 1.859333390257346e-05, "loss": 0.5112, "step": 17917 }, { "epoch": 2.92502346842986, "grad_norm": 1.625593900680542, "learning_rate": 1.8593171609344505e-05, "loss": 0.4952, "step": 17918 }, { "epoch": 2.925186727072364, "grad_norm": 1.7568827867507935, "learning_rate": 1.8593009307462227e-05, "loss": 0.5347, "step": 17919 }, { "epoch": 2.9253499857148686, "grad_norm": 1.7919505834579468, "learning_rate": 1.8592846996926793e-05, "loss": 0.6035, "step": 17920 }, { "epoch": 2.925513244357373, "grad_norm": 2.013686418533325, "learning_rate": 1.859268467773836e-05, "loss": 0.5749, "step": 17921 }, { "epoch": 2.9256765029998775, "grad_norm": 1.468985676765442, "learning_rate": 1.85925223498971e-05, "loss": 0.4519, "step": 17922 }, { "epoch": 2.925839761642382, "grad_norm": 1.7490332126617432, "learning_rate": 1.859236001340317e-05, "loss": 0.6045, "step": 17923 }, { "epoch": 2.9260030202848863, "grad_norm": 1.7930822372436523, "learning_rate": 1.8592197668256737e-05, "loss": 0.5351, "step": 17924 }, { "epoch": 2.926166278927391, "grad_norm": 2.010746479034424, "learning_rate": 1.8592035314457963e-05, "loss": 0.6848, "step": 17925 }, { "epoch": 2.9263295375698952, "grad_norm": 1.7743477821350098, "learning_rate": 1.859187295200701e-05, "loss": 0.5766, "step": 17926 }, { "epoch": 2.9264927962123997, "grad_norm": 1.7078264951705933, "learning_rate": 1.8591710580904043e-05, "loss": 0.4881, "step": 17927 }, { "epoch": 2.926656054854904, "grad_norm": 1.7915047407150269, "learning_rate": 1.8591548201149228e-05, "loss": 0.6081, "step": 17928 }, { "epoch": 2.9268193134974085, "grad_norm": 1.8123133182525635, "learning_rate": 1.8591385812742724e-05, "loss": 0.5815, "step": 17929 }, { "epoch": 2.9269825721399125, "grad_norm": 1.4957998991012573, "learning_rate": 1.85912234156847e-05, "loss": 0.4932, "step": 17930 }, { "epoch": 2.927145830782417, "grad_norm": 1.822986364364624, "learning_rate": 1.8591061009975316e-05, "loss": 0.5924, "step": 17931 }, { "epoch": 2.9273090894249214, "grad_norm": 1.765818476676941, "learning_rate": 1.8590898595614734e-05, "loss": 0.5113, "step": 17932 }, { "epoch": 2.927472348067426, "grad_norm": 1.9998183250427246, "learning_rate": 1.859073617260312e-05, "loss": 0.6751, "step": 17933 }, { "epoch": 2.9276356067099303, "grad_norm": 2.416313886642456, "learning_rate": 1.8590573740940635e-05, "loss": 0.6086, "step": 17934 }, { "epoch": 2.9277988653524347, "grad_norm": 1.8034532070159912, "learning_rate": 1.8590411300627447e-05, "loss": 0.5486, "step": 17935 }, { "epoch": 2.9279621239949387, "grad_norm": 1.6868922710418701, "learning_rate": 1.8590248851663715e-05, "loss": 0.515, "step": 17936 }, { "epoch": 2.928125382637443, "grad_norm": 1.6406034231185913, "learning_rate": 1.8590086394049605e-05, "loss": 0.5817, "step": 17937 }, { "epoch": 2.9282886412799476, "grad_norm": 1.8560587167739868, "learning_rate": 1.8589923927785283e-05, "loss": 0.6808, "step": 17938 }, { "epoch": 2.928451899922452, "grad_norm": 1.895390272140503, "learning_rate": 1.8589761452870908e-05, "loss": 0.6702, "step": 17939 }, { "epoch": 2.9286151585649565, "grad_norm": 1.541446328163147, "learning_rate": 1.8589598969306646e-05, "loss": 0.5023, "step": 17940 }, { "epoch": 2.928778417207461, "grad_norm": 1.8904463052749634, "learning_rate": 1.858943647709266e-05, "loss": 0.723, "step": 17941 }, { "epoch": 2.9289416758499653, "grad_norm": 1.7231184244155884, "learning_rate": 1.8589273976229112e-05, "loss": 0.6177, "step": 17942 }, { "epoch": 2.92910493449247, "grad_norm": 1.6920517683029175, "learning_rate": 1.858911146671617e-05, "loss": 0.5386, "step": 17943 }, { "epoch": 2.929268193134974, "grad_norm": 1.6130996942520142, "learning_rate": 1.8588948948553995e-05, "loss": 0.589, "step": 17944 }, { "epoch": 2.9294314517774787, "grad_norm": 1.5637621879577637, "learning_rate": 1.858878642174275e-05, "loss": 0.5132, "step": 17945 }, { "epoch": 2.929594710419983, "grad_norm": 1.3991285562515259, "learning_rate": 1.8588623886282597e-05, "loss": 0.4328, "step": 17946 }, { "epoch": 2.929757969062487, "grad_norm": 2.408189296722412, "learning_rate": 1.8588461342173704e-05, "loss": 0.6257, "step": 17947 }, { "epoch": 2.9299212277049915, "grad_norm": 1.7660748958587646, "learning_rate": 1.8588298789416232e-05, "loss": 0.6096, "step": 17948 }, { "epoch": 2.930084486347496, "grad_norm": 2.064765691757202, "learning_rate": 1.8588136228010347e-05, "loss": 0.6139, "step": 17949 }, { "epoch": 2.9302477449900004, "grad_norm": 1.5302923917770386, "learning_rate": 1.858797365795621e-05, "loss": 0.5366, "step": 17950 }, { "epoch": 2.930411003632505, "grad_norm": 1.763407826423645, "learning_rate": 1.8587811079253985e-05, "loss": 0.447, "step": 17951 }, { "epoch": 2.9305742622750093, "grad_norm": 1.7470853328704834, "learning_rate": 1.858764849190384e-05, "loss": 0.6152, "step": 17952 }, { "epoch": 2.9307375209175137, "grad_norm": 1.69144868850708, "learning_rate": 1.8587485895905932e-05, "loss": 0.5614, "step": 17953 }, { "epoch": 2.9309007795600177, "grad_norm": 1.6453455686569214, "learning_rate": 1.8587323291260428e-05, "loss": 0.4477, "step": 17954 }, { "epoch": 2.931064038202522, "grad_norm": 1.5945335626602173, "learning_rate": 1.8587160677967493e-05, "loss": 0.5035, "step": 17955 }, { "epoch": 2.9312272968450266, "grad_norm": 2.073347806930542, "learning_rate": 1.8586998056027287e-05, "loss": 0.625, "step": 17956 }, { "epoch": 2.931390555487531, "grad_norm": 2.1483993530273438, "learning_rate": 1.858683542543998e-05, "loss": 0.5606, "step": 17957 }, { "epoch": 2.9315538141300355, "grad_norm": 1.8882253170013428, "learning_rate": 1.8586672786205732e-05, "loss": 0.5927, "step": 17958 }, { "epoch": 2.93171707277254, "grad_norm": 1.782860279083252, "learning_rate": 1.8586510138324705e-05, "loss": 0.5629, "step": 17959 }, { "epoch": 2.9318803314150443, "grad_norm": 1.6959872245788574, "learning_rate": 1.8586347481797064e-05, "loss": 0.4937, "step": 17960 }, { "epoch": 2.9320435900575488, "grad_norm": 1.6044689416885376, "learning_rate": 1.858618481662297e-05, "loss": 0.4609, "step": 17961 }, { "epoch": 2.932206848700053, "grad_norm": 1.8220643997192383, "learning_rate": 1.8586022142802597e-05, "loss": 0.5837, "step": 17962 }, { "epoch": 2.9323701073425577, "grad_norm": 1.689004898071289, "learning_rate": 1.85858594603361e-05, "loss": 0.497, "step": 17963 }, { "epoch": 2.932533365985062, "grad_norm": 1.7420686483383179, "learning_rate": 1.8585696769223643e-05, "loss": 0.4904, "step": 17964 }, { "epoch": 2.932696624627566, "grad_norm": 1.5595111846923828, "learning_rate": 1.8585534069465392e-05, "loss": 0.5031, "step": 17965 }, { "epoch": 2.9328598832700705, "grad_norm": 1.7041480541229248, "learning_rate": 1.858537136106151e-05, "loss": 0.4777, "step": 17966 }, { "epoch": 2.933023141912575, "grad_norm": 1.7258635759353638, "learning_rate": 1.8585208644012165e-05, "loss": 0.5676, "step": 17967 }, { "epoch": 2.9331864005550794, "grad_norm": 1.5887751579284668, "learning_rate": 1.858504591831751e-05, "loss": 0.5002, "step": 17968 }, { "epoch": 2.933349659197584, "grad_norm": 1.6990437507629395, "learning_rate": 1.8584883183977724e-05, "loss": 0.5318, "step": 17969 }, { "epoch": 2.9335129178400883, "grad_norm": 1.8764703273773193, "learning_rate": 1.858472044099296e-05, "loss": 0.5213, "step": 17970 }, { "epoch": 2.9336761764825927, "grad_norm": 1.4518674612045288, "learning_rate": 1.8584557689363382e-05, "loss": 0.4361, "step": 17971 }, { "epoch": 2.9338394351250967, "grad_norm": 1.9052942991256714, "learning_rate": 1.8584394929089157e-05, "loss": 0.4964, "step": 17972 }, { "epoch": 2.934002693767601, "grad_norm": 1.9771413803100586, "learning_rate": 1.8584232160170452e-05, "loss": 0.6091, "step": 17973 }, { "epoch": 2.9341659524101056, "grad_norm": 1.7133971452713013, "learning_rate": 1.8584069382607427e-05, "loss": 0.5366, "step": 17974 }, { "epoch": 2.93432921105261, "grad_norm": 1.5606110095977783, "learning_rate": 1.8583906596400245e-05, "loss": 0.471, "step": 17975 }, { "epoch": 2.9344924696951145, "grad_norm": 2.0329689979553223, "learning_rate": 1.858374380154907e-05, "loss": 0.5409, "step": 17976 }, { "epoch": 2.934655728337619, "grad_norm": 1.800714373588562, "learning_rate": 1.858358099805407e-05, "loss": 0.5801, "step": 17977 }, { "epoch": 2.9348189869801233, "grad_norm": 2.2615914344787598, "learning_rate": 1.858341818591541e-05, "loss": 0.8652, "step": 17978 }, { "epoch": 2.9349822456226278, "grad_norm": 1.9496499300003052, "learning_rate": 1.8583255365133243e-05, "loss": 0.5797, "step": 17979 }, { "epoch": 2.935145504265132, "grad_norm": 1.879807949066162, "learning_rate": 1.8583092535707742e-05, "loss": 0.5565, "step": 17980 }, { "epoch": 2.9353087629076366, "grad_norm": 1.8824173212051392, "learning_rate": 1.8582929697639067e-05, "loss": 0.5611, "step": 17981 }, { "epoch": 2.935472021550141, "grad_norm": 2.0085232257843018, "learning_rate": 1.8582766850927386e-05, "loss": 0.5498, "step": 17982 }, { "epoch": 2.935635280192645, "grad_norm": 1.787595510482788, "learning_rate": 1.8582603995572862e-05, "loss": 0.5682, "step": 17983 }, { "epoch": 2.9357985388351495, "grad_norm": 1.5872801542282104, "learning_rate": 1.8582441131575658e-05, "loss": 0.5608, "step": 17984 }, { "epoch": 2.935961797477654, "grad_norm": 1.4629100561141968, "learning_rate": 1.858227825893594e-05, "loss": 0.4931, "step": 17985 }, { "epoch": 2.9361250561201584, "grad_norm": 1.7257094383239746, "learning_rate": 1.858211537765387e-05, "loss": 0.5257, "step": 17986 }, { "epoch": 2.936288314762663, "grad_norm": 1.7356125116348267, "learning_rate": 1.8581952487729606e-05, "loss": 0.5594, "step": 17987 }, { "epoch": 2.9364515734051673, "grad_norm": 1.5784704685211182, "learning_rate": 1.8581789589163323e-05, "loss": 0.6005, "step": 17988 }, { "epoch": 2.9366148320476713, "grad_norm": 1.7964249849319458, "learning_rate": 1.8581626681955177e-05, "loss": 0.599, "step": 17989 }, { "epoch": 2.9367780906901757, "grad_norm": 1.3930518627166748, "learning_rate": 1.858146376610534e-05, "loss": 0.4995, "step": 17990 }, { "epoch": 2.93694134933268, "grad_norm": 2.039621591567993, "learning_rate": 1.8581300841613967e-05, "loss": 0.664, "step": 17991 }, { "epoch": 2.9371046079751846, "grad_norm": 1.4649262428283691, "learning_rate": 1.858113790848123e-05, "loss": 0.4687, "step": 17992 }, { "epoch": 2.937267866617689, "grad_norm": 1.7644776105880737, "learning_rate": 1.8580974966707287e-05, "loss": 0.55, "step": 17993 }, { "epoch": 2.9374311252601935, "grad_norm": 1.818028211593628, "learning_rate": 1.8580812016292306e-05, "loss": 0.5402, "step": 17994 }, { "epoch": 2.937594383902698, "grad_norm": 1.9628305435180664, "learning_rate": 1.858064905723645e-05, "loss": 0.6092, "step": 17995 }, { "epoch": 2.9377576425452023, "grad_norm": 1.9067360162734985, "learning_rate": 1.858048608953988e-05, "loss": 0.6544, "step": 17996 }, { "epoch": 2.9379209011877068, "grad_norm": 1.7021703720092773, "learning_rate": 1.858032311320276e-05, "loss": 0.5404, "step": 17997 }, { "epoch": 2.938084159830211, "grad_norm": 2.026393413543701, "learning_rate": 1.8580160128225263e-05, "loss": 0.5885, "step": 17998 }, { "epoch": 2.9382474184727156, "grad_norm": 1.8413658142089844, "learning_rate": 1.8579997134607545e-05, "loss": 0.6775, "step": 17999 }, { "epoch": 2.9384106771152196, "grad_norm": 1.7751041650772095, "learning_rate": 1.8579834132349773e-05, "loss": 0.5849, "step": 18000 }, { "epoch": 2.938573935757724, "grad_norm": 1.3367198705673218, "learning_rate": 1.857967112145211e-05, "loss": 0.4288, "step": 18001 }, { "epoch": 2.9387371944002285, "grad_norm": 1.8593498468399048, "learning_rate": 1.8579508101914715e-05, "loss": 0.5744, "step": 18002 }, { "epoch": 2.938900453042733, "grad_norm": 1.7493009567260742, "learning_rate": 1.8579345073737765e-05, "loss": 0.5311, "step": 18003 }, { "epoch": 2.9390637116852374, "grad_norm": 1.6209200620651245, "learning_rate": 1.8579182036921415e-05, "loss": 0.5637, "step": 18004 }, { "epoch": 2.939226970327742, "grad_norm": 1.7260329723358154, "learning_rate": 1.8579018991465833e-05, "loss": 0.553, "step": 18005 }, { "epoch": 2.9393902289702463, "grad_norm": 2.1213278770446777, "learning_rate": 1.8578855937371176e-05, "loss": 0.5917, "step": 18006 }, { "epoch": 2.9395534876127503, "grad_norm": 1.9250500202178955, "learning_rate": 1.8578692874637612e-05, "loss": 0.5612, "step": 18007 }, { "epoch": 2.9397167462552547, "grad_norm": 1.6840766668319702, "learning_rate": 1.8578529803265313e-05, "loss": 0.608, "step": 18008 }, { "epoch": 2.939880004897759, "grad_norm": 1.6709774732589722, "learning_rate": 1.8578366723254432e-05, "loss": 0.5747, "step": 18009 }, { "epoch": 2.9400432635402636, "grad_norm": 1.8643375635147095, "learning_rate": 1.857820363460514e-05, "loss": 0.6407, "step": 18010 }, { "epoch": 2.940206522182768, "grad_norm": 1.6746883392333984, "learning_rate": 1.85780405373176e-05, "loss": 0.5849, "step": 18011 }, { "epoch": 2.9403697808252724, "grad_norm": 2.149601697921753, "learning_rate": 1.8577877431391977e-05, "loss": 0.6617, "step": 18012 }, { "epoch": 2.940533039467777, "grad_norm": 1.569210410118103, "learning_rate": 1.857771431682843e-05, "loss": 0.5498, "step": 18013 }, { "epoch": 2.9406962981102813, "grad_norm": 1.8874183893203735, "learning_rate": 1.8577551193627126e-05, "loss": 0.6005, "step": 18014 }, { "epoch": 2.9408595567527858, "grad_norm": 1.7618495225906372, "learning_rate": 1.8577388061788234e-05, "loss": 0.5565, "step": 18015 }, { "epoch": 2.94102281539529, "grad_norm": 2.0395314693450928, "learning_rate": 1.8577224921311914e-05, "loss": 0.5768, "step": 18016 }, { "epoch": 2.9411860740377946, "grad_norm": 1.2721446752548218, "learning_rate": 1.857706177219833e-05, "loss": 0.4147, "step": 18017 }, { "epoch": 2.9413493326802986, "grad_norm": 1.7796175479888916, "learning_rate": 1.8576898614447647e-05, "loss": 0.6386, "step": 18018 }, { "epoch": 2.941512591322803, "grad_norm": 1.6364092826843262, "learning_rate": 1.857673544806003e-05, "loss": 0.5065, "step": 18019 }, { "epoch": 2.9416758499653075, "grad_norm": 1.7773348093032837, "learning_rate": 1.857657227303564e-05, "loss": 0.539, "step": 18020 }, { "epoch": 2.941839108607812, "grad_norm": 1.2756208181381226, "learning_rate": 1.8576409089374646e-05, "loss": 0.4374, "step": 18021 }, { "epoch": 2.9420023672503164, "grad_norm": 1.7695531845092773, "learning_rate": 1.857624589707721e-05, "loss": 0.5744, "step": 18022 }, { "epoch": 2.942165625892821, "grad_norm": 1.6499700546264648, "learning_rate": 1.8576082696143498e-05, "loss": 0.5485, "step": 18023 }, { "epoch": 2.942328884535325, "grad_norm": 1.8487637042999268, "learning_rate": 1.8575919486573673e-05, "loss": 0.6077, "step": 18024 }, { "epoch": 2.9424921431778293, "grad_norm": 1.4366466999053955, "learning_rate": 1.8575756268367897e-05, "loss": 0.4643, "step": 18025 }, { "epoch": 2.9426554018203337, "grad_norm": 1.793678879737854, "learning_rate": 1.8575593041526335e-05, "loss": 0.5875, "step": 18026 }, { "epoch": 2.942818660462838, "grad_norm": 1.9411144256591797, "learning_rate": 1.8575429806049158e-05, "loss": 0.7382, "step": 18027 }, { "epoch": 2.9429819191053426, "grad_norm": 1.7894147634506226, "learning_rate": 1.8575266561936526e-05, "loss": 0.5042, "step": 18028 }, { "epoch": 2.943145177747847, "grad_norm": 2.075589418411255, "learning_rate": 1.85751033091886e-05, "loss": 0.553, "step": 18029 }, { "epoch": 2.9433084363903514, "grad_norm": 1.6673574447631836, "learning_rate": 1.8574940047805547e-05, "loss": 0.5705, "step": 18030 }, { "epoch": 2.943471695032856, "grad_norm": 1.6359843015670776, "learning_rate": 1.857477677778753e-05, "loss": 0.5245, "step": 18031 }, { "epoch": 2.9436349536753603, "grad_norm": 1.4172242879867554, "learning_rate": 1.857461349913472e-05, "loss": 0.4026, "step": 18032 }, { "epoch": 2.9437982123178648, "grad_norm": 1.8109841346740723, "learning_rate": 1.8574450211847273e-05, "loss": 0.5421, "step": 18033 }, { "epoch": 2.943961470960369, "grad_norm": 1.6301275491714478, "learning_rate": 1.8574286915925357e-05, "loss": 0.536, "step": 18034 }, { "epoch": 2.944124729602873, "grad_norm": 1.4539228677749634, "learning_rate": 1.857412361136914e-05, "loss": 0.5119, "step": 18035 }, { "epoch": 2.9442879882453776, "grad_norm": 1.9372824430465698, "learning_rate": 1.857396029817878e-05, "loss": 0.66, "step": 18036 }, { "epoch": 2.944451246887882, "grad_norm": 1.9071247577667236, "learning_rate": 1.8573796976354442e-05, "loss": 0.6924, "step": 18037 }, { "epoch": 2.9446145055303865, "grad_norm": 1.7879050970077515, "learning_rate": 1.8573633645896295e-05, "loss": 0.5753, "step": 18038 }, { "epoch": 2.944777764172891, "grad_norm": 1.6888853311538696, "learning_rate": 1.85734703068045e-05, "loss": 0.5408, "step": 18039 }, { "epoch": 2.9449410228153954, "grad_norm": 1.857982873916626, "learning_rate": 1.857330695907922e-05, "loss": 0.646, "step": 18040 }, { "epoch": 2.9451042814579, "grad_norm": 1.526651382446289, "learning_rate": 1.857314360272063e-05, "loss": 0.462, "step": 18041 }, { "epoch": 2.945267540100404, "grad_norm": 1.4464304447174072, "learning_rate": 1.8572980237728882e-05, "loss": 0.517, "step": 18042 }, { "epoch": 2.9454307987429083, "grad_norm": 1.7867621183395386, "learning_rate": 1.8572816864104144e-05, "loss": 0.5866, "step": 18043 }, { "epoch": 2.9455940573854127, "grad_norm": 1.8136091232299805, "learning_rate": 1.8572653481846585e-05, "loss": 0.5311, "step": 18044 }, { "epoch": 2.945757316027917, "grad_norm": 1.7979810237884521, "learning_rate": 1.8572490090956364e-05, "loss": 0.6313, "step": 18045 }, { "epoch": 2.9459205746704216, "grad_norm": 1.9422125816345215, "learning_rate": 1.8572326691433648e-05, "loss": 0.6601, "step": 18046 }, { "epoch": 2.946083833312926, "grad_norm": 1.77805495262146, "learning_rate": 1.8572163283278602e-05, "loss": 0.5668, "step": 18047 }, { "epoch": 2.9462470919554304, "grad_norm": 1.6690195798873901, "learning_rate": 1.857199986649139e-05, "loss": 0.6081, "step": 18048 }, { "epoch": 2.946410350597935, "grad_norm": 1.8916101455688477, "learning_rate": 1.8571836441072176e-05, "loss": 0.6188, "step": 18049 }, { "epoch": 2.9465736092404393, "grad_norm": 1.9806627035140991, "learning_rate": 1.8571673007021124e-05, "loss": 0.5789, "step": 18050 }, { "epoch": 2.9467368678829438, "grad_norm": 2.029191017150879, "learning_rate": 1.8571509564338403e-05, "loss": 0.7106, "step": 18051 }, { "epoch": 2.946900126525448, "grad_norm": 1.5958247184753418, "learning_rate": 1.857134611302417e-05, "loss": 0.6575, "step": 18052 }, { "epoch": 2.947063385167952, "grad_norm": 1.5890089273452759, "learning_rate": 1.8571182653078598e-05, "loss": 0.5529, "step": 18053 }, { "epoch": 2.9472266438104566, "grad_norm": 1.7500520944595337, "learning_rate": 1.8571019184501842e-05, "loss": 0.566, "step": 18054 }, { "epoch": 2.947389902452961, "grad_norm": 1.9082276821136475, "learning_rate": 1.8570855707294075e-05, "loss": 0.629, "step": 18055 }, { "epoch": 2.9475531610954655, "grad_norm": 1.6358975172042847, "learning_rate": 1.8570692221455456e-05, "loss": 0.5543, "step": 18056 }, { "epoch": 2.94771641973797, "grad_norm": 1.7338250875473022, "learning_rate": 1.8570528726986157e-05, "loss": 0.5883, "step": 18057 }, { "epoch": 2.9478796783804744, "grad_norm": 1.7178035974502563, "learning_rate": 1.8570365223886334e-05, "loss": 0.6212, "step": 18058 }, { "epoch": 2.948042937022979, "grad_norm": 1.795100212097168, "learning_rate": 1.8570201712156154e-05, "loss": 0.6119, "step": 18059 }, { "epoch": 2.948206195665483, "grad_norm": 2.043907642364502, "learning_rate": 1.857003819179579e-05, "loss": 0.6823, "step": 18060 }, { "epoch": 2.9483694543079872, "grad_norm": 1.9183822870254517, "learning_rate": 1.8569874662805394e-05, "loss": 0.5741, "step": 18061 }, { "epoch": 2.9485327129504917, "grad_norm": 2.0951156616210938, "learning_rate": 1.856971112518514e-05, "loss": 0.7513, "step": 18062 }, { "epoch": 2.948695971592996, "grad_norm": 1.830708384513855, "learning_rate": 1.856954757893519e-05, "loss": 0.6137, "step": 18063 }, { "epoch": 2.9488592302355006, "grad_norm": 1.9934183359146118, "learning_rate": 1.85693840240557e-05, "loss": 0.5954, "step": 18064 }, { "epoch": 2.949022488878005, "grad_norm": 1.6199222803115845, "learning_rate": 1.856922046054685e-05, "loss": 0.5232, "step": 18065 }, { "epoch": 2.9491857475205094, "grad_norm": 1.6457316875457764, "learning_rate": 1.8569056888408793e-05, "loss": 0.5333, "step": 18066 }, { "epoch": 2.949349006163014, "grad_norm": 1.4063727855682373, "learning_rate": 1.85688933076417e-05, "loss": 0.455, "step": 18067 }, { "epoch": 2.9495122648055183, "grad_norm": 1.476234793663025, "learning_rate": 1.8568729718245735e-05, "loss": 0.5153, "step": 18068 }, { "epoch": 2.9496755234480228, "grad_norm": 1.5507657527923584, "learning_rate": 1.856856612022106e-05, "loss": 0.5222, "step": 18069 }, { "epoch": 2.949838782090527, "grad_norm": 1.4827975034713745, "learning_rate": 1.856840251356784e-05, "loss": 0.5185, "step": 18070 }, { "epoch": 2.950002040733031, "grad_norm": 1.5673984289169312, "learning_rate": 1.856823889828624e-05, "loss": 0.5566, "step": 18071 }, { "epoch": 2.9501652993755356, "grad_norm": 1.610834002494812, "learning_rate": 1.856807527437643e-05, "loss": 0.5722, "step": 18072 }, { "epoch": 2.95032855801804, "grad_norm": 1.9638224840164185, "learning_rate": 1.856791164183857e-05, "loss": 0.6481, "step": 18073 }, { "epoch": 2.9504918166605445, "grad_norm": 1.5823862552642822, "learning_rate": 1.8567748000672822e-05, "loss": 0.5291, "step": 18074 }, { "epoch": 2.950655075303049, "grad_norm": 1.647438883781433, "learning_rate": 1.8567584350879358e-05, "loss": 0.5731, "step": 18075 }, { "epoch": 2.9508183339455534, "grad_norm": 1.8748046159744263, "learning_rate": 1.8567420692458334e-05, "loss": 0.5919, "step": 18076 }, { "epoch": 2.9509815925880574, "grad_norm": 1.7580033540725708, "learning_rate": 1.8567257025409925e-05, "loss": 0.5168, "step": 18077 }, { "epoch": 2.951144851230562, "grad_norm": 1.644645094871521, "learning_rate": 1.8567093349734288e-05, "loss": 0.5712, "step": 18078 }, { "epoch": 2.9513081098730662, "grad_norm": 1.9562546014785767, "learning_rate": 1.8566929665431587e-05, "loss": 0.6328, "step": 18079 }, { "epoch": 2.9514713685155707, "grad_norm": 1.8055895566940308, "learning_rate": 1.8566765972501994e-05, "loss": 0.5823, "step": 18080 }, { "epoch": 2.951634627158075, "grad_norm": 1.870413899421692, "learning_rate": 1.8566602270945672e-05, "loss": 0.4999, "step": 18081 }, { "epoch": 2.9517978858005796, "grad_norm": 1.6711071729660034, "learning_rate": 1.8566438560762777e-05, "loss": 0.5351, "step": 18082 }, { "epoch": 2.951961144443084, "grad_norm": 1.6578034162521362, "learning_rate": 1.8566274841953485e-05, "loss": 0.5221, "step": 18083 }, { "epoch": 2.9521244030855884, "grad_norm": 1.6413918733596802, "learning_rate": 1.856611111451796e-05, "loss": 0.595, "step": 18084 }, { "epoch": 2.952287661728093, "grad_norm": 1.5837206840515137, "learning_rate": 1.8565947378456357e-05, "loss": 0.4413, "step": 18085 }, { "epoch": 2.9524509203705973, "grad_norm": 1.4865412712097168, "learning_rate": 1.8565783633768846e-05, "loss": 0.4726, "step": 18086 }, { "epoch": 2.9526141790131017, "grad_norm": 1.542541742324829, "learning_rate": 1.85656198804556e-05, "loss": 0.4721, "step": 18087 }, { "epoch": 2.9527774376556057, "grad_norm": 2.103179931640625, "learning_rate": 1.8565456118516772e-05, "loss": 0.6867, "step": 18088 }, { "epoch": 2.95294069629811, "grad_norm": 2.0652265548706055, "learning_rate": 1.8565292347952534e-05, "loss": 0.7377, "step": 18089 }, { "epoch": 2.9531039549406146, "grad_norm": 1.577345371246338, "learning_rate": 1.856512856876305e-05, "loss": 0.5152, "step": 18090 }, { "epoch": 2.953267213583119, "grad_norm": 1.952079176902771, "learning_rate": 1.856496478094848e-05, "loss": 0.6658, "step": 18091 }, { "epoch": 2.9534304722256235, "grad_norm": 1.9994937181472778, "learning_rate": 1.8564800984508996e-05, "loss": 0.6605, "step": 18092 }, { "epoch": 2.953593730868128, "grad_norm": 1.8440040349960327, "learning_rate": 1.856463717944476e-05, "loss": 0.7243, "step": 18093 }, { "epoch": 2.9537569895106324, "grad_norm": 1.5432770252227783, "learning_rate": 1.8564473365755936e-05, "loss": 0.4776, "step": 18094 }, { "epoch": 2.9539202481531364, "grad_norm": 1.98477303981781, "learning_rate": 1.8564309543442687e-05, "loss": 0.6147, "step": 18095 }, { "epoch": 2.954083506795641, "grad_norm": 1.8681087493896484, "learning_rate": 1.8564145712505183e-05, "loss": 0.5399, "step": 18096 }, { "epoch": 2.9542467654381452, "grad_norm": 2.1236751079559326, "learning_rate": 1.856398187294359e-05, "loss": 0.5822, "step": 18097 }, { "epoch": 2.9544100240806497, "grad_norm": 1.9147045612335205, "learning_rate": 1.856381802475806e-05, "loss": 0.6008, "step": 18098 }, { "epoch": 2.954573282723154, "grad_norm": 1.976349949836731, "learning_rate": 1.8563654167948775e-05, "loss": 0.629, "step": 18099 }, { "epoch": 2.9547365413656586, "grad_norm": 1.6939783096313477, "learning_rate": 1.856349030251589e-05, "loss": 0.5764, "step": 18100 }, { "epoch": 2.954899800008163, "grad_norm": 1.8261750936508179, "learning_rate": 1.8563326428459575e-05, "loss": 0.6102, "step": 18101 }, { "epoch": 2.9550630586506674, "grad_norm": 1.7089571952819824, "learning_rate": 1.856316254577999e-05, "loss": 0.4901, "step": 18102 }, { "epoch": 2.955226317293172, "grad_norm": 2.123260974884033, "learning_rate": 1.8562998654477306e-05, "loss": 0.6927, "step": 18103 }, { "epoch": 2.9553895759356763, "grad_norm": 2.152907609939575, "learning_rate": 1.856283475455168e-05, "loss": 0.5964, "step": 18104 }, { "epoch": 2.9555528345781807, "grad_norm": 1.6917438507080078, "learning_rate": 1.8562670846003283e-05, "loss": 0.5632, "step": 18105 }, { "epoch": 2.9557160932206847, "grad_norm": 1.9249720573425293, "learning_rate": 1.856250692883228e-05, "loss": 0.5946, "step": 18106 }, { "epoch": 2.955879351863189, "grad_norm": 1.9186310768127441, "learning_rate": 1.8562343003038833e-05, "loss": 0.7153, "step": 18107 }, { "epoch": 2.9560426105056936, "grad_norm": 1.4700809717178345, "learning_rate": 1.8562179068623112e-05, "loss": 0.5064, "step": 18108 }, { "epoch": 2.956205869148198, "grad_norm": 2.276103973388672, "learning_rate": 1.8562015125585276e-05, "loss": 0.7563, "step": 18109 }, { "epoch": 2.9563691277907025, "grad_norm": 1.390039324760437, "learning_rate": 1.8561851173925495e-05, "loss": 0.5032, "step": 18110 }, { "epoch": 2.956532386433207, "grad_norm": 1.741662859916687, "learning_rate": 1.8561687213643932e-05, "loss": 0.5679, "step": 18111 }, { "epoch": 2.956695645075711, "grad_norm": 1.7234525680541992, "learning_rate": 1.8561523244740752e-05, "loss": 0.6022, "step": 18112 }, { "epoch": 2.9568589037182154, "grad_norm": 1.9708195924758911, "learning_rate": 1.8561359267216116e-05, "loss": 0.5327, "step": 18113 }, { "epoch": 2.95702216236072, "grad_norm": 1.6738073825836182, "learning_rate": 1.8561195281070198e-05, "loss": 0.5085, "step": 18114 }, { "epoch": 2.9571854210032242, "grad_norm": 1.5160729885101318, "learning_rate": 1.856103128630316e-05, "loss": 0.6, "step": 18115 }, { "epoch": 2.9573486796457287, "grad_norm": 1.6154049634933472, "learning_rate": 1.8560867282915164e-05, "loss": 0.5292, "step": 18116 }, { "epoch": 2.957511938288233, "grad_norm": 1.945219874382019, "learning_rate": 1.8560703270906376e-05, "loss": 0.6413, "step": 18117 }, { "epoch": 2.9576751969307375, "grad_norm": 1.925022006034851, "learning_rate": 1.856053925027696e-05, "loss": 0.6491, "step": 18118 }, { "epoch": 2.957838455573242, "grad_norm": 1.9037195444107056, "learning_rate": 1.856037522102709e-05, "loss": 0.5963, "step": 18119 }, { "epoch": 2.9580017142157464, "grad_norm": 1.7455673217773438, "learning_rate": 1.8560211183156918e-05, "loss": 0.5511, "step": 18120 }, { "epoch": 2.958164972858251, "grad_norm": 1.7736386060714722, "learning_rate": 1.856004713666662e-05, "loss": 0.5153, "step": 18121 }, { "epoch": 2.9583282315007553, "grad_norm": 1.6166919469833374, "learning_rate": 1.8559883081556352e-05, "loss": 0.4859, "step": 18122 }, { "epoch": 2.9584914901432593, "grad_norm": 1.6300199031829834, "learning_rate": 1.855971901782629e-05, "loss": 0.5079, "step": 18123 }, { "epoch": 2.9586547487857637, "grad_norm": 2.0903406143188477, "learning_rate": 1.855955494547659e-05, "loss": 0.6673, "step": 18124 }, { "epoch": 2.958818007428268, "grad_norm": 1.5611151456832886, "learning_rate": 1.855939086450742e-05, "loss": 0.6084, "step": 18125 }, { "epoch": 2.9589812660707726, "grad_norm": 1.7498500347137451, "learning_rate": 1.8559226774918945e-05, "loss": 0.6196, "step": 18126 }, { "epoch": 2.959144524713277, "grad_norm": 1.8391656875610352, "learning_rate": 1.855906267671133e-05, "loss": 0.6688, "step": 18127 }, { "epoch": 2.9593077833557815, "grad_norm": 2.016286611557007, "learning_rate": 1.8558898569884743e-05, "loss": 0.6459, "step": 18128 }, { "epoch": 2.959471041998286, "grad_norm": 1.7072981595993042, "learning_rate": 1.8558734454439348e-05, "loss": 0.5873, "step": 18129 }, { "epoch": 2.95963430064079, "grad_norm": 1.8694050312042236, "learning_rate": 1.855857033037531e-05, "loss": 0.6056, "step": 18130 }, { "epoch": 2.9597975592832944, "grad_norm": 1.919055461883545, "learning_rate": 1.8558406197692792e-05, "loss": 0.539, "step": 18131 }, { "epoch": 2.959960817925799, "grad_norm": 1.7384508848190308, "learning_rate": 1.8558242056391963e-05, "loss": 0.5278, "step": 18132 }, { "epoch": 2.9601240765683032, "grad_norm": 2.3119008541107178, "learning_rate": 1.8558077906472988e-05, "loss": 0.629, "step": 18133 }, { "epoch": 2.9602873352108077, "grad_norm": 1.6334726810455322, "learning_rate": 1.8557913747936028e-05, "loss": 0.518, "step": 18134 }, { "epoch": 2.960450593853312, "grad_norm": 1.7381631135940552, "learning_rate": 1.8557749580781253e-05, "loss": 0.4551, "step": 18135 }, { "epoch": 2.9606138524958165, "grad_norm": 1.728139042854309, "learning_rate": 1.8557585405008823e-05, "loss": 0.55, "step": 18136 }, { "epoch": 2.960777111138321, "grad_norm": 1.6108583211898804, "learning_rate": 1.8557421220618913e-05, "loss": 0.5034, "step": 18137 }, { "epoch": 2.9609403697808254, "grad_norm": 1.554089903831482, "learning_rate": 1.8557257027611677e-05, "loss": 0.5475, "step": 18138 }, { "epoch": 2.96110362842333, "grad_norm": 1.6109753847122192, "learning_rate": 1.8557092825987286e-05, "loss": 0.5055, "step": 18139 }, { "epoch": 2.9612668870658343, "grad_norm": 1.6414663791656494, "learning_rate": 1.8556928615745903e-05, "loss": 0.5128, "step": 18140 }, { "epoch": 2.9614301457083383, "grad_norm": 1.6871025562286377, "learning_rate": 1.85567643968877e-05, "loss": 0.5843, "step": 18141 }, { "epoch": 2.9615934043508427, "grad_norm": 1.7328838109970093, "learning_rate": 1.8556600169412835e-05, "loss": 0.5508, "step": 18142 }, { "epoch": 2.961756662993347, "grad_norm": 1.868011236190796, "learning_rate": 1.8556435933321478e-05, "loss": 0.6581, "step": 18143 }, { "epoch": 2.9619199216358516, "grad_norm": 1.5218510627746582, "learning_rate": 1.855627168861379e-05, "loss": 0.4955, "step": 18144 }, { "epoch": 2.962083180278356, "grad_norm": 1.6428921222686768, "learning_rate": 1.8556107435289936e-05, "loss": 0.5685, "step": 18145 }, { "epoch": 2.9622464389208605, "grad_norm": 1.5416829586029053, "learning_rate": 1.8555943173350087e-05, "loss": 0.4708, "step": 18146 }, { "epoch": 2.962409697563365, "grad_norm": 1.6917961835861206, "learning_rate": 1.855577890279441e-05, "loss": 0.5702, "step": 18147 }, { "epoch": 2.962572956205869, "grad_norm": 1.7757213115692139, "learning_rate": 1.8555614623623058e-05, "loss": 0.5688, "step": 18148 }, { "epoch": 2.9627362148483734, "grad_norm": 1.778321385383606, "learning_rate": 1.8555450335836206e-05, "loss": 0.5371, "step": 18149 }, { "epoch": 2.962899473490878, "grad_norm": 1.8449740409851074, "learning_rate": 1.8555286039434022e-05, "loss": 0.5893, "step": 18150 }, { "epoch": 2.9630627321333822, "grad_norm": 1.6250321865081787, "learning_rate": 1.8555121734416663e-05, "loss": 0.5416, "step": 18151 }, { "epoch": 2.9632259907758867, "grad_norm": 1.7698135375976562, "learning_rate": 1.8554957420784305e-05, "loss": 0.6383, "step": 18152 }, { "epoch": 2.963389249418391, "grad_norm": 1.7123297452926636, "learning_rate": 1.85547930985371e-05, "loss": 0.6039, "step": 18153 }, { "epoch": 2.9635525080608955, "grad_norm": 1.734176754951477, "learning_rate": 1.8554628767675223e-05, "loss": 0.5882, "step": 18154 }, { "epoch": 2.9637157667034, "grad_norm": 1.7789782285690308, "learning_rate": 1.8554464428198836e-05, "loss": 0.5864, "step": 18155 }, { "epoch": 2.9638790253459044, "grad_norm": 1.7084827423095703, "learning_rate": 1.855430008010811e-05, "loss": 0.5902, "step": 18156 }, { "epoch": 2.964042283988409, "grad_norm": 1.6308003664016724, "learning_rate": 1.85541357234032e-05, "loss": 0.4939, "step": 18157 }, { "epoch": 2.9642055426309133, "grad_norm": 1.908738136291504, "learning_rate": 1.8553971358084283e-05, "loss": 0.6074, "step": 18158 }, { "epoch": 2.9643688012734173, "grad_norm": 1.7047337293624878, "learning_rate": 1.8553806984151513e-05, "loss": 0.5408, "step": 18159 }, { "epoch": 2.9645320599159217, "grad_norm": 1.6893696784973145, "learning_rate": 1.855364260160507e-05, "loss": 0.5852, "step": 18160 }, { "epoch": 2.964695318558426, "grad_norm": 1.947135329246521, "learning_rate": 1.8553478210445103e-05, "loss": 0.6195, "step": 18161 }, { "epoch": 2.9648585772009306, "grad_norm": 1.7147351503372192, "learning_rate": 1.855331381067179e-05, "loss": 0.5232, "step": 18162 }, { "epoch": 2.965021835843435, "grad_norm": 1.4189355373382568, "learning_rate": 1.8553149402285292e-05, "loss": 0.409, "step": 18163 }, { "epoch": 2.9651850944859395, "grad_norm": 1.6084070205688477, "learning_rate": 1.8552984985285776e-05, "loss": 0.5148, "step": 18164 }, { "epoch": 2.9653483531284435, "grad_norm": 1.6608827114105225, "learning_rate": 1.8552820559673402e-05, "loss": 0.5415, "step": 18165 }, { "epoch": 2.965511611770948, "grad_norm": 1.468374490737915, "learning_rate": 1.8552656125448343e-05, "loss": 0.45, "step": 18166 }, { "epoch": 2.9656748704134523, "grad_norm": 1.904512882232666, "learning_rate": 1.855249168261076e-05, "loss": 0.5902, "step": 18167 }, { "epoch": 2.965838129055957, "grad_norm": 1.8768353462219238, "learning_rate": 1.8552327231160823e-05, "loss": 0.5993, "step": 18168 }, { "epoch": 2.9660013876984612, "grad_norm": 2.0201563835144043, "learning_rate": 1.8552162771098694e-05, "loss": 0.6215, "step": 18169 }, { "epoch": 2.9661646463409657, "grad_norm": 1.4923202991485596, "learning_rate": 1.8551998302424538e-05, "loss": 0.475, "step": 18170 }, { "epoch": 2.96632790498347, "grad_norm": 1.660489559173584, "learning_rate": 1.8551833825138522e-05, "loss": 0.5733, "step": 18171 }, { "epoch": 2.9664911636259745, "grad_norm": 1.5600953102111816, "learning_rate": 1.8551669339240814e-05, "loss": 0.5897, "step": 18172 }, { "epoch": 2.966654422268479, "grad_norm": 2.0842835903167725, "learning_rate": 1.8551504844731573e-05, "loss": 0.5818, "step": 18173 }, { "epoch": 2.9668176809109834, "grad_norm": 1.7132611274719238, "learning_rate": 1.8551340341610972e-05, "loss": 0.5943, "step": 18174 }, { "epoch": 2.966980939553488, "grad_norm": 1.6732656955718994, "learning_rate": 1.8551175829879173e-05, "loss": 0.5182, "step": 18175 }, { "epoch": 2.967144198195992, "grad_norm": 2.1856913566589355, "learning_rate": 1.855101130953634e-05, "loss": 0.6668, "step": 18176 }, { "epoch": 2.9673074568384963, "grad_norm": 1.6018717288970947, "learning_rate": 1.8550846780582645e-05, "loss": 0.4983, "step": 18177 }, { "epoch": 2.9674707154810007, "grad_norm": 1.5558254718780518, "learning_rate": 1.8550682243018248e-05, "loss": 0.4592, "step": 18178 }, { "epoch": 2.967633974123505, "grad_norm": 1.8557801246643066, "learning_rate": 1.8550517696843314e-05, "loss": 0.509, "step": 18179 }, { "epoch": 2.9677972327660096, "grad_norm": 1.6679770946502686, "learning_rate": 1.8550353142058016e-05, "loss": 0.4933, "step": 18180 }, { "epoch": 2.967960491408514, "grad_norm": 1.8816874027252197, "learning_rate": 1.855018857866251e-05, "loss": 0.6487, "step": 18181 }, { "epoch": 2.9681237500510185, "grad_norm": 1.55352783203125, "learning_rate": 1.8550024006656967e-05, "loss": 0.4996, "step": 18182 }, { "epoch": 2.9682870086935225, "grad_norm": 1.5650925636291504, "learning_rate": 1.8549859426041555e-05, "loss": 0.4752, "step": 18183 }, { "epoch": 2.968450267336027, "grad_norm": 1.7042255401611328, "learning_rate": 1.8549694836816432e-05, "loss": 0.5518, "step": 18184 }, { "epoch": 2.9686135259785313, "grad_norm": 1.9354888200759888, "learning_rate": 1.854953023898177e-05, "loss": 0.5262, "step": 18185 }, { "epoch": 2.968776784621036, "grad_norm": 1.850725531578064, "learning_rate": 1.8549365632537733e-05, "loss": 0.5856, "step": 18186 }, { "epoch": 2.96894004326354, "grad_norm": 1.823691725730896, "learning_rate": 1.8549201017484493e-05, "loss": 0.5268, "step": 18187 }, { "epoch": 2.9691033019060447, "grad_norm": 2.0509369373321533, "learning_rate": 1.8549036393822206e-05, "loss": 0.6028, "step": 18188 }, { "epoch": 2.969266560548549, "grad_norm": 1.6641638278961182, "learning_rate": 1.8548871761551038e-05, "loss": 0.5109, "step": 18189 }, { "epoch": 2.9694298191910535, "grad_norm": 1.8646317720413208, "learning_rate": 1.854870712067116e-05, "loss": 0.632, "step": 18190 }, { "epoch": 2.969593077833558, "grad_norm": 1.6161279678344727, "learning_rate": 1.854854247118274e-05, "loss": 0.5242, "step": 18191 }, { "epoch": 2.9697563364760624, "grad_norm": 1.4998263120651245, "learning_rate": 1.8548377813085937e-05, "loss": 0.5047, "step": 18192 }, { "epoch": 2.969919595118567, "grad_norm": 1.7434195280075073, "learning_rate": 1.854821314638092e-05, "loss": 0.5342, "step": 18193 }, { "epoch": 2.970082853761071, "grad_norm": 1.4435157775878906, "learning_rate": 1.8548048471067854e-05, "loss": 0.5177, "step": 18194 }, { "epoch": 2.9702461124035753, "grad_norm": 1.939494252204895, "learning_rate": 1.854788378714691e-05, "loss": 0.5957, "step": 18195 }, { "epoch": 2.9704093710460797, "grad_norm": 1.326919674873352, "learning_rate": 1.8547719094618243e-05, "loss": 0.4524, "step": 18196 }, { "epoch": 2.970572629688584, "grad_norm": 1.5881034135818481, "learning_rate": 1.8547554393482026e-05, "loss": 0.5211, "step": 18197 }, { "epoch": 2.9707358883310886, "grad_norm": 2.0996592044830322, "learning_rate": 1.8547389683738427e-05, "loss": 0.6088, "step": 18198 }, { "epoch": 2.970899146973593, "grad_norm": 1.5659735202789307, "learning_rate": 1.854722496538761e-05, "loss": 0.5665, "step": 18199 }, { "epoch": 2.9710624056160975, "grad_norm": 1.637933611869812, "learning_rate": 1.8547060238429737e-05, "loss": 0.5386, "step": 18200 }, { "epoch": 2.9712256642586015, "grad_norm": 1.8051273822784424, "learning_rate": 1.854689550286498e-05, "loss": 0.5541, "step": 18201 }, { "epoch": 2.971388922901106, "grad_norm": 1.7232459783554077, "learning_rate": 1.8546730758693498e-05, "loss": 0.5401, "step": 18202 }, { "epoch": 2.9715521815436103, "grad_norm": 1.9662402868270874, "learning_rate": 1.8546566005915458e-05, "loss": 0.6396, "step": 18203 }, { "epoch": 2.9717154401861148, "grad_norm": 1.3359075784683228, "learning_rate": 1.854640124453103e-05, "loss": 0.502, "step": 18204 }, { "epoch": 2.971878698828619, "grad_norm": 1.657192587852478, "learning_rate": 1.8546236474540384e-05, "loss": 0.4249, "step": 18205 }, { "epoch": 2.9720419574711237, "grad_norm": 1.7939566373825073, "learning_rate": 1.854607169594367e-05, "loss": 0.5316, "step": 18206 }, { "epoch": 2.972205216113628, "grad_norm": 1.7591578960418701, "learning_rate": 1.8545906908741074e-05, "loss": 0.4955, "step": 18207 }, { "epoch": 2.9723684747561325, "grad_norm": 1.8536492586135864, "learning_rate": 1.8545742112932744e-05, "loss": 0.7046, "step": 18208 }, { "epoch": 2.972531733398637, "grad_norm": 1.5745627880096436, "learning_rate": 1.854557730851886e-05, "loss": 0.4885, "step": 18209 }, { "epoch": 2.9726949920411414, "grad_norm": 1.4578603506088257, "learning_rate": 1.854541249549958e-05, "loss": 0.5735, "step": 18210 }, { "epoch": 2.972858250683646, "grad_norm": 1.9467918872833252, "learning_rate": 1.854524767387507e-05, "loss": 0.6246, "step": 18211 }, { "epoch": 2.97302150932615, "grad_norm": 1.8222229480743408, "learning_rate": 1.85450828436455e-05, "loss": 0.6184, "step": 18212 }, { "epoch": 2.9731847679686543, "grad_norm": 1.979440450668335, "learning_rate": 1.8544918004811034e-05, "loss": 0.6552, "step": 18213 }, { "epoch": 2.9733480266111587, "grad_norm": 2.1005289554595947, "learning_rate": 1.8544753157371837e-05, "loss": 0.675, "step": 18214 }, { "epoch": 2.973511285253663, "grad_norm": 1.5870260000228882, "learning_rate": 1.8544588301328077e-05, "loss": 0.5165, "step": 18215 }, { "epoch": 2.9736745438961676, "grad_norm": 1.5060704946517944, "learning_rate": 1.8544423436679916e-05, "loss": 0.5701, "step": 18216 }, { "epoch": 2.973837802538672, "grad_norm": 2.0405776500701904, "learning_rate": 1.8544258563427526e-05, "loss": 0.643, "step": 18217 }, { "epoch": 2.974001061181176, "grad_norm": 2.1862711906433105, "learning_rate": 1.8544093681571067e-05, "loss": 0.7036, "step": 18218 }, { "epoch": 2.9741643198236805, "grad_norm": 2.1261558532714844, "learning_rate": 1.854392879111071e-05, "loss": 0.741, "step": 18219 }, { "epoch": 2.974327578466185, "grad_norm": 2.092999219894409, "learning_rate": 1.8543763892046618e-05, "loss": 0.7189, "step": 18220 }, { "epoch": 2.9744908371086893, "grad_norm": 1.74619722366333, "learning_rate": 1.8543598984378958e-05, "loss": 0.5812, "step": 18221 }, { "epoch": 2.9746540957511938, "grad_norm": 1.9988503456115723, "learning_rate": 1.8543434068107896e-05, "loss": 0.6584, "step": 18222 }, { "epoch": 2.974817354393698, "grad_norm": 1.783095121383667, "learning_rate": 1.85432691432336e-05, "loss": 0.5017, "step": 18223 }, { "epoch": 2.9749806130362026, "grad_norm": 1.7868540287017822, "learning_rate": 1.8543104209756233e-05, "loss": 0.5913, "step": 18224 }, { "epoch": 2.975143871678707, "grad_norm": 1.7318741083145142, "learning_rate": 1.8542939267675962e-05, "loss": 0.5376, "step": 18225 }, { "epoch": 2.9753071303212115, "grad_norm": 1.6010483503341675, "learning_rate": 1.8542774316992953e-05, "loss": 0.5271, "step": 18226 }, { "epoch": 2.975470388963716, "grad_norm": 1.786377191543579, "learning_rate": 1.8542609357707376e-05, "loss": 0.667, "step": 18227 }, { "epoch": 2.9756336476062204, "grad_norm": 1.8095815181732178, "learning_rate": 1.854244438981939e-05, "loss": 0.5791, "step": 18228 }, { "epoch": 2.9757969062487244, "grad_norm": 2.135197162628174, "learning_rate": 1.8542279413329164e-05, "loss": 0.5476, "step": 18229 }, { "epoch": 2.975960164891229, "grad_norm": 1.7838348150253296, "learning_rate": 1.8542114428236864e-05, "loss": 0.5907, "step": 18230 }, { "epoch": 2.9761234235337333, "grad_norm": 1.727491855621338, "learning_rate": 1.854194943454266e-05, "loss": 0.5426, "step": 18231 }, { "epoch": 2.9762866821762377, "grad_norm": 1.7341772317886353, "learning_rate": 1.8541784432246716e-05, "loss": 0.5738, "step": 18232 }, { "epoch": 2.976449940818742, "grad_norm": 1.7566782236099243, "learning_rate": 1.8541619421349195e-05, "loss": 0.4872, "step": 18233 }, { "epoch": 2.9766131994612466, "grad_norm": 1.8296979665756226, "learning_rate": 1.8541454401850268e-05, "loss": 0.5192, "step": 18234 }, { "epoch": 2.976776458103751, "grad_norm": 1.8785728216171265, "learning_rate": 1.8541289373750098e-05, "loss": 0.552, "step": 18235 }, { "epoch": 2.976939716746255, "grad_norm": 1.5980488061904907, "learning_rate": 1.854112433704885e-05, "loss": 0.5409, "step": 18236 }, { "epoch": 2.9771029753887595, "grad_norm": 1.9695004224777222, "learning_rate": 1.8540959291746694e-05, "loss": 0.5728, "step": 18237 }, { "epoch": 2.977266234031264, "grad_norm": 2.1932804584503174, "learning_rate": 1.8540794237843793e-05, "loss": 0.5602, "step": 18238 }, { "epoch": 2.9774294926737683, "grad_norm": 1.9010404348373413, "learning_rate": 1.8540629175340315e-05, "loss": 0.6171, "step": 18239 }, { "epoch": 2.9775927513162728, "grad_norm": 1.7625526189804077, "learning_rate": 1.8540464104236428e-05, "loss": 0.5091, "step": 18240 }, { "epoch": 2.977756009958777, "grad_norm": 1.6501644849777222, "learning_rate": 1.854029902453229e-05, "loss": 0.5233, "step": 18241 }, { "epoch": 2.9779192686012816, "grad_norm": 2.053384780883789, "learning_rate": 1.8540133936228077e-05, "loss": 0.7264, "step": 18242 }, { "epoch": 2.978082527243786, "grad_norm": 2.166025400161743, "learning_rate": 1.853996883932395e-05, "loss": 0.7876, "step": 18243 }, { "epoch": 2.9782457858862905, "grad_norm": 2.0199954509735107, "learning_rate": 1.853980373382008e-05, "loss": 0.6919, "step": 18244 }, { "epoch": 2.978409044528795, "grad_norm": 1.955330729484558, "learning_rate": 1.8539638619716628e-05, "loss": 0.5512, "step": 18245 }, { "epoch": 2.9785723031712994, "grad_norm": 1.5919924974441528, "learning_rate": 1.853947349701376e-05, "loss": 0.5603, "step": 18246 }, { "epoch": 2.9787355618138034, "grad_norm": 1.9034037590026855, "learning_rate": 1.8539308365711644e-05, "loss": 0.55, "step": 18247 }, { "epoch": 2.978898820456308, "grad_norm": 1.5993704795837402, "learning_rate": 1.8539143225810453e-05, "loss": 0.5454, "step": 18248 }, { "epoch": 2.9790620790988123, "grad_norm": 1.7215934991836548, "learning_rate": 1.853897807731034e-05, "loss": 0.6026, "step": 18249 }, { "epoch": 2.9792253377413167, "grad_norm": 1.543052077293396, "learning_rate": 1.8538812920211484e-05, "loss": 0.5468, "step": 18250 }, { "epoch": 2.979388596383821, "grad_norm": 1.9889198541641235, "learning_rate": 1.8538647754514043e-05, "loss": 0.6638, "step": 18251 }, { "epoch": 2.9795518550263256, "grad_norm": 1.6613562107086182, "learning_rate": 1.8538482580218185e-05, "loss": 0.6302, "step": 18252 }, { "epoch": 2.9797151136688296, "grad_norm": 1.639471411705017, "learning_rate": 1.853831739732408e-05, "loss": 0.5198, "step": 18253 }, { "epoch": 2.979878372311334, "grad_norm": 1.5015804767608643, "learning_rate": 1.8538152205831886e-05, "loss": 0.4911, "step": 18254 }, { "epoch": 2.9800416309538384, "grad_norm": 1.7349450588226318, "learning_rate": 1.853798700574178e-05, "loss": 0.5028, "step": 18255 }, { "epoch": 2.980204889596343, "grad_norm": 1.6073940992355347, "learning_rate": 1.8537821797053922e-05, "loss": 0.5306, "step": 18256 }, { "epoch": 2.9803681482388473, "grad_norm": 1.8022133111953735, "learning_rate": 1.853765657976848e-05, "loss": 0.5918, "step": 18257 }, { "epoch": 2.9805314068813518, "grad_norm": 2.0392026901245117, "learning_rate": 1.853749135388562e-05, "loss": 0.6616, "step": 18258 }, { "epoch": 2.980694665523856, "grad_norm": 1.5212280750274658, "learning_rate": 1.8537326119405507e-05, "loss": 0.5349, "step": 18259 }, { "epoch": 2.9808579241663606, "grad_norm": 2.0180530548095703, "learning_rate": 1.8537160876328313e-05, "loss": 0.5742, "step": 18260 }, { "epoch": 2.981021182808865, "grad_norm": 1.9912445545196533, "learning_rate": 1.8536995624654197e-05, "loss": 0.5866, "step": 18261 }, { "epoch": 2.9811844414513695, "grad_norm": 1.7667882442474365, "learning_rate": 1.853683036438333e-05, "loss": 0.4847, "step": 18262 }, { "epoch": 2.981347700093874, "grad_norm": 1.3009551763534546, "learning_rate": 1.8536665095515876e-05, "loss": 0.4656, "step": 18263 }, { "epoch": 2.981510958736378, "grad_norm": 1.502537727355957, "learning_rate": 1.8536499818052e-05, "loss": 0.5923, "step": 18264 }, { "epoch": 2.9816742173788824, "grad_norm": 1.6453442573547363, "learning_rate": 1.8536334531991874e-05, "loss": 0.5022, "step": 18265 }, { "epoch": 2.981837476021387, "grad_norm": 1.9266594648361206, "learning_rate": 1.8536169237335663e-05, "loss": 0.6209, "step": 18266 }, { "epoch": 2.9820007346638913, "grad_norm": 4.259009838104248, "learning_rate": 1.853600393408353e-05, "loss": 1.13, "step": 18267 }, { "epoch": 2.9821639933063957, "grad_norm": 1.8285460472106934, "learning_rate": 1.853583862223564e-05, "loss": 0.5543, "step": 18268 }, { "epoch": 2.9823272519489, "grad_norm": 1.7889249324798584, "learning_rate": 1.853567330179217e-05, "loss": 0.6034, "step": 18269 }, { "epoch": 2.9824905105914046, "grad_norm": 1.7460594177246094, "learning_rate": 1.8535507972753275e-05, "loss": 0.5562, "step": 18270 }, { "epoch": 2.9826537692339086, "grad_norm": 1.6565920114517212, "learning_rate": 1.8535342635119128e-05, "loss": 0.5559, "step": 18271 }, { "epoch": 2.982817027876413, "grad_norm": 1.907871127128601, "learning_rate": 1.853517728888989e-05, "loss": 0.6358, "step": 18272 }, { "epoch": 2.9829802865189174, "grad_norm": 1.2140332460403442, "learning_rate": 1.8535011934065733e-05, "loss": 0.3917, "step": 18273 }, { "epoch": 2.983143545161422, "grad_norm": 1.6336971521377563, "learning_rate": 1.8534846570646818e-05, "loss": 0.5465, "step": 18274 }, { "epoch": 2.9833068038039263, "grad_norm": 1.8147526979446411, "learning_rate": 1.8534681198633318e-05, "loss": 0.576, "step": 18275 }, { "epoch": 2.9834700624464308, "grad_norm": 1.6397618055343628, "learning_rate": 1.8534515818025397e-05, "loss": 0.5882, "step": 18276 }, { "epoch": 2.983633321088935, "grad_norm": 1.8914878368377686, "learning_rate": 1.853435042882322e-05, "loss": 0.664, "step": 18277 }, { "epoch": 2.9837965797314396, "grad_norm": 1.6835737228393555, "learning_rate": 1.853418503102695e-05, "loss": 0.5484, "step": 18278 }, { "epoch": 2.983959838373944, "grad_norm": 1.7317651510238647, "learning_rate": 1.8534019624636764e-05, "loss": 0.6047, "step": 18279 }, { "epoch": 2.9841230970164485, "grad_norm": 1.4397761821746826, "learning_rate": 1.853385420965282e-05, "loss": 0.4041, "step": 18280 }, { "epoch": 2.984286355658953, "grad_norm": 2.173340320587158, "learning_rate": 1.853368878607529e-05, "loss": 0.612, "step": 18281 }, { "epoch": 2.984449614301457, "grad_norm": 2.129903793334961, "learning_rate": 1.853352335390433e-05, "loss": 0.7016, "step": 18282 }, { "epoch": 2.9846128729439614, "grad_norm": 1.980839729309082, "learning_rate": 1.853335791314012e-05, "loss": 0.6208, "step": 18283 }, { "epoch": 2.984776131586466, "grad_norm": 1.7694593667984009, "learning_rate": 1.853319246378282e-05, "loss": 0.5816, "step": 18284 }, { "epoch": 2.9849393902289703, "grad_norm": 1.8405898809432983, "learning_rate": 1.85330270058326e-05, "loss": 0.5882, "step": 18285 }, { "epoch": 2.9851026488714747, "grad_norm": 2.228308916091919, "learning_rate": 1.8532861539289618e-05, "loss": 0.6802, "step": 18286 }, { "epoch": 2.985265907513979, "grad_norm": 1.6935869455337524, "learning_rate": 1.853269606415405e-05, "loss": 0.4668, "step": 18287 }, { "epoch": 2.9854291661564836, "grad_norm": 1.7907236814498901, "learning_rate": 1.853253058042606e-05, "loss": 0.5881, "step": 18288 }, { "epoch": 2.9855924247989876, "grad_norm": 1.7836415767669678, "learning_rate": 1.8532365088105816e-05, "loss": 0.6486, "step": 18289 }, { "epoch": 2.985755683441492, "grad_norm": 1.7960295677185059, "learning_rate": 1.8532199587193477e-05, "loss": 0.5853, "step": 18290 }, { "epoch": 2.9859189420839964, "grad_norm": 1.8897862434387207, "learning_rate": 1.853203407768922e-05, "loss": 0.5507, "step": 18291 }, { "epoch": 2.986082200726501, "grad_norm": 1.6077934503555298, "learning_rate": 1.8531868559593205e-05, "loss": 0.4891, "step": 18292 }, { "epoch": 2.9862454593690053, "grad_norm": 1.5636487007141113, "learning_rate": 1.8531703032905603e-05, "loss": 0.5114, "step": 18293 }, { "epoch": 2.9864087180115098, "grad_norm": 2.3060624599456787, "learning_rate": 1.8531537497626573e-05, "loss": 0.6119, "step": 18294 }, { "epoch": 2.986571976654014, "grad_norm": 1.7930686473846436, "learning_rate": 1.853137195375629e-05, "loss": 0.5559, "step": 18295 }, { "epoch": 2.9867352352965186, "grad_norm": 1.9402812719345093, "learning_rate": 1.853120640129492e-05, "loss": 0.6691, "step": 18296 }, { "epoch": 2.986898493939023, "grad_norm": 2.1886978149414062, "learning_rate": 1.8531040840242625e-05, "loss": 0.5938, "step": 18297 }, { "epoch": 2.9870617525815275, "grad_norm": 2.363053560256958, "learning_rate": 1.8530875270599573e-05, "loss": 0.7055, "step": 18298 }, { "epoch": 2.987225011224032, "grad_norm": 1.7611863613128662, "learning_rate": 1.8530709692365934e-05, "loss": 0.5745, "step": 18299 }, { "epoch": 2.987388269866536, "grad_norm": 1.7467700242996216, "learning_rate": 1.8530544105541872e-05, "loss": 0.576, "step": 18300 }, { "epoch": 2.9875515285090404, "grad_norm": 1.7809275388717651, "learning_rate": 1.8530378510127555e-05, "loss": 0.5546, "step": 18301 }, { "epoch": 2.987714787151545, "grad_norm": 1.7219352722167969, "learning_rate": 1.853021290612315e-05, "loss": 0.5489, "step": 18302 }, { "epoch": 2.9878780457940493, "grad_norm": 1.3850469589233398, "learning_rate": 1.8530047293528818e-05, "loss": 0.4865, "step": 18303 }, { "epoch": 2.9880413044365537, "grad_norm": 1.8991518020629883, "learning_rate": 1.8529881672344733e-05, "loss": 0.5345, "step": 18304 }, { "epoch": 2.988204563079058, "grad_norm": 1.81593656539917, "learning_rate": 1.8529716042571063e-05, "loss": 0.4816, "step": 18305 }, { "epoch": 2.988367821721562, "grad_norm": 1.994473934173584, "learning_rate": 1.8529550404207967e-05, "loss": 0.5592, "step": 18306 }, { "epoch": 2.9885310803640666, "grad_norm": 1.86656653881073, "learning_rate": 1.852938475725562e-05, "loss": 0.5518, "step": 18307 }, { "epoch": 2.988694339006571, "grad_norm": 1.5667191743850708, "learning_rate": 1.852921910171418e-05, "loss": 0.4968, "step": 18308 }, { "epoch": 2.9888575976490754, "grad_norm": 1.873581886291504, "learning_rate": 1.852905343758382e-05, "loss": 0.5235, "step": 18309 }, { "epoch": 2.98902085629158, "grad_norm": 1.5499571561813354, "learning_rate": 1.852888776486471e-05, "loss": 0.5077, "step": 18310 }, { "epoch": 2.9891841149340843, "grad_norm": 1.6724574565887451, "learning_rate": 1.8528722083557006e-05, "loss": 0.5847, "step": 18311 }, { "epoch": 2.9893473735765888, "grad_norm": 1.9302486181259155, "learning_rate": 1.8528556393660884e-05, "loss": 0.635, "step": 18312 }, { "epoch": 2.989510632219093, "grad_norm": 1.619215726852417, "learning_rate": 1.8528390695176507e-05, "loss": 0.4803, "step": 18313 }, { "epoch": 2.9896738908615976, "grad_norm": 2.012899398803711, "learning_rate": 1.8528224988104044e-05, "loss": 0.6497, "step": 18314 }, { "epoch": 2.989837149504102, "grad_norm": 1.9265031814575195, "learning_rate": 1.852805927244366e-05, "loss": 0.5907, "step": 18315 }, { "epoch": 2.9900004081466065, "grad_norm": 1.7280688285827637, "learning_rate": 1.8527893548195522e-05, "loss": 0.5175, "step": 18316 }, { "epoch": 2.9901636667891105, "grad_norm": 1.9736720323562622, "learning_rate": 1.85277278153598e-05, "loss": 0.5953, "step": 18317 }, { "epoch": 2.990326925431615, "grad_norm": 2.152324914932251, "learning_rate": 1.8527562073936657e-05, "loss": 0.7341, "step": 18318 }, { "epoch": 2.9904901840741194, "grad_norm": 1.9676761627197266, "learning_rate": 1.852739632392626e-05, "loss": 0.6987, "step": 18319 }, { "epoch": 2.990653442716624, "grad_norm": 2.0830650329589844, "learning_rate": 1.852723056532878e-05, "loss": 0.654, "step": 18320 }, { "epoch": 2.9908167013591282, "grad_norm": 1.6638784408569336, "learning_rate": 1.8527064798144376e-05, "loss": 0.5867, "step": 18321 }, { "epoch": 2.9909799600016327, "grad_norm": 1.7794435024261475, "learning_rate": 1.8526899022373224e-05, "loss": 0.5915, "step": 18322 }, { "epoch": 2.991143218644137, "grad_norm": 1.8954675197601318, "learning_rate": 1.8526733238015486e-05, "loss": 0.646, "step": 18323 }, { "epoch": 2.991306477286641, "grad_norm": 1.756690263748169, "learning_rate": 1.852656744507133e-05, "loss": 0.564, "step": 18324 }, { "epoch": 2.9914697359291456, "grad_norm": 1.5294424295425415, "learning_rate": 1.8526401643540924e-05, "loss": 0.5258, "step": 18325 }, { "epoch": 2.99163299457165, "grad_norm": 1.7507840394973755, "learning_rate": 1.8526235833424433e-05, "loss": 0.5683, "step": 18326 }, { "epoch": 2.9917962532141544, "grad_norm": 1.7037434577941895, "learning_rate": 1.8526070014722026e-05, "loss": 0.5249, "step": 18327 }, { "epoch": 2.991959511856659, "grad_norm": 1.7802932262420654, "learning_rate": 1.8525904187433866e-05, "loss": 0.5241, "step": 18328 }, { "epoch": 2.9921227704991633, "grad_norm": 1.6140450239181519, "learning_rate": 1.8525738351560122e-05, "loss": 0.432, "step": 18329 }, { "epoch": 2.9922860291416677, "grad_norm": 1.5623780488967896, "learning_rate": 1.8525572507100964e-05, "loss": 0.5267, "step": 18330 }, { "epoch": 2.992449287784172, "grad_norm": 1.6189546585083008, "learning_rate": 1.852540665405656e-05, "loss": 0.4609, "step": 18331 }, { "epoch": 2.9926125464266766, "grad_norm": 1.6717780828475952, "learning_rate": 1.8525240792427067e-05, "loss": 0.6022, "step": 18332 }, { "epoch": 2.992775805069181, "grad_norm": 2.0648698806762695, "learning_rate": 1.8525074922212663e-05, "loss": 0.6479, "step": 18333 }, { "epoch": 2.9929390637116855, "grad_norm": 1.910882830619812, "learning_rate": 1.852490904341351e-05, "loss": 0.5057, "step": 18334 }, { "epoch": 2.9931023223541895, "grad_norm": 1.2483888864517212, "learning_rate": 1.8524743156029778e-05, "loss": 0.4227, "step": 18335 }, { "epoch": 2.993265580996694, "grad_norm": 1.6565933227539062, "learning_rate": 1.8524577260061628e-05, "loss": 0.545, "step": 18336 }, { "epoch": 2.9934288396391984, "grad_norm": 1.4916671514511108, "learning_rate": 1.852441135550923e-05, "loss": 0.4996, "step": 18337 }, { "epoch": 2.993592098281703, "grad_norm": 1.7841724157333374, "learning_rate": 1.8524245442372756e-05, "loss": 0.5064, "step": 18338 }, { "epoch": 2.9937553569242072, "grad_norm": 1.7434172630310059, "learning_rate": 1.8524079520652366e-05, "loss": 0.513, "step": 18339 }, { "epoch": 2.9939186155667117, "grad_norm": 1.7766677141189575, "learning_rate": 1.852391359034823e-05, "loss": 0.5494, "step": 18340 }, { "epoch": 2.9940818742092157, "grad_norm": 1.8250246047973633, "learning_rate": 1.852374765146052e-05, "loss": 0.6103, "step": 18341 }, { "epoch": 2.99424513285172, "grad_norm": 2.1334476470947266, "learning_rate": 1.8523581703989396e-05, "loss": 0.5845, "step": 18342 }, { "epoch": 2.9944083914942246, "grad_norm": 1.7793142795562744, "learning_rate": 1.8523415747935026e-05, "loss": 0.5538, "step": 18343 }, { "epoch": 2.994571650136729, "grad_norm": 1.6337394714355469, "learning_rate": 1.852324978329758e-05, "loss": 0.5648, "step": 18344 }, { "epoch": 2.9947349087792334, "grad_norm": 1.9101333618164062, "learning_rate": 1.8523083810077224e-05, "loss": 0.6265, "step": 18345 }, { "epoch": 2.994898167421738, "grad_norm": 1.8142592906951904, "learning_rate": 1.8522917828274124e-05, "loss": 0.6193, "step": 18346 }, { "epoch": 2.9950614260642423, "grad_norm": 1.7255738973617554, "learning_rate": 1.8522751837888448e-05, "loss": 0.5681, "step": 18347 }, { "epoch": 2.9952246847067467, "grad_norm": 1.7363373041152954, "learning_rate": 1.852258583892036e-05, "loss": 0.5223, "step": 18348 }, { "epoch": 2.995387943349251, "grad_norm": 1.9215550422668457, "learning_rate": 1.8522419831370037e-05, "loss": 0.5246, "step": 18349 }, { "epoch": 2.9955512019917556, "grad_norm": 1.6817104816436768, "learning_rate": 1.8522253815237636e-05, "loss": 0.5568, "step": 18350 }, { "epoch": 2.99571446063426, "grad_norm": 1.4248195886611938, "learning_rate": 1.8522087790523325e-05, "loss": 0.5433, "step": 18351 }, { "epoch": 2.995877719276764, "grad_norm": 2.117276430130005, "learning_rate": 1.852192175722728e-05, "loss": 0.6512, "step": 18352 }, { "epoch": 2.9960409779192685, "grad_norm": 1.483764410018921, "learning_rate": 1.8521755715349658e-05, "loss": 0.5178, "step": 18353 }, { "epoch": 2.996204236561773, "grad_norm": 1.4881583452224731, "learning_rate": 1.852158966489063e-05, "loss": 0.4313, "step": 18354 }, { "epoch": 2.9963674952042774, "grad_norm": 1.8912324905395508, "learning_rate": 1.8521423605850366e-05, "loss": 0.6533, "step": 18355 }, { "epoch": 2.996530753846782, "grad_norm": 1.5512250661849976, "learning_rate": 1.852125753822903e-05, "loss": 0.5125, "step": 18356 }, { "epoch": 2.9966940124892862, "grad_norm": 1.6535760164260864, "learning_rate": 1.852109146202679e-05, "loss": 0.513, "step": 18357 }, { "epoch": 2.9968572711317907, "grad_norm": 1.5639076232910156, "learning_rate": 1.8520925377243812e-05, "loss": 0.512, "step": 18358 }, { "epoch": 2.9970205297742947, "grad_norm": 1.808722734451294, "learning_rate": 1.852075928388026e-05, "loss": 0.5169, "step": 18359 }, { "epoch": 2.997183788416799, "grad_norm": 1.6385999917984009, "learning_rate": 1.8520593181936312e-05, "loss": 0.5465, "step": 18360 }, { "epoch": 2.9973470470593035, "grad_norm": 1.947251319885254, "learning_rate": 1.852042707141213e-05, "loss": 0.5627, "step": 18361 }, { "epoch": 2.997510305701808, "grad_norm": 1.8388338088989258, "learning_rate": 1.8520260952307874e-05, "loss": 0.6269, "step": 18362 }, { "epoch": 2.9976735643443124, "grad_norm": 1.9596970081329346, "learning_rate": 1.852009482462372e-05, "loss": 0.5418, "step": 18363 }, { "epoch": 2.997836822986817, "grad_norm": 1.8457374572753906, "learning_rate": 1.8519928688359836e-05, "loss": 0.6199, "step": 18364 }, { "epoch": 2.9980000816293213, "grad_norm": 1.4222497940063477, "learning_rate": 1.851976254351638e-05, "loss": 0.4654, "step": 18365 }, { "epoch": 2.9981633402718257, "grad_norm": 1.7840983867645264, "learning_rate": 1.8519596390093533e-05, "loss": 0.6026, "step": 18366 }, { "epoch": 2.99832659891433, "grad_norm": 1.6988296508789062, "learning_rate": 1.851943022809145e-05, "loss": 0.5198, "step": 18367 }, { "epoch": 2.9984898575568346, "grad_norm": 1.8054527044296265, "learning_rate": 1.8519264057510304e-05, "loss": 0.6254, "step": 18368 }, { "epoch": 2.998653116199339, "grad_norm": 1.8095499277114868, "learning_rate": 1.851909787835026e-05, "loss": 0.5138, "step": 18369 }, { "epoch": 2.998816374841843, "grad_norm": 1.9422591924667358, "learning_rate": 1.851893169061149e-05, "loss": 0.5, "step": 18370 }, { "epoch": 2.9989796334843475, "grad_norm": 1.724324107170105, "learning_rate": 1.8518765494294154e-05, "loss": 0.4788, "step": 18371 }, { "epoch": 2.999142892126852, "grad_norm": 1.7294195890426636, "learning_rate": 1.8518599289398425e-05, "loss": 0.5146, "step": 18372 }, { "epoch": 2.9993061507693564, "grad_norm": 1.775810718536377, "learning_rate": 1.8518433075924468e-05, "loss": 0.524, "step": 18373 }, { "epoch": 2.999469409411861, "grad_norm": 2.0746102333068848, "learning_rate": 1.8518266853872456e-05, "loss": 0.6908, "step": 18374 }, { "epoch": 2.9996326680543652, "grad_norm": 1.8121826648712158, "learning_rate": 1.8518100623242548e-05, "loss": 0.5378, "step": 18375 }, { "epoch": 2.9997959266968697, "grad_norm": 1.7077902555465698, "learning_rate": 1.8517934384034913e-05, "loss": 0.5262, "step": 18376 }, { "epoch": 2.9999591853393737, "grad_norm": 1.9575108289718628, "learning_rate": 1.8517768136249722e-05, "loss": 0.5616, "step": 18377 }, { "epoch": 3.0, "grad_norm": 4.377547740936279, "learning_rate": 1.8517601879887143e-05, "loss": 0.9378, "step": 18378 }, { "epoch": 3.0001632586425044, "grad_norm": 1.7345848083496094, "learning_rate": 1.851743561494734e-05, "loss": 0.7063, "step": 18379 }, { "epoch": 3.000326517285009, "grad_norm": 1.059463381767273, "learning_rate": 1.851726934143048e-05, "loss": 0.3863, "step": 18380 }, { "epoch": 3.0004897759275133, "grad_norm": 1.49441397190094, "learning_rate": 1.851710305933673e-05, "loss": 0.4362, "step": 18381 }, { "epoch": 3.0006530345700178, "grad_norm": 1.6815259456634521, "learning_rate": 1.8516936768666263e-05, "loss": 0.5695, "step": 18382 }, { "epoch": 3.0008162932125217, "grad_norm": 1.5109926462173462, "learning_rate": 1.8516770469419242e-05, "loss": 0.4678, "step": 18383 }, { "epoch": 3.000979551855026, "grad_norm": 1.3794513940811157, "learning_rate": 1.8516604161595834e-05, "loss": 0.4301, "step": 18384 }, { "epoch": 3.0011428104975306, "grad_norm": 1.2333649396896362, "learning_rate": 1.8516437845196213e-05, "loss": 0.377, "step": 18385 }, { "epoch": 3.001306069140035, "grad_norm": 1.6946130990982056, "learning_rate": 1.8516271520220536e-05, "loss": 0.4909, "step": 18386 }, { "epoch": 3.0014693277825395, "grad_norm": 1.5168777704238892, "learning_rate": 1.8516105186668976e-05, "loss": 0.5049, "step": 18387 }, { "epoch": 3.001632586425044, "grad_norm": 1.5291268825531006, "learning_rate": 1.8515938844541702e-05, "loss": 0.5318, "step": 18388 }, { "epoch": 3.0017958450675484, "grad_norm": 1.6677768230438232, "learning_rate": 1.851577249383888e-05, "loss": 0.4646, "step": 18389 }, { "epoch": 3.001959103710053, "grad_norm": 1.6690900325775146, "learning_rate": 1.8515606134560676e-05, "loss": 0.4736, "step": 18390 }, { "epoch": 3.0021223623525572, "grad_norm": 1.8276921510696411, "learning_rate": 1.851543976670726e-05, "loss": 0.5106, "step": 18391 }, { "epoch": 3.0022856209950612, "grad_norm": 1.695304036140442, "learning_rate": 1.8515273390278797e-05, "loss": 0.4622, "step": 18392 }, { "epoch": 3.0024488796375657, "grad_norm": 1.9219801425933838, "learning_rate": 1.8515107005275458e-05, "loss": 0.4804, "step": 18393 }, { "epoch": 3.00261213828007, "grad_norm": 1.7109897136688232, "learning_rate": 1.851494061169741e-05, "loss": 0.453, "step": 18394 }, { "epoch": 3.0027753969225746, "grad_norm": 1.718621850013733, "learning_rate": 1.8514774209544818e-05, "loss": 0.4625, "step": 18395 }, { "epoch": 3.002938655565079, "grad_norm": 2.235752582550049, "learning_rate": 1.8514607798817846e-05, "loss": 0.7186, "step": 18396 }, { "epoch": 3.0031019142075834, "grad_norm": 1.5759915113449097, "learning_rate": 1.851444137951667e-05, "loss": 0.4628, "step": 18397 }, { "epoch": 3.003265172850088, "grad_norm": 1.9532945156097412, "learning_rate": 1.8514274951641454e-05, "loss": 0.4882, "step": 18398 }, { "epoch": 3.0034284314925923, "grad_norm": 1.4866477251052856, "learning_rate": 1.8514108515192365e-05, "loss": 0.4009, "step": 18399 }, { "epoch": 3.0035916901350967, "grad_norm": 1.5457483530044556, "learning_rate": 1.8513942070169572e-05, "loss": 0.4827, "step": 18400 }, { "epoch": 3.0037549487776007, "grad_norm": 2.1415607929229736, "learning_rate": 1.8513775616573236e-05, "loss": 0.6014, "step": 18401 }, { "epoch": 3.003918207420105, "grad_norm": 1.884354829788208, "learning_rate": 1.8513609154403535e-05, "loss": 0.4486, "step": 18402 }, { "epoch": 3.0040814660626096, "grad_norm": 1.61143958568573, "learning_rate": 1.8513442683660634e-05, "loss": 0.4172, "step": 18403 }, { "epoch": 3.004244724705114, "grad_norm": 1.571800947189331, "learning_rate": 1.8513276204344698e-05, "loss": 0.4168, "step": 18404 }, { "epoch": 3.0044079833476185, "grad_norm": 1.936752438545227, "learning_rate": 1.851310971645589e-05, "loss": 0.5429, "step": 18405 }, { "epoch": 3.004571241990123, "grad_norm": 1.502915859222412, "learning_rate": 1.8512943219994387e-05, "loss": 0.3992, "step": 18406 }, { "epoch": 3.0047345006326274, "grad_norm": 1.626674771308899, "learning_rate": 1.851277671496035e-05, "loss": 0.3661, "step": 18407 }, { "epoch": 3.004897759275132, "grad_norm": 2.057309150695801, "learning_rate": 1.8512610201353952e-05, "loss": 0.4154, "step": 18408 }, { "epoch": 3.005061017917636, "grad_norm": 2.2861721515655518, "learning_rate": 1.8512443679175358e-05, "loss": 0.5508, "step": 18409 }, { "epoch": 3.0052242765601402, "grad_norm": 1.8140822649002075, "learning_rate": 1.8512277148424732e-05, "loss": 0.4949, "step": 18410 }, { "epoch": 3.0053875352026447, "grad_norm": 2.1315994262695312, "learning_rate": 1.851211060910225e-05, "loss": 0.4707, "step": 18411 }, { "epoch": 3.005550793845149, "grad_norm": 1.9330172538757324, "learning_rate": 1.851194406120807e-05, "loss": 0.4731, "step": 18412 }, { "epoch": 3.0057140524876536, "grad_norm": 1.7516013383865356, "learning_rate": 1.8511777504742364e-05, "loss": 0.445, "step": 18413 }, { "epoch": 3.005877311130158, "grad_norm": 2.1389477252960205, "learning_rate": 1.8511610939705302e-05, "loss": 0.5006, "step": 18414 }, { "epoch": 3.0060405697726624, "grad_norm": 1.7275699377059937, "learning_rate": 1.8511444366097053e-05, "loss": 0.435, "step": 18415 }, { "epoch": 3.006203828415167, "grad_norm": 1.4510436058044434, "learning_rate": 1.851127778391778e-05, "loss": 0.3573, "step": 18416 }, { "epoch": 3.0063670870576713, "grad_norm": 1.6458261013031006, "learning_rate": 1.851111119316765e-05, "loss": 0.436, "step": 18417 }, { "epoch": 3.0065303457001753, "grad_norm": 1.960505485534668, "learning_rate": 1.8510944593846837e-05, "loss": 0.4778, "step": 18418 }, { "epoch": 3.0066936043426797, "grad_norm": 1.7683360576629639, "learning_rate": 1.8510777985955498e-05, "loss": 0.367, "step": 18419 }, { "epoch": 3.006856862985184, "grad_norm": 1.92803156375885, "learning_rate": 1.8510611369493815e-05, "loss": 0.4133, "step": 18420 }, { "epoch": 3.0070201216276886, "grad_norm": 2.101102828979492, "learning_rate": 1.851044474446195e-05, "loss": 0.6209, "step": 18421 }, { "epoch": 3.007183380270193, "grad_norm": 2.19883131980896, "learning_rate": 1.8510278110860063e-05, "loss": 0.5062, "step": 18422 }, { "epoch": 3.0073466389126975, "grad_norm": 1.9199559688568115, "learning_rate": 1.8510111468688332e-05, "loss": 0.4686, "step": 18423 }, { "epoch": 3.007509897555202, "grad_norm": 1.6093086004257202, "learning_rate": 1.850994481794692e-05, "loss": 0.4466, "step": 18424 }, { "epoch": 3.0076731561977064, "grad_norm": 1.9249941110610962, "learning_rate": 1.8509778158636e-05, "loss": 0.4237, "step": 18425 }, { "epoch": 3.007836414840211, "grad_norm": 1.8909653425216675, "learning_rate": 1.8509611490755728e-05, "loss": 0.5167, "step": 18426 }, { "epoch": 3.007999673482715, "grad_norm": 1.720744013786316, "learning_rate": 1.8509444814306284e-05, "loss": 0.4099, "step": 18427 }, { "epoch": 3.0081629321252192, "grad_norm": 1.8505769968032837, "learning_rate": 1.850927812928783e-05, "loss": 0.4641, "step": 18428 }, { "epoch": 3.0083261907677237, "grad_norm": 2.2107269763946533, "learning_rate": 1.8509111435700537e-05, "loss": 0.6222, "step": 18429 }, { "epoch": 3.008489449410228, "grad_norm": 2.2616117000579834, "learning_rate": 1.8508944733544568e-05, "loss": 0.4774, "step": 18430 }, { "epoch": 3.0086527080527325, "grad_norm": 2.1400210857391357, "learning_rate": 1.8508778022820095e-05, "loss": 0.4967, "step": 18431 }, { "epoch": 3.008815966695237, "grad_norm": 1.626513123512268, "learning_rate": 1.8508611303527285e-05, "loss": 0.4132, "step": 18432 }, { "epoch": 3.0089792253377414, "grad_norm": 1.9320924282073975, "learning_rate": 1.850844457566631e-05, "loss": 0.4379, "step": 18433 }, { "epoch": 3.009142483980246, "grad_norm": 1.9688199758529663, "learning_rate": 1.8508277839237328e-05, "loss": 0.4371, "step": 18434 }, { "epoch": 3.0093057426227503, "grad_norm": 1.6453510522842407, "learning_rate": 1.8508111094240516e-05, "loss": 0.3973, "step": 18435 }, { "epoch": 3.0094690012652543, "grad_norm": 1.8470560312271118, "learning_rate": 1.8507944340676035e-05, "loss": 0.4427, "step": 18436 }, { "epoch": 3.0096322599077587, "grad_norm": 1.723944067955017, "learning_rate": 1.850777757854406e-05, "loss": 0.4095, "step": 18437 }, { "epoch": 3.009795518550263, "grad_norm": 1.8001320362091064, "learning_rate": 1.850761080784475e-05, "loss": 0.4608, "step": 18438 }, { "epoch": 3.0099587771927676, "grad_norm": 1.8901313543319702, "learning_rate": 1.8507444028578284e-05, "loss": 0.4632, "step": 18439 }, { "epoch": 3.010122035835272, "grad_norm": 2.261765718460083, "learning_rate": 1.8507277240744818e-05, "loss": 0.5294, "step": 18440 }, { "epoch": 3.0102852944777765, "grad_norm": 2.6511919498443604, "learning_rate": 1.850711044434453e-05, "loss": 0.514, "step": 18441 }, { "epoch": 3.010448553120281, "grad_norm": 1.9696508646011353, "learning_rate": 1.8506943639377587e-05, "loss": 0.5448, "step": 18442 }, { "epoch": 3.0106118117627854, "grad_norm": 1.9744733572006226, "learning_rate": 1.850677682584415e-05, "loss": 0.4818, "step": 18443 }, { "epoch": 3.01077507040529, "grad_norm": 1.9812250137329102, "learning_rate": 1.850661000374439e-05, "loss": 0.4568, "step": 18444 }, { "epoch": 3.010938329047794, "grad_norm": 1.9795281887054443, "learning_rate": 1.8506443173078478e-05, "loss": 0.4701, "step": 18445 }, { "epoch": 3.0111015876902982, "grad_norm": 1.8404620885849, "learning_rate": 1.850627633384658e-05, "loss": 0.4487, "step": 18446 }, { "epoch": 3.0112648463328027, "grad_norm": 1.8910188674926758, "learning_rate": 1.850610948604886e-05, "loss": 0.5046, "step": 18447 }, { "epoch": 3.011428104975307, "grad_norm": 1.4994096755981445, "learning_rate": 1.8505942629685492e-05, "loss": 0.4319, "step": 18448 }, { "epoch": 3.0115913636178115, "grad_norm": 1.7680944204330444, "learning_rate": 1.8505775764756646e-05, "loss": 0.4616, "step": 18449 }, { "epoch": 3.011754622260316, "grad_norm": 2.065762996673584, "learning_rate": 1.8505608891262487e-05, "loss": 0.4625, "step": 18450 }, { "epoch": 3.0119178809028204, "grad_norm": 1.9081531763076782, "learning_rate": 1.8505442009203175e-05, "loss": 0.4498, "step": 18451 }, { "epoch": 3.012081139545325, "grad_norm": 1.8591570854187012, "learning_rate": 1.8505275118578892e-05, "loss": 0.4903, "step": 18452 }, { "epoch": 3.012244398187829, "grad_norm": 1.6592947244644165, "learning_rate": 1.8505108219389792e-05, "loss": 0.3962, "step": 18453 }, { "epoch": 3.0124076568303333, "grad_norm": 1.7315024137496948, "learning_rate": 1.8504941311636057e-05, "loss": 0.4425, "step": 18454 }, { "epoch": 3.0125709154728377, "grad_norm": 1.9520494937896729, "learning_rate": 1.8504774395317847e-05, "loss": 0.5555, "step": 18455 }, { "epoch": 3.012734174115342, "grad_norm": 2.23970890045166, "learning_rate": 1.850460747043533e-05, "loss": 0.5153, "step": 18456 }, { "epoch": 3.0128974327578466, "grad_norm": 1.631711483001709, "learning_rate": 1.850444053698867e-05, "loss": 0.4295, "step": 18457 }, { "epoch": 3.013060691400351, "grad_norm": 1.5388363599777222, "learning_rate": 1.850427359497805e-05, "loss": 0.4681, "step": 18458 }, { "epoch": 3.0132239500428555, "grad_norm": 2.2095422744750977, "learning_rate": 1.8504106644403626e-05, "loss": 0.5605, "step": 18459 }, { "epoch": 3.01338720868536, "grad_norm": 2.034498929977417, "learning_rate": 1.850393968526557e-05, "loss": 0.5697, "step": 18460 }, { "epoch": 3.0135504673278644, "grad_norm": 1.707322120666504, "learning_rate": 1.8503772717564047e-05, "loss": 0.4521, "step": 18461 }, { "epoch": 3.0137137259703684, "grad_norm": 1.648890733718872, "learning_rate": 1.8503605741299224e-05, "loss": 0.4749, "step": 18462 }, { "epoch": 3.013876984612873, "grad_norm": 2.1041650772094727, "learning_rate": 1.8503438756471275e-05, "loss": 0.5311, "step": 18463 }, { "epoch": 3.0140402432553772, "grad_norm": 1.8426687717437744, "learning_rate": 1.8503271763080368e-05, "loss": 0.4347, "step": 18464 }, { "epoch": 3.0142035018978817, "grad_norm": 2.1467485427856445, "learning_rate": 1.8503104761126666e-05, "loss": 0.5149, "step": 18465 }, { "epoch": 3.014366760540386, "grad_norm": 1.979142665863037, "learning_rate": 1.850293775061034e-05, "loss": 0.5072, "step": 18466 }, { "epoch": 3.0145300191828905, "grad_norm": 2.097163438796997, "learning_rate": 1.8502770731531558e-05, "loss": 0.4964, "step": 18467 }, { "epoch": 3.014693277825395, "grad_norm": 1.7263426780700684, "learning_rate": 1.8502603703890488e-05, "loss": 0.4558, "step": 18468 }, { "epoch": 3.0148565364678994, "grad_norm": 1.9380106925964355, "learning_rate": 1.8502436667687296e-05, "loss": 0.4793, "step": 18469 }, { "epoch": 3.015019795110404, "grad_norm": 1.991590142250061, "learning_rate": 1.8502269622922157e-05, "loss": 0.4445, "step": 18470 }, { "epoch": 3.015183053752908, "grad_norm": 1.8317480087280273, "learning_rate": 1.8502102569595233e-05, "loss": 0.4735, "step": 18471 }, { "epoch": 3.0153463123954123, "grad_norm": 1.4751231670379639, "learning_rate": 1.8501935507706692e-05, "loss": 0.4014, "step": 18472 }, { "epoch": 3.0155095710379167, "grad_norm": 2.1528706550598145, "learning_rate": 1.8501768437256705e-05, "loss": 0.5282, "step": 18473 }, { "epoch": 3.015672829680421, "grad_norm": 1.870710849761963, "learning_rate": 1.850160135824544e-05, "loss": 0.4667, "step": 18474 }, { "epoch": 3.0158360883229256, "grad_norm": 1.8722426891326904, "learning_rate": 1.8501434270673066e-05, "loss": 0.4529, "step": 18475 }, { "epoch": 3.01599934696543, "grad_norm": 2.156005620956421, "learning_rate": 1.850126717453975e-05, "loss": 0.4573, "step": 18476 }, { "epoch": 3.0161626056079345, "grad_norm": 1.709607720375061, "learning_rate": 1.8501100069845657e-05, "loss": 0.4451, "step": 18477 }, { "epoch": 3.016325864250439, "grad_norm": 1.8761217594146729, "learning_rate": 1.8500932956590962e-05, "loss": 0.4409, "step": 18478 }, { "epoch": 3.0164891228929434, "grad_norm": 1.5948123931884766, "learning_rate": 1.850076583477583e-05, "loss": 0.46, "step": 18479 }, { "epoch": 3.0166523815354473, "grad_norm": 2.171351671218872, "learning_rate": 1.8500598704400427e-05, "loss": 0.4992, "step": 18480 }, { "epoch": 3.016815640177952, "grad_norm": 2.2236905097961426, "learning_rate": 1.8500431565464923e-05, "loss": 0.5097, "step": 18481 }, { "epoch": 3.0169788988204562, "grad_norm": 1.736372709274292, "learning_rate": 1.850026441796949e-05, "loss": 0.4631, "step": 18482 }, { "epoch": 3.0171421574629607, "grad_norm": 1.8046756982803345, "learning_rate": 1.850009726191429e-05, "loss": 0.4052, "step": 18483 }, { "epoch": 3.017305416105465, "grad_norm": 1.6675002574920654, "learning_rate": 1.8499930097299496e-05, "loss": 0.4909, "step": 18484 }, { "epoch": 3.0174686747479695, "grad_norm": 2.023834228515625, "learning_rate": 1.849976292412527e-05, "loss": 0.5026, "step": 18485 }, { "epoch": 3.017631933390474, "grad_norm": 1.8564742803573608, "learning_rate": 1.849959574239179e-05, "loss": 0.4473, "step": 18486 }, { "epoch": 3.0177951920329784, "grad_norm": 1.754656195640564, "learning_rate": 1.8499428552099217e-05, "loss": 0.4267, "step": 18487 }, { "epoch": 3.017958450675483, "grad_norm": 1.8235825300216675, "learning_rate": 1.8499261353247722e-05, "loss": 0.4374, "step": 18488 }, { "epoch": 3.018121709317987, "grad_norm": 2.7384119033813477, "learning_rate": 1.8499094145837475e-05, "loss": 0.5287, "step": 18489 }, { "epoch": 3.0182849679604913, "grad_norm": 1.638433575630188, "learning_rate": 1.849892692986864e-05, "loss": 0.409, "step": 18490 }, { "epoch": 3.0184482266029957, "grad_norm": 1.8186067342758179, "learning_rate": 1.849875970534139e-05, "loss": 0.4641, "step": 18491 }, { "epoch": 3.0186114852455, "grad_norm": 1.594215750694275, "learning_rate": 1.849859247225589e-05, "loss": 0.422, "step": 18492 }, { "epoch": 3.0187747438880046, "grad_norm": 2.2628183364868164, "learning_rate": 1.849842523061231e-05, "loss": 0.5316, "step": 18493 }, { "epoch": 3.018938002530509, "grad_norm": 1.6670162677764893, "learning_rate": 1.849825798041082e-05, "loss": 0.4217, "step": 18494 }, { "epoch": 3.0191012611730135, "grad_norm": 1.8213368654251099, "learning_rate": 1.849809072165158e-05, "loss": 0.5048, "step": 18495 }, { "epoch": 3.019264519815518, "grad_norm": 2.0318217277526855, "learning_rate": 1.849792345433477e-05, "loss": 0.5022, "step": 18496 }, { "epoch": 3.019427778458022, "grad_norm": 2.1603782176971436, "learning_rate": 1.8497756178460552e-05, "loss": 0.4841, "step": 18497 }, { "epoch": 3.0195910371005263, "grad_norm": 1.3117237091064453, "learning_rate": 1.8497588894029098e-05, "loss": 0.3807, "step": 18498 }, { "epoch": 3.019754295743031, "grad_norm": 2.1416077613830566, "learning_rate": 1.8497421601040574e-05, "loss": 0.456, "step": 18499 }, { "epoch": 3.019917554385535, "grad_norm": 2.1815788745880127, "learning_rate": 1.8497254299495147e-05, "loss": 0.5268, "step": 18500 }, { "epoch": 3.0200808130280397, "grad_norm": 2.2280588150024414, "learning_rate": 1.8497086989392986e-05, "loss": 0.4558, "step": 18501 }, { "epoch": 3.020244071670544, "grad_norm": 2.328702211380005, "learning_rate": 1.8496919670734262e-05, "loss": 0.5123, "step": 18502 }, { "epoch": 3.0204073303130485, "grad_norm": 1.6061946153640747, "learning_rate": 1.8496752343519142e-05, "loss": 0.4386, "step": 18503 }, { "epoch": 3.020570588955553, "grad_norm": 2.2084386348724365, "learning_rate": 1.8496585007747794e-05, "loss": 0.5412, "step": 18504 }, { "epoch": 3.0207338475980574, "grad_norm": 1.7044726610183716, "learning_rate": 1.849641766342039e-05, "loss": 0.4343, "step": 18505 }, { "epoch": 3.0208971062405614, "grad_norm": 1.9971150159835815, "learning_rate": 1.8496250310537092e-05, "loss": 0.4921, "step": 18506 }, { "epoch": 3.021060364883066, "grad_norm": 1.8884111642837524, "learning_rate": 1.8496082949098074e-05, "loss": 0.4961, "step": 18507 }, { "epoch": 3.0212236235255703, "grad_norm": 1.8118231296539307, "learning_rate": 1.84959155791035e-05, "loss": 0.3893, "step": 18508 }, { "epoch": 3.0213868821680747, "grad_norm": 1.7713574171066284, "learning_rate": 1.8495748200553547e-05, "loss": 0.4341, "step": 18509 }, { "epoch": 3.021550140810579, "grad_norm": 1.8058075904846191, "learning_rate": 1.8495580813448374e-05, "loss": 0.4222, "step": 18510 }, { "epoch": 3.0217133994530836, "grad_norm": 2.315885066986084, "learning_rate": 1.8495413417788154e-05, "loss": 0.6678, "step": 18511 }, { "epoch": 3.021876658095588, "grad_norm": 1.6711541414260864, "learning_rate": 1.8495246013573057e-05, "loss": 0.4729, "step": 18512 }, { "epoch": 3.0220399167380925, "grad_norm": 2.0752992630004883, "learning_rate": 1.8495078600803246e-05, "loss": 0.5668, "step": 18513 }, { "epoch": 3.022203175380597, "grad_norm": 1.814719796180725, "learning_rate": 1.8494911179478894e-05, "loss": 0.561, "step": 18514 }, { "epoch": 3.022366434023101, "grad_norm": 1.6292648315429688, "learning_rate": 1.849474374960017e-05, "loss": 0.4156, "step": 18515 }, { "epoch": 3.0225296926656053, "grad_norm": 1.9059581756591797, "learning_rate": 1.849457631116724e-05, "loss": 0.5285, "step": 18516 }, { "epoch": 3.0226929513081098, "grad_norm": 1.9030908346176147, "learning_rate": 1.8494408864180274e-05, "loss": 0.4194, "step": 18517 }, { "epoch": 3.022856209950614, "grad_norm": 1.7944772243499756, "learning_rate": 1.8494241408639443e-05, "loss": 0.4485, "step": 18518 }, { "epoch": 3.0230194685931187, "grad_norm": 1.6627840995788574, "learning_rate": 1.849407394454491e-05, "loss": 0.3895, "step": 18519 }, { "epoch": 3.023182727235623, "grad_norm": 1.8362826108932495, "learning_rate": 1.8493906471896846e-05, "loss": 0.4182, "step": 18520 }, { "epoch": 3.0233459858781275, "grad_norm": 1.477038860321045, "learning_rate": 1.8493738990695423e-05, "loss": 0.3698, "step": 18521 }, { "epoch": 3.023509244520632, "grad_norm": 1.9110393524169922, "learning_rate": 1.8493571500940807e-05, "loss": 0.4457, "step": 18522 }, { "epoch": 3.0236725031631364, "grad_norm": 1.6459077596664429, "learning_rate": 1.8493404002633167e-05, "loss": 0.4214, "step": 18523 }, { "epoch": 3.0238357618056404, "grad_norm": 1.8744080066680908, "learning_rate": 1.849323649577267e-05, "loss": 0.437, "step": 18524 }, { "epoch": 3.023999020448145, "grad_norm": 1.6930999755859375, "learning_rate": 1.8493068980359487e-05, "loss": 0.4452, "step": 18525 }, { "epoch": 3.0241622790906493, "grad_norm": 1.89438796043396, "learning_rate": 1.8492901456393786e-05, "loss": 0.4909, "step": 18526 }, { "epoch": 3.0243255377331537, "grad_norm": 1.8247227668762207, "learning_rate": 1.8492733923875736e-05, "loss": 0.4437, "step": 18527 }, { "epoch": 3.024488796375658, "grad_norm": 1.966711401939392, "learning_rate": 1.8492566382805502e-05, "loss": 0.4763, "step": 18528 }, { "epoch": 3.0246520550181626, "grad_norm": 2.03691029548645, "learning_rate": 1.849239883318326e-05, "loss": 0.4717, "step": 18529 }, { "epoch": 3.024815313660667, "grad_norm": 2.0042803287506104, "learning_rate": 1.8492231275009172e-05, "loss": 0.4968, "step": 18530 }, { "epoch": 3.0249785723031715, "grad_norm": 1.8122791051864624, "learning_rate": 1.849206370828341e-05, "loss": 0.4401, "step": 18531 }, { "epoch": 3.025141830945676, "grad_norm": 1.7246408462524414, "learning_rate": 1.8491896133006142e-05, "loss": 0.4282, "step": 18532 }, { "epoch": 3.02530508958818, "grad_norm": 2.2058522701263428, "learning_rate": 1.849172854917754e-05, "loss": 0.4704, "step": 18533 }, { "epoch": 3.0254683482306843, "grad_norm": 1.9551091194152832, "learning_rate": 1.8491560956797766e-05, "loss": 0.5531, "step": 18534 }, { "epoch": 3.0256316068731888, "grad_norm": 1.973941445350647, "learning_rate": 1.8491393355866993e-05, "loss": 0.5182, "step": 18535 }, { "epoch": 3.025794865515693, "grad_norm": 1.7628107070922852, "learning_rate": 1.8491225746385387e-05, "loss": 0.5576, "step": 18536 }, { "epoch": 3.0259581241581976, "grad_norm": 1.9157634973526, "learning_rate": 1.8491058128353123e-05, "loss": 0.481, "step": 18537 }, { "epoch": 3.026121382800702, "grad_norm": 1.829927682876587, "learning_rate": 1.8490890501770363e-05, "loss": 0.4123, "step": 18538 }, { "epoch": 3.0262846414432065, "grad_norm": 2.167896270751953, "learning_rate": 1.849072286663728e-05, "loss": 0.5436, "step": 18539 }, { "epoch": 3.026447900085711, "grad_norm": 2.085259437561035, "learning_rate": 1.849055522295404e-05, "loss": 0.4671, "step": 18540 }, { "epoch": 3.026611158728215, "grad_norm": 1.7202872037887573, "learning_rate": 1.8490387570720814e-05, "loss": 0.4366, "step": 18541 }, { "epoch": 3.0267744173707194, "grad_norm": 1.7242106199264526, "learning_rate": 1.849021990993777e-05, "loss": 0.3921, "step": 18542 }, { "epoch": 3.026937676013224, "grad_norm": 2.1002416610717773, "learning_rate": 1.8490052240605075e-05, "loss": 0.53, "step": 18543 }, { "epoch": 3.0271009346557283, "grad_norm": 1.8681710958480835, "learning_rate": 1.8489884562722903e-05, "loss": 0.4748, "step": 18544 }, { "epoch": 3.0272641932982327, "grad_norm": 1.7063722610473633, "learning_rate": 1.8489716876291417e-05, "loss": 0.4673, "step": 18545 }, { "epoch": 3.027427451940737, "grad_norm": 1.9917008876800537, "learning_rate": 1.8489549181310788e-05, "loss": 0.4549, "step": 18546 }, { "epoch": 3.0275907105832416, "grad_norm": 2.0555872917175293, "learning_rate": 1.8489381477781186e-05, "loss": 0.5769, "step": 18547 }, { "epoch": 3.027753969225746, "grad_norm": 1.47267746925354, "learning_rate": 1.848921376570278e-05, "loss": 0.4048, "step": 18548 }, { "epoch": 3.0279172278682505, "grad_norm": 1.8522158861160278, "learning_rate": 1.8489046045075737e-05, "loss": 0.4771, "step": 18549 }, { "epoch": 3.0280804865107545, "grad_norm": 1.7550816535949707, "learning_rate": 1.8488878315900228e-05, "loss": 0.4646, "step": 18550 }, { "epoch": 3.028243745153259, "grad_norm": 1.5968536138534546, "learning_rate": 1.8488710578176418e-05, "loss": 0.397, "step": 18551 }, { "epoch": 3.0284070037957633, "grad_norm": 1.7823832035064697, "learning_rate": 1.8488542831904484e-05, "loss": 0.4974, "step": 18552 }, { "epoch": 3.0285702624382678, "grad_norm": 1.803792119026184, "learning_rate": 1.8488375077084585e-05, "loss": 0.4489, "step": 18553 }, { "epoch": 3.028733521080772, "grad_norm": 1.6587532758712769, "learning_rate": 1.8488207313716897e-05, "loss": 0.4614, "step": 18554 }, { "epoch": 3.0288967797232766, "grad_norm": 2.115346670150757, "learning_rate": 1.8488039541801582e-05, "loss": 0.499, "step": 18555 }, { "epoch": 3.029060038365781, "grad_norm": 2.0273098945617676, "learning_rate": 1.848787176133882e-05, "loss": 0.4592, "step": 18556 }, { "epoch": 3.0292232970082855, "grad_norm": 1.8349318504333496, "learning_rate": 1.848770397232877e-05, "loss": 0.3986, "step": 18557 }, { "epoch": 3.02938655565079, "grad_norm": 1.8474103212356567, "learning_rate": 1.8487536174771605e-05, "loss": 0.4635, "step": 18558 }, { "epoch": 3.029549814293294, "grad_norm": 1.6848357915878296, "learning_rate": 1.848736836866749e-05, "loss": 0.4248, "step": 18559 }, { "epoch": 3.0297130729357984, "grad_norm": 1.7958800792694092, "learning_rate": 1.8487200554016602e-05, "loss": 0.4214, "step": 18560 }, { "epoch": 3.029876331578303, "grad_norm": 1.713108777999878, "learning_rate": 1.8487032730819104e-05, "loss": 0.4445, "step": 18561 }, { "epoch": 3.0300395902208073, "grad_norm": 1.851967692375183, "learning_rate": 1.848686489907517e-05, "loss": 0.4409, "step": 18562 }, { "epoch": 3.0302028488633117, "grad_norm": 1.8192331790924072, "learning_rate": 1.8486697058784956e-05, "loss": 0.4356, "step": 18563 }, { "epoch": 3.030366107505816, "grad_norm": 1.8840584754943848, "learning_rate": 1.8486529209948645e-05, "loss": 0.4836, "step": 18564 }, { "epoch": 3.0305293661483206, "grad_norm": 2.20465087890625, "learning_rate": 1.8486361352566402e-05, "loss": 0.6, "step": 18565 }, { "epoch": 3.030692624790825, "grad_norm": 1.774479866027832, "learning_rate": 1.8486193486638396e-05, "loss": 0.4839, "step": 18566 }, { "epoch": 3.0308558834333295, "grad_norm": 2.039196729660034, "learning_rate": 1.8486025612164796e-05, "loss": 0.4939, "step": 18567 }, { "epoch": 3.0310191420758335, "grad_norm": 2.077199697494507, "learning_rate": 1.848585772914577e-05, "loss": 0.4285, "step": 18568 }, { "epoch": 3.031182400718338, "grad_norm": 1.970241665840149, "learning_rate": 1.8485689837581484e-05, "loss": 0.5272, "step": 18569 }, { "epoch": 3.0313456593608423, "grad_norm": 2.029618263244629, "learning_rate": 1.8485521937472114e-05, "loss": 0.4935, "step": 18570 }, { "epoch": 3.0315089180033468, "grad_norm": 2.002202272415161, "learning_rate": 1.8485354028817824e-05, "loss": 0.4968, "step": 18571 }, { "epoch": 3.031672176645851, "grad_norm": 2.049346923828125, "learning_rate": 1.8485186111618785e-05, "loss": 0.4867, "step": 18572 }, { "epoch": 3.0318354352883556, "grad_norm": 2.336742639541626, "learning_rate": 1.848501818587517e-05, "loss": 0.5273, "step": 18573 }, { "epoch": 3.03199869393086, "grad_norm": 2.1030962467193604, "learning_rate": 1.8484850251587135e-05, "loss": 0.5398, "step": 18574 }, { "epoch": 3.0321619525733645, "grad_norm": 1.8739272356033325, "learning_rate": 1.8484682308754863e-05, "loss": 0.4358, "step": 18575 }, { "epoch": 3.032325211215869, "grad_norm": 1.888171672821045, "learning_rate": 1.848451435737852e-05, "loss": 0.4631, "step": 18576 }, { "epoch": 3.032488469858373, "grad_norm": 1.7720956802368164, "learning_rate": 1.8484346397458272e-05, "loss": 0.4313, "step": 18577 }, { "epoch": 3.0326517285008774, "grad_norm": 1.9825297594070435, "learning_rate": 1.848417842899429e-05, "loss": 0.4135, "step": 18578 }, { "epoch": 3.032814987143382, "grad_norm": 1.896290898323059, "learning_rate": 1.8484010451986744e-05, "loss": 0.3977, "step": 18579 }, { "epoch": 3.0329782457858863, "grad_norm": 2.1111438274383545, "learning_rate": 1.8483842466435798e-05, "loss": 0.5671, "step": 18580 }, { "epoch": 3.0331415044283907, "grad_norm": 1.9359071254730225, "learning_rate": 1.8483674472341627e-05, "loss": 0.4097, "step": 18581 }, { "epoch": 3.033304763070895, "grad_norm": 1.9141757488250732, "learning_rate": 1.8483506469704394e-05, "loss": 0.3859, "step": 18582 }, { "epoch": 3.0334680217133996, "grad_norm": 2.3721466064453125, "learning_rate": 1.8483338458524278e-05, "loss": 0.549, "step": 18583 }, { "epoch": 3.033631280355904, "grad_norm": 1.817600131034851, "learning_rate": 1.8483170438801438e-05, "loss": 0.4819, "step": 18584 }, { "epoch": 3.033794538998408, "grad_norm": 1.8866328001022339, "learning_rate": 1.8483002410536054e-05, "loss": 0.4885, "step": 18585 }, { "epoch": 3.0339577976409124, "grad_norm": 1.5779774188995361, "learning_rate": 1.8482834373728282e-05, "loss": 0.3586, "step": 18586 }, { "epoch": 3.034121056283417, "grad_norm": 1.6901535987854004, "learning_rate": 1.84826663283783e-05, "loss": 0.4271, "step": 18587 }, { "epoch": 3.0342843149259213, "grad_norm": 2.003873825073242, "learning_rate": 1.8482498274486277e-05, "loss": 0.5517, "step": 18588 }, { "epoch": 3.0344475735684258, "grad_norm": 1.8855799436569214, "learning_rate": 1.8482330212052377e-05, "loss": 0.451, "step": 18589 }, { "epoch": 3.03461083221093, "grad_norm": 1.867447853088379, "learning_rate": 1.848216214107678e-05, "loss": 0.4744, "step": 18590 }, { "epoch": 3.0347740908534346, "grad_norm": 2.0480082035064697, "learning_rate": 1.8481994061559638e-05, "loss": 0.4901, "step": 18591 }, { "epoch": 3.034937349495939, "grad_norm": 1.8364367485046387, "learning_rate": 1.8481825973501138e-05, "loss": 0.4188, "step": 18592 }, { "epoch": 3.0351006081384435, "grad_norm": 1.8925799131393433, "learning_rate": 1.848165787690144e-05, "loss": 0.4801, "step": 18593 }, { "epoch": 3.0352638667809475, "grad_norm": 2.1837453842163086, "learning_rate": 1.8481489771760713e-05, "loss": 0.5154, "step": 18594 }, { "epoch": 3.035427125423452, "grad_norm": 1.9367014169692993, "learning_rate": 1.8481321658079127e-05, "loss": 0.4743, "step": 18595 }, { "epoch": 3.0355903840659564, "grad_norm": 1.9907019138336182, "learning_rate": 1.8481153535856854e-05, "loss": 0.4742, "step": 18596 }, { "epoch": 3.035753642708461, "grad_norm": 1.9176371097564697, "learning_rate": 1.848098540509406e-05, "loss": 0.4582, "step": 18597 }, { "epoch": 3.0359169013509653, "grad_norm": 1.6209602355957031, "learning_rate": 1.8480817265790917e-05, "loss": 0.4286, "step": 18598 }, { "epoch": 3.0360801599934697, "grad_norm": 2.0434186458587646, "learning_rate": 1.8480649117947594e-05, "loss": 0.514, "step": 18599 }, { "epoch": 3.036243418635974, "grad_norm": 2.1719791889190674, "learning_rate": 1.848048096156426e-05, "loss": 0.518, "step": 18600 }, { "epoch": 3.0364066772784786, "grad_norm": 1.784977674484253, "learning_rate": 1.8480312796641083e-05, "loss": 0.4151, "step": 18601 }, { "epoch": 3.036569935920983, "grad_norm": 1.579433798789978, "learning_rate": 1.8480144623178236e-05, "loss": 0.4783, "step": 18602 }, { "epoch": 3.036733194563487, "grad_norm": 1.697459101676941, "learning_rate": 1.847997644117588e-05, "loss": 0.4369, "step": 18603 }, { "epoch": 3.0368964532059914, "grad_norm": 1.9512717723846436, "learning_rate": 1.8479808250634197e-05, "loss": 0.496, "step": 18604 }, { "epoch": 3.037059711848496, "grad_norm": 2.0308525562286377, "learning_rate": 1.8479640051553344e-05, "loss": 0.6, "step": 18605 }, { "epoch": 3.0372229704910003, "grad_norm": 1.902160406112671, "learning_rate": 1.8479471843933497e-05, "loss": 0.4531, "step": 18606 }, { "epoch": 3.0373862291335048, "grad_norm": 1.8032641410827637, "learning_rate": 1.8479303627774823e-05, "loss": 0.4474, "step": 18607 }, { "epoch": 3.037549487776009, "grad_norm": 1.7131620645523071, "learning_rate": 1.8479135403077494e-05, "loss": 0.4841, "step": 18608 }, { "epoch": 3.0377127464185136, "grad_norm": 1.6816978454589844, "learning_rate": 1.8478967169841677e-05, "loss": 0.4389, "step": 18609 }, { "epoch": 3.037876005061018, "grad_norm": 1.8240941762924194, "learning_rate": 1.8478798928067544e-05, "loss": 0.4891, "step": 18610 }, { "epoch": 3.0380392637035225, "grad_norm": 2.1447415351867676, "learning_rate": 1.8478630677755264e-05, "loss": 0.551, "step": 18611 }, { "epoch": 3.0382025223460265, "grad_norm": 1.8508821725845337, "learning_rate": 1.8478462418905e-05, "loss": 0.4671, "step": 18612 }, { "epoch": 3.038365780988531, "grad_norm": 1.8245741128921509, "learning_rate": 1.847829415151693e-05, "loss": 0.4491, "step": 18613 }, { "epoch": 3.0385290396310354, "grad_norm": 1.7404614686965942, "learning_rate": 1.8478125875591222e-05, "loss": 0.4607, "step": 18614 }, { "epoch": 3.03869229827354, "grad_norm": 1.8120719194412231, "learning_rate": 1.8477957591128038e-05, "loss": 0.4724, "step": 18615 }, { "epoch": 3.0388555569160443, "grad_norm": 2.084932804107666, "learning_rate": 1.847778929812756e-05, "loss": 0.569, "step": 18616 }, { "epoch": 3.0390188155585487, "grad_norm": 2.260119915008545, "learning_rate": 1.8477620996589945e-05, "loss": 0.499, "step": 18617 }, { "epoch": 3.039182074201053, "grad_norm": 2.0077059268951416, "learning_rate": 1.8477452686515368e-05, "loss": 0.4535, "step": 18618 }, { "epoch": 3.0393453328435576, "grad_norm": 1.46359384059906, "learning_rate": 1.8477284367904e-05, "loss": 0.3882, "step": 18619 }, { "epoch": 3.039508591486062, "grad_norm": 2.3076517581939697, "learning_rate": 1.8477116040756007e-05, "loss": 0.5746, "step": 18620 }, { "epoch": 3.039671850128566, "grad_norm": 3.8177757263183594, "learning_rate": 1.847694770507156e-05, "loss": 0.5317, "step": 18621 }, { "epoch": 3.0398351087710704, "grad_norm": 1.7906885147094727, "learning_rate": 1.8476779360850833e-05, "loss": 0.425, "step": 18622 }, { "epoch": 3.039998367413575, "grad_norm": 1.717004418373108, "learning_rate": 1.847661100809399e-05, "loss": 0.4638, "step": 18623 }, { "epoch": 3.0401616260560793, "grad_norm": 1.9060102701187134, "learning_rate": 1.84764426468012e-05, "loss": 0.4744, "step": 18624 }, { "epoch": 3.0403248846985838, "grad_norm": 1.95914626121521, "learning_rate": 1.8476274276972635e-05, "loss": 0.4997, "step": 18625 }, { "epoch": 3.040488143341088, "grad_norm": 1.9381455183029175, "learning_rate": 1.8476105898608466e-05, "loss": 0.4508, "step": 18626 }, { "epoch": 3.0406514019835926, "grad_norm": 2.1354362964630127, "learning_rate": 1.8475937511708858e-05, "loss": 0.4924, "step": 18627 }, { "epoch": 3.040814660626097, "grad_norm": 1.9223411083221436, "learning_rate": 1.8475769116273987e-05, "loss": 0.5058, "step": 18628 }, { "epoch": 3.0409779192686015, "grad_norm": 1.8241260051727295, "learning_rate": 1.8475600712304015e-05, "loss": 0.443, "step": 18629 }, { "epoch": 3.0411411779111055, "grad_norm": 2.0476441383361816, "learning_rate": 1.847543229979912e-05, "loss": 0.4688, "step": 18630 }, { "epoch": 3.04130443655361, "grad_norm": 1.948378562927246, "learning_rate": 1.8475263878759462e-05, "loss": 0.4971, "step": 18631 }, { "epoch": 3.0414676951961144, "grad_norm": 1.622545599937439, "learning_rate": 1.8475095449185216e-05, "loss": 0.4637, "step": 18632 }, { "epoch": 3.041630953838619, "grad_norm": 2.034364938735962, "learning_rate": 1.8474927011076554e-05, "loss": 0.4819, "step": 18633 }, { "epoch": 3.0417942124811232, "grad_norm": 1.6279072761535645, "learning_rate": 1.847475856443364e-05, "loss": 0.4325, "step": 18634 }, { "epoch": 3.0419574711236277, "grad_norm": 1.8424049615859985, "learning_rate": 1.8474590109256646e-05, "loss": 0.4753, "step": 18635 }, { "epoch": 3.042120729766132, "grad_norm": 1.6093389987945557, "learning_rate": 1.8474421645545745e-05, "loss": 0.4152, "step": 18636 }, { "epoch": 3.0422839884086366, "grad_norm": 1.8137383460998535, "learning_rate": 1.8474253173301103e-05, "loss": 0.5798, "step": 18637 }, { "epoch": 3.0424472470511406, "grad_norm": 1.683749794960022, "learning_rate": 1.847408469252289e-05, "loss": 0.4872, "step": 18638 }, { "epoch": 3.042610505693645, "grad_norm": 1.756422758102417, "learning_rate": 1.8473916203211274e-05, "loss": 0.4472, "step": 18639 }, { "epoch": 3.0427737643361494, "grad_norm": 2.014017343521118, "learning_rate": 1.8473747705366427e-05, "loss": 0.4421, "step": 18640 }, { "epoch": 3.042937022978654, "grad_norm": 1.701934814453125, "learning_rate": 1.8473579198988522e-05, "loss": 0.4311, "step": 18641 }, { "epoch": 3.0431002816211583, "grad_norm": 1.6692113876342773, "learning_rate": 1.8473410684077722e-05, "loss": 0.4299, "step": 18642 }, { "epoch": 3.0432635402636627, "grad_norm": 2.0820791721343994, "learning_rate": 1.8473242160634197e-05, "loss": 0.5056, "step": 18643 }, { "epoch": 3.043426798906167, "grad_norm": 1.9600498676300049, "learning_rate": 1.8473073628658123e-05, "loss": 0.5167, "step": 18644 }, { "epoch": 3.0435900575486716, "grad_norm": 1.6431571245193481, "learning_rate": 1.8472905088149663e-05, "loss": 0.3961, "step": 18645 }, { "epoch": 3.043753316191176, "grad_norm": 2.023959159851074, "learning_rate": 1.8472736539108995e-05, "loss": 0.4518, "step": 18646 }, { "epoch": 3.04391657483368, "grad_norm": 1.7972930669784546, "learning_rate": 1.847256798153628e-05, "loss": 0.4175, "step": 18647 }, { "epoch": 3.0440798334761845, "grad_norm": 2.0838162899017334, "learning_rate": 1.8472399415431693e-05, "loss": 0.469, "step": 18648 }, { "epoch": 3.044243092118689, "grad_norm": 2.2357752323150635, "learning_rate": 1.84722308407954e-05, "loss": 0.4646, "step": 18649 }, { "epoch": 3.0444063507611934, "grad_norm": 2.1866726875305176, "learning_rate": 1.8472062257627573e-05, "loss": 0.5837, "step": 18650 }, { "epoch": 3.044569609403698, "grad_norm": 2.0656659603118896, "learning_rate": 1.8471893665928385e-05, "loss": 0.5301, "step": 18651 }, { "epoch": 3.0447328680462022, "grad_norm": 1.5548664331436157, "learning_rate": 1.8471725065698e-05, "loss": 0.3986, "step": 18652 }, { "epoch": 3.0448961266887067, "grad_norm": 1.7966485023498535, "learning_rate": 1.847155645693659e-05, "loss": 0.44, "step": 18653 }, { "epoch": 3.045059385331211, "grad_norm": 2.2940239906311035, "learning_rate": 1.8471387839644325e-05, "loss": 0.5528, "step": 18654 }, { "epoch": 3.0452226439737156, "grad_norm": 1.7163159847259521, "learning_rate": 1.8471219213821374e-05, "loss": 0.4789, "step": 18655 }, { "epoch": 3.0453859026162196, "grad_norm": 1.8231674432754517, "learning_rate": 1.8471050579467907e-05, "loss": 0.4723, "step": 18656 }, { "epoch": 3.045549161258724, "grad_norm": 2.1097187995910645, "learning_rate": 1.8470881936584094e-05, "loss": 0.5135, "step": 18657 }, { "epoch": 3.0457124199012284, "grad_norm": 2.0402026176452637, "learning_rate": 1.8470713285170106e-05, "loss": 0.4732, "step": 18658 }, { "epoch": 3.045875678543733, "grad_norm": 1.8236067295074463, "learning_rate": 1.8470544625226114e-05, "loss": 0.4354, "step": 18659 }, { "epoch": 3.0460389371862373, "grad_norm": 2.174427032470703, "learning_rate": 1.8470375956752283e-05, "loss": 0.522, "step": 18660 }, { "epoch": 3.0462021958287417, "grad_norm": 2.083010196685791, "learning_rate": 1.847020727974879e-05, "loss": 0.4755, "step": 18661 }, { "epoch": 3.046365454471246, "grad_norm": 2.041193723678589, "learning_rate": 1.8470038594215796e-05, "loss": 0.6333, "step": 18662 }, { "epoch": 3.0465287131137506, "grad_norm": 1.8220324516296387, "learning_rate": 1.8469869900153476e-05, "loss": 0.4567, "step": 18663 }, { "epoch": 3.046691971756255, "grad_norm": 1.9396158456802368, "learning_rate": 1.8469701197562e-05, "loss": 0.4908, "step": 18664 }, { "epoch": 3.046855230398759, "grad_norm": 2.0589895248413086, "learning_rate": 1.8469532486441536e-05, "loss": 0.5035, "step": 18665 }, { "epoch": 3.0470184890412635, "grad_norm": 1.9262452125549316, "learning_rate": 1.8469363766792258e-05, "loss": 0.46, "step": 18666 }, { "epoch": 3.047181747683768, "grad_norm": 1.7004395723342896, "learning_rate": 1.846919503861433e-05, "loss": 0.4467, "step": 18667 }, { "epoch": 3.0473450063262724, "grad_norm": 1.8657355308532715, "learning_rate": 1.8469026301907926e-05, "loss": 0.4671, "step": 18668 }, { "epoch": 3.047508264968777, "grad_norm": 1.83957839012146, "learning_rate": 1.8468857556673215e-05, "loss": 0.458, "step": 18669 }, { "epoch": 3.0476715236112812, "grad_norm": 1.8485075235366821, "learning_rate": 1.846868880291037e-05, "loss": 0.5067, "step": 18670 }, { "epoch": 3.0478347822537857, "grad_norm": 1.469110131263733, "learning_rate": 1.8468520040619552e-05, "loss": 0.3568, "step": 18671 }, { "epoch": 3.04799804089629, "grad_norm": 1.8875027894973755, "learning_rate": 1.8468351269800936e-05, "loss": 0.4432, "step": 18672 }, { "epoch": 3.048161299538794, "grad_norm": 1.9053432941436768, "learning_rate": 1.8468182490454697e-05, "loss": 0.4762, "step": 18673 }, { "epoch": 3.0483245581812985, "grad_norm": 1.8398176431655884, "learning_rate": 1.8468013702580998e-05, "loss": 0.4967, "step": 18674 }, { "epoch": 3.048487816823803, "grad_norm": 1.545680046081543, "learning_rate": 1.846784490618001e-05, "loss": 0.3862, "step": 18675 }, { "epoch": 3.0486510754663074, "grad_norm": 1.8510953187942505, "learning_rate": 1.8467676101251907e-05, "loss": 0.4183, "step": 18676 }, { "epoch": 3.048814334108812, "grad_norm": 1.663995623588562, "learning_rate": 1.8467507287796857e-05, "loss": 0.4587, "step": 18677 }, { "epoch": 3.0489775927513163, "grad_norm": 1.6798077821731567, "learning_rate": 1.8467338465815028e-05, "loss": 0.37, "step": 18678 }, { "epoch": 3.0491408513938207, "grad_norm": 1.8757520914077759, "learning_rate": 1.8467169635306593e-05, "loss": 0.4035, "step": 18679 }, { "epoch": 3.049304110036325, "grad_norm": 2.2842392921447754, "learning_rate": 1.8467000796271717e-05, "loss": 0.5358, "step": 18680 }, { "epoch": 3.0494673686788296, "grad_norm": 2.1493191719055176, "learning_rate": 1.8466831948710578e-05, "loss": 0.5548, "step": 18681 }, { "epoch": 3.0496306273213336, "grad_norm": 1.4486808776855469, "learning_rate": 1.8466663092623337e-05, "loss": 0.3575, "step": 18682 }, { "epoch": 3.049793885963838, "grad_norm": 1.7720052003860474, "learning_rate": 1.8466494228010174e-05, "loss": 0.4742, "step": 18683 }, { "epoch": 3.0499571446063425, "grad_norm": 2.126699209213257, "learning_rate": 1.8466325354871248e-05, "loss": 0.4918, "step": 18684 }, { "epoch": 3.050120403248847, "grad_norm": 2.2300221920013428, "learning_rate": 1.8466156473206736e-05, "loss": 0.5048, "step": 18685 }, { "epoch": 3.0502836618913514, "grad_norm": 1.7653998136520386, "learning_rate": 1.846598758301681e-05, "loss": 0.4734, "step": 18686 }, { "epoch": 3.050446920533856, "grad_norm": 1.882630705833435, "learning_rate": 1.846581868430163e-05, "loss": 0.4629, "step": 18687 }, { "epoch": 3.0506101791763602, "grad_norm": 1.6190106868743896, "learning_rate": 1.8465649777061377e-05, "loss": 0.444, "step": 18688 }, { "epoch": 3.0507734378188647, "grad_norm": 1.8425428867340088, "learning_rate": 1.846548086129622e-05, "loss": 0.4717, "step": 18689 }, { "epoch": 3.050936696461369, "grad_norm": 1.9034438133239746, "learning_rate": 1.8465311937006323e-05, "loss": 0.4592, "step": 18690 }, { "epoch": 3.051099955103873, "grad_norm": 1.9791196584701538, "learning_rate": 1.846514300419186e-05, "loss": 0.4961, "step": 18691 }, { "epoch": 3.0512632137463775, "grad_norm": 2.077639579772949, "learning_rate": 1.8464974062852998e-05, "loss": 0.4694, "step": 18692 }, { "epoch": 3.051426472388882, "grad_norm": 2.307971239089966, "learning_rate": 1.846480511298991e-05, "loss": 0.4715, "step": 18693 }, { "epoch": 3.0515897310313864, "grad_norm": 1.7811779975891113, "learning_rate": 1.8464636154602765e-05, "loss": 0.4356, "step": 18694 }, { "epoch": 3.051752989673891, "grad_norm": 1.6607669591903687, "learning_rate": 1.8464467187691736e-05, "loss": 0.43, "step": 18695 }, { "epoch": 3.0519162483163953, "grad_norm": 1.7749162912368774, "learning_rate": 1.846429821225699e-05, "loss": 0.4621, "step": 18696 }, { "epoch": 3.0520795069588997, "grad_norm": 1.5229171514511108, "learning_rate": 1.8464129228298697e-05, "loss": 0.3972, "step": 18697 }, { "epoch": 3.052242765601404, "grad_norm": 1.8957631587982178, "learning_rate": 1.8463960235817027e-05, "loss": 0.5287, "step": 18698 }, { "epoch": 3.0524060242439086, "grad_norm": 1.6193853616714478, "learning_rate": 1.8463791234812152e-05, "loss": 0.4142, "step": 18699 }, { "epoch": 3.0525692828864126, "grad_norm": 1.9981498718261719, "learning_rate": 1.8463622225284242e-05, "loss": 0.5327, "step": 18700 }, { "epoch": 3.052732541528917, "grad_norm": 2.1244075298309326, "learning_rate": 1.846345320723347e-05, "loss": 0.5118, "step": 18701 }, { "epoch": 3.0528958001714215, "grad_norm": 2.209906578063965, "learning_rate": 1.846328418066e-05, "loss": 0.5901, "step": 18702 }, { "epoch": 3.053059058813926, "grad_norm": 1.7870675325393677, "learning_rate": 1.8463115145564005e-05, "loss": 0.4418, "step": 18703 }, { "epoch": 3.0532223174564304, "grad_norm": 1.9929282665252686, "learning_rate": 1.8462946101945655e-05, "loss": 0.5034, "step": 18704 }, { "epoch": 3.053385576098935, "grad_norm": 1.7951833009719849, "learning_rate": 1.846277704980512e-05, "loss": 0.4761, "step": 18705 }, { "epoch": 3.0535488347414392, "grad_norm": 2.0282347202301025, "learning_rate": 1.8462607989142573e-05, "loss": 0.5012, "step": 18706 }, { "epoch": 3.0537120933839437, "grad_norm": 2.2750163078308105, "learning_rate": 1.846243891995818e-05, "loss": 0.5127, "step": 18707 }, { "epoch": 3.053875352026448, "grad_norm": 1.7806729078292847, "learning_rate": 1.8462269842252113e-05, "loss": 0.4279, "step": 18708 }, { "epoch": 3.054038610668952, "grad_norm": 1.699812412261963, "learning_rate": 1.8462100756024543e-05, "loss": 0.4584, "step": 18709 }, { "epoch": 3.0542018693114565, "grad_norm": 2.0590999126434326, "learning_rate": 1.8461931661275642e-05, "loss": 0.5519, "step": 18710 }, { "epoch": 3.054365127953961, "grad_norm": 1.780012845993042, "learning_rate": 1.8461762558005576e-05, "loss": 0.3916, "step": 18711 }, { "epoch": 3.0545283865964654, "grad_norm": 1.5121303796768188, "learning_rate": 1.8461593446214518e-05, "loss": 0.3685, "step": 18712 }, { "epoch": 3.05469164523897, "grad_norm": 1.917776107788086, "learning_rate": 1.8461424325902636e-05, "loss": 0.4162, "step": 18713 }, { "epoch": 3.0548549038814743, "grad_norm": 1.8498414754867554, "learning_rate": 1.8461255197070102e-05, "loss": 0.4019, "step": 18714 }, { "epoch": 3.0550181625239787, "grad_norm": 2.094975471496582, "learning_rate": 1.846108605971709e-05, "loss": 0.4913, "step": 18715 }, { "epoch": 3.055181421166483, "grad_norm": 2.1996169090270996, "learning_rate": 1.8460916913843764e-05, "loss": 0.4946, "step": 18716 }, { "epoch": 3.0553446798089876, "grad_norm": 2.022977113723755, "learning_rate": 1.8460747759450296e-05, "loss": 0.479, "step": 18717 }, { "epoch": 3.0555079384514916, "grad_norm": 1.4077081680297852, "learning_rate": 1.846057859653686e-05, "loss": 0.3484, "step": 18718 }, { "epoch": 3.055671197093996, "grad_norm": 1.941217064857483, "learning_rate": 1.846040942510362e-05, "loss": 0.414, "step": 18719 }, { "epoch": 3.0558344557365005, "grad_norm": 1.6521804332733154, "learning_rate": 1.846024024515075e-05, "loss": 0.3995, "step": 18720 }, { "epoch": 3.055997714379005, "grad_norm": 2.332625389099121, "learning_rate": 1.8460071056678424e-05, "loss": 0.5009, "step": 18721 }, { "epoch": 3.0561609730215094, "grad_norm": 2.1248717308044434, "learning_rate": 1.8459901859686805e-05, "loss": 0.522, "step": 18722 }, { "epoch": 3.056324231664014, "grad_norm": 1.986194372177124, "learning_rate": 1.845973265417607e-05, "loss": 0.3952, "step": 18723 }, { "epoch": 3.0564874903065182, "grad_norm": 1.995153784751892, "learning_rate": 1.8459563440146384e-05, "loss": 0.4307, "step": 18724 }, { "epoch": 3.0566507489490227, "grad_norm": 1.8827095031738281, "learning_rate": 1.845939421759792e-05, "loss": 0.4608, "step": 18725 }, { "epoch": 3.0568140075915267, "grad_norm": 1.8095571994781494, "learning_rate": 1.845922498653085e-05, "loss": 0.4532, "step": 18726 }, { "epoch": 3.056977266234031, "grad_norm": 1.9022883176803589, "learning_rate": 1.8459055746945343e-05, "loss": 0.4622, "step": 18727 }, { "epoch": 3.0571405248765355, "grad_norm": 2.2726333141326904, "learning_rate": 1.8458886498841567e-05, "loss": 0.5058, "step": 18728 }, { "epoch": 3.05730378351904, "grad_norm": 2.5704638957977295, "learning_rate": 1.8458717242219696e-05, "loss": 0.5837, "step": 18729 }, { "epoch": 3.0574670421615444, "grad_norm": 2.027662754058838, "learning_rate": 1.8458547977079903e-05, "loss": 0.4227, "step": 18730 }, { "epoch": 3.057630300804049, "grad_norm": 1.784295916557312, "learning_rate": 1.845837870342235e-05, "loss": 0.3965, "step": 18731 }, { "epoch": 3.0577935594465533, "grad_norm": 2.308485269546509, "learning_rate": 1.8458209421247208e-05, "loss": 0.4805, "step": 18732 }, { "epoch": 3.0579568180890577, "grad_norm": 1.6927231550216675, "learning_rate": 1.8458040130554656e-05, "loss": 0.4047, "step": 18733 }, { "epoch": 3.058120076731562, "grad_norm": 1.718102216720581, "learning_rate": 1.845787083134486e-05, "loss": 0.428, "step": 18734 }, { "epoch": 3.058283335374066, "grad_norm": 1.7090480327606201, "learning_rate": 1.8457701523617988e-05, "loss": 0.4096, "step": 18735 }, { "epoch": 3.0584465940165706, "grad_norm": 1.8369098901748657, "learning_rate": 1.8457532207374216e-05, "loss": 0.4485, "step": 18736 }, { "epoch": 3.058609852659075, "grad_norm": 1.6586722135543823, "learning_rate": 1.845736288261371e-05, "loss": 0.4041, "step": 18737 }, { "epoch": 3.0587731113015795, "grad_norm": 2.2092535495758057, "learning_rate": 1.845719354933664e-05, "loss": 0.5125, "step": 18738 }, { "epoch": 3.058936369944084, "grad_norm": 1.8877769708633423, "learning_rate": 1.845702420754318e-05, "loss": 0.401, "step": 18739 }, { "epoch": 3.0590996285865883, "grad_norm": 2.047327756881714, "learning_rate": 1.8456854857233498e-05, "loss": 0.5154, "step": 18740 }, { "epoch": 3.059262887229093, "grad_norm": 1.985632061958313, "learning_rate": 1.8456685498407767e-05, "loss": 0.4412, "step": 18741 }, { "epoch": 3.0594261458715972, "grad_norm": 2.4681646823883057, "learning_rate": 1.8456516131066157e-05, "loss": 0.4895, "step": 18742 }, { "epoch": 3.0595894045141017, "grad_norm": 2.090775728225708, "learning_rate": 1.8456346755208834e-05, "loss": 0.4538, "step": 18743 }, { "epoch": 3.0597526631566057, "grad_norm": 2.1487197875976562, "learning_rate": 1.8456177370835973e-05, "loss": 0.5023, "step": 18744 }, { "epoch": 3.05991592179911, "grad_norm": 1.869023084640503, "learning_rate": 1.8456007977947744e-05, "loss": 0.448, "step": 18745 }, { "epoch": 3.0600791804416145, "grad_norm": 1.8030370473861694, "learning_rate": 1.8455838576544317e-05, "loss": 0.4313, "step": 18746 }, { "epoch": 3.060242439084119, "grad_norm": 2.025587320327759, "learning_rate": 1.8455669166625864e-05, "loss": 0.4673, "step": 18747 }, { "epoch": 3.0604056977266234, "grad_norm": 2.341188669204712, "learning_rate": 1.8455499748192554e-05, "loss": 0.5292, "step": 18748 }, { "epoch": 3.060568956369128, "grad_norm": 1.8538835048675537, "learning_rate": 1.8455330321244558e-05, "loss": 0.4928, "step": 18749 }, { "epoch": 3.0607322150116323, "grad_norm": 1.915796160697937, "learning_rate": 1.8455160885782045e-05, "loss": 0.4313, "step": 18750 }, { "epoch": 3.0608954736541367, "grad_norm": 2.1587038040161133, "learning_rate": 1.8454991441805186e-05, "loss": 0.4954, "step": 18751 }, { "epoch": 3.061058732296641, "grad_norm": 1.7202714681625366, "learning_rate": 1.845482198931416e-05, "loss": 0.448, "step": 18752 }, { "epoch": 3.061221990939145, "grad_norm": 2.257978916168213, "learning_rate": 1.8454652528309123e-05, "loss": 0.5252, "step": 18753 }, { "epoch": 3.0613852495816496, "grad_norm": 1.6192007064819336, "learning_rate": 1.8454483058790254e-05, "loss": 0.433, "step": 18754 }, { "epoch": 3.061548508224154, "grad_norm": 2.0182294845581055, "learning_rate": 1.8454313580757728e-05, "loss": 0.4545, "step": 18755 }, { "epoch": 3.0617117668666585, "grad_norm": 2.7299411296844482, "learning_rate": 1.8454144094211704e-05, "loss": 0.51, "step": 18756 }, { "epoch": 3.061875025509163, "grad_norm": 2.188082218170166, "learning_rate": 1.8453974599152366e-05, "loss": 0.5215, "step": 18757 }, { "epoch": 3.0620382841516673, "grad_norm": 2.0099289417266846, "learning_rate": 1.8453805095579872e-05, "loss": 0.5183, "step": 18758 }, { "epoch": 3.062201542794172, "grad_norm": 1.8648685216903687, "learning_rate": 1.8453635583494402e-05, "loss": 0.4668, "step": 18759 }, { "epoch": 3.062364801436676, "grad_norm": 2.0064539909362793, "learning_rate": 1.8453466062896122e-05, "loss": 0.4871, "step": 18760 }, { "epoch": 3.06252806007918, "grad_norm": 1.9047431945800781, "learning_rate": 1.8453296533785202e-05, "loss": 0.4997, "step": 18761 }, { "epoch": 3.0626913187216847, "grad_norm": 1.980421543121338, "learning_rate": 1.8453126996161818e-05, "loss": 0.5062, "step": 18762 }, { "epoch": 3.062854577364189, "grad_norm": 1.6593960523605347, "learning_rate": 1.8452957450026135e-05, "loss": 0.3719, "step": 18763 }, { "epoch": 3.0630178360066935, "grad_norm": 1.9482228755950928, "learning_rate": 1.8452787895378327e-05, "loss": 0.4308, "step": 18764 }, { "epoch": 3.063181094649198, "grad_norm": 2.419581890106201, "learning_rate": 1.8452618332218563e-05, "loss": 0.5411, "step": 18765 }, { "epoch": 3.0633443532917024, "grad_norm": 1.9124846458435059, "learning_rate": 1.8452448760547015e-05, "loss": 0.5385, "step": 18766 }, { "epoch": 3.063507611934207, "grad_norm": 1.8167780637741089, "learning_rate": 1.8452279180363854e-05, "loss": 0.4426, "step": 18767 }, { "epoch": 3.0636708705767113, "grad_norm": 1.699260950088501, "learning_rate": 1.8452109591669248e-05, "loss": 0.4387, "step": 18768 }, { "epoch": 3.0638341292192157, "grad_norm": 2.080015182495117, "learning_rate": 1.8451939994463374e-05, "loss": 0.481, "step": 18769 }, { "epoch": 3.06399738786172, "grad_norm": 1.9230217933654785, "learning_rate": 1.8451770388746398e-05, "loss": 0.4287, "step": 18770 }, { "epoch": 3.064160646504224, "grad_norm": 1.827426791191101, "learning_rate": 1.845160077451849e-05, "loss": 0.465, "step": 18771 }, { "epoch": 3.0643239051467286, "grad_norm": 1.5017127990722656, "learning_rate": 1.845143115177982e-05, "loss": 0.3663, "step": 18772 }, { "epoch": 3.064487163789233, "grad_norm": 1.8771162033081055, "learning_rate": 1.8451261520530563e-05, "loss": 0.5101, "step": 18773 }, { "epoch": 3.0646504224317375, "grad_norm": 1.3327879905700684, "learning_rate": 1.8451091880770885e-05, "loss": 0.3607, "step": 18774 }, { "epoch": 3.064813681074242, "grad_norm": 1.596580982208252, "learning_rate": 1.8450922232500966e-05, "loss": 0.393, "step": 18775 }, { "epoch": 3.0649769397167463, "grad_norm": 1.7358887195587158, "learning_rate": 1.8450752575720967e-05, "loss": 0.396, "step": 18776 }, { "epoch": 3.065140198359251, "grad_norm": 2.254580020904541, "learning_rate": 1.845058291043106e-05, "loss": 0.4688, "step": 18777 }, { "epoch": 3.065303457001755, "grad_norm": 1.796830654144287, "learning_rate": 1.8450413236631425e-05, "loss": 0.3779, "step": 18778 }, { "epoch": 3.065466715644259, "grad_norm": 1.7433054447174072, "learning_rate": 1.845024355432222e-05, "loss": 0.4443, "step": 18779 }, { "epoch": 3.0656299742867636, "grad_norm": 2.4573659896850586, "learning_rate": 1.8450073863503622e-05, "loss": 0.4966, "step": 18780 }, { "epoch": 3.065793232929268, "grad_norm": 2.0889763832092285, "learning_rate": 1.8449904164175804e-05, "loss": 0.4811, "step": 18781 }, { "epoch": 3.0659564915717725, "grad_norm": 2.18731951713562, "learning_rate": 1.8449734456338936e-05, "loss": 0.4349, "step": 18782 }, { "epoch": 3.066119750214277, "grad_norm": 1.7849833965301514, "learning_rate": 1.8449564739993186e-05, "loss": 0.4856, "step": 18783 }, { "epoch": 3.0662830088567814, "grad_norm": 2.3309128284454346, "learning_rate": 1.8449395015138728e-05, "loss": 0.5798, "step": 18784 }, { "epoch": 3.066446267499286, "grad_norm": 1.9184921979904175, "learning_rate": 1.844922528177573e-05, "loss": 0.4527, "step": 18785 }, { "epoch": 3.0666095261417903, "grad_norm": 1.634769082069397, "learning_rate": 1.8449055539904363e-05, "loss": 0.4139, "step": 18786 }, { "epoch": 3.0667727847842947, "grad_norm": 1.9795173406600952, "learning_rate": 1.8448885789524802e-05, "loss": 0.4913, "step": 18787 }, { "epoch": 3.0669360434267987, "grad_norm": 2.085861921310425, "learning_rate": 1.844871603063721e-05, "loss": 0.5435, "step": 18788 }, { "epoch": 3.067099302069303, "grad_norm": 1.867795467376709, "learning_rate": 1.844854626324177e-05, "loss": 0.4135, "step": 18789 }, { "epoch": 3.0672625607118076, "grad_norm": 1.920078158378601, "learning_rate": 1.8448376487338647e-05, "loss": 0.4524, "step": 18790 }, { "epoch": 3.067425819354312, "grad_norm": 1.8178565502166748, "learning_rate": 1.8448206702928005e-05, "loss": 0.4533, "step": 18791 }, { "epoch": 3.0675890779968165, "grad_norm": 1.8438527584075928, "learning_rate": 1.8448036910010025e-05, "loss": 0.4871, "step": 18792 }, { "epoch": 3.067752336639321, "grad_norm": 1.9826704263687134, "learning_rate": 1.844786710858487e-05, "loss": 0.4577, "step": 18793 }, { "epoch": 3.0679155952818253, "grad_norm": 1.754239559173584, "learning_rate": 1.844769729865272e-05, "loss": 0.4249, "step": 18794 }, { "epoch": 3.0680788539243298, "grad_norm": 2.4487569332122803, "learning_rate": 1.844752748021374e-05, "loss": 0.6038, "step": 18795 }, { "epoch": 3.068242112566834, "grad_norm": 1.706122875213623, "learning_rate": 1.84473576532681e-05, "loss": 0.4649, "step": 18796 }, { "epoch": 3.068405371209338, "grad_norm": 2.041039228439331, "learning_rate": 1.8447187817815972e-05, "loss": 0.4782, "step": 18797 }, { "epoch": 3.0685686298518426, "grad_norm": 1.55514395236969, "learning_rate": 1.844701797385753e-05, "loss": 0.3788, "step": 18798 }, { "epoch": 3.068731888494347, "grad_norm": 1.7854281663894653, "learning_rate": 1.8446848121392946e-05, "loss": 0.4606, "step": 18799 }, { "epoch": 3.0688951471368515, "grad_norm": 2.037917137145996, "learning_rate": 1.8446678260422388e-05, "loss": 0.53, "step": 18800 }, { "epoch": 3.069058405779356, "grad_norm": 1.9133150577545166, "learning_rate": 1.844650839094602e-05, "loss": 0.4452, "step": 18801 }, { "epoch": 3.0692216644218604, "grad_norm": 1.9216768741607666, "learning_rate": 1.8446338512964028e-05, "loss": 0.4917, "step": 18802 }, { "epoch": 3.069384923064365, "grad_norm": 1.7884793281555176, "learning_rate": 1.844616862647657e-05, "loss": 0.4299, "step": 18803 }, { "epoch": 3.0695481817068693, "grad_norm": 1.9165092706680298, "learning_rate": 1.8445998731483827e-05, "loss": 0.4008, "step": 18804 }, { "epoch": 3.0697114403493737, "grad_norm": 1.9314677715301514, "learning_rate": 1.844582882798596e-05, "loss": 0.3942, "step": 18805 }, { "epoch": 3.0698746989918777, "grad_norm": 2.2654433250427246, "learning_rate": 1.844565891598315e-05, "loss": 0.5436, "step": 18806 }, { "epoch": 3.070037957634382, "grad_norm": 1.7380315065383911, "learning_rate": 1.844548899547556e-05, "loss": 0.4668, "step": 18807 }, { "epoch": 3.0702012162768866, "grad_norm": 2.2576708793640137, "learning_rate": 1.844531906646337e-05, "loss": 0.5726, "step": 18808 }, { "epoch": 3.070364474919391, "grad_norm": 1.8316447734832764, "learning_rate": 1.8445149128946744e-05, "loss": 0.4965, "step": 18809 }, { "epoch": 3.0705277335618955, "grad_norm": 1.7351958751678467, "learning_rate": 1.8444979182925855e-05, "loss": 0.4402, "step": 18810 }, { "epoch": 3.0706909922044, "grad_norm": 1.982672095298767, "learning_rate": 1.8444809228400874e-05, "loss": 0.4829, "step": 18811 }, { "epoch": 3.0708542508469043, "grad_norm": 1.7118393182754517, "learning_rate": 1.844463926537197e-05, "loss": 0.445, "step": 18812 }, { "epoch": 3.0710175094894088, "grad_norm": 1.8214925527572632, "learning_rate": 1.844446929383932e-05, "loss": 0.4403, "step": 18813 }, { "epoch": 3.0711807681319128, "grad_norm": 2.1792144775390625, "learning_rate": 1.844429931380309e-05, "loss": 0.5553, "step": 18814 }, { "epoch": 3.071344026774417, "grad_norm": 2.407731771469116, "learning_rate": 1.8444129325263455e-05, "loss": 0.51, "step": 18815 }, { "epoch": 3.0715072854169216, "grad_norm": 2.4391098022460938, "learning_rate": 1.844395932822058e-05, "loss": 0.4763, "step": 18816 }, { "epoch": 3.071670544059426, "grad_norm": 1.8661123514175415, "learning_rate": 1.844378932267464e-05, "loss": 0.3985, "step": 18817 }, { "epoch": 3.0718338027019305, "grad_norm": 2.0415642261505127, "learning_rate": 1.8443619308625812e-05, "loss": 0.4733, "step": 18818 }, { "epoch": 3.071997061344435, "grad_norm": 1.7580021619796753, "learning_rate": 1.844344928607426e-05, "loss": 0.4228, "step": 18819 }, { "epoch": 3.0721603199869394, "grad_norm": 2.0802364349365234, "learning_rate": 1.8443279255020153e-05, "loss": 0.5292, "step": 18820 }, { "epoch": 3.072323578629444, "grad_norm": 2.2764739990234375, "learning_rate": 1.8443109215463665e-05, "loss": 0.5953, "step": 18821 }, { "epoch": 3.0724868372719483, "grad_norm": 1.983020305633545, "learning_rate": 1.8442939167404975e-05, "loss": 0.4931, "step": 18822 }, { "epoch": 3.0726500959144523, "grad_norm": 1.8773808479309082, "learning_rate": 1.8442769110844243e-05, "loss": 0.4431, "step": 18823 }, { "epoch": 3.0728133545569567, "grad_norm": 1.8706331253051758, "learning_rate": 1.8442599045781645e-05, "loss": 0.4966, "step": 18824 }, { "epoch": 3.072976613199461, "grad_norm": 1.8343526124954224, "learning_rate": 1.844242897221735e-05, "loss": 0.4187, "step": 18825 }, { "epoch": 3.0731398718419656, "grad_norm": 1.703696846961975, "learning_rate": 1.8442258890151535e-05, "loss": 0.4495, "step": 18826 }, { "epoch": 3.07330313048447, "grad_norm": 1.814351201057434, "learning_rate": 1.844208879958437e-05, "loss": 0.4412, "step": 18827 }, { "epoch": 3.0734663891269745, "grad_norm": 1.8826587200164795, "learning_rate": 1.844191870051602e-05, "loss": 0.5261, "step": 18828 }, { "epoch": 3.073629647769479, "grad_norm": 2.12418794631958, "learning_rate": 1.844174859294666e-05, "loss": 0.5303, "step": 18829 }, { "epoch": 3.0737929064119833, "grad_norm": 1.4922559261322021, "learning_rate": 1.844157847687646e-05, "loss": 0.4084, "step": 18830 }, { "epoch": 3.0739561650544878, "grad_norm": 1.7425146102905273, "learning_rate": 1.8441408352305595e-05, "loss": 0.4129, "step": 18831 }, { "epoch": 3.0741194236969918, "grad_norm": 1.975193977355957, "learning_rate": 1.8441238219234234e-05, "loss": 0.5039, "step": 18832 }, { "epoch": 3.074282682339496, "grad_norm": 2.271416425704956, "learning_rate": 1.8441068077662545e-05, "loss": 0.7564, "step": 18833 }, { "epoch": 3.0744459409820006, "grad_norm": 2.3555190563201904, "learning_rate": 1.8440897927590707e-05, "loss": 0.599, "step": 18834 }, { "epoch": 3.074609199624505, "grad_norm": 1.8905137777328491, "learning_rate": 1.8440727769018883e-05, "loss": 0.5006, "step": 18835 }, { "epoch": 3.0747724582670095, "grad_norm": 2.0193545818328857, "learning_rate": 1.8440557601947254e-05, "loss": 0.5174, "step": 18836 }, { "epoch": 3.074935716909514, "grad_norm": 1.962745189666748, "learning_rate": 1.844038742637598e-05, "loss": 0.4974, "step": 18837 }, { "epoch": 3.0750989755520184, "grad_norm": 2.2239410877227783, "learning_rate": 1.8440217242305243e-05, "loss": 0.511, "step": 18838 }, { "epoch": 3.075262234194523, "grad_norm": 2.051652431488037, "learning_rate": 1.8440047049735205e-05, "loss": 0.4376, "step": 18839 }, { "epoch": 3.0754254928370273, "grad_norm": 1.8901581764221191, "learning_rate": 1.8439876848666048e-05, "loss": 0.476, "step": 18840 }, { "epoch": 3.0755887514795313, "grad_norm": 1.6658117771148682, "learning_rate": 1.843970663909793e-05, "loss": 0.4183, "step": 18841 }, { "epoch": 3.0757520101220357, "grad_norm": 2.0383148193359375, "learning_rate": 1.8439536421031035e-05, "loss": 0.5322, "step": 18842 }, { "epoch": 3.07591526876454, "grad_norm": 1.9539990425109863, "learning_rate": 1.8439366194465526e-05, "loss": 0.4955, "step": 18843 }, { "epoch": 3.0760785274070446, "grad_norm": 2.1011717319488525, "learning_rate": 1.843919595940158e-05, "loss": 0.5348, "step": 18844 }, { "epoch": 3.076241786049549, "grad_norm": 2.296721935272217, "learning_rate": 1.8439025715839364e-05, "loss": 0.4709, "step": 18845 }, { "epoch": 3.0764050446920534, "grad_norm": 2.160215139389038, "learning_rate": 1.843885546377905e-05, "loss": 0.4602, "step": 18846 }, { "epoch": 3.076568303334558, "grad_norm": 1.9022434949874878, "learning_rate": 1.843868520322081e-05, "loss": 0.4855, "step": 18847 }, { "epoch": 3.0767315619770623, "grad_norm": 1.6373975276947021, "learning_rate": 1.8438514934164822e-05, "loss": 0.4375, "step": 18848 }, { "epoch": 3.0768948206195668, "grad_norm": 1.8800544738769531, "learning_rate": 1.843834465661125e-05, "loss": 0.4667, "step": 18849 }, { "epoch": 3.0770580792620708, "grad_norm": 2.222196102142334, "learning_rate": 1.8438174370560263e-05, "loss": 0.5028, "step": 18850 }, { "epoch": 3.077221337904575, "grad_norm": 1.7658442258834839, "learning_rate": 1.843800407601204e-05, "loss": 0.4475, "step": 18851 }, { "epoch": 3.0773845965470796, "grad_norm": 1.8040783405303955, "learning_rate": 1.843783377296675e-05, "loss": 0.4788, "step": 18852 }, { "epoch": 3.077547855189584, "grad_norm": 2.1723926067352295, "learning_rate": 1.8437663461424563e-05, "loss": 0.5326, "step": 18853 }, { "epoch": 3.0777111138320885, "grad_norm": 2.088674783706665, "learning_rate": 1.843749314138565e-05, "loss": 0.4943, "step": 18854 }, { "epoch": 3.077874372474593, "grad_norm": 1.7965309619903564, "learning_rate": 1.843732281285018e-05, "loss": 0.4555, "step": 18855 }, { "epoch": 3.0780376311170974, "grad_norm": 2.2205846309661865, "learning_rate": 1.8437152475818335e-05, "loss": 0.5216, "step": 18856 }, { "epoch": 3.078200889759602, "grad_norm": 1.6337807178497314, "learning_rate": 1.8436982130290277e-05, "loss": 0.4414, "step": 18857 }, { "epoch": 3.0783641484021063, "grad_norm": 1.7559576034545898, "learning_rate": 1.843681177626618e-05, "loss": 0.4488, "step": 18858 }, { "epoch": 3.0785274070446103, "grad_norm": 1.9173935651779175, "learning_rate": 1.8436641413746214e-05, "loss": 0.5182, "step": 18859 }, { "epoch": 3.0786906656871147, "grad_norm": 1.780761480331421, "learning_rate": 1.8436471042730555e-05, "loss": 0.4129, "step": 18860 }, { "epoch": 3.078853924329619, "grad_norm": 2.1013360023498535, "learning_rate": 1.843630066321937e-05, "loss": 0.4438, "step": 18861 }, { "epoch": 3.0790171829721236, "grad_norm": 1.6075325012207031, "learning_rate": 1.8436130275212832e-05, "loss": 0.3591, "step": 18862 }, { "epoch": 3.079180441614628, "grad_norm": 2.1101760864257812, "learning_rate": 1.8435959878711114e-05, "loss": 0.5978, "step": 18863 }, { "epoch": 3.0793437002571324, "grad_norm": 1.859292984008789, "learning_rate": 1.843578947371439e-05, "loss": 0.48, "step": 18864 }, { "epoch": 3.079506958899637, "grad_norm": 1.755092978477478, "learning_rate": 1.8435619060222825e-05, "loss": 0.4096, "step": 18865 }, { "epoch": 3.0796702175421413, "grad_norm": 2.13969087600708, "learning_rate": 1.8435448638236596e-05, "loss": 0.5565, "step": 18866 }, { "epoch": 3.0798334761846453, "grad_norm": 1.8613191843032837, "learning_rate": 1.8435278207755867e-05, "loss": 0.4395, "step": 18867 }, { "epoch": 3.0799967348271498, "grad_norm": 1.708535075187683, "learning_rate": 1.843510776878082e-05, "loss": 0.4353, "step": 18868 }, { "epoch": 3.080159993469654, "grad_norm": 2.192122220993042, "learning_rate": 1.843493732131162e-05, "loss": 0.5571, "step": 18869 }, { "epoch": 3.0803232521121586, "grad_norm": 1.6817936897277832, "learning_rate": 1.843476686534844e-05, "loss": 0.4646, "step": 18870 }, { "epoch": 3.080486510754663, "grad_norm": 1.857354998588562, "learning_rate": 1.8434596400891455e-05, "loss": 0.509, "step": 18871 }, { "epoch": 3.0806497693971675, "grad_norm": 2.0006778240203857, "learning_rate": 1.843442592794083e-05, "loss": 0.4927, "step": 18872 }, { "epoch": 3.080813028039672, "grad_norm": 2.2836649417877197, "learning_rate": 1.8434255446496743e-05, "loss": 0.4629, "step": 18873 }, { "epoch": 3.0809762866821764, "grad_norm": 2.1072330474853516, "learning_rate": 1.8434084956559362e-05, "loss": 0.4744, "step": 18874 }, { "epoch": 3.081139545324681, "grad_norm": 1.6394875049591064, "learning_rate": 1.843391445812886e-05, "loss": 0.4124, "step": 18875 }, { "epoch": 3.081302803967185, "grad_norm": 1.7107243537902832, "learning_rate": 1.8433743951205406e-05, "loss": 0.4205, "step": 18876 }, { "epoch": 3.0814660626096892, "grad_norm": 1.699691891670227, "learning_rate": 1.8433573435789177e-05, "loss": 0.3941, "step": 18877 }, { "epoch": 3.0816293212521937, "grad_norm": 1.9793766736984253, "learning_rate": 1.843340291188034e-05, "loss": 0.4651, "step": 18878 }, { "epoch": 3.081792579894698, "grad_norm": 2.0843029022216797, "learning_rate": 1.843323237947907e-05, "loss": 0.4999, "step": 18879 }, { "epoch": 3.0819558385372026, "grad_norm": 1.8294036388397217, "learning_rate": 1.8433061838585537e-05, "loss": 0.5112, "step": 18880 }, { "epoch": 3.082119097179707, "grad_norm": 1.8631725311279297, "learning_rate": 1.843289128919991e-05, "loss": 0.5116, "step": 18881 }, { "epoch": 3.0822823558222114, "grad_norm": 2.056917667388916, "learning_rate": 1.8432720731322367e-05, "loss": 0.5024, "step": 18882 }, { "epoch": 3.082445614464716, "grad_norm": 1.6460543870925903, "learning_rate": 1.8432550164953077e-05, "loss": 0.487, "step": 18883 }, { "epoch": 3.0826088731072203, "grad_norm": 2.10288667678833, "learning_rate": 1.843237959009221e-05, "loss": 0.4726, "step": 18884 }, { "epoch": 3.0827721317497243, "grad_norm": 2.0627365112304688, "learning_rate": 1.8432209006739937e-05, "loss": 0.5017, "step": 18885 }, { "epoch": 3.0829353903922287, "grad_norm": 1.8946086168289185, "learning_rate": 1.8432038414896432e-05, "loss": 0.4988, "step": 18886 }, { "epoch": 3.083098649034733, "grad_norm": 1.8317276239395142, "learning_rate": 1.843186781456187e-05, "loss": 0.4563, "step": 18887 }, { "epoch": 3.0832619076772376, "grad_norm": 1.7498021125793457, "learning_rate": 1.843169720573642e-05, "loss": 0.4339, "step": 18888 }, { "epoch": 3.083425166319742, "grad_norm": 1.6694129705429077, "learning_rate": 1.843152658842025e-05, "loss": 0.4455, "step": 18889 }, { "epoch": 3.0835884249622465, "grad_norm": 1.9756728410720825, "learning_rate": 1.8431355962613535e-05, "loss": 0.4538, "step": 18890 }, { "epoch": 3.083751683604751, "grad_norm": 1.7390265464782715, "learning_rate": 1.8431185328316445e-05, "loss": 0.4494, "step": 18891 }, { "epoch": 3.0839149422472554, "grad_norm": 2.0222105979919434, "learning_rate": 1.8431014685529157e-05, "loss": 0.4632, "step": 18892 }, { "epoch": 3.08407820088976, "grad_norm": 1.6990940570831299, "learning_rate": 1.8430844034251837e-05, "loss": 0.3818, "step": 18893 }, { "epoch": 3.084241459532264, "grad_norm": 1.9534467458724976, "learning_rate": 1.8430673374484663e-05, "loss": 0.5114, "step": 18894 }, { "epoch": 3.0844047181747682, "grad_norm": 1.7974034547805786, "learning_rate": 1.84305027062278e-05, "loss": 0.4201, "step": 18895 }, { "epoch": 3.0845679768172727, "grad_norm": 2.0211541652679443, "learning_rate": 1.8430332029481425e-05, "loss": 0.4238, "step": 18896 }, { "epoch": 3.084731235459777, "grad_norm": 2.3846843242645264, "learning_rate": 1.8430161344245708e-05, "loss": 0.5701, "step": 18897 }, { "epoch": 3.0848944941022816, "grad_norm": 1.9929403066635132, "learning_rate": 1.8429990650520816e-05, "loss": 0.5305, "step": 18898 }, { "epoch": 3.085057752744786, "grad_norm": 1.5572116374969482, "learning_rate": 1.842981994830693e-05, "loss": 0.3789, "step": 18899 }, { "epoch": 3.0852210113872904, "grad_norm": 2.0137388706207275, "learning_rate": 1.8429649237604215e-05, "loss": 0.4663, "step": 18900 }, { "epoch": 3.085384270029795, "grad_norm": 1.5921205282211304, "learning_rate": 1.8429478518412848e-05, "loss": 0.3886, "step": 18901 }, { "epoch": 3.085547528672299, "grad_norm": 1.6845531463623047, "learning_rate": 1.8429307790732997e-05, "loss": 0.4146, "step": 18902 }, { "epoch": 3.0857107873148033, "grad_norm": 1.9013365507125854, "learning_rate": 1.842913705456484e-05, "loss": 0.5406, "step": 18903 }, { "epoch": 3.0858740459573077, "grad_norm": 1.9745876789093018, "learning_rate": 1.8428966309908538e-05, "loss": 0.4581, "step": 18904 }, { "epoch": 3.086037304599812, "grad_norm": 1.698372721672058, "learning_rate": 1.8428795556764272e-05, "loss": 0.4509, "step": 18905 }, { "epoch": 3.0862005632423166, "grad_norm": 1.7978824377059937, "learning_rate": 1.8428624795132207e-05, "loss": 0.3892, "step": 18906 }, { "epoch": 3.086363821884821, "grad_norm": 2.1463847160339355, "learning_rate": 1.8428454025012526e-05, "loss": 0.5573, "step": 18907 }, { "epoch": 3.0865270805273255, "grad_norm": 1.645027995109558, "learning_rate": 1.842828324640539e-05, "loss": 0.4348, "step": 18908 }, { "epoch": 3.08669033916983, "grad_norm": 1.8199950456619263, "learning_rate": 1.8428112459310975e-05, "loss": 0.433, "step": 18909 }, { "epoch": 3.0868535978123344, "grad_norm": 2.1963396072387695, "learning_rate": 1.8427941663729453e-05, "loss": 0.5172, "step": 18910 }, { "epoch": 3.0870168564548384, "grad_norm": 2.0730042457580566, "learning_rate": 1.8427770859660997e-05, "loss": 0.4921, "step": 18911 }, { "epoch": 3.087180115097343, "grad_norm": 1.5925207138061523, "learning_rate": 1.8427600047105775e-05, "loss": 0.3947, "step": 18912 }, { "epoch": 3.0873433737398472, "grad_norm": 1.7242997884750366, "learning_rate": 1.8427429226063968e-05, "loss": 0.3305, "step": 18913 }, { "epoch": 3.0875066323823517, "grad_norm": 1.914718508720398, "learning_rate": 1.8427258396535737e-05, "loss": 0.4455, "step": 18914 }, { "epoch": 3.087669891024856, "grad_norm": 1.590900182723999, "learning_rate": 1.842708755852126e-05, "loss": 0.4086, "step": 18915 }, { "epoch": 3.0878331496673606, "grad_norm": 1.9944499731063843, "learning_rate": 1.842691671202071e-05, "loss": 0.4396, "step": 18916 }, { "epoch": 3.087996408309865, "grad_norm": 1.4331024885177612, "learning_rate": 1.8426745857034252e-05, "loss": 0.4256, "step": 18917 }, { "epoch": 3.0881596669523694, "grad_norm": 1.8899720907211304, "learning_rate": 1.8426574993562067e-05, "loss": 0.4176, "step": 18918 }, { "epoch": 3.088322925594874, "grad_norm": 1.6871520280838013, "learning_rate": 1.8426404121604324e-05, "loss": 0.3848, "step": 18919 }, { "epoch": 3.088486184237378, "grad_norm": 2.4072718620300293, "learning_rate": 1.8426233241161193e-05, "loss": 0.4916, "step": 18920 }, { "epoch": 3.0886494428798823, "grad_norm": 1.9007920026779175, "learning_rate": 1.8426062352232846e-05, "loss": 0.4391, "step": 18921 }, { "epoch": 3.0888127015223867, "grad_norm": 2.083683729171753, "learning_rate": 1.842589145481946e-05, "loss": 0.5468, "step": 18922 }, { "epoch": 3.088975960164891, "grad_norm": 1.8783966302871704, "learning_rate": 1.8425720548921203e-05, "loss": 0.4083, "step": 18923 }, { "epoch": 3.0891392188073956, "grad_norm": 2.108386278152466, "learning_rate": 1.8425549634538245e-05, "loss": 0.5154, "step": 18924 }, { "epoch": 3.0893024774499, "grad_norm": 1.8346065282821655, "learning_rate": 1.842537871167076e-05, "loss": 0.462, "step": 18925 }, { "epoch": 3.0894657360924045, "grad_norm": 2.031768560409546, "learning_rate": 1.8425207780318925e-05, "loss": 0.4067, "step": 18926 }, { "epoch": 3.089628994734909, "grad_norm": 1.9320361614227295, "learning_rate": 1.8425036840482905e-05, "loss": 0.4561, "step": 18927 }, { "epoch": 3.0897922533774134, "grad_norm": 2.243340492248535, "learning_rate": 1.8424865892162874e-05, "loss": 0.5066, "step": 18928 }, { "epoch": 3.0899555120199174, "grad_norm": 2.1496119499206543, "learning_rate": 1.8424694935359012e-05, "loss": 0.5382, "step": 18929 }, { "epoch": 3.090118770662422, "grad_norm": 1.6834529638290405, "learning_rate": 1.842452397007148e-05, "loss": 0.4126, "step": 18930 }, { "epoch": 3.0902820293049262, "grad_norm": 2.4878175258636475, "learning_rate": 1.842435299630045e-05, "loss": 0.4812, "step": 18931 }, { "epoch": 3.0904452879474307, "grad_norm": 1.7239290475845337, "learning_rate": 1.8424182014046103e-05, "loss": 0.4579, "step": 18932 }, { "epoch": 3.090608546589935, "grad_norm": 2.2292487621307373, "learning_rate": 1.842401102330861e-05, "loss": 0.5019, "step": 18933 }, { "epoch": 3.0907718052324396, "grad_norm": 2.063720464706421, "learning_rate": 1.8423840024088134e-05, "loss": 0.5479, "step": 18934 }, { "epoch": 3.090935063874944, "grad_norm": 1.7588616609573364, "learning_rate": 1.8423669016384856e-05, "loss": 0.406, "step": 18935 }, { "epoch": 3.0910983225174484, "grad_norm": 1.8015446662902832, "learning_rate": 1.842349800019895e-05, "loss": 0.5022, "step": 18936 }, { "epoch": 3.091261581159953, "grad_norm": 1.7532639503479004, "learning_rate": 1.8423326975530578e-05, "loss": 0.4642, "step": 18937 }, { "epoch": 3.091424839802457, "grad_norm": 1.9718114137649536, "learning_rate": 1.842315594237992e-05, "loss": 0.4559, "step": 18938 }, { "epoch": 3.0915880984449613, "grad_norm": 1.6580588817596436, "learning_rate": 1.8422984900747148e-05, "loss": 0.4424, "step": 18939 }, { "epoch": 3.0917513570874657, "grad_norm": 1.4869608879089355, "learning_rate": 1.842281385063243e-05, "loss": 0.437, "step": 18940 }, { "epoch": 3.09191461572997, "grad_norm": 1.817999005317688, "learning_rate": 1.842264279203594e-05, "loss": 0.4334, "step": 18941 }, { "epoch": 3.0920778743724746, "grad_norm": 1.7412457466125488, "learning_rate": 1.842247172495785e-05, "loss": 0.4836, "step": 18942 }, { "epoch": 3.092241133014979, "grad_norm": 2.382089138031006, "learning_rate": 1.8422300649398336e-05, "loss": 0.5351, "step": 18943 }, { "epoch": 3.0924043916574835, "grad_norm": 1.657732605934143, "learning_rate": 1.842212956535757e-05, "loss": 0.4324, "step": 18944 }, { "epoch": 3.092567650299988, "grad_norm": 1.6913809776306152, "learning_rate": 1.8421958472835715e-05, "loss": 0.4465, "step": 18945 }, { "epoch": 3.0927309089424924, "grad_norm": 1.8423367738723755, "learning_rate": 1.8421787371832954e-05, "loss": 0.5009, "step": 18946 }, { "epoch": 3.0928941675849964, "grad_norm": 2.097724676132202, "learning_rate": 1.8421616262349452e-05, "loss": 0.5387, "step": 18947 }, { "epoch": 3.093057426227501, "grad_norm": 1.6578075885772705, "learning_rate": 1.842144514438539e-05, "loss": 0.3923, "step": 18948 }, { "epoch": 3.0932206848700052, "grad_norm": 1.5458980798721313, "learning_rate": 1.8421274017940932e-05, "loss": 0.4114, "step": 18949 }, { "epoch": 3.0933839435125097, "grad_norm": 2.228379011154175, "learning_rate": 1.8421102883016253e-05, "loss": 0.6016, "step": 18950 }, { "epoch": 3.093547202155014, "grad_norm": 2.0353455543518066, "learning_rate": 1.8420931739611527e-05, "loss": 0.56, "step": 18951 }, { "epoch": 3.0937104607975185, "grad_norm": 1.7086330652236938, "learning_rate": 1.8420760587726925e-05, "loss": 0.409, "step": 18952 }, { "epoch": 3.093873719440023, "grad_norm": 1.891031265258789, "learning_rate": 1.8420589427362618e-05, "loss": 0.483, "step": 18953 }, { "epoch": 3.0940369780825274, "grad_norm": 2.34728741645813, "learning_rate": 1.8420418258518782e-05, "loss": 0.5169, "step": 18954 }, { "epoch": 3.0942002367250314, "grad_norm": 1.700730800628662, "learning_rate": 1.8420247081195584e-05, "loss": 0.4401, "step": 18955 }, { "epoch": 3.094363495367536, "grad_norm": 1.9691925048828125, "learning_rate": 1.84200758953932e-05, "loss": 0.5734, "step": 18956 }, { "epoch": 3.0945267540100403, "grad_norm": 1.9014405012130737, "learning_rate": 1.8419904701111804e-05, "loss": 0.4983, "step": 18957 }, { "epoch": 3.0946900126525447, "grad_norm": 1.88710618019104, "learning_rate": 1.8419733498351563e-05, "loss": 0.4026, "step": 18958 }, { "epoch": 3.094853271295049, "grad_norm": 2.0525238513946533, "learning_rate": 1.8419562287112658e-05, "loss": 0.4753, "step": 18959 }, { "epoch": 3.0950165299375536, "grad_norm": 1.7597275972366333, "learning_rate": 1.8419391067395248e-05, "loss": 0.4625, "step": 18960 }, { "epoch": 3.095179788580058, "grad_norm": 2.319255828857422, "learning_rate": 1.841921983919952e-05, "loss": 0.8395, "step": 18961 }, { "epoch": 3.0953430472225625, "grad_norm": 1.6670342683792114, "learning_rate": 1.8419048602525637e-05, "loss": 0.3964, "step": 18962 }, { "epoch": 3.095506305865067, "grad_norm": 1.7694436311721802, "learning_rate": 1.8418877357373776e-05, "loss": 0.4231, "step": 18963 }, { "epoch": 3.095669564507571, "grad_norm": 2.0551061630249023, "learning_rate": 1.8418706103744108e-05, "loss": 0.5283, "step": 18964 }, { "epoch": 3.0958328231500754, "grad_norm": 1.931770920753479, "learning_rate": 1.8418534841636805e-05, "loss": 0.4505, "step": 18965 }, { "epoch": 3.09599608179258, "grad_norm": 2.1917319297790527, "learning_rate": 1.8418363571052037e-05, "loss": 0.5141, "step": 18966 }, { "epoch": 3.0961593404350842, "grad_norm": 1.8727467060089111, "learning_rate": 1.841819229198998e-05, "loss": 0.4571, "step": 18967 }, { "epoch": 3.0963225990775887, "grad_norm": 1.9298038482666016, "learning_rate": 1.841802100445081e-05, "loss": 0.4772, "step": 18968 }, { "epoch": 3.096485857720093, "grad_norm": 1.8089710474014282, "learning_rate": 1.8417849708434687e-05, "loss": 0.473, "step": 18969 }, { "epoch": 3.0966491163625975, "grad_norm": 2.198641777038574, "learning_rate": 1.8417678403941798e-05, "loss": 0.5159, "step": 18970 }, { "epoch": 3.096812375005102, "grad_norm": 2.035943031311035, "learning_rate": 1.8417507090972308e-05, "loss": 0.5024, "step": 18971 }, { "epoch": 3.0969756336476064, "grad_norm": 1.6451998949050903, "learning_rate": 1.8417335769526386e-05, "loss": 0.4529, "step": 18972 }, { "epoch": 3.0971388922901104, "grad_norm": 1.8999505043029785, "learning_rate": 1.8417164439604213e-05, "loss": 0.4186, "step": 18973 }, { "epoch": 3.097302150932615, "grad_norm": 1.8864094018936157, "learning_rate": 1.8416993101205957e-05, "loss": 0.4595, "step": 18974 }, { "epoch": 3.0974654095751193, "grad_norm": 1.9778538942337036, "learning_rate": 1.841682175433179e-05, "loss": 0.5471, "step": 18975 }, { "epoch": 3.0976286682176237, "grad_norm": 1.9574711322784424, "learning_rate": 1.841665039898189e-05, "loss": 0.5299, "step": 18976 }, { "epoch": 3.097791926860128, "grad_norm": 1.8119025230407715, "learning_rate": 1.8416479035156426e-05, "loss": 0.4166, "step": 18977 }, { "epoch": 3.0979551855026326, "grad_norm": 2.2681591510772705, "learning_rate": 1.8416307662855564e-05, "loss": 0.5185, "step": 18978 }, { "epoch": 3.098118444145137, "grad_norm": 1.8263970613479614, "learning_rate": 1.8416136282079485e-05, "loss": 0.4327, "step": 18979 }, { "epoch": 3.0982817027876415, "grad_norm": 1.79865300655365, "learning_rate": 1.841596489282836e-05, "loss": 0.4482, "step": 18980 }, { "epoch": 3.098444961430146, "grad_norm": 1.669936180114746, "learning_rate": 1.841579349510236e-05, "loss": 0.4588, "step": 18981 }, { "epoch": 3.09860822007265, "grad_norm": 1.8828682899475098, "learning_rate": 1.8415622088901655e-05, "loss": 0.5138, "step": 18982 }, { "epoch": 3.0987714787151543, "grad_norm": 2.2013888359069824, "learning_rate": 1.8415450674226422e-05, "loss": 0.5297, "step": 18983 }, { "epoch": 3.098934737357659, "grad_norm": 1.884238600730896, "learning_rate": 1.8415279251076838e-05, "loss": 0.4101, "step": 18984 }, { "epoch": 3.0990979960001632, "grad_norm": 1.9794212579727173, "learning_rate": 1.8415107819453065e-05, "loss": 0.5112, "step": 18985 }, { "epoch": 3.0992612546426677, "grad_norm": 1.663489818572998, "learning_rate": 1.841493637935528e-05, "loss": 0.4343, "step": 18986 }, { "epoch": 3.099424513285172, "grad_norm": 1.7106904983520508, "learning_rate": 1.841476493078366e-05, "loss": 0.4139, "step": 18987 }, { "epoch": 3.0995877719276765, "grad_norm": 1.7274110317230225, "learning_rate": 1.841459347373837e-05, "loss": 0.4746, "step": 18988 }, { "epoch": 3.099751030570181, "grad_norm": 1.8329302072525024, "learning_rate": 1.8414422008219585e-05, "loss": 0.521, "step": 18989 }, { "epoch": 3.099914289212685, "grad_norm": 2.0986664295196533, "learning_rate": 1.8414250534227485e-05, "loss": 0.5132, "step": 18990 }, { "epoch": 3.1000775478551894, "grad_norm": 1.7751376628875732, "learning_rate": 1.8414079051762234e-05, "loss": 0.4676, "step": 18991 }, { "epoch": 3.100240806497694, "grad_norm": 2.085333824157715, "learning_rate": 1.841390756082401e-05, "loss": 0.5709, "step": 18992 }, { "epoch": 3.1004040651401983, "grad_norm": 1.7551745176315308, "learning_rate": 1.841373606141298e-05, "loss": 0.4877, "step": 18993 }, { "epoch": 3.1005673237827027, "grad_norm": 1.6370567083358765, "learning_rate": 1.841356455352932e-05, "loss": 0.425, "step": 18994 }, { "epoch": 3.100730582425207, "grad_norm": 1.8510997295379639, "learning_rate": 1.8413393037173206e-05, "loss": 0.4814, "step": 18995 }, { "epoch": 3.1008938410677116, "grad_norm": 2.227151393890381, "learning_rate": 1.8413221512344805e-05, "loss": 0.5151, "step": 18996 }, { "epoch": 3.101057099710216, "grad_norm": 2.0914809703826904, "learning_rate": 1.8413049979044295e-05, "loss": 0.6898, "step": 18997 }, { "epoch": 3.1012203583527205, "grad_norm": 1.8980011940002441, "learning_rate": 1.8412878437271842e-05, "loss": 0.4302, "step": 18998 }, { "epoch": 3.101383616995225, "grad_norm": 1.8548920154571533, "learning_rate": 1.8412706887027624e-05, "loss": 0.483, "step": 18999 }, { "epoch": 3.101546875637729, "grad_norm": 1.592512607574463, "learning_rate": 1.8412535328311813e-05, "loss": 0.4541, "step": 19000 }, { "epoch": 3.1017101342802333, "grad_norm": 1.9632327556610107, "learning_rate": 1.841236376112458e-05, "loss": 0.4297, "step": 19001 }, { "epoch": 3.101873392922738, "grad_norm": 1.8129053115844727, "learning_rate": 1.84121921854661e-05, "loss": 0.469, "step": 19002 }, { "epoch": 3.102036651565242, "grad_norm": 1.8883506059646606, "learning_rate": 1.8412020601336547e-05, "loss": 0.4867, "step": 19003 }, { "epoch": 3.1021999102077467, "grad_norm": 1.8723527193069458, "learning_rate": 1.841184900873609e-05, "loss": 0.4596, "step": 19004 }, { "epoch": 3.102363168850251, "grad_norm": 1.977531909942627, "learning_rate": 1.84116774076649e-05, "loss": 0.4748, "step": 19005 }, { "epoch": 3.1025264274927555, "grad_norm": 1.9879380464553833, "learning_rate": 1.8411505798123156e-05, "loss": 0.5143, "step": 19006 }, { "epoch": 3.10268968613526, "grad_norm": 1.9427287578582764, "learning_rate": 1.8411334180111027e-05, "loss": 0.4879, "step": 19007 }, { "epoch": 3.102852944777764, "grad_norm": 2.1207242012023926, "learning_rate": 1.8411162553628687e-05, "loss": 0.5459, "step": 19008 }, { "epoch": 3.1030162034202684, "grad_norm": 1.6136043071746826, "learning_rate": 1.8410990918676308e-05, "loss": 0.4646, "step": 19009 }, { "epoch": 3.103179462062773, "grad_norm": 1.7465434074401855, "learning_rate": 1.8410819275254065e-05, "loss": 0.4656, "step": 19010 }, { "epoch": 3.1033427207052773, "grad_norm": 1.9319732189178467, "learning_rate": 1.841064762336213e-05, "loss": 0.4857, "step": 19011 }, { "epoch": 3.1035059793477817, "grad_norm": 1.8380181789398193, "learning_rate": 1.841047596300067e-05, "loss": 0.4727, "step": 19012 }, { "epoch": 3.103669237990286, "grad_norm": 2.288666248321533, "learning_rate": 1.8410304294169867e-05, "loss": 0.6244, "step": 19013 }, { "epoch": 3.1038324966327906, "grad_norm": 1.9889397621154785, "learning_rate": 1.841013261686989e-05, "loss": 0.4609, "step": 19014 }, { "epoch": 3.103995755275295, "grad_norm": 1.59856379032135, "learning_rate": 1.840996093110091e-05, "loss": 0.3995, "step": 19015 }, { "epoch": 3.1041590139177995, "grad_norm": 2.0326247215270996, "learning_rate": 1.8409789236863102e-05, "loss": 0.4761, "step": 19016 }, { "epoch": 3.1043222725603035, "grad_norm": 1.7710314989089966, "learning_rate": 1.840961753415664e-05, "loss": 0.4102, "step": 19017 }, { "epoch": 3.104485531202808, "grad_norm": 1.7599267959594727, "learning_rate": 1.8409445822981694e-05, "loss": 0.397, "step": 19018 }, { "epoch": 3.1046487898453123, "grad_norm": 2.1912729740142822, "learning_rate": 1.840927410333844e-05, "loss": 0.5312, "step": 19019 }, { "epoch": 3.104812048487817, "grad_norm": 2.0700430870056152, "learning_rate": 1.8409102375227042e-05, "loss": 0.4968, "step": 19020 }, { "epoch": 3.104975307130321, "grad_norm": 1.51426100730896, "learning_rate": 1.8408930638647685e-05, "loss": 0.3942, "step": 19021 }, { "epoch": 3.1051385657728257, "grad_norm": 2.265420436859131, "learning_rate": 1.8408758893600543e-05, "loss": 0.9754, "step": 19022 }, { "epoch": 3.10530182441533, "grad_norm": 2.2239606380462646, "learning_rate": 1.8408587140085778e-05, "loss": 0.5372, "step": 19023 }, { "epoch": 3.1054650830578345, "grad_norm": 1.9637805223464966, "learning_rate": 1.8408415378103567e-05, "loss": 0.4614, "step": 19024 }, { "epoch": 3.105628341700339, "grad_norm": 2.1804585456848145, "learning_rate": 1.8408243607654083e-05, "loss": 0.5155, "step": 19025 }, { "epoch": 3.105791600342843, "grad_norm": 1.5970202684402466, "learning_rate": 1.84080718287375e-05, "loss": 0.3552, "step": 19026 }, { "epoch": 3.1059548589853474, "grad_norm": 1.8876723051071167, "learning_rate": 1.8407900041353995e-05, "loss": 0.4799, "step": 19027 }, { "epoch": 3.106118117627852, "grad_norm": 2.122821807861328, "learning_rate": 1.8407728245503735e-05, "loss": 0.4868, "step": 19028 }, { "epoch": 3.1062813762703563, "grad_norm": 1.557146668434143, "learning_rate": 1.8407556441186895e-05, "loss": 0.4189, "step": 19029 }, { "epoch": 3.1064446349128607, "grad_norm": 1.7716423273086548, "learning_rate": 1.8407384628403642e-05, "loss": 0.4312, "step": 19030 }, { "epoch": 3.106607893555365, "grad_norm": 2.0076303482055664, "learning_rate": 1.8407212807154163e-05, "loss": 0.4587, "step": 19031 }, { "epoch": 3.1067711521978696, "grad_norm": 1.3556954860687256, "learning_rate": 1.840704097743862e-05, "loss": 0.3861, "step": 19032 }, { "epoch": 3.106934410840374, "grad_norm": 1.8387558460235596, "learning_rate": 1.840686913925719e-05, "loss": 0.4134, "step": 19033 }, { "epoch": 3.1070976694828785, "grad_norm": 2.5243215560913086, "learning_rate": 1.8406697292610042e-05, "loss": 0.5212, "step": 19034 }, { "epoch": 3.1072609281253825, "grad_norm": 1.6554654836654663, "learning_rate": 1.8406525437497355e-05, "loss": 0.408, "step": 19035 }, { "epoch": 3.107424186767887, "grad_norm": 2.1327080726623535, "learning_rate": 1.8406353573919298e-05, "loss": 0.5581, "step": 19036 }, { "epoch": 3.1075874454103913, "grad_norm": 2.24916672706604, "learning_rate": 1.840618170187605e-05, "loss": 0.5185, "step": 19037 }, { "epoch": 3.1077507040528958, "grad_norm": 1.8622065782546997, "learning_rate": 1.8406009821367772e-05, "loss": 0.4455, "step": 19038 }, { "epoch": 3.1079139626954, "grad_norm": 1.6989214420318604, "learning_rate": 1.8405837932394644e-05, "loss": 0.4197, "step": 19039 }, { "epoch": 3.1080772213379046, "grad_norm": 1.9272774457931519, "learning_rate": 1.8405666034956842e-05, "loss": 0.5568, "step": 19040 }, { "epoch": 3.108240479980409, "grad_norm": 2.157724380493164, "learning_rate": 1.840549412905454e-05, "loss": 0.5357, "step": 19041 }, { "epoch": 3.1084037386229135, "grad_norm": 1.891399621963501, "learning_rate": 1.8405322214687905e-05, "loss": 0.4769, "step": 19042 }, { "epoch": 3.1085669972654175, "grad_norm": 2.1864571571350098, "learning_rate": 1.8405150291857112e-05, "loss": 0.5439, "step": 19043 }, { "epoch": 3.108730255907922, "grad_norm": 2.113689661026001, "learning_rate": 1.8404978360562333e-05, "loss": 0.5032, "step": 19044 }, { "epoch": 3.1088935145504264, "grad_norm": 1.9628092050552368, "learning_rate": 1.8404806420803746e-05, "loss": 0.4954, "step": 19045 }, { "epoch": 3.109056773192931, "grad_norm": 2.2464334964752197, "learning_rate": 1.840463447258152e-05, "loss": 0.4893, "step": 19046 }, { "epoch": 3.1092200318354353, "grad_norm": 2.155205726623535, "learning_rate": 1.840446251589583e-05, "loss": 0.4975, "step": 19047 }, { "epoch": 3.1093832904779397, "grad_norm": 1.393471598625183, "learning_rate": 1.840429055074685e-05, "loss": 0.3534, "step": 19048 }, { "epoch": 3.109546549120444, "grad_norm": 1.7957748174667358, "learning_rate": 1.840411857713475e-05, "loss": 0.384, "step": 19049 }, { "epoch": 3.1097098077629486, "grad_norm": 1.7572323083877563, "learning_rate": 1.8403946595059705e-05, "loss": 0.4328, "step": 19050 }, { "epoch": 3.109873066405453, "grad_norm": 1.9426058530807495, "learning_rate": 1.8403774604521885e-05, "loss": 0.4489, "step": 19051 }, { "epoch": 3.110036325047957, "grad_norm": 2.6337037086486816, "learning_rate": 1.8403602605521472e-05, "loss": 0.5485, "step": 19052 }, { "epoch": 3.1101995836904615, "grad_norm": 1.7630445957183838, "learning_rate": 1.840343059805863e-05, "loss": 0.3778, "step": 19053 }, { "epoch": 3.110362842332966, "grad_norm": 2.7102644443511963, "learning_rate": 1.8403258582133533e-05, "loss": 0.5022, "step": 19054 }, { "epoch": 3.1105261009754703, "grad_norm": 1.8413305282592773, "learning_rate": 1.8403086557746363e-05, "loss": 0.4705, "step": 19055 }, { "epoch": 3.1106893596179748, "grad_norm": 1.684760570526123, "learning_rate": 1.8402914524897283e-05, "loss": 0.4031, "step": 19056 }, { "epoch": 3.110852618260479, "grad_norm": 2.183561325073242, "learning_rate": 1.8402742483586472e-05, "loss": 0.5003, "step": 19057 }, { "epoch": 3.1110158769029836, "grad_norm": 2.0258543491363525, "learning_rate": 1.84025704338141e-05, "loss": 0.4796, "step": 19058 }, { "epoch": 3.111179135545488, "grad_norm": 2.1878154277801514, "learning_rate": 1.840239837558034e-05, "loss": 0.6106, "step": 19059 }, { "epoch": 3.1113423941879925, "grad_norm": 2.485060691833496, "learning_rate": 1.840222630888537e-05, "loss": 0.5558, "step": 19060 }, { "epoch": 3.1115056528304965, "grad_norm": 1.5685791969299316, "learning_rate": 1.8402054233729362e-05, "loss": 0.4098, "step": 19061 }, { "epoch": 3.111668911473001, "grad_norm": 1.485779881477356, "learning_rate": 1.8401882150112485e-05, "loss": 0.4012, "step": 19062 }, { "epoch": 3.1118321701155054, "grad_norm": 1.9207626581192017, "learning_rate": 1.8401710058034914e-05, "loss": 0.4681, "step": 19063 }, { "epoch": 3.11199542875801, "grad_norm": 1.657449722290039, "learning_rate": 1.8401537957496826e-05, "loss": 0.4137, "step": 19064 }, { "epoch": 3.1121586874005143, "grad_norm": 2.054091215133667, "learning_rate": 1.8401365848498386e-05, "loss": 0.5298, "step": 19065 }, { "epoch": 3.1123219460430187, "grad_norm": 1.6379716396331787, "learning_rate": 1.840119373103978e-05, "loss": 0.3693, "step": 19066 }, { "epoch": 3.112485204685523, "grad_norm": 1.9512654542922974, "learning_rate": 1.840102160512117e-05, "loss": 0.4904, "step": 19067 }, { "epoch": 3.1126484633280276, "grad_norm": 1.6732302904129028, "learning_rate": 1.8400849470742734e-05, "loss": 0.4212, "step": 19068 }, { "epoch": 3.112811721970532, "grad_norm": 1.7100154161453247, "learning_rate": 1.8400677327904647e-05, "loss": 0.4752, "step": 19069 }, { "epoch": 3.112974980613036, "grad_norm": 1.778276801109314, "learning_rate": 1.840050517660708e-05, "loss": 0.4966, "step": 19070 }, { "epoch": 3.1131382392555405, "grad_norm": 1.8129254579544067, "learning_rate": 1.8400333016850204e-05, "loss": 0.4414, "step": 19071 }, { "epoch": 3.113301497898045, "grad_norm": 2.234964370727539, "learning_rate": 1.8400160848634193e-05, "loss": 0.5385, "step": 19072 }, { "epoch": 3.1134647565405493, "grad_norm": 2.105086326599121, "learning_rate": 1.8399988671959227e-05, "loss": 0.5722, "step": 19073 }, { "epoch": 3.1136280151830538, "grad_norm": 2.034006118774414, "learning_rate": 1.839981648682547e-05, "loss": 0.5017, "step": 19074 }, { "epoch": 3.113791273825558, "grad_norm": 1.848780632019043, "learning_rate": 1.8399644293233106e-05, "loss": 0.4776, "step": 19075 }, { "epoch": 3.1139545324680626, "grad_norm": 1.8085854053497314, "learning_rate": 1.83994720911823e-05, "loss": 0.6134, "step": 19076 }, { "epoch": 3.114117791110567, "grad_norm": 2.073728084564209, "learning_rate": 1.8399299880673226e-05, "loss": 0.5313, "step": 19077 }, { "epoch": 3.1142810497530715, "grad_norm": 1.965909481048584, "learning_rate": 1.839912766170606e-05, "loss": 0.4327, "step": 19078 }, { "epoch": 3.1144443083955755, "grad_norm": 2.425568103790283, "learning_rate": 1.8398955434280976e-05, "loss": 0.4779, "step": 19079 }, { "epoch": 3.11460756703808, "grad_norm": 2.033043622970581, "learning_rate": 1.8398783198398146e-05, "loss": 0.5236, "step": 19080 }, { "epoch": 3.1147708256805844, "grad_norm": 1.7101037502288818, "learning_rate": 1.839861095405774e-05, "loss": 0.437, "step": 19081 }, { "epoch": 3.114934084323089, "grad_norm": 2.190295696258545, "learning_rate": 1.8398438701259936e-05, "loss": 0.4815, "step": 19082 }, { "epoch": 3.1150973429655933, "grad_norm": 1.9844053983688354, "learning_rate": 1.8398266440004912e-05, "loss": 0.475, "step": 19083 }, { "epoch": 3.1152606016080977, "grad_norm": 1.772014856338501, "learning_rate": 1.839809417029283e-05, "loss": 0.3723, "step": 19084 }, { "epoch": 3.115423860250602, "grad_norm": 2.0712497234344482, "learning_rate": 1.839792189212387e-05, "loss": 0.4782, "step": 19085 }, { "epoch": 3.1155871188931066, "grad_norm": 1.3864095211029053, "learning_rate": 1.8397749605498208e-05, "loss": 0.341, "step": 19086 }, { "epoch": 3.115750377535611, "grad_norm": 1.8471800088882446, "learning_rate": 1.839757731041601e-05, "loss": 0.4745, "step": 19087 }, { "epoch": 3.115913636178115, "grad_norm": 2.1476757526397705, "learning_rate": 1.8397405006877462e-05, "loss": 0.5007, "step": 19088 }, { "epoch": 3.1160768948206194, "grad_norm": 2.1842470169067383, "learning_rate": 1.839723269488272e-05, "loss": 0.4499, "step": 19089 }, { "epoch": 3.116240153463124, "grad_norm": 1.9584110975265503, "learning_rate": 1.8397060374431972e-05, "loss": 0.5429, "step": 19090 }, { "epoch": 3.1164034121056283, "grad_norm": 2.1511290073394775, "learning_rate": 1.839688804552539e-05, "loss": 0.5695, "step": 19091 }, { "epoch": 3.1165666707481328, "grad_norm": 1.796772837638855, "learning_rate": 1.8396715708163136e-05, "loss": 0.457, "step": 19092 }, { "epoch": 3.116729929390637, "grad_norm": 1.816171646118164, "learning_rate": 1.8396543362345394e-05, "loss": 0.463, "step": 19093 }, { "epoch": 3.1168931880331416, "grad_norm": 1.6841988563537598, "learning_rate": 1.8396371008072335e-05, "loss": 0.4251, "step": 19094 }, { "epoch": 3.117056446675646, "grad_norm": 1.9399113655090332, "learning_rate": 1.8396198645344133e-05, "loss": 0.4456, "step": 19095 }, { "epoch": 3.11721970531815, "grad_norm": 1.592839002609253, "learning_rate": 1.8396026274160962e-05, "loss": 0.3955, "step": 19096 }, { "epoch": 3.1173829639606545, "grad_norm": 1.8083409070968628, "learning_rate": 1.8395853894522994e-05, "loss": 0.4352, "step": 19097 }, { "epoch": 3.117546222603159, "grad_norm": 1.6731728315353394, "learning_rate": 1.8395681506430403e-05, "loss": 0.3894, "step": 19098 }, { "epoch": 3.1177094812456634, "grad_norm": 1.9719464778900146, "learning_rate": 1.8395509109883363e-05, "loss": 0.5043, "step": 19099 }, { "epoch": 3.117872739888168, "grad_norm": 2.319307804107666, "learning_rate": 1.839533670488205e-05, "loss": 0.6036, "step": 19100 }, { "epoch": 3.1180359985306723, "grad_norm": 1.9424128532409668, "learning_rate": 1.8395164291426633e-05, "loss": 0.4514, "step": 19101 }, { "epoch": 3.1181992571731767, "grad_norm": 2.043370246887207, "learning_rate": 1.839499186951729e-05, "loss": 0.4579, "step": 19102 }, { "epoch": 3.118362515815681, "grad_norm": 2.0569565296173096, "learning_rate": 1.839481943915419e-05, "loss": 0.4847, "step": 19103 }, { "epoch": 3.1185257744581856, "grad_norm": 2.307811737060547, "learning_rate": 1.839464700033751e-05, "loss": 0.4882, "step": 19104 }, { "epoch": 3.1186890331006896, "grad_norm": 2.0470194816589355, "learning_rate": 1.8394474553067422e-05, "loss": 0.5043, "step": 19105 }, { "epoch": 3.118852291743194, "grad_norm": 1.8945519924163818, "learning_rate": 1.8394302097344103e-05, "loss": 0.3927, "step": 19106 }, { "epoch": 3.1190155503856984, "grad_norm": 2.1111748218536377, "learning_rate": 1.839412963316772e-05, "loss": 0.5081, "step": 19107 }, { "epoch": 3.119178809028203, "grad_norm": 1.8925790786743164, "learning_rate": 1.8393957160538452e-05, "loss": 0.4859, "step": 19108 }, { "epoch": 3.1193420676707073, "grad_norm": 2.5407028198242188, "learning_rate": 1.8393784679456474e-05, "loss": 0.6213, "step": 19109 }, { "epoch": 3.1195053263132118, "grad_norm": 2.533562421798706, "learning_rate": 1.8393612189921953e-05, "loss": 0.5074, "step": 19110 }, { "epoch": 3.119668584955716, "grad_norm": 1.7451250553131104, "learning_rate": 1.8393439691935068e-05, "loss": 0.4117, "step": 19111 }, { "epoch": 3.1198318435982206, "grad_norm": 1.9081515073776245, "learning_rate": 1.8393267185495992e-05, "loss": 0.5058, "step": 19112 }, { "epoch": 3.119995102240725, "grad_norm": 2.1967012882232666, "learning_rate": 1.8393094670604897e-05, "loss": 0.4578, "step": 19113 }, { "epoch": 3.120158360883229, "grad_norm": 2.27057147026062, "learning_rate": 1.839292214726196e-05, "loss": 0.4944, "step": 19114 }, { "epoch": 3.1203216195257335, "grad_norm": 1.8932514190673828, "learning_rate": 1.839274961546735e-05, "loss": 0.4902, "step": 19115 }, { "epoch": 3.120484878168238, "grad_norm": 1.9823964834213257, "learning_rate": 1.8392577075221247e-05, "loss": 0.458, "step": 19116 }, { "epoch": 3.1206481368107424, "grad_norm": 2.023143768310547, "learning_rate": 1.8392404526523816e-05, "loss": 0.5258, "step": 19117 }, { "epoch": 3.120811395453247, "grad_norm": 2.0249555110931396, "learning_rate": 1.8392231969375243e-05, "loss": 0.4742, "step": 19118 }, { "epoch": 3.1209746540957513, "grad_norm": 1.9541535377502441, "learning_rate": 1.839205940377569e-05, "loss": 0.48, "step": 19119 }, { "epoch": 3.1211379127382557, "grad_norm": 2.3015940189361572, "learning_rate": 1.8391886829725334e-05, "loss": 0.4685, "step": 19120 }, { "epoch": 3.12130117138076, "grad_norm": 2.199803113937378, "learning_rate": 1.8391714247224354e-05, "loss": 0.5353, "step": 19121 }, { "epoch": 3.1214644300232646, "grad_norm": 1.776964545249939, "learning_rate": 1.839154165627292e-05, "loss": 0.4345, "step": 19122 }, { "epoch": 3.1216276886657686, "grad_norm": 1.9244810342788696, "learning_rate": 1.83913690568712e-05, "loss": 0.493, "step": 19123 }, { "epoch": 3.121790947308273, "grad_norm": 1.6264513731002808, "learning_rate": 1.839119644901938e-05, "loss": 0.4377, "step": 19124 }, { "epoch": 3.1219542059507774, "grad_norm": 1.7649201154708862, "learning_rate": 1.8391023832717626e-05, "loss": 0.4717, "step": 19125 }, { "epoch": 3.122117464593282, "grad_norm": 2.316427230834961, "learning_rate": 1.839085120796611e-05, "loss": 0.432, "step": 19126 }, { "epoch": 3.1222807232357863, "grad_norm": 1.7968899011611938, "learning_rate": 1.8390678574765014e-05, "loss": 0.5347, "step": 19127 }, { "epoch": 3.1224439818782908, "grad_norm": 1.9190679788589478, "learning_rate": 1.8390505933114503e-05, "loss": 0.4928, "step": 19128 }, { "epoch": 3.122607240520795, "grad_norm": 2.048699378967285, "learning_rate": 1.8390333283014757e-05, "loss": 0.4656, "step": 19129 }, { "epoch": 3.1227704991632996, "grad_norm": 2.496537923812866, "learning_rate": 1.8390160624465946e-05, "loss": 0.4842, "step": 19130 }, { "epoch": 3.1229337578058036, "grad_norm": 2.0629682540893555, "learning_rate": 1.8389987957468245e-05, "loss": 0.4627, "step": 19131 }, { "epoch": 3.123097016448308, "grad_norm": 1.4336191415786743, "learning_rate": 1.838981528202183e-05, "loss": 0.3719, "step": 19132 }, { "epoch": 3.1232602750908125, "grad_norm": 2.2204713821411133, "learning_rate": 1.8389642598126873e-05, "loss": 0.5625, "step": 19133 }, { "epoch": 3.123423533733317, "grad_norm": 1.7413872480392456, "learning_rate": 1.8389469905783545e-05, "loss": 0.3909, "step": 19134 }, { "epoch": 3.1235867923758214, "grad_norm": 2.054579496383667, "learning_rate": 1.8389297204992028e-05, "loss": 0.4642, "step": 19135 }, { "epoch": 3.123750051018326, "grad_norm": 1.9909058809280396, "learning_rate": 1.838912449575249e-05, "loss": 0.4943, "step": 19136 }, { "epoch": 3.1239133096608303, "grad_norm": 1.9790704250335693, "learning_rate": 1.8388951778065104e-05, "loss": 0.4935, "step": 19137 }, { "epoch": 3.1240765683033347, "grad_norm": 2.2108731269836426, "learning_rate": 1.8388779051930043e-05, "loss": 0.4816, "step": 19138 }, { "epoch": 3.124239826945839, "grad_norm": 2.0340092182159424, "learning_rate": 1.838860631734749e-05, "loss": 0.4371, "step": 19139 }, { "epoch": 3.124403085588343, "grad_norm": 2.063110828399658, "learning_rate": 1.838843357431761e-05, "loss": 0.4498, "step": 19140 }, { "epoch": 3.1245663442308476, "grad_norm": 2.3248705863952637, "learning_rate": 1.838826082284058e-05, "loss": 0.5777, "step": 19141 }, { "epoch": 3.124729602873352, "grad_norm": 2.1884920597076416, "learning_rate": 1.8388088062916572e-05, "loss": 0.5249, "step": 19142 }, { "epoch": 3.1248928615158564, "grad_norm": 2.0289318561553955, "learning_rate": 1.838791529454576e-05, "loss": 0.461, "step": 19143 }, { "epoch": 3.125056120158361, "grad_norm": 2.0098142623901367, "learning_rate": 1.8387742517728325e-05, "loss": 0.4633, "step": 19144 }, { "epoch": 3.1252193788008653, "grad_norm": 2.4782795906066895, "learning_rate": 1.838756973246443e-05, "loss": 0.4679, "step": 19145 }, { "epoch": 3.1253826374433697, "grad_norm": 1.8772648572921753, "learning_rate": 1.838739693875426e-05, "loss": 0.4129, "step": 19146 }, { "epoch": 3.125545896085874, "grad_norm": 2.33862566947937, "learning_rate": 1.838722413659798e-05, "loss": 0.5364, "step": 19147 }, { "epoch": 3.1257091547283786, "grad_norm": 1.920239806175232, "learning_rate": 1.838705132599577e-05, "loss": 0.5043, "step": 19148 }, { "epoch": 3.1258724133708826, "grad_norm": 2.282862901687622, "learning_rate": 1.8386878506947798e-05, "loss": 0.5933, "step": 19149 }, { "epoch": 3.126035672013387, "grad_norm": 1.5184367895126343, "learning_rate": 1.8386705679454243e-05, "loss": 0.3733, "step": 19150 }, { "epoch": 3.1261989306558915, "grad_norm": 1.9671231508255005, "learning_rate": 1.8386532843515275e-05, "loss": 0.491, "step": 19151 }, { "epoch": 3.126362189298396, "grad_norm": 1.79429292678833, "learning_rate": 1.8386359999131078e-05, "loss": 0.5035, "step": 19152 }, { "epoch": 3.1265254479409004, "grad_norm": 2.1592330932617188, "learning_rate": 1.8386187146301812e-05, "loss": 0.5183, "step": 19153 }, { "epoch": 3.126688706583405, "grad_norm": 1.8004266023635864, "learning_rate": 1.838601428502766e-05, "loss": 0.41, "step": 19154 }, { "epoch": 3.1268519652259092, "grad_norm": 1.7408418655395508, "learning_rate": 1.838584141530879e-05, "loss": 0.5027, "step": 19155 }, { "epoch": 3.1270152238684137, "grad_norm": 1.8845396041870117, "learning_rate": 1.8385668537145386e-05, "loss": 0.4756, "step": 19156 }, { "epoch": 3.127178482510918, "grad_norm": 1.8021447658538818, "learning_rate": 1.8385495650537612e-05, "loss": 0.3872, "step": 19157 }, { "epoch": 3.127341741153422, "grad_norm": 1.7435846328735352, "learning_rate": 1.838532275548565e-05, "loss": 0.4397, "step": 19158 }, { "epoch": 3.1275049997959266, "grad_norm": 2.212862730026245, "learning_rate": 1.8385149851989667e-05, "loss": 0.5581, "step": 19159 }, { "epoch": 3.127668258438431, "grad_norm": 1.983757495880127, "learning_rate": 1.8384976940049842e-05, "loss": 0.4955, "step": 19160 }, { "epoch": 3.1278315170809354, "grad_norm": 2.0575027465820312, "learning_rate": 1.8384804019666348e-05, "loss": 0.4849, "step": 19161 }, { "epoch": 3.12799477572344, "grad_norm": 1.9399220943450928, "learning_rate": 1.838463109083936e-05, "loss": 0.4896, "step": 19162 }, { "epoch": 3.1281580343659443, "grad_norm": 2.2822883129119873, "learning_rate": 1.8384458153569044e-05, "loss": 0.4826, "step": 19163 }, { "epoch": 3.1283212930084487, "grad_norm": 2.037858247756958, "learning_rate": 1.8384285207855585e-05, "loss": 0.4705, "step": 19164 }, { "epoch": 3.128484551650953, "grad_norm": 1.8360326290130615, "learning_rate": 1.8384112253699153e-05, "loss": 0.4354, "step": 19165 }, { "epoch": 3.128647810293457, "grad_norm": 1.881895899772644, "learning_rate": 1.838393929109992e-05, "loss": 0.4822, "step": 19166 }, { "epoch": 3.1288110689359616, "grad_norm": 1.5092086791992188, "learning_rate": 1.838376632005807e-05, "loss": 0.3553, "step": 19167 }, { "epoch": 3.128974327578466, "grad_norm": 2.107337713241577, "learning_rate": 1.8383593340573763e-05, "loss": 0.5285, "step": 19168 }, { "epoch": 3.1291375862209705, "grad_norm": 2.0311777591705322, "learning_rate": 1.8383420352647177e-05, "loss": 0.5102, "step": 19169 }, { "epoch": 3.129300844863475, "grad_norm": 1.9234910011291504, "learning_rate": 1.8383247356278496e-05, "loss": 0.473, "step": 19170 }, { "epoch": 3.1294641035059794, "grad_norm": 2.5392792224884033, "learning_rate": 1.838307435146788e-05, "loss": 0.955, "step": 19171 }, { "epoch": 3.129627362148484, "grad_norm": 1.9994525909423828, "learning_rate": 1.8382901338215515e-05, "loss": 0.4304, "step": 19172 }, { "epoch": 3.1297906207909882, "grad_norm": 1.7872364521026611, "learning_rate": 1.8382728316521568e-05, "loss": 0.4255, "step": 19173 }, { "epoch": 3.1299538794334927, "grad_norm": 1.704208254814148, "learning_rate": 1.838255528638622e-05, "loss": 0.45, "step": 19174 }, { "epoch": 3.130117138075997, "grad_norm": 1.8586641550064087, "learning_rate": 1.838238224780964e-05, "loss": 0.4388, "step": 19175 }, { "epoch": 3.130280396718501, "grad_norm": 2.0445377826690674, "learning_rate": 1.8382209200792e-05, "loss": 0.5095, "step": 19176 }, { "epoch": 3.1304436553610056, "grad_norm": 2.1643171310424805, "learning_rate": 1.838203614533348e-05, "loss": 0.545, "step": 19177 }, { "epoch": 3.13060691400351, "grad_norm": 2.1047680377960205, "learning_rate": 1.838186308143425e-05, "loss": 0.4825, "step": 19178 }, { "epoch": 3.1307701726460144, "grad_norm": 2.1559841632843018, "learning_rate": 1.8381690009094488e-05, "loss": 0.6017, "step": 19179 }, { "epoch": 3.130933431288519, "grad_norm": 1.629186987876892, "learning_rate": 1.838151692831437e-05, "loss": 0.4135, "step": 19180 }, { "epoch": 3.1310966899310233, "grad_norm": 2.229228973388672, "learning_rate": 1.838134383909406e-05, "loss": 0.5778, "step": 19181 }, { "epoch": 3.1312599485735277, "grad_norm": 1.8668835163116455, "learning_rate": 1.838117074143374e-05, "loss": 0.471, "step": 19182 }, { "epoch": 3.131423207216032, "grad_norm": 1.9742660522460938, "learning_rate": 1.8380997635333587e-05, "loss": 0.4884, "step": 19183 }, { "epoch": 3.131586465858536, "grad_norm": 1.5638824701309204, "learning_rate": 1.8380824520793767e-05, "loss": 0.3597, "step": 19184 }, { "epoch": 3.1317497245010406, "grad_norm": 1.7046419382095337, "learning_rate": 1.8380651397814463e-05, "loss": 0.4369, "step": 19185 }, { "epoch": 3.131912983143545, "grad_norm": 2.371206760406494, "learning_rate": 1.8380478266395844e-05, "loss": 0.5842, "step": 19186 }, { "epoch": 3.1320762417860495, "grad_norm": 2.091435432434082, "learning_rate": 1.8380305126538084e-05, "loss": 0.4862, "step": 19187 }, { "epoch": 3.132239500428554, "grad_norm": 2.00459885597229, "learning_rate": 1.838013197824136e-05, "loss": 0.5227, "step": 19188 }, { "epoch": 3.1324027590710584, "grad_norm": 2.192333936691284, "learning_rate": 1.8379958821505846e-05, "loss": 0.4968, "step": 19189 }, { "epoch": 3.132566017713563, "grad_norm": 2.337123155593872, "learning_rate": 1.8379785656331714e-05, "loss": 0.5236, "step": 19190 }, { "epoch": 3.1327292763560672, "grad_norm": 1.9252872467041016, "learning_rate": 1.8379612482719142e-05, "loss": 0.4618, "step": 19191 }, { "epoch": 3.1328925349985717, "grad_norm": 1.7911752462387085, "learning_rate": 1.83794393006683e-05, "loss": 0.4206, "step": 19192 }, { "epoch": 3.1330557936410757, "grad_norm": 2.023324966430664, "learning_rate": 1.8379266110179368e-05, "loss": 0.4994, "step": 19193 }, { "epoch": 3.13321905228358, "grad_norm": 1.7995480298995972, "learning_rate": 1.8379092911252515e-05, "loss": 0.4535, "step": 19194 }, { "epoch": 3.1333823109260845, "grad_norm": 1.7781600952148438, "learning_rate": 1.837891970388792e-05, "loss": 0.412, "step": 19195 }, { "epoch": 3.133545569568589, "grad_norm": 2.1254894733428955, "learning_rate": 1.837874648808575e-05, "loss": 0.5509, "step": 19196 }, { "epoch": 3.1337088282110934, "grad_norm": 1.8739579916000366, "learning_rate": 1.837857326384619e-05, "loss": 0.4552, "step": 19197 }, { "epoch": 3.133872086853598, "grad_norm": 2.3681814670562744, "learning_rate": 1.8378400031169406e-05, "loss": 0.4976, "step": 19198 }, { "epoch": 3.1340353454961023, "grad_norm": 1.6057387590408325, "learning_rate": 1.837822679005558e-05, "loss": 0.4698, "step": 19199 }, { "epoch": 3.1341986041386067, "grad_norm": 1.864689826965332, "learning_rate": 1.8378053540504874e-05, "loss": 0.4284, "step": 19200 }, { "epoch": 3.134361862781111, "grad_norm": 1.9578101634979248, "learning_rate": 1.8377880282517476e-05, "loss": 0.4088, "step": 19201 }, { "epoch": 3.134525121423615, "grad_norm": 2.199793577194214, "learning_rate": 1.8377707016093553e-05, "loss": 0.4343, "step": 19202 }, { "epoch": 3.1346883800661196, "grad_norm": 1.900309681892395, "learning_rate": 1.8377533741233282e-05, "loss": 0.4381, "step": 19203 }, { "epoch": 3.134851638708624, "grad_norm": 1.8463164567947388, "learning_rate": 1.8377360457936835e-05, "loss": 0.4556, "step": 19204 }, { "epoch": 3.1350148973511285, "grad_norm": 1.971508264541626, "learning_rate": 1.837718716620439e-05, "loss": 0.4681, "step": 19205 }, { "epoch": 3.135178155993633, "grad_norm": 2.0972816944122314, "learning_rate": 1.837701386603612e-05, "loss": 0.4622, "step": 19206 }, { "epoch": 3.1353414146361374, "grad_norm": 1.7982628345489502, "learning_rate": 1.83768405574322e-05, "loss": 0.4539, "step": 19207 }, { "epoch": 3.135504673278642, "grad_norm": 1.6723878383636475, "learning_rate": 1.83766672403928e-05, "loss": 0.4075, "step": 19208 }, { "epoch": 3.1356679319211462, "grad_norm": 1.6772204637527466, "learning_rate": 1.83764939149181e-05, "loss": 0.4266, "step": 19209 }, { "epoch": 3.1358311905636507, "grad_norm": 1.879230260848999, "learning_rate": 1.8376320581008274e-05, "loss": 0.5095, "step": 19210 }, { "epoch": 3.1359944492061547, "grad_norm": 1.8299741744995117, "learning_rate": 1.8376147238663497e-05, "loss": 0.4843, "step": 19211 }, { "epoch": 3.136157707848659, "grad_norm": 1.9370840787887573, "learning_rate": 1.8375973887883938e-05, "loss": 0.481, "step": 19212 }, { "epoch": 3.1363209664911635, "grad_norm": 2.246957540512085, "learning_rate": 1.8375800528669777e-05, "loss": 0.5173, "step": 19213 }, { "epoch": 3.136484225133668, "grad_norm": 2.25228214263916, "learning_rate": 1.8375627161021185e-05, "loss": 0.6105, "step": 19214 }, { "epoch": 3.1366474837761724, "grad_norm": 1.6443427801132202, "learning_rate": 1.8375453784938343e-05, "loss": 0.4404, "step": 19215 }, { "epoch": 3.136810742418677, "grad_norm": 2.1025502681732178, "learning_rate": 1.837528040042142e-05, "loss": 0.4891, "step": 19216 }, { "epoch": 3.1369740010611813, "grad_norm": 1.6649075746536255, "learning_rate": 1.837510700747059e-05, "loss": 0.3995, "step": 19217 }, { "epoch": 3.1371372597036857, "grad_norm": 1.9918556213378906, "learning_rate": 1.837493360608603e-05, "loss": 0.4375, "step": 19218 }, { "epoch": 3.1373005183461897, "grad_norm": 2.0413553714752197, "learning_rate": 1.8374760196267915e-05, "loss": 0.49, "step": 19219 }, { "epoch": 3.137463776988694, "grad_norm": 1.8786976337432861, "learning_rate": 1.837458677801642e-05, "loss": 0.4489, "step": 19220 }, { "epoch": 3.1376270356311986, "grad_norm": 3.1672182083129883, "learning_rate": 1.8374413351331716e-05, "loss": 0.531, "step": 19221 }, { "epoch": 3.137790294273703, "grad_norm": 2.4466147422790527, "learning_rate": 1.837423991621398e-05, "loss": 0.5085, "step": 19222 }, { "epoch": 3.1379535529162075, "grad_norm": 1.9197770357131958, "learning_rate": 1.8374066472663386e-05, "loss": 0.4531, "step": 19223 }, { "epoch": 3.138116811558712, "grad_norm": 1.8191171884536743, "learning_rate": 1.837389302068011e-05, "loss": 0.4815, "step": 19224 }, { "epoch": 3.1382800702012164, "grad_norm": 2.0078275203704834, "learning_rate": 1.837371956026433e-05, "loss": 0.5365, "step": 19225 }, { "epoch": 3.138443328843721, "grad_norm": 1.8167836666107178, "learning_rate": 1.8373546091416212e-05, "loss": 0.4047, "step": 19226 }, { "epoch": 3.1386065874862252, "grad_norm": 2.078007698059082, "learning_rate": 1.8373372614135935e-05, "loss": 0.605, "step": 19227 }, { "epoch": 3.1387698461287297, "grad_norm": 2.2191758155822754, "learning_rate": 1.8373199128423676e-05, "loss": 0.5396, "step": 19228 }, { "epoch": 3.1389331047712337, "grad_norm": 1.4233413934707642, "learning_rate": 1.8373025634279606e-05, "loss": 0.3712, "step": 19229 }, { "epoch": 3.139096363413738, "grad_norm": 1.903640627861023, "learning_rate": 1.8372852131703903e-05, "loss": 0.4744, "step": 19230 }, { "epoch": 3.1392596220562425, "grad_norm": 2.3639211654663086, "learning_rate": 1.8372678620696742e-05, "loss": 0.4988, "step": 19231 }, { "epoch": 3.139422880698747, "grad_norm": 1.9745310544967651, "learning_rate": 1.8372505101258294e-05, "loss": 0.5383, "step": 19232 }, { "epoch": 3.1395861393412514, "grad_norm": 1.5489318370819092, "learning_rate": 1.8372331573388733e-05, "loss": 0.3644, "step": 19233 }, { "epoch": 3.139749397983756, "grad_norm": 1.902844786643982, "learning_rate": 1.837215803708824e-05, "loss": 0.4831, "step": 19234 }, { "epoch": 3.1399126566262603, "grad_norm": 1.9865195751190186, "learning_rate": 1.8371984492356985e-05, "loss": 0.4361, "step": 19235 }, { "epoch": 3.1400759152687647, "grad_norm": 1.9308693408966064, "learning_rate": 1.8371810939195143e-05, "loss": 0.4323, "step": 19236 }, { "epoch": 3.1402391739112687, "grad_norm": 2.0678958892822266, "learning_rate": 1.837163737760289e-05, "loss": 0.5162, "step": 19237 }, { "epoch": 3.140402432553773, "grad_norm": 1.6506986618041992, "learning_rate": 1.83714638075804e-05, "loss": 0.3708, "step": 19238 }, { "epoch": 3.1405656911962776, "grad_norm": 2.0123302936553955, "learning_rate": 1.8371290229127848e-05, "loss": 0.4802, "step": 19239 }, { "epoch": 3.140728949838782, "grad_norm": 2.1602470874786377, "learning_rate": 1.837111664224541e-05, "loss": 0.5507, "step": 19240 }, { "epoch": 3.1408922084812865, "grad_norm": 1.6565812826156616, "learning_rate": 1.8370943046933257e-05, "loss": 0.4923, "step": 19241 }, { "epoch": 3.141055467123791, "grad_norm": 2.244677782058716, "learning_rate": 1.837076944319157e-05, "loss": 0.507, "step": 19242 }, { "epoch": 3.1412187257662953, "grad_norm": 2.213373899459839, "learning_rate": 1.837059583102052e-05, "loss": 0.5763, "step": 19243 }, { "epoch": 3.1413819844088, "grad_norm": 2.346959114074707, "learning_rate": 1.8370422210420284e-05, "loss": 0.5708, "step": 19244 }, { "epoch": 3.1415452430513042, "grad_norm": 2.1421985626220703, "learning_rate": 1.8370248581391032e-05, "loss": 0.4957, "step": 19245 }, { "epoch": 3.141708501693808, "grad_norm": 1.9464410543441772, "learning_rate": 1.837007494393294e-05, "loss": 0.4066, "step": 19246 }, { "epoch": 3.1418717603363127, "grad_norm": 1.6493085622787476, "learning_rate": 1.836990129804619e-05, "loss": 0.3991, "step": 19247 }, { "epoch": 3.142035018978817, "grad_norm": 2.5067317485809326, "learning_rate": 1.8369727643730947e-05, "loss": 0.5849, "step": 19248 }, { "epoch": 3.1421982776213215, "grad_norm": 1.7200936079025269, "learning_rate": 1.8369553980987392e-05, "loss": 0.3872, "step": 19249 }, { "epoch": 3.142361536263826, "grad_norm": 1.9086614847183228, "learning_rate": 1.83693803098157e-05, "loss": 0.4374, "step": 19250 }, { "epoch": 3.1425247949063304, "grad_norm": 2.303955078125, "learning_rate": 1.836920663021604e-05, "loss": 0.5056, "step": 19251 }, { "epoch": 3.142688053548835, "grad_norm": 2.0120105743408203, "learning_rate": 1.8369032942188595e-05, "loss": 0.4451, "step": 19252 }, { "epoch": 3.1428513121913393, "grad_norm": 2.1218080520629883, "learning_rate": 1.8368859245733535e-05, "loss": 0.573, "step": 19253 }, { "epoch": 3.1430145708338437, "grad_norm": 2.056225299835205, "learning_rate": 1.8368685540851037e-05, "loss": 0.4802, "step": 19254 }, { "epoch": 3.1431778294763477, "grad_norm": 1.8847757577896118, "learning_rate": 1.8368511827541275e-05, "loss": 0.5328, "step": 19255 }, { "epoch": 3.143341088118852, "grad_norm": 1.8967527151107788, "learning_rate": 1.836833810580442e-05, "loss": 0.4313, "step": 19256 }, { "epoch": 3.1435043467613566, "grad_norm": 2.2437615394592285, "learning_rate": 1.8368164375640653e-05, "loss": 0.5625, "step": 19257 }, { "epoch": 3.143667605403861, "grad_norm": 2.049694776535034, "learning_rate": 1.836799063705015e-05, "loss": 0.4975, "step": 19258 }, { "epoch": 3.1438308640463655, "grad_norm": 1.9041457176208496, "learning_rate": 1.8367816890033077e-05, "loss": 0.5247, "step": 19259 }, { "epoch": 3.14399412268887, "grad_norm": 2.6352407932281494, "learning_rate": 1.836764313458962e-05, "loss": 0.5815, "step": 19260 }, { "epoch": 3.1441573813313743, "grad_norm": 1.9593379497528076, "learning_rate": 1.8367469370719946e-05, "loss": 0.4785, "step": 19261 }, { "epoch": 3.144320639973879, "grad_norm": 1.6995769739151, "learning_rate": 1.836729559842423e-05, "loss": 0.3965, "step": 19262 }, { "epoch": 3.144483898616383, "grad_norm": 1.720718264579773, "learning_rate": 1.8367121817702653e-05, "loss": 0.4318, "step": 19263 }, { "epoch": 3.144647157258887, "grad_norm": 2.056804656982422, "learning_rate": 1.8366948028555388e-05, "loss": 0.4654, "step": 19264 }, { "epoch": 3.1448104159013917, "grad_norm": 2.1128287315368652, "learning_rate": 1.8366774230982606e-05, "loss": 0.5836, "step": 19265 }, { "epoch": 3.144973674543896, "grad_norm": 2.1343350410461426, "learning_rate": 1.836660042498448e-05, "loss": 0.5675, "step": 19266 }, { "epoch": 3.1451369331864005, "grad_norm": 1.5325431823730469, "learning_rate": 1.8366426610561198e-05, "loss": 0.3773, "step": 19267 }, { "epoch": 3.145300191828905, "grad_norm": 1.6370446681976318, "learning_rate": 1.8366252787712922e-05, "loss": 0.395, "step": 19268 }, { "epoch": 3.1454634504714094, "grad_norm": 1.7818723917007446, "learning_rate": 1.8366078956439833e-05, "loss": 0.4305, "step": 19269 }, { "epoch": 3.145626709113914, "grad_norm": 1.6227312088012695, "learning_rate": 1.8365905116742103e-05, "loss": 0.3878, "step": 19270 }, { "epoch": 3.1457899677564183, "grad_norm": 1.6777211427688599, "learning_rate": 1.8365731268619912e-05, "loss": 0.3905, "step": 19271 }, { "epoch": 3.1459532263989223, "grad_norm": 2.104206085205078, "learning_rate": 1.836555741207343e-05, "loss": 0.4908, "step": 19272 }, { "epoch": 3.1461164850414267, "grad_norm": 1.3536583185195923, "learning_rate": 1.836538354710283e-05, "loss": 0.3294, "step": 19273 }, { "epoch": 3.146279743683931, "grad_norm": 1.7271136045455933, "learning_rate": 1.8365209673708298e-05, "loss": 0.4074, "step": 19274 }, { "epoch": 3.1464430023264356, "grad_norm": 1.8771289587020874, "learning_rate": 1.836503579189e-05, "loss": 0.3902, "step": 19275 }, { "epoch": 3.14660626096894, "grad_norm": 1.784264326095581, "learning_rate": 1.836486190164811e-05, "loss": 0.4054, "step": 19276 }, { "epoch": 3.1467695196114445, "grad_norm": 2.005175828933716, "learning_rate": 1.836468800298281e-05, "loss": 0.448, "step": 19277 }, { "epoch": 3.146932778253949, "grad_norm": 2.6808018684387207, "learning_rate": 1.836451409589427e-05, "loss": 0.443, "step": 19278 }, { "epoch": 3.1470960368964533, "grad_norm": 2.147965431213379, "learning_rate": 1.836434018038267e-05, "loss": 0.5516, "step": 19279 }, { "epoch": 3.147259295538958, "grad_norm": 2.337078094482422, "learning_rate": 1.8364166256448173e-05, "loss": 0.4661, "step": 19280 }, { "epoch": 3.1474225541814618, "grad_norm": 1.8231076002120972, "learning_rate": 1.8363992324090967e-05, "loss": 0.427, "step": 19281 }, { "epoch": 3.147585812823966, "grad_norm": 1.8085187673568726, "learning_rate": 1.8363818383311226e-05, "loss": 0.4434, "step": 19282 }, { "epoch": 3.1477490714664706, "grad_norm": 2.2313284873962402, "learning_rate": 1.8363644434109117e-05, "loss": 0.489, "step": 19283 }, { "epoch": 3.147912330108975, "grad_norm": 2.2925872802734375, "learning_rate": 1.8363470476484824e-05, "loss": 0.505, "step": 19284 }, { "epoch": 3.1480755887514795, "grad_norm": 2.1349267959594727, "learning_rate": 1.836329651043852e-05, "loss": 0.4629, "step": 19285 }, { "epoch": 3.148238847393984, "grad_norm": 1.7754992246627808, "learning_rate": 1.8363122535970374e-05, "loss": 0.4495, "step": 19286 }, { "epoch": 3.1484021060364884, "grad_norm": 1.8219929933547974, "learning_rate": 1.836294855308057e-05, "loss": 0.4787, "step": 19287 }, { "epoch": 3.148565364678993, "grad_norm": 2.0113871097564697, "learning_rate": 1.8362774561769275e-05, "loss": 0.4827, "step": 19288 }, { "epoch": 3.1487286233214973, "grad_norm": 1.9047794342041016, "learning_rate": 1.836260056203667e-05, "loss": 0.4786, "step": 19289 }, { "epoch": 3.1488918819640013, "grad_norm": 1.744431734085083, "learning_rate": 1.8362426553882932e-05, "loss": 0.3713, "step": 19290 }, { "epoch": 3.1490551406065057, "grad_norm": 2.1946518421173096, "learning_rate": 1.8362252537308226e-05, "loss": 0.4497, "step": 19291 }, { "epoch": 3.14921839924901, "grad_norm": 1.848078727722168, "learning_rate": 1.8362078512312738e-05, "loss": 0.387, "step": 19292 }, { "epoch": 3.1493816578915146, "grad_norm": 1.9390836954116821, "learning_rate": 1.836190447889664e-05, "loss": 0.4612, "step": 19293 }, { "epoch": 3.149544916534019, "grad_norm": 1.9046239852905273, "learning_rate": 1.8361730437060103e-05, "loss": 0.4893, "step": 19294 }, { "epoch": 3.1497081751765235, "grad_norm": 2.034959316253662, "learning_rate": 1.8361556386803308e-05, "loss": 0.5809, "step": 19295 }, { "epoch": 3.149871433819028, "grad_norm": 1.7888551950454712, "learning_rate": 1.8361382328126426e-05, "loss": 0.3904, "step": 19296 }, { "epoch": 3.1500346924615323, "grad_norm": 2.4024617671966553, "learning_rate": 1.8361208261029637e-05, "loss": 0.5734, "step": 19297 }, { "epoch": 3.1501979511040368, "grad_norm": 1.9023979902267456, "learning_rate": 1.8361034185513113e-05, "loss": 0.4837, "step": 19298 }, { "epoch": 3.1503612097465408, "grad_norm": 1.9971468448638916, "learning_rate": 1.8360860101577027e-05, "loss": 0.4859, "step": 19299 }, { "epoch": 3.150524468389045, "grad_norm": 2.05781626701355, "learning_rate": 1.836068600922156e-05, "loss": 0.4001, "step": 19300 }, { "epoch": 3.1506877270315496, "grad_norm": 1.6749922037124634, "learning_rate": 1.8360511908446886e-05, "loss": 0.4357, "step": 19301 }, { "epoch": 3.150850985674054, "grad_norm": 1.9641056060791016, "learning_rate": 1.8360337799253173e-05, "loss": 0.4974, "step": 19302 }, { "epoch": 3.1510142443165585, "grad_norm": 2.427427053451538, "learning_rate": 1.8360163681640606e-05, "loss": 0.6142, "step": 19303 }, { "epoch": 3.151177502959063, "grad_norm": 2.235340118408203, "learning_rate": 1.8359989555609355e-05, "loss": 0.5166, "step": 19304 }, { "epoch": 3.1513407616015674, "grad_norm": 1.9851443767547607, "learning_rate": 1.8359815421159595e-05, "loss": 0.5307, "step": 19305 }, { "epoch": 3.151504020244072, "grad_norm": 1.9493770599365234, "learning_rate": 1.8359641278291508e-05, "loss": 0.4374, "step": 19306 }, { "epoch": 3.151667278886576, "grad_norm": 1.9398936033248901, "learning_rate": 1.835946712700526e-05, "loss": 0.5207, "step": 19307 }, { "epoch": 3.1518305375290803, "grad_norm": 2.019547700881958, "learning_rate": 1.8359292967301035e-05, "loss": 0.4508, "step": 19308 }, { "epoch": 3.1519937961715847, "grad_norm": 1.7939473390579224, "learning_rate": 1.8359118799179002e-05, "loss": 0.4152, "step": 19309 }, { "epoch": 3.152157054814089, "grad_norm": 2.4552643299102783, "learning_rate": 1.835894462263934e-05, "loss": 0.5484, "step": 19310 }, { "epoch": 3.1523203134565936, "grad_norm": 1.7289093732833862, "learning_rate": 1.835877043768222e-05, "loss": 0.457, "step": 19311 }, { "epoch": 3.152483572099098, "grad_norm": 2.103909969329834, "learning_rate": 1.835859624430782e-05, "loss": 0.4587, "step": 19312 }, { "epoch": 3.1526468307416025, "grad_norm": 1.7311168909072876, "learning_rate": 1.835842204251632e-05, "loss": 0.483, "step": 19313 }, { "epoch": 3.152810089384107, "grad_norm": 1.9446189403533936, "learning_rate": 1.835824783230789e-05, "loss": 0.4557, "step": 19314 }, { "epoch": 3.1529733480266113, "grad_norm": 2.013918161392212, "learning_rate": 1.8358073613682705e-05, "loss": 0.5215, "step": 19315 }, { "epoch": 3.1531366066691158, "grad_norm": 1.787811279296875, "learning_rate": 1.835789938664094e-05, "loss": 0.4518, "step": 19316 }, { "epoch": 3.1532998653116198, "grad_norm": 1.4579166173934937, "learning_rate": 1.8357725151182778e-05, "loss": 0.345, "step": 19317 }, { "epoch": 3.153463123954124, "grad_norm": 2.0798680782318115, "learning_rate": 1.8357550907308386e-05, "loss": 0.4737, "step": 19318 }, { "epoch": 3.1536263825966286, "grad_norm": 2.091296911239624, "learning_rate": 1.8357376655017943e-05, "loss": 0.5089, "step": 19319 }, { "epoch": 3.153789641239133, "grad_norm": 1.633711814880371, "learning_rate": 1.8357202394311624e-05, "loss": 0.4324, "step": 19320 }, { "epoch": 3.1539528998816375, "grad_norm": 2.1252927780151367, "learning_rate": 1.8357028125189603e-05, "loss": 0.5399, "step": 19321 }, { "epoch": 3.154116158524142, "grad_norm": 1.931122064590454, "learning_rate": 1.835685384765206e-05, "loss": 0.4268, "step": 19322 }, { "epoch": 3.1542794171666464, "grad_norm": 1.7546980381011963, "learning_rate": 1.835667956169916e-05, "loss": 0.405, "step": 19323 }, { "epoch": 3.154442675809151, "grad_norm": 1.7538822889328003, "learning_rate": 1.8356505267331096e-05, "loss": 0.4342, "step": 19324 }, { "epoch": 3.154605934451655, "grad_norm": 1.7514036893844604, "learning_rate": 1.8356330964548026e-05, "loss": 0.4226, "step": 19325 }, { "epoch": 3.1547691930941593, "grad_norm": 1.9059715270996094, "learning_rate": 1.8356156653350138e-05, "loss": 0.4446, "step": 19326 }, { "epoch": 3.1549324517366637, "grad_norm": 1.9629360437393188, "learning_rate": 1.8355982333737597e-05, "loss": 0.4917, "step": 19327 }, { "epoch": 3.155095710379168, "grad_norm": 1.8719786405563354, "learning_rate": 1.8355808005710588e-05, "loss": 0.4581, "step": 19328 }, { "epoch": 3.1552589690216726, "grad_norm": 1.8576785326004028, "learning_rate": 1.835563366926928e-05, "loss": 0.4979, "step": 19329 }, { "epoch": 3.155422227664177, "grad_norm": 2.124069929122925, "learning_rate": 1.8355459324413854e-05, "loss": 0.536, "step": 19330 }, { "epoch": 3.1555854863066815, "grad_norm": 1.781045913696289, "learning_rate": 1.835528497114448e-05, "loss": 0.4719, "step": 19331 }, { "epoch": 3.155748744949186, "grad_norm": 1.9589205980300903, "learning_rate": 1.835511060946134e-05, "loss": 0.4906, "step": 19332 }, { "epoch": 3.1559120035916903, "grad_norm": 1.7335076332092285, "learning_rate": 1.8354936239364604e-05, "loss": 0.454, "step": 19333 }, { "epoch": 3.1560752622341943, "grad_norm": 2.192348003387451, "learning_rate": 1.8354761860854447e-05, "loss": 0.4786, "step": 19334 }, { "epoch": 3.1562385208766988, "grad_norm": 2.196835517883301, "learning_rate": 1.8354587473931048e-05, "loss": 0.5243, "step": 19335 }, { "epoch": 3.156401779519203, "grad_norm": 2.360830307006836, "learning_rate": 1.8354413078594585e-05, "loss": 0.5309, "step": 19336 }, { "epoch": 3.1565650381617076, "grad_norm": 1.7894150018692017, "learning_rate": 1.8354238674845225e-05, "loss": 0.3726, "step": 19337 }, { "epoch": 3.156728296804212, "grad_norm": 1.797112226486206, "learning_rate": 1.8354064262683154e-05, "loss": 0.4906, "step": 19338 }, { "epoch": 3.1568915554467165, "grad_norm": 1.765925645828247, "learning_rate": 1.835388984210854e-05, "loss": 0.5123, "step": 19339 }, { "epoch": 3.157054814089221, "grad_norm": 1.8997020721435547, "learning_rate": 1.835371541312156e-05, "loss": 0.4119, "step": 19340 }, { "epoch": 3.1572180727317254, "grad_norm": 1.7575836181640625, "learning_rate": 1.835354097572239e-05, "loss": 0.4649, "step": 19341 }, { "epoch": 3.15738133137423, "grad_norm": 2.382176160812378, "learning_rate": 1.8353366529911208e-05, "loss": 0.4932, "step": 19342 }, { "epoch": 3.157544590016734, "grad_norm": 1.9101983308792114, "learning_rate": 1.835319207568819e-05, "loss": 0.438, "step": 19343 }, { "epoch": 3.1577078486592383, "grad_norm": 1.8974034786224365, "learning_rate": 1.8353017613053508e-05, "loss": 0.4579, "step": 19344 }, { "epoch": 3.1578711073017427, "grad_norm": 1.7961353063583374, "learning_rate": 1.8352843142007343e-05, "loss": 0.4421, "step": 19345 }, { "epoch": 3.158034365944247, "grad_norm": 1.9813495874404907, "learning_rate": 1.8352668662549863e-05, "loss": 0.3998, "step": 19346 }, { "epoch": 3.1581976245867516, "grad_norm": 2.050842523574829, "learning_rate": 1.8352494174681248e-05, "loss": 0.426, "step": 19347 }, { "epoch": 3.158360883229256, "grad_norm": 2.035122871398926, "learning_rate": 1.8352319678401677e-05, "loss": 0.5783, "step": 19348 }, { "epoch": 3.1585241418717604, "grad_norm": 2.0063464641571045, "learning_rate": 1.835214517371132e-05, "loss": 0.4805, "step": 19349 }, { "epoch": 3.158687400514265, "grad_norm": 2.2939789295196533, "learning_rate": 1.835197066061035e-05, "loss": 0.4675, "step": 19350 }, { "epoch": 3.1588506591567693, "grad_norm": 2.0906171798706055, "learning_rate": 1.8351796139098953e-05, "loss": 0.5614, "step": 19351 }, { "epoch": 3.1590139177992733, "grad_norm": 2.0543031692504883, "learning_rate": 1.83516216091773e-05, "loss": 0.5093, "step": 19352 }, { "epoch": 3.1591771764417778, "grad_norm": 1.7534745931625366, "learning_rate": 1.8351447070845565e-05, "loss": 0.3829, "step": 19353 }, { "epoch": 3.159340435084282, "grad_norm": 2.091531991958618, "learning_rate": 1.8351272524103928e-05, "loss": 0.5027, "step": 19354 }, { "epoch": 3.1595036937267866, "grad_norm": 2.3798582553863525, "learning_rate": 1.835109796895256e-05, "loss": 0.4476, "step": 19355 }, { "epoch": 3.159666952369291, "grad_norm": 1.9583569765090942, "learning_rate": 1.8350923405391636e-05, "loss": 0.4805, "step": 19356 }, { "epoch": 3.1598302110117955, "grad_norm": 1.758935570716858, "learning_rate": 1.8350748833421336e-05, "loss": 0.429, "step": 19357 }, { "epoch": 3.1599934696543, "grad_norm": 1.7307875156402588, "learning_rate": 1.8350574253041836e-05, "loss": 0.4061, "step": 19358 }, { "epoch": 3.1601567282968044, "grad_norm": 1.9494054317474365, "learning_rate": 1.8350399664253307e-05, "loss": 0.4377, "step": 19359 }, { "epoch": 3.1603199869393084, "grad_norm": 1.8879218101501465, "learning_rate": 1.8350225067055927e-05, "loss": 0.4229, "step": 19360 }, { "epoch": 3.160483245581813, "grad_norm": 1.867623209953308, "learning_rate": 1.8350050461449874e-05, "loss": 0.4678, "step": 19361 }, { "epoch": 3.1606465042243173, "grad_norm": 1.6050113439559937, "learning_rate": 1.8349875847435322e-05, "loss": 0.3863, "step": 19362 }, { "epoch": 3.1608097628668217, "grad_norm": 1.8245062828063965, "learning_rate": 1.834970122501245e-05, "loss": 0.4506, "step": 19363 }, { "epoch": 3.160973021509326, "grad_norm": 1.7713088989257812, "learning_rate": 1.8349526594181428e-05, "loss": 0.4477, "step": 19364 }, { "epoch": 3.1611362801518306, "grad_norm": 2.07456636428833, "learning_rate": 1.8349351954942438e-05, "loss": 0.5156, "step": 19365 }, { "epoch": 3.161299538794335, "grad_norm": 2.008727550506592, "learning_rate": 1.834917730729565e-05, "loss": 0.492, "step": 19366 }, { "epoch": 3.1614627974368394, "grad_norm": 1.812483310699463, "learning_rate": 1.8349002651241243e-05, "loss": 0.4671, "step": 19367 }, { "epoch": 3.161626056079344, "grad_norm": 1.8250105381011963, "learning_rate": 1.8348827986779394e-05, "loss": 0.4703, "step": 19368 }, { "epoch": 3.1617893147218483, "grad_norm": 1.9629286527633667, "learning_rate": 1.8348653313910276e-05, "loss": 0.4598, "step": 19369 }, { "epoch": 3.1619525733643523, "grad_norm": 1.9102827310562134, "learning_rate": 1.8348478632634067e-05, "loss": 0.487, "step": 19370 }, { "epoch": 3.1621158320068568, "grad_norm": 2.093076467514038, "learning_rate": 1.834830394295094e-05, "loss": 0.4824, "step": 19371 }, { "epoch": 3.162279090649361, "grad_norm": 1.7708357572555542, "learning_rate": 1.8348129244861078e-05, "loss": 0.4209, "step": 19372 }, { "epoch": 3.1624423492918656, "grad_norm": 2.405855655670166, "learning_rate": 1.8347954538364646e-05, "loss": 0.4938, "step": 19373 }, { "epoch": 3.16260560793437, "grad_norm": 2.4030673503875732, "learning_rate": 1.834777982346183e-05, "loss": 0.4744, "step": 19374 }, { "epoch": 3.1627688665768745, "grad_norm": 1.9872937202453613, "learning_rate": 1.8347605100152804e-05, "loss": 0.5006, "step": 19375 }, { "epoch": 3.162932125219379, "grad_norm": 2.140369176864624, "learning_rate": 1.8347430368437738e-05, "loss": 0.5117, "step": 19376 }, { "epoch": 3.1630953838618834, "grad_norm": 2.0262787342071533, "learning_rate": 1.8347255628316812e-05, "loss": 0.4884, "step": 19377 }, { "epoch": 3.1632586425043874, "grad_norm": 1.948663353919983, "learning_rate": 1.8347080879790203e-05, "loss": 0.4746, "step": 19378 }, { "epoch": 3.163421901146892, "grad_norm": 1.9948992729187012, "learning_rate": 1.8346906122858085e-05, "loss": 0.4613, "step": 19379 }, { "epoch": 3.1635851597893963, "grad_norm": 2.0289862155914307, "learning_rate": 1.8346731357520637e-05, "loss": 0.4789, "step": 19380 }, { "epoch": 3.1637484184319007, "grad_norm": 1.9380401372909546, "learning_rate": 1.8346556583778032e-05, "loss": 0.4383, "step": 19381 }, { "epoch": 3.163911677074405, "grad_norm": 2.375171422958374, "learning_rate": 1.8346381801630446e-05, "loss": 0.5323, "step": 19382 }, { "epoch": 3.1640749357169096, "grad_norm": 2.0791704654693604, "learning_rate": 1.8346207011078056e-05, "loss": 0.5354, "step": 19383 }, { "epoch": 3.164238194359414, "grad_norm": 1.989286184310913, "learning_rate": 1.8346032212121036e-05, "loss": 0.4513, "step": 19384 }, { "epoch": 3.1644014530019184, "grad_norm": 2.0222158432006836, "learning_rate": 1.8345857404759565e-05, "loss": 0.4518, "step": 19385 }, { "epoch": 3.164564711644423, "grad_norm": 1.9334638118743896, "learning_rate": 1.8345682588993816e-05, "loss": 0.427, "step": 19386 }, { "epoch": 3.164727970286927, "grad_norm": 2.006775379180908, "learning_rate": 1.834550776482397e-05, "loss": 0.5372, "step": 19387 }, { "epoch": 3.1648912289294313, "grad_norm": 1.694175124168396, "learning_rate": 1.83453329322502e-05, "loss": 0.4481, "step": 19388 }, { "epoch": 3.1650544875719357, "grad_norm": 1.7279059886932373, "learning_rate": 1.834515809127268e-05, "loss": 0.4219, "step": 19389 }, { "epoch": 3.16521774621444, "grad_norm": 1.769660234451294, "learning_rate": 1.8344983241891588e-05, "loss": 0.4353, "step": 19390 }, { "epoch": 3.1653810048569446, "grad_norm": 2.053053140640259, "learning_rate": 1.83448083841071e-05, "loss": 0.527, "step": 19391 }, { "epoch": 3.165544263499449, "grad_norm": 1.9787068367004395, "learning_rate": 1.834463351791939e-05, "loss": 0.4029, "step": 19392 }, { "epoch": 3.1657075221419535, "grad_norm": 2.4926156997680664, "learning_rate": 1.834445864332864e-05, "loss": 0.5314, "step": 19393 }, { "epoch": 3.165870780784458, "grad_norm": 2.2229695320129395, "learning_rate": 1.8344283760335022e-05, "loss": 0.5468, "step": 19394 }, { "epoch": 3.166034039426962, "grad_norm": 1.8993566036224365, "learning_rate": 1.8344108868938715e-05, "loss": 0.546, "step": 19395 }, { "epoch": 3.1661972980694664, "grad_norm": 1.824479341506958, "learning_rate": 1.8343933969139888e-05, "loss": 0.4781, "step": 19396 }, { "epoch": 3.166360556711971, "grad_norm": 2.098909854888916, "learning_rate": 1.834375906093872e-05, "loss": 0.4905, "step": 19397 }, { "epoch": 3.1665238153544752, "grad_norm": 2.050670623779297, "learning_rate": 1.8343584144335395e-05, "loss": 0.4797, "step": 19398 }, { "epoch": 3.1666870739969797, "grad_norm": 1.9465349912643433, "learning_rate": 1.834340921933008e-05, "loss": 0.5158, "step": 19399 }, { "epoch": 3.166850332639484, "grad_norm": 1.7363545894622803, "learning_rate": 1.8343234285922955e-05, "loss": 0.4277, "step": 19400 }, { "epoch": 3.1670135912819886, "grad_norm": 1.7218409776687622, "learning_rate": 1.834305934411419e-05, "loss": 0.4401, "step": 19401 }, { "epoch": 3.167176849924493, "grad_norm": 1.4308303594589233, "learning_rate": 1.8342884393903975e-05, "loss": 0.3443, "step": 19402 }, { "epoch": 3.1673401085669974, "grad_norm": 1.7513666152954102, "learning_rate": 1.8342709435292476e-05, "loss": 0.4643, "step": 19403 }, { "epoch": 3.167503367209502, "grad_norm": 2.188016176223755, "learning_rate": 1.8342534468279863e-05, "loss": 0.5133, "step": 19404 }, { "epoch": 3.167666625852006, "grad_norm": 1.957112431526184, "learning_rate": 1.8342359492866327e-05, "loss": 0.4853, "step": 19405 }, { "epoch": 3.1678298844945103, "grad_norm": 1.801254153251648, "learning_rate": 1.8342184509052034e-05, "loss": 0.5029, "step": 19406 }, { "epoch": 3.1679931431370147, "grad_norm": 1.875631332397461, "learning_rate": 1.8342009516837166e-05, "loss": 0.5092, "step": 19407 }, { "epoch": 3.168156401779519, "grad_norm": 1.6301218271255493, "learning_rate": 1.8341834516221896e-05, "loss": 0.3862, "step": 19408 }, { "epoch": 3.1683196604220236, "grad_norm": 2.2237656116485596, "learning_rate": 1.83416595072064e-05, "loss": 0.5631, "step": 19409 }, { "epoch": 3.168482919064528, "grad_norm": 1.911882996559143, "learning_rate": 1.8341484489790856e-05, "loss": 0.4739, "step": 19410 }, { "epoch": 3.1686461777070325, "grad_norm": 2.035968065261841, "learning_rate": 1.8341309463975437e-05, "loss": 0.479, "step": 19411 }, { "epoch": 3.168809436349537, "grad_norm": 2.013012409210205, "learning_rate": 1.8341134429760322e-05, "loss": 0.4654, "step": 19412 }, { "epoch": 3.168972694992041, "grad_norm": 2.054798126220703, "learning_rate": 1.8340959387145693e-05, "loss": 0.4669, "step": 19413 }, { "epoch": 3.1691359536345454, "grad_norm": 1.959506630897522, "learning_rate": 1.8340784336131715e-05, "loss": 0.4978, "step": 19414 }, { "epoch": 3.16929921227705, "grad_norm": 2.094292163848877, "learning_rate": 1.834060927671857e-05, "loss": 0.5704, "step": 19415 }, { "epoch": 3.1694624709195542, "grad_norm": 1.8116670846939087, "learning_rate": 1.8340434208906434e-05, "loss": 0.4186, "step": 19416 }, { "epoch": 3.1696257295620587, "grad_norm": 1.7341760396957397, "learning_rate": 1.8340259132695483e-05, "loss": 0.4461, "step": 19417 }, { "epoch": 3.169788988204563, "grad_norm": 1.9142746925354004, "learning_rate": 1.8340084048085888e-05, "loss": 0.587, "step": 19418 }, { "epoch": 3.1699522468470676, "grad_norm": 2.0725996494293213, "learning_rate": 1.8339908955077836e-05, "loss": 0.4087, "step": 19419 }, { "epoch": 3.170115505489572, "grad_norm": 1.709054946899414, "learning_rate": 1.8339733853671497e-05, "loss": 0.4193, "step": 19420 }, { "epoch": 3.1702787641320764, "grad_norm": 1.9195125102996826, "learning_rate": 1.833955874386705e-05, "loss": 0.462, "step": 19421 }, { "epoch": 3.1704420227745804, "grad_norm": 1.7478309869766235, "learning_rate": 1.8339383625664667e-05, "loss": 0.4512, "step": 19422 }, { "epoch": 3.170605281417085, "grad_norm": 2.0806820392608643, "learning_rate": 1.833920849906453e-05, "loss": 0.4162, "step": 19423 }, { "epoch": 3.1707685400595893, "grad_norm": 1.8339508771896362, "learning_rate": 1.8339033364066808e-05, "loss": 0.4163, "step": 19424 }, { "epoch": 3.1709317987020937, "grad_norm": 2.22298526763916, "learning_rate": 1.8338858220671683e-05, "loss": 0.5977, "step": 19425 }, { "epoch": 3.171095057344598, "grad_norm": 1.776863932609558, "learning_rate": 1.833868306887933e-05, "loss": 0.4193, "step": 19426 }, { "epoch": 3.1712583159871026, "grad_norm": 1.916347861289978, "learning_rate": 1.8338507908689925e-05, "loss": 0.4338, "step": 19427 }, { "epoch": 3.171421574629607, "grad_norm": 2.3009960651397705, "learning_rate": 1.8338332740103648e-05, "loss": 0.5209, "step": 19428 }, { "epoch": 3.1715848332721115, "grad_norm": 1.7458263635635376, "learning_rate": 1.833815756312067e-05, "loss": 0.4449, "step": 19429 }, { "epoch": 3.171748091914616, "grad_norm": 1.730692744255066, "learning_rate": 1.8337982377741167e-05, "loss": 0.4525, "step": 19430 }, { "epoch": 3.17191135055712, "grad_norm": 2.0963997840881348, "learning_rate": 1.8337807183965322e-05, "loss": 0.5663, "step": 19431 }, { "epoch": 3.1720746091996244, "grad_norm": 1.4782755374908447, "learning_rate": 1.8337631981793308e-05, "loss": 0.3942, "step": 19432 }, { "epoch": 3.172237867842129, "grad_norm": 1.969273567199707, "learning_rate": 1.8337456771225296e-05, "loss": 0.4845, "step": 19433 }, { "epoch": 3.1724011264846332, "grad_norm": 1.8532772064208984, "learning_rate": 1.833728155226147e-05, "loss": 0.4689, "step": 19434 }, { "epoch": 3.1725643851271377, "grad_norm": 2.19911527633667, "learning_rate": 1.8337106324902002e-05, "loss": 0.5301, "step": 19435 }, { "epoch": 3.172727643769642, "grad_norm": 2.0296473503112793, "learning_rate": 1.8336931089147076e-05, "loss": 0.4283, "step": 19436 }, { "epoch": 3.1728909024121466, "grad_norm": 2.0314407348632812, "learning_rate": 1.8336755844996857e-05, "loss": 0.4577, "step": 19437 }, { "epoch": 3.173054161054651, "grad_norm": 1.9533050060272217, "learning_rate": 1.833658059245153e-05, "loss": 0.4761, "step": 19438 }, { "epoch": 3.1732174196971554, "grad_norm": 2.100487232208252, "learning_rate": 1.8336405331511263e-05, "loss": 0.4898, "step": 19439 }, { "epoch": 3.1733806783396594, "grad_norm": 1.3662142753601074, "learning_rate": 1.8336230062176245e-05, "loss": 0.3774, "step": 19440 }, { "epoch": 3.173543936982164, "grad_norm": 1.8979792594909668, "learning_rate": 1.833605478444664e-05, "loss": 0.4527, "step": 19441 }, { "epoch": 3.1737071956246683, "grad_norm": 1.8443915843963623, "learning_rate": 1.8335879498322633e-05, "loss": 0.3716, "step": 19442 }, { "epoch": 3.1738704542671727, "grad_norm": 2.0308098793029785, "learning_rate": 1.83357042038044e-05, "loss": 0.4476, "step": 19443 }, { "epoch": 3.174033712909677, "grad_norm": 2.054004192352295, "learning_rate": 1.833552890089211e-05, "loss": 0.4554, "step": 19444 }, { "epoch": 3.1741969715521816, "grad_norm": 2.174063205718994, "learning_rate": 1.8335353589585945e-05, "loss": 0.4951, "step": 19445 }, { "epoch": 3.174360230194686, "grad_norm": 2.3536453247070312, "learning_rate": 1.8335178269886086e-05, "loss": 0.5772, "step": 19446 }, { "epoch": 3.1745234888371905, "grad_norm": 1.9505541324615479, "learning_rate": 1.83350029417927e-05, "loss": 0.5203, "step": 19447 }, { "epoch": 3.1746867474796945, "grad_norm": 2.2560369968414307, "learning_rate": 1.833482760530597e-05, "loss": 0.6076, "step": 19448 }, { "epoch": 3.174850006122199, "grad_norm": 1.7788325548171997, "learning_rate": 1.8334652260426068e-05, "loss": 0.4625, "step": 19449 }, { "epoch": 3.1750132647647034, "grad_norm": 1.842991590499878, "learning_rate": 1.8334476907153177e-05, "loss": 0.3963, "step": 19450 }, { "epoch": 3.175176523407208, "grad_norm": 2.0132415294647217, "learning_rate": 1.833430154548747e-05, "loss": 0.4696, "step": 19451 }, { "epoch": 3.1753397820497122, "grad_norm": 1.7360085248947144, "learning_rate": 1.8334126175429125e-05, "loss": 0.39, "step": 19452 }, { "epoch": 3.1755030406922167, "grad_norm": 1.7899138927459717, "learning_rate": 1.8333950796978312e-05, "loss": 0.4272, "step": 19453 }, { "epoch": 3.175666299334721, "grad_norm": 2.2138829231262207, "learning_rate": 1.833377541013522e-05, "loss": 0.4903, "step": 19454 }, { "epoch": 3.1758295579772255, "grad_norm": 2.0732901096343994, "learning_rate": 1.833360001490001e-05, "loss": 0.4976, "step": 19455 }, { "epoch": 3.17599281661973, "grad_norm": 2.004549264907837, "learning_rate": 1.833342461127287e-05, "loss": 0.4824, "step": 19456 }, { "epoch": 3.1761560752622344, "grad_norm": 1.7096878290176392, "learning_rate": 1.8333249199253974e-05, "loss": 0.4265, "step": 19457 }, { "epoch": 3.1763193339047384, "grad_norm": 1.686898112297058, "learning_rate": 1.83330737788435e-05, "loss": 0.394, "step": 19458 }, { "epoch": 3.176482592547243, "grad_norm": 1.7556111812591553, "learning_rate": 1.833289835004162e-05, "loss": 0.4616, "step": 19459 }, { "epoch": 3.1766458511897473, "grad_norm": 1.6305060386657715, "learning_rate": 1.8332722912848513e-05, "loss": 0.3793, "step": 19460 }, { "epoch": 3.1768091098322517, "grad_norm": 1.7666574716567993, "learning_rate": 1.833254746726436e-05, "loss": 0.4028, "step": 19461 }, { "epoch": 3.176972368474756, "grad_norm": 2.3226239681243896, "learning_rate": 1.8332372013289335e-05, "loss": 0.542, "step": 19462 }, { "epoch": 3.1771356271172606, "grad_norm": 2.1562864780426025, "learning_rate": 1.833219655092361e-05, "loss": 0.503, "step": 19463 }, { "epoch": 3.177298885759765, "grad_norm": 1.6368584632873535, "learning_rate": 1.8332021080167366e-05, "loss": 0.4203, "step": 19464 }, { "epoch": 3.1774621444022695, "grad_norm": 1.726203203201294, "learning_rate": 1.833184560102078e-05, "loss": 0.4656, "step": 19465 }, { "epoch": 3.1776254030447735, "grad_norm": 1.5525686740875244, "learning_rate": 1.8331670113484025e-05, "loss": 0.3821, "step": 19466 }, { "epoch": 3.177788661687278, "grad_norm": 2.148404836654663, "learning_rate": 1.8331494617557284e-05, "loss": 0.5092, "step": 19467 }, { "epoch": 3.1779519203297824, "grad_norm": 1.8498390913009644, "learning_rate": 1.833131911324073e-05, "loss": 0.4472, "step": 19468 }, { "epoch": 3.178115178972287, "grad_norm": 1.7343631982803345, "learning_rate": 1.8331143600534534e-05, "loss": 0.4446, "step": 19469 }, { "epoch": 3.1782784376147912, "grad_norm": 1.6653110980987549, "learning_rate": 1.8330968079438887e-05, "loss": 0.4357, "step": 19470 }, { "epoch": 3.1784416962572957, "grad_norm": 2.0536370277404785, "learning_rate": 1.833079254995395e-05, "loss": 0.447, "step": 19471 }, { "epoch": 3.1786049548998, "grad_norm": 1.8495733737945557, "learning_rate": 1.8330617012079913e-05, "loss": 0.5094, "step": 19472 }, { "epoch": 3.1787682135423045, "grad_norm": 1.853529930114746, "learning_rate": 1.8330441465816942e-05, "loss": 0.4171, "step": 19473 }, { "epoch": 3.178931472184809, "grad_norm": 2.032954454421997, "learning_rate": 1.833026591116522e-05, "loss": 0.536, "step": 19474 }, { "epoch": 3.179094730827313, "grad_norm": 1.8326293230056763, "learning_rate": 1.8330090348124926e-05, "loss": 0.4567, "step": 19475 }, { "epoch": 3.1792579894698174, "grad_norm": 1.8864691257476807, "learning_rate": 1.832991477669623e-05, "loss": 0.5495, "step": 19476 }, { "epoch": 3.179421248112322, "grad_norm": 1.8669357299804688, "learning_rate": 1.8329739196879312e-05, "loss": 0.4421, "step": 19477 }, { "epoch": 3.1795845067548263, "grad_norm": 2.1007907390594482, "learning_rate": 1.832956360867435e-05, "loss": 0.4499, "step": 19478 }, { "epoch": 3.1797477653973307, "grad_norm": 2.15738844871521, "learning_rate": 1.832938801208152e-05, "loss": 0.5399, "step": 19479 }, { "epoch": 3.179911024039835, "grad_norm": 1.8570142984390259, "learning_rate": 1.8329212407100996e-05, "loss": 0.4516, "step": 19480 }, { "epoch": 3.1800742826823396, "grad_norm": 1.9413913488388062, "learning_rate": 1.832903679373296e-05, "loss": 0.4317, "step": 19481 }, { "epoch": 3.180237541324844, "grad_norm": 1.5999428033828735, "learning_rate": 1.8328861171977585e-05, "loss": 0.4162, "step": 19482 }, { "epoch": 3.180400799967348, "grad_norm": 2.2204248905181885, "learning_rate": 1.832868554183505e-05, "loss": 0.5326, "step": 19483 }, { "epoch": 3.1805640586098525, "grad_norm": 2.2970845699310303, "learning_rate": 1.832850990330553e-05, "loss": 0.5384, "step": 19484 }, { "epoch": 3.180727317252357, "grad_norm": 1.6347428560256958, "learning_rate": 1.83283342563892e-05, "loss": 0.4168, "step": 19485 }, { "epoch": 3.1808905758948613, "grad_norm": 1.9419910907745361, "learning_rate": 1.8328158601086242e-05, "loss": 0.5134, "step": 19486 }, { "epoch": 3.181053834537366, "grad_norm": 1.5779368877410889, "learning_rate": 1.8327982937396833e-05, "loss": 0.3957, "step": 19487 }, { "epoch": 3.1812170931798702, "grad_norm": 1.8582366704940796, "learning_rate": 1.8327807265321145e-05, "loss": 0.4905, "step": 19488 }, { "epoch": 3.1813803518223747, "grad_norm": 1.5388875007629395, "learning_rate": 1.8327631584859355e-05, "loss": 0.4332, "step": 19489 }, { "epoch": 3.181543610464879, "grad_norm": 1.710969090461731, "learning_rate": 1.8327455896011645e-05, "loss": 0.4068, "step": 19490 }, { "epoch": 3.1817068691073835, "grad_norm": 2.0014355182647705, "learning_rate": 1.832728019877819e-05, "loss": 0.5252, "step": 19491 }, { "epoch": 3.181870127749888, "grad_norm": 1.685555100440979, "learning_rate": 1.8327104493159167e-05, "loss": 0.38, "step": 19492 }, { "epoch": 3.182033386392392, "grad_norm": 1.9415581226348877, "learning_rate": 1.8326928779154748e-05, "loss": 0.4434, "step": 19493 }, { "epoch": 3.1821966450348964, "grad_norm": 1.9431551694869995, "learning_rate": 1.8326753056765113e-05, "loss": 0.4571, "step": 19494 }, { "epoch": 3.182359903677401, "grad_norm": 2.0218188762664795, "learning_rate": 1.8326577325990444e-05, "loss": 0.4441, "step": 19495 }, { "epoch": 3.1825231623199053, "grad_norm": 1.966469645500183, "learning_rate": 1.832640158683091e-05, "loss": 0.4783, "step": 19496 }, { "epoch": 3.1826864209624097, "grad_norm": 1.6297578811645508, "learning_rate": 1.8326225839286698e-05, "loss": 0.3948, "step": 19497 }, { "epoch": 3.182849679604914, "grad_norm": 1.9749455451965332, "learning_rate": 1.8326050083357977e-05, "loss": 0.4967, "step": 19498 }, { "epoch": 3.1830129382474186, "grad_norm": 1.7173680067062378, "learning_rate": 1.832587431904492e-05, "loss": 0.4651, "step": 19499 }, { "epoch": 3.183176196889923, "grad_norm": 1.996874451637268, "learning_rate": 1.8325698546347714e-05, "loss": 0.477, "step": 19500 }, { "epoch": 3.183339455532427, "grad_norm": 2.2433865070343018, "learning_rate": 1.8325522765266532e-05, "loss": 0.5478, "step": 19501 }, { "epoch": 3.1835027141749315, "grad_norm": 1.8912630081176758, "learning_rate": 1.832534697580155e-05, "loss": 0.4732, "step": 19502 }, { "epoch": 3.183665972817436, "grad_norm": 2.1384878158569336, "learning_rate": 1.8325171177952948e-05, "loss": 0.5055, "step": 19503 }, { "epoch": 3.1838292314599403, "grad_norm": 1.8180205821990967, "learning_rate": 1.8324995371720898e-05, "loss": 0.4894, "step": 19504 }, { "epoch": 3.183992490102445, "grad_norm": 1.6100879907608032, "learning_rate": 1.832481955710558e-05, "loss": 0.4242, "step": 19505 }, { "epoch": 3.184155748744949, "grad_norm": 2.200056791305542, "learning_rate": 1.832464373410717e-05, "loss": 0.4414, "step": 19506 }, { "epoch": 3.1843190073874537, "grad_norm": 2.087191581726074, "learning_rate": 1.8324467902725848e-05, "loss": 0.4987, "step": 19507 }, { "epoch": 3.184482266029958, "grad_norm": 1.9070347547531128, "learning_rate": 1.832429206296179e-05, "loss": 0.49, "step": 19508 }, { "epoch": 3.1846455246724625, "grad_norm": 1.9199659824371338, "learning_rate": 1.8324116214815172e-05, "loss": 0.475, "step": 19509 }, { "epoch": 3.1848087833149665, "grad_norm": 2.3945484161376953, "learning_rate": 1.832394035828617e-05, "loss": 0.4879, "step": 19510 }, { "epoch": 3.184972041957471, "grad_norm": 1.7942672967910767, "learning_rate": 1.8323764493374964e-05, "loss": 0.443, "step": 19511 }, { "epoch": 3.1851353005999754, "grad_norm": 1.758232593536377, "learning_rate": 1.8323588620081723e-05, "loss": 0.4461, "step": 19512 }, { "epoch": 3.18529855924248, "grad_norm": 2.085678815841675, "learning_rate": 1.8323412738406638e-05, "loss": 0.5214, "step": 19513 }, { "epoch": 3.1854618178849843, "grad_norm": 1.8132766485214233, "learning_rate": 1.8323236848349873e-05, "loss": 0.4397, "step": 19514 }, { "epoch": 3.1856250765274887, "grad_norm": 1.7460663318634033, "learning_rate": 1.8323060949911612e-05, "loss": 0.4849, "step": 19515 }, { "epoch": 3.185788335169993, "grad_norm": 2.100801944732666, "learning_rate": 1.8322885043092035e-05, "loss": 0.5102, "step": 19516 }, { "epoch": 3.1859515938124976, "grad_norm": 1.6405383348464966, "learning_rate": 1.832270912789131e-05, "loss": 0.4022, "step": 19517 }, { "epoch": 3.186114852455002, "grad_norm": 2.1602718830108643, "learning_rate": 1.8322533204309622e-05, "loss": 0.6193, "step": 19518 }, { "epoch": 3.186278111097506, "grad_norm": 1.7573230266571045, "learning_rate": 1.8322357272347146e-05, "loss": 0.426, "step": 19519 }, { "epoch": 3.1864413697400105, "grad_norm": 1.9749364852905273, "learning_rate": 1.8322181332004057e-05, "loss": 0.493, "step": 19520 }, { "epoch": 3.186604628382515, "grad_norm": 1.8659029006958008, "learning_rate": 1.8322005383280534e-05, "loss": 0.4563, "step": 19521 }, { "epoch": 3.1867678870250193, "grad_norm": 2.1972219944000244, "learning_rate": 1.832182942617675e-05, "loss": 0.5257, "step": 19522 }, { "epoch": 3.186931145667524, "grad_norm": 1.6085740327835083, "learning_rate": 1.832165346069289e-05, "loss": 0.4155, "step": 19523 }, { "epoch": 3.187094404310028, "grad_norm": 2.3197073936462402, "learning_rate": 1.8321477486829128e-05, "loss": 0.5119, "step": 19524 }, { "epoch": 3.1872576629525327, "grad_norm": 1.645952820777893, "learning_rate": 1.8321301504585638e-05, "loss": 0.3581, "step": 19525 }, { "epoch": 3.187420921595037, "grad_norm": 1.7364463806152344, "learning_rate": 1.83211255139626e-05, "loss": 0.4272, "step": 19526 }, { "epoch": 3.1875841802375415, "grad_norm": 1.6503351926803589, "learning_rate": 1.8320949514960192e-05, "loss": 0.4427, "step": 19527 }, { "epoch": 3.1877474388800455, "grad_norm": 1.811378836631775, "learning_rate": 1.832077350757859e-05, "loss": 0.4378, "step": 19528 }, { "epoch": 3.18791069752255, "grad_norm": 1.887601613998413, "learning_rate": 1.832059749181797e-05, "loss": 0.4467, "step": 19529 }, { "epoch": 3.1880739561650544, "grad_norm": 1.6549791097640991, "learning_rate": 1.8320421467678507e-05, "loss": 0.3898, "step": 19530 }, { "epoch": 3.188237214807559, "grad_norm": 1.8880434036254883, "learning_rate": 1.832024543516039e-05, "loss": 0.5054, "step": 19531 }, { "epoch": 3.1884004734500633, "grad_norm": 1.8308242559432983, "learning_rate": 1.8320069394263784e-05, "loss": 0.4075, "step": 19532 }, { "epoch": 3.1885637320925677, "grad_norm": 2.060730457305908, "learning_rate": 1.8319893344988867e-05, "loss": 0.5441, "step": 19533 }, { "epoch": 3.188726990735072, "grad_norm": 2.0667738914489746, "learning_rate": 1.8319717287335822e-05, "loss": 0.4611, "step": 19534 }, { "epoch": 3.1888902493775766, "grad_norm": 2.1282100677490234, "learning_rate": 1.8319541221304825e-05, "loss": 0.5761, "step": 19535 }, { "epoch": 3.1890535080200806, "grad_norm": 2.2473654747009277, "learning_rate": 1.8319365146896056e-05, "loss": 0.5521, "step": 19536 }, { "epoch": 3.189216766662585, "grad_norm": 1.591321587562561, "learning_rate": 1.831918906410968e-05, "loss": 0.381, "step": 19537 }, { "epoch": 3.1893800253050895, "grad_norm": 2.2890560626983643, "learning_rate": 1.8319012972945887e-05, "loss": 0.4908, "step": 19538 }, { "epoch": 3.189543283947594, "grad_norm": 1.6805204153060913, "learning_rate": 1.8318836873404854e-05, "loss": 0.4955, "step": 19539 }, { "epoch": 3.1897065425900983, "grad_norm": 1.6428909301757812, "learning_rate": 1.831866076548675e-05, "loss": 0.3978, "step": 19540 }, { "epoch": 3.1898698012326028, "grad_norm": 1.9693000316619873, "learning_rate": 1.8318484649191757e-05, "loss": 0.3583, "step": 19541 }, { "epoch": 3.190033059875107, "grad_norm": 2.095877170562744, "learning_rate": 1.8318308524520052e-05, "loss": 0.5064, "step": 19542 }, { "epoch": 3.1901963185176117, "grad_norm": 1.9269578456878662, "learning_rate": 1.8318132391471815e-05, "loss": 0.4648, "step": 19543 }, { "epoch": 3.190359577160116, "grad_norm": 1.7647509574890137, "learning_rate": 1.831795625004722e-05, "loss": 0.4636, "step": 19544 }, { "epoch": 3.1905228358026205, "grad_norm": 1.8884388208389282, "learning_rate": 1.8317780100246442e-05, "loss": 0.4453, "step": 19545 }, { "epoch": 3.1906860944451245, "grad_norm": 1.9400248527526855, "learning_rate": 1.8317603942069665e-05, "loss": 0.406, "step": 19546 }, { "epoch": 3.190849353087629, "grad_norm": 2.1974709033966064, "learning_rate": 1.8317427775517063e-05, "loss": 0.583, "step": 19547 }, { "epoch": 3.1910126117301334, "grad_norm": 1.8757987022399902, "learning_rate": 1.8317251600588814e-05, "loss": 0.4075, "step": 19548 }, { "epoch": 3.191175870372638, "grad_norm": 2.389094352722168, "learning_rate": 1.8317075417285092e-05, "loss": 0.529, "step": 19549 }, { "epoch": 3.1913391290151423, "grad_norm": 2.011375665664673, "learning_rate": 1.8316899225606078e-05, "loss": 0.4638, "step": 19550 }, { "epoch": 3.1915023876576467, "grad_norm": 2.1239383220672607, "learning_rate": 1.831672302555195e-05, "loss": 0.5059, "step": 19551 }, { "epoch": 3.191665646300151, "grad_norm": 2.140509843826294, "learning_rate": 1.8316546817122885e-05, "loss": 0.5226, "step": 19552 }, { "epoch": 3.1918289049426556, "grad_norm": 2.5327160358428955, "learning_rate": 1.8316370600319054e-05, "loss": 0.4842, "step": 19553 }, { "epoch": 3.1919921635851596, "grad_norm": 2.1440703868865967, "learning_rate": 1.8316194375140646e-05, "loss": 0.4715, "step": 19554 }, { "epoch": 3.192155422227664, "grad_norm": 1.9341566562652588, "learning_rate": 1.8316018141587833e-05, "loss": 0.4911, "step": 19555 }, { "epoch": 3.1923186808701685, "grad_norm": 1.7734322547912598, "learning_rate": 1.831584189966079e-05, "loss": 0.46, "step": 19556 }, { "epoch": 3.192481939512673, "grad_norm": 2.2548458576202393, "learning_rate": 1.8315665649359692e-05, "loss": 0.4941, "step": 19557 }, { "epoch": 3.1926451981551773, "grad_norm": 2.13787841796875, "learning_rate": 1.8315489390684725e-05, "loss": 0.4697, "step": 19558 }, { "epoch": 3.1928084567976818, "grad_norm": 1.954977035522461, "learning_rate": 1.8315313123636063e-05, "loss": 0.4706, "step": 19559 }, { "epoch": 3.192971715440186, "grad_norm": 2.226811647415161, "learning_rate": 1.8315136848213883e-05, "loss": 0.4919, "step": 19560 }, { "epoch": 3.1931349740826906, "grad_norm": 1.9745374917984009, "learning_rate": 1.8314960564418362e-05, "loss": 0.4819, "step": 19561 }, { "epoch": 3.193298232725195, "grad_norm": 1.7666473388671875, "learning_rate": 1.8314784272249677e-05, "loss": 0.432, "step": 19562 }, { "epoch": 3.193461491367699, "grad_norm": 1.625012755393982, "learning_rate": 1.8314607971708006e-05, "loss": 0.4079, "step": 19563 }, { "epoch": 3.1936247500102035, "grad_norm": 1.8283253908157349, "learning_rate": 1.8314431662793527e-05, "loss": 0.4816, "step": 19564 }, { "epoch": 3.193788008652708, "grad_norm": 1.7323793172836304, "learning_rate": 1.831425534550642e-05, "loss": 0.434, "step": 19565 }, { "epoch": 3.1939512672952124, "grad_norm": 1.746498942375183, "learning_rate": 1.8314079019846857e-05, "loss": 0.4281, "step": 19566 }, { "epoch": 3.194114525937717, "grad_norm": 2.121636152267456, "learning_rate": 1.831390268581502e-05, "loss": 0.4876, "step": 19567 }, { "epoch": 3.1942777845802213, "grad_norm": 1.8976359367370605, "learning_rate": 1.8313726343411085e-05, "loss": 0.4613, "step": 19568 }, { "epoch": 3.1944410432227257, "grad_norm": 2.340796947479248, "learning_rate": 1.831354999263523e-05, "loss": 0.6362, "step": 19569 }, { "epoch": 3.19460430186523, "grad_norm": 2.103839874267578, "learning_rate": 1.8313373633487633e-05, "loss": 0.4583, "step": 19570 }, { "epoch": 3.1947675605077346, "grad_norm": 1.7732237577438354, "learning_rate": 1.8313197265968472e-05, "loss": 0.4216, "step": 19571 }, { "epoch": 3.1949308191502386, "grad_norm": 2.10184907913208, "learning_rate": 1.8313020890077922e-05, "loss": 0.5817, "step": 19572 }, { "epoch": 3.195094077792743, "grad_norm": 1.9710017442703247, "learning_rate": 1.8312844505816162e-05, "loss": 0.4811, "step": 19573 }, { "epoch": 3.1952573364352475, "grad_norm": 2.1800854206085205, "learning_rate": 1.831266811318337e-05, "loss": 0.4888, "step": 19574 }, { "epoch": 3.195420595077752, "grad_norm": 2.368826389312744, "learning_rate": 1.8312491712179722e-05, "loss": 0.5054, "step": 19575 }, { "epoch": 3.1955838537202563, "grad_norm": 1.6319855451583862, "learning_rate": 1.83123153028054e-05, "loss": 0.3737, "step": 19576 }, { "epoch": 3.1957471123627608, "grad_norm": 1.7007611989974976, "learning_rate": 1.8312138885060577e-05, "loss": 0.3902, "step": 19577 }, { "epoch": 3.195910371005265, "grad_norm": 1.7965928316116333, "learning_rate": 1.8311962458945432e-05, "loss": 0.4303, "step": 19578 }, { "epoch": 3.1960736296477696, "grad_norm": 1.6361415386199951, "learning_rate": 1.8311786024460145e-05, "loss": 0.4016, "step": 19579 }, { "epoch": 3.196236888290274, "grad_norm": 1.7300794124603271, "learning_rate": 1.8311609581604887e-05, "loss": 0.4563, "step": 19580 }, { "epoch": 3.196400146932778, "grad_norm": 1.7035677433013916, "learning_rate": 1.8311433130379844e-05, "loss": 0.3803, "step": 19581 }, { "epoch": 3.1965634055752825, "grad_norm": 1.5410540103912354, "learning_rate": 1.831125667078519e-05, "loss": 0.3723, "step": 19582 }, { "epoch": 3.196726664217787, "grad_norm": 2.016362190246582, "learning_rate": 1.8311080202821104e-05, "loss": 0.4857, "step": 19583 }, { "epoch": 3.1968899228602914, "grad_norm": 2.1865170001983643, "learning_rate": 1.8310903726487758e-05, "loss": 0.5285, "step": 19584 }, { "epoch": 3.197053181502796, "grad_norm": 2.0166268348693848, "learning_rate": 1.831072724178534e-05, "loss": 0.4948, "step": 19585 }, { "epoch": 3.1972164401453003, "grad_norm": 2.032808303833008, "learning_rate": 1.831055074871402e-05, "loss": 0.5913, "step": 19586 }, { "epoch": 3.1973796987878047, "grad_norm": 2.3696446418762207, "learning_rate": 1.831037424727397e-05, "loss": 0.5123, "step": 19587 }, { "epoch": 3.197542957430309, "grad_norm": 1.4204304218292236, "learning_rate": 1.8310197737465385e-05, "loss": 0.3676, "step": 19588 }, { "epoch": 3.197706216072813, "grad_norm": 1.952985405921936, "learning_rate": 1.831002121928843e-05, "loss": 0.4398, "step": 19589 }, { "epoch": 3.1978694747153176, "grad_norm": 1.892081618309021, "learning_rate": 1.8309844692743283e-05, "loss": 0.5001, "step": 19590 }, { "epoch": 3.198032733357822, "grad_norm": 2.40335750579834, "learning_rate": 1.830966815783013e-05, "loss": 0.4961, "step": 19591 }, { "epoch": 3.1981959920003264, "grad_norm": 1.836540699005127, "learning_rate": 1.830949161454914e-05, "loss": 0.3972, "step": 19592 }, { "epoch": 3.198359250642831, "grad_norm": 2.152526617050171, "learning_rate": 1.8309315062900493e-05, "loss": 0.5108, "step": 19593 }, { "epoch": 3.1985225092853353, "grad_norm": 2.115694046020508, "learning_rate": 1.830913850288437e-05, "loss": 0.4778, "step": 19594 }, { "epoch": 3.1986857679278398, "grad_norm": 2.135572910308838, "learning_rate": 1.8308961934500948e-05, "loss": 0.5462, "step": 19595 }, { "epoch": 3.198849026570344, "grad_norm": 2.3206892013549805, "learning_rate": 1.8308785357750402e-05, "loss": 0.5042, "step": 19596 }, { "epoch": 3.1990122852128486, "grad_norm": 2.015599012374878, "learning_rate": 1.830860877263291e-05, "loss": 0.4511, "step": 19597 }, { "epoch": 3.199175543855353, "grad_norm": 2.0190742015838623, "learning_rate": 1.830843217914865e-05, "loss": 0.4611, "step": 19598 }, { "epoch": 3.199338802497857, "grad_norm": 2.409735918045044, "learning_rate": 1.8308255577297808e-05, "loss": 0.5156, "step": 19599 }, { "epoch": 3.1995020611403615, "grad_norm": 1.8327524662017822, "learning_rate": 1.8308078967080547e-05, "loss": 0.4334, "step": 19600 }, { "epoch": 3.199665319782866, "grad_norm": 2.0062079429626465, "learning_rate": 1.8307902348497056e-05, "loss": 0.4888, "step": 19601 }, { "epoch": 3.1998285784253704, "grad_norm": 1.6429908275604248, "learning_rate": 1.830772572154751e-05, "loss": 0.4028, "step": 19602 }, { "epoch": 3.199991837067875, "grad_norm": 2.0927200317382812, "learning_rate": 1.8307549086232085e-05, "loss": 0.4997, "step": 19603 }, { "epoch": 3.2001550957103793, "grad_norm": 1.7307814359664917, "learning_rate": 1.830737244255096e-05, "loss": 0.3514, "step": 19604 }, { "epoch": 3.2003183543528837, "grad_norm": 1.7446619272232056, "learning_rate": 1.8307195790504316e-05, "loss": 0.4583, "step": 19605 }, { "epoch": 3.200481612995388, "grad_norm": 1.620684027671814, "learning_rate": 1.8307019130092326e-05, "loss": 0.4172, "step": 19606 }, { "epoch": 3.200644871637892, "grad_norm": 2.267345428466797, "learning_rate": 1.830684246131517e-05, "loss": 0.6104, "step": 19607 }, { "epoch": 3.2008081302803966, "grad_norm": 1.794154405593872, "learning_rate": 1.8306665784173026e-05, "loss": 0.4777, "step": 19608 }, { "epoch": 3.200971388922901, "grad_norm": 1.871884822845459, "learning_rate": 1.8306489098666072e-05, "loss": 0.4945, "step": 19609 }, { "epoch": 3.2011346475654054, "grad_norm": 1.8022282123565674, "learning_rate": 1.8306312404794485e-05, "loss": 0.4907, "step": 19610 }, { "epoch": 3.20129790620791, "grad_norm": 2.0368547439575195, "learning_rate": 1.8306135702558444e-05, "loss": 0.4317, "step": 19611 }, { "epoch": 3.2014611648504143, "grad_norm": 2.0825605392456055, "learning_rate": 1.830595899195813e-05, "loss": 0.5336, "step": 19612 }, { "epoch": 3.2016244234929188, "grad_norm": 1.2495461702346802, "learning_rate": 1.8305782272993712e-05, "loss": 0.3391, "step": 19613 }, { "epoch": 3.201787682135423, "grad_norm": 1.90328848361969, "learning_rate": 1.8305605545665374e-05, "loss": 0.5042, "step": 19614 }, { "epoch": 3.2019509407779276, "grad_norm": 1.9784331321716309, "learning_rate": 1.8305428809973297e-05, "loss": 0.5026, "step": 19615 }, { "epoch": 3.2021141994204316, "grad_norm": 2.024881601333618, "learning_rate": 1.8305252065917653e-05, "loss": 0.4896, "step": 19616 }, { "epoch": 3.202277458062936, "grad_norm": 2.0466530323028564, "learning_rate": 1.8305075313498624e-05, "loss": 0.5007, "step": 19617 }, { "epoch": 3.2024407167054405, "grad_norm": 1.919000267982483, "learning_rate": 1.8304898552716384e-05, "loss": 0.5346, "step": 19618 }, { "epoch": 3.202603975347945, "grad_norm": 1.8420442342758179, "learning_rate": 1.8304721783571116e-05, "loss": 0.4766, "step": 19619 }, { "epoch": 3.2027672339904494, "grad_norm": 1.9834940433502197, "learning_rate": 1.830454500606299e-05, "loss": 0.4646, "step": 19620 }, { "epoch": 3.202930492632954, "grad_norm": 1.5404096841812134, "learning_rate": 1.8304368220192198e-05, "loss": 0.3539, "step": 19621 }, { "epoch": 3.2030937512754583, "grad_norm": 1.607448935508728, "learning_rate": 1.8304191425958905e-05, "loss": 0.3864, "step": 19622 }, { "epoch": 3.2032570099179627, "grad_norm": 1.7967172861099243, "learning_rate": 1.830401462336329e-05, "loss": 0.5477, "step": 19623 }, { "epoch": 3.2034202685604667, "grad_norm": 2.363156318664551, "learning_rate": 1.830383781240554e-05, "loss": 0.5192, "step": 19624 }, { "epoch": 3.203583527202971, "grad_norm": 1.5355092287063599, "learning_rate": 1.8303660993085825e-05, "loss": 0.4007, "step": 19625 }, { "epoch": 3.2037467858454756, "grad_norm": 2.018120765686035, "learning_rate": 1.8303484165404323e-05, "loss": 0.5341, "step": 19626 }, { "epoch": 3.20391004448798, "grad_norm": 1.8691537380218506, "learning_rate": 1.8303307329361217e-05, "loss": 0.4226, "step": 19627 }, { "epoch": 3.2040733031304844, "grad_norm": 1.774322271347046, "learning_rate": 1.8303130484956682e-05, "loss": 0.496, "step": 19628 }, { "epoch": 3.204236561772989, "grad_norm": 1.967867374420166, "learning_rate": 1.83029536321909e-05, "loss": 0.4552, "step": 19629 }, { "epoch": 3.2043998204154933, "grad_norm": 1.6074422597885132, "learning_rate": 1.8302776771064044e-05, "loss": 0.3821, "step": 19630 }, { "epoch": 3.2045630790579978, "grad_norm": 1.8645256757736206, "learning_rate": 1.8302599901576296e-05, "loss": 0.4787, "step": 19631 }, { "epoch": 3.204726337700502, "grad_norm": 1.9750136137008667, "learning_rate": 1.8302423023727828e-05, "loss": 0.5011, "step": 19632 }, { "epoch": 3.2048895963430066, "grad_norm": 1.8905576467514038, "learning_rate": 1.8302246137518823e-05, "loss": 0.5192, "step": 19633 }, { "epoch": 3.2050528549855106, "grad_norm": 1.7879860401153564, "learning_rate": 1.830206924294946e-05, "loss": 0.3874, "step": 19634 }, { "epoch": 3.205216113628015, "grad_norm": 1.8421549797058105, "learning_rate": 1.8301892340019916e-05, "loss": 0.5504, "step": 19635 }, { "epoch": 3.2053793722705195, "grad_norm": 1.7435662746429443, "learning_rate": 1.8301715428730367e-05, "loss": 0.4015, "step": 19636 }, { "epoch": 3.205542630913024, "grad_norm": 2.0909814834594727, "learning_rate": 1.8301538509080992e-05, "loss": 0.4606, "step": 19637 }, { "epoch": 3.2057058895555284, "grad_norm": 2.08389949798584, "learning_rate": 1.830136158107197e-05, "loss": 0.4943, "step": 19638 }, { "epoch": 3.205869148198033, "grad_norm": 1.5654551982879639, "learning_rate": 1.830118464470348e-05, "loss": 0.3988, "step": 19639 }, { "epoch": 3.2060324068405373, "grad_norm": 2.013561964035034, "learning_rate": 1.8301007699975704e-05, "loss": 0.5066, "step": 19640 }, { "epoch": 3.2061956654830417, "grad_norm": 1.8626009225845337, "learning_rate": 1.8300830746888813e-05, "loss": 0.4459, "step": 19641 }, { "epoch": 3.2063589241255457, "grad_norm": 1.7621209621429443, "learning_rate": 1.830065378544298e-05, "loss": 0.4215, "step": 19642 }, { "epoch": 3.20652218276805, "grad_norm": 2.053036689758301, "learning_rate": 1.83004768156384e-05, "loss": 0.434, "step": 19643 }, { "epoch": 3.2066854414105546, "grad_norm": 2.0912325382232666, "learning_rate": 1.8300299837475236e-05, "loss": 0.5095, "step": 19644 }, { "epoch": 3.206848700053059, "grad_norm": 2.413562059402466, "learning_rate": 1.8300122850953678e-05, "loss": 0.5048, "step": 19645 }, { "epoch": 3.2070119586955634, "grad_norm": 1.9266762733459473, "learning_rate": 1.8299945856073896e-05, "loss": 0.488, "step": 19646 }, { "epoch": 3.207175217338068, "grad_norm": 1.9684224128723145, "learning_rate": 1.8299768852836068e-05, "loss": 0.4615, "step": 19647 }, { "epoch": 3.2073384759805723, "grad_norm": 1.912516713142395, "learning_rate": 1.8299591841240376e-05, "loss": 0.4414, "step": 19648 }, { "epoch": 3.2075017346230768, "grad_norm": 2.0965850353240967, "learning_rate": 1.8299414821287e-05, "loss": 0.4823, "step": 19649 }, { "epoch": 3.207664993265581, "grad_norm": 1.710727572441101, "learning_rate": 1.829923779297611e-05, "loss": 0.4052, "step": 19650 }, { "epoch": 3.207828251908085, "grad_norm": 1.8523346185684204, "learning_rate": 1.8299060756307895e-05, "loss": 0.4253, "step": 19651 }, { "epoch": 3.2079915105505896, "grad_norm": 2.2285959720611572, "learning_rate": 1.8298883711282526e-05, "loss": 0.4865, "step": 19652 }, { "epoch": 3.208154769193094, "grad_norm": 1.8871684074401855, "learning_rate": 1.8298706657900185e-05, "loss": 0.4901, "step": 19653 }, { "epoch": 3.2083180278355985, "grad_norm": 2.412090539932251, "learning_rate": 1.8298529596161047e-05, "loss": 0.5558, "step": 19654 }, { "epoch": 3.208481286478103, "grad_norm": 2.218461036682129, "learning_rate": 1.8298352526065292e-05, "loss": 0.5515, "step": 19655 }, { "epoch": 3.2086445451206074, "grad_norm": 1.9370707273483276, "learning_rate": 1.82981754476131e-05, "loss": 0.5068, "step": 19656 }, { "epoch": 3.208807803763112, "grad_norm": 1.9357099533081055, "learning_rate": 1.8297998360804646e-05, "loss": 0.5067, "step": 19657 }, { "epoch": 3.2089710624056162, "grad_norm": 2.1195530891418457, "learning_rate": 1.8297821265640107e-05, "loss": 0.5169, "step": 19658 }, { "epoch": 3.2091343210481207, "grad_norm": 1.760433554649353, "learning_rate": 1.8297644162119666e-05, "loss": 0.427, "step": 19659 }, { "epoch": 3.2092975796906247, "grad_norm": 1.749983787536621, "learning_rate": 1.8297467050243503e-05, "loss": 0.4074, "step": 19660 }, { "epoch": 3.209460838333129, "grad_norm": 1.8577622175216675, "learning_rate": 1.829728993001179e-05, "loss": 0.486, "step": 19661 }, { "epoch": 3.2096240969756336, "grad_norm": 1.8855870962142944, "learning_rate": 1.829711280142471e-05, "loss": 0.4913, "step": 19662 }, { "epoch": 3.209787355618138, "grad_norm": 2.039752960205078, "learning_rate": 1.829693566448244e-05, "loss": 0.4396, "step": 19663 }, { "epoch": 3.2099506142606424, "grad_norm": 2.2638161182403564, "learning_rate": 1.8296758519185154e-05, "loss": 0.5574, "step": 19664 }, { "epoch": 3.210113872903147, "grad_norm": 1.536924123764038, "learning_rate": 1.8296581365533038e-05, "loss": 0.358, "step": 19665 }, { "epoch": 3.2102771315456513, "grad_norm": 2.4349405765533447, "learning_rate": 1.8296404203526267e-05, "loss": 0.6145, "step": 19666 }, { "epoch": 3.2104403901881557, "grad_norm": 2.2324492931365967, "learning_rate": 1.8296227033165016e-05, "loss": 0.5554, "step": 19667 }, { "epoch": 3.21060364883066, "grad_norm": 1.7308926582336426, "learning_rate": 1.8296049854449466e-05, "loss": 0.3396, "step": 19668 }, { "epoch": 3.210766907473164, "grad_norm": 1.900835394859314, "learning_rate": 1.82958726673798e-05, "loss": 0.5244, "step": 19669 }, { "epoch": 3.2109301661156686, "grad_norm": 2.0649027824401855, "learning_rate": 1.829569547195619e-05, "loss": 0.4192, "step": 19670 }, { "epoch": 3.211093424758173, "grad_norm": 2.043198585510254, "learning_rate": 1.8295518268178817e-05, "loss": 0.5146, "step": 19671 }, { "epoch": 3.2112566834006775, "grad_norm": 1.8824375867843628, "learning_rate": 1.8295341056047858e-05, "loss": 0.4837, "step": 19672 }, { "epoch": 3.211419942043182, "grad_norm": 1.9548472166061401, "learning_rate": 1.8295163835563497e-05, "loss": 0.5435, "step": 19673 }, { "epoch": 3.2115832006856864, "grad_norm": 1.9567254781723022, "learning_rate": 1.8294986606725907e-05, "loss": 0.5031, "step": 19674 }, { "epoch": 3.211746459328191, "grad_norm": 1.617077350616455, "learning_rate": 1.8294809369535265e-05, "loss": 0.4347, "step": 19675 }, { "epoch": 3.2119097179706952, "grad_norm": 1.6488335132598877, "learning_rate": 1.8294632123991753e-05, "loss": 0.4427, "step": 19676 }, { "epoch": 3.2120729766131992, "grad_norm": 2.05963397026062, "learning_rate": 1.8294454870095547e-05, "loss": 0.489, "step": 19677 }, { "epoch": 3.2122362352557037, "grad_norm": 2.299913167953491, "learning_rate": 1.8294277607846834e-05, "loss": 0.4816, "step": 19678 }, { "epoch": 3.212399493898208, "grad_norm": 1.6785780191421509, "learning_rate": 1.829410033724578e-05, "loss": 0.4503, "step": 19679 }, { "epoch": 3.2125627525407126, "grad_norm": 2.0155081748962402, "learning_rate": 1.829392305829257e-05, "loss": 0.4553, "step": 19680 }, { "epoch": 3.212726011183217, "grad_norm": 2.4312777519226074, "learning_rate": 1.829374577098738e-05, "loss": 0.5325, "step": 19681 }, { "epoch": 3.2128892698257214, "grad_norm": 2.511763095855713, "learning_rate": 1.8293568475330393e-05, "loss": 0.6153, "step": 19682 }, { "epoch": 3.213052528468226, "grad_norm": 1.8054568767547607, "learning_rate": 1.8293391171321784e-05, "loss": 0.4848, "step": 19683 }, { "epoch": 3.2132157871107303, "grad_norm": 2.1676671504974365, "learning_rate": 1.829321385896173e-05, "loss": 0.5622, "step": 19684 }, { "epoch": 3.2133790457532347, "grad_norm": 2.304023265838623, "learning_rate": 1.8293036538250418e-05, "loss": 0.4822, "step": 19685 }, { "epoch": 3.213542304395739, "grad_norm": 2.3942205905914307, "learning_rate": 1.8292859209188014e-05, "loss": 0.4138, "step": 19686 }, { "epoch": 3.213705563038243, "grad_norm": 1.6554298400878906, "learning_rate": 1.8292681871774705e-05, "loss": 0.4151, "step": 19687 }, { "epoch": 3.2138688216807476, "grad_norm": 2.010852336883545, "learning_rate": 1.8292504526010668e-05, "loss": 0.4804, "step": 19688 }, { "epoch": 3.214032080323252, "grad_norm": 1.8285444974899292, "learning_rate": 1.8292327171896082e-05, "loss": 0.4671, "step": 19689 }, { "epoch": 3.2141953389657565, "grad_norm": 2.0833446979522705, "learning_rate": 1.8292149809431123e-05, "loss": 0.5755, "step": 19690 }, { "epoch": 3.214358597608261, "grad_norm": 1.8746492862701416, "learning_rate": 1.829197243861597e-05, "loss": 0.4718, "step": 19691 }, { "epoch": 3.2145218562507654, "grad_norm": 2.0355634689331055, "learning_rate": 1.8291795059450806e-05, "loss": 0.4219, "step": 19692 }, { "epoch": 3.21468511489327, "grad_norm": 1.8298709392547607, "learning_rate": 1.8291617671935807e-05, "loss": 0.4551, "step": 19693 }, { "epoch": 3.2148483735357742, "grad_norm": 2.0532383918762207, "learning_rate": 1.829144027607115e-05, "loss": 0.4926, "step": 19694 }, { "epoch": 3.2150116321782782, "grad_norm": 2.0638351440429688, "learning_rate": 1.8291262871857015e-05, "loss": 0.5067, "step": 19695 }, { "epoch": 3.2151748908207827, "grad_norm": 1.7651591300964355, "learning_rate": 1.829108545929358e-05, "loss": 0.4423, "step": 19696 }, { "epoch": 3.215338149463287, "grad_norm": 1.7539269924163818, "learning_rate": 1.8290908038381024e-05, "loss": 0.4264, "step": 19697 }, { "epoch": 3.2155014081057915, "grad_norm": 2.208038806915283, "learning_rate": 1.8290730609119525e-05, "loss": 0.4573, "step": 19698 }, { "epoch": 3.215664666748296, "grad_norm": 2.3940048217773438, "learning_rate": 1.8290553171509263e-05, "loss": 0.5779, "step": 19699 }, { "epoch": 3.2158279253908004, "grad_norm": 1.7746416330337524, "learning_rate": 1.8290375725550417e-05, "loss": 0.4586, "step": 19700 }, { "epoch": 3.215991184033305, "grad_norm": 2.1617279052734375, "learning_rate": 1.8290198271243166e-05, "loss": 0.5727, "step": 19701 }, { "epoch": 3.2161544426758093, "grad_norm": 1.7708792686462402, "learning_rate": 1.8290020808587688e-05, "loss": 0.4228, "step": 19702 }, { "epoch": 3.2163177013183137, "grad_norm": 2.3087081909179688, "learning_rate": 1.8289843337584158e-05, "loss": 0.4836, "step": 19703 }, { "epoch": 3.2164809599608177, "grad_norm": 1.9485132694244385, "learning_rate": 1.828966585823276e-05, "loss": 0.469, "step": 19704 }, { "epoch": 3.216644218603322, "grad_norm": 1.8208705186843872, "learning_rate": 1.8289488370533667e-05, "loss": 0.4662, "step": 19705 }, { "epoch": 3.2168074772458266, "grad_norm": 1.7626985311508179, "learning_rate": 1.8289310874487066e-05, "loss": 0.4755, "step": 19706 }, { "epoch": 3.216970735888331, "grad_norm": 1.6838027238845825, "learning_rate": 1.828913337009313e-05, "loss": 0.4455, "step": 19707 }, { "epoch": 3.2171339945308355, "grad_norm": 1.997738003730774, "learning_rate": 1.828895585735204e-05, "loss": 0.5284, "step": 19708 }, { "epoch": 3.21729725317334, "grad_norm": 2.015070676803589, "learning_rate": 1.828877833626397e-05, "loss": 0.4735, "step": 19709 }, { "epoch": 3.2174605118158444, "grad_norm": 2.0451624393463135, "learning_rate": 1.8288600806829104e-05, "loss": 0.485, "step": 19710 }, { "epoch": 3.217623770458349, "grad_norm": 1.8634099960327148, "learning_rate": 1.828842326904762e-05, "loss": 0.397, "step": 19711 }, { "epoch": 3.217787029100853, "grad_norm": 1.762561559677124, "learning_rate": 1.8288245722919695e-05, "loss": 0.421, "step": 19712 }, { "epoch": 3.2179502877433572, "grad_norm": 2.1464874744415283, "learning_rate": 1.828806816844551e-05, "loss": 0.4366, "step": 19713 }, { "epoch": 3.2181135463858617, "grad_norm": 1.9309561252593994, "learning_rate": 1.828789060562524e-05, "loss": 0.4444, "step": 19714 }, { "epoch": 3.218276805028366, "grad_norm": 1.9617294073104858, "learning_rate": 1.8287713034459072e-05, "loss": 0.44, "step": 19715 }, { "epoch": 3.2184400636708705, "grad_norm": 1.8072595596313477, "learning_rate": 1.8287535454947172e-05, "loss": 0.4604, "step": 19716 }, { "epoch": 3.218603322313375, "grad_norm": 2.129019021987915, "learning_rate": 1.828735786708973e-05, "loss": 0.485, "step": 19717 }, { "epoch": 3.2187665809558794, "grad_norm": 1.891739010810852, "learning_rate": 1.8287180270886922e-05, "loss": 0.4389, "step": 19718 }, { "epoch": 3.218929839598384, "grad_norm": 1.8310402631759644, "learning_rate": 1.8287002666338924e-05, "loss": 0.4676, "step": 19719 }, { "epoch": 3.2190930982408883, "grad_norm": 1.7038880586624146, "learning_rate": 1.8286825053445916e-05, "loss": 0.3976, "step": 19720 }, { "epoch": 3.2192563568833927, "grad_norm": 1.896985650062561, "learning_rate": 1.828664743220808e-05, "loss": 0.435, "step": 19721 }, { "epoch": 3.2194196155258967, "grad_norm": 1.618194580078125, "learning_rate": 1.828646980262559e-05, "loss": 0.4321, "step": 19722 }, { "epoch": 3.219582874168401, "grad_norm": 1.6995024681091309, "learning_rate": 1.8286292164698624e-05, "loss": 0.4352, "step": 19723 }, { "epoch": 3.2197461328109056, "grad_norm": 1.9158746004104614, "learning_rate": 1.8286114518427372e-05, "loss": 0.5333, "step": 19724 }, { "epoch": 3.21990939145341, "grad_norm": 1.955507755279541, "learning_rate": 1.8285936863811998e-05, "loss": 0.45, "step": 19725 }, { "epoch": 3.2200726500959145, "grad_norm": 1.9535053968429565, "learning_rate": 1.828575920085269e-05, "loss": 0.4296, "step": 19726 }, { "epoch": 3.220235908738419, "grad_norm": 1.7457698583602905, "learning_rate": 1.8285581529549625e-05, "loss": 0.4471, "step": 19727 }, { "epoch": 3.2203991673809234, "grad_norm": 2.1091747283935547, "learning_rate": 1.8285403849902977e-05, "loss": 0.5092, "step": 19728 }, { "epoch": 3.220562426023428, "grad_norm": 2.0415871143341064, "learning_rate": 1.8285226161912937e-05, "loss": 0.4395, "step": 19729 }, { "epoch": 3.220725684665932, "grad_norm": 1.8372162580490112, "learning_rate": 1.8285048465579672e-05, "loss": 0.4393, "step": 19730 }, { "epoch": 3.2208889433084362, "grad_norm": 1.7880946397781372, "learning_rate": 1.8284870760903367e-05, "loss": 0.4314, "step": 19731 }, { "epoch": 3.2210522019509407, "grad_norm": 2.4104645252227783, "learning_rate": 1.8284693047884198e-05, "loss": 0.5813, "step": 19732 }, { "epoch": 3.221215460593445, "grad_norm": 1.838129997253418, "learning_rate": 1.8284515326522347e-05, "loss": 0.464, "step": 19733 }, { "epoch": 3.2213787192359495, "grad_norm": 1.9246076345443726, "learning_rate": 1.828433759681799e-05, "loss": 0.4236, "step": 19734 }, { "epoch": 3.221541977878454, "grad_norm": 1.9546054601669312, "learning_rate": 1.8284159858771307e-05, "loss": 0.4643, "step": 19735 }, { "epoch": 3.2217052365209584, "grad_norm": 1.8680888414382935, "learning_rate": 1.828398211238248e-05, "loss": 0.4354, "step": 19736 }, { "epoch": 3.221868495163463, "grad_norm": 2.026540994644165, "learning_rate": 1.8283804357651683e-05, "loss": 0.4387, "step": 19737 }, { "epoch": 3.2220317538059673, "grad_norm": 1.88138747215271, "learning_rate": 1.8283626594579097e-05, "loss": 0.5368, "step": 19738 }, { "epoch": 3.2221950124484713, "grad_norm": 1.9321210384368896, "learning_rate": 1.8283448823164904e-05, "loss": 0.408, "step": 19739 }, { "epoch": 3.2223582710909757, "grad_norm": 1.795000672340393, "learning_rate": 1.8283271043409275e-05, "loss": 0.4934, "step": 19740 }, { "epoch": 3.22252152973348, "grad_norm": 1.9393657445907593, "learning_rate": 1.82830932553124e-05, "loss": 0.4698, "step": 19741 }, { "epoch": 3.2226847883759846, "grad_norm": 1.8735644817352295, "learning_rate": 1.8282915458874446e-05, "loss": 0.5073, "step": 19742 }, { "epoch": 3.222848047018489, "grad_norm": 2.1061511039733887, "learning_rate": 1.82827376540956e-05, "loss": 0.5575, "step": 19743 }, { "epoch": 3.2230113056609935, "grad_norm": 1.9558589458465576, "learning_rate": 1.8282559840976043e-05, "loss": 0.4743, "step": 19744 }, { "epoch": 3.223174564303498, "grad_norm": 2.253047466278076, "learning_rate": 1.828238201951595e-05, "loss": 0.5224, "step": 19745 }, { "epoch": 3.2233378229460024, "grad_norm": 2.344491481781006, "learning_rate": 1.82822041897155e-05, "loss": 0.5029, "step": 19746 }, { "epoch": 3.223501081588507, "grad_norm": 1.7785671949386597, "learning_rate": 1.828202635157487e-05, "loss": 0.4723, "step": 19747 }, { "epoch": 3.223664340231011, "grad_norm": 1.803494930267334, "learning_rate": 1.828184850509424e-05, "loss": 0.4646, "step": 19748 }, { "epoch": 3.223827598873515, "grad_norm": 1.9012837409973145, "learning_rate": 1.8281670650273796e-05, "loss": 0.4534, "step": 19749 }, { "epoch": 3.2239908575160197, "grad_norm": 1.8963944911956787, "learning_rate": 1.8281492787113707e-05, "loss": 0.4754, "step": 19750 }, { "epoch": 3.224154116158524, "grad_norm": 1.6369264125823975, "learning_rate": 1.828131491561416e-05, "loss": 0.3985, "step": 19751 }, { "epoch": 3.2243173748010285, "grad_norm": 1.6247692108154297, "learning_rate": 1.8281137035775332e-05, "loss": 0.4061, "step": 19752 }, { "epoch": 3.224480633443533, "grad_norm": 2.275331735610962, "learning_rate": 1.82809591475974e-05, "loss": 0.512, "step": 19753 }, { "epoch": 3.2246438920860374, "grad_norm": 1.4983733892440796, "learning_rate": 1.8280781251080546e-05, "loss": 0.4203, "step": 19754 }, { "epoch": 3.224807150728542, "grad_norm": 1.7650138139724731, "learning_rate": 1.8280603346224945e-05, "loss": 0.4613, "step": 19755 }, { "epoch": 3.2249704093710463, "grad_norm": 1.9371355772018433, "learning_rate": 1.8280425433030782e-05, "loss": 0.4919, "step": 19756 }, { "epoch": 3.2251336680135503, "grad_norm": 1.801053762435913, "learning_rate": 1.8280247511498226e-05, "loss": 0.4849, "step": 19757 }, { "epoch": 3.2252969266560547, "grad_norm": 1.5591658353805542, "learning_rate": 1.828006958162747e-05, "loss": 0.3612, "step": 19758 }, { "epoch": 3.225460185298559, "grad_norm": 1.814083218574524, "learning_rate": 1.8279891643418685e-05, "loss": 0.4261, "step": 19759 }, { "epoch": 3.2256234439410636, "grad_norm": 1.860736608505249, "learning_rate": 1.8279713696872047e-05, "loss": 0.5, "step": 19760 }, { "epoch": 3.225786702583568, "grad_norm": 2.0602238178253174, "learning_rate": 1.8279535741987745e-05, "loss": 0.5007, "step": 19761 }, { "epoch": 3.2259499612260725, "grad_norm": 2.1616873741149902, "learning_rate": 1.8279357778765946e-05, "loss": 0.5197, "step": 19762 }, { "epoch": 3.226113219868577, "grad_norm": 1.8931833505630493, "learning_rate": 1.8279179807206842e-05, "loss": 0.4347, "step": 19763 }, { "epoch": 3.2262764785110813, "grad_norm": 2.472564697265625, "learning_rate": 1.8279001827310603e-05, "loss": 0.522, "step": 19764 }, { "epoch": 3.2264397371535853, "grad_norm": 2.543645143508911, "learning_rate": 1.8278823839077412e-05, "loss": 0.5419, "step": 19765 }, { "epoch": 3.22660299579609, "grad_norm": 2.10745906829834, "learning_rate": 1.8278645842507448e-05, "loss": 0.5593, "step": 19766 }, { "epoch": 3.226766254438594, "grad_norm": 1.9902522563934326, "learning_rate": 1.827846783760089e-05, "loss": 0.4386, "step": 19767 }, { "epoch": 3.2269295130810987, "grad_norm": 1.6609046459197998, "learning_rate": 1.8278289824357917e-05, "loss": 0.4359, "step": 19768 }, { "epoch": 3.227092771723603, "grad_norm": 1.940859317779541, "learning_rate": 1.8278111802778705e-05, "loss": 0.4726, "step": 19769 }, { "epoch": 3.2272560303661075, "grad_norm": 1.933000087738037, "learning_rate": 1.8277933772863443e-05, "loss": 0.4862, "step": 19770 }, { "epoch": 3.227419289008612, "grad_norm": 1.6849687099456787, "learning_rate": 1.8277755734612302e-05, "loss": 0.4133, "step": 19771 }, { "epoch": 3.2275825476511164, "grad_norm": 1.6419737339019775, "learning_rate": 1.827757768802546e-05, "loss": 0.4522, "step": 19772 }, { "epoch": 3.227745806293621, "grad_norm": 1.9152719974517822, "learning_rate": 1.82773996331031e-05, "loss": 0.5032, "step": 19773 }, { "epoch": 3.2279090649361253, "grad_norm": 2.004554033279419, "learning_rate": 1.8277221569845402e-05, "loss": 0.4549, "step": 19774 }, { "epoch": 3.2280723235786293, "grad_norm": 2.447281837463379, "learning_rate": 1.8277043498252544e-05, "loss": 0.5247, "step": 19775 }, { "epoch": 3.2282355822211337, "grad_norm": 2.0307157039642334, "learning_rate": 1.8276865418324706e-05, "loss": 0.4292, "step": 19776 }, { "epoch": 3.228398840863638, "grad_norm": 2.053356647491455, "learning_rate": 1.8276687330062067e-05, "loss": 0.4738, "step": 19777 }, { "epoch": 3.2285620995061426, "grad_norm": 2.159471035003662, "learning_rate": 1.8276509233464806e-05, "loss": 0.5348, "step": 19778 }, { "epoch": 3.228725358148647, "grad_norm": 2.178358793258667, "learning_rate": 1.82763311285331e-05, "loss": 0.5651, "step": 19779 }, { "epoch": 3.2288886167911515, "grad_norm": 1.943186640739441, "learning_rate": 1.8276153015267133e-05, "loss": 0.4359, "step": 19780 }, { "epoch": 3.229051875433656, "grad_norm": 2.030911445617676, "learning_rate": 1.827597489366708e-05, "loss": 0.5649, "step": 19781 }, { "epoch": 3.2292151340761603, "grad_norm": 1.8532209396362305, "learning_rate": 1.8275796763733126e-05, "loss": 0.4661, "step": 19782 }, { "epoch": 3.2293783927186643, "grad_norm": 1.8343489170074463, "learning_rate": 1.8275618625465443e-05, "loss": 0.3931, "step": 19783 }, { "epoch": 3.2295416513611688, "grad_norm": 1.9170185327529907, "learning_rate": 1.8275440478864216e-05, "loss": 0.4613, "step": 19784 }, { "epoch": 3.229704910003673, "grad_norm": 1.723833680152893, "learning_rate": 1.8275262323929625e-05, "loss": 0.4254, "step": 19785 }, { "epoch": 3.2298681686461777, "grad_norm": 2.1768293380737305, "learning_rate": 1.8275084160661842e-05, "loss": 0.487, "step": 19786 }, { "epoch": 3.230031427288682, "grad_norm": 2.0834195613861084, "learning_rate": 1.8274905989061057e-05, "loss": 0.4766, "step": 19787 }, { "epoch": 3.2301946859311865, "grad_norm": 2.173133373260498, "learning_rate": 1.827472780912744e-05, "loss": 0.5044, "step": 19788 }, { "epoch": 3.230357944573691, "grad_norm": 1.9893742799758911, "learning_rate": 1.8274549620861174e-05, "loss": 0.4798, "step": 19789 }, { "epoch": 3.2305212032161954, "grad_norm": 1.9256478548049927, "learning_rate": 1.8274371424262442e-05, "loss": 0.4324, "step": 19790 }, { "epoch": 3.2306844618587, "grad_norm": 1.7570847272872925, "learning_rate": 1.827419321933142e-05, "loss": 0.4447, "step": 19791 }, { "epoch": 3.230847720501204, "grad_norm": 1.7684173583984375, "learning_rate": 1.8274015006068282e-05, "loss": 0.4614, "step": 19792 }, { "epoch": 3.2310109791437083, "grad_norm": 2.1403696537017822, "learning_rate": 1.8273836784473218e-05, "loss": 0.545, "step": 19793 }, { "epoch": 3.2311742377862127, "grad_norm": 2.174215316772461, "learning_rate": 1.8273658554546402e-05, "loss": 0.4667, "step": 19794 }, { "epoch": 3.231337496428717, "grad_norm": 2.199486494064331, "learning_rate": 1.8273480316288014e-05, "loss": 0.5154, "step": 19795 }, { "epoch": 3.2315007550712216, "grad_norm": 1.962664008140564, "learning_rate": 1.827330206969823e-05, "loss": 0.4646, "step": 19796 }, { "epoch": 3.231664013713726, "grad_norm": 2.048784017562866, "learning_rate": 1.8273123814777237e-05, "loss": 0.4839, "step": 19797 }, { "epoch": 3.2318272723562305, "grad_norm": 1.8029698133468628, "learning_rate": 1.827294555152521e-05, "loss": 0.4318, "step": 19798 }, { "epoch": 3.231990530998735, "grad_norm": 2.4245545864105225, "learning_rate": 1.827276727994233e-05, "loss": 0.4895, "step": 19799 }, { "epoch": 3.2321537896412393, "grad_norm": 1.9175947904586792, "learning_rate": 1.8272589000028774e-05, "loss": 0.5155, "step": 19800 }, { "epoch": 3.2323170482837433, "grad_norm": 1.9413102865219116, "learning_rate": 1.8272410711784722e-05, "loss": 0.4837, "step": 19801 }, { "epoch": 3.2324803069262478, "grad_norm": 1.9432035684585571, "learning_rate": 1.8272232415210355e-05, "loss": 0.4564, "step": 19802 }, { "epoch": 3.232643565568752, "grad_norm": 1.8126466274261475, "learning_rate": 1.8272054110305853e-05, "loss": 0.4578, "step": 19803 }, { "epoch": 3.2328068242112566, "grad_norm": 2.241772174835205, "learning_rate": 1.8271875797071395e-05, "loss": 0.5211, "step": 19804 }, { "epoch": 3.232970082853761, "grad_norm": 1.578998327255249, "learning_rate": 1.827169747550716e-05, "loss": 0.4433, "step": 19805 }, { "epoch": 3.2331333414962655, "grad_norm": 1.9582678079605103, "learning_rate": 1.8271519145613327e-05, "loss": 0.4584, "step": 19806 }, { "epoch": 3.23329660013877, "grad_norm": 1.9424922466278076, "learning_rate": 1.827134080739008e-05, "loss": 0.4347, "step": 19807 }, { "epoch": 3.2334598587812744, "grad_norm": 1.647964596748352, "learning_rate": 1.827116246083759e-05, "loss": 0.4712, "step": 19808 }, { "epoch": 3.233623117423779, "grad_norm": 2.2228574752807617, "learning_rate": 1.8270984105956044e-05, "loss": 0.4632, "step": 19809 }, { "epoch": 3.233786376066283, "grad_norm": 1.8270292282104492, "learning_rate": 1.827080574274562e-05, "loss": 0.4273, "step": 19810 }, { "epoch": 3.2339496347087873, "grad_norm": 1.8364341259002686, "learning_rate": 1.8270627371206495e-05, "loss": 0.4569, "step": 19811 }, { "epoch": 3.2341128933512917, "grad_norm": 2.4708588123321533, "learning_rate": 1.8270448991338852e-05, "loss": 0.5157, "step": 19812 }, { "epoch": 3.234276151993796, "grad_norm": 1.5743780136108398, "learning_rate": 1.827027060314287e-05, "loss": 0.4087, "step": 19813 }, { "epoch": 3.2344394106363006, "grad_norm": 1.8878320455551147, "learning_rate": 1.8270092206618723e-05, "loss": 0.3923, "step": 19814 }, { "epoch": 3.234602669278805, "grad_norm": 1.8849319219589233, "learning_rate": 1.82699138017666e-05, "loss": 0.4907, "step": 19815 }, { "epoch": 3.2347659279213095, "grad_norm": 2.24770188331604, "learning_rate": 1.8269735388586673e-05, "loss": 0.49, "step": 19816 }, { "epoch": 3.234929186563814, "grad_norm": 2.090771198272705, "learning_rate": 1.8269556967079127e-05, "loss": 0.5029, "step": 19817 }, { "epoch": 3.235092445206318, "grad_norm": 1.8641576766967773, "learning_rate": 1.8269378537244136e-05, "loss": 0.478, "step": 19818 }, { "epoch": 3.2352557038488223, "grad_norm": 1.945031762123108, "learning_rate": 1.8269200099081887e-05, "loss": 0.4407, "step": 19819 }, { "epoch": 3.2354189624913268, "grad_norm": 2.044161081314087, "learning_rate": 1.8269021652592555e-05, "loss": 0.6832, "step": 19820 }, { "epoch": 3.235582221133831, "grad_norm": 1.9448083639144897, "learning_rate": 1.826884319777632e-05, "loss": 0.4996, "step": 19821 }, { "epoch": 3.2357454797763356, "grad_norm": 2.087594509124756, "learning_rate": 1.826866473463336e-05, "loss": 0.535, "step": 19822 }, { "epoch": 3.23590873841884, "grad_norm": 2.220273494720459, "learning_rate": 1.8268486263163858e-05, "loss": 0.542, "step": 19823 }, { "epoch": 3.2360719970613445, "grad_norm": 1.8438373804092407, "learning_rate": 1.8268307783367994e-05, "loss": 0.3982, "step": 19824 }, { "epoch": 3.236235255703849, "grad_norm": 2.036590337753296, "learning_rate": 1.8268129295245946e-05, "loss": 0.4104, "step": 19825 }, { "epoch": 3.2363985143463534, "grad_norm": 1.8373087644577026, "learning_rate": 1.8267950798797892e-05, "loss": 0.4012, "step": 19826 }, { "epoch": 3.236561772988858, "grad_norm": 1.9465183019638062, "learning_rate": 1.8267772294024016e-05, "loss": 0.4923, "step": 19827 }, { "epoch": 3.236725031631362, "grad_norm": 1.9365758895874023, "learning_rate": 1.826759378092449e-05, "loss": 0.4182, "step": 19828 }, { "epoch": 3.2368882902738663, "grad_norm": 2.1688482761383057, "learning_rate": 1.8267415259499506e-05, "loss": 0.5098, "step": 19829 }, { "epoch": 3.2370515489163707, "grad_norm": 2.095107078552246, "learning_rate": 1.8267236729749234e-05, "loss": 0.4722, "step": 19830 }, { "epoch": 3.237214807558875, "grad_norm": 2.0042600631713867, "learning_rate": 1.8267058191673858e-05, "loss": 0.4456, "step": 19831 }, { "epoch": 3.2373780662013796, "grad_norm": 2.2536723613739014, "learning_rate": 1.8266879645273557e-05, "loss": 0.5944, "step": 19832 }, { "epoch": 3.237541324843884, "grad_norm": 1.790701985359192, "learning_rate": 1.826670109054851e-05, "loss": 0.4556, "step": 19833 }, { "epoch": 3.2377045834863885, "grad_norm": 2.3091604709625244, "learning_rate": 1.8266522527498894e-05, "loss": 0.5134, "step": 19834 }, { "epoch": 3.237867842128893, "grad_norm": 1.577185034751892, "learning_rate": 1.8266343956124895e-05, "loss": 0.4239, "step": 19835 }, { "epoch": 3.238031100771397, "grad_norm": 2.1533334255218506, "learning_rate": 1.826616537642669e-05, "loss": 0.513, "step": 19836 }, { "epoch": 3.2381943594139013, "grad_norm": 1.6994380950927734, "learning_rate": 1.826598678840446e-05, "loss": 0.4088, "step": 19837 }, { "epoch": 3.2383576180564058, "grad_norm": 2.283864974975586, "learning_rate": 1.8265808192058377e-05, "loss": 0.5423, "step": 19838 }, { "epoch": 3.23852087669891, "grad_norm": 2.1102755069732666, "learning_rate": 1.8265629587388634e-05, "loss": 0.4807, "step": 19839 }, { "epoch": 3.2386841353414146, "grad_norm": 2.660179615020752, "learning_rate": 1.8265450974395403e-05, "loss": 1.0319, "step": 19840 }, { "epoch": 3.238847393983919, "grad_norm": 1.9428311586380005, "learning_rate": 1.8265272353078863e-05, "loss": 0.4534, "step": 19841 }, { "epoch": 3.2390106526264235, "grad_norm": 2.2522826194763184, "learning_rate": 1.82650937234392e-05, "loss": 0.4638, "step": 19842 }, { "epoch": 3.239173911268928, "grad_norm": 2.07511305809021, "learning_rate": 1.8264915085476585e-05, "loss": 0.5067, "step": 19843 }, { "epoch": 3.2393371699114324, "grad_norm": 2.4052040576934814, "learning_rate": 1.8264736439191205e-05, "loss": 0.5538, "step": 19844 }, { "epoch": 3.2395004285539364, "grad_norm": 1.618126630783081, "learning_rate": 1.8264557784583234e-05, "loss": 0.4054, "step": 19845 }, { "epoch": 3.239663687196441, "grad_norm": 2.291670799255371, "learning_rate": 1.826437912165286e-05, "loss": 0.5994, "step": 19846 }, { "epoch": 3.2398269458389453, "grad_norm": 1.9798251390457153, "learning_rate": 1.8264200450400256e-05, "loss": 0.4906, "step": 19847 }, { "epoch": 3.2399902044814497, "grad_norm": 2.067878007888794, "learning_rate": 1.8264021770825607e-05, "loss": 0.4825, "step": 19848 }, { "epoch": 3.240153463123954, "grad_norm": 1.6447994709014893, "learning_rate": 1.826384308292909e-05, "loss": 0.4228, "step": 19849 }, { "epoch": 3.2403167217664586, "grad_norm": 2.0298912525177, "learning_rate": 1.826366438671088e-05, "loss": 0.4957, "step": 19850 }, { "epoch": 3.240479980408963, "grad_norm": 1.8682385683059692, "learning_rate": 1.826348568217117e-05, "loss": 0.4159, "step": 19851 }, { "epoch": 3.2406432390514675, "grad_norm": 1.8607889413833618, "learning_rate": 1.8263306969310127e-05, "loss": 0.4373, "step": 19852 }, { "epoch": 3.2408064976939714, "grad_norm": 2.428715944290161, "learning_rate": 1.826312824812794e-05, "loss": 0.5251, "step": 19853 }, { "epoch": 3.240969756336476, "grad_norm": 1.9911134243011475, "learning_rate": 1.826294951862478e-05, "loss": 0.4945, "step": 19854 }, { "epoch": 3.2411330149789803, "grad_norm": 1.93474280834198, "learning_rate": 1.8262770780800834e-05, "loss": 0.5251, "step": 19855 }, { "epoch": 3.2412962736214848, "grad_norm": 1.7007322311401367, "learning_rate": 1.826259203465628e-05, "loss": 0.4015, "step": 19856 }, { "epoch": 3.241459532263989, "grad_norm": 2.049635648727417, "learning_rate": 1.8262413280191298e-05, "loss": 0.4607, "step": 19857 }, { "epoch": 3.2416227909064936, "grad_norm": 1.842278003692627, "learning_rate": 1.826223451740607e-05, "loss": 0.4339, "step": 19858 }, { "epoch": 3.241786049548998, "grad_norm": 1.9717143774032593, "learning_rate": 1.8262055746300773e-05, "loss": 0.4726, "step": 19859 }, { "epoch": 3.2419493081915025, "grad_norm": 2.410515785217285, "learning_rate": 1.8261876966875583e-05, "loss": 0.4734, "step": 19860 }, { "epoch": 3.242112566834007, "grad_norm": 1.966588020324707, "learning_rate": 1.826169817913069e-05, "loss": 0.489, "step": 19861 }, { "epoch": 3.2422758254765114, "grad_norm": 2.257474660873413, "learning_rate": 1.826151938306627e-05, "loss": 0.5982, "step": 19862 }, { "epoch": 3.2424390841190154, "grad_norm": 1.7792490720748901, "learning_rate": 1.82613405786825e-05, "loss": 0.4417, "step": 19863 }, { "epoch": 3.24260234276152, "grad_norm": 1.6467094421386719, "learning_rate": 1.8261161765979566e-05, "loss": 0.4375, "step": 19864 }, { "epoch": 3.2427656014040243, "grad_norm": 1.9282317161560059, "learning_rate": 1.8260982944957638e-05, "loss": 0.4755, "step": 19865 }, { "epoch": 3.2429288600465287, "grad_norm": 1.580509901046753, "learning_rate": 1.8260804115616908e-05, "loss": 0.3793, "step": 19866 }, { "epoch": 3.243092118689033, "grad_norm": 1.7956122159957886, "learning_rate": 1.826062527795755e-05, "loss": 0.4862, "step": 19867 }, { "epoch": 3.2432553773315376, "grad_norm": 2.032193183898926, "learning_rate": 1.826044643197974e-05, "loss": 0.5103, "step": 19868 }, { "epoch": 3.243418635974042, "grad_norm": 2.2485523223876953, "learning_rate": 1.8260267577683665e-05, "loss": 0.5315, "step": 19869 }, { "epoch": 3.2435818946165464, "grad_norm": 2.20963454246521, "learning_rate": 1.8260088715069506e-05, "loss": 0.5091, "step": 19870 }, { "epoch": 3.2437451532590504, "grad_norm": 1.8808228969573975, "learning_rate": 1.8259909844137435e-05, "loss": 0.4095, "step": 19871 }, { "epoch": 3.243908411901555, "grad_norm": 2.3236608505249023, "learning_rate": 1.825973096488764e-05, "loss": 0.5279, "step": 19872 }, { "epoch": 3.2440716705440593, "grad_norm": 1.8761402368545532, "learning_rate": 1.8259552077320297e-05, "loss": 0.4186, "step": 19873 }, { "epoch": 3.2442349291865638, "grad_norm": 1.853437066078186, "learning_rate": 1.825937318143559e-05, "loss": 0.395, "step": 19874 }, { "epoch": 3.244398187829068, "grad_norm": 1.710277795791626, "learning_rate": 1.825919427723369e-05, "loss": 0.4367, "step": 19875 }, { "epoch": 3.2445614464715726, "grad_norm": 2.3811535835266113, "learning_rate": 1.8259015364714786e-05, "loss": 0.456, "step": 19876 }, { "epoch": 3.244724705114077, "grad_norm": 2.2033474445343018, "learning_rate": 1.825883644387906e-05, "loss": 0.468, "step": 19877 }, { "epoch": 3.2448879637565815, "grad_norm": 1.9465454816818237, "learning_rate": 1.8258657514726683e-05, "loss": 0.4034, "step": 19878 }, { "epoch": 3.245051222399086, "grad_norm": 2.0051772594451904, "learning_rate": 1.8258478577257844e-05, "loss": 0.4571, "step": 19879 }, { "epoch": 3.24521448104159, "grad_norm": 2.2070631980895996, "learning_rate": 1.8258299631472717e-05, "loss": 0.5373, "step": 19880 }, { "epoch": 3.2453777396840944, "grad_norm": 2.254781484603882, "learning_rate": 1.8258120677371484e-05, "loss": 0.5365, "step": 19881 }, { "epoch": 3.245540998326599, "grad_norm": 2.1196467876434326, "learning_rate": 1.8257941714954326e-05, "loss": 0.4777, "step": 19882 }, { "epoch": 3.2457042569691033, "grad_norm": 1.788446307182312, "learning_rate": 1.8257762744221422e-05, "loss": 0.5052, "step": 19883 }, { "epoch": 3.2458675156116077, "grad_norm": 2.2347207069396973, "learning_rate": 1.8257583765172955e-05, "loss": 0.5096, "step": 19884 }, { "epoch": 3.246030774254112, "grad_norm": 2.4230611324310303, "learning_rate": 1.8257404777809102e-05, "loss": 0.5986, "step": 19885 }, { "epoch": 3.2461940328966166, "grad_norm": 2.1515071392059326, "learning_rate": 1.8257225782130044e-05, "loss": 0.595, "step": 19886 }, { "epoch": 3.246357291539121, "grad_norm": 1.7397183179855347, "learning_rate": 1.8257046778135966e-05, "loss": 0.4102, "step": 19887 }, { "epoch": 3.2465205501816254, "grad_norm": 1.9876456260681152, "learning_rate": 1.825686776582704e-05, "loss": 0.4887, "step": 19888 }, { "epoch": 3.2466838088241294, "grad_norm": 1.9642671346664429, "learning_rate": 1.825668874520345e-05, "loss": 0.4882, "step": 19889 }, { "epoch": 3.246847067466634, "grad_norm": 1.9460172653198242, "learning_rate": 1.825650971626538e-05, "loss": 0.5189, "step": 19890 }, { "epoch": 3.2470103261091383, "grad_norm": 2.143934726715088, "learning_rate": 1.8256330679013007e-05, "loss": 0.5193, "step": 19891 }, { "epoch": 3.2471735847516427, "grad_norm": 1.954857587814331, "learning_rate": 1.8256151633446507e-05, "loss": 0.5027, "step": 19892 }, { "epoch": 3.247336843394147, "grad_norm": 2.215578317642212, "learning_rate": 1.8255972579566064e-05, "loss": 0.6088, "step": 19893 }, { "epoch": 3.2475001020366516, "grad_norm": 1.8269784450531006, "learning_rate": 1.8255793517371864e-05, "loss": 0.4656, "step": 19894 }, { "epoch": 3.247663360679156, "grad_norm": 1.7076570987701416, "learning_rate": 1.825561444686408e-05, "loss": 0.3969, "step": 19895 }, { "epoch": 3.2478266193216605, "grad_norm": 1.8521252870559692, "learning_rate": 1.8255435368042894e-05, "loss": 0.4231, "step": 19896 }, { "epoch": 3.247989877964165, "grad_norm": 2.2111520767211914, "learning_rate": 1.8255256280908486e-05, "loss": 0.5532, "step": 19897 }, { "epoch": 3.248153136606669, "grad_norm": 2.2424046993255615, "learning_rate": 1.825507718546104e-05, "loss": 0.5281, "step": 19898 }, { "epoch": 3.2483163952491734, "grad_norm": 2.001190662384033, "learning_rate": 1.825489808170073e-05, "loss": 0.5122, "step": 19899 }, { "epoch": 3.248479653891678, "grad_norm": 2.4249374866485596, "learning_rate": 1.825471896962774e-05, "loss": 0.5946, "step": 19900 }, { "epoch": 3.2486429125341822, "grad_norm": 2.4750750064849854, "learning_rate": 1.8254539849242253e-05, "loss": 0.4907, "step": 19901 }, { "epoch": 3.2488061711766867, "grad_norm": 2.0469672679901123, "learning_rate": 1.8254360720544446e-05, "loss": 0.4478, "step": 19902 }, { "epoch": 3.248969429819191, "grad_norm": 2.4646084308624268, "learning_rate": 1.8254181583534496e-05, "loss": 0.582, "step": 19903 }, { "epoch": 3.2491326884616956, "grad_norm": 1.9825248718261719, "learning_rate": 1.825400243821259e-05, "loss": 0.4667, "step": 19904 }, { "epoch": 3.2492959471042, "grad_norm": 1.8104220628738403, "learning_rate": 1.8253823284578907e-05, "loss": 0.4988, "step": 19905 }, { "epoch": 3.249459205746704, "grad_norm": 1.9910534620285034, "learning_rate": 1.8253644122633628e-05, "loss": 0.5572, "step": 19906 }, { "epoch": 3.2496224643892084, "grad_norm": 2.154738187789917, "learning_rate": 1.8253464952376926e-05, "loss": 0.4838, "step": 19907 }, { "epoch": 3.249785723031713, "grad_norm": 1.7532366514205933, "learning_rate": 1.825328577380899e-05, "loss": 0.4567, "step": 19908 }, { "epoch": 3.2499489816742173, "grad_norm": 1.7065171003341675, "learning_rate": 1.825310658693e-05, "loss": 0.4682, "step": 19909 }, { "epoch": 3.2501122403167217, "grad_norm": 1.6093268394470215, "learning_rate": 1.825292739174013e-05, "loss": 0.3838, "step": 19910 }, { "epoch": 3.250275498959226, "grad_norm": 1.8842140436172485, "learning_rate": 1.8252748188239564e-05, "loss": 0.4494, "step": 19911 }, { "epoch": 3.2504387576017306, "grad_norm": 1.9161442518234253, "learning_rate": 1.8252568976428483e-05, "loss": 0.4628, "step": 19912 }, { "epoch": 3.250602016244235, "grad_norm": 1.6061969995498657, "learning_rate": 1.825238975630707e-05, "loss": 0.4305, "step": 19913 }, { "epoch": 3.2507652748867395, "grad_norm": 2.123081922531128, "learning_rate": 1.8252210527875502e-05, "loss": 0.425, "step": 19914 }, { "epoch": 3.250928533529244, "grad_norm": 1.9789525270462036, "learning_rate": 1.8252031291133957e-05, "loss": 0.4288, "step": 19915 }, { "epoch": 3.251091792171748, "grad_norm": 2.2017998695373535, "learning_rate": 1.825185204608262e-05, "loss": 0.5126, "step": 19916 }, { "epoch": 3.2512550508142524, "grad_norm": 2.177875280380249, "learning_rate": 1.8251672792721673e-05, "loss": 0.5366, "step": 19917 }, { "epoch": 3.251418309456757, "grad_norm": 1.6123872995376587, "learning_rate": 1.825149353105129e-05, "loss": 0.4745, "step": 19918 }, { "epoch": 3.2515815680992612, "grad_norm": 1.906653881072998, "learning_rate": 1.825131426107166e-05, "loss": 0.4893, "step": 19919 }, { "epoch": 3.2517448267417657, "grad_norm": 1.8109337091445923, "learning_rate": 1.8251134982782952e-05, "loss": 0.4292, "step": 19920 }, { "epoch": 3.25190808538427, "grad_norm": 1.6222740411758423, "learning_rate": 1.8250955696185357e-05, "loss": 0.3997, "step": 19921 }, { "epoch": 3.2520713440267746, "grad_norm": 1.8411736488342285, "learning_rate": 1.8250776401279053e-05, "loss": 0.464, "step": 19922 }, { "epoch": 3.252234602669279, "grad_norm": 1.9030483961105347, "learning_rate": 1.825059709806422e-05, "loss": 0.4374, "step": 19923 }, { "epoch": 3.252397861311783, "grad_norm": 1.9095194339752197, "learning_rate": 1.8250417786541033e-05, "loss": 0.4325, "step": 19924 }, { "epoch": 3.2525611199542874, "grad_norm": 2.2829782962799072, "learning_rate": 1.825023846670968e-05, "loss": 0.5155, "step": 19925 }, { "epoch": 3.252724378596792, "grad_norm": 1.6985522508621216, "learning_rate": 1.8250059138570343e-05, "loss": 0.4037, "step": 19926 }, { "epoch": 3.2528876372392963, "grad_norm": 2.064514398574829, "learning_rate": 1.824987980212319e-05, "loss": 0.4679, "step": 19927 }, { "epoch": 3.2530508958818007, "grad_norm": 2.045318126678467, "learning_rate": 1.824970045736842e-05, "loss": 0.4896, "step": 19928 }, { "epoch": 3.253214154524305, "grad_norm": 1.8595269918441772, "learning_rate": 1.8249521104306195e-05, "loss": 0.4482, "step": 19929 }, { "epoch": 3.2533774131668096, "grad_norm": 1.607696533203125, "learning_rate": 1.824934174293671e-05, "loss": 0.3971, "step": 19930 }, { "epoch": 3.253540671809314, "grad_norm": 2.0045278072357178, "learning_rate": 1.824916237326014e-05, "loss": 0.5443, "step": 19931 }, { "epoch": 3.2537039304518185, "grad_norm": 2.2996957302093506, "learning_rate": 1.8248982995276664e-05, "loss": 0.5439, "step": 19932 }, { "epoch": 3.2538671890943225, "grad_norm": 2.0599286556243896, "learning_rate": 1.8248803608986466e-05, "loss": 0.4869, "step": 19933 }, { "epoch": 3.254030447736827, "grad_norm": 2.1422319412231445, "learning_rate": 1.8248624214389723e-05, "loss": 0.5945, "step": 19934 }, { "epoch": 3.2541937063793314, "grad_norm": 2.062629461288452, "learning_rate": 1.8248444811486617e-05, "loss": 0.4615, "step": 19935 }, { "epoch": 3.254356965021836, "grad_norm": 2.198063611984253, "learning_rate": 1.8248265400277332e-05, "loss": 0.4936, "step": 19936 }, { "epoch": 3.2545202236643402, "grad_norm": 1.900908350944519, "learning_rate": 1.8248085980762043e-05, "loss": 0.441, "step": 19937 }, { "epoch": 3.2546834823068447, "grad_norm": 1.4588347673416138, "learning_rate": 1.8247906552940934e-05, "loss": 0.3876, "step": 19938 }, { "epoch": 3.254846740949349, "grad_norm": 1.8382700681686401, "learning_rate": 1.8247727116814187e-05, "loss": 0.3977, "step": 19939 }, { "epoch": 3.2550099995918536, "grad_norm": 1.8644049167633057, "learning_rate": 1.824754767238198e-05, "loss": 0.469, "step": 19940 }, { "epoch": 3.2551732582343575, "grad_norm": 2.10211443901062, "learning_rate": 1.8247368219644496e-05, "loss": 0.5081, "step": 19941 }, { "epoch": 3.255336516876862, "grad_norm": 1.7339688539505005, "learning_rate": 1.8247188758601912e-05, "loss": 0.4078, "step": 19942 }, { "epoch": 3.2554997755193664, "grad_norm": 2.2604892253875732, "learning_rate": 1.8247009289254414e-05, "loss": 0.5566, "step": 19943 }, { "epoch": 3.255663034161871, "grad_norm": 2.323786497116089, "learning_rate": 1.8246829811602178e-05, "loss": 0.4902, "step": 19944 }, { "epoch": 3.2558262928043753, "grad_norm": 1.7504377365112305, "learning_rate": 1.8246650325645387e-05, "loss": 0.3724, "step": 19945 }, { "epoch": 3.2559895514468797, "grad_norm": 1.8784488439559937, "learning_rate": 1.824647083138422e-05, "loss": 0.4923, "step": 19946 }, { "epoch": 3.256152810089384, "grad_norm": 1.6818276643753052, "learning_rate": 1.824629132881886e-05, "loss": 0.3771, "step": 19947 }, { "epoch": 3.2563160687318886, "grad_norm": 1.8028771877288818, "learning_rate": 1.8246111817949486e-05, "loss": 0.5109, "step": 19948 }, { "epoch": 3.256479327374393, "grad_norm": 1.9828357696533203, "learning_rate": 1.8245932298776285e-05, "loss": 0.4523, "step": 19949 }, { "epoch": 3.2566425860168975, "grad_norm": 1.7806031703948975, "learning_rate": 1.8245752771299426e-05, "loss": 0.4605, "step": 19950 }, { "epoch": 3.2568058446594015, "grad_norm": 1.8306434154510498, "learning_rate": 1.82455732355191e-05, "loss": 0.485, "step": 19951 }, { "epoch": 3.256969103301906, "grad_norm": 1.9412750005722046, "learning_rate": 1.824539369143548e-05, "loss": 0.5575, "step": 19952 }, { "epoch": 3.2571323619444104, "grad_norm": 1.983553409576416, "learning_rate": 1.8245214139048753e-05, "loss": 0.4847, "step": 19953 }, { "epoch": 3.257295620586915, "grad_norm": 1.787693738937378, "learning_rate": 1.82450345783591e-05, "loss": 0.4329, "step": 19954 }, { "epoch": 3.2574588792294192, "grad_norm": 1.972496747970581, "learning_rate": 1.8244855009366693e-05, "loss": 0.4478, "step": 19955 }, { "epoch": 3.2576221378719237, "grad_norm": 1.941447138786316, "learning_rate": 1.8244675432071723e-05, "loss": 0.4764, "step": 19956 }, { "epoch": 3.257785396514428, "grad_norm": 1.8226481676101685, "learning_rate": 1.8244495846474367e-05, "loss": 0.4241, "step": 19957 }, { "epoch": 3.2579486551569325, "grad_norm": 2.2057716846466064, "learning_rate": 1.8244316252574808e-05, "loss": 0.4621, "step": 19958 }, { "epoch": 3.2581119137994365, "grad_norm": 1.572947382926941, "learning_rate": 1.824413665037322e-05, "loss": 0.4317, "step": 19959 }, { "epoch": 3.258275172441941, "grad_norm": 1.9553881883621216, "learning_rate": 1.824395703986979e-05, "loss": 0.5114, "step": 19960 }, { "epoch": 3.2584384310844454, "grad_norm": 1.7482995986938477, "learning_rate": 1.82437774210647e-05, "loss": 0.4041, "step": 19961 }, { "epoch": 3.25860168972695, "grad_norm": 2.3349599838256836, "learning_rate": 1.8243597793958128e-05, "loss": 0.4869, "step": 19962 }, { "epoch": 3.2587649483694543, "grad_norm": 2.0042033195495605, "learning_rate": 1.8243418158550254e-05, "loss": 0.4511, "step": 19963 }, { "epoch": 3.2589282070119587, "grad_norm": 1.8524048328399658, "learning_rate": 1.824323851484126e-05, "loss": 0.3794, "step": 19964 }, { "epoch": 3.259091465654463, "grad_norm": 2.007139205932617, "learning_rate": 1.8243058862831328e-05, "loss": 0.4401, "step": 19965 }, { "epoch": 3.2592547242969676, "grad_norm": 1.6633027791976929, "learning_rate": 1.8242879202520635e-05, "loss": 0.3755, "step": 19966 }, { "epoch": 3.259417982939472, "grad_norm": 1.71791672706604, "learning_rate": 1.8242699533909368e-05, "loss": 0.4594, "step": 19967 }, { "epoch": 3.2595812415819765, "grad_norm": 2.16852068901062, "learning_rate": 1.8242519856997703e-05, "loss": 0.6853, "step": 19968 }, { "epoch": 3.2597445002244805, "grad_norm": 2.2362418174743652, "learning_rate": 1.8242340171785823e-05, "loss": 0.5464, "step": 19969 }, { "epoch": 3.259907758866985, "grad_norm": 2.077819585800171, "learning_rate": 1.8242160478273908e-05, "loss": 0.4807, "step": 19970 }, { "epoch": 3.2600710175094894, "grad_norm": 2.061586856842041, "learning_rate": 1.8241980776462144e-05, "loss": 0.5037, "step": 19971 }, { "epoch": 3.260234276151994, "grad_norm": 1.9462618827819824, "learning_rate": 1.8241801066350705e-05, "loss": 0.5238, "step": 19972 }, { "epoch": 3.2603975347944982, "grad_norm": 2.240145444869995, "learning_rate": 1.824162134793977e-05, "loss": 0.5313, "step": 19973 }, { "epoch": 3.2605607934370027, "grad_norm": 1.8971116542816162, "learning_rate": 1.824144162122953e-05, "loss": 0.4889, "step": 19974 }, { "epoch": 3.260724052079507, "grad_norm": 3.248300790786743, "learning_rate": 1.8241261886220155e-05, "loss": 0.599, "step": 19975 }, { "epoch": 3.260887310722011, "grad_norm": 2.0228404998779297, "learning_rate": 1.824108214291184e-05, "loss": 0.494, "step": 19976 }, { "epoch": 3.2610505693645155, "grad_norm": 1.7644407749176025, "learning_rate": 1.824090239130475e-05, "loss": 0.4238, "step": 19977 }, { "epoch": 3.26121382800702, "grad_norm": 2.031167984008789, "learning_rate": 1.8240722631399077e-05, "loss": 0.5367, "step": 19978 }, { "epoch": 3.2613770866495244, "grad_norm": 1.9449899196624756, "learning_rate": 1.8240542863194997e-05, "loss": 0.4612, "step": 19979 }, { "epoch": 3.261540345292029, "grad_norm": 1.9337657690048218, "learning_rate": 1.8240363086692695e-05, "loss": 0.4064, "step": 19980 }, { "epoch": 3.2617036039345333, "grad_norm": 1.741765022277832, "learning_rate": 1.8240183301892343e-05, "loss": 0.4342, "step": 19981 }, { "epoch": 3.2618668625770377, "grad_norm": 2.1080784797668457, "learning_rate": 1.8240003508794134e-05, "loss": 0.4796, "step": 19982 }, { "epoch": 3.262030121219542, "grad_norm": 1.937576413154602, "learning_rate": 1.8239823707398245e-05, "loss": 0.5393, "step": 19983 }, { "epoch": 3.2621933798620466, "grad_norm": 1.850681185722351, "learning_rate": 1.8239643897704855e-05, "loss": 0.5067, "step": 19984 }, { "epoch": 3.262356638504551, "grad_norm": 1.7104310989379883, "learning_rate": 1.8239464079714144e-05, "loss": 0.4686, "step": 19985 }, { "epoch": 3.262519897147055, "grad_norm": 1.8275175094604492, "learning_rate": 1.8239284253426294e-05, "loss": 0.4189, "step": 19986 }, { "epoch": 3.2626831557895595, "grad_norm": 1.9923691749572754, "learning_rate": 1.823910441884149e-05, "loss": 0.5328, "step": 19987 }, { "epoch": 3.262846414432064, "grad_norm": 1.9968422651290894, "learning_rate": 1.8238924575959905e-05, "loss": 0.4898, "step": 19988 }, { "epoch": 3.2630096730745684, "grad_norm": 1.6941279172897339, "learning_rate": 1.8238744724781728e-05, "loss": 0.4519, "step": 19989 }, { "epoch": 3.263172931717073, "grad_norm": 2.0652999877929688, "learning_rate": 1.823856486530714e-05, "loss": 0.5112, "step": 19990 }, { "epoch": 3.2633361903595772, "grad_norm": 1.9452491998672485, "learning_rate": 1.8238384997536317e-05, "loss": 0.459, "step": 19991 }, { "epoch": 3.2634994490020817, "grad_norm": 1.8669904470443726, "learning_rate": 1.8238205121469442e-05, "loss": 0.4068, "step": 19992 }, { "epoch": 3.263662707644586, "grad_norm": 1.6877986192703247, "learning_rate": 1.82380252371067e-05, "loss": 0.4376, "step": 19993 }, { "epoch": 3.26382596628709, "grad_norm": 1.9831780195236206, "learning_rate": 1.8237845344448262e-05, "loss": 0.53, "step": 19994 }, { "epoch": 3.2639892249295945, "grad_norm": 1.8338723182678223, "learning_rate": 1.8237665443494323e-05, "loss": 0.4312, "step": 19995 }, { "epoch": 3.264152483572099, "grad_norm": 1.9956586360931396, "learning_rate": 1.823748553424505e-05, "loss": 0.5169, "step": 19996 }, { "epoch": 3.2643157422146034, "grad_norm": 2.5667524337768555, "learning_rate": 1.823730561670064e-05, "loss": 0.5545, "step": 19997 }, { "epoch": 3.264479000857108, "grad_norm": 1.84737229347229, "learning_rate": 1.8237125690861258e-05, "loss": 0.5085, "step": 19998 }, { "epoch": 3.2646422594996123, "grad_norm": 2.0889086723327637, "learning_rate": 1.8236945756727093e-05, "loss": 0.4705, "step": 19999 }, { "epoch": 3.2648055181421167, "grad_norm": 2.074276924133301, "learning_rate": 1.8236765814298328e-05, "loss": 0.4871, "step": 20000 }, { "epoch": 3.264968776784621, "grad_norm": 2.0856823921203613, "learning_rate": 1.823658586357514e-05, "loss": 0.4856, "step": 20001 }, { "epoch": 3.2651320354271256, "grad_norm": 1.705482006072998, "learning_rate": 1.8236405904557717e-05, "loss": 0.4139, "step": 20002 }, { "epoch": 3.26529529406963, "grad_norm": 1.6888068914413452, "learning_rate": 1.8236225937246234e-05, "loss": 0.4174, "step": 20003 }, { "epoch": 3.265458552712134, "grad_norm": 2.3524422645568848, "learning_rate": 1.823604596164087e-05, "loss": 0.5801, "step": 20004 }, { "epoch": 3.2656218113546385, "grad_norm": 1.826093316078186, "learning_rate": 1.8235865977741814e-05, "loss": 0.46, "step": 20005 }, { "epoch": 3.265785069997143, "grad_norm": 2.176861524581909, "learning_rate": 1.823568598554924e-05, "loss": 0.4755, "step": 20006 }, { "epoch": 3.2659483286396473, "grad_norm": 2.0666399002075195, "learning_rate": 1.8235505985063335e-05, "loss": 0.4015, "step": 20007 }, { "epoch": 3.266111587282152, "grad_norm": 1.8500133752822876, "learning_rate": 1.8235325976284276e-05, "loss": 0.4296, "step": 20008 }, { "epoch": 3.2662748459246562, "grad_norm": 2.516704797744751, "learning_rate": 1.8235145959212247e-05, "loss": 0.5071, "step": 20009 }, { "epoch": 3.2664381045671607, "grad_norm": 2.1790292263031006, "learning_rate": 1.823496593384743e-05, "loss": 0.4355, "step": 20010 }, { "epoch": 3.266601363209665, "grad_norm": 1.9828853607177734, "learning_rate": 1.8234785900189997e-05, "loss": 0.5155, "step": 20011 }, { "epoch": 3.266764621852169, "grad_norm": 1.6553735733032227, "learning_rate": 1.8234605858240142e-05, "loss": 0.3548, "step": 20012 }, { "epoch": 3.2669278804946735, "grad_norm": 2.1302294731140137, "learning_rate": 1.823442580799804e-05, "loss": 0.4561, "step": 20013 }, { "epoch": 3.267091139137178, "grad_norm": 1.7976449728012085, "learning_rate": 1.8234245749463878e-05, "loss": 0.4475, "step": 20014 }, { "epoch": 3.2672543977796824, "grad_norm": 2.0678367614746094, "learning_rate": 1.8234065682637828e-05, "loss": 0.4595, "step": 20015 }, { "epoch": 3.267417656422187, "grad_norm": 2.210742950439453, "learning_rate": 1.8233885607520077e-05, "loss": 0.5536, "step": 20016 }, { "epoch": 3.2675809150646913, "grad_norm": 2.0639259815216064, "learning_rate": 1.8233705524110806e-05, "loss": 0.4836, "step": 20017 }, { "epoch": 3.2677441737071957, "grad_norm": 2.1362955570220947, "learning_rate": 1.8233525432410193e-05, "loss": 0.4769, "step": 20018 }, { "epoch": 3.2679074323497, "grad_norm": 2.164738655090332, "learning_rate": 1.8233345332418423e-05, "loss": 0.469, "step": 20019 }, { "epoch": 3.2680706909922046, "grad_norm": 1.98214590549469, "learning_rate": 1.823316522413568e-05, "loss": 0.457, "step": 20020 }, { "epoch": 3.268233949634709, "grad_norm": 2.105360269546509, "learning_rate": 1.823298510756214e-05, "loss": 0.4916, "step": 20021 }, { "epoch": 3.268397208277213, "grad_norm": 1.4777368307113647, "learning_rate": 1.8232804982697986e-05, "loss": 0.3576, "step": 20022 }, { "epoch": 3.2685604669197175, "grad_norm": 2.0799720287323, "learning_rate": 1.8232624849543398e-05, "loss": 0.4549, "step": 20023 }, { "epoch": 3.268723725562222, "grad_norm": 2.2404367923736572, "learning_rate": 1.823244470809856e-05, "loss": 0.5565, "step": 20024 }, { "epoch": 3.2688869842047263, "grad_norm": 2.084009885787964, "learning_rate": 1.823226455836365e-05, "loss": 0.4497, "step": 20025 }, { "epoch": 3.269050242847231, "grad_norm": 2.0479583740234375, "learning_rate": 1.8232084400338858e-05, "loss": 0.433, "step": 20026 }, { "epoch": 3.269213501489735, "grad_norm": 2.0356364250183105, "learning_rate": 1.8231904234024353e-05, "loss": 0.4947, "step": 20027 }, { "epoch": 3.2693767601322397, "grad_norm": 3.740813970565796, "learning_rate": 1.8231724059420326e-05, "loss": 0.4971, "step": 20028 }, { "epoch": 3.2695400187747437, "grad_norm": 1.927234411239624, "learning_rate": 1.8231543876526957e-05, "loss": 0.4479, "step": 20029 }, { "epoch": 3.269703277417248, "grad_norm": 1.901902675628662, "learning_rate": 1.8231363685344422e-05, "loss": 0.5018, "step": 20030 }, { "epoch": 3.2698665360597525, "grad_norm": 1.5412687063217163, "learning_rate": 1.8231183485872904e-05, "loss": 0.3996, "step": 20031 }, { "epoch": 3.270029794702257, "grad_norm": 1.5436336994171143, "learning_rate": 1.823100327811259e-05, "loss": 0.3639, "step": 20032 }, { "epoch": 3.2701930533447614, "grad_norm": 2.0425288677215576, "learning_rate": 1.8230823062063657e-05, "loss": 0.4499, "step": 20033 }, { "epoch": 3.270356311987266, "grad_norm": 2.0965516567230225, "learning_rate": 1.8230642837726287e-05, "loss": 0.505, "step": 20034 }, { "epoch": 3.2705195706297703, "grad_norm": 2.4187567234039307, "learning_rate": 1.8230462605100663e-05, "loss": 0.5778, "step": 20035 }, { "epoch": 3.2706828292722747, "grad_norm": 2.020226240158081, "learning_rate": 1.8230282364186964e-05, "loss": 0.5232, "step": 20036 }, { "epoch": 3.270846087914779, "grad_norm": 1.9986402988433838, "learning_rate": 1.8230102114985372e-05, "loss": 0.4471, "step": 20037 }, { "epoch": 3.2710093465572836, "grad_norm": 1.7460289001464844, "learning_rate": 1.822992185749607e-05, "loss": 0.4214, "step": 20038 }, { "epoch": 3.2711726051997876, "grad_norm": 2.2603023052215576, "learning_rate": 1.822974159171924e-05, "loss": 0.5474, "step": 20039 }, { "epoch": 3.271335863842292, "grad_norm": 1.9589605331420898, "learning_rate": 1.822956131765506e-05, "loss": 0.5408, "step": 20040 }, { "epoch": 3.2714991224847965, "grad_norm": 2.3397390842437744, "learning_rate": 1.822938103530372e-05, "loss": 0.5237, "step": 20041 }, { "epoch": 3.271662381127301, "grad_norm": 2.500288963317871, "learning_rate": 1.8229200744665392e-05, "loss": 0.6815, "step": 20042 }, { "epoch": 3.2718256397698053, "grad_norm": 1.8104894161224365, "learning_rate": 1.822902044574026e-05, "loss": 0.463, "step": 20043 }, { "epoch": 3.2719888984123098, "grad_norm": 2.0026133060455322, "learning_rate": 1.8228840138528505e-05, "loss": 0.4287, "step": 20044 }, { "epoch": 3.272152157054814, "grad_norm": 2.0873911380767822, "learning_rate": 1.8228659823030312e-05, "loss": 0.507, "step": 20045 }, { "epoch": 3.2723154156973187, "grad_norm": 2.217996835708618, "learning_rate": 1.822847949924586e-05, "loss": 0.4785, "step": 20046 }, { "epoch": 3.2724786743398226, "grad_norm": 2.120502233505249, "learning_rate": 1.8228299167175335e-05, "loss": 0.4732, "step": 20047 }, { "epoch": 3.272641932982327, "grad_norm": 1.9510513544082642, "learning_rate": 1.822811882681891e-05, "loss": 0.5061, "step": 20048 }, { "epoch": 3.2728051916248315, "grad_norm": 1.8231394290924072, "learning_rate": 1.8227938478176773e-05, "loss": 0.4493, "step": 20049 }, { "epoch": 3.272968450267336, "grad_norm": 1.4969950914382935, "learning_rate": 1.8227758121249108e-05, "loss": 0.4032, "step": 20050 }, { "epoch": 3.2731317089098404, "grad_norm": 2.099907398223877, "learning_rate": 1.8227577756036088e-05, "loss": 0.4742, "step": 20051 }, { "epoch": 3.273294967552345, "grad_norm": 2.081010580062866, "learning_rate": 1.82273973825379e-05, "loss": 0.4078, "step": 20052 }, { "epoch": 3.2734582261948493, "grad_norm": 1.6241044998168945, "learning_rate": 1.8227217000754724e-05, "loss": 0.418, "step": 20053 }, { "epoch": 3.2736214848373537, "grad_norm": 2.2163095474243164, "learning_rate": 1.822703661068675e-05, "loss": 0.513, "step": 20054 }, { "epoch": 3.273784743479858, "grad_norm": 1.5827330350875854, "learning_rate": 1.8226856212334144e-05, "loss": 0.3854, "step": 20055 }, { "epoch": 3.2739480021223626, "grad_norm": 1.8173706531524658, "learning_rate": 1.8226675805697097e-05, "loss": 0.4147, "step": 20056 }, { "epoch": 3.2741112607648666, "grad_norm": 1.9936648607254028, "learning_rate": 1.8226495390775793e-05, "loss": 0.5876, "step": 20057 }, { "epoch": 3.274274519407371, "grad_norm": 2.0162875652313232, "learning_rate": 1.8226314967570408e-05, "loss": 0.4907, "step": 20058 }, { "epoch": 3.2744377780498755, "grad_norm": 1.739646077156067, "learning_rate": 1.822613453608113e-05, "loss": 0.4647, "step": 20059 }, { "epoch": 3.27460103669238, "grad_norm": 2.080221652984619, "learning_rate": 1.8225954096308133e-05, "loss": 0.4954, "step": 20060 }, { "epoch": 3.2747642953348843, "grad_norm": 1.6992954015731812, "learning_rate": 1.82257736482516e-05, "loss": 0.4668, "step": 20061 }, { "epoch": 3.2749275539773888, "grad_norm": 2.004969596862793, "learning_rate": 1.8225593191911717e-05, "loss": 0.4959, "step": 20062 }, { "epoch": 3.275090812619893, "grad_norm": 1.8841824531555176, "learning_rate": 1.8225412727288668e-05, "loss": 0.4989, "step": 20063 }, { "epoch": 3.275254071262397, "grad_norm": 2.0956950187683105, "learning_rate": 1.822523225438263e-05, "loss": 0.5854, "step": 20064 }, { "epoch": 3.2754173299049016, "grad_norm": 1.9369014501571655, "learning_rate": 1.822505177319378e-05, "loss": 0.5071, "step": 20065 }, { "epoch": 3.275580588547406, "grad_norm": 2.343132972717285, "learning_rate": 1.8224871283722308e-05, "loss": 0.5571, "step": 20066 }, { "epoch": 3.2757438471899105, "grad_norm": 1.9612784385681152, "learning_rate": 1.8224690785968392e-05, "loss": 0.5704, "step": 20067 }, { "epoch": 3.275907105832415, "grad_norm": 2.203066110610962, "learning_rate": 1.8224510279932214e-05, "loss": 0.5416, "step": 20068 }, { "epoch": 3.2760703644749194, "grad_norm": 1.4626696109771729, "learning_rate": 1.822432976561396e-05, "loss": 0.3777, "step": 20069 }, { "epoch": 3.276233623117424, "grad_norm": 1.9721506834030151, "learning_rate": 1.8224149243013805e-05, "loss": 0.4949, "step": 20070 }, { "epoch": 3.2763968817599283, "grad_norm": 1.8160955905914307, "learning_rate": 1.8223968712131935e-05, "loss": 0.449, "step": 20071 }, { "epoch": 3.2765601404024327, "grad_norm": 1.7878061532974243, "learning_rate": 1.822378817296853e-05, "loss": 0.4941, "step": 20072 }, { "epoch": 3.276723399044937, "grad_norm": 2.2493250370025635, "learning_rate": 1.822360762552377e-05, "loss": 0.5953, "step": 20073 }, { "epoch": 3.276886657687441, "grad_norm": 2.0596001148223877, "learning_rate": 1.8223427069797845e-05, "loss": 0.485, "step": 20074 }, { "epoch": 3.2770499163299456, "grad_norm": 1.9612470865249634, "learning_rate": 1.8223246505790926e-05, "loss": 0.5369, "step": 20075 }, { "epoch": 3.27721317497245, "grad_norm": 1.670788049697876, "learning_rate": 1.8223065933503203e-05, "loss": 0.4142, "step": 20076 }, { "epoch": 3.2773764336149545, "grad_norm": 2.235863447189331, "learning_rate": 1.8222885352934855e-05, "loss": 0.4741, "step": 20077 }, { "epoch": 3.277539692257459, "grad_norm": 2.035806894302368, "learning_rate": 1.8222704764086062e-05, "loss": 0.6547, "step": 20078 }, { "epoch": 3.2777029508999633, "grad_norm": 1.99791419506073, "learning_rate": 1.8222524166957007e-05, "loss": 0.4919, "step": 20079 }, { "epoch": 3.2778662095424678, "grad_norm": 1.518079161643982, "learning_rate": 1.8222343561547876e-05, "loss": 0.4268, "step": 20080 }, { "epoch": 3.278029468184972, "grad_norm": 1.8950029611587524, "learning_rate": 1.8222162947858842e-05, "loss": 0.5006, "step": 20081 }, { "epoch": 3.278192726827476, "grad_norm": 1.5545259714126587, "learning_rate": 1.8221982325890096e-05, "loss": 0.4228, "step": 20082 }, { "epoch": 3.2783559854699806, "grad_norm": 1.7212026119232178, "learning_rate": 1.8221801695641815e-05, "loss": 0.4151, "step": 20083 }, { "epoch": 3.278519244112485, "grad_norm": 1.9909695386886597, "learning_rate": 1.822162105711418e-05, "loss": 0.4597, "step": 20084 }, { "epoch": 3.2786825027549895, "grad_norm": 2.2306430339813232, "learning_rate": 1.8221440410307375e-05, "loss": 0.5215, "step": 20085 }, { "epoch": 3.278845761397494, "grad_norm": 1.9544486999511719, "learning_rate": 1.8221259755221585e-05, "loss": 0.5158, "step": 20086 }, { "epoch": 3.2790090200399984, "grad_norm": 2.0398809909820557, "learning_rate": 1.8221079091856986e-05, "loss": 0.5124, "step": 20087 }, { "epoch": 3.279172278682503, "grad_norm": 1.7739578485488892, "learning_rate": 1.8220898420213765e-05, "loss": 0.4756, "step": 20088 }, { "epoch": 3.2793355373250073, "grad_norm": 1.7147523164749146, "learning_rate": 1.8220717740292098e-05, "loss": 0.4573, "step": 20089 }, { "epoch": 3.2794987959675117, "grad_norm": 1.900590419769287, "learning_rate": 1.822053705209217e-05, "loss": 0.4107, "step": 20090 }, { "epoch": 3.279662054610016, "grad_norm": 1.624188780784607, "learning_rate": 1.8220356355614164e-05, "loss": 0.383, "step": 20091 }, { "epoch": 3.27982531325252, "grad_norm": 2.528716564178467, "learning_rate": 1.8220175650858264e-05, "loss": 0.4282, "step": 20092 }, { "epoch": 3.2799885718950246, "grad_norm": 2.1227023601531982, "learning_rate": 1.821999493782465e-05, "loss": 0.5069, "step": 20093 }, { "epoch": 3.280151830537529, "grad_norm": 2.1840364933013916, "learning_rate": 1.82198142165135e-05, "loss": 0.5398, "step": 20094 }, { "epoch": 3.2803150891800334, "grad_norm": 1.9213356971740723, "learning_rate": 1.8219633486925002e-05, "loss": 0.4511, "step": 20095 }, { "epoch": 3.280478347822538, "grad_norm": 2.1088271141052246, "learning_rate": 1.8219452749059332e-05, "loss": 0.4822, "step": 20096 }, { "epoch": 3.2806416064650423, "grad_norm": 1.2904629707336426, "learning_rate": 1.821927200291668e-05, "loss": 0.3321, "step": 20097 }, { "epoch": 3.2808048651075468, "grad_norm": 1.9364672899246216, "learning_rate": 1.821909124849722e-05, "loss": 0.5065, "step": 20098 }, { "epoch": 3.280968123750051, "grad_norm": 1.5520272254943848, "learning_rate": 1.8218910485801137e-05, "loss": 0.4151, "step": 20099 }, { "epoch": 3.281131382392555, "grad_norm": 1.8089964389801025, "learning_rate": 1.8218729714828612e-05, "loss": 0.4864, "step": 20100 }, { "epoch": 3.2812946410350596, "grad_norm": 2.0736703872680664, "learning_rate": 1.821854893557983e-05, "loss": 0.5378, "step": 20101 }, { "epoch": 3.281457899677564, "grad_norm": 2.032336473464966, "learning_rate": 1.8218368148054973e-05, "loss": 0.5951, "step": 20102 }, { "epoch": 3.2816211583200685, "grad_norm": 1.846699833869934, "learning_rate": 1.821818735225422e-05, "loss": 0.4242, "step": 20103 }, { "epoch": 3.281784416962573, "grad_norm": 1.880824089050293, "learning_rate": 1.8218006548177752e-05, "loss": 0.4677, "step": 20104 }, { "epoch": 3.2819476756050774, "grad_norm": 1.9091202020645142, "learning_rate": 1.821782573582576e-05, "loss": 0.4918, "step": 20105 }, { "epoch": 3.282110934247582, "grad_norm": 2.014801263809204, "learning_rate": 1.8217644915198415e-05, "loss": 0.4341, "step": 20106 }, { "epoch": 3.2822741928900863, "grad_norm": 2.153175115585327, "learning_rate": 1.8217464086295904e-05, "loss": 0.4343, "step": 20107 }, { "epoch": 3.2824374515325907, "grad_norm": 2.3483288288116455, "learning_rate": 1.821728324911841e-05, "loss": 0.5446, "step": 20108 }, { "epoch": 3.282600710175095, "grad_norm": 1.9040987491607666, "learning_rate": 1.8217102403666112e-05, "loss": 0.501, "step": 20109 }, { "epoch": 3.282763968817599, "grad_norm": 2.272305727005005, "learning_rate": 1.8216921549939196e-05, "loss": 0.4632, "step": 20110 }, { "epoch": 3.2829272274601036, "grad_norm": 2.2432503700256348, "learning_rate": 1.8216740687937843e-05, "loss": 0.518, "step": 20111 }, { "epoch": 3.283090486102608, "grad_norm": 2.347123861312866, "learning_rate": 1.821655981766223e-05, "loss": 0.6775, "step": 20112 }, { "epoch": 3.2832537447451124, "grad_norm": 2.0627970695495605, "learning_rate": 1.8216378939112548e-05, "loss": 0.5765, "step": 20113 }, { "epoch": 3.283417003387617, "grad_norm": 1.9008116722106934, "learning_rate": 1.8216198052288975e-05, "loss": 0.4214, "step": 20114 }, { "epoch": 3.2835802620301213, "grad_norm": 1.9180598258972168, "learning_rate": 1.821601715719169e-05, "loss": 0.5156, "step": 20115 }, { "epoch": 3.2837435206726258, "grad_norm": 2.47414231300354, "learning_rate": 1.8215836253820878e-05, "loss": 0.4505, "step": 20116 }, { "epoch": 3.2839067793151298, "grad_norm": 2.024210214614868, "learning_rate": 1.8215655342176722e-05, "loss": 0.414, "step": 20117 }, { "epoch": 3.284070037957634, "grad_norm": 2.223085641860962, "learning_rate": 1.8215474422259403e-05, "loss": 0.5261, "step": 20118 }, { "epoch": 3.2842332966001386, "grad_norm": 2.263730049133301, "learning_rate": 1.8215293494069103e-05, "loss": 0.5178, "step": 20119 }, { "epoch": 3.284396555242643, "grad_norm": 1.7333790063858032, "learning_rate": 1.8215112557606005e-05, "loss": 0.4842, "step": 20120 }, { "epoch": 3.2845598138851475, "grad_norm": 1.9308565855026245, "learning_rate": 1.821493161287029e-05, "loss": 0.4907, "step": 20121 }, { "epoch": 3.284723072527652, "grad_norm": 1.7676548957824707, "learning_rate": 1.8214750659862143e-05, "loss": 0.4435, "step": 20122 }, { "epoch": 3.2848863311701564, "grad_norm": 2.276116132736206, "learning_rate": 1.8214569698581745e-05, "loss": 0.4861, "step": 20123 }, { "epoch": 3.285049589812661, "grad_norm": 2.0962178707122803, "learning_rate": 1.8214388729029274e-05, "loss": 0.4705, "step": 20124 }, { "epoch": 3.2852128484551653, "grad_norm": 1.6454168558120728, "learning_rate": 1.8214207751204917e-05, "loss": 0.3887, "step": 20125 }, { "epoch": 3.2853761070976697, "grad_norm": 1.8012549877166748, "learning_rate": 1.8214026765108856e-05, "loss": 0.4673, "step": 20126 }, { "epoch": 3.2855393657401737, "grad_norm": 1.468717098236084, "learning_rate": 1.821384577074127e-05, "loss": 0.374, "step": 20127 }, { "epoch": 3.285702624382678, "grad_norm": 1.707858920097351, "learning_rate": 1.8213664768102344e-05, "loss": 0.4496, "step": 20128 }, { "epoch": 3.2858658830251826, "grad_norm": 2.0714807510375977, "learning_rate": 1.8213483757192263e-05, "loss": 0.507, "step": 20129 }, { "epoch": 3.286029141667687, "grad_norm": 1.822707176208496, "learning_rate": 1.8213302738011204e-05, "loss": 0.4013, "step": 20130 }, { "epoch": 3.2861924003101914, "grad_norm": 2.037616014480591, "learning_rate": 1.8213121710559352e-05, "loss": 0.4442, "step": 20131 }, { "epoch": 3.286355658952696, "grad_norm": 2.215130090713501, "learning_rate": 1.8212940674836888e-05, "loss": 0.5564, "step": 20132 }, { "epoch": 3.2865189175952003, "grad_norm": 1.987836480140686, "learning_rate": 1.8212759630843994e-05, "loss": 0.4659, "step": 20133 }, { "epoch": 3.2866821762377048, "grad_norm": 1.7593462467193604, "learning_rate": 1.8212578578580853e-05, "loss": 0.4767, "step": 20134 }, { "epoch": 3.2868454348802087, "grad_norm": 2.0645058155059814, "learning_rate": 1.821239751804765e-05, "loss": 0.5106, "step": 20135 }, { "epoch": 3.287008693522713, "grad_norm": 1.7202552556991577, "learning_rate": 1.8212216449244563e-05, "loss": 0.4314, "step": 20136 }, { "epoch": 3.2871719521652176, "grad_norm": 1.9316314458847046, "learning_rate": 1.8212035372171776e-05, "loss": 0.5437, "step": 20137 }, { "epoch": 3.287335210807722, "grad_norm": 1.9834628105163574, "learning_rate": 1.8211854286829474e-05, "loss": 0.4926, "step": 20138 }, { "epoch": 3.2874984694502265, "grad_norm": 2.2212889194488525, "learning_rate": 1.8211673193217834e-05, "loss": 0.552, "step": 20139 }, { "epoch": 3.287661728092731, "grad_norm": 1.836548089981079, "learning_rate": 1.821149209133704e-05, "loss": 0.4169, "step": 20140 }, { "epoch": 3.2878249867352354, "grad_norm": 1.6052130460739136, "learning_rate": 1.821131098118728e-05, "loss": 0.4305, "step": 20141 }, { "epoch": 3.28798824537774, "grad_norm": 2.022195816040039, "learning_rate": 1.821112986276873e-05, "loss": 0.5458, "step": 20142 }, { "epoch": 3.2881515040202443, "grad_norm": 2.081653356552124, "learning_rate": 1.8210948736081575e-05, "loss": 0.5257, "step": 20143 }, { "epoch": 3.2883147626627487, "grad_norm": 2.1137208938598633, "learning_rate": 1.8210767601125996e-05, "loss": 0.5156, "step": 20144 }, { "epoch": 3.2884780213052527, "grad_norm": 2.0089128017425537, "learning_rate": 1.8210586457902176e-05, "loss": 0.4653, "step": 20145 }, { "epoch": 3.288641279947757, "grad_norm": 1.857908010482788, "learning_rate": 1.82104053064103e-05, "loss": 0.4626, "step": 20146 }, { "epoch": 3.2888045385902616, "grad_norm": 1.9957505464553833, "learning_rate": 1.8210224146650544e-05, "loss": 0.4594, "step": 20147 }, { "epoch": 3.288967797232766, "grad_norm": 1.7859681844711304, "learning_rate": 1.8210042978623095e-05, "loss": 0.4553, "step": 20148 }, { "epoch": 3.2891310558752704, "grad_norm": 1.761387825012207, "learning_rate": 1.8209861802328137e-05, "loss": 0.419, "step": 20149 }, { "epoch": 3.289294314517775, "grad_norm": 1.6536802053451538, "learning_rate": 1.820968061776585e-05, "loss": 0.4303, "step": 20150 }, { "epoch": 3.2894575731602793, "grad_norm": 1.5560230016708374, "learning_rate": 1.8209499424936416e-05, "loss": 0.4018, "step": 20151 }, { "epoch": 3.2896208318027838, "grad_norm": 1.5427839756011963, "learning_rate": 1.820931822384002e-05, "loss": 0.4138, "step": 20152 }, { "epoch": 3.2897840904452877, "grad_norm": 2.0317881107330322, "learning_rate": 1.820913701447684e-05, "loss": 0.5571, "step": 20153 }, { "epoch": 3.289947349087792, "grad_norm": 2.1879284381866455, "learning_rate": 1.8208955796847067e-05, "loss": 0.5039, "step": 20154 }, { "epoch": 3.2901106077302966, "grad_norm": 1.7831050157546997, "learning_rate": 1.8208774570950872e-05, "loss": 0.4482, "step": 20155 }, { "epoch": 3.290273866372801, "grad_norm": 2.342907667160034, "learning_rate": 1.8208593336788443e-05, "loss": 0.5442, "step": 20156 }, { "epoch": 3.2904371250153055, "grad_norm": 2.123445987701416, "learning_rate": 1.8208412094359964e-05, "loss": 0.5116, "step": 20157 }, { "epoch": 3.29060038365781, "grad_norm": 1.7371375560760498, "learning_rate": 1.8208230843665617e-05, "loss": 0.4352, "step": 20158 }, { "epoch": 3.2907636423003144, "grad_norm": 1.4640638828277588, "learning_rate": 1.8208049584705586e-05, "loss": 0.3736, "step": 20159 }, { "epoch": 3.290926900942819, "grad_norm": 2.0285186767578125, "learning_rate": 1.8207868317480046e-05, "loss": 0.4555, "step": 20160 }, { "epoch": 3.2910901595853232, "grad_norm": 2.0785207748413086, "learning_rate": 1.8207687041989185e-05, "loss": 0.4679, "step": 20161 }, { "epoch": 3.2912534182278272, "grad_norm": 1.454455852508545, "learning_rate": 1.820750575823319e-05, "loss": 0.4013, "step": 20162 }, { "epoch": 3.2914166768703317, "grad_norm": 2.08986234664917, "learning_rate": 1.8207324466212237e-05, "loss": 0.5962, "step": 20163 }, { "epoch": 3.291579935512836, "grad_norm": 1.9870102405548096, "learning_rate": 1.8207143165926512e-05, "loss": 0.5236, "step": 20164 }, { "epoch": 3.2917431941553406, "grad_norm": 1.8273261785507202, "learning_rate": 1.8206961857376192e-05, "loss": 0.4625, "step": 20165 }, { "epoch": 3.291906452797845, "grad_norm": 1.834934115409851, "learning_rate": 1.8206780540561463e-05, "loss": 0.5013, "step": 20166 }, { "epoch": 3.2920697114403494, "grad_norm": 2.0871803760528564, "learning_rate": 1.8206599215482512e-05, "loss": 0.5405, "step": 20167 }, { "epoch": 3.292232970082854, "grad_norm": 1.8684810400009155, "learning_rate": 1.8206417882139517e-05, "loss": 0.4602, "step": 20168 }, { "epoch": 3.2923962287253583, "grad_norm": 1.9684937000274658, "learning_rate": 1.8206236540532663e-05, "loss": 0.4555, "step": 20169 }, { "epoch": 3.2925594873678623, "grad_norm": 2.0388641357421875, "learning_rate": 1.8206055190662128e-05, "loss": 0.5197, "step": 20170 }, { "epoch": 3.2927227460103667, "grad_norm": 1.8432976007461548, "learning_rate": 1.82058738325281e-05, "loss": 0.4453, "step": 20171 }, { "epoch": 3.292886004652871, "grad_norm": 2.0878148078918457, "learning_rate": 1.8205692466130754e-05, "loss": 0.5572, "step": 20172 }, { "epoch": 3.2930492632953756, "grad_norm": 2.029425621032715, "learning_rate": 1.8205511091470282e-05, "loss": 0.4735, "step": 20173 }, { "epoch": 3.29321252193788, "grad_norm": 1.7358986139297485, "learning_rate": 1.820532970854686e-05, "loss": 0.4222, "step": 20174 }, { "epoch": 3.2933757805803845, "grad_norm": 2.087656021118164, "learning_rate": 1.8205148317360678e-05, "loss": 0.5036, "step": 20175 }, { "epoch": 3.293539039222889, "grad_norm": 2.0555498600006104, "learning_rate": 1.8204966917911912e-05, "loss": 0.533, "step": 20176 }, { "epoch": 3.2937022978653934, "grad_norm": 2.1591179370880127, "learning_rate": 1.8204785510200746e-05, "loss": 0.6024, "step": 20177 }, { "epoch": 3.293865556507898, "grad_norm": 2.1465988159179688, "learning_rate": 1.820460409422736e-05, "loss": 0.538, "step": 20178 }, { "epoch": 3.2940288151504022, "grad_norm": 1.798044204711914, "learning_rate": 1.8204422669991943e-05, "loss": 0.3972, "step": 20179 }, { "epoch": 3.2941920737929062, "grad_norm": 2.5151102542877197, "learning_rate": 1.8204241237494672e-05, "loss": 0.4483, "step": 20180 }, { "epoch": 3.2943553324354107, "grad_norm": 1.882784128189087, "learning_rate": 1.8204059796735736e-05, "loss": 0.4877, "step": 20181 }, { "epoch": 3.294518591077915, "grad_norm": 2.158917188644409, "learning_rate": 1.8203878347715312e-05, "loss": 0.6121, "step": 20182 }, { "epoch": 3.2946818497204196, "grad_norm": 1.6413520574569702, "learning_rate": 1.8203696890433583e-05, "loss": 0.3805, "step": 20183 }, { "epoch": 3.294845108362924, "grad_norm": 2.263437032699585, "learning_rate": 1.8203515424890738e-05, "loss": 0.5076, "step": 20184 }, { "epoch": 3.2950083670054284, "grad_norm": 1.7173436880111694, "learning_rate": 1.8203333951086953e-05, "loss": 0.449, "step": 20185 }, { "epoch": 3.295171625647933, "grad_norm": 1.8142317533493042, "learning_rate": 1.820315246902241e-05, "loss": 0.4736, "step": 20186 }, { "epoch": 3.2953348842904373, "grad_norm": 1.8805084228515625, "learning_rate": 1.82029709786973e-05, "loss": 0.4712, "step": 20187 }, { "epoch": 3.2954981429329413, "grad_norm": 2.044696807861328, "learning_rate": 1.8202789480111794e-05, "loss": 0.4891, "step": 20188 }, { "epoch": 3.2956614015754457, "grad_norm": 2.1054258346557617, "learning_rate": 1.8202607973266088e-05, "loss": 0.5188, "step": 20189 }, { "epoch": 3.29582466021795, "grad_norm": 1.7776950597763062, "learning_rate": 1.8202426458160353e-05, "loss": 0.4551, "step": 20190 }, { "epoch": 3.2959879188604546, "grad_norm": 2.125070810317993, "learning_rate": 1.820224493479478e-05, "loss": 0.4606, "step": 20191 }, { "epoch": 3.296151177502959, "grad_norm": 1.990598440170288, "learning_rate": 1.8202063403169545e-05, "loss": 0.4628, "step": 20192 }, { "epoch": 3.2963144361454635, "grad_norm": 2.417362928390503, "learning_rate": 1.8201881863284837e-05, "loss": 0.521, "step": 20193 }, { "epoch": 3.296477694787968, "grad_norm": 2.3944833278656006, "learning_rate": 1.8201700315140836e-05, "loss": 0.5267, "step": 20194 }, { "epoch": 3.2966409534304724, "grad_norm": 2.181710958480835, "learning_rate": 1.8201518758737726e-05, "loss": 0.57, "step": 20195 }, { "epoch": 3.296804212072977, "grad_norm": 2.1353211402893066, "learning_rate": 1.8201337194075685e-05, "loss": 0.4694, "step": 20196 }, { "epoch": 3.2969674707154812, "grad_norm": 1.958409309387207, "learning_rate": 1.82011556211549e-05, "loss": 0.485, "step": 20197 }, { "epoch": 3.2971307293579852, "grad_norm": 2.328439474105835, "learning_rate": 1.8200974039975557e-05, "loss": 0.5269, "step": 20198 }, { "epoch": 3.2972939880004897, "grad_norm": 1.912975549697876, "learning_rate": 1.8200792450537833e-05, "loss": 0.5142, "step": 20199 }, { "epoch": 3.297457246642994, "grad_norm": 1.7776461839675903, "learning_rate": 1.8200610852841913e-05, "loss": 0.4619, "step": 20200 }, { "epoch": 3.2976205052854985, "grad_norm": 2.0804450511932373, "learning_rate": 1.8200429246887982e-05, "loss": 0.522, "step": 20201 }, { "epoch": 3.297783763928003, "grad_norm": 1.9461050033569336, "learning_rate": 1.8200247632676218e-05, "loss": 0.5428, "step": 20202 }, { "epoch": 3.2979470225705074, "grad_norm": 1.9572941064834595, "learning_rate": 1.8200066010206807e-05, "loss": 0.5628, "step": 20203 }, { "epoch": 3.298110281213012, "grad_norm": 2.093688488006592, "learning_rate": 1.8199884379479937e-05, "loss": 0.5328, "step": 20204 }, { "epoch": 3.298273539855516, "grad_norm": 2.040679931640625, "learning_rate": 1.819970274049578e-05, "loss": 0.5144, "step": 20205 }, { "epoch": 3.2984367984980203, "grad_norm": 2.35945200920105, "learning_rate": 1.8199521093254524e-05, "loss": 0.5004, "step": 20206 }, { "epoch": 3.2986000571405247, "grad_norm": 2.047131061553955, "learning_rate": 1.8199339437756355e-05, "loss": 0.5816, "step": 20207 }, { "epoch": 3.298763315783029, "grad_norm": 2.156564235687256, "learning_rate": 1.8199157774001454e-05, "loss": 0.5201, "step": 20208 }, { "epoch": 3.2989265744255336, "grad_norm": 2.355180501937866, "learning_rate": 1.819897610199e-05, "loss": 0.4845, "step": 20209 }, { "epoch": 3.299089833068038, "grad_norm": 1.8646215200424194, "learning_rate": 1.8198794421722183e-05, "loss": 0.4561, "step": 20210 }, { "epoch": 3.2992530917105425, "grad_norm": 1.8094708919525146, "learning_rate": 1.819861273319818e-05, "loss": 0.3914, "step": 20211 }, { "epoch": 3.299416350353047, "grad_norm": 2.1986169815063477, "learning_rate": 1.8198431036418178e-05, "loss": 0.4977, "step": 20212 }, { "epoch": 3.2995796089955514, "grad_norm": 1.6623684167861938, "learning_rate": 1.8198249331382357e-05, "loss": 0.447, "step": 20213 }, { "epoch": 3.299742867638056, "grad_norm": 1.6733142137527466, "learning_rate": 1.81980676180909e-05, "loss": 0.3846, "step": 20214 }, { "epoch": 3.29990612628056, "grad_norm": 1.752639651298523, "learning_rate": 1.819788589654399e-05, "loss": 0.4486, "step": 20215 }, { "epoch": 3.3000693849230642, "grad_norm": 2.098701000213623, "learning_rate": 1.8197704166741815e-05, "loss": 0.4905, "step": 20216 }, { "epoch": 3.3002326435655687, "grad_norm": 2.1602888107299805, "learning_rate": 1.8197522428684554e-05, "loss": 0.4721, "step": 20217 }, { "epoch": 3.300395902208073, "grad_norm": 1.9066669940948486, "learning_rate": 1.8197340682372387e-05, "loss": 0.4748, "step": 20218 }, { "epoch": 3.3005591608505775, "grad_norm": 1.9743196964263916, "learning_rate": 1.8197158927805502e-05, "loss": 0.4128, "step": 20219 }, { "epoch": 3.300722419493082, "grad_norm": 2.244410514831543, "learning_rate": 1.819697716498408e-05, "loss": 0.5329, "step": 20220 }, { "epoch": 3.3008856781355864, "grad_norm": 1.7245208024978638, "learning_rate": 1.8196795393908303e-05, "loss": 0.3886, "step": 20221 }, { "epoch": 3.301048936778091, "grad_norm": 1.7552282810211182, "learning_rate": 1.819661361457836e-05, "loss": 0.4256, "step": 20222 }, { "epoch": 3.301212195420595, "grad_norm": 2.3076891899108887, "learning_rate": 1.819643182699442e-05, "loss": 0.4913, "step": 20223 }, { "epoch": 3.3013754540630993, "grad_norm": 2.0976948738098145, "learning_rate": 1.819625003115668e-05, "loss": 0.507, "step": 20224 }, { "epoch": 3.3015387127056037, "grad_norm": 1.9611387252807617, "learning_rate": 1.819606822706532e-05, "loss": 0.4419, "step": 20225 }, { "epoch": 3.301701971348108, "grad_norm": 1.9190988540649414, "learning_rate": 1.819588641472052e-05, "loss": 0.4671, "step": 20226 }, { "epoch": 3.3018652299906126, "grad_norm": 2.033280849456787, "learning_rate": 1.819570459412247e-05, "loss": 0.4943, "step": 20227 }, { "epoch": 3.302028488633117, "grad_norm": 2.3793697357177734, "learning_rate": 1.819552276527134e-05, "loss": 0.5927, "step": 20228 }, { "epoch": 3.3021917472756215, "grad_norm": 2.764160394668579, "learning_rate": 1.819534092816732e-05, "loss": 0.5236, "step": 20229 }, { "epoch": 3.302355005918126, "grad_norm": 2.3788630962371826, "learning_rate": 1.81951590828106e-05, "loss": 0.488, "step": 20230 }, { "epoch": 3.3025182645606304, "grad_norm": 2.2068068981170654, "learning_rate": 1.8194977229201357e-05, "loss": 0.5094, "step": 20231 }, { "epoch": 3.302681523203135, "grad_norm": 1.5841877460479736, "learning_rate": 1.8194795367339768e-05, "loss": 0.4286, "step": 20232 }, { "epoch": 3.302844781845639, "grad_norm": 2.0626273155212402, "learning_rate": 1.8194613497226025e-05, "loss": 0.478, "step": 20233 }, { "epoch": 3.3030080404881432, "grad_norm": 1.678049921989441, "learning_rate": 1.819443161886031e-05, "loss": 0.4473, "step": 20234 }, { "epoch": 3.3031712991306477, "grad_norm": 1.7983901500701904, "learning_rate": 1.8194249732242805e-05, "loss": 0.4575, "step": 20235 }, { "epoch": 3.303334557773152, "grad_norm": 1.8457368612289429, "learning_rate": 1.819406783737369e-05, "loss": 0.4323, "step": 20236 }, { "epoch": 3.3034978164156565, "grad_norm": 1.651836633682251, "learning_rate": 1.8193885934253154e-05, "loss": 0.4665, "step": 20237 }, { "epoch": 3.303661075058161, "grad_norm": 1.7980703115463257, "learning_rate": 1.8193704022881375e-05, "loss": 0.4891, "step": 20238 }, { "epoch": 3.3038243337006654, "grad_norm": 1.8539544343948364, "learning_rate": 1.8193522103258535e-05, "loss": 0.5375, "step": 20239 }, { "epoch": 3.30398759234317, "grad_norm": 2.05081844329834, "learning_rate": 1.8193340175384826e-05, "loss": 0.4946, "step": 20240 }, { "epoch": 3.304150850985674, "grad_norm": 2.1892242431640625, "learning_rate": 1.8193158239260423e-05, "loss": 0.5144, "step": 20241 }, { "epoch": 3.3043141096281783, "grad_norm": 2.3281214237213135, "learning_rate": 1.8192976294885513e-05, "loss": 0.5273, "step": 20242 }, { "epoch": 3.3044773682706827, "grad_norm": 1.9355599880218506, "learning_rate": 1.8192794342260274e-05, "loss": 0.5062, "step": 20243 }, { "epoch": 3.304640626913187, "grad_norm": 1.9990801811218262, "learning_rate": 1.81926123813849e-05, "loss": 0.4929, "step": 20244 }, { "epoch": 3.3048038855556916, "grad_norm": 1.7260493040084839, "learning_rate": 1.8192430412259563e-05, "loss": 0.432, "step": 20245 }, { "epoch": 3.304967144198196, "grad_norm": 1.780877947807312, "learning_rate": 1.819224843488445e-05, "loss": 0.4077, "step": 20246 }, { "epoch": 3.3051304028407005, "grad_norm": 2.0693681240081787, "learning_rate": 1.8192066449259748e-05, "loss": 0.4408, "step": 20247 }, { "epoch": 3.305293661483205, "grad_norm": 1.9886529445648193, "learning_rate": 1.8191884455385635e-05, "loss": 0.4219, "step": 20248 }, { "epoch": 3.3054569201257094, "grad_norm": 2.026374101638794, "learning_rate": 1.8191702453262298e-05, "loss": 0.5022, "step": 20249 }, { "epoch": 3.305620178768214, "grad_norm": 1.6957908868789673, "learning_rate": 1.819152044288992e-05, "loss": 0.4047, "step": 20250 }, { "epoch": 3.305783437410718, "grad_norm": 1.8096919059753418, "learning_rate": 1.819133842426868e-05, "loss": 0.4, "step": 20251 }, { "epoch": 3.3059466960532222, "grad_norm": 2.2177138328552246, "learning_rate": 1.8191156397398768e-05, "loss": 0.5085, "step": 20252 }, { "epoch": 3.3061099546957267, "grad_norm": 2.1266701221466064, "learning_rate": 1.819097436228036e-05, "loss": 0.476, "step": 20253 }, { "epoch": 3.306273213338231, "grad_norm": 1.619235873222351, "learning_rate": 1.8190792318913644e-05, "loss": 0.4659, "step": 20254 }, { "epoch": 3.3064364719807355, "grad_norm": 1.5429290533065796, "learning_rate": 1.8190610267298803e-05, "loss": 0.4583, "step": 20255 }, { "epoch": 3.30659973062324, "grad_norm": 1.8074283599853516, "learning_rate": 1.819042820743602e-05, "loss": 0.4287, "step": 20256 }, { "epoch": 3.3067629892657444, "grad_norm": 2.1002631187438965, "learning_rate": 1.8190246139325478e-05, "loss": 0.5695, "step": 20257 }, { "epoch": 3.3069262479082484, "grad_norm": 2.1061367988586426, "learning_rate": 1.8190064062967357e-05, "loss": 0.5141, "step": 20258 }, { "epoch": 3.307089506550753, "grad_norm": 1.9210373163223267, "learning_rate": 1.818988197836185e-05, "loss": 0.4903, "step": 20259 }, { "epoch": 3.3072527651932573, "grad_norm": 1.8339629173278809, "learning_rate": 1.8189699885509128e-05, "loss": 0.4001, "step": 20260 }, { "epoch": 3.3074160238357617, "grad_norm": 1.98940908908844, "learning_rate": 1.818951778440938e-05, "loss": 0.4704, "step": 20261 }, { "epoch": 3.307579282478266, "grad_norm": 1.929509162902832, "learning_rate": 1.8189335675062797e-05, "loss": 0.4982, "step": 20262 }, { "epoch": 3.3077425411207706, "grad_norm": 1.6327989101409912, "learning_rate": 1.818915355746955e-05, "loss": 0.3839, "step": 20263 }, { "epoch": 3.307905799763275, "grad_norm": 2.104438066482544, "learning_rate": 1.818897143162983e-05, "loss": 0.4982, "step": 20264 }, { "epoch": 3.3080690584057795, "grad_norm": 1.798417091369629, "learning_rate": 1.8188789297543814e-05, "loss": 0.438, "step": 20265 }, { "epoch": 3.308232317048284, "grad_norm": 1.7603328227996826, "learning_rate": 1.818860715521169e-05, "loss": 0.4177, "step": 20266 }, { "epoch": 3.3083955756907883, "grad_norm": 1.967347264289856, "learning_rate": 1.818842500463364e-05, "loss": 0.4511, "step": 20267 }, { "epoch": 3.3085588343332923, "grad_norm": 1.342400312423706, "learning_rate": 1.8188242845809853e-05, "loss": 0.3604, "step": 20268 }, { "epoch": 3.308722092975797, "grad_norm": 1.8954020738601685, "learning_rate": 1.8188060678740508e-05, "loss": 0.457, "step": 20269 }, { "epoch": 3.308885351618301, "grad_norm": 1.972973346710205, "learning_rate": 1.818787850342578e-05, "loss": 0.4857, "step": 20270 }, { "epoch": 3.3090486102608057, "grad_norm": 1.6970702409744263, "learning_rate": 1.818769631986587e-05, "loss": 0.3999, "step": 20271 }, { "epoch": 3.30921186890331, "grad_norm": 2.1506965160369873, "learning_rate": 1.8187514128060946e-05, "loss": 0.4813, "step": 20272 }, { "epoch": 3.3093751275458145, "grad_norm": 2.001246452331543, "learning_rate": 1.81873319280112e-05, "loss": 0.5072, "step": 20273 }, { "epoch": 3.309538386188319, "grad_norm": 2.2016608715057373, "learning_rate": 1.8187149719716812e-05, "loss": 0.5725, "step": 20274 }, { "epoch": 3.3097016448308234, "grad_norm": 2.09468150138855, "learning_rate": 1.8186967503177964e-05, "loss": 0.5465, "step": 20275 }, { "epoch": 3.3098649034733274, "grad_norm": 2.309565544128418, "learning_rate": 1.8186785278394846e-05, "loss": 0.5273, "step": 20276 }, { "epoch": 3.310028162115832, "grad_norm": 2.0220584869384766, "learning_rate": 1.8186603045367638e-05, "loss": 0.4667, "step": 20277 }, { "epoch": 3.3101914207583363, "grad_norm": 1.6845120191574097, "learning_rate": 1.818642080409652e-05, "loss": 0.4038, "step": 20278 }, { "epoch": 3.3103546794008407, "grad_norm": 1.8602248430252075, "learning_rate": 1.8186238554581676e-05, "loss": 0.5054, "step": 20279 }, { "epoch": 3.310517938043345, "grad_norm": 1.8046118021011353, "learning_rate": 1.8186056296823297e-05, "loss": 0.4781, "step": 20280 }, { "epoch": 3.3106811966858496, "grad_norm": 1.6724293231964111, "learning_rate": 1.818587403082156e-05, "loss": 0.4433, "step": 20281 }, { "epoch": 3.310844455328354, "grad_norm": 1.8891240358352661, "learning_rate": 1.818569175657665e-05, "loss": 0.4118, "step": 20282 }, { "epoch": 3.3110077139708585, "grad_norm": 2.1405158042907715, "learning_rate": 1.818550947408875e-05, "loss": 0.4467, "step": 20283 }, { "epoch": 3.311170972613363, "grad_norm": 1.9433629512786865, "learning_rate": 1.8185327183358043e-05, "loss": 0.5572, "step": 20284 }, { "epoch": 3.3113342312558673, "grad_norm": 1.897985577583313, "learning_rate": 1.8185144884384715e-05, "loss": 0.4335, "step": 20285 }, { "epoch": 3.3114974898983713, "grad_norm": 2.0858075618743896, "learning_rate": 1.8184962577168947e-05, "loss": 0.4809, "step": 20286 }, { "epoch": 3.3116607485408758, "grad_norm": 1.869234561920166, "learning_rate": 1.818478026171093e-05, "loss": 0.4905, "step": 20287 }, { "epoch": 3.31182400718338, "grad_norm": 3.120375633239746, "learning_rate": 1.8184597938010832e-05, "loss": 0.4453, "step": 20288 }, { "epoch": 3.3119872658258847, "grad_norm": 2.3554718494415283, "learning_rate": 1.818441560606885e-05, "loss": 0.5622, "step": 20289 }, { "epoch": 3.312150524468389, "grad_norm": 2.1762337684631348, "learning_rate": 1.8184233265885165e-05, "loss": 0.54, "step": 20290 }, { "epoch": 3.3123137831108935, "grad_norm": 1.6645441055297852, "learning_rate": 1.818405091745996e-05, "loss": 0.4225, "step": 20291 }, { "epoch": 3.312477041753398, "grad_norm": 1.970054030418396, "learning_rate": 1.8183868560793416e-05, "loss": 0.4772, "step": 20292 }, { "epoch": 3.312640300395902, "grad_norm": 2.4644649028778076, "learning_rate": 1.8183686195885715e-05, "loss": 0.4811, "step": 20293 }, { "epoch": 3.3128035590384064, "grad_norm": 2.0564961433410645, "learning_rate": 1.818350382273705e-05, "loss": 0.5475, "step": 20294 }, { "epoch": 3.312966817680911, "grad_norm": 1.8276058435440063, "learning_rate": 1.8183321441347598e-05, "loss": 0.4526, "step": 20295 }, { "epoch": 3.3131300763234153, "grad_norm": 1.9840835332870483, "learning_rate": 1.8183139051717544e-05, "loss": 0.4919, "step": 20296 }, { "epoch": 3.3132933349659197, "grad_norm": 1.8413515090942383, "learning_rate": 1.8182956653847068e-05, "loss": 0.4583, "step": 20297 }, { "epoch": 3.313456593608424, "grad_norm": 1.9741758108139038, "learning_rate": 1.8182774247736358e-05, "loss": 0.4824, "step": 20298 }, { "epoch": 3.3136198522509286, "grad_norm": 1.8202356100082397, "learning_rate": 1.8182591833385596e-05, "loss": 0.4216, "step": 20299 }, { "epoch": 3.313783110893433, "grad_norm": 1.721514105796814, "learning_rate": 1.818240941079497e-05, "loss": 0.4122, "step": 20300 }, { "epoch": 3.3139463695359375, "grad_norm": 1.6179980039596558, "learning_rate": 1.8182226979964656e-05, "loss": 0.4119, "step": 20301 }, { "epoch": 3.314109628178442, "grad_norm": 1.994858980178833, "learning_rate": 1.818204454089484e-05, "loss": 0.465, "step": 20302 }, { "epoch": 3.314272886820946, "grad_norm": 2.2130825519561768, "learning_rate": 1.818186209358571e-05, "loss": 0.598, "step": 20303 }, { "epoch": 3.3144361454634503, "grad_norm": 1.7542930841445923, "learning_rate": 1.818167963803745e-05, "loss": 0.5024, "step": 20304 }, { "epoch": 3.3145994041059548, "grad_norm": 1.8106650114059448, "learning_rate": 1.8181497174250236e-05, "loss": 0.4298, "step": 20305 }, { "epoch": 3.314762662748459, "grad_norm": 2.1433095932006836, "learning_rate": 1.818131470222426e-05, "loss": 0.6041, "step": 20306 }, { "epoch": 3.3149259213909636, "grad_norm": 1.9115753173828125, "learning_rate": 1.8181132221959697e-05, "loss": 0.5058, "step": 20307 }, { "epoch": 3.315089180033468, "grad_norm": 2.333846092224121, "learning_rate": 1.818094973345674e-05, "loss": 0.5199, "step": 20308 }, { "epoch": 3.3152524386759725, "grad_norm": 1.5810552835464478, "learning_rate": 1.818076723671557e-05, "loss": 0.3875, "step": 20309 }, { "epoch": 3.315415697318477, "grad_norm": 1.8517199754714966, "learning_rate": 1.8180584731736367e-05, "loss": 0.4287, "step": 20310 }, { "epoch": 3.315578955960981, "grad_norm": 1.8237066268920898, "learning_rate": 1.8180402218519317e-05, "loss": 0.4556, "step": 20311 }, { "epoch": 3.3157422146034854, "grad_norm": 1.785017967224121, "learning_rate": 1.8180219697064606e-05, "loss": 0.4756, "step": 20312 }, { "epoch": 3.31590547324599, "grad_norm": 2.221456289291382, "learning_rate": 1.8180037167372416e-05, "loss": 0.5112, "step": 20313 }, { "epoch": 3.3160687318884943, "grad_norm": 1.8234442472457886, "learning_rate": 1.817985462944293e-05, "loss": 0.4584, "step": 20314 }, { "epoch": 3.3162319905309987, "grad_norm": 1.8810482025146484, "learning_rate": 1.817967208327633e-05, "loss": 0.4517, "step": 20315 }, { "epoch": 3.316395249173503, "grad_norm": 1.8180490732192993, "learning_rate": 1.8179489528872808e-05, "loss": 0.4462, "step": 20316 }, { "epoch": 3.3165585078160076, "grad_norm": 1.9956610202789307, "learning_rate": 1.817930696623254e-05, "loss": 0.5531, "step": 20317 }, { "epoch": 3.316721766458512, "grad_norm": 1.8160871267318726, "learning_rate": 1.817912439535571e-05, "loss": 0.455, "step": 20318 }, { "epoch": 3.3168850251010165, "grad_norm": 1.8581857681274414, "learning_rate": 1.8178941816242502e-05, "loss": 0.4522, "step": 20319 }, { "epoch": 3.317048283743521, "grad_norm": 1.8244327306747437, "learning_rate": 1.8178759228893108e-05, "loss": 0.4905, "step": 20320 }, { "epoch": 3.317211542386025, "grad_norm": 2.0474953651428223, "learning_rate": 1.81785766333077e-05, "loss": 0.4392, "step": 20321 }, { "epoch": 3.3173748010285293, "grad_norm": 2.1329870223999023, "learning_rate": 1.8178394029486473e-05, "loss": 0.5232, "step": 20322 }, { "epoch": 3.3175380596710338, "grad_norm": 1.6874444484710693, "learning_rate": 1.81782114174296e-05, "loss": 0.4342, "step": 20323 }, { "epoch": 3.317701318313538, "grad_norm": 2.0243730545043945, "learning_rate": 1.8178028797137273e-05, "loss": 0.483, "step": 20324 }, { "epoch": 3.3178645769560426, "grad_norm": 2.2408833503723145, "learning_rate": 1.8177846168609674e-05, "loss": 0.5728, "step": 20325 }, { "epoch": 3.318027835598547, "grad_norm": 1.7401039600372314, "learning_rate": 1.8177663531846983e-05, "loss": 0.4262, "step": 20326 }, { "epoch": 3.3181910942410515, "grad_norm": 1.889311671257019, "learning_rate": 1.817748088684939e-05, "loss": 0.463, "step": 20327 }, { "epoch": 3.318354352883556, "grad_norm": 2.1338582038879395, "learning_rate": 1.8177298233617075e-05, "loss": 0.4672, "step": 20328 }, { "epoch": 3.31851761152606, "grad_norm": 1.7884019613265991, "learning_rate": 1.8177115572150224e-05, "loss": 0.4643, "step": 20329 }, { "epoch": 3.3186808701685644, "grad_norm": 2.3842222690582275, "learning_rate": 1.8176932902449017e-05, "loss": 0.5795, "step": 20330 }, { "epoch": 3.318844128811069, "grad_norm": 2.0559210777282715, "learning_rate": 1.8176750224513643e-05, "loss": 0.4473, "step": 20331 }, { "epoch": 3.3190073874535733, "grad_norm": 1.8986507654190063, "learning_rate": 1.8176567538344283e-05, "loss": 0.4554, "step": 20332 }, { "epoch": 3.3191706460960777, "grad_norm": 2.023102045059204, "learning_rate": 1.8176384843941122e-05, "loss": 0.5893, "step": 20333 }, { "epoch": 3.319333904738582, "grad_norm": 1.5739802122116089, "learning_rate": 1.8176202141304343e-05, "loss": 0.3961, "step": 20334 }, { "epoch": 3.3194971633810866, "grad_norm": 1.8679440021514893, "learning_rate": 1.817601943043413e-05, "loss": 0.452, "step": 20335 }, { "epoch": 3.319660422023591, "grad_norm": 2.132244348526001, "learning_rate": 1.8175836711330668e-05, "loss": 0.4883, "step": 20336 }, { "epoch": 3.3198236806660955, "grad_norm": 1.9089148044586182, "learning_rate": 1.817565398399414e-05, "loss": 0.5156, "step": 20337 }, { "epoch": 3.3199869393086, "grad_norm": 2.0282983779907227, "learning_rate": 1.817547124842473e-05, "loss": 0.4556, "step": 20338 }, { "epoch": 3.320150197951104, "grad_norm": 2.5340042114257812, "learning_rate": 1.8175288504622625e-05, "loss": 0.537, "step": 20339 }, { "epoch": 3.3203134565936083, "grad_norm": 1.923071026802063, "learning_rate": 1.8175105752588006e-05, "loss": 0.4567, "step": 20340 }, { "epoch": 3.3204767152361128, "grad_norm": 1.3893684148788452, "learning_rate": 1.8174922992321057e-05, "loss": 0.3887, "step": 20341 }, { "epoch": 3.320639973878617, "grad_norm": 1.6957625150680542, "learning_rate": 1.8174740223821965e-05, "loss": 0.4009, "step": 20342 }, { "epoch": 3.3208032325211216, "grad_norm": 1.9410983324050903, "learning_rate": 1.8174557447090908e-05, "loss": 0.4342, "step": 20343 }, { "epoch": 3.320966491163626, "grad_norm": 1.8006094694137573, "learning_rate": 1.8174374662128076e-05, "loss": 0.4527, "step": 20344 }, { "epoch": 3.3211297498061305, "grad_norm": 1.9048916101455688, "learning_rate": 1.817419186893365e-05, "loss": 0.4374, "step": 20345 }, { "epoch": 3.3212930084486345, "grad_norm": 2.0203146934509277, "learning_rate": 1.8174009067507818e-05, "loss": 0.4719, "step": 20346 }, { "epoch": 3.321456267091139, "grad_norm": 1.9150710105895996, "learning_rate": 1.8173826257850758e-05, "loss": 0.4904, "step": 20347 }, { "epoch": 3.3216195257336434, "grad_norm": 1.9600164890289307, "learning_rate": 1.8173643439962658e-05, "loss": 0.4938, "step": 20348 }, { "epoch": 3.321782784376148, "grad_norm": 1.7538836002349854, "learning_rate": 1.81734606138437e-05, "loss": 0.4833, "step": 20349 }, { "epoch": 3.3219460430186523, "grad_norm": 1.7395457029342651, "learning_rate": 1.817327777949407e-05, "loss": 0.427, "step": 20350 }, { "epoch": 3.3221093016611567, "grad_norm": 1.6313354969024658, "learning_rate": 1.8173094936913952e-05, "loss": 0.4196, "step": 20351 }, { "epoch": 3.322272560303661, "grad_norm": 1.884286642074585, "learning_rate": 1.817291208610353e-05, "loss": 0.4704, "step": 20352 }, { "epoch": 3.3224358189461656, "grad_norm": 1.8478251695632935, "learning_rate": 1.8172729227062986e-05, "loss": 0.4083, "step": 20353 }, { "epoch": 3.32259907758867, "grad_norm": 1.6432524919509888, "learning_rate": 1.817254635979251e-05, "loss": 0.4687, "step": 20354 }, { "epoch": 3.3227623362311745, "grad_norm": 2.3702547550201416, "learning_rate": 1.8172363484292278e-05, "loss": 0.66, "step": 20355 }, { "epoch": 3.3229255948736784, "grad_norm": 1.6041650772094727, "learning_rate": 1.8172180600562478e-05, "loss": 0.3956, "step": 20356 }, { "epoch": 3.323088853516183, "grad_norm": 1.9689428806304932, "learning_rate": 1.8171997708603295e-05, "loss": 0.4668, "step": 20357 }, { "epoch": 3.3232521121586873, "grad_norm": 2.2656941413879395, "learning_rate": 1.8171814808414912e-05, "loss": 0.4591, "step": 20358 }, { "epoch": 3.3234153708011918, "grad_norm": 1.7730982303619385, "learning_rate": 1.8171631899997516e-05, "loss": 0.4635, "step": 20359 }, { "epoch": 3.323578629443696, "grad_norm": 1.696735143661499, "learning_rate": 1.8171448983351284e-05, "loss": 0.5173, "step": 20360 }, { "epoch": 3.3237418880862006, "grad_norm": 1.5843404531478882, "learning_rate": 1.817126605847641e-05, "loss": 0.4446, "step": 20361 }, { "epoch": 3.323905146728705, "grad_norm": 1.7821357250213623, "learning_rate": 1.8171083125373073e-05, "loss": 0.3998, "step": 20362 }, { "epoch": 3.3240684053712095, "grad_norm": 2.001666307449341, "learning_rate": 1.8170900184041454e-05, "loss": 0.4344, "step": 20363 }, { "epoch": 3.3242316640137135, "grad_norm": 1.8334686756134033, "learning_rate": 1.8170717234481744e-05, "loss": 0.4878, "step": 20364 }, { "epoch": 3.324394922656218, "grad_norm": 1.4344563484191895, "learning_rate": 1.817053427669412e-05, "loss": 0.4014, "step": 20365 }, { "epoch": 3.3245581812987224, "grad_norm": 1.8907151222229004, "learning_rate": 1.8170351310678777e-05, "loss": 0.4915, "step": 20366 }, { "epoch": 3.324721439941227, "grad_norm": 2.100950002670288, "learning_rate": 1.8170168336435886e-05, "loss": 0.4805, "step": 20367 }, { "epoch": 3.3248846985837313, "grad_norm": 1.7900173664093018, "learning_rate": 1.816998535396564e-05, "loss": 0.4133, "step": 20368 }, { "epoch": 3.3250479572262357, "grad_norm": 2.4166781902313232, "learning_rate": 1.8169802363268222e-05, "loss": 0.5121, "step": 20369 }, { "epoch": 3.32521121586874, "grad_norm": 1.8592642545700073, "learning_rate": 1.8169619364343814e-05, "loss": 0.4447, "step": 20370 }, { "epoch": 3.3253744745112446, "grad_norm": 2.234452486038208, "learning_rate": 1.8169436357192602e-05, "loss": 0.5525, "step": 20371 }, { "epoch": 3.325537733153749, "grad_norm": 1.5292949676513672, "learning_rate": 1.8169253341814766e-05, "loss": 0.4186, "step": 20372 }, { "epoch": 3.3257009917962534, "grad_norm": 2.250293493270874, "learning_rate": 1.81690703182105e-05, "loss": 0.5165, "step": 20373 }, { "epoch": 3.3258642504387574, "grad_norm": 1.7341219186782837, "learning_rate": 1.816888728637998e-05, "loss": 0.4464, "step": 20374 }, { "epoch": 3.326027509081262, "grad_norm": 1.8987751007080078, "learning_rate": 1.816870424632339e-05, "loss": 0.4408, "step": 20375 }, { "epoch": 3.3261907677237663, "grad_norm": 1.8574254512786865, "learning_rate": 1.816852119804092e-05, "loss": 0.4115, "step": 20376 }, { "epoch": 3.3263540263662708, "grad_norm": 1.792920708656311, "learning_rate": 1.8168338141532752e-05, "loss": 0.4868, "step": 20377 }, { "epoch": 3.326517285008775, "grad_norm": 2.088862180709839, "learning_rate": 1.8168155076799068e-05, "loss": 0.4887, "step": 20378 }, { "epoch": 3.3266805436512796, "grad_norm": 1.9212536811828613, "learning_rate": 1.8167972003840054e-05, "loss": 0.438, "step": 20379 }, { "epoch": 3.326843802293784, "grad_norm": 2.1211204528808594, "learning_rate": 1.8167788922655893e-05, "loss": 0.486, "step": 20380 }, { "epoch": 3.3270070609362885, "grad_norm": 1.939491629600525, "learning_rate": 1.8167605833246774e-05, "loss": 0.4845, "step": 20381 }, { "epoch": 3.3271703195787925, "grad_norm": 2.803906202316284, "learning_rate": 1.8167422735612877e-05, "loss": 0.6278, "step": 20382 }, { "epoch": 3.327333578221297, "grad_norm": 2.3518567085266113, "learning_rate": 1.8167239629754384e-05, "loss": 0.5842, "step": 20383 }, { "epoch": 3.3274968368638014, "grad_norm": 2.4680986404418945, "learning_rate": 1.8167056515671487e-05, "loss": 0.4801, "step": 20384 }, { "epoch": 3.327660095506306, "grad_norm": 2.1042282581329346, "learning_rate": 1.8166873393364364e-05, "loss": 0.5164, "step": 20385 }, { "epoch": 3.3278233541488103, "grad_norm": 2.0070688724517822, "learning_rate": 1.81666902628332e-05, "loss": 0.4965, "step": 20386 }, { "epoch": 3.3279866127913147, "grad_norm": 2.1696267127990723, "learning_rate": 1.816650712407819e-05, "loss": 0.5453, "step": 20387 }, { "epoch": 3.328149871433819, "grad_norm": 2.0343146324157715, "learning_rate": 1.81663239770995e-05, "loss": 0.479, "step": 20388 }, { "epoch": 3.3283131300763236, "grad_norm": 2.405569553375244, "learning_rate": 1.8166140821897327e-05, "loss": 0.5736, "step": 20389 }, { "epoch": 3.328476388718828, "grad_norm": 2.2891407012939453, "learning_rate": 1.8165957658471854e-05, "loss": 0.537, "step": 20390 }, { "epoch": 3.328639647361332, "grad_norm": 2.080235242843628, "learning_rate": 1.8165774486823262e-05, "loss": 0.4772, "step": 20391 }, { "epoch": 3.3288029060038364, "grad_norm": 1.91862154006958, "learning_rate": 1.8165591306951736e-05, "loss": 0.4361, "step": 20392 }, { "epoch": 3.328966164646341, "grad_norm": 2.512232542037964, "learning_rate": 1.8165408118857465e-05, "loss": 0.5214, "step": 20393 }, { "epoch": 3.3291294232888453, "grad_norm": 2.0169804096221924, "learning_rate": 1.8165224922540624e-05, "loss": 0.4475, "step": 20394 }, { "epoch": 3.3292926819313498, "grad_norm": 1.691528081893921, "learning_rate": 1.816504171800141e-05, "loss": 0.4775, "step": 20395 }, { "epoch": 3.329455940573854, "grad_norm": 2.761024236679077, "learning_rate": 1.816485850524e-05, "loss": 0.4676, "step": 20396 }, { "epoch": 3.3296191992163586, "grad_norm": 2.087334394454956, "learning_rate": 1.816467528425658e-05, "loss": 0.5417, "step": 20397 }, { "epoch": 3.329782457858863, "grad_norm": 1.6557435989379883, "learning_rate": 1.816449205505133e-05, "loss": 0.4164, "step": 20398 }, { "epoch": 3.329945716501367, "grad_norm": 1.5311428308486938, "learning_rate": 1.8164308817624445e-05, "loss": 0.4098, "step": 20399 }, { "epoch": 3.3301089751438715, "grad_norm": 1.9091377258300781, "learning_rate": 1.81641255719761e-05, "loss": 0.609, "step": 20400 }, { "epoch": 3.330272233786376, "grad_norm": 1.733169436454773, "learning_rate": 1.816394231810648e-05, "loss": 0.4755, "step": 20401 }, { "epoch": 3.3304354924288804, "grad_norm": 1.7300328016281128, "learning_rate": 1.8163759056015778e-05, "loss": 0.4717, "step": 20402 }, { "epoch": 3.330598751071385, "grad_norm": 2.0304982662200928, "learning_rate": 1.816357578570417e-05, "loss": 0.3754, "step": 20403 }, { "epoch": 3.3307620097138892, "grad_norm": 1.5319933891296387, "learning_rate": 1.816339250717184e-05, "loss": 0.3711, "step": 20404 }, { "epoch": 3.3309252683563937, "grad_norm": 1.9265180826187134, "learning_rate": 1.816320922041898e-05, "loss": 0.4937, "step": 20405 }, { "epoch": 3.331088526998898, "grad_norm": 1.7618565559387207, "learning_rate": 1.816302592544577e-05, "loss": 0.4781, "step": 20406 }, { "epoch": 3.3312517856414026, "grad_norm": 1.9373869895935059, "learning_rate": 1.8162842622252395e-05, "loss": 0.4547, "step": 20407 }, { "epoch": 3.331415044283907, "grad_norm": 1.777408480644226, "learning_rate": 1.8162659310839038e-05, "loss": 0.4798, "step": 20408 }, { "epoch": 3.331578302926411, "grad_norm": 1.9085235595703125, "learning_rate": 1.8162475991205886e-05, "loss": 0.4011, "step": 20409 }, { "epoch": 3.3317415615689154, "grad_norm": 1.7119228839874268, "learning_rate": 1.8162292663353126e-05, "loss": 0.4011, "step": 20410 }, { "epoch": 3.33190482021142, "grad_norm": 2.2212769985198975, "learning_rate": 1.8162109327280935e-05, "loss": 0.482, "step": 20411 }, { "epoch": 3.3320680788539243, "grad_norm": 2.119373321533203, "learning_rate": 1.8161925982989502e-05, "loss": 0.546, "step": 20412 }, { "epoch": 3.3322313374964287, "grad_norm": 2.115652322769165, "learning_rate": 1.8161742630479014e-05, "loss": 0.5121, "step": 20413 }, { "epoch": 3.332394596138933, "grad_norm": 2.2811365127563477, "learning_rate": 1.816155926974965e-05, "loss": 0.4935, "step": 20414 }, { "epoch": 3.3325578547814376, "grad_norm": 1.863740086555481, "learning_rate": 1.8161375900801603e-05, "loss": 0.4492, "step": 20415 }, { "epoch": 3.332721113423942, "grad_norm": 1.7981088161468506, "learning_rate": 1.8161192523635053e-05, "loss": 0.4289, "step": 20416 }, { "epoch": 3.332884372066446, "grad_norm": 2.7033755779266357, "learning_rate": 1.8161009138250182e-05, "loss": 0.5536, "step": 20417 }, { "epoch": 3.3330476307089505, "grad_norm": 1.8190340995788574, "learning_rate": 1.8160825744647175e-05, "loss": 0.4778, "step": 20418 }, { "epoch": 3.333210889351455, "grad_norm": 2.2523584365844727, "learning_rate": 1.816064234282622e-05, "loss": 0.5104, "step": 20419 }, { "epoch": 3.3333741479939594, "grad_norm": 2.179523468017578, "learning_rate": 1.81604589327875e-05, "loss": 0.4302, "step": 20420 }, { "epoch": 3.333537406636464, "grad_norm": 1.594185471534729, "learning_rate": 1.8160275514531202e-05, "loss": 0.4169, "step": 20421 }, { "epoch": 3.3337006652789682, "grad_norm": 3.004140853881836, "learning_rate": 1.816009208805751e-05, "loss": 0.5456, "step": 20422 }, { "epoch": 3.3338639239214727, "grad_norm": 1.600839376449585, "learning_rate": 1.81599086533666e-05, "loss": 0.4282, "step": 20423 }, { "epoch": 3.334027182563977, "grad_norm": 1.8840551376342773, "learning_rate": 1.8159725210458668e-05, "loss": 0.5653, "step": 20424 }, { "epoch": 3.3341904412064816, "grad_norm": 1.8739546537399292, "learning_rate": 1.8159541759333897e-05, "loss": 0.4551, "step": 20425 }, { "epoch": 3.334353699848986, "grad_norm": 1.77106511592865, "learning_rate": 1.815935829999247e-05, "loss": 0.4503, "step": 20426 }, { "epoch": 3.33451695849149, "grad_norm": 1.7347135543823242, "learning_rate": 1.815917483243457e-05, "loss": 0.4003, "step": 20427 }, { "epoch": 3.3346802171339944, "grad_norm": 1.9933667182922363, "learning_rate": 1.815899135666038e-05, "loss": 0.4459, "step": 20428 }, { "epoch": 3.334843475776499, "grad_norm": 2.162950277328491, "learning_rate": 1.8158807872670092e-05, "loss": 0.4854, "step": 20429 }, { "epoch": 3.3350067344190033, "grad_norm": 2.404553174972534, "learning_rate": 1.8158624380463888e-05, "loss": 0.5738, "step": 20430 }, { "epoch": 3.3351699930615077, "grad_norm": 1.7177079916000366, "learning_rate": 1.8158440880041945e-05, "loss": 0.4526, "step": 20431 }, { "epoch": 3.335333251704012, "grad_norm": 2.286064863204956, "learning_rate": 1.815825737140446e-05, "loss": 0.5423, "step": 20432 }, { "epoch": 3.3354965103465166, "grad_norm": 1.9171476364135742, "learning_rate": 1.815807385455161e-05, "loss": 0.5267, "step": 20433 }, { "epoch": 3.3356597689890206, "grad_norm": 1.730269432067871, "learning_rate": 1.8157890329483582e-05, "loss": 0.4732, "step": 20434 }, { "epoch": 3.335823027631525, "grad_norm": 2.401296854019165, "learning_rate": 1.815770679620056e-05, "loss": 0.5015, "step": 20435 }, { "epoch": 3.3359862862740295, "grad_norm": 2.2721574306488037, "learning_rate": 1.815752325470273e-05, "loss": 0.5173, "step": 20436 }, { "epoch": 3.336149544916534, "grad_norm": 1.756759762763977, "learning_rate": 1.8157339704990275e-05, "loss": 0.398, "step": 20437 }, { "epoch": 3.3363128035590384, "grad_norm": 1.8358687162399292, "learning_rate": 1.8157156147063384e-05, "loss": 0.4497, "step": 20438 }, { "epoch": 3.336476062201543, "grad_norm": 1.9853205680847168, "learning_rate": 1.8156972580922235e-05, "loss": 0.4666, "step": 20439 }, { "epoch": 3.3366393208440472, "grad_norm": 2.1346147060394287, "learning_rate": 1.8156789006567018e-05, "loss": 0.4936, "step": 20440 }, { "epoch": 3.3368025794865517, "grad_norm": 2.0849807262420654, "learning_rate": 1.815660542399792e-05, "loss": 0.4853, "step": 20441 }, { "epoch": 3.336965838129056, "grad_norm": 2.407816171646118, "learning_rate": 1.8156421833215118e-05, "loss": 0.603, "step": 20442 }, { "epoch": 3.3371290967715606, "grad_norm": 1.985344648361206, "learning_rate": 1.8156238234218808e-05, "loss": 0.5019, "step": 20443 }, { "epoch": 3.3372923554140645, "grad_norm": 1.7194336652755737, "learning_rate": 1.815605462700916e-05, "loss": 0.4014, "step": 20444 }, { "epoch": 3.337455614056569, "grad_norm": 1.7539669275283813, "learning_rate": 1.815587101158637e-05, "loss": 0.4456, "step": 20445 }, { "epoch": 3.3376188726990734, "grad_norm": 2.0703439712524414, "learning_rate": 1.815568738795062e-05, "loss": 0.4243, "step": 20446 }, { "epoch": 3.337782131341578, "grad_norm": 1.8811603784561157, "learning_rate": 1.8155503756102098e-05, "loss": 0.4396, "step": 20447 }, { "epoch": 3.3379453899840823, "grad_norm": 1.8931646347045898, "learning_rate": 1.8155320116040983e-05, "loss": 0.516, "step": 20448 }, { "epoch": 3.3381086486265867, "grad_norm": 1.8584785461425781, "learning_rate": 1.8155136467767464e-05, "loss": 0.4465, "step": 20449 }, { "epoch": 3.338271907269091, "grad_norm": 2.1201767921447754, "learning_rate": 1.8154952811281723e-05, "loss": 0.4518, "step": 20450 }, { "epoch": 3.3384351659115956, "grad_norm": 1.8643665313720703, "learning_rate": 1.8154769146583947e-05, "loss": 0.4294, "step": 20451 }, { "epoch": 3.3385984245540996, "grad_norm": 1.7507836818695068, "learning_rate": 1.8154585473674325e-05, "loss": 0.4665, "step": 20452 }, { "epoch": 3.338761683196604, "grad_norm": 1.7054810523986816, "learning_rate": 1.815440179255303e-05, "loss": 0.4587, "step": 20453 }, { "epoch": 3.3389249418391085, "grad_norm": 1.8052594661712646, "learning_rate": 1.815421810322026e-05, "loss": 0.4512, "step": 20454 }, { "epoch": 3.339088200481613, "grad_norm": 1.989746332168579, "learning_rate": 1.8154034405676194e-05, "loss": 0.5174, "step": 20455 }, { "epoch": 3.3392514591241174, "grad_norm": 1.907233715057373, "learning_rate": 1.8153850699921016e-05, "loss": 0.4263, "step": 20456 }, { "epoch": 3.339414717766622, "grad_norm": 2.063629150390625, "learning_rate": 1.8153666985954915e-05, "loss": 0.4794, "step": 20457 }, { "epoch": 3.3395779764091262, "grad_norm": 2.093867778778076, "learning_rate": 1.815348326377807e-05, "loss": 0.482, "step": 20458 }, { "epoch": 3.3397412350516307, "grad_norm": 1.7883585691452026, "learning_rate": 1.8153299533390672e-05, "loss": 0.4409, "step": 20459 }, { "epoch": 3.339904493694135, "grad_norm": 2.0235743522644043, "learning_rate": 1.8153115794792904e-05, "loss": 0.4707, "step": 20460 }, { "epoch": 3.3400677523366396, "grad_norm": 2.239682197570801, "learning_rate": 1.815293204798495e-05, "loss": 0.4801, "step": 20461 }, { "epoch": 3.3402310109791435, "grad_norm": 2.157569169998169, "learning_rate": 1.8152748292966993e-05, "loss": 0.4812, "step": 20462 }, { "epoch": 3.340394269621648, "grad_norm": 2.431809663772583, "learning_rate": 1.8152564529739224e-05, "loss": 0.497, "step": 20463 }, { "epoch": 3.3405575282641524, "grad_norm": 1.8112183809280396, "learning_rate": 1.8152380758301822e-05, "loss": 0.5106, "step": 20464 }, { "epoch": 3.340720786906657, "grad_norm": 1.853445291519165, "learning_rate": 1.8152196978654974e-05, "loss": 0.4846, "step": 20465 }, { "epoch": 3.3408840455491613, "grad_norm": 1.8728126287460327, "learning_rate": 1.8152013190798868e-05, "loss": 0.4314, "step": 20466 }, { "epoch": 3.3410473041916657, "grad_norm": 2.1227707862854004, "learning_rate": 1.8151829394733688e-05, "loss": 0.5092, "step": 20467 }, { "epoch": 3.34121056283417, "grad_norm": 1.9822040796279907, "learning_rate": 1.8151645590459617e-05, "loss": 0.5006, "step": 20468 }, { "epoch": 3.3413738214766746, "grad_norm": 2.231862783432007, "learning_rate": 1.815146177797684e-05, "loss": 0.5125, "step": 20469 }, { "epoch": 3.3415370801191786, "grad_norm": 2.221327781677246, "learning_rate": 1.815127795728554e-05, "loss": 0.5297, "step": 20470 }, { "epoch": 3.341700338761683, "grad_norm": 1.7844561338424683, "learning_rate": 1.8151094128385912e-05, "loss": 0.392, "step": 20471 }, { "epoch": 3.3418635974041875, "grad_norm": 1.815636157989502, "learning_rate": 1.8150910291278134e-05, "loss": 0.4417, "step": 20472 }, { "epoch": 3.342026856046692, "grad_norm": 1.9935779571533203, "learning_rate": 1.8150726445962386e-05, "loss": 0.47, "step": 20473 }, { "epoch": 3.3421901146891964, "grad_norm": 1.8004640340805054, "learning_rate": 1.8150542592438865e-05, "loss": 0.5022, "step": 20474 }, { "epoch": 3.342353373331701, "grad_norm": 1.6365830898284912, "learning_rate": 1.8150358730707744e-05, "loss": 0.4006, "step": 20475 }, { "epoch": 3.3425166319742052, "grad_norm": 1.9302080869674683, "learning_rate": 1.8150174860769216e-05, "loss": 0.4921, "step": 20476 }, { "epoch": 3.3426798906167097, "grad_norm": 1.7574162483215332, "learning_rate": 1.8149990982623467e-05, "loss": 0.4718, "step": 20477 }, { "epoch": 3.342843149259214, "grad_norm": 2.332559108734131, "learning_rate": 1.8149807096270674e-05, "loss": 0.6356, "step": 20478 }, { "epoch": 3.3430064079017185, "grad_norm": 1.8673624992370605, "learning_rate": 1.814962320171103e-05, "loss": 0.5341, "step": 20479 }, { "epoch": 3.3431696665442225, "grad_norm": 1.9311434030532837, "learning_rate": 1.8149439298944716e-05, "loss": 0.4038, "step": 20480 }, { "epoch": 3.343332925186727, "grad_norm": 2.352257251739502, "learning_rate": 1.814925538797192e-05, "loss": 0.6742, "step": 20481 }, { "epoch": 3.3434961838292314, "grad_norm": 1.6562236547470093, "learning_rate": 1.814907146879283e-05, "loss": 0.4247, "step": 20482 }, { "epoch": 3.343659442471736, "grad_norm": 1.9376901388168335, "learning_rate": 1.8148887541407623e-05, "loss": 0.4557, "step": 20483 }, { "epoch": 3.3438227011142403, "grad_norm": 1.7061517238616943, "learning_rate": 1.8148703605816486e-05, "loss": 0.4749, "step": 20484 }, { "epoch": 3.3439859597567447, "grad_norm": 1.7881053686141968, "learning_rate": 1.8148519662019613e-05, "loss": 0.4414, "step": 20485 }, { "epoch": 3.344149218399249, "grad_norm": 1.4285862445831299, "learning_rate": 1.814833571001718e-05, "loss": 0.3337, "step": 20486 }, { "epoch": 3.344312477041753, "grad_norm": 1.8841406106948853, "learning_rate": 1.8148151749809373e-05, "loss": 0.4888, "step": 20487 }, { "epoch": 3.3444757356842576, "grad_norm": 2.093477487564087, "learning_rate": 1.814796778139638e-05, "loss": 0.51, "step": 20488 }, { "epoch": 3.344638994326762, "grad_norm": 1.5075596570968628, "learning_rate": 1.8147783804778386e-05, "loss": 0.396, "step": 20489 }, { "epoch": 3.3448022529692665, "grad_norm": 1.6664100885391235, "learning_rate": 1.814759981995558e-05, "loss": 0.4356, "step": 20490 }, { "epoch": 3.344965511611771, "grad_norm": 1.7101342678070068, "learning_rate": 1.814741582692814e-05, "loss": 0.4221, "step": 20491 }, { "epoch": 3.3451287702542754, "grad_norm": 2.1660056114196777, "learning_rate": 1.814723182569625e-05, "loss": 0.479, "step": 20492 }, { "epoch": 3.34529202889678, "grad_norm": 2.0331382751464844, "learning_rate": 1.8147047816260108e-05, "loss": 0.4474, "step": 20493 }, { "epoch": 3.3454552875392842, "grad_norm": 1.7379258871078491, "learning_rate": 1.8146863798619884e-05, "loss": 0.4185, "step": 20494 }, { "epoch": 3.3456185461817887, "grad_norm": 1.736324667930603, "learning_rate": 1.8146679772775774e-05, "loss": 0.4994, "step": 20495 }, { "epoch": 3.345781804824293, "grad_norm": 1.7453954219818115, "learning_rate": 1.814649573872796e-05, "loss": 0.4531, "step": 20496 }, { "epoch": 3.345945063466797, "grad_norm": 1.8297977447509766, "learning_rate": 1.814631169647662e-05, "loss": 0.4609, "step": 20497 }, { "epoch": 3.3461083221093015, "grad_norm": 1.6236201524734497, "learning_rate": 1.8146127646021955e-05, "loss": 0.4367, "step": 20498 }, { "epoch": 3.346271580751806, "grad_norm": 1.8406190872192383, "learning_rate": 1.8145943587364138e-05, "loss": 0.4384, "step": 20499 }, { "epoch": 3.3464348393943104, "grad_norm": 1.893052101135254, "learning_rate": 1.814575952050336e-05, "loss": 0.4899, "step": 20500 }, { "epoch": 3.346598098036815, "grad_norm": 1.7142345905303955, "learning_rate": 1.81455754454398e-05, "loss": 0.3871, "step": 20501 }, { "epoch": 3.3467613566793193, "grad_norm": 2.564931869506836, "learning_rate": 1.8145391362173653e-05, "loss": 1.0924, "step": 20502 }, { "epoch": 3.3469246153218237, "grad_norm": 2.046145439147949, "learning_rate": 1.8145207270705095e-05, "loss": 0.5529, "step": 20503 }, { "epoch": 3.347087873964328, "grad_norm": 2.0872445106506348, "learning_rate": 1.814502317103432e-05, "loss": 0.4438, "step": 20504 }, { "epoch": 3.347251132606832, "grad_norm": 2.3191442489624023, "learning_rate": 1.8144839063161505e-05, "loss": 0.5039, "step": 20505 }, { "epoch": 3.3474143912493366, "grad_norm": 1.9184647798538208, "learning_rate": 1.8144654947086837e-05, "loss": 0.4804, "step": 20506 }, { "epoch": 3.347577649891841, "grad_norm": 2.4591894149780273, "learning_rate": 1.814447082281051e-05, "loss": 0.5648, "step": 20507 }, { "epoch": 3.3477409085343455, "grad_norm": 1.8312809467315674, "learning_rate": 1.81442866903327e-05, "loss": 0.4629, "step": 20508 }, { "epoch": 3.34790416717685, "grad_norm": 2.0513343811035156, "learning_rate": 1.814410254965359e-05, "loss": 0.4777, "step": 20509 }, { "epoch": 3.3480674258193543, "grad_norm": 2.081883430480957, "learning_rate": 1.8143918400773378e-05, "loss": 0.4571, "step": 20510 }, { "epoch": 3.348230684461859, "grad_norm": 1.7126662731170654, "learning_rate": 1.8143734243692243e-05, "loss": 0.4404, "step": 20511 }, { "epoch": 3.3483939431043632, "grad_norm": 1.8917064666748047, "learning_rate": 1.8143550078410365e-05, "loss": 0.5032, "step": 20512 }, { "epoch": 3.3485572017468677, "grad_norm": 1.7663758993148804, "learning_rate": 1.8143365904927938e-05, "loss": 0.4546, "step": 20513 }, { "epoch": 3.348720460389372, "grad_norm": 2.0660901069641113, "learning_rate": 1.814318172324514e-05, "loss": 0.5295, "step": 20514 }, { "epoch": 3.348883719031876, "grad_norm": 2.092346429824829, "learning_rate": 1.8142997533362163e-05, "loss": 0.4759, "step": 20515 }, { "epoch": 3.3490469776743805, "grad_norm": 2.0614898204803467, "learning_rate": 1.814281333527919e-05, "loss": 0.4777, "step": 20516 }, { "epoch": 3.349210236316885, "grad_norm": 1.9404205083847046, "learning_rate": 1.8142629128996403e-05, "loss": 0.4448, "step": 20517 }, { "epoch": 3.3493734949593894, "grad_norm": 2.0142102241516113, "learning_rate": 1.8142444914513993e-05, "loss": 0.5193, "step": 20518 }, { "epoch": 3.349536753601894, "grad_norm": 1.6887385845184326, "learning_rate": 1.8142260691832142e-05, "loss": 0.4366, "step": 20519 }, { "epoch": 3.3497000122443983, "grad_norm": 1.7132853269577026, "learning_rate": 1.814207646095104e-05, "loss": 0.4232, "step": 20520 }, { "epoch": 3.3498632708869027, "grad_norm": 2.1071219444274902, "learning_rate": 1.8141892221870866e-05, "loss": 0.4948, "step": 20521 }, { "epoch": 3.3500265295294067, "grad_norm": 2.070240020751953, "learning_rate": 1.8141707974591808e-05, "loss": 0.4988, "step": 20522 }, { "epoch": 3.350189788171911, "grad_norm": 2.3231852054595947, "learning_rate": 1.8141523719114054e-05, "loss": 0.4686, "step": 20523 }, { "epoch": 3.3503530468144156, "grad_norm": 1.999793529510498, "learning_rate": 1.8141339455437786e-05, "loss": 0.5001, "step": 20524 }, { "epoch": 3.35051630545692, "grad_norm": 1.9751548767089844, "learning_rate": 1.8141155183563195e-05, "loss": 0.5546, "step": 20525 }, { "epoch": 3.3506795640994245, "grad_norm": 1.7495816946029663, "learning_rate": 1.814097090349046e-05, "loss": 0.4822, "step": 20526 }, { "epoch": 3.350842822741929, "grad_norm": 1.9172676801681519, "learning_rate": 1.8140786615219768e-05, "loss": 0.4554, "step": 20527 }, { "epoch": 3.3510060813844333, "grad_norm": 1.8145374059677124, "learning_rate": 1.8140602318751308e-05, "loss": 0.4208, "step": 20528 }, { "epoch": 3.351169340026938, "grad_norm": 2.1258764266967773, "learning_rate": 1.8140418014085263e-05, "loss": 0.5478, "step": 20529 }, { "epoch": 3.351332598669442, "grad_norm": 1.9722189903259277, "learning_rate": 1.8140233701221822e-05, "loss": 0.4608, "step": 20530 }, { "epoch": 3.3514958573119467, "grad_norm": 1.9045885801315308, "learning_rate": 1.8140049380161167e-05, "loss": 0.46, "step": 20531 }, { "epoch": 3.3516591159544507, "grad_norm": 1.9042412042617798, "learning_rate": 1.813986505090348e-05, "loss": 0.4225, "step": 20532 }, { "epoch": 3.351822374596955, "grad_norm": 1.7985730171203613, "learning_rate": 1.8139680713448954e-05, "loss": 0.4257, "step": 20533 }, { "epoch": 3.3519856332394595, "grad_norm": 1.8167048692703247, "learning_rate": 1.8139496367797776e-05, "loss": 0.432, "step": 20534 }, { "epoch": 3.352148891881964, "grad_norm": 1.7752690315246582, "learning_rate": 1.813931201395012e-05, "loss": 0.4264, "step": 20535 }, { "epoch": 3.3523121505244684, "grad_norm": 1.6764217615127563, "learning_rate": 1.8139127651906183e-05, "loss": 0.3962, "step": 20536 }, { "epoch": 3.352475409166973, "grad_norm": 1.9930124282836914, "learning_rate": 1.8138943281666144e-05, "loss": 0.4823, "step": 20537 }, { "epoch": 3.3526386678094773, "grad_norm": 2.174823760986328, "learning_rate": 1.8138758903230193e-05, "loss": 0.4549, "step": 20538 }, { "epoch": 3.3528019264519817, "grad_norm": 1.602498173713684, "learning_rate": 1.8138574516598515e-05, "loss": 0.4129, "step": 20539 }, { "epoch": 3.3529651850944857, "grad_norm": 2.0349392890930176, "learning_rate": 1.8138390121771295e-05, "loss": 0.5741, "step": 20540 }, { "epoch": 3.35312844373699, "grad_norm": 2.175269365310669, "learning_rate": 1.8138205718748715e-05, "loss": 0.527, "step": 20541 }, { "epoch": 3.3532917023794946, "grad_norm": 2.1580660343170166, "learning_rate": 1.8138021307530967e-05, "loss": 0.5282, "step": 20542 }, { "epoch": 3.353454961021999, "grad_norm": 1.9116677045822144, "learning_rate": 1.8137836888118232e-05, "loss": 0.4895, "step": 20543 }, { "epoch": 3.3536182196645035, "grad_norm": 1.9926847219467163, "learning_rate": 1.8137652460510698e-05, "loss": 0.4833, "step": 20544 }, { "epoch": 3.353781478307008, "grad_norm": 1.8818891048431396, "learning_rate": 1.813746802470855e-05, "loss": 0.3499, "step": 20545 }, { "epoch": 3.3539447369495123, "grad_norm": 1.7550848722457886, "learning_rate": 1.8137283580711975e-05, "loss": 0.4361, "step": 20546 }, { "epoch": 3.3541079955920168, "grad_norm": 2.0029103755950928, "learning_rate": 1.8137099128521156e-05, "loss": 0.4665, "step": 20547 }, { "epoch": 3.354271254234521, "grad_norm": 1.9828451871871948, "learning_rate": 1.813691466813628e-05, "loss": 0.5079, "step": 20548 }, { "epoch": 3.3544345128770257, "grad_norm": 1.849475622177124, "learning_rate": 1.8136730199557534e-05, "loss": 0.535, "step": 20549 }, { "epoch": 3.3545977715195296, "grad_norm": 1.8641092777252197, "learning_rate": 1.81365457227851e-05, "loss": 0.4842, "step": 20550 }, { "epoch": 3.354761030162034, "grad_norm": 3.08954119682312, "learning_rate": 1.8136361237819172e-05, "loss": 0.5584, "step": 20551 }, { "epoch": 3.3549242888045385, "grad_norm": 1.7667384147644043, "learning_rate": 1.8136176744659927e-05, "loss": 0.4141, "step": 20552 }, { "epoch": 3.355087547447043, "grad_norm": 1.8548328876495361, "learning_rate": 1.8135992243307556e-05, "loss": 0.5376, "step": 20553 }, { "epoch": 3.3552508060895474, "grad_norm": 2.02778697013855, "learning_rate": 1.8135807733762242e-05, "loss": 0.5553, "step": 20554 }, { "epoch": 3.355414064732052, "grad_norm": 1.666226863861084, "learning_rate": 1.8135623216024175e-05, "loss": 0.4185, "step": 20555 }, { "epoch": 3.3555773233745563, "grad_norm": 1.8624898195266724, "learning_rate": 1.813543869009353e-05, "loss": 0.4806, "step": 20556 }, { "epoch": 3.3557405820170607, "grad_norm": 1.7717310190200806, "learning_rate": 1.8135254155970505e-05, "loss": 0.4083, "step": 20557 }, { "epoch": 3.3559038406595647, "grad_norm": 2.044231414794922, "learning_rate": 1.813506961365528e-05, "loss": 0.4641, "step": 20558 }, { "epoch": 3.356067099302069, "grad_norm": 2.037903070449829, "learning_rate": 1.8134885063148045e-05, "loss": 0.5185, "step": 20559 }, { "epoch": 3.3562303579445736, "grad_norm": 1.8099033832550049, "learning_rate": 1.8134700504448982e-05, "loss": 0.4809, "step": 20560 }, { "epoch": 3.356393616587078, "grad_norm": 1.7853621244430542, "learning_rate": 1.8134515937558274e-05, "loss": 0.4598, "step": 20561 }, { "epoch": 3.3565568752295825, "grad_norm": 1.7285994291305542, "learning_rate": 1.8134331362476113e-05, "loss": 0.418, "step": 20562 }, { "epoch": 3.356720133872087, "grad_norm": 2.190115213394165, "learning_rate": 1.8134146779202683e-05, "loss": 0.5592, "step": 20563 }, { "epoch": 3.3568833925145913, "grad_norm": 1.9711472988128662, "learning_rate": 1.813396218773817e-05, "loss": 0.5244, "step": 20564 }, { "epoch": 3.3570466511570958, "grad_norm": 1.6040784120559692, "learning_rate": 1.813377758808276e-05, "loss": 0.3703, "step": 20565 }, { "epoch": 3.3572099097996, "grad_norm": 1.7316110134124756, "learning_rate": 1.8133592980236634e-05, "loss": 0.4488, "step": 20566 }, { "epoch": 3.3573731684421046, "grad_norm": 1.939969539642334, "learning_rate": 1.8133408364199986e-05, "loss": 0.4552, "step": 20567 }, { "epoch": 3.3575364270846086, "grad_norm": 2.369743824005127, "learning_rate": 1.8133223739972994e-05, "loss": 0.7037, "step": 20568 }, { "epoch": 3.357699685727113, "grad_norm": 1.9792144298553467, "learning_rate": 1.813303910755585e-05, "loss": 0.5101, "step": 20569 }, { "epoch": 3.3578629443696175, "grad_norm": 1.6912111043930054, "learning_rate": 1.8132854466948743e-05, "loss": 0.4433, "step": 20570 }, { "epoch": 3.358026203012122, "grad_norm": 2.1179842948913574, "learning_rate": 1.8132669818151846e-05, "loss": 0.5431, "step": 20571 }, { "epoch": 3.3581894616546264, "grad_norm": 1.5819098949432373, "learning_rate": 1.8132485161165357e-05, "loss": 0.4014, "step": 20572 }, { "epoch": 3.358352720297131, "grad_norm": 2.0001494884490967, "learning_rate": 1.8132300495989455e-05, "loss": 0.4311, "step": 20573 }, { "epoch": 3.3585159789396353, "grad_norm": 2.3396646976470947, "learning_rate": 1.813211582262433e-05, "loss": 0.6005, "step": 20574 }, { "epoch": 3.3586792375821393, "grad_norm": 2.243364095687866, "learning_rate": 1.8131931141070166e-05, "loss": 0.6047, "step": 20575 }, { "epoch": 3.3588424962246437, "grad_norm": 1.9463287591934204, "learning_rate": 1.813174645132715e-05, "loss": 0.4174, "step": 20576 }, { "epoch": 3.359005754867148, "grad_norm": 1.727419376373291, "learning_rate": 1.8131561753395465e-05, "loss": 0.4291, "step": 20577 }, { "epoch": 3.3591690135096526, "grad_norm": 2.000072956085205, "learning_rate": 1.8131377047275302e-05, "loss": 0.4447, "step": 20578 }, { "epoch": 3.359332272152157, "grad_norm": 2.13973331451416, "learning_rate": 1.8131192332966844e-05, "loss": 0.5461, "step": 20579 }, { "epoch": 3.3594955307946615, "grad_norm": 1.7686837911605835, "learning_rate": 1.8131007610470278e-05, "loss": 0.4464, "step": 20580 }, { "epoch": 3.359658789437166, "grad_norm": 2.2557382583618164, "learning_rate": 1.813082287978579e-05, "loss": 0.5576, "step": 20581 }, { "epoch": 3.3598220480796703, "grad_norm": 1.7695143222808838, "learning_rate": 1.8130638140913563e-05, "loss": 0.4171, "step": 20582 }, { "epoch": 3.3599853067221748, "grad_norm": 2.022392749786377, "learning_rate": 1.813045339385379e-05, "loss": 0.4201, "step": 20583 }, { "epoch": 3.360148565364679, "grad_norm": 1.9426640272140503, "learning_rate": 1.8130268638606647e-05, "loss": 0.4468, "step": 20584 }, { "epoch": 3.360311824007183, "grad_norm": 1.763593316078186, "learning_rate": 1.813008387517233e-05, "loss": 0.4474, "step": 20585 }, { "epoch": 3.3604750826496876, "grad_norm": 2.0189476013183594, "learning_rate": 1.8129899103551016e-05, "loss": 0.4091, "step": 20586 }, { "epoch": 3.360638341292192, "grad_norm": 1.5402759313583374, "learning_rate": 1.81297143237429e-05, "loss": 0.3783, "step": 20587 }, { "epoch": 3.3608015999346965, "grad_norm": 1.975188970565796, "learning_rate": 1.8129529535748162e-05, "loss": 0.5018, "step": 20588 }, { "epoch": 3.360964858577201, "grad_norm": 2.0084519386291504, "learning_rate": 1.812934473956699e-05, "loss": 0.4748, "step": 20589 }, { "epoch": 3.3611281172197054, "grad_norm": 2.3451759815216064, "learning_rate": 1.812915993519957e-05, "loss": 0.8123, "step": 20590 }, { "epoch": 3.36129137586221, "grad_norm": 2.118159055709839, "learning_rate": 1.8128975122646092e-05, "loss": 0.5162, "step": 20591 }, { "epoch": 3.3614546345047143, "grad_norm": 2.030762195587158, "learning_rate": 1.8128790301906735e-05, "loss": 0.4857, "step": 20592 }, { "epoch": 3.3616178931472183, "grad_norm": 1.8423209190368652, "learning_rate": 1.8128605472981685e-05, "loss": 0.4621, "step": 20593 }, { "epoch": 3.3617811517897227, "grad_norm": 1.6340343952178955, "learning_rate": 1.8128420635871136e-05, "loss": 0.4252, "step": 20594 }, { "epoch": 3.361944410432227, "grad_norm": 1.9118565320968628, "learning_rate": 1.8128235790575267e-05, "loss": 0.4627, "step": 20595 }, { "epoch": 3.3621076690747316, "grad_norm": 1.9635767936706543, "learning_rate": 1.8128050937094267e-05, "loss": 0.5279, "step": 20596 }, { "epoch": 3.362270927717236, "grad_norm": 2.323089838027954, "learning_rate": 1.8127866075428325e-05, "loss": 0.5343, "step": 20597 }, { "epoch": 3.3624341863597405, "grad_norm": 2.045599937438965, "learning_rate": 1.812768120557762e-05, "loss": 0.5085, "step": 20598 }, { "epoch": 3.362597445002245, "grad_norm": 2.1568055152893066, "learning_rate": 1.8127496327542343e-05, "loss": 0.5935, "step": 20599 }, { "epoch": 3.3627607036447493, "grad_norm": 1.915115237236023, "learning_rate": 1.812731144132268e-05, "loss": 0.4785, "step": 20600 }, { "epoch": 3.3629239622872538, "grad_norm": 2.1450467109680176, "learning_rate": 1.812712654691882e-05, "loss": 0.4732, "step": 20601 }, { "epoch": 3.363087220929758, "grad_norm": 2.137075901031494, "learning_rate": 1.812694164433094e-05, "loss": 0.5989, "step": 20602 }, { "epoch": 3.363250479572262, "grad_norm": 2.092597246170044, "learning_rate": 1.8126756733559237e-05, "loss": 0.5275, "step": 20603 }, { "epoch": 3.3634137382147666, "grad_norm": 2.202094078063965, "learning_rate": 1.812657181460389e-05, "loss": 0.4901, "step": 20604 }, { "epoch": 3.363576996857271, "grad_norm": 2.3819620609283447, "learning_rate": 1.8126386887465087e-05, "loss": 0.6127, "step": 20605 }, { "epoch": 3.3637402554997755, "grad_norm": 2.1980245113372803, "learning_rate": 1.8126201952143013e-05, "loss": 0.5387, "step": 20606 }, { "epoch": 3.36390351414228, "grad_norm": 1.8024864196777344, "learning_rate": 1.8126017008637858e-05, "loss": 0.4807, "step": 20607 }, { "epoch": 3.3640667727847844, "grad_norm": 1.858633279800415, "learning_rate": 1.8125832056949802e-05, "loss": 0.501, "step": 20608 }, { "epoch": 3.364230031427289, "grad_norm": 2.276362419128418, "learning_rate": 1.812564709707904e-05, "loss": 0.5134, "step": 20609 }, { "epoch": 3.3643932900697933, "grad_norm": 1.8537912368774414, "learning_rate": 1.812546212902575e-05, "loss": 0.4667, "step": 20610 }, { "epoch": 3.3645565487122973, "grad_norm": 1.7336993217468262, "learning_rate": 1.8125277152790125e-05, "loss": 0.4588, "step": 20611 }, { "epoch": 3.3647198073548017, "grad_norm": 2.0529816150665283, "learning_rate": 1.812509216837235e-05, "loss": 0.5684, "step": 20612 }, { "epoch": 3.364883065997306, "grad_norm": 2.2319154739379883, "learning_rate": 1.8124907175772604e-05, "loss": 0.584, "step": 20613 }, { "epoch": 3.3650463246398106, "grad_norm": 1.71195387840271, "learning_rate": 1.8124722174991084e-05, "loss": 0.434, "step": 20614 }, { "epoch": 3.365209583282315, "grad_norm": 2.1138014793395996, "learning_rate": 1.8124537166027965e-05, "loss": 0.5023, "step": 20615 }, { "epoch": 3.3653728419248194, "grad_norm": 1.6867345571517944, "learning_rate": 1.8124352148883443e-05, "loss": 0.417, "step": 20616 }, { "epoch": 3.365536100567324, "grad_norm": 1.5665075778961182, "learning_rate": 1.81241671235577e-05, "loss": 0.4386, "step": 20617 }, { "epoch": 3.3656993592098283, "grad_norm": 1.745053768157959, "learning_rate": 1.812398209005092e-05, "loss": 0.4484, "step": 20618 }, { "epoch": 3.3658626178523328, "grad_norm": 1.9815144538879395, "learning_rate": 1.8123797048363297e-05, "loss": 0.4697, "step": 20619 }, { "epoch": 3.3660258764948368, "grad_norm": 1.9061592817306519, "learning_rate": 1.812361199849501e-05, "loss": 0.5028, "step": 20620 }, { "epoch": 3.366189135137341, "grad_norm": 1.9450185298919678, "learning_rate": 1.8123426940446246e-05, "loss": 0.443, "step": 20621 }, { "epoch": 3.3663523937798456, "grad_norm": 1.6667300462722778, "learning_rate": 1.8123241874217194e-05, "loss": 0.461, "step": 20622 }, { "epoch": 3.36651565242235, "grad_norm": 1.8834333419799805, "learning_rate": 1.8123056799808044e-05, "loss": 0.4605, "step": 20623 }, { "epoch": 3.3666789110648545, "grad_norm": 1.8690201044082642, "learning_rate": 1.812287171721897e-05, "loss": 0.4537, "step": 20624 }, { "epoch": 3.366842169707359, "grad_norm": 1.9023363590240479, "learning_rate": 1.8122686626450176e-05, "loss": 0.4457, "step": 20625 }, { "epoch": 3.3670054283498634, "grad_norm": 1.806418538093567, "learning_rate": 1.8122501527501834e-05, "loss": 0.4592, "step": 20626 }, { "epoch": 3.367168686992368, "grad_norm": 1.901698112487793, "learning_rate": 1.8122316420374132e-05, "loss": 0.4285, "step": 20627 }, { "epoch": 3.367331945634872, "grad_norm": 2.668753147125244, "learning_rate": 1.8122131305067263e-05, "loss": 0.5645, "step": 20628 }, { "epoch": 3.3674952042773763, "grad_norm": 1.8973565101623535, "learning_rate": 1.812194618158141e-05, "loss": 0.4591, "step": 20629 }, { "epoch": 3.3676584629198807, "grad_norm": 2.115570545196533, "learning_rate": 1.812176104991676e-05, "loss": 0.4881, "step": 20630 }, { "epoch": 3.367821721562385, "grad_norm": 2.030967950820923, "learning_rate": 1.8121575910073497e-05, "loss": 0.4634, "step": 20631 }, { "epoch": 3.3679849802048896, "grad_norm": 2.2945773601531982, "learning_rate": 1.812139076205181e-05, "loss": 0.5205, "step": 20632 }, { "epoch": 3.368148238847394, "grad_norm": 2.2867462635040283, "learning_rate": 1.8121205605851885e-05, "loss": 0.5192, "step": 20633 }, { "epoch": 3.3683114974898984, "grad_norm": 1.6644233465194702, "learning_rate": 1.8121020441473907e-05, "loss": 0.4053, "step": 20634 }, { "epoch": 3.368474756132403, "grad_norm": 2.3203656673431396, "learning_rate": 1.8120835268918063e-05, "loss": 0.5103, "step": 20635 }, { "epoch": 3.3686380147749073, "grad_norm": 2.248469352722168, "learning_rate": 1.812065008818454e-05, "loss": 0.5189, "step": 20636 }, { "epoch": 3.3688012734174118, "grad_norm": 2.1450488567352295, "learning_rate": 1.8120464899273527e-05, "loss": 0.4622, "step": 20637 }, { "epoch": 3.3689645320599158, "grad_norm": 1.9718178510665894, "learning_rate": 1.812027970218521e-05, "loss": 0.4525, "step": 20638 }, { "epoch": 3.36912779070242, "grad_norm": 2.111433506011963, "learning_rate": 1.812009449691977e-05, "loss": 0.5481, "step": 20639 }, { "epoch": 3.3692910493449246, "grad_norm": 1.6336827278137207, "learning_rate": 1.8119909283477394e-05, "loss": 0.3981, "step": 20640 }, { "epoch": 3.369454307987429, "grad_norm": 2.214057207107544, "learning_rate": 1.8119724061858275e-05, "loss": 0.507, "step": 20641 }, { "epoch": 3.3696175666299335, "grad_norm": 2.079586982727051, "learning_rate": 1.8119538832062594e-05, "loss": 0.5457, "step": 20642 }, { "epoch": 3.369780825272438, "grad_norm": 1.8709512948989868, "learning_rate": 1.811935359409054e-05, "loss": 0.4367, "step": 20643 }, { "epoch": 3.3699440839149424, "grad_norm": 2.0748188495635986, "learning_rate": 1.8119168347942304e-05, "loss": 0.4859, "step": 20644 }, { "epoch": 3.370107342557447, "grad_norm": 2.3804166316986084, "learning_rate": 1.8118983093618063e-05, "loss": 0.54, "step": 20645 }, { "epoch": 3.370270601199951, "grad_norm": 2.1416661739349365, "learning_rate": 1.811879783111801e-05, "loss": 0.5196, "step": 20646 }, { "epoch": 3.3704338598424552, "grad_norm": 1.7087581157684326, "learning_rate": 1.8118612560442327e-05, "loss": 0.4483, "step": 20647 }, { "epoch": 3.3705971184849597, "grad_norm": 1.9674934148788452, "learning_rate": 1.8118427281591207e-05, "loss": 0.5454, "step": 20648 }, { "epoch": 3.370760377127464, "grad_norm": 1.7668908834457397, "learning_rate": 1.811824199456483e-05, "loss": 0.4313, "step": 20649 }, { "epoch": 3.3709236357699686, "grad_norm": 1.8472732305526733, "learning_rate": 1.8118056699363386e-05, "loss": 0.4336, "step": 20650 }, { "epoch": 3.371086894412473, "grad_norm": 1.7377554178237915, "learning_rate": 1.8117871395987065e-05, "loss": 0.4239, "step": 20651 }, { "epoch": 3.3712501530549774, "grad_norm": 2.1164774894714355, "learning_rate": 1.8117686084436045e-05, "loss": 0.4732, "step": 20652 }, { "epoch": 3.371413411697482, "grad_norm": 1.7240996360778809, "learning_rate": 1.8117500764710517e-05, "loss": 0.4744, "step": 20653 }, { "epoch": 3.3715766703399863, "grad_norm": 2.12272047996521, "learning_rate": 1.811731543681067e-05, "loss": 0.5272, "step": 20654 }, { "epoch": 3.3717399289824908, "grad_norm": 1.8366557359695435, "learning_rate": 1.811713010073669e-05, "loss": 0.5214, "step": 20655 }, { "epoch": 3.3719031876249947, "grad_norm": 2.0690131187438965, "learning_rate": 1.8116944756488757e-05, "loss": 0.4935, "step": 20656 }, { "epoch": 3.372066446267499, "grad_norm": 1.9989898204803467, "learning_rate": 1.8116759404067066e-05, "loss": 0.5225, "step": 20657 }, { "epoch": 3.3722297049100036, "grad_norm": 2.4988958835601807, "learning_rate": 1.8116574043471802e-05, "loss": 0.5501, "step": 20658 }, { "epoch": 3.372392963552508, "grad_norm": 1.7449856996536255, "learning_rate": 1.8116388674703148e-05, "loss": 0.3815, "step": 20659 }, { "epoch": 3.3725562221950125, "grad_norm": 1.9427249431610107, "learning_rate": 1.8116203297761293e-05, "loss": 0.5063, "step": 20660 }, { "epoch": 3.372719480837517, "grad_norm": 2.3447751998901367, "learning_rate": 1.811601791264642e-05, "loss": 0.4826, "step": 20661 }, { "epoch": 3.3728827394800214, "grad_norm": 2.4501712322235107, "learning_rate": 1.8115832519358724e-05, "loss": 0.4744, "step": 20662 }, { "epoch": 3.3730459981225254, "grad_norm": 1.9122904539108276, "learning_rate": 1.8115647117898386e-05, "loss": 0.5655, "step": 20663 }, { "epoch": 3.37320925676503, "grad_norm": 1.774474024772644, "learning_rate": 1.8115461708265592e-05, "loss": 0.4623, "step": 20664 }, { "epoch": 3.3733725154075342, "grad_norm": 2.0855257511138916, "learning_rate": 1.8115276290460534e-05, "loss": 0.4865, "step": 20665 }, { "epoch": 3.3735357740500387, "grad_norm": 2.052199125289917, "learning_rate": 1.8115090864483393e-05, "loss": 0.447, "step": 20666 }, { "epoch": 3.373699032692543, "grad_norm": 1.9682270288467407, "learning_rate": 1.8114905430334352e-05, "loss": 0.4515, "step": 20667 }, { "epoch": 3.3738622913350476, "grad_norm": 1.762764573097229, "learning_rate": 1.8114719988013612e-05, "loss": 0.432, "step": 20668 }, { "epoch": 3.374025549977552, "grad_norm": 1.8826098442077637, "learning_rate": 1.8114534537521346e-05, "loss": 0.4731, "step": 20669 }, { "epoch": 3.3741888086200564, "grad_norm": 1.6779298782348633, "learning_rate": 1.811434907885775e-05, "loss": 0.4002, "step": 20670 }, { "epoch": 3.374352067262561, "grad_norm": 1.5264171361923218, "learning_rate": 1.8114163612023e-05, "loss": 0.4099, "step": 20671 }, { "epoch": 3.3745153259050653, "grad_norm": 1.4971978664398193, "learning_rate": 1.8113978137017293e-05, "loss": 0.4125, "step": 20672 }, { "epoch": 3.3746785845475693, "grad_norm": 1.9798475503921509, "learning_rate": 1.8113792653840813e-05, "loss": 0.4434, "step": 20673 }, { "epoch": 3.3748418431900737, "grad_norm": 1.8136155605316162, "learning_rate": 1.8113607162493748e-05, "loss": 0.4728, "step": 20674 }, { "epoch": 3.375005101832578, "grad_norm": 2.2781882286071777, "learning_rate": 1.8113421662976275e-05, "loss": 0.5332, "step": 20675 }, { "epoch": 3.3751683604750826, "grad_norm": 2.0038599967956543, "learning_rate": 1.8113236155288596e-05, "loss": 0.4627, "step": 20676 }, { "epoch": 3.375331619117587, "grad_norm": 2.3504927158355713, "learning_rate": 1.8113050639430885e-05, "loss": 0.5431, "step": 20677 }, { "epoch": 3.3754948777600915, "grad_norm": 2.183626651763916, "learning_rate": 1.8112865115403338e-05, "loss": 0.556, "step": 20678 }, { "epoch": 3.375658136402596, "grad_norm": 1.9260857105255127, "learning_rate": 1.8112679583206138e-05, "loss": 0.4119, "step": 20679 }, { "epoch": 3.3758213950451004, "grad_norm": 2.022296667098999, "learning_rate": 1.811249404283947e-05, "loss": 0.5193, "step": 20680 }, { "epoch": 3.3759846536876044, "grad_norm": 1.8537914752960205, "learning_rate": 1.8112308494303527e-05, "loss": 0.4459, "step": 20681 }, { "epoch": 3.376147912330109, "grad_norm": 2.188546895980835, "learning_rate": 1.8112122937598485e-05, "loss": 0.585, "step": 20682 }, { "epoch": 3.3763111709726132, "grad_norm": 2.0550854206085205, "learning_rate": 1.8111937372724542e-05, "loss": 0.5031, "step": 20683 }, { "epoch": 3.3764744296151177, "grad_norm": 1.8515523672103882, "learning_rate": 1.811175179968188e-05, "loss": 0.4449, "step": 20684 }, { "epoch": 3.376637688257622, "grad_norm": 1.8496795892715454, "learning_rate": 1.8111566218470683e-05, "loss": 0.4922, "step": 20685 }, { "epoch": 3.3768009469001266, "grad_norm": 1.9675878286361694, "learning_rate": 1.8111380629091142e-05, "loss": 0.478, "step": 20686 }, { "epoch": 3.376964205542631, "grad_norm": 2.5456066131591797, "learning_rate": 1.8111195031543443e-05, "loss": 0.5961, "step": 20687 }, { "epoch": 3.3771274641851354, "grad_norm": 1.6381704807281494, "learning_rate": 1.8111009425827774e-05, "loss": 0.4567, "step": 20688 }, { "epoch": 3.37729072282764, "grad_norm": 2.100661516189575, "learning_rate": 1.8110823811944322e-05, "loss": 0.6155, "step": 20689 }, { "epoch": 3.3774539814701443, "grad_norm": 2.0803749561309814, "learning_rate": 1.8110638189893267e-05, "loss": 0.4775, "step": 20690 }, { "epoch": 3.3776172401126483, "grad_norm": 2.1093311309814453, "learning_rate": 1.811045255967481e-05, "loss": 0.5049, "step": 20691 }, { "epoch": 3.3777804987551527, "grad_norm": 2.1072683334350586, "learning_rate": 1.8110266921289123e-05, "loss": 0.5237, "step": 20692 }, { "epoch": 3.377943757397657, "grad_norm": 1.8101168870925903, "learning_rate": 1.81100812747364e-05, "loss": 0.4209, "step": 20693 }, { "epoch": 3.3781070160401616, "grad_norm": 1.863582968711853, "learning_rate": 1.8109895620016825e-05, "loss": 0.4325, "step": 20694 }, { "epoch": 3.378270274682666, "grad_norm": 1.6959035396575928, "learning_rate": 1.810970995713059e-05, "loss": 0.4645, "step": 20695 }, { "epoch": 3.3784335333251705, "grad_norm": 1.843686819076538, "learning_rate": 1.810952428607788e-05, "loss": 0.4482, "step": 20696 }, { "epoch": 3.378596791967675, "grad_norm": 1.9306869506835938, "learning_rate": 1.8109338606858883e-05, "loss": 0.5161, "step": 20697 }, { "epoch": 3.3787600506101794, "grad_norm": 1.973350167274475, "learning_rate": 1.810915291947378e-05, "loss": 0.5192, "step": 20698 }, { "epoch": 3.3789233092526834, "grad_norm": 1.6712099313735962, "learning_rate": 1.8108967223922764e-05, "loss": 0.4199, "step": 20699 }, { "epoch": 3.379086567895188, "grad_norm": 2.1444380283355713, "learning_rate": 1.810878152020602e-05, "loss": 0.5675, "step": 20700 }, { "epoch": 3.3792498265376922, "grad_norm": 2.210897207260132, "learning_rate": 1.8108595808323736e-05, "loss": 0.5044, "step": 20701 }, { "epoch": 3.3794130851801967, "grad_norm": 1.8173779249191284, "learning_rate": 1.81084100882761e-05, "loss": 0.487, "step": 20702 }, { "epoch": 3.379576343822701, "grad_norm": 1.8070499897003174, "learning_rate": 1.8108224360063294e-05, "loss": 0.4814, "step": 20703 }, { "epoch": 3.3797396024652056, "grad_norm": 1.7821731567382812, "learning_rate": 1.8108038623685508e-05, "loss": 0.4527, "step": 20704 }, { "epoch": 3.37990286110771, "grad_norm": 1.5739529132843018, "learning_rate": 1.810785287914293e-05, "loss": 0.3931, "step": 20705 }, { "epoch": 3.3800661197502144, "grad_norm": 1.8660011291503906, "learning_rate": 1.8107667126435747e-05, "loss": 0.4126, "step": 20706 }, { "epoch": 3.380229378392719, "grad_norm": 1.661035180091858, "learning_rate": 1.8107481365564147e-05, "loss": 0.4383, "step": 20707 }, { "epoch": 3.380392637035223, "grad_norm": 1.8584847450256348, "learning_rate": 1.810729559652831e-05, "loss": 0.4476, "step": 20708 }, { "epoch": 3.3805558956777273, "grad_norm": 1.9937855005264282, "learning_rate": 1.8107109819328434e-05, "loss": 0.415, "step": 20709 }, { "epoch": 3.3807191543202317, "grad_norm": 2.1601247787475586, "learning_rate": 1.8106924033964696e-05, "loss": 0.491, "step": 20710 }, { "epoch": 3.380882412962736, "grad_norm": 1.7093299627304077, "learning_rate": 1.810673824043729e-05, "loss": 0.403, "step": 20711 }, { "epoch": 3.3810456716052406, "grad_norm": 1.7737568616867065, "learning_rate": 1.81065524387464e-05, "loss": 0.4225, "step": 20712 }, { "epoch": 3.381208930247745, "grad_norm": 1.765700340270996, "learning_rate": 1.8106366628892216e-05, "loss": 0.5102, "step": 20713 }, { "epoch": 3.3813721888902495, "grad_norm": 1.877897024154663, "learning_rate": 1.810618081087492e-05, "loss": 0.5263, "step": 20714 }, { "epoch": 3.381535447532754, "grad_norm": 2.3864121437072754, "learning_rate": 1.8105994984694706e-05, "loss": 0.7411, "step": 20715 }, { "epoch": 3.381698706175258, "grad_norm": 2.156789779663086, "learning_rate": 1.8105809150351753e-05, "loss": 0.5016, "step": 20716 }, { "epoch": 3.3818619648177624, "grad_norm": 1.8723886013031006, "learning_rate": 1.8105623307846255e-05, "loss": 0.489, "step": 20717 }, { "epoch": 3.382025223460267, "grad_norm": 1.953747272491455, "learning_rate": 1.8105437457178395e-05, "loss": 0.4631, "step": 20718 }, { "epoch": 3.3821884821027712, "grad_norm": 2.1136014461517334, "learning_rate": 1.8105251598348364e-05, "loss": 0.5372, "step": 20719 }, { "epoch": 3.3823517407452757, "grad_norm": 1.8610776662826538, "learning_rate": 1.8105065731356343e-05, "loss": 0.5142, "step": 20720 }, { "epoch": 3.38251499938778, "grad_norm": 2.151742458343506, "learning_rate": 1.8104879856202525e-05, "loss": 0.5533, "step": 20721 }, { "epoch": 3.3826782580302845, "grad_norm": 2.027801990509033, "learning_rate": 1.8104693972887095e-05, "loss": 0.5071, "step": 20722 }, { "epoch": 3.382841516672789, "grad_norm": 1.9913526773452759, "learning_rate": 1.8104508081410242e-05, "loss": 0.5019, "step": 20723 }, { "epoch": 3.3830047753152934, "grad_norm": 2.2560250759124756, "learning_rate": 1.8104322181772148e-05, "loss": 0.5536, "step": 20724 }, { "epoch": 3.383168033957798, "grad_norm": 1.8674519062042236, "learning_rate": 1.8104136273973007e-05, "loss": 0.4503, "step": 20725 }, { "epoch": 3.383331292600302, "grad_norm": 2.057425022125244, "learning_rate": 1.8103950358013e-05, "loss": 0.4937, "step": 20726 }, { "epoch": 3.3834945512428063, "grad_norm": 1.9736731052398682, "learning_rate": 1.8103764433892318e-05, "loss": 0.4811, "step": 20727 }, { "epoch": 3.3836578098853107, "grad_norm": 2.451697826385498, "learning_rate": 1.810357850161115e-05, "loss": 0.5227, "step": 20728 }, { "epoch": 3.383821068527815, "grad_norm": 2.2081243991851807, "learning_rate": 1.810339256116968e-05, "loss": 0.4609, "step": 20729 }, { "epoch": 3.3839843271703196, "grad_norm": 1.9674246311187744, "learning_rate": 1.8103206612568094e-05, "loss": 0.4172, "step": 20730 }, { "epoch": 3.384147585812824, "grad_norm": 2.017714023590088, "learning_rate": 1.810302065580658e-05, "loss": 0.4916, "step": 20731 }, { "epoch": 3.3843108444553285, "grad_norm": 2.1516497135162354, "learning_rate": 1.810283469088533e-05, "loss": 0.4892, "step": 20732 }, { "epoch": 3.384474103097833, "grad_norm": 1.8733896017074585, "learning_rate": 1.8102648717804525e-05, "loss": 0.3846, "step": 20733 }, { "epoch": 3.384637361740337, "grad_norm": 1.8436214923858643, "learning_rate": 1.8102462736564355e-05, "loss": 0.4558, "step": 20734 }, { "epoch": 3.3848006203828414, "grad_norm": 2.1860265731811523, "learning_rate": 1.8102276747165007e-05, "loss": 0.5204, "step": 20735 }, { "epoch": 3.384963879025346, "grad_norm": 2.7460503578186035, "learning_rate": 1.810209074960667e-05, "loss": 0.6748, "step": 20736 }, { "epoch": 3.3851271376678502, "grad_norm": 1.9335267543792725, "learning_rate": 1.810190474388953e-05, "loss": 0.4826, "step": 20737 }, { "epoch": 3.3852903963103547, "grad_norm": 2.3889353275299072, "learning_rate": 1.8101718730013773e-05, "loss": 0.5557, "step": 20738 }, { "epoch": 3.385453654952859, "grad_norm": 2.3930797576904297, "learning_rate": 1.8101532707979584e-05, "loss": 0.5581, "step": 20739 }, { "epoch": 3.3856169135953635, "grad_norm": 1.7377244234085083, "learning_rate": 1.8101346677787157e-05, "loss": 0.4497, "step": 20740 }, { "epoch": 3.385780172237868, "grad_norm": 1.6663718223571777, "learning_rate": 1.8101160639436677e-05, "loss": 0.4207, "step": 20741 }, { "epoch": 3.3859434308803724, "grad_norm": 1.9941307306289673, "learning_rate": 1.8100974592928332e-05, "loss": 0.4971, "step": 20742 }, { "epoch": 3.386106689522877, "grad_norm": 2.2755141258239746, "learning_rate": 1.8100788538262304e-05, "loss": 0.5321, "step": 20743 }, { "epoch": 3.386269948165381, "grad_norm": 1.819032907485962, "learning_rate": 1.8100602475438783e-05, "loss": 0.4052, "step": 20744 }, { "epoch": 3.3864332068078853, "grad_norm": 1.9243406057357788, "learning_rate": 1.8100416404457962e-05, "loss": 0.4169, "step": 20745 }, { "epoch": 3.3865964654503897, "grad_norm": 2.326233386993408, "learning_rate": 1.810023032532002e-05, "loss": 0.571, "step": 20746 }, { "epoch": 3.386759724092894, "grad_norm": 1.96358323097229, "learning_rate": 1.8100044238025152e-05, "loss": 0.4365, "step": 20747 }, { "epoch": 3.3869229827353986, "grad_norm": 1.8777434825897217, "learning_rate": 1.809985814257354e-05, "loss": 0.4669, "step": 20748 }, { "epoch": 3.387086241377903, "grad_norm": 2.213808536529541, "learning_rate": 1.809967203896537e-05, "loss": 0.4419, "step": 20749 }, { "epoch": 3.3872495000204075, "grad_norm": 1.7179409265518188, "learning_rate": 1.809948592720084e-05, "loss": 0.4434, "step": 20750 }, { "epoch": 3.3874127586629115, "grad_norm": 2.0182535648345947, "learning_rate": 1.809929980728012e-05, "loss": 0.6132, "step": 20751 }, { "epoch": 3.387576017305416, "grad_norm": 2.299684762954712, "learning_rate": 1.8099113679203414e-05, "loss": 0.5451, "step": 20752 }, { "epoch": 3.3877392759479203, "grad_norm": 1.8184012174606323, "learning_rate": 1.80989275429709e-05, "loss": 0.5177, "step": 20753 }, { "epoch": 3.387902534590425, "grad_norm": 2.113348960876465, "learning_rate": 1.809874139858277e-05, "loss": 0.4951, "step": 20754 }, { "epoch": 3.3880657932329292, "grad_norm": 1.955216407775879, "learning_rate": 1.8098555246039205e-05, "loss": 0.5123, "step": 20755 }, { "epoch": 3.3882290518754337, "grad_norm": 1.8045704364776611, "learning_rate": 1.80983690853404e-05, "loss": 0.4806, "step": 20756 }, { "epoch": 3.388392310517938, "grad_norm": 1.9485353231430054, "learning_rate": 1.8098182916486542e-05, "loss": 0.4136, "step": 20757 }, { "epoch": 3.3885555691604425, "grad_norm": 1.9848041534423828, "learning_rate": 1.8097996739477812e-05, "loss": 0.5092, "step": 20758 }, { "epoch": 3.388718827802947, "grad_norm": 1.8429189920425415, "learning_rate": 1.8097810554314402e-05, "loss": 0.48, "step": 20759 }, { "epoch": 3.3888820864454514, "grad_norm": 2.246202230453491, "learning_rate": 1.80976243609965e-05, "loss": 0.4392, "step": 20760 }, { "epoch": 3.3890453450879554, "grad_norm": 1.8979281187057495, "learning_rate": 1.8097438159524295e-05, "loss": 0.4603, "step": 20761 }, { "epoch": 3.38920860373046, "grad_norm": 1.7380403280258179, "learning_rate": 1.8097251949897966e-05, "loss": 0.464, "step": 20762 }, { "epoch": 3.3893718623729643, "grad_norm": 1.9188544750213623, "learning_rate": 1.8097065732117713e-05, "loss": 0.4982, "step": 20763 }, { "epoch": 3.3895351210154687, "grad_norm": 1.7025855779647827, "learning_rate": 1.809687950618371e-05, "loss": 0.4299, "step": 20764 }, { "epoch": 3.389698379657973, "grad_norm": 1.9120043516159058, "learning_rate": 1.8096693272096158e-05, "loss": 0.458, "step": 20765 }, { "epoch": 3.3898616383004776, "grad_norm": 2.3866028785705566, "learning_rate": 1.8096507029855236e-05, "loss": 0.5526, "step": 20766 }, { "epoch": 3.390024896942982, "grad_norm": 2.1199920177459717, "learning_rate": 1.809632077946113e-05, "loss": 0.5357, "step": 20767 }, { "epoch": 3.3901881555854865, "grad_norm": 1.8381441831588745, "learning_rate": 1.809613452091404e-05, "loss": 0.5091, "step": 20768 }, { "epoch": 3.3903514142279905, "grad_norm": 2.012343645095825, "learning_rate": 1.8095948254214132e-05, "loss": 0.4955, "step": 20769 }, { "epoch": 3.390514672870495, "grad_norm": 2.1988985538482666, "learning_rate": 1.8095761979361615e-05, "loss": 0.4836, "step": 20770 }, { "epoch": 3.3906779315129993, "grad_norm": 1.8399035930633545, "learning_rate": 1.8095575696356667e-05, "loss": 0.4923, "step": 20771 }, { "epoch": 3.390841190155504, "grad_norm": 1.9079651832580566, "learning_rate": 1.8095389405199474e-05, "loss": 0.5217, "step": 20772 }, { "epoch": 3.391004448798008, "grad_norm": 1.7975518703460693, "learning_rate": 1.8095203105890225e-05, "loss": 0.4872, "step": 20773 }, { "epoch": 3.3911677074405127, "grad_norm": 2.457554817199707, "learning_rate": 1.809501679842911e-05, "loss": 0.5475, "step": 20774 }, { "epoch": 3.391330966083017, "grad_norm": 1.8505417108535767, "learning_rate": 1.809483048281632e-05, "loss": 0.4689, "step": 20775 }, { "epoch": 3.3914942247255215, "grad_norm": 2.0329504013061523, "learning_rate": 1.8094644159052033e-05, "loss": 0.4497, "step": 20776 }, { "epoch": 3.391657483368026, "grad_norm": 1.9005087614059448, "learning_rate": 1.809445782713644e-05, "loss": 0.4566, "step": 20777 }, { "epoch": 3.3918207420105304, "grad_norm": 1.8781131505966187, "learning_rate": 1.8094271487069733e-05, "loss": 0.5141, "step": 20778 }, { "epoch": 3.3919840006530344, "grad_norm": 1.94882071018219, "learning_rate": 1.8094085138852098e-05, "loss": 0.4586, "step": 20779 }, { "epoch": 3.392147259295539, "grad_norm": 1.874389410018921, "learning_rate": 1.8093898782483718e-05, "loss": 0.4818, "step": 20780 }, { "epoch": 3.3923105179380433, "grad_norm": 1.8871994018554688, "learning_rate": 1.8093712417964785e-05, "loss": 0.5166, "step": 20781 }, { "epoch": 3.3924737765805477, "grad_norm": 2.1328225135803223, "learning_rate": 1.809352604529549e-05, "loss": 0.5144, "step": 20782 }, { "epoch": 3.392637035223052, "grad_norm": 2.0941572189331055, "learning_rate": 1.8093339664476014e-05, "loss": 0.5044, "step": 20783 }, { "epoch": 3.3928002938655566, "grad_norm": 1.9679679870605469, "learning_rate": 1.8093153275506542e-05, "loss": 0.452, "step": 20784 }, { "epoch": 3.392963552508061, "grad_norm": 2.0108206272125244, "learning_rate": 1.8092966878387273e-05, "loss": 0.5645, "step": 20785 }, { "epoch": 3.3931268111505655, "grad_norm": 2.2617523670196533, "learning_rate": 1.8092780473118387e-05, "loss": 0.5498, "step": 20786 }, { "epoch": 3.3932900697930695, "grad_norm": 1.8047860860824585, "learning_rate": 1.8092594059700072e-05, "loss": 0.4349, "step": 20787 }, { "epoch": 3.393453328435574, "grad_norm": 1.9108372926712036, "learning_rate": 1.8092407638132517e-05, "loss": 0.4875, "step": 20788 }, { "epoch": 3.3936165870780783, "grad_norm": 1.7377065420150757, "learning_rate": 1.809222120841591e-05, "loss": 0.3882, "step": 20789 }, { "epoch": 3.3937798457205828, "grad_norm": 2.4870569705963135, "learning_rate": 1.8092034770550438e-05, "loss": 0.5399, "step": 20790 }, { "epoch": 3.393943104363087, "grad_norm": 1.853127121925354, "learning_rate": 1.809184832453629e-05, "loss": 0.5135, "step": 20791 }, { "epoch": 3.3941063630055917, "grad_norm": 1.8436893224716187, "learning_rate": 1.809166187037365e-05, "loss": 0.4764, "step": 20792 }, { "epoch": 3.394269621648096, "grad_norm": 1.8245564699172974, "learning_rate": 1.8091475408062713e-05, "loss": 0.4982, "step": 20793 }, { "epoch": 3.3944328802906005, "grad_norm": 1.789466142654419, "learning_rate": 1.809128893760366e-05, "loss": 0.4237, "step": 20794 }, { "epoch": 3.394596138933105, "grad_norm": 1.7256624698638916, "learning_rate": 1.809110245899668e-05, "loss": 0.4022, "step": 20795 }, { "epoch": 3.3947593975756094, "grad_norm": 2.036665201187134, "learning_rate": 1.8090915972241965e-05, "loss": 0.5091, "step": 20796 }, { "epoch": 3.3949226562181134, "grad_norm": 2.0477612018585205, "learning_rate": 1.80907294773397e-05, "loss": 0.5071, "step": 20797 }, { "epoch": 3.395085914860618, "grad_norm": 2.46899151802063, "learning_rate": 1.809054297429007e-05, "loss": 0.5604, "step": 20798 }, { "epoch": 3.3952491735031223, "grad_norm": 2.2644999027252197, "learning_rate": 1.8090356463093267e-05, "loss": 0.4959, "step": 20799 }, { "epoch": 3.3954124321456267, "grad_norm": 2.193031072616577, "learning_rate": 1.8090169943749477e-05, "loss": 0.4998, "step": 20800 }, { "epoch": 3.395575690788131, "grad_norm": 2.1270902156829834, "learning_rate": 1.8089983416258887e-05, "loss": 0.5043, "step": 20801 }, { "epoch": 3.3957389494306356, "grad_norm": 1.6244614124298096, "learning_rate": 1.8089796880621686e-05, "loss": 0.39, "step": 20802 }, { "epoch": 3.39590220807314, "grad_norm": 2.0252037048339844, "learning_rate": 1.8089610336838062e-05, "loss": 0.4862, "step": 20803 }, { "epoch": 3.396065466715644, "grad_norm": 2.0220561027526855, "learning_rate": 1.8089423784908203e-05, "loss": 0.4495, "step": 20804 }, { "epoch": 3.3962287253581485, "grad_norm": 1.9513235092163086, "learning_rate": 1.8089237224832296e-05, "loss": 0.5199, "step": 20805 }, { "epoch": 3.396391984000653, "grad_norm": 2.06278395652771, "learning_rate": 1.808905065661053e-05, "loss": 0.4931, "step": 20806 }, { "epoch": 3.3965552426431573, "grad_norm": 1.9758377075195312, "learning_rate": 1.808886408024309e-05, "loss": 0.5373, "step": 20807 }, { "epoch": 3.3967185012856618, "grad_norm": 1.8416084051132202, "learning_rate": 1.8088677495730168e-05, "loss": 0.4871, "step": 20808 }, { "epoch": 3.396881759928166, "grad_norm": 1.6714484691619873, "learning_rate": 1.808849090307195e-05, "loss": 0.3844, "step": 20809 }, { "epoch": 3.3970450185706706, "grad_norm": 1.881103515625, "learning_rate": 1.8088304302268622e-05, "loss": 0.4694, "step": 20810 }, { "epoch": 3.397208277213175, "grad_norm": 2.0891897678375244, "learning_rate": 1.8088117693320374e-05, "loss": 0.4595, "step": 20811 }, { "epoch": 3.3973715358556795, "grad_norm": 1.8712040185928345, "learning_rate": 1.8087931076227393e-05, "loss": 0.6039, "step": 20812 }, { "epoch": 3.397534794498184, "grad_norm": 2.0990805625915527, "learning_rate": 1.8087744450989872e-05, "loss": 0.5179, "step": 20813 }, { "epoch": 3.397698053140688, "grad_norm": 1.7003964185714722, "learning_rate": 1.808755781760799e-05, "loss": 0.4018, "step": 20814 }, { "epoch": 3.3978613117831924, "grad_norm": 1.975242018699646, "learning_rate": 1.8087371176081944e-05, "loss": 0.4457, "step": 20815 }, { "epoch": 3.398024570425697, "grad_norm": 1.793789029121399, "learning_rate": 1.8087184526411913e-05, "loss": 0.4571, "step": 20816 }, { "epoch": 3.3981878290682013, "grad_norm": 1.6435480117797852, "learning_rate": 1.8086997868598088e-05, "loss": 0.4015, "step": 20817 }, { "epoch": 3.3983510877107057, "grad_norm": 2.117762565612793, "learning_rate": 1.8086811202640664e-05, "loss": 0.5496, "step": 20818 }, { "epoch": 3.39851434635321, "grad_norm": 2.223388671875, "learning_rate": 1.808662452853982e-05, "loss": 0.5967, "step": 20819 }, { "epoch": 3.3986776049957146, "grad_norm": 1.5937843322753906, "learning_rate": 1.808643784629575e-05, "loss": 0.395, "step": 20820 }, { "epoch": 3.398840863638219, "grad_norm": 1.6638901233673096, "learning_rate": 1.8086251155908634e-05, "loss": 0.4471, "step": 20821 }, { "epoch": 3.399004122280723, "grad_norm": 2.1577765941619873, "learning_rate": 1.8086064457378667e-05, "loss": 0.5832, "step": 20822 }, { "epoch": 3.3991673809232275, "grad_norm": 1.959903359413147, "learning_rate": 1.8085877750706036e-05, "loss": 0.4665, "step": 20823 }, { "epoch": 3.399330639565732, "grad_norm": 1.7288618087768555, "learning_rate": 1.8085691035890928e-05, "loss": 0.3652, "step": 20824 }, { "epoch": 3.3994938982082363, "grad_norm": 1.8897463083267212, "learning_rate": 1.808550431293353e-05, "loss": 0.4277, "step": 20825 }, { "epoch": 3.3996571568507408, "grad_norm": 1.7013901472091675, "learning_rate": 1.8085317581834037e-05, "loss": 0.4107, "step": 20826 }, { "epoch": 3.399820415493245, "grad_norm": 1.953620433807373, "learning_rate": 1.8085130842592624e-05, "loss": 0.4976, "step": 20827 }, { "epoch": 3.3999836741357496, "grad_norm": 1.876363754272461, "learning_rate": 1.8084944095209492e-05, "loss": 0.4479, "step": 20828 }, { "epoch": 3.400146932778254, "grad_norm": 1.9839023351669312, "learning_rate": 1.808475733968482e-05, "loss": 0.4735, "step": 20829 }, { "epoch": 3.4003101914207585, "grad_norm": 1.9913145303726196, "learning_rate": 1.80845705760188e-05, "loss": 0.4545, "step": 20830 }, { "epoch": 3.400473450063263, "grad_norm": 1.820870041847229, "learning_rate": 1.808438380421162e-05, "loss": 0.4167, "step": 20831 }, { "epoch": 3.400636708705767, "grad_norm": 1.8638120889663696, "learning_rate": 1.808419702426347e-05, "loss": 0.437, "step": 20832 }, { "epoch": 3.4007999673482714, "grad_norm": 1.9034894704818726, "learning_rate": 1.8084010236174533e-05, "loss": 0.4948, "step": 20833 }, { "epoch": 3.400963225990776, "grad_norm": 1.5474581718444824, "learning_rate": 1.8083823439945e-05, "loss": 0.4118, "step": 20834 }, { "epoch": 3.4011264846332803, "grad_norm": 1.8149471282958984, "learning_rate": 1.808363663557506e-05, "loss": 0.5069, "step": 20835 }, { "epoch": 3.4012897432757847, "grad_norm": 1.9827994108200073, "learning_rate": 1.80834498230649e-05, "loss": 0.4717, "step": 20836 }, { "epoch": 3.401453001918289, "grad_norm": 2.0428085327148438, "learning_rate": 1.8083263002414707e-05, "loss": 0.4809, "step": 20837 }, { "epoch": 3.4016162605607936, "grad_norm": 2.0293726921081543, "learning_rate": 1.8083076173624672e-05, "loss": 0.4441, "step": 20838 }, { "epoch": 3.4017795192032976, "grad_norm": 2.0973563194274902, "learning_rate": 1.808288933669498e-05, "loss": 0.5273, "step": 20839 }, { "epoch": 3.401942777845802, "grad_norm": 1.9429383277893066, "learning_rate": 1.808270249162582e-05, "loss": 0.4678, "step": 20840 }, { "epoch": 3.4021060364883065, "grad_norm": 1.76774263381958, "learning_rate": 1.808251563841738e-05, "loss": 0.4034, "step": 20841 }, { "epoch": 3.402269295130811, "grad_norm": 2.1432676315307617, "learning_rate": 1.8082328777069853e-05, "loss": 0.5488, "step": 20842 }, { "epoch": 3.4024325537733153, "grad_norm": 2.0463290214538574, "learning_rate": 1.808214190758342e-05, "loss": 0.4357, "step": 20843 }, { "epoch": 3.4025958124158198, "grad_norm": 1.8430145978927612, "learning_rate": 1.8081955029958272e-05, "loss": 0.4089, "step": 20844 }, { "epoch": 3.402759071058324, "grad_norm": 1.6534111499786377, "learning_rate": 1.80817681441946e-05, "loss": 0.436, "step": 20845 }, { "epoch": 3.4029223297008286, "grad_norm": 1.932248592376709, "learning_rate": 1.808158125029259e-05, "loss": 0.4745, "step": 20846 }, { "epoch": 3.403085588343333, "grad_norm": 2.1262292861938477, "learning_rate": 1.8081394348252422e-05, "loss": 0.5021, "step": 20847 }, { "epoch": 3.4032488469858375, "grad_norm": 1.9437782764434814, "learning_rate": 1.8081207438074298e-05, "loss": 0.3893, "step": 20848 }, { "epoch": 3.4034121056283415, "grad_norm": 1.9657751321792603, "learning_rate": 1.80810205197584e-05, "loss": 0.4845, "step": 20849 }, { "epoch": 3.403575364270846, "grad_norm": 1.9202277660369873, "learning_rate": 1.8080833593304917e-05, "loss": 0.4801, "step": 20850 }, { "epoch": 3.4037386229133504, "grad_norm": 2.130953788757324, "learning_rate": 1.8080646658714037e-05, "loss": 0.4947, "step": 20851 }, { "epoch": 3.403901881555855, "grad_norm": 1.7476476430892944, "learning_rate": 1.8080459715985948e-05, "loss": 0.4315, "step": 20852 }, { "epoch": 3.4040651401983593, "grad_norm": 2.2068395614624023, "learning_rate": 1.8080272765120837e-05, "loss": 0.5681, "step": 20853 }, { "epoch": 3.4042283988408637, "grad_norm": 2.2858641147613525, "learning_rate": 1.8080085806118893e-05, "loss": 0.4656, "step": 20854 }, { "epoch": 3.404391657483368, "grad_norm": 2.332651376724243, "learning_rate": 1.8079898838980304e-05, "loss": 0.5248, "step": 20855 }, { "epoch": 3.4045549161258726, "grad_norm": 1.9130791425704956, "learning_rate": 1.8079711863705265e-05, "loss": 0.4457, "step": 20856 }, { "epoch": 3.4047181747683766, "grad_norm": 1.710361361503601, "learning_rate": 1.8079524880293953e-05, "loss": 0.4826, "step": 20857 }, { "epoch": 3.404881433410881, "grad_norm": 1.9798142910003662, "learning_rate": 1.807933788874656e-05, "loss": 0.5151, "step": 20858 }, { "epoch": 3.4050446920533854, "grad_norm": 1.826170563697815, "learning_rate": 1.8079150889063282e-05, "loss": 0.4508, "step": 20859 }, { "epoch": 3.40520795069589, "grad_norm": 2.0178349018096924, "learning_rate": 1.8078963881244296e-05, "loss": 0.466, "step": 20860 }, { "epoch": 3.4053712093383943, "grad_norm": 1.933624505996704, "learning_rate": 1.80787768652898e-05, "loss": 0.5248, "step": 20861 }, { "epoch": 3.4055344679808988, "grad_norm": 2.018415927886963, "learning_rate": 1.807858984119997e-05, "loss": 0.5319, "step": 20862 }, { "epoch": 3.405697726623403, "grad_norm": 2.0493154525756836, "learning_rate": 1.807840280897501e-05, "loss": 0.4479, "step": 20863 }, { "epoch": 3.4058609852659076, "grad_norm": 1.7835774421691895, "learning_rate": 1.8078215768615095e-05, "loss": 0.4078, "step": 20864 }, { "epoch": 3.406024243908412, "grad_norm": 1.6838513612747192, "learning_rate": 1.8078028720120424e-05, "loss": 0.4397, "step": 20865 }, { "epoch": 3.4061875025509165, "grad_norm": 2.1489570140838623, "learning_rate": 1.8077841663491174e-05, "loss": 0.5379, "step": 20866 }, { "epoch": 3.4063507611934205, "grad_norm": 1.5075486898422241, "learning_rate": 1.8077654598727545e-05, "loss": 0.3811, "step": 20867 }, { "epoch": 3.406514019835925, "grad_norm": 1.7240017652511597, "learning_rate": 1.807746752582972e-05, "loss": 0.4312, "step": 20868 }, { "epoch": 3.4066772784784294, "grad_norm": 1.6042765378952026, "learning_rate": 1.8077280444797884e-05, "loss": 0.3653, "step": 20869 }, { "epoch": 3.406840537120934, "grad_norm": 1.6227306127548218, "learning_rate": 1.8077093355632232e-05, "loss": 0.391, "step": 20870 }, { "epoch": 3.4070037957634383, "grad_norm": 2.588488817214966, "learning_rate": 1.8076906258332946e-05, "loss": 0.5726, "step": 20871 }, { "epoch": 3.4071670544059427, "grad_norm": 2.256401777267456, "learning_rate": 1.807671915290022e-05, "loss": 0.4921, "step": 20872 }, { "epoch": 3.407330313048447, "grad_norm": 1.6943551301956177, "learning_rate": 1.8076532039334238e-05, "loss": 0.4015, "step": 20873 }, { "epoch": 3.4074935716909516, "grad_norm": 1.799858808517456, "learning_rate": 1.8076344917635192e-05, "loss": 0.4886, "step": 20874 }, { "epoch": 3.4076568303334556, "grad_norm": 1.9318337440490723, "learning_rate": 1.8076157787803268e-05, "loss": 0.3636, "step": 20875 }, { "epoch": 3.40782008897596, "grad_norm": 1.779524803161621, "learning_rate": 1.8075970649838655e-05, "loss": 0.4445, "step": 20876 }, { "epoch": 3.4079833476184644, "grad_norm": 2.0774621963500977, "learning_rate": 1.8075783503741543e-05, "loss": 0.525, "step": 20877 }, { "epoch": 3.408146606260969, "grad_norm": 2.490010976791382, "learning_rate": 1.8075596349512116e-05, "loss": 0.4739, "step": 20878 }, { "epoch": 3.4083098649034733, "grad_norm": 2.090510606765747, "learning_rate": 1.807540918715057e-05, "loss": 0.5408, "step": 20879 }, { "epoch": 3.4084731235459778, "grad_norm": 2.5296061038970947, "learning_rate": 1.807522201665709e-05, "loss": 0.5517, "step": 20880 }, { "epoch": 3.408636382188482, "grad_norm": 2.3510677814483643, "learning_rate": 1.807503483803186e-05, "loss": 0.5835, "step": 20881 }, { "epoch": 3.4087996408309866, "grad_norm": 2.2000858783721924, "learning_rate": 1.807484765127507e-05, "loss": 0.4967, "step": 20882 }, { "epoch": 3.408962899473491, "grad_norm": 1.3892238140106201, "learning_rate": 1.8074660456386914e-05, "loss": 0.3464, "step": 20883 }, { "epoch": 3.4091261581159955, "grad_norm": 1.9321955442428589, "learning_rate": 1.8074473253367576e-05, "loss": 0.5125, "step": 20884 }, { "epoch": 3.4092894167584995, "grad_norm": 1.8065664768218994, "learning_rate": 1.8074286042217246e-05, "loss": 0.4505, "step": 20885 }, { "epoch": 3.409452675401004, "grad_norm": 1.9865789413452148, "learning_rate": 1.807409882293611e-05, "loss": 0.4853, "step": 20886 }, { "epoch": 3.4096159340435084, "grad_norm": 2.113102436065674, "learning_rate": 1.8073911595524362e-05, "loss": 0.4851, "step": 20887 }, { "epoch": 3.409779192686013, "grad_norm": 2.5855190753936768, "learning_rate": 1.8073724359982184e-05, "loss": 0.4996, "step": 20888 }, { "epoch": 3.4099424513285173, "grad_norm": 2.5880932807922363, "learning_rate": 1.807353711630977e-05, "loss": 0.5084, "step": 20889 }, { "epoch": 3.4101057099710217, "grad_norm": 2.1153035163879395, "learning_rate": 1.8073349864507307e-05, "loss": 0.4213, "step": 20890 }, { "epoch": 3.410268968613526, "grad_norm": 1.9905728101730347, "learning_rate": 1.807316260457498e-05, "loss": 0.496, "step": 20891 }, { "epoch": 3.41043222725603, "grad_norm": 2.0550107955932617, "learning_rate": 1.8072975336512983e-05, "loss": 0.4679, "step": 20892 }, { "epoch": 3.4105954858985346, "grad_norm": 2.10811185836792, "learning_rate": 1.80727880603215e-05, "loss": 0.453, "step": 20893 }, { "epoch": 3.410758744541039, "grad_norm": 1.8064311742782593, "learning_rate": 1.807260077600072e-05, "loss": 0.448, "step": 20894 }, { "epoch": 3.4109220031835434, "grad_norm": 2.1799492835998535, "learning_rate": 1.807241348355084e-05, "loss": 0.5209, "step": 20895 }, { "epoch": 3.411085261826048, "grad_norm": 1.5269858837127686, "learning_rate": 1.8072226182972036e-05, "loss": 0.4069, "step": 20896 }, { "epoch": 3.4112485204685523, "grad_norm": 1.9874153137207031, "learning_rate": 1.8072038874264504e-05, "loss": 0.4139, "step": 20897 }, { "epoch": 3.4114117791110568, "grad_norm": 1.847602367401123, "learning_rate": 1.807185155742843e-05, "loss": 0.4835, "step": 20898 }, { "epoch": 3.411575037753561, "grad_norm": 1.8493742942810059, "learning_rate": 1.8071664232464005e-05, "loss": 0.4758, "step": 20899 }, { "epoch": 3.4117382963960656, "grad_norm": 1.494354248046875, "learning_rate": 1.8071476899371414e-05, "loss": 0.4411, "step": 20900 }, { "epoch": 3.41190155503857, "grad_norm": 1.560133695602417, "learning_rate": 1.807128955815085e-05, "loss": 0.4026, "step": 20901 }, { "epoch": 3.412064813681074, "grad_norm": 2.061162233352661, "learning_rate": 1.80711022088025e-05, "loss": 0.522, "step": 20902 }, { "epoch": 3.4122280723235785, "grad_norm": 1.8727480173110962, "learning_rate": 1.8070914851326552e-05, "loss": 0.4577, "step": 20903 }, { "epoch": 3.412391330966083, "grad_norm": 1.775193214416504, "learning_rate": 1.8070727485723193e-05, "loss": 0.5084, "step": 20904 }, { "epoch": 3.4125545896085874, "grad_norm": 2.014887571334839, "learning_rate": 1.8070540111992615e-05, "loss": 0.4698, "step": 20905 }, { "epoch": 3.412717848251092, "grad_norm": 1.5512316226959229, "learning_rate": 1.8070352730135002e-05, "loss": 0.397, "step": 20906 }, { "epoch": 3.4128811068935963, "grad_norm": 1.9608924388885498, "learning_rate": 1.8070165340150552e-05, "loss": 0.5029, "step": 20907 }, { "epoch": 3.4130443655361007, "grad_norm": 1.6361955404281616, "learning_rate": 1.8069977942039444e-05, "loss": 0.4163, "step": 20908 }, { "epoch": 3.413207624178605, "grad_norm": 1.730828046798706, "learning_rate": 1.806979053580187e-05, "loss": 0.378, "step": 20909 }, { "epoch": 3.413370882821109, "grad_norm": 2.338343620300293, "learning_rate": 1.806960312143802e-05, "loss": 0.5598, "step": 20910 }, { "epoch": 3.4135341414636136, "grad_norm": 1.955583930015564, "learning_rate": 1.8069415698948082e-05, "loss": 0.4002, "step": 20911 }, { "epoch": 3.413697400106118, "grad_norm": 2.0379981994628906, "learning_rate": 1.8069228268332245e-05, "loss": 0.4936, "step": 20912 }, { "epoch": 3.4138606587486224, "grad_norm": 2.2792305946350098, "learning_rate": 1.8069040829590698e-05, "loss": 0.56, "step": 20913 }, { "epoch": 3.414023917391127, "grad_norm": 2.0365848541259766, "learning_rate": 1.8068853382723625e-05, "loss": 0.4277, "step": 20914 }, { "epoch": 3.4141871760336313, "grad_norm": 1.7799237966537476, "learning_rate": 1.8068665927731222e-05, "loss": 0.3788, "step": 20915 }, { "epoch": 3.4143504346761357, "grad_norm": 1.9737820625305176, "learning_rate": 1.8068478464613673e-05, "loss": 0.4774, "step": 20916 }, { "epoch": 3.41451369331864, "grad_norm": 1.6691768169403076, "learning_rate": 1.806829099337117e-05, "loss": 0.4527, "step": 20917 }, { "epoch": 3.4146769519611446, "grad_norm": 1.9577420949935913, "learning_rate": 1.8068103514003898e-05, "loss": 0.5829, "step": 20918 }, { "epoch": 3.414840210603649, "grad_norm": 1.9392520189285278, "learning_rate": 1.806791602651205e-05, "loss": 0.4823, "step": 20919 }, { "epoch": 3.415003469246153, "grad_norm": 2.2111504077911377, "learning_rate": 1.8067728530895812e-05, "loss": 0.4811, "step": 20920 }, { "epoch": 3.4151667278886575, "grad_norm": 2.10827898979187, "learning_rate": 1.8067541027155376e-05, "loss": 0.5198, "step": 20921 }, { "epoch": 3.415329986531162, "grad_norm": 1.8967008590698242, "learning_rate": 1.8067353515290925e-05, "loss": 0.4081, "step": 20922 }, { "epoch": 3.4154932451736664, "grad_norm": 1.7581321001052856, "learning_rate": 1.8067165995302652e-05, "loss": 0.4197, "step": 20923 }, { "epoch": 3.415656503816171, "grad_norm": 1.7418667078018188, "learning_rate": 1.8066978467190744e-05, "loss": 0.4417, "step": 20924 }, { "epoch": 3.4158197624586752, "grad_norm": 2.318708896636963, "learning_rate": 1.8066790930955393e-05, "loss": 0.5278, "step": 20925 }, { "epoch": 3.4159830211011797, "grad_norm": 2.132516860961914, "learning_rate": 1.8066603386596783e-05, "loss": 0.5007, "step": 20926 }, { "epoch": 3.416146279743684, "grad_norm": 1.8649885654449463, "learning_rate": 1.8066415834115107e-05, "loss": 0.4613, "step": 20927 }, { "epoch": 3.416309538386188, "grad_norm": 1.898942232131958, "learning_rate": 1.8066228273510554e-05, "loss": 0.513, "step": 20928 }, { "epoch": 3.4164727970286926, "grad_norm": 1.6693394184112549, "learning_rate": 1.806604070478331e-05, "loss": 0.4267, "step": 20929 }, { "epoch": 3.416636055671197, "grad_norm": 1.7742481231689453, "learning_rate": 1.8065853127933563e-05, "loss": 0.3788, "step": 20930 }, { "epoch": 3.4167993143137014, "grad_norm": 2.190532922744751, "learning_rate": 1.8065665542961505e-05, "loss": 0.4639, "step": 20931 }, { "epoch": 3.416962572956206, "grad_norm": 2.029883623123169, "learning_rate": 1.8065477949867327e-05, "loss": 0.4914, "step": 20932 }, { "epoch": 3.4171258315987103, "grad_norm": 2.2647860050201416, "learning_rate": 1.8065290348651213e-05, "loss": 0.4647, "step": 20933 }, { "epoch": 3.4172890902412147, "grad_norm": 1.90986967086792, "learning_rate": 1.8065102739313355e-05, "loss": 0.4638, "step": 20934 }, { "epoch": 3.417452348883719, "grad_norm": 1.9383094310760498, "learning_rate": 1.8064915121853938e-05, "loss": 0.4465, "step": 20935 }, { "epoch": 3.4176156075262236, "grad_norm": 1.8370985984802246, "learning_rate": 1.8064727496273152e-05, "loss": 0.4617, "step": 20936 }, { "epoch": 3.4177788661687276, "grad_norm": 2.048436164855957, "learning_rate": 1.806453986257119e-05, "loss": 0.4707, "step": 20937 }, { "epoch": 3.417942124811232, "grad_norm": 2.427215337753296, "learning_rate": 1.806435222074824e-05, "loss": 0.5668, "step": 20938 }, { "epoch": 3.4181053834537365, "grad_norm": 1.8400644063949585, "learning_rate": 1.806416457080449e-05, "loss": 0.4142, "step": 20939 }, { "epoch": 3.418268642096241, "grad_norm": 1.965983510017395, "learning_rate": 1.8063976912740125e-05, "loss": 0.4932, "step": 20940 }, { "epoch": 3.4184319007387454, "grad_norm": 1.919546365737915, "learning_rate": 1.8063789246555338e-05, "loss": 0.5259, "step": 20941 }, { "epoch": 3.41859515938125, "grad_norm": 1.744006633758545, "learning_rate": 1.806360157225032e-05, "loss": 0.4477, "step": 20942 }, { "epoch": 3.4187584180237542, "grad_norm": 2.2193939685821533, "learning_rate": 1.8063413889825254e-05, "loss": 0.568, "step": 20943 }, { "epoch": 3.4189216766662587, "grad_norm": 1.890881061553955, "learning_rate": 1.8063226199280335e-05, "loss": 0.4804, "step": 20944 }, { "epoch": 3.4190849353087627, "grad_norm": 2.2607264518737793, "learning_rate": 1.8063038500615747e-05, "loss": 0.5609, "step": 20945 }, { "epoch": 3.419248193951267, "grad_norm": 1.9614630937576294, "learning_rate": 1.8062850793831685e-05, "loss": 0.467, "step": 20946 }, { "epoch": 3.4194114525937715, "grad_norm": 2.0018117427825928, "learning_rate": 1.806266307892833e-05, "loss": 0.4592, "step": 20947 }, { "epoch": 3.419574711236276, "grad_norm": 2.0650417804718018, "learning_rate": 1.8062475355905877e-05, "loss": 0.4606, "step": 20948 }, { "epoch": 3.4197379698787804, "grad_norm": 1.9689550399780273, "learning_rate": 1.8062287624764515e-05, "loss": 0.5047, "step": 20949 }, { "epoch": 3.419901228521285, "grad_norm": 1.580026388168335, "learning_rate": 1.806209988550443e-05, "loss": 0.4432, "step": 20950 }, { "epoch": 3.4200644871637893, "grad_norm": 1.5470058917999268, "learning_rate": 1.8061912138125815e-05, "loss": 0.3775, "step": 20951 }, { "epoch": 3.4202277458062937, "grad_norm": 1.7279679775238037, "learning_rate": 1.8061724382628854e-05, "loss": 0.456, "step": 20952 }, { "epoch": 3.420391004448798, "grad_norm": 1.9776639938354492, "learning_rate": 1.806153661901374e-05, "loss": 0.4949, "step": 20953 }, { "epoch": 3.4205542630913026, "grad_norm": 2.2543482780456543, "learning_rate": 1.806134884728066e-05, "loss": 0.5436, "step": 20954 }, { "epoch": 3.4207175217338066, "grad_norm": 1.823337435722351, "learning_rate": 1.8061161067429802e-05, "loss": 0.5074, "step": 20955 }, { "epoch": 3.420880780376311, "grad_norm": 1.9231795072555542, "learning_rate": 1.806097327946136e-05, "loss": 0.4981, "step": 20956 }, { "epoch": 3.4210440390188155, "grad_norm": 1.904241919517517, "learning_rate": 1.8060785483375517e-05, "loss": 0.4788, "step": 20957 }, { "epoch": 3.42120729766132, "grad_norm": 2.1296000480651855, "learning_rate": 1.806059767917247e-05, "loss": 0.4587, "step": 20958 }, { "epoch": 3.4213705563038244, "grad_norm": 2.1155030727386475, "learning_rate": 1.80604098668524e-05, "loss": 0.4696, "step": 20959 }, { "epoch": 3.421533814946329, "grad_norm": 1.7190736532211304, "learning_rate": 1.8060222046415502e-05, "loss": 0.465, "step": 20960 }, { "epoch": 3.4216970735888332, "grad_norm": 2.245124340057373, "learning_rate": 1.806003421786196e-05, "loss": 0.4774, "step": 20961 }, { "epoch": 3.4218603322313377, "grad_norm": 2.393611431121826, "learning_rate": 1.8059846381191964e-05, "loss": 0.5658, "step": 20962 }, { "epoch": 3.4220235908738417, "grad_norm": 1.7557154893875122, "learning_rate": 1.8059658536405705e-05, "loss": 0.4664, "step": 20963 }, { "epoch": 3.422186849516346, "grad_norm": 1.8357088565826416, "learning_rate": 1.8059470683503377e-05, "loss": 0.4634, "step": 20964 }, { "epoch": 3.4223501081588505, "grad_norm": 1.9673444032669067, "learning_rate": 1.805928282248516e-05, "loss": 0.4347, "step": 20965 }, { "epoch": 3.422513366801355, "grad_norm": 2.698209762573242, "learning_rate": 1.8059094953351247e-05, "loss": 0.5276, "step": 20966 }, { "epoch": 3.4226766254438594, "grad_norm": 2.303269147872925, "learning_rate": 1.8058907076101828e-05, "loss": 0.5609, "step": 20967 }, { "epoch": 3.422839884086364, "grad_norm": 1.6832166910171509, "learning_rate": 1.8058719190737094e-05, "loss": 0.4028, "step": 20968 }, { "epoch": 3.4230031427288683, "grad_norm": 1.8481309413909912, "learning_rate": 1.8058531297257232e-05, "loss": 0.492, "step": 20969 }, { "epoch": 3.4231664013713727, "grad_norm": 2.129307985305786, "learning_rate": 1.805834339566243e-05, "loss": 0.5234, "step": 20970 }, { "epoch": 3.423329660013877, "grad_norm": 2.2966575622558594, "learning_rate": 1.8058155485952874e-05, "loss": 0.5581, "step": 20971 }, { "epoch": 3.4234929186563816, "grad_norm": 2.0039827823638916, "learning_rate": 1.8057967568128764e-05, "loss": 0.4675, "step": 20972 }, { "epoch": 3.4236561772988856, "grad_norm": 1.8044530153274536, "learning_rate": 1.805777964219028e-05, "loss": 0.4497, "step": 20973 }, { "epoch": 3.42381943594139, "grad_norm": 2.0142927169799805, "learning_rate": 1.8057591708137616e-05, "loss": 0.499, "step": 20974 }, { "epoch": 3.4239826945838945, "grad_norm": 1.8823603391647339, "learning_rate": 1.8057403765970955e-05, "loss": 0.4814, "step": 20975 }, { "epoch": 3.424145953226399, "grad_norm": 1.9968080520629883, "learning_rate": 1.8057215815690494e-05, "loss": 0.4832, "step": 20976 }, { "epoch": 3.4243092118689034, "grad_norm": 2.293184518814087, "learning_rate": 1.805702785729642e-05, "loss": 0.5457, "step": 20977 }, { "epoch": 3.424472470511408, "grad_norm": 1.8986115455627441, "learning_rate": 1.8056839890788917e-05, "loss": 0.4508, "step": 20978 }, { "epoch": 3.4246357291539122, "grad_norm": 1.7312191724777222, "learning_rate": 1.8056651916168178e-05, "loss": 0.4592, "step": 20979 }, { "epoch": 3.4247989877964162, "grad_norm": 2.544952392578125, "learning_rate": 1.80564639334344e-05, "loss": 0.4911, "step": 20980 }, { "epoch": 3.4249622464389207, "grad_norm": 2.0604746341705322, "learning_rate": 1.8056275942587757e-05, "loss": 0.4715, "step": 20981 }, { "epoch": 3.425125505081425, "grad_norm": 2.578256607055664, "learning_rate": 1.8056087943628448e-05, "loss": 0.6032, "step": 20982 }, { "epoch": 3.4252887637239295, "grad_norm": 1.7691692113876343, "learning_rate": 1.805589993655666e-05, "loss": 0.4433, "step": 20983 }, { "epoch": 3.425452022366434, "grad_norm": 1.9222784042358398, "learning_rate": 1.8055711921372588e-05, "loss": 0.417, "step": 20984 }, { "epoch": 3.4256152810089384, "grad_norm": 1.711568832397461, "learning_rate": 1.8055523898076416e-05, "loss": 0.4317, "step": 20985 }, { "epoch": 3.425778539651443, "grad_norm": 1.7789169549942017, "learning_rate": 1.805533586666833e-05, "loss": 0.4413, "step": 20986 }, { "epoch": 3.4259417982939473, "grad_norm": 1.893938422203064, "learning_rate": 1.805514782714852e-05, "loss": 0.4485, "step": 20987 }, { "epoch": 3.4261050569364517, "grad_norm": 1.66093909740448, "learning_rate": 1.8054959779517185e-05, "loss": 0.4059, "step": 20988 }, { "epoch": 3.426268315578956, "grad_norm": 2.3466744422912598, "learning_rate": 1.8054771723774504e-05, "loss": 0.5377, "step": 20989 }, { "epoch": 3.42643157422146, "grad_norm": 2.087351083755493, "learning_rate": 1.805458365992067e-05, "loss": 0.5117, "step": 20990 }, { "epoch": 3.4265948328639646, "grad_norm": 2.624525785446167, "learning_rate": 1.8054395587955873e-05, "loss": 0.5716, "step": 20991 }, { "epoch": 3.426758091506469, "grad_norm": 2.0823686122894287, "learning_rate": 1.80542075078803e-05, "loss": 0.533, "step": 20992 }, { "epoch": 3.4269213501489735, "grad_norm": 1.8631961345672607, "learning_rate": 1.8054019419694145e-05, "loss": 0.4371, "step": 20993 }, { "epoch": 3.427084608791478, "grad_norm": 2.094456195831299, "learning_rate": 1.8053831323397594e-05, "loss": 0.4699, "step": 20994 }, { "epoch": 3.4272478674339824, "grad_norm": 1.8111711740493774, "learning_rate": 1.8053643218990835e-05, "loss": 0.405, "step": 20995 }, { "epoch": 3.427411126076487, "grad_norm": 1.7908467054367065, "learning_rate": 1.8053455106474064e-05, "loss": 0.4581, "step": 20996 }, { "epoch": 3.4275743847189912, "grad_norm": 1.999762773513794, "learning_rate": 1.805326698584746e-05, "loss": 0.477, "step": 20997 }, { "epoch": 3.4277376433614952, "grad_norm": 2.1658482551574707, "learning_rate": 1.8053078857111218e-05, "loss": 0.5207, "step": 20998 }, { "epoch": 3.4279009020039997, "grad_norm": 1.5977691411972046, "learning_rate": 1.8052890720265536e-05, "loss": 0.3739, "step": 20999 }, { "epoch": 3.428064160646504, "grad_norm": 1.6236581802368164, "learning_rate": 1.8052702575310588e-05, "loss": 0.4121, "step": 21000 }, { "epoch": 3.4282274192890085, "grad_norm": 1.8391835689544678, "learning_rate": 1.8052514422246573e-05, "loss": 0.4438, "step": 21001 }, { "epoch": 3.428390677931513, "grad_norm": 2.288407802581787, "learning_rate": 1.8052326261073675e-05, "loss": 0.4652, "step": 21002 }, { "epoch": 3.4285539365740174, "grad_norm": 1.8970259428024292, "learning_rate": 1.805213809179209e-05, "loss": 0.4534, "step": 21003 }, { "epoch": 3.428717195216522, "grad_norm": 2.1387479305267334, "learning_rate": 1.8051949914402004e-05, "loss": 0.4534, "step": 21004 }, { "epoch": 3.4288804538590263, "grad_norm": 2.23282790184021, "learning_rate": 1.8051761728903604e-05, "loss": 0.5019, "step": 21005 }, { "epoch": 3.4290437125015307, "grad_norm": 1.9505668878555298, "learning_rate": 1.8051573535297085e-05, "loss": 0.4674, "step": 21006 }, { "epoch": 3.429206971144035, "grad_norm": 2.096165657043457, "learning_rate": 1.8051385333582636e-05, "loss": 0.5133, "step": 21007 }, { "epoch": 3.429370229786539, "grad_norm": 1.8358874320983887, "learning_rate": 1.8051197123760437e-05, "loss": 0.4277, "step": 21008 }, { "epoch": 3.4295334884290436, "grad_norm": 2.343500852584839, "learning_rate": 1.805100890583069e-05, "loss": 0.5287, "step": 21009 }, { "epoch": 3.429696747071548, "grad_norm": 2.178896903991699, "learning_rate": 1.8050820679793578e-05, "loss": 0.4667, "step": 21010 }, { "epoch": 3.4298600057140525, "grad_norm": 1.7913116216659546, "learning_rate": 1.805063244564929e-05, "loss": 0.4039, "step": 21011 }, { "epoch": 3.430023264356557, "grad_norm": 1.7982150316238403, "learning_rate": 1.8050444203398018e-05, "loss": 0.4106, "step": 21012 }, { "epoch": 3.4301865229990613, "grad_norm": 1.445927381515503, "learning_rate": 1.8050255953039953e-05, "loss": 0.3158, "step": 21013 }, { "epoch": 3.430349781641566, "grad_norm": 2.290567636489868, "learning_rate": 1.805006769457528e-05, "loss": 0.5632, "step": 21014 }, { "epoch": 3.4305130402840702, "grad_norm": 1.9137136936187744, "learning_rate": 1.804987942800419e-05, "loss": 0.4593, "step": 21015 }, { "epoch": 3.430676298926574, "grad_norm": 2.071044445037842, "learning_rate": 1.8049691153326875e-05, "loss": 0.4794, "step": 21016 }, { "epoch": 3.4308395575690787, "grad_norm": 1.7522441148757935, "learning_rate": 1.8049502870543522e-05, "loss": 0.5015, "step": 21017 }, { "epoch": 3.431002816211583, "grad_norm": 1.9640202522277832, "learning_rate": 1.8049314579654324e-05, "loss": 0.4535, "step": 21018 }, { "epoch": 3.4311660748540875, "grad_norm": 2.2445485591888428, "learning_rate": 1.804912628065947e-05, "loss": 0.4825, "step": 21019 }, { "epoch": 3.431329333496592, "grad_norm": 2.511988639831543, "learning_rate": 1.804893797355914e-05, "loss": 0.528, "step": 21020 }, { "epoch": 3.4314925921390964, "grad_norm": 2.3962278366088867, "learning_rate": 1.804874965835354e-05, "loss": 0.5493, "step": 21021 }, { "epoch": 3.431655850781601, "grad_norm": 2.3290133476257324, "learning_rate": 1.8048561335042847e-05, "loss": 0.4942, "step": 21022 }, { "epoch": 3.4318191094241053, "grad_norm": 2.073758125305176, "learning_rate": 1.8048373003627257e-05, "loss": 0.4755, "step": 21023 }, { "epoch": 3.4319823680666097, "grad_norm": 2.1579437255859375, "learning_rate": 1.8048184664106957e-05, "loss": 0.4927, "step": 21024 }, { "epoch": 3.432145626709114, "grad_norm": 1.8308686017990112, "learning_rate": 1.8047996316482134e-05, "loss": 0.5134, "step": 21025 }, { "epoch": 3.432308885351618, "grad_norm": 1.8626868724822998, "learning_rate": 1.8047807960752985e-05, "loss": 0.4454, "step": 21026 }, { "epoch": 3.4324721439941226, "grad_norm": 1.7948518991470337, "learning_rate": 1.8047619596919695e-05, "loss": 0.3998, "step": 21027 }, { "epoch": 3.432635402636627, "grad_norm": 2.24253249168396, "learning_rate": 1.804743122498245e-05, "loss": 0.4709, "step": 21028 }, { "epoch": 3.4327986612791315, "grad_norm": 1.5376487970352173, "learning_rate": 1.804724284494145e-05, "loss": 0.4098, "step": 21029 }, { "epoch": 3.432961919921636, "grad_norm": 1.9694797992706299, "learning_rate": 1.804705445679688e-05, "loss": 0.4517, "step": 21030 }, { "epoch": 3.4331251785641403, "grad_norm": 2.216348886489868, "learning_rate": 1.804686606054892e-05, "loss": 0.5832, "step": 21031 }, { "epoch": 3.433288437206645, "grad_norm": 1.9425132274627686, "learning_rate": 1.8046677656197774e-05, "loss": 0.5128, "step": 21032 }, { "epoch": 3.4334516958491488, "grad_norm": 2.1841318607330322, "learning_rate": 1.8046489243743625e-05, "loss": 0.5897, "step": 21033 }, { "epoch": 3.433614954491653, "grad_norm": 1.8130236864089966, "learning_rate": 1.8046300823186656e-05, "loss": 0.4623, "step": 21034 }, { "epoch": 3.4337782131341577, "grad_norm": 1.9991122484207153, "learning_rate": 1.8046112394527072e-05, "loss": 0.5396, "step": 21035 }, { "epoch": 3.433941471776662, "grad_norm": 1.7704532146453857, "learning_rate": 1.8045923957765055e-05, "loss": 0.4319, "step": 21036 }, { "epoch": 3.4341047304191665, "grad_norm": 2.0650641918182373, "learning_rate": 1.8045735512900792e-05, "loss": 0.485, "step": 21037 }, { "epoch": 3.434267989061671, "grad_norm": 1.933755874633789, "learning_rate": 1.804554705993448e-05, "loss": 0.4437, "step": 21038 }, { "epoch": 3.4344312477041754, "grad_norm": 2.015444278717041, "learning_rate": 1.8045358598866297e-05, "loss": 0.4706, "step": 21039 }, { "epoch": 3.43459450634668, "grad_norm": 1.7579448223114014, "learning_rate": 1.8045170129696444e-05, "loss": 0.488, "step": 21040 }, { "epoch": 3.4347577649891843, "grad_norm": 1.695526123046875, "learning_rate": 1.8044981652425107e-05, "loss": 0.4939, "step": 21041 }, { "epoch": 3.4349210236316887, "grad_norm": 1.790925145149231, "learning_rate": 1.8044793167052476e-05, "loss": 0.5002, "step": 21042 }, { "epoch": 3.4350842822741927, "grad_norm": 1.5034589767456055, "learning_rate": 1.804460467357874e-05, "loss": 0.3812, "step": 21043 }, { "epoch": 3.435247540916697, "grad_norm": 2.204847574234009, "learning_rate": 1.804441617200409e-05, "loss": 0.5247, "step": 21044 }, { "epoch": 3.4354107995592016, "grad_norm": 2.08420729637146, "learning_rate": 1.8044227662328712e-05, "loss": 0.4474, "step": 21045 }, { "epoch": 3.435574058201706, "grad_norm": 1.7247035503387451, "learning_rate": 1.8044039144552802e-05, "loss": 0.4869, "step": 21046 }, { "epoch": 3.4357373168442105, "grad_norm": 1.8672069311141968, "learning_rate": 1.8043850618676546e-05, "loss": 0.4238, "step": 21047 }, { "epoch": 3.435900575486715, "grad_norm": 1.9161535501480103, "learning_rate": 1.8043662084700134e-05, "loss": 0.4041, "step": 21048 }, { "epoch": 3.4360638341292193, "grad_norm": 1.9724066257476807, "learning_rate": 1.8043473542623753e-05, "loss": 0.4575, "step": 21049 }, { "epoch": 3.436227092771724, "grad_norm": 1.8346930742263794, "learning_rate": 1.8043284992447603e-05, "loss": 0.4689, "step": 21050 }, { "epoch": 3.4363903514142278, "grad_norm": 1.7079464197158813, "learning_rate": 1.8043096434171863e-05, "loss": 0.4349, "step": 21051 }, { "epoch": 3.436553610056732, "grad_norm": 1.7245512008666992, "learning_rate": 1.804290786779673e-05, "loss": 0.4661, "step": 21052 }, { "epoch": 3.4367168686992366, "grad_norm": 1.88006591796875, "learning_rate": 1.8042719293322388e-05, "loss": 0.4355, "step": 21053 }, { "epoch": 3.436880127341741, "grad_norm": 2.2155678272247314, "learning_rate": 1.804253071074903e-05, "loss": 0.4521, "step": 21054 }, { "epoch": 3.4370433859842455, "grad_norm": 2.0793697834014893, "learning_rate": 1.8042342120076844e-05, "loss": 0.5021, "step": 21055 }, { "epoch": 3.43720664462675, "grad_norm": 2.4557716846466064, "learning_rate": 1.8042153521306027e-05, "loss": 0.5164, "step": 21056 }, { "epoch": 3.4373699032692544, "grad_norm": 1.960584282875061, "learning_rate": 1.804196491443676e-05, "loss": 0.4665, "step": 21057 }, { "epoch": 3.437533161911759, "grad_norm": 2.2138330936431885, "learning_rate": 1.804177629946924e-05, "loss": 0.5333, "step": 21058 }, { "epoch": 3.4376964205542633, "grad_norm": 1.7021658420562744, "learning_rate": 1.8041587676403647e-05, "loss": 0.4283, "step": 21059 }, { "epoch": 3.4378596791967677, "grad_norm": 1.8625946044921875, "learning_rate": 1.8041399045240183e-05, "loss": 0.431, "step": 21060 }, { "epoch": 3.4380229378392717, "grad_norm": 1.6064623594284058, "learning_rate": 1.804121040597903e-05, "loss": 0.4112, "step": 21061 }, { "epoch": 3.438186196481776, "grad_norm": 2.023308038711548, "learning_rate": 1.804102175862038e-05, "loss": 0.5286, "step": 21062 }, { "epoch": 3.4383494551242806, "grad_norm": 1.829521894454956, "learning_rate": 1.804083310316442e-05, "loss": 0.4418, "step": 21063 }, { "epoch": 3.438512713766785, "grad_norm": 1.9780116081237793, "learning_rate": 1.8040644439611348e-05, "loss": 0.5238, "step": 21064 }, { "epoch": 3.4386759724092895, "grad_norm": 1.8580251932144165, "learning_rate": 1.804045576796135e-05, "loss": 0.4837, "step": 21065 }, { "epoch": 3.438839231051794, "grad_norm": 2.1438982486724854, "learning_rate": 1.8040267088214613e-05, "loss": 0.5594, "step": 21066 }, { "epoch": 3.4390024896942983, "grad_norm": 2.432607412338257, "learning_rate": 1.804007840037133e-05, "loss": 0.585, "step": 21067 }, { "epoch": 3.4391657483368023, "grad_norm": 2.0131118297576904, "learning_rate": 1.8039889704431688e-05, "loss": 0.4473, "step": 21068 }, { "epoch": 3.4393290069793068, "grad_norm": 1.7806254625320435, "learning_rate": 1.8039701000395878e-05, "loss": 0.4393, "step": 21069 }, { "epoch": 3.439492265621811, "grad_norm": 1.8261793851852417, "learning_rate": 1.8039512288264098e-05, "loss": 0.4526, "step": 21070 }, { "epoch": 3.4396555242643156, "grad_norm": 1.7466992139816284, "learning_rate": 1.8039323568036525e-05, "loss": 0.4055, "step": 21071 }, { "epoch": 3.43981878290682, "grad_norm": 1.8749244213104248, "learning_rate": 1.8039134839713357e-05, "loss": 0.4659, "step": 21072 }, { "epoch": 3.4399820415493245, "grad_norm": 1.9380602836608887, "learning_rate": 1.8038946103294784e-05, "loss": 0.4677, "step": 21073 }, { "epoch": 3.440145300191829, "grad_norm": 2.101651191711426, "learning_rate": 1.803875735878099e-05, "loss": 0.5739, "step": 21074 }, { "epoch": 3.4403085588343334, "grad_norm": 2.229686737060547, "learning_rate": 1.8038568606172172e-05, "loss": 0.5192, "step": 21075 }, { "epoch": 3.440471817476838, "grad_norm": 2.3122239112854004, "learning_rate": 1.803837984546852e-05, "loss": 0.5196, "step": 21076 }, { "epoch": 3.4406350761193423, "grad_norm": 2.2260019779205322, "learning_rate": 1.803819107667022e-05, "loss": 0.4432, "step": 21077 }, { "epoch": 3.4407983347618463, "grad_norm": 1.7240246534347534, "learning_rate": 1.8038002299777467e-05, "loss": 0.4229, "step": 21078 }, { "epoch": 3.4409615934043507, "grad_norm": 1.8745685815811157, "learning_rate": 1.803781351479044e-05, "loss": 0.4538, "step": 21079 }, { "epoch": 3.441124852046855, "grad_norm": 1.9744393825531006, "learning_rate": 1.8037624721709344e-05, "loss": 0.4967, "step": 21080 }, { "epoch": 3.4412881106893596, "grad_norm": 2.2386538982391357, "learning_rate": 1.803743592053436e-05, "loss": 0.5776, "step": 21081 }, { "epoch": 3.441451369331864, "grad_norm": 1.987705111503601, "learning_rate": 1.8037247111265678e-05, "loss": 0.4797, "step": 21082 }, { "epoch": 3.4416146279743685, "grad_norm": 1.9365782737731934, "learning_rate": 1.803705829390349e-05, "loss": 0.4791, "step": 21083 }, { "epoch": 3.441777886616873, "grad_norm": 1.9395312070846558, "learning_rate": 1.8036869468447992e-05, "loss": 0.3838, "step": 21084 }, { "epoch": 3.4419411452593773, "grad_norm": 1.8418619632720947, "learning_rate": 1.803668063489936e-05, "loss": 0.4809, "step": 21085 }, { "epoch": 3.4421044039018813, "grad_norm": 2.1462416648864746, "learning_rate": 1.80364917932578e-05, "loss": 0.4892, "step": 21086 }, { "epoch": 3.4422676625443858, "grad_norm": 2.0989651679992676, "learning_rate": 1.8036302943523492e-05, "loss": 0.6016, "step": 21087 }, { "epoch": 3.44243092118689, "grad_norm": 1.9184987545013428, "learning_rate": 1.803611408569663e-05, "loss": 0.4734, "step": 21088 }, { "epoch": 3.4425941798293946, "grad_norm": 1.9471898078918457, "learning_rate": 1.8035925219777402e-05, "loss": 0.4701, "step": 21089 }, { "epoch": 3.442757438471899, "grad_norm": 1.9733160734176636, "learning_rate": 1.8035736345766e-05, "loss": 0.4549, "step": 21090 }, { "epoch": 3.4429206971144035, "grad_norm": 2.3523685932159424, "learning_rate": 1.8035547463662616e-05, "loss": 0.6027, "step": 21091 }, { "epoch": 3.443083955756908, "grad_norm": 1.9773914813995361, "learning_rate": 1.8035358573467433e-05, "loss": 0.4804, "step": 21092 }, { "epoch": 3.4432472143994124, "grad_norm": 1.9873268604278564, "learning_rate": 1.803516967518065e-05, "loss": 0.5033, "step": 21093 }, { "epoch": 3.443410473041917, "grad_norm": 1.9778283834457397, "learning_rate": 1.8034980768802453e-05, "loss": 0.4956, "step": 21094 }, { "epoch": 3.4435737316844213, "grad_norm": 1.8053103685379028, "learning_rate": 1.803479185433303e-05, "loss": 0.478, "step": 21095 }, { "epoch": 3.4437369903269253, "grad_norm": 1.7668253183364868, "learning_rate": 1.8034602931772577e-05, "loss": 0.494, "step": 21096 }, { "epoch": 3.4439002489694297, "grad_norm": 1.8161801099777222, "learning_rate": 1.8034414001121278e-05, "loss": 0.4506, "step": 21097 }, { "epoch": 3.444063507611934, "grad_norm": 1.7921028137207031, "learning_rate": 1.8034225062379327e-05, "loss": 0.4367, "step": 21098 }, { "epoch": 3.4442267662544386, "grad_norm": 1.8987151384353638, "learning_rate": 1.8034036115546914e-05, "loss": 0.5415, "step": 21099 }, { "epoch": 3.444390024896943, "grad_norm": 1.671889305114746, "learning_rate": 1.803384716062423e-05, "loss": 0.4039, "step": 21100 }, { "epoch": 3.4445532835394475, "grad_norm": 2.1686882972717285, "learning_rate": 1.803365819761146e-05, "loss": 0.5496, "step": 21101 }, { "epoch": 3.444716542181952, "grad_norm": 1.8762924671173096, "learning_rate": 1.8033469226508802e-05, "loss": 0.4439, "step": 21102 }, { "epoch": 3.4448798008244563, "grad_norm": 1.8488904237747192, "learning_rate": 1.803328024731644e-05, "loss": 0.472, "step": 21103 }, { "epoch": 3.4450430594669603, "grad_norm": 2.9366371631622314, "learning_rate": 1.8033091260034572e-05, "loss": 0.5674, "step": 21104 }, { "epoch": 3.4452063181094648, "grad_norm": 2.3890905380249023, "learning_rate": 1.803290226466338e-05, "loss": 0.4963, "step": 21105 }, { "epoch": 3.445369576751969, "grad_norm": 2.302659749984741, "learning_rate": 1.8032713261203057e-05, "loss": 0.4872, "step": 21106 }, { "epoch": 3.4455328353944736, "grad_norm": 1.8565974235534668, "learning_rate": 1.8032524249653792e-05, "loss": 0.459, "step": 21107 }, { "epoch": 3.445696094036978, "grad_norm": 1.8825501203536987, "learning_rate": 1.803233523001578e-05, "loss": 0.5422, "step": 21108 }, { "epoch": 3.4458593526794825, "grad_norm": 2.2394630908966064, "learning_rate": 1.803214620228921e-05, "loss": 0.5508, "step": 21109 }, { "epoch": 3.446022611321987, "grad_norm": 2.1162021160125732, "learning_rate": 1.8031957166474265e-05, "loss": 0.4663, "step": 21110 }, { "epoch": 3.4461858699644914, "grad_norm": 1.8550496101379395, "learning_rate": 1.8031768122571148e-05, "loss": 0.4257, "step": 21111 }, { "epoch": 3.446349128606996, "grad_norm": 2.2123234272003174, "learning_rate": 1.8031579070580038e-05, "loss": 0.5403, "step": 21112 }, { "epoch": 3.4465123872495003, "grad_norm": 1.8822243213653564, "learning_rate": 1.8031390010501132e-05, "loss": 0.387, "step": 21113 }, { "epoch": 3.4466756458920043, "grad_norm": 2.152122735977173, "learning_rate": 1.8031200942334617e-05, "loss": 0.4226, "step": 21114 }, { "epoch": 3.4468389045345087, "grad_norm": 1.9316775798797607, "learning_rate": 1.8031011866080686e-05, "loss": 0.4187, "step": 21115 }, { "epoch": 3.447002163177013, "grad_norm": 1.7000598907470703, "learning_rate": 1.803082278173953e-05, "loss": 0.4703, "step": 21116 }, { "epoch": 3.4471654218195176, "grad_norm": 1.9437170028686523, "learning_rate": 1.8030633689311337e-05, "loss": 0.4547, "step": 21117 }, { "epoch": 3.447328680462022, "grad_norm": 1.7931421995162964, "learning_rate": 1.8030444588796295e-05, "loss": 0.4441, "step": 21118 }, { "epoch": 3.4474919391045264, "grad_norm": 2.0238118171691895, "learning_rate": 1.8030255480194602e-05, "loss": 0.4661, "step": 21119 }, { "epoch": 3.447655197747031, "grad_norm": 1.7889392375946045, "learning_rate": 1.8030066363506438e-05, "loss": 0.4895, "step": 21120 }, { "epoch": 3.447818456389535, "grad_norm": 2.058605432510376, "learning_rate": 1.8029877238732003e-05, "loss": 0.4928, "step": 21121 }, { "epoch": 3.4479817150320393, "grad_norm": 1.960555076599121, "learning_rate": 1.802968810587148e-05, "loss": 0.4715, "step": 21122 }, { "epoch": 3.4481449736745438, "grad_norm": 1.9566489458084106, "learning_rate": 1.802949896492507e-05, "loss": 0.5093, "step": 21123 }, { "epoch": 3.448308232317048, "grad_norm": 1.6019697189331055, "learning_rate": 1.802930981589295e-05, "loss": 0.3694, "step": 21124 }, { "epoch": 3.4484714909595526, "grad_norm": 1.7979763746261597, "learning_rate": 1.8029120658775322e-05, "loss": 0.4571, "step": 21125 }, { "epoch": 3.448634749602057, "grad_norm": 2.102450132369995, "learning_rate": 1.802893149357237e-05, "loss": 0.4331, "step": 21126 }, { "epoch": 3.4487980082445615, "grad_norm": 1.589735746383667, "learning_rate": 1.8028742320284286e-05, "loss": 0.3858, "step": 21127 }, { "epoch": 3.448961266887066, "grad_norm": 1.3301870822906494, "learning_rate": 1.802855313891126e-05, "loss": 0.3608, "step": 21128 }, { "epoch": 3.4491245255295704, "grad_norm": 1.5672696828842163, "learning_rate": 1.802836394945348e-05, "loss": 0.3883, "step": 21129 }, { "epoch": 3.449287784172075, "grad_norm": 1.7691171169281006, "learning_rate": 1.8028174751911147e-05, "loss": 0.4134, "step": 21130 }, { "epoch": 3.449451042814579, "grad_norm": 2.259733200073242, "learning_rate": 1.802798554628444e-05, "loss": 0.5721, "step": 21131 }, { "epoch": 3.4496143014570833, "grad_norm": 1.871999979019165, "learning_rate": 1.8027796332573554e-05, "loss": 0.4852, "step": 21132 }, { "epoch": 3.4497775600995877, "grad_norm": 2.212775945663452, "learning_rate": 1.802760711077868e-05, "loss": 0.4704, "step": 21133 }, { "epoch": 3.449940818742092, "grad_norm": 2.3687238693237305, "learning_rate": 1.8027417880900007e-05, "loss": 0.5334, "step": 21134 }, { "epoch": 3.4501040773845966, "grad_norm": 2.11893630027771, "learning_rate": 1.8027228642937725e-05, "loss": 0.428, "step": 21135 }, { "epoch": 3.450267336027101, "grad_norm": 2.021400213241577, "learning_rate": 1.8027039396892028e-05, "loss": 0.4879, "step": 21136 }, { "epoch": 3.4504305946696054, "grad_norm": 1.9692648649215698, "learning_rate": 1.8026850142763102e-05, "loss": 0.4719, "step": 21137 }, { "epoch": 3.45059385331211, "grad_norm": 2.0494754314422607, "learning_rate": 1.8026660880551144e-05, "loss": 0.4687, "step": 21138 }, { "epoch": 3.450757111954614, "grad_norm": 2.081557273864746, "learning_rate": 1.8026471610256337e-05, "loss": 0.4704, "step": 21139 }, { "epoch": 3.4509203705971183, "grad_norm": 2.163404941558838, "learning_rate": 1.8026282331878877e-05, "loss": 0.5018, "step": 21140 }, { "epoch": 3.4510836292396228, "grad_norm": 2.2189078330993652, "learning_rate": 1.8026093045418955e-05, "loss": 0.6183, "step": 21141 }, { "epoch": 3.451246887882127, "grad_norm": 1.8747464418411255, "learning_rate": 1.8025903750876755e-05, "loss": 0.4299, "step": 21142 }, { "epoch": 3.4514101465246316, "grad_norm": 1.8979051113128662, "learning_rate": 1.8025714448252473e-05, "loss": 0.47, "step": 21143 }, { "epoch": 3.451573405167136, "grad_norm": 1.9100521802902222, "learning_rate": 1.80255251375463e-05, "loss": 0.4983, "step": 21144 }, { "epoch": 3.4517366638096405, "grad_norm": 1.7578954696655273, "learning_rate": 1.8025335818758426e-05, "loss": 0.4611, "step": 21145 }, { "epoch": 3.451899922452145, "grad_norm": 2.1032164096832275, "learning_rate": 1.8025146491889038e-05, "loss": 0.5048, "step": 21146 }, { "epoch": 3.4520631810946494, "grad_norm": 2.0501868724823, "learning_rate": 1.8024957156938332e-05, "loss": 0.4734, "step": 21147 }, { "epoch": 3.452226439737154, "grad_norm": 1.988102912902832, "learning_rate": 1.8024767813906495e-05, "loss": 0.4903, "step": 21148 }, { "epoch": 3.452389698379658, "grad_norm": 1.6181097030639648, "learning_rate": 1.802457846279372e-05, "loss": 0.3911, "step": 21149 }, { "epoch": 3.4525529570221622, "grad_norm": 2.2508485317230225, "learning_rate": 1.8024389103600196e-05, "loss": 0.4474, "step": 21150 }, { "epoch": 3.4527162156646667, "grad_norm": 1.6561225652694702, "learning_rate": 1.8024199736326117e-05, "loss": 0.4106, "step": 21151 }, { "epoch": 3.452879474307171, "grad_norm": 1.6290333271026611, "learning_rate": 1.802401036097167e-05, "loss": 0.3974, "step": 21152 }, { "epoch": 3.4530427329496756, "grad_norm": 1.6094623804092407, "learning_rate": 1.802382097753704e-05, "loss": 0.4637, "step": 21153 }, { "epoch": 3.45320599159218, "grad_norm": 1.783301830291748, "learning_rate": 1.802363158602243e-05, "loss": 0.4025, "step": 21154 }, { "epoch": 3.4533692502346844, "grad_norm": 1.8618196249008179, "learning_rate": 1.8023442186428027e-05, "loss": 0.4336, "step": 21155 }, { "epoch": 3.453532508877189, "grad_norm": 1.8595356941223145, "learning_rate": 1.8023252778754017e-05, "loss": 0.4868, "step": 21156 }, { "epoch": 3.453695767519693, "grad_norm": 2.0630509853363037, "learning_rate": 1.8023063363000593e-05, "loss": 0.5309, "step": 21157 }, { "epoch": 3.4538590261621973, "grad_norm": 1.7897638082504272, "learning_rate": 1.802287393916795e-05, "loss": 0.4406, "step": 21158 }, { "epoch": 3.4540222848047017, "grad_norm": 2.0970470905303955, "learning_rate": 1.802268450725627e-05, "loss": 0.4667, "step": 21159 }, { "epoch": 3.454185543447206, "grad_norm": 1.6120094060897827, "learning_rate": 1.802249506726575e-05, "loss": 0.4324, "step": 21160 }, { "epoch": 3.4543488020897106, "grad_norm": 2.160970687866211, "learning_rate": 1.8022305619196584e-05, "loss": 0.5146, "step": 21161 }, { "epoch": 3.454512060732215, "grad_norm": 1.9511911869049072, "learning_rate": 1.8022116163048955e-05, "loss": 0.4839, "step": 21162 }, { "epoch": 3.4546753193747195, "grad_norm": 2.1121647357940674, "learning_rate": 1.8021926698823058e-05, "loss": 0.5354, "step": 21163 }, { "epoch": 3.454838578017224, "grad_norm": 2.0538904666900635, "learning_rate": 1.8021737226519082e-05, "loss": 0.4787, "step": 21164 }, { "epoch": 3.4550018366597284, "grad_norm": 2.028799295425415, "learning_rate": 1.802154774613722e-05, "loss": 0.4479, "step": 21165 }, { "epoch": 3.4551650953022324, "grad_norm": 1.9190068244934082, "learning_rate": 1.802135825767766e-05, "loss": 0.4426, "step": 21166 }, { "epoch": 3.455328353944737, "grad_norm": 2.266367197036743, "learning_rate": 1.8021168761140596e-05, "loss": 0.5367, "step": 21167 }, { "epoch": 3.4554916125872412, "grad_norm": 1.8727422952651978, "learning_rate": 1.8020979256526217e-05, "loss": 0.4092, "step": 21168 }, { "epoch": 3.4556548712297457, "grad_norm": 1.9997804164886475, "learning_rate": 1.8020789743834714e-05, "loss": 0.5116, "step": 21169 }, { "epoch": 3.45581812987225, "grad_norm": 1.5738593339920044, "learning_rate": 1.8020600223066277e-05, "loss": 0.3888, "step": 21170 }, { "epoch": 3.4559813885147546, "grad_norm": 1.8405377864837646, "learning_rate": 1.8020410694221097e-05, "loss": 0.4187, "step": 21171 }, { "epoch": 3.456144647157259, "grad_norm": 1.9030380249023438, "learning_rate": 1.8020221157299366e-05, "loss": 0.4191, "step": 21172 }, { "epoch": 3.4563079057997634, "grad_norm": 2.093278646469116, "learning_rate": 1.8020031612301275e-05, "loss": 0.5462, "step": 21173 }, { "epoch": 3.4564711644422674, "grad_norm": 1.751652717590332, "learning_rate": 1.801984205922701e-05, "loss": 0.4357, "step": 21174 }, { "epoch": 3.456634423084772, "grad_norm": 1.9454541206359863, "learning_rate": 1.8019652498076774e-05, "loss": 0.5385, "step": 21175 }, { "epoch": 3.4567976817272763, "grad_norm": 1.8903404474258423, "learning_rate": 1.8019462928850745e-05, "loss": 0.456, "step": 21176 }, { "epoch": 3.4569609403697807, "grad_norm": 1.8350569009780884, "learning_rate": 1.8019273351549124e-05, "loss": 0.4524, "step": 21177 }, { "epoch": 3.457124199012285, "grad_norm": 1.7905219793319702, "learning_rate": 1.801908376617209e-05, "loss": 0.4698, "step": 21178 }, { "epoch": 3.4572874576547896, "grad_norm": 1.9855200052261353, "learning_rate": 1.8018894172719845e-05, "loss": 0.476, "step": 21179 }, { "epoch": 3.457450716297294, "grad_norm": 1.8990521430969238, "learning_rate": 1.8018704571192573e-05, "loss": 0.4938, "step": 21180 }, { "epoch": 3.4576139749397985, "grad_norm": 1.8653440475463867, "learning_rate": 1.801851496159047e-05, "loss": 0.417, "step": 21181 }, { "epoch": 3.457777233582303, "grad_norm": 2.234570026397705, "learning_rate": 1.8018325343913725e-05, "loss": 0.5231, "step": 21182 }, { "epoch": 3.4579404922248074, "grad_norm": 1.571622610092163, "learning_rate": 1.8018135718162528e-05, "loss": 0.4609, "step": 21183 }, { "epoch": 3.4581037508673114, "grad_norm": 1.7340335845947266, "learning_rate": 1.8017946084337066e-05, "loss": 0.4295, "step": 21184 }, { "epoch": 3.458267009509816, "grad_norm": 2.4726390838623047, "learning_rate": 1.801775644243754e-05, "loss": 0.4948, "step": 21185 }, { "epoch": 3.4584302681523202, "grad_norm": 1.713821291923523, "learning_rate": 1.8017566792464136e-05, "loss": 0.4074, "step": 21186 }, { "epoch": 3.4585935267948247, "grad_norm": 2.5482707023620605, "learning_rate": 1.8017377134417043e-05, "loss": 0.4608, "step": 21187 }, { "epoch": 3.458756785437329, "grad_norm": 2.1162357330322266, "learning_rate": 1.801718746829645e-05, "loss": 0.4557, "step": 21188 }, { "epoch": 3.4589200440798336, "grad_norm": 2.1426002979278564, "learning_rate": 1.8016997794102552e-05, "loss": 0.4834, "step": 21189 }, { "epoch": 3.459083302722338, "grad_norm": 2.224295139312744, "learning_rate": 1.8016808111835544e-05, "loss": 0.5564, "step": 21190 }, { "epoch": 3.4592465613648424, "grad_norm": 2.118400812149048, "learning_rate": 1.8016618421495608e-05, "loss": 0.4224, "step": 21191 }, { "epoch": 3.4594098200073464, "grad_norm": 1.7902426719665527, "learning_rate": 1.8016428723082942e-05, "loss": 0.4316, "step": 21192 }, { "epoch": 3.459573078649851, "grad_norm": 1.9671820402145386, "learning_rate": 1.8016239016597735e-05, "loss": 0.5411, "step": 21193 }, { "epoch": 3.4597363372923553, "grad_norm": 2.055189847946167, "learning_rate": 1.8016049302040174e-05, "loss": 0.5005, "step": 21194 }, { "epoch": 3.4598995959348597, "grad_norm": 2.4946138858795166, "learning_rate": 1.8015859579410454e-05, "loss": 0.4943, "step": 21195 }, { "epoch": 3.460062854577364, "grad_norm": 1.7578963041305542, "learning_rate": 1.8015669848708768e-05, "loss": 0.5106, "step": 21196 }, { "epoch": 3.4602261132198686, "grad_norm": 2.274935007095337, "learning_rate": 1.80154801099353e-05, "loss": 0.5561, "step": 21197 }, { "epoch": 3.460389371862373, "grad_norm": 2.096498966217041, "learning_rate": 1.801529036309025e-05, "loss": 0.4727, "step": 21198 }, { "epoch": 3.4605526305048775, "grad_norm": 1.772322177886963, "learning_rate": 1.8015100608173806e-05, "loss": 0.3835, "step": 21199 }, { "epoch": 3.460715889147382, "grad_norm": 2.269930839538574, "learning_rate": 1.8014910845186154e-05, "loss": 0.6148, "step": 21200 }, { "epoch": 3.4608791477898864, "grad_norm": 2.3462443351745605, "learning_rate": 1.801472107412749e-05, "loss": 0.5315, "step": 21201 }, { "epoch": 3.4610424064323904, "grad_norm": 2.146003246307373, "learning_rate": 1.8014531294998004e-05, "loss": 0.5188, "step": 21202 }, { "epoch": 3.461205665074895, "grad_norm": 1.9517247676849365, "learning_rate": 1.801434150779789e-05, "loss": 0.482, "step": 21203 }, { "epoch": 3.4613689237173992, "grad_norm": 1.9535151720046997, "learning_rate": 1.801415171252733e-05, "loss": 0.5117, "step": 21204 }, { "epoch": 3.4615321823599037, "grad_norm": 1.794485330581665, "learning_rate": 1.8013961909186525e-05, "loss": 0.4267, "step": 21205 }, { "epoch": 3.461695441002408, "grad_norm": 2.3166918754577637, "learning_rate": 1.8013772097775666e-05, "loss": 0.4652, "step": 21206 }, { "epoch": 3.4618586996449126, "grad_norm": 1.5859977006912231, "learning_rate": 1.8013582278294935e-05, "loss": 0.4635, "step": 21207 }, { "epoch": 3.462021958287417, "grad_norm": 1.681929588317871, "learning_rate": 1.8013392450744535e-05, "loss": 0.4008, "step": 21208 }, { "epoch": 3.462185216929921, "grad_norm": 1.7605900764465332, "learning_rate": 1.8013202615124645e-05, "loss": 0.4041, "step": 21209 }, { "epoch": 3.4623484755724254, "grad_norm": 1.7194199562072754, "learning_rate": 1.801301277143546e-05, "loss": 0.4047, "step": 21210 }, { "epoch": 3.46251173421493, "grad_norm": 1.9643925428390503, "learning_rate": 1.801282291967718e-05, "loss": 0.5711, "step": 21211 }, { "epoch": 3.4626749928574343, "grad_norm": 1.7184643745422363, "learning_rate": 1.8012633059849987e-05, "loss": 0.4626, "step": 21212 }, { "epoch": 3.4628382514999387, "grad_norm": 1.99732506275177, "learning_rate": 1.8012443191954073e-05, "loss": 0.4598, "step": 21213 }, { "epoch": 3.463001510142443, "grad_norm": 2.0868823528289795, "learning_rate": 1.801225331598963e-05, "loss": 0.5176, "step": 21214 }, { "epoch": 3.4631647687849476, "grad_norm": 1.8849477767944336, "learning_rate": 1.8012063431956854e-05, "loss": 0.4669, "step": 21215 }, { "epoch": 3.463328027427452, "grad_norm": 1.94233238697052, "learning_rate": 1.801187353985593e-05, "loss": 0.4829, "step": 21216 }, { "epoch": 3.4634912860699565, "grad_norm": 1.9143906831741333, "learning_rate": 1.8011683639687053e-05, "loss": 0.432, "step": 21217 }, { "epoch": 3.463654544712461, "grad_norm": 2.1062347888946533, "learning_rate": 1.8011493731450412e-05, "loss": 0.5088, "step": 21218 }, { "epoch": 3.463817803354965, "grad_norm": 2.3095703125, "learning_rate": 1.80113038151462e-05, "loss": 0.552, "step": 21219 }, { "epoch": 3.4639810619974694, "grad_norm": 1.9436078071594238, "learning_rate": 1.8011113890774603e-05, "loss": 0.5178, "step": 21220 }, { "epoch": 3.464144320639974, "grad_norm": 1.8126323223114014, "learning_rate": 1.801092395833582e-05, "loss": 0.4451, "step": 21221 }, { "epoch": 3.4643075792824782, "grad_norm": 1.8780488967895508, "learning_rate": 1.8010734017830037e-05, "loss": 0.4164, "step": 21222 }, { "epoch": 3.4644708379249827, "grad_norm": 2.0003256797790527, "learning_rate": 1.801054406925745e-05, "loss": 0.4567, "step": 21223 }, { "epoch": 3.464634096567487, "grad_norm": 2.084533929824829, "learning_rate": 1.8010354112618245e-05, "loss": 0.5102, "step": 21224 }, { "epoch": 3.4647973552099915, "grad_norm": 2.3818116188049316, "learning_rate": 1.8010164147912614e-05, "loss": 0.5897, "step": 21225 }, { "epoch": 3.464960613852496, "grad_norm": 2.079043388366699, "learning_rate": 1.8009974175140753e-05, "loss": 0.5222, "step": 21226 }, { "epoch": 3.465123872495, "grad_norm": 2.0170371532440186, "learning_rate": 1.8009784194302847e-05, "loss": 0.5171, "step": 21227 }, { "epoch": 3.4652871311375044, "grad_norm": 1.8977174758911133, "learning_rate": 1.800959420539909e-05, "loss": 0.4707, "step": 21228 }, { "epoch": 3.465450389780009, "grad_norm": 2.1207475662231445, "learning_rate": 1.800940420842968e-05, "loss": 0.4446, "step": 21229 }, { "epoch": 3.4656136484225133, "grad_norm": 1.8950722217559814, "learning_rate": 1.8009214203394795e-05, "loss": 0.5263, "step": 21230 }, { "epoch": 3.4657769070650177, "grad_norm": 2.1599950790405273, "learning_rate": 1.8009024190294638e-05, "loss": 0.4816, "step": 21231 }, { "epoch": 3.465940165707522, "grad_norm": 2.3893823623657227, "learning_rate": 1.8008834169129392e-05, "loss": 0.5471, "step": 21232 }, { "epoch": 3.4661034243500266, "grad_norm": 2.1877524852752686, "learning_rate": 1.8008644139899254e-05, "loss": 0.498, "step": 21233 }, { "epoch": 3.466266682992531, "grad_norm": 1.6484885215759277, "learning_rate": 1.8008454102604414e-05, "loss": 0.4712, "step": 21234 }, { "epoch": 3.4664299416350355, "grad_norm": 2.362750768661499, "learning_rate": 1.8008264057245065e-05, "loss": 0.5148, "step": 21235 }, { "epoch": 3.46659320027754, "grad_norm": 1.809654712677002, "learning_rate": 1.8008074003821393e-05, "loss": 0.4657, "step": 21236 }, { "epoch": 3.466756458920044, "grad_norm": 1.8752782344818115, "learning_rate": 1.8007883942333594e-05, "loss": 0.4206, "step": 21237 }, { "epoch": 3.4669197175625484, "grad_norm": 2.0156776905059814, "learning_rate": 1.8007693872781856e-05, "loss": 0.513, "step": 21238 }, { "epoch": 3.467082976205053, "grad_norm": 2.592012405395508, "learning_rate": 1.8007503795166374e-05, "loss": 0.528, "step": 21239 }, { "epoch": 3.4672462348475572, "grad_norm": 2.2566869258880615, "learning_rate": 1.8007313709487334e-05, "loss": 0.5135, "step": 21240 }, { "epoch": 3.4674094934900617, "grad_norm": 1.8267377614974976, "learning_rate": 1.8007123615744938e-05, "loss": 0.4303, "step": 21241 }, { "epoch": 3.467572752132566, "grad_norm": 1.7113069295883179, "learning_rate": 1.8006933513939367e-05, "loss": 0.388, "step": 21242 }, { "epoch": 3.4677360107750705, "grad_norm": 1.800973892211914, "learning_rate": 1.8006743404070816e-05, "loss": 0.5099, "step": 21243 }, { "epoch": 3.467899269417575, "grad_norm": 1.8018099069595337, "learning_rate": 1.8006553286139476e-05, "loss": 0.3843, "step": 21244 }, { "epoch": 3.468062528060079, "grad_norm": 1.8253412246704102, "learning_rate": 1.800636316014554e-05, "loss": 0.4652, "step": 21245 }, { "epoch": 3.4682257867025834, "grad_norm": 2.0154378414154053, "learning_rate": 1.8006173026089197e-05, "loss": 0.4145, "step": 21246 }, { "epoch": 3.468389045345088, "grad_norm": 1.7293487787246704, "learning_rate": 1.8005982883970642e-05, "loss": 0.4445, "step": 21247 }, { "epoch": 3.4685523039875923, "grad_norm": 1.9004734754562378, "learning_rate": 1.8005792733790064e-05, "loss": 0.4464, "step": 21248 }, { "epoch": 3.4687155626300967, "grad_norm": 1.709053635597229, "learning_rate": 1.8005602575547657e-05, "loss": 0.3688, "step": 21249 }, { "epoch": 3.468878821272601, "grad_norm": 2.1022489070892334, "learning_rate": 1.8005412409243604e-05, "loss": 0.445, "step": 21250 }, { "epoch": 3.4690420799151056, "grad_norm": 1.8768103122711182, "learning_rate": 1.8005222234878108e-05, "loss": 0.4899, "step": 21251 }, { "epoch": 3.46920533855761, "grad_norm": 1.983720064163208, "learning_rate": 1.8005032052451355e-05, "loss": 0.4268, "step": 21252 }, { "epoch": 3.4693685972001145, "grad_norm": 2.116239547729492, "learning_rate": 1.8004841861963536e-05, "loss": 0.5091, "step": 21253 }, { "epoch": 3.469531855842619, "grad_norm": 2.1420977115631104, "learning_rate": 1.8004651663414842e-05, "loss": 0.4861, "step": 21254 }, { "epoch": 3.469695114485123, "grad_norm": 1.8050569295883179, "learning_rate": 1.8004461456805465e-05, "loss": 0.3975, "step": 21255 }, { "epoch": 3.4698583731276273, "grad_norm": 1.6975862979888916, "learning_rate": 1.80042712421356e-05, "loss": 0.4552, "step": 21256 }, { "epoch": 3.470021631770132, "grad_norm": 1.7784383296966553, "learning_rate": 1.8004081019405437e-05, "loss": 0.4213, "step": 21257 }, { "epoch": 3.4701848904126362, "grad_norm": 1.830610990524292, "learning_rate": 1.8003890788615167e-05, "loss": 0.4033, "step": 21258 }, { "epoch": 3.4703481490551407, "grad_norm": 1.777361273765564, "learning_rate": 1.8003700549764978e-05, "loss": 0.4259, "step": 21259 }, { "epoch": 3.470511407697645, "grad_norm": 1.93014395236969, "learning_rate": 1.8003510302855064e-05, "loss": 0.4178, "step": 21260 }, { "epoch": 3.4706746663401495, "grad_norm": 2.352508068084717, "learning_rate": 1.800332004788562e-05, "loss": 0.5639, "step": 21261 }, { "epoch": 3.4708379249826535, "grad_norm": 2.308037519454956, "learning_rate": 1.8003129784856832e-05, "loss": 0.5256, "step": 21262 }, { "epoch": 3.471001183625158, "grad_norm": 1.9087399244308472, "learning_rate": 1.8002939513768898e-05, "loss": 0.4295, "step": 21263 }, { "epoch": 3.4711644422676624, "grad_norm": 2.0348024368286133, "learning_rate": 1.8002749234622004e-05, "loss": 0.4566, "step": 21264 }, { "epoch": 3.471327700910167, "grad_norm": 2.0161807537078857, "learning_rate": 1.800255894741634e-05, "loss": 0.5173, "step": 21265 }, { "epoch": 3.4714909595526713, "grad_norm": 2.0968809127807617, "learning_rate": 1.8002368652152108e-05, "loss": 0.4516, "step": 21266 }, { "epoch": 3.4716542181951757, "grad_norm": 2.224179267883301, "learning_rate": 1.800217834882949e-05, "loss": 0.5133, "step": 21267 }, { "epoch": 3.47181747683768, "grad_norm": 1.7945784330368042, "learning_rate": 1.800198803744868e-05, "loss": 0.4552, "step": 21268 }, { "epoch": 3.4719807354801846, "grad_norm": 2.3148539066314697, "learning_rate": 1.8001797718009873e-05, "loss": 0.5583, "step": 21269 }, { "epoch": 3.472143994122689, "grad_norm": 1.9752217531204224, "learning_rate": 1.8001607390513258e-05, "loss": 0.5026, "step": 21270 }, { "epoch": 3.4723072527651935, "grad_norm": 1.8717889785766602, "learning_rate": 1.800141705495902e-05, "loss": 0.4475, "step": 21271 }, { "epoch": 3.4724705114076975, "grad_norm": 1.8912441730499268, "learning_rate": 1.800122671134736e-05, "loss": 0.4614, "step": 21272 }, { "epoch": 3.472633770050202, "grad_norm": 2.306675910949707, "learning_rate": 1.800103635967847e-05, "loss": 0.528, "step": 21273 }, { "epoch": 3.4727970286927063, "grad_norm": 1.836708903312683, "learning_rate": 1.8000845999952537e-05, "loss": 0.4273, "step": 21274 }, { "epoch": 3.472960287335211, "grad_norm": 2.1793787479400635, "learning_rate": 1.800065563216975e-05, "loss": 0.453, "step": 21275 }, { "epoch": 3.473123545977715, "grad_norm": 2.102919578552246, "learning_rate": 1.800046525633031e-05, "loss": 0.5372, "step": 21276 }, { "epoch": 3.4732868046202197, "grad_norm": 2.1907730102539062, "learning_rate": 1.8000274872434405e-05, "loss": 0.4679, "step": 21277 }, { "epoch": 3.473450063262724, "grad_norm": 1.9364709854125977, "learning_rate": 1.800008448048222e-05, "loss": 0.5185, "step": 21278 }, { "epoch": 3.4736133219052285, "grad_norm": 1.746322751045227, "learning_rate": 1.7999894080473955e-05, "loss": 0.4686, "step": 21279 }, { "epoch": 3.4737765805477325, "grad_norm": 1.9581146240234375, "learning_rate": 1.7999703672409802e-05, "loss": 0.4809, "step": 21280 }, { "epoch": 3.473939839190237, "grad_norm": 2.021892547607422, "learning_rate": 1.7999513256289942e-05, "loss": 0.62, "step": 21281 }, { "epoch": 3.4741030978327414, "grad_norm": 1.9520258903503418, "learning_rate": 1.7999322832114582e-05, "loss": 0.4822, "step": 21282 }, { "epoch": 3.474266356475246, "grad_norm": 1.8956135511398315, "learning_rate": 1.7999132399883898e-05, "loss": 0.4477, "step": 21283 }, { "epoch": 3.4744296151177503, "grad_norm": 1.8128836154937744, "learning_rate": 1.7998941959598097e-05, "loss": 0.4027, "step": 21284 }, { "epoch": 3.4745928737602547, "grad_norm": 2.1774659156799316, "learning_rate": 1.799875151125736e-05, "loss": 0.511, "step": 21285 }, { "epoch": 3.474756132402759, "grad_norm": 1.7098389863967896, "learning_rate": 1.799856105486188e-05, "loss": 0.4396, "step": 21286 }, { "epoch": 3.4749193910452636, "grad_norm": 1.7613424062728882, "learning_rate": 1.7998370590411856e-05, "loss": 0.459, "step": 21287 }, { "epoch": 3.475082649687768, "grad_norm": 1.8039993047714233, "learning_rate": 1.7998180117907473e-05, "loss": 0.4318, "step": 21288 }, { "epoch": 3.4752459083302725, "grad_norm": 1.8935953378677368, "learning_rate": 1.7997989637348924e-05, "loss": 0.4692, "step": 21289 }, { "epoch": 3.4754091669727765, "grad_norm": 1.687728762626648, "learning_rate": 1.7997799148736403e-05, "loss": 0.4306, "step": 21290 }, { "epoch": 3.475572425615281, "grad_norm": 2.0684814453125, "learning_rate": 1.79976086520701e-05, "loss": 0.5321, "step": 21291 }, { "epoch": 3.4757356842577853, "grad_norm": 1.9167503118515015, "learning_rate": 1.7997418147350205e-05, "loss": 0.4556, "step": 21292 }, { "epoch": 3.47589894290029, "grad_norm": 1.9986389875411987, "learning_rate": 1.7997227634576916e-05, "loss": 0.5042, "step": 21293 }, { "epoch": 3.476062201542794, "grad_norm": 1.6685458421707153, "learning_rate": 1.7997037113750418e-05, "loss": 0.4363, "step": 21294 }, { "epoch": 3.4762254601852987, "grad_norm": 1.6885007619857788, "learning_rate": 1.799684658487091e-05, "loss": 0.4201, "step": 21295 }, { "epoch": 3.476388718827803, "grad_norm": 1.9609819650650024, "learning_rate": 1.7996656047938575e-05, "loss": 0.4555, "step": 21296 }, { "epoch": 3.476551977470307, "grad_norm": 1.7455947399139404, "learning_rate": 1.799646550295361e-05, "loss": 0.4502, "step": 21297 }, { "epoch": 3.4767152361128115, "grad_norm": 1.5091599225997925, "learning_rate": 1.7996274949916206e-05, "loss": 0.4151, "step": 21298 }, { "epoch": 3.476878494755316, "grad_norm": 1.9627236127853394, "learning_rate": 1.7996084388826556e-05, "loss": 0.4644, "step": 21299 }, { "epoch": 3.4770417533978204, "grad_norm": 2.3361434936523438, "learning_rate": 1.799589381968485e-05, "loss": 0.5809, "step": 21300 }, { "epoch": 3.477205012040325, "grad_norm": 2.017847776412964, "learning_rate": 1.7995703242491285e-05, "loss": 0.4438, "step": 21301 }, { "epoch": 3.4773682706828293, "grad_norm": 2.286539316177368, "learning_rate": 1.7995512657246047e-05, "loss": 0.5987, "step": 21302 }, { "epoch": 3.4775315293253337, "grad_norm": 1.8194544315338135, "learning_rate": 1.7995322063949328e-05, "loss": 0.419, "step": 21303 }, { "epoch": 3.477694787967838, "grad_norm": 1.7078354358673096, "learning_rate": 1.7995131462601323e-05, "loss": 0.3972, "step": 21304 }, { "epoch": 3.4778580466103426, "grad_norm": 2.2964837551116943, "learning_rate": 1.7994940853202222e-05, "loss": 0.4455, "step": 21305 }, { "epoch": 3.478021305252847, "grad_norm": 2.096977710723877, "learning_rate": 1.799475023575222e-05, "loss": 0.4903, "step": 21306 }, { "epoch": 3.478184563895351, "grad_norm": 1.708762288093567, "learning_rate": 1.7994559610251504e-05, "loss": 0.3908, "step": 21307 }, { "epoch": 3.4783478225378555, "grad_norm": 1.6981143951416016, "learning_rate": 1.799436897670027e-05, "loss": 0.4214, "step": 21308 }, { "epoch": 3.47851108118036, "grad_norm": 1.8428019285202026, "learning_rate": 1.7994178335098707e-05, "loss": 0.4108, "step": 21309 }, { "epoch": 3.4786743398228643, "grad_norm": 2.141819715499878, "learning_rate": 1.7993987685447008e-05, "loss": 0.4594, "step": 21310 }, { "epoch": 3.4788375984653688, "grad_norm": 2.1814026832580566, "learning_rate": 1.799379702774537e-05, "loss": 0.4982, "step": 21311 }, { "epoch": 3.479000857107873, "grad_norm": 1.8222205638885498, "learning_rate": 1.7993606361993977e-05, "loss": 0.4129, "step": 21312 }, { "epoch": 3.4791641157503777, "grad_norm": 1.732498049736023, "learning_rate": 1.7993415688193027e-05, "loss": 0.4188, "step": 21313 }, { "epoch": 3.479327374392882, "grad_norm": 1.9213263988494873, "learning_rate": 1.7993225006342706e-05, "loss": 0.4938, "step": 21314 }, { "epoch": 3.479490633035386, "grad_norm": 1.9185388088226318, "learning_rate": 1.7993034316443213e-05, "loss": 0.4872, "step": 21315 }, { "epoch": 3.4796538916778905, "grad_norm": 1.9076147079467773, "learning_rate": 1.7992843618494736e-05, "loss": 0.4358, "step": 21316 }, { "epoch": 3.479817150320395, "grad_norm": 2.177788257598877, "learning_rate": 1.7992652912497464e-05, "loss": 0.5434, "step": 21317 }, { "epoch": 3.4799804089628994, "grad_norm": 2.349494218826294, "learning_rate": 1.7992462198451597e-05, "loss": 0.6168, "step": 21318 }, { "epoch": 3.480143667605404, "grad_norm": 1.7946233749389648, "learning_rate": 1.799227147635732e-05, "loss": 0.4369, "step": 21319 }, { "epoch": 3.4803069262479083, "grad_norm": 1.6771551370620728, "learning_rate": 1.799208074621483e-05, "loss": 0.5181, "step": 21320 }, { "epoch": 3.4804701848904127, "grad_norm": 2.4260201454162598, "learning_rate": 1.7991890008024315e-05, "loss": 1.0344, "step": 21321 }, { "epoch": 3.480633443532917, "grad_norm": 2.184999942779541, "learning_rate": 1.799169926178597e-05, "loss": 0.5237, "step": 21322 }, { "epoch": 3.4807967021754216, "grad_norm": 1.8070627450942993, "learning_rate": 1.7991508507499984e-05, "loss": 0.4049, "step": 21323 }, { "epoch": 3.480959960817926, "grad_norm": 2.085395574569702, "learning_rate": 1.799131774516655e-05, "loss": 0.5216, "step": 21324 }, { "epoch": 3.48112321946043, "grad_norm": 1.9180389642715454, "learning_rate": 1.7991126974785867e-05, "loss": 0.4967, "step": 21325 }, { "epoch": 3.4812864781029345, "grad_norm": 2.2707433700561523, "learning_rate": 1.7990936196358117e-05, "loss": 0.5578, "step": 21326 }, { "epoch": 3.481449736745439, "grad_norm": 1.8068013191223145, "learning_rate": 1.79907454098835e-05, "loss": 0.4175, "step": 21327 }, { "epoch": 3.4816129953879433, "grad_norm": 2.0803966522216797, "learning_rate": 1.79905546153622e-05, "loss": 0.4409, "step": 21328 }, { "epoch": 3.4817762540304478, "grad_norm": 2.2074756622314453, "learning_rate": 1.7990363812794417e-05, "loss": 0.5435, "step": 21329 }, { "epoch": 3.481939512672952, "grad_norm": 2.082111358642578, "learning_rate": 1.7990173002180336e-05, "loss": 0.497, "step": 21330 }, { "epoch": 3.4821027713154566, "grad_norm": 1.923783779144287, "learning_rate": 1.7989982183520154e-05, "loss": 0.5471, "step": 21331 }, { "epoch": 3.482266029957961, "grad_norm": 2.1159305572509766, "learning_rate": 1.7989791356814063e-05, "loss": 0.5137, "step": 21332 }, { "epoch": 3.482429288600465, "grad_norm": 2.150412082672119, "learning_rate": 1.7989600522062253e-05, "loss": 0.5204, "step": 21333 }, { "epoch": 3.4825925472429695, "grad_norm": 1.9601919651031494, "learning_rate": 1.7989409679264918e-05, "loss": 0.4961, "step": 21334 }, { "epoch": 3.482755805885474, "grad_norm": 2.074127435684204, "learning_rate": 1.798921882842225e-05, "loss": 0.5193, "step": 21335 }, { "epoch": 3.4829190645279784, "grad_norm": 1.7986592054367065, "learning_rate": 1.7989027969534444e-05, "loss": 0.4538, "step": 21336 }, { "epoch": 3.483082323170483, "grad_norm": 1.7303813695907593, "learning_rate": 1.7988837102601685e-05, "loss": 0.401, "step": 21337 }, { "epoch": 3.4832455818129873, "grad_norm": 2.097687244415283, "learning_rate": 1.798864622762417e-05, "loss": 0.5322, "step": 21338 }, { "epoch": 3.4834088404554917, "grad_norm": 2.2190914154052734, "learning_rate": 1.7988455344602093e-05, "loss": 0.4726, "step": 21339 }, { "epoch": 3.483572099097996, "grad_norm": 2.0345168113708496, "learning_rate": 1.798826445353564e-05, "loss": 0.546, "step": 21340 }, { "epoch": 3.4837353577405006, "grad_norm": 1.680475115776062, "learning_rate": 1.7988073554425007e-05, "loss": 0.4133, "step": 21341 }, { "epoch": 3.483898616383005, "grad_norm": 1.6678942441940308, "learning_rate": 1.7987882647270386e-05, "loss": 0.4266, "step": 21342 }, { "epoch": 3.484061875025509, "grad_norm": 1.7349426746368408, "learning_rate": 1.798769173207197e-05, "loss": 0.4979, "step": 21343 }, { "epoch": 3.4842251336680135, "grad_norm": 2.052516222000122, "learning_rate": 1.7987500808829952e-05, "loss": 0.4199, "step": 21344 }, { "epoch": 3.484388392310518, "grad_norm": 1.678758144378662, "learning_rate": 1.7987309877544522e-05, "loss": 0.4302, "step": 21345 }, { "epoch": 3.4845516509530223, "grad_norm": 2.2251384258270264, "learning_rate": 1.798711893821587e-05, "loss": 0.5192, "step": 21346 }, { "epoch": 3.4847149095955268, "grad_norm": 1.7319260835647583, "learning_rate": 1.7986927990844196e-05, "loss": 0.4986, "step": 21347 }, { "epoch": 3.484878168238031, "grad_norm": 2.0195224285125732, "learning_rate": 1.7986737035429684e-05, "loss": 0.5321, "step": 21348 }, { "epoch": 3.4850414268805356, "grad_norm": 2.5790135860443115, "learning_rate": 1.7986546071972532e-05, "loss": 0.6036, "step": 21349 }, { "epoch": 3.4852046855230396, "grad_norm": 1.9409687519073486, "learning_rate": 1.798635510047293e-05, "loss": 0.3972, "step": 21350 }, { "epoch": 3.485367944165544, "grad_norm": 2.816086769104004, "learning_rate": 1.7986164120931072e-05, "loss": 0.6303, "step": 21351 }, { "epoch": 3.4855312028080485, "grad_norm": 2.162673234939575, "learning_rate": 1.7985973133347146e-05, "loss": 0.505, "step": 21352 }, { "epoch": 3.485694461450553, "grad_norm": 1.960461139678955, "learning_rate": 1.7985782137721347e-05, "loss": 0.4862, "step": 21353 }, { "epoch": 3.4858577200930574, "grad_norm": 2.1101746559143066, "learning_rate": 1.7985591134053872e-05, "loss": 0.5387, "step": 21354 }, { "epoch": 3.486020978735562, "grad_norm": 1.8434666395187378, "learning_rate": 1.7985400122344906e-05, "loss": 0.4559, "step": 21355 }, { "epoch": 3.4861842373780663, "grad_norm": 1.9327114820480347, "learning_rate": 1.798520910259464e-05, "loss": 0.4484, "step": 21356 }, { "epoch": 3.4863474960205707, "grad_norm": 2.116147041320801, "learning_rate": 1.7985018074803276e-05, "loss": 0.593, "step": 21357 }, { "epoch": 3.486510754663075, "grad_norm": 2.022848606109619, "learning_rate": 1.7984827038970998e-05, "loss": 0.483, "step": 21358 }, { "epoch": 3.4866740133055796, "grad_norm": 1.5659527778625488, "learning_rate": 1.7984635995098003e-05, "loss": 0.3915, "step": 21359 }, { "epoch": 3.4868372719480836, "grad_norm": 1.7751175165176392, "learning_rate": 1.798444494318448e-05, "loss": 0.4559, "step": 21360 }, { "epoch": 3.487000530590588, "grad_norm": 1.8978996276855469, "learning_rate": 1.7984253883230627e-05, "loss": 0.4715, "step": 21361 }, { "epoch": 3.4871637892330924, "grad_norm": 2.0019471645355225, "learning_rate": 1.7984062815236627e-05, "loss": 0.4555, "step": 21362 }, { "epoch": 3.487327047875597, "grad_norm": 1.8096907138824463, "learning_rate": 1.798387173920268e-05, "loss": 0.4365, "step": 21363 }, { "epoch": 3.4874903065181013, "grad_norm": 2.1952297687530518, "learning_rate": 1.798368065512898e-05, "loss": 0.544, "step": 21364 }, { "epoch": 3.4876535651606058, "grad_norm": 1.9609490633010864, "learning_rate": 1.7983489563015714e-05, "loss": 0.4851, "step": 21365 }, { "epoch": 3.48781682380311, "grad_norm": 2.3711516857147217, "learning_rate": 1.7983298462863074e-05, "loss": 0.4716, "step": 21366 }, { "epoch": 3.4879800824456146, "grad_norm": 1.5992709398269653, "learning_rate": 1.7983107354671255e-05, "loss": 0.428, "step": 21367 }, { "epoch": 3.4881433410881186, "grad_norm": 1.99225652217865, "learning_rate": 1.798291623844045e-05, "loss": 0.4672, "step": 21368 }, { "epoch": 3.488306599730623, "grad_norm": 1.8408533334732056, "learning_rate": 1.7982725114170847e-05, "loss": 0.424, "step": 21369 }, { "epoch": 3.4884698583731275, "grad_norm": 2.0111734867095947, "learning_rate": 1.7982533981862645e-05, "loss": 0.519, "step": 21370 }, { "epoch": 3.488633117015632, "grad_norm": 1.70526921749115, "learning_rate": 1.7982342841516032e-05, "loss": 0.445, "step": 21371 }, { "epoch": 3.4887963756581364, "grad_norm": 1.6910055875778198, "learning_rate": 1.7982151693131206e-05, "loss": 0.4566, "step": 21372 }, { "epoch": 3.488959634300641, "grad_norm": 2.245670795440674, "learning_rate": 1.7981960536708348e-05, "loss": 0.5742, "step": 21373 }, { "epoch": 3.4891228929431453, "grad_norm": 1.8699562549591064, "learning_rate": 1.7981769372247666e-05, "loss": 0.5015, "step": 21374 }, { "epoch": 3.4892861515856497, "grad_norm": 2.480267286300659, "learning_rate": 1.798157819974934e-05, "loss": 0.5899, "step": 21375 }, { "epoch": 3.489449410228154, "grad_norm": 1.8270187377929688, "learning_rate": 1.7981387019213565e-05, "loss": 0.4623, "step": 21376 }, { "epoch": 3.4896126688706586, "grad_norm": 1.6397690773010254, "learning_rate": 1.798119583064054e-05, "loss": 0.4329, "step": 21377 }, { "epoch": 3.4897759275131626, "grad_norm": 1.8272289037704468, "learning_rate": 1.798100463403045e-05, "loss": 0.4492, "step": 21378 }, { "epoch": 3.489939186155667, "grad_norm": 2.417215585708618, "learning_rate": 1.7980813429383493e-05, "loss": 0.5888, "step": 21379 }, { "epoch": 3.4901024447981714, "grad_norm": 2.300342559814453, "learning_rate": 1.7980622216699853e-05, "loss": 0.5226, "step": 21380 }, { "epoch": 3.490265703440676, "grad_norm": 1.8518065214157104, "learning_rate": 1.7980430995979734e-05, "loss": 0.4346, "step": 21381 }, { "epoch": 3.4904289620831803, "grad_norm": 2.2765753269195557, "learning_rate": 1.7980239767223322e-05, "loss": 0.5716, "step": 21382 }, { "epoch": 3.4905922207256848, "grad_norm": 2.047126293182373, "learning_rate": 1.798004853043081e-05, "loss": 0.5413, "step": 21383 }, { "epoch": 3.490755479368189, "grad_norm": 1.793073058128357, "learning_rate": 1.7979857285602392e-05, "loss": 0.4242, "step": 21384 }, { "epoch": 3.4909187380106936, "grad_norm": 1.9788018465042114, "learning_rate": 1.797966603273826e-05, "loss": 0.4603, "step": 21385 }, { "epoch": 3.4910819966531976, "grad_norm": 2.1548047065734863, "learning_rate": 1.797947477183861e-05, "loss": 0.5189, "step": 21386 }, { "epoch": 3.491245255295702, "grad_norm": 1.8458387851715088, "learning_rate": 1.7979283502903623e-05, "loss": 0.4472, "step": 21387 }, { "epoch": 3.4914085139382065, "grad_norm": 2.335958242416382, "learning_rate": 1.7979092225933503e-05, "loss": 0.5786, "step": 21388 }, { "epoch": 3.491571772580711, "grad_norm": 2.298442840576172, "learning_rate": 1.797890094092844e-05, "loss": 0.5791, "step": 21389 }, { "epoch": 3.4917350312232154, "grad_norm": 2.068309783935547, "learning_rate": 1.797870964788863e-05, "loss": 0.4059, "step": 21390 }, { "epoch": 3.49189828986572, "grad_norm": 1.9568456411361694, "learning_rate": 1.7978518346814255e-05, "loss": 0.5166, "step": 21391 }, { "epoch": 3.4920615485082243, "grad_norm": 1.9300334453582764, "learning_rate": 1.7978327037705517e-05, "loss": 0.4644, "step": 21392 }, { "epoch": 3.4922248071507287, "grad_norm": 1.9972997903823853, "learning_rate": 1.7978135720562603e-05, "loss": 0.4507, "step": 21393 }, { "epoch": 3.492388065793233, "grad_norm": 2.0039780139923096, "learning_rate": 1.7977944395385713e-05, "loss": 0.5517, "step": 21394 }, { "epoch": 3.492551324435737, "grad_norm": 1.6284312009811401, "learning_rate": 1.797775306217503e-05, "loss": 0.4095, "step": 21395 }, { "epoch": 3.4927145830782416, "grad_norm": 1.8548059463500977, "learning_rate": 1.7977561720930757e-05, "loss": 0.4893, "step": 21396 }, { "epoch": 3.492877841720746, "grad_norm": 1.938089370727539, "learning_rate": 1.7977370371653076e-05, "loss": 0.5062, "step": 21397 }, { "epoch": 3.4930411003632504, "grad_norm": 1.8138529062271118, "learning_rate": 1.797717901434219e-05, "loss": 0.4599, "step": 21398 }, { "epoch": 3.493204359005755, "grad_norm": 1.9369051456451416, "learning_rate": 1.7976987648998286e-05, "loss": 0.456, "step": 21399 }, { "epoch": 3.4933676176482593, "grad_norm": 2.7433619499206543, "learning_rate": 1.7976796275621556e-05, "loss": 0.5839, "step": 21400 }, { "epoch": 3.4935308762907638, "grad_norm": 2.0655081272125244, "learning_rate": 1.7976604894212192e-05, "loss": 0.3931, "step": 21401 }, { "epoch": 3.493694134933268, "grad_norm": 2.2988603115081787, "learning_rate": 1.7976413504770394e-05, "loss": 0.6239, "step": 21402 }, { "epoch": 3.493857393575772, "grad_norm": 1.8183262348175049, "learning_rate": 1.797622210729635e-05, "loss": 0.4286, "step": 21403 }, { "epoch": 3.4940206522182766, "grad_norm": 1.8026652336120605, "learning_rate": 1.7976030701790247e-05, "loss": 0.4234, "step": 21404 }, { "epoch": 3.494183910860781, "grad_norm": 1.8665648698806763, "learning_rate": 1.797583928825229e-05, "loss": 0.4416, "step": 21405 }, { "epoch": 3.4943471695032855, "grad_norm": 1.8351439237594604, "learning_rate": 1.7975647866682658e-05, "loss": 0.3977, "step": 21406 }, { "epoch": 3.49451042814579, "grad_norm": 1.680648684501648, "learning_rate": 1.7975456437081554e-05, "loss": 0.4141, "step": 21407 }, { "epoch": 3.4946736867882944, "grad_norm": 1.6289929151535034, "learning_rate": 1.7975264999449166e-05, "loss": 0.4446, "step": 21408 }, { "epoch": 3.494836945430799, "grad_norm": 2.064397096633911, "learning_rate": 1.7975073553785692e-05, "loss": 0.5162, "step": 21409 }, { "epoch": 3.4950002040733033, "grad_norm": 1.455003023147583, "learning_rate": 1.797488210009132e-05, "loss": 0.3935, "step": 21410 }, { "epoch": 3.4951634627158077, "grad_norm": 1.8108874559402466, "learning_rate": 1.797469063836624e-05, "loss": 0.4906, "step": 21411 }, { "epoch": 3.495326721358312, "grad_norm": 1.8237783908843994, "learning_rate": 1.7974499168610652e-05, "loss": 0.5037, "step": 21412 }, { "epoch": 3.495489980000816, "grad_norm": 2.3899624347686768, "learning_rate": 1.797430769082474e-05, "loss": 0.5704, "step": 21413 }, { "epoch": 3.4956532386433206, "grad_norm": 1.6310737133026123, "learning_rate": 1.797411620500871e-05, "loss": 0.4027, "step": 21414 }, { "epoch": 3.495816497285825, "grad_norm": 1.600378155708313, "learning_rate": 1.7973924711162743e-05, "loss": 0.4157, "step": 21415 }, { "epoch": 3.4959797559283294, "grad_norm": 1.8992335796356201, "learning_rate": 1.7973733209287036e-05, "loss": 0.429, "step": 21416 }, { "epoch": 3.496143014570834, "grad_norm": 1.8333258628845215, "learning_rate": 1.7973541699381785e-05, "loss": 0.4534, "step": 21417 }, { "epoch": 3.4963062732133383, "grad_norm": 1.851679801940918, "learning_rate": 1.797335018144718e-05, "loss": 0.499, "step": 21418 }, { "epoch": 3.4964695318558427, "grad_norm": 1.9531182050704956, "learning_rate": 1.7973158655483407e-05, "loss": 0.4508, "step": 21419 }, { "epoch": 3.496632790498347, "grad_norm": 1.8379591703414917, "learning_rate": 1.797296712149067e-05, "loss": 0.4822, "step": 21420 }, { "epoch": 3.496796049140851, "grad_norm": 2.0871529579162598, "learning_rate": 1.7972775579469154e-05, "loss": 0.4916, "step": 21421 }, { "epoch": 3.4969593077833556, "grad_norm": 1.6418026685714722, "learning_rate": 1.797258402941906e-05, "loss": 0.3996, "step": 21422 }, { "epoch": 3.49712256642586, "grad_norm": 2.238381862640381, "learning_rate": 1.7972392471340573e-05, "loss": 0.503, "step": 21423 }, { "epoch": 3.4972858250683645, "grad_norm": 2.1394596099853516, "learning_rate": 1.7972200905233886e-05, "loss": 0.4438, "step": 21424 }, { "epoch": 3.497449083710869, "grad_norm": 2.5640652179718018, "learning_rate": 1.79720093310992e-05, "loss": 0.6384, "step": 21425 }, { "epoch": 3.4976123423533734, "grad_norm": 2.0639195442199707, "learning_rate": 1.7971817748936702e-05, "loss": 0.5534, "step": 21426 }, { "epoch": 3.497775600995878, "grad_norm": 1.811910629272461, "learning_rate": 1.7971626158746585e-05, "loss": 0.4514, "step": 21427 }, { "epoch": 3.4979388596383822, "grad_norm": 1.89130699634552, "learning_rate": 1.7971434560529042e-05, "loss": 0.4873, "step": 21428 }, { "epoch": 3.4981021182808867, "grad_norm": 1.740348219871521, "learning_rate": 1.7971242954284266e-05, "loss": 0.4461, "step": 21429 }, { "epoch": 3.498265376923391, "grad_norm": 1.9776170253753662, "learning_rate": 1.797105134001245e-05, "loss": 0.431, "step": 21430 }, { "epoch": 3.498428635565895, "grad_norm": 1.9376847743988037, "learning_rate": 1.797085971771379e-05, "loss": 0.4967, "step": 21431 }, { "epoch": 3.4985918942083996, "grad_norm": 2.1445791721343994, "learning_rate": 1.7970668087388474e-05, "loss": 0.5363, "step": 21432 }, { "epoch": 3.498755152850904, "grad_norm": 2.090359687805176, "learning_rate": 1.7970476449036697e-05, "loss": 0.4268, "step": 21433 }, { "epoch": 3.4989184114934084, "grad_norm": 1.945372223854065, "learning_rate": 1.7970284802658656e-05, "loss": 0.4785, "step": 21434 }, { "epoch": 3.499081670135913, "grad_norm": 2.1928179264068604, "learning_rate": 1.7970093148254534e-05, "loss": 0.5491, "step": 21435 }, { "epoch": 3.4992449287784173, "grad_norm": 1.676426649093628, "learning_rate": 1.7969901485824537e-05, "loss": 0.4486, "step": 21436 }, { "epoch": 3.4994081874209217, "grad_norm": 2.145658493041992, "learning_rate": 1.7969709815368846e-05, "loss": 0.5813, "step": 21437 }, { "epoch": 3.4995714460634257, "grad_norm": 2.3921892642974854, "learning_rate": 1.7969518136887664e-05, "loss": 0.5935, "step": 21438 }, { "epoch": 3.49973470470593, "grad_norm": 2.194542169570923, "learning_rate": 1.7969326450381176e-05, "loss": 0.5153, "step": 21439 }, { "epoch": 3.4998979633484346, "grad_norm": 1.7813619375228882, "learning_rate": 1.796913475584958e-05, "loss": 0.4281, "step": 21440 }, { "epoch": 3.500061221990939, "grad_norm": 1.9746230840682983, "learning_rate": 1.7968943053293068e-05, "loss": 0.521, "step": 21441 }, { "epoch": 3.5002244806334435, "grad_norm": 1.8163566589355469, "learning_rate": 1.796875134271183e-05, "loss": 0.4428, "step": 21442 }, { "epoch": 3.500387739275948, "grad_norm": 2.223592519760132, "learning_rate": 1.7968559624106063e-05, "loss": 0.493, "step": 21443 }, { "epoch": 3.5005509979184524, "grad_norm": 1.9403737783432007, "learning_rate": 1.7968367897475957e-05, "loss": 0.3887, "step": 21444 }, { "epoch": 3.500714256560957, "grad_norm": 1.862080454826355, "learning_rate": 1.796817616282171e-05, "loss": 0.4848, "step": 21445 }, { "epoch": 3.5008775152034612, "grad_norm": 1.7106045484542847, "learning_rate": 1.7967984420143508e-05, "loss": 0.3799, "step": 21446 }, { "epoch": 3.5010407738459657, "grad_norm": 1.9159374237060547, "learning_rate": 1.7967792669441552e-05, "loss": 0.452, "step": 21447 }, { "epoch": 3.50120403248847, "grad_norm": 2.0106635093688965, "learning_rate": 1.796760091071603e-05, "loss": 0.5094, "step": 21448 }, { "epoch": 3.501367291130974, "grad_norm": 2.2013609409332275, "learning_rate": 1.7967409143967135e-05, "loss": 0.4246, "step": 21449 }, { "epoch": 3.5015305497734786, "grad_norm": 1.8464900255203247, "learning_rate": 1.7967217369195058e-05, "loss": 0.478, "step": 21450 }, { "epoch": 3.501693808415983, "grad_norm": 1.7714333534240723, "learning_rate": 1.79670255864e-05, "loss": 0.4508, "step": 21451 }, { "epoch": 3.5018570670584874, "grad_norm": 1.7407780885696411, "learning_rate": 1.7966833795582146e-05, "loss": 0.4601, "step": 21452 }, { "epoch": 3.502020325700992, "grad_norm": 1.6129822731018066, "learning_rate": 1.796664199674169e-05, "loss": 0.414, "step": 21453 }, { "epoch": 3.5021835843434963, "grad_norm": 2.1674394607543945, "learning_rate": 1.7966450189878832e-05, "loss": 0.5376, "step": 21454 }, { "epoch": 3.5023468429860003, "grad_norm": 2.394810676574707, "learning_rate": 1.796625837499376e-05, "loss": 0.5781, "step": 21455 }, { "epoch": 3.5025101016285047, "grad_norm": 1.7505104541778564, "learning_rate": 1.796606655208667e-05, "loss": 0.4208, "step": 21456 }, { "epoch": 3.502673360271009, "grad_norm": 2.081622362136841, "learning_rate": 1.7965874721157752e-05, "loss": 0.5642, "step": 21457 }, { "epoch": 3.5028366189135136, "grad_norm": 1.5422455072402954, "learning_rate": 1.7965682882207197e-05, "loss": 0.3758, "step": 21458 }, { "epoch": 3.502999877556018, "grad_norm": 2.2181339263916016, "learning_rate": 1.7965491035235207e-05, "loss": 0.4833, "step": 21459 }, { "epoch": 3.5031631361985225, "grad_norm": 1.6297990083694458, "learning_rate": 1.7965299180241963e-05, "loss": 0.3964, "step": 21460 }, { "epoch": 3.503326394841027, "grad_norm": 1.819348692893982, "learning_rate": 1.796510731722767e-05, "loss": 0.4466, "step": 21461 }, { "epoch": 3.5034896534835314, "grad_norm": 1.70993173122406, "learning_rate": 1.7964915446192514e-05, "loss": 0.5013, "step": 21462 }, { "epoch": 3.503652912126036, "grad_norm": 1.862148642539978, "learning_rate": 1.796472356713669e-05, "loss": 0.4691, "step": 21463 }, { "epoch": 3.5038161707685402, "grad_norm": 2.131789445877075, "learning_rate": 1.796453168006039e-05, "loss": 0.5269, "step": 21464 }, { "epoch": 3.5039794294110447, "grad_norm": 1.956376552581787, "learning_rate": 1.7964339784963813e-05, "loss": 0.4288, "step": 21465 }, { "epoch": 3.5041426880535487, "grad_norm": 1.980639100074768, "learning_rate": 1.7964147881847145e-05, "loss": 0.4688, "step": 21466 }, { "epoch": 3.504305946696053, "grad_norm": 2.0385794639587402, "learning_rate": 1.7963955970710586e-05, "loss": 0.4646, "step": 21467 }, { "epoch": 3.5044692053385575, "grad_norm": 1.856468915939331, "learning_rate": 1.796376405155432e-05, "loss": 0.5216, "step": 21468 }, { "epoch": 3.504632463981062, "grad_norm": 1.7827941179275513, "learning_rate": 1.796357212437855e-05, "loss": 0.4628, "step": 21469 }, { "epoch": 3.5047957226235664, "grad_norm": 2.060934543609619, "learning_rate": 1.796338018918346e-05, "loss": 0.4588, "step": 21470 }, { "epoch": 3.504958981266071, "grad_norm": 1.7658027410507202, "learning_rate": 1.7963188245969255e-05, "loss": 0.4168, "step": 21471 }, { "epoch": 3.5051222399085753, "grad_norm": 2.116361141204834, "learning_rate": 1.7962996294736115e-05, "loss": 0.5041, "step": 21472 }, { "epoch": 3.5052854985510793, "grad_norm": 1.8178595304489136, "learning_rate": 1.7962804335484242e-05, "loss": 0.4685, "step": 21473 }, { "epoch": 3.5054487571935837, "grad_norm": 2.1635851860046387, "learning_rate": 1.796261236821383e-05, "loss": 0.4991, "step": 21474 }, { "epoch": 3.505612015836088, "grad_norm": 1.8032642602920532, "learning_rate": 1.7962420392925066e-05, "loss": 0.4978, "step": 21475 }, { "epoch": 3.5057752744785926, "grad_norm": 1.766854166984558, "learning_rate": 1.796222840961815e-05, "loss": 0.3997, "step": 21476 }, { "epoch": 3.505938533121097, "grad_norm": 1.8257678747177124, "learning_rate": 1.796203641829327e-05, "loss": 0.4631, "step": 21477 }, { "epoch": 3.5061017917636015, "grad_norm": 2.1904351711273193, "learning_rate": 1.796184441895062e-05, "loss": 0.4004, "step": 21478 }, { "epoch": 3.506265050406106, "grad_norm": 1.895747184753418, "learning_rate": 1.79616524115904e-05, "loss": 0.4689, "step": 21479 }, { "epoch": 3.5064283090486104, "grad_norm": 1.8694920539855957, "learning_rate": 1.796146039621279e-05, "loss": 0.4272, "step": 21480 }, { "epoch": 3.506591567691115, "grad_norm": 1.730603814125061, "learning_rate": 1.7961268372817997e-05, "loss": 0.4385, "step": 21481 }, { "epoch": 3.5067548263336192, "grad_norm": 1.887961745262146, "learning_rate": 1.796107634140621e-05, "loss": 0.5288, "step": 21482 }, { "epoch": 3.5069180849761237, "grad_norm": 2.0714221000671387, "learning_rate": 1.7960884301977616e-05, "loss": 0.4926, "step": 21483 }, { "epoch": 3.5070813436186277, "grad_norm": 1.8807117938995361, "learning_rate": 1.796069225453242e-05, "loss": 0.4957, "step": 21484 }, { "epoch": 3.507244602261132, "grad_norm": 2.0148673057556152, "learning_rate": 1.7960500199070803e-05, "loss": 0.3945, "step": 21485 }, { "epoch": 3.5074078609036365, "grad_norm": 1.9513566493988037, "learning_rate": 1.796030813559297e-05, "loss": 0.4576, "step": 21486 }, { "epoch": 3.507571119546141, "grad_norm": 1.7912977933883667, "learning_rate": 1.7960116064099105e-05, "loss": 0.4915, "step": 21487 }, { "epoch": 3.5077343781886454, "grad_norm": 1.5234085321426392, "learning_rate": 1.7959923984589407e-05, "loss": 0.4215, "step": 21488 }, { "epoch": 3.50789763683115, "grad_norm": 1.7890084981918335, "learning_rate": 1.7959731897064065e-05, "loss": 0.4613, "step": 21489 }, { "epoch": 3.5080608954736543, "grad_norm": 1.9349170923233032, "learning_rate": 1.795953980152328e-05, "loss": 0.4935, "step": 21490 }, { "epoch": 3.5082241541161583, "grad_norm": 2.1787092685699463, "learning_rate": 1.7959347697967232e-05, "loss": 0.4988, "step": 21491 }, { "epoch": 3.5083874127586627, "grad_norm": 2.0271105766296387, "learning_rate": 1.795915558639613e-05, "loss": 0.5073, "step": 21492 }, { "epoch": 3.508550671401167, "grad_norm": 2.4688198566436768, "learning_rate": 1.795896346681016e-05, "loss": 0.5714, "step": 21493 }, { "epoch": 3.5087139300436716, "grad_norm": 1.67516028881073, "learning_rate": 1.7958771339209515e-05, "loss": 0.4188, "step": 21494 }, { "epoch": 3.508877188686176, "grad_norm": 1.455501675605774, "learning_rate": 1.7958579203594388e-05, "loss": 0.4234, "step": 21495 }, { "epoch": 3.5090404473286805, "grad_norm": 1.5643770694732666, "learning_rate": 1.7958387059964978e-05, "loss": 0.3775, "step": 21496 }, { "epoch": 3.509203705971185, "grad_norm": 1.7740834951400757, "learning_rate": 1.795819490832147e-05, "loss": 0.488, "step": 21497 }, { "epoch": 3.5093669646136894, "grad_norm": 2.0904040336608887, "learning_rate": 1.7958002748664062e-05, "loss": 0.4867, "step": 21498 }, { "epoch": 3.509530223256194, "grad_norm": 1.9366720914840698, "learning_rate": 1.795781058099295e-05, "loss": 0.4268, "step": 21499 }, { "epoch": 3.5096934818986982, "grad_norm": 2.2488601207733154, "learning_rate": 1.7957618405308323e-05, "loss": 0.5185, "step": 21500 }, { "epoch": 3.5098567405412027, "grad_norm": 2.0473275184631348, "learning_rate": 1.7957426221610378e-05, "loss": 0.5366, "step": 21501 }, { "epoch": 3.5100199991837067, "grad_norm": 2.2546048164367676, "learning_rate": 1.7957234029899305e-05, "loss": 0.5291, "step": 21502 }, { "epoch": 3.510183257826211, "grad_norm": 1.778641939163208, "learning_rate": 1.79570418301753e-05, "loss": 0.4599, "step": 21503 }, { "epoch": 3.5103465164687155, "grad_norm": 1.9320021867752075, "learning_rate": 1.7956849622438554e-05, "loss": 0.5095, "step": 21504 }, { "epoch": 3.51050977511122, "grad_norm": 1.999456763267517, "learning_rate": 1.7956657406689266e-05, "loss": 0.4666, "step": 21505 }, { "epoch": 3.5106730337537244, "grad_norm": 2.250261068344116, "learning_rate": 1.7956465182927625e-05, "loss": 0.4586, "step": 21506 }, { "epoch": 3.510836292396229, "grad_norm": 1.8797156810760498, "learning_rate": 1.7956272951153825e-05, "loss": 0.4788, "step": 21507 }, { "epoch": 3.510999551038733, "grad_norm": 2.255868434906006, "learning_rate": 1.795608071136806e-05, "loss": 0.4544, "step": 21508 }, { "epoch": 3.5111628096812373, "grad_norm": 1.9515498876571655, "learning_rate": 1.7955888463570526e-05, "loss": 0.4691, "step": 21509 }, { "epoch": 3.5113260683237417, "grad_norm": 2.469118118286133, "learning_rate": 1.7955696207761414e-05, "loss": 0.4916, "step": 21510 }, { "epoch": 3.511489326966246, "grad_norm": 2.1155712604522705, "learning_rate": 1.7955503943940915e-05, "loss": 0.4966, "step": 21511 }, { "epoch": 3.5116525856087506, "grad_norm": 2.22451114654541, "learning_rate": 1.7955311672109226e-05, "loss": 0.5682, "step": 21512 }, { "epoch": 3.511815844251255, "grad_norm": 2.6496288776397705, "learning_rate": 1.7955119392266544e-05, "loss": 0.5312, "step": 21513 }, { "epoch": 3.5119791028937595, "grad_norm": 1.8858494758605957, "learning_rate": 1.7954927104413057e-05, "loss": 0.4183, "step": 21514 }, { "epoch": 3.512142361536264, "grad_norm": 2.3717312812805176, "learning_rate": 1.795473480854896e-05, "loss": 0.5272, "step": 21515 }, { "epoch": 3.5123056201787684, "grad_norm": 2.0531458854675293, "learning_rate": 1.7954542504674445e-05, "loss": 0.4533, "step": 21516 }, { "epoch": 3.512468878821273, "grad_norm": 1.9913923740386963, "learning_rate": 1.7954350192789712e-05, "loss": 0.4478, "step": 21517 }, { "epoch": 3.5126321374637772, "grad_norm": 2.0492546558380127, "learning_rate": 1.7954157872894948e-05, "loss": 0.4793, "step": 21518 }, { "epoch": 3.512795396106281, "grad_norm": 1.682174563407898, "learning_rate": 1.795396554499035e-05, "loss": 0.4646, "step": 21519 }, { "epoch": 3.5129586547487857, "grad_norm": 2.0802738666534424, "learning_rate": 1.795377320907611e-05, "loss": 0.4906, "step": 21520 }, { "epoch": 3.51312191339129, "grad_norm": 1.7561254501342773, "learning_rate": 1.7953580865152424e-05, "loss": 0.4372, "step": 21521 }, { "epoch": 3.5132851720337945, "grad_norm": 2.028834581375122, "learning_rate": 1.7953388513219486e-05, "loss": 0.4088, "step": 21522 }, { "epoch": 3.513448430676299, "grad_norm": 1.6765140295028687, "learning_rate": 1.7953196153277485e-05, "loss": 0.4722, "step": 21523 }, { "epoch": 3.5136116893188034, "grad_norm": 2.1330645084381104, "learning_rate": 1.7953003785326618e-05, "loss": 0.4799, "step": 21524 }, { "epoch": 3.513774947961308, "grad_norm": 2.254066228866577, "learning_rate": 1.795281140936708e-05, "loss": 0.5204, "step": 21525 }, { "epoch": 3.513938206603812, "grad_norm": 2.0806684494018555, "learning_rate": 1.795261902539906e-05, "loss": 0.4902, "step": 21526 }, { "epoch": 3.5141014652463163, "grad_norm": 1.9616671800613403, "learning_rate": 1.7952426633422756e-05, "loss": 0.4817, "step": 21527 }, { "epoch": 3.5142647238888207, "grad_norm": 2.0564920902252197, "learning_rate": 1.7952234233438357e-05, "loss": 0.5459, "step": 21528 }, { "epoch": 3.514427982531325, "grad_norm": 1.6380068063735962, "learning_rate": 1.7952041825446064e-05, "loss": 0.4085, "step": 21529 }, { "epoch": 3.5145912411738296, "grad_norm": 2.285451889038086, "learning_rate": 1.7951849409446068e-05, "loss": 0.5456, "step": 21530 }, { "epoch": 3.514754499816334, "grad_norm": 1.9096788167953491, "learning_rate": 1.795165698543856e-05, "loss": 0.4889, "step": 21531 }, { "epoch": 3.5149177584588385, "grad_norm": 1.8577792644500732, "learning_rate": 1.7951464553423735e-05, "loss": 0.4076, "step": 21532 }, { "epoch": 3.515081017101343, "grad_norm": 2.110475778579712, "learning_rate": 1.7951272113401786e-05, "loss": 0.4839, "step": 21533 }, { "epoch": 3.5152442757438473, "grad_norm": 2.3612494468688965, "learning_rate": 1.7951079665372912e-05, "loss": 0.4738, "step": 21534 }, { "epoch": 3.515407534386352, "grad_norm": 1.7992242574691772, "learning_rate": 1.7950887209337304e-05, "loss": 0.4245, "step": 21535 }, { "epoch": 3.5155707930288562, "grad_norm": 2.040595531463623, "learning_rate": 1.795069474529515e-05, "loss": 0.5655, "step": 21536 }, { "epoch": 3.51573405167136, "grad_norm": 1.7020783424377441, "learning_rate": 1.795050227324665e-05, "loss": 0.4242, "step": 21537 }, { "epoch": 3.5158973103138647, "grad_norm": 2.408257484436035, "learning_rate": 1.7950309793191997e-05, "loss": 0.6124, "step": 21538 }, { "epoch": 3.516060568956369, "grad_norm": 1.7855437994003296, "learning_rate": 1.7950117305131384e-05, "loss": 0.4686, "step": 21539 }, { "epoch": 3.5162238275988735, "grad_norm": 1.7210885286331177, "learning_rate": 1.7949924809065005e-05, "loss": 0.4191, "step": 21540 }, { "epoch": 3.516387086241378, "grad_norm": 1.9555072784423828, "learning_rate": 1.7949732304993054e-05, "loss": 0.5179, "step": 21541 }, { "epoch": 3.5165503448838824, "grad_norm": 2.3435912132263184, "learning_rate": 1.7949539792915726e-05, "loss": 0.4966, "step": 21542 }, { "epoch": 3.516713603526387, "grad_norm": 1.9104732275009155, "learning_rate": 1.7949347272833208e-05, "loss": 0.5182, "step": 21543 }, { "epoch": 3.516876862168891, "grad_norm": 1.9277229309082031, "learning_rate": 1.7949154744745704e-05, "loss": 0.5077, "step": 21544 }, { "epoch": 3.5170401208113953, "grad_norm": 2.2222392559051514, "learning_rate": 1.7948962208653403e-05, "loss": 0.4309, "step": 21545 }, { "epoch": 3.5172033794538997, "grad_norm": 1.8090215921401978, "learning_rate": 1.79487696645565e-05, "loss": 0.4507, "step": 21546 }, { "epoch": 3.517366638096404, "grad_norm": 1.8759061098098755, "learning_rate": 1.7948577112455184e-05, "loss": 0.4607, "step": 21547 }, { "epoch": 3.5175298967389086, "grad_norm": 2.1204543113708496, "learning_rate": 1.794838455234966e-05, "loss": 0.5331, "step": 21548 }, { "epoch": 3.517693155381413, "grad_norm": 2.3077149391174316, "learning_rate": 1.7948191984240105e-05, "loss": 0.4564, "step": 21549 }, { "epoch": 3.5178564140239175, "grad_norm": 1.8142027854919434, "learning_rate": 1.794799940812673e-05, "loss": 0.4144, "step": 21550 }, { "epoch": 3.518019672666422, "grad_norm": 1.6174523830413818, "learning_rate": 1.7947806824009722e-05, "loss": 0.4211, "step": 21551 }, { "epoch": 3.5181829313089263, "grad_norm": 1.8331791162490845, "learning_rate": 1.794761423188927e-05, "loss": 0.51, "step": 21552 }, { "epoch": 3.518346189951431, "grad_norm": 1.92872154712677, "learning_rate": 1.7947421631765576e-05, "loss": 0.4179, "step": 21553 }, { "epoch": 3.518509448593935, "grad_norm": 1.9096215963363647, "learning_rate": 1.794722902363883e-05, "loss": 0.4577, "step": 21554 }, { "epoch": 3.518672707236439, "grad_norm": 1.8638043403625488, "learning_rate": 1.7947036407509226e-05, "loss": 0.4421, "step": 21555 }, { "epoch": 3.5188359658789437, "grad_norm": 2.1219875812530518, "learning_rate": 1.7946843783376958e-05, "loss": 0.497, "step": 21556 }, { "epoch": 3.518999224521448, "grad_norm": 1.8037364482879639, "learning_rate": 1.7946651151242223e-05, "loss": 0.4325, "step": 21557 }, { "epoch": 3.5191624831639525, "grad_norm": 1.9750947952270508, "learning_rate": 1.794645851110521e-05, "loss": 0.4493, "step": 21558 }, { "epoch": 3.519325741806457, "grad_norm": 1.6023133993148804, "learning_rate": 1.7946265862966114e-05, "loss": 0.3754, "step": 21559 }, { "epoch": 3.5194890004489614, "grad_norm": 2.2017757892608643, "learning_rate": 1.7946073206825134e-05, "loss": 0.4743, "step": 21560 }, { "epoch": 3.5196522590914654, "grad_norm": 1.5828827619552612, "learning_rate": 1.7945880542682458e-05, "loss": 0.3851, "step": 21561 }, { "epoch": 3.51981551773397, "grad_norm": 1.9536089897155762, "learning_rate": 1.7945687870538285e-05, "loss": 0.5011, "step": 21562 }, { "epoch": 3.5199787763764743, "grad_norm": 1.7312904596328735, "learning_rate": 1.7945495190392802e-05, "loss": 0.4555, "step": 21563 }, { "epoch": 3.5201420350189787, "grad_norm": 2.7398412227630615, "learning_rate": 1.7945302502246212e-05, "loss": 0.558, "step": 21564 }, { "epoch": 3.520305293661483, "grad_norm": 1.8847248554229736, "learning_rate": 1.79451098060987e-05, "loss": 0.4669, "step": 21565 }, { "epoch": 3.5204685523039876, "grad_norm": 2.471045970916748, "learning_rate": 1.794491710195047e-05, "loss": 0.5561, "step": 21566 }, { "epoch": 3.520631810946492, "grad_norm": 2.13836407661438, "learning_rate": 1.7944724389801706e-05, "loss": 0.4703, "step": 21567 }, { "epoch": 3.5207950695889965, "grad_norm": 2.157484769821167, "learning_rate": 1.794453166965261e-05, "loss": 0.4938, "step": 21568 }, { "epoch": 3.520958328231501, "grad_norm": 1.978920340538025, "learning_rate": 1.7944338941503366e-05, "loss": 0.5058, "step": 21569 }, { "epoch": 3.5211215868740053, "grad_norm": 2.0106053352355957, "learning_rate": 1.7944146205354182e-05, "loss": 0.4785, "step": 21570 }, { "epoch": 3.5212848455165098, "grad_norm": 1.8264528512954712, "learning_rate": 1.7943953461205243e-05, "loss": 0.5399, "step": 21571 }, { "epoch": 3.5214481041590138, "grad_norm": 1.9645869731903076, "learning_rate": 1.7943760709056745e-05, "loss": 0.4638, "step": 21572 }, { "epoch": 3.521611362801518, "grad_norm": 1.9780124425888062, "learning_rate": 1.794356794890888e-05, "loss": 0.4328, "step": 21573 }, { "epoch": 3.5217746214440226, "grad_norm": 1.8667747974395752, "learning_rate": 1.794337518076185e-05, "loss": 0.4381, "step": 21574 }, { "epoch": 3.521937880086527, "grad_norm": 1.8691811561584473, "learning_rate": 1.7943182404615837e-05, "loss": 0.4358, "step": 21575 }, { "epoch": 3.5221011387290315, "grad_norm": 1.7430098056793213, "learning_rate": 1.7942989620471042e-05, "loss": 0.4019, "step": 21576 }, { "epoch": 3.522264397371536, "grad_norm": 1.8753516674041748, "learning_rate": 1.794279682832766e-05, "loss": 0.4693, "step": 21577 }, { "epoch": 3.5224276560140404, "grad_norm": 1.876793384552002, "learning_rate": 1.7942604028185883e-05, "loss": 0.5135, "step": 21578 }, { "epoch": 3.5225909146565444, "grad_norm": 1.97151780128479, "learning_rate": 1.794241122004591e-05, "loss": 0.4558, "step": 21579 }, { "epoch": 3.522754173299049, "grad_norm": 2.1132986545562744, "learning_rate": 1.7942218403907923e-05, "loss": 0.4754, "step": 21580 }, { "epoch": 3.5229174319415533, "grad_norm": 1.7900360822677612, "learning_rate": 1.794202557977213e-05, "loss": 0.4465, "step": 21581 }, { "epoch": 3.5230806905840577, "grad_norm": 1.9255763292312622, "learning_rate": 1.7941832747638722e-05, "loss": 0.5113, "step": 21582 }, { "epoch": 3.523243949226562, "grad_norm": 2.5041909217834473, "learning_rate": 1.7941639907507886e-05, "loss": 0.5618, "step": 21583 }, { "epoch": 3.5234072078690666, "grad_norm": 2.5619101524353027, "learning_rate": 1.794144705937982e-05, "loss": 0.5155, "step": 21584 }, { "epoch": 3.523570466511571, "grad_norm": 1.7418034076690674, "learning_rate": 1.7941254203254722e-05, "loss": 0.4311, "step": 21585 }, { "epoch": 3.5237337251540755, "grad_norm": 2.377488136291504, "learning_rate": 1.7941061339132778e-05, "loss": 0.6626, "step": 21586 }, { "epoch": 3.52389698379658, "grad_norm": 1.863032579421997, "learning_rate": 1.7940868467014193e-05, "loss": 0.4222, "step": 21587 }, { "epoch": 3.5240602424390843, "grad_norm": 2.1582629680633545, "learning_rate": 1.7940675586899155e-05, "loss": 0.5982, "step": 21588 }, { "epoch": 3.5242235010815888, "grad_norm": 2.626539945602417, "learning_rate": 1.7940482698787856e-05, "loss": 0.59, "step": 21589 }, { "epoch": 3.5243867597240928, "grad_norm": 2.4685988426208496, "learning_rate": 1.7940289802680494e-05, "loss": 0.5238, "step": 21590 }, { "epoch": 3.524550018366597, "grad_norm": 1.998069167137146, "learning_rate": 1.794009689857726e-05, "loss": 0.5142, "step": 21591 }, { "epoch": 3.5247132770091016, "grad_norm": 2.1140782833099365, "learning_rate": 1.7939903986478354e-05, "loss": 0.5809, "step": 21592 }, { "epoch": 3.524876535651606, "grad_norm": 1.588952660560608, "learning_rate": 1.7939711066383968e-05, "loss": 0.3248, "step": 21593 }, { "epoch": 3.5250397942941105, "grad_norm": 1.9125397205352783, "learning_rate": 1.7939518138294293e-05, "loss": 0.4639, "step": 21594 }, { "epoch": 3.525203052936615, "grad_norm": 2.5177743434906006, "learning_rate": 1.7939325202209524e-05, "loss": 0.5484, "step": 21595 }, { "epoch": 3.525366311579119, "grad_norm": 1.551340103149414, "learning_rate": 1.7939132258129855e-05, "loss": 0.3882, "step": 21596 }, { "epoch": 3.5255295702216234, "grad_norm": 2.1130964756011963, "learning_rate": 1.7938939306055486e-05, "loss": 0.5691, "step": 21597 }, { "epoch": 3.525692828864128, "grad_norm": 2.0161805152893066, "learning_rate": 1.7938746345986607e-05, "loss": 0.4555, "step": 21598 }, { "epoch": 3.5258560875066323, "grad_norm": 2.2528586387634277, "learning_rate": 1.793855337792341e-05, "loss": 0.5095, "step": 21599 }, { "epoch": 3.5260193461491367, "grad_norm": 2.1500892639160156, "learning_rate": 1.7938360401866096e-05, "loss": 0.538, "step": 21600 }, { "epoch": 3.526182604791641, "grad_norm": 2.0692646503448486, "learning_rate": 1.793816741781485e-05, "loss": 0.5108, "step": 21601 }, { "epoch": 3.5263458634341456, "grad_norm": 1.9263144731521606, "learning_rate": 1.793797442576988e-05, "loss": 0.463, "step": 21602 }, { "epoch": 3.52650912207665, "grad_norm": 1.7477391958236694, "learning_rate": 1.793778142573136e-05, "loss": 0.4927, "step": 21603 }, { "epoch": 3.5266723807191545, "grad_norm": 2.040956974029541, "learning_rate": 1.79375884176995e-05, "loss": 0.4623, "step": 21604 }, { "epoch": 3.526835639361659, "grad_norm": 1.851593255996704, "learning_rate": 1.7937395401674494e-05, "loss": 0.4745, "step": 21605 }, { "epoch": 3.5269988980041633, "grad_norm": 1.7882030010223389, "learning_rate": 1.793720237765653e-05, "loss": 0.428, "step": 21606 }, { "epoch": 3.5271621566466673, "grad_norm": 1.7831555604934692, "learning_rate": 1.793700934564581e-05, "loss": 0.4588, "step": 21607 }, { "epoch": 3.5273254152891718, "grad_norm": 2.167576313018799, "learning_rate": 1.7936816305642523e-05, "loss": 0.5073, "step": 21608 }, { "epoch": 3.527488673931676, "grad_norm": 2.707531690597534, "learning_rate": 1.793662325764686e-05, "loss": 0.5154, "step": 21609 }, { "epoch": 3.5276519325741806, "grad_norm": 1.9498847723007202, "learning_rate": 1.793643020165902e-05, "loss": 0.4543, "step": 21610 }, { "epoch": 3.527815191216685, "grad_norm": 1.8858611583709717, "learning_rate": 1.79362371376792e-05, "loss": 0.4149, "step": 21611 }, { "epoch": 3.5279784498591895, "grad_norm": 2.2761147022247314, "learning_rate": 1.7936044065707588e-05, "loss": 0.5405, "step": 21612 }, { "epoch": 3.528141708501694, "grad_norm": 1.8460187911987305, "learning_rate": 1.7935850985744383e-05, "loss": 0.4989, "step": 21613 }, { "epoch": 3.528304967144198, "grad_norm": 1.5404495000839233, "learning_rate": 1.793565789778978e-05, "loss": 0.4165, "step": 21614 }, { "epoch": 3.5284682257867024, "grad_norm": 1.935246229171753, "learning_rate": 1.793546480184397e-05, "loss": 0.4177, "step": 21615 }, { "epoch": 3.528631484429207, "grad_norm": 1.9539284706115723, "learning_rate": 1.7935271697907147e-05, "loss": 0.5011, "step": 21616 }, { "epoch": 3.5287947430717113, "grad_norm": 1.7750403881072998, "learning_rate": 1.793507858597951e-05, "loss": 0.4661, "step": 21617 }, { "epoch": 3.5289580017142157, "grad_norm": 2.088954210281372, "learning_rate": 1.793488546606125e-05, "loss": 0.4474, "step": 21618 }, { "epoch": 3.52912126035672, "grad_norm": 2.080098867416382, "learning_rate": 1.7934692338152565e-05, "loss": 0.4949, "step": 21619 }, { "epoch": 3.5292845189992246, "grad_norm": 2.114932060241699, "learning_rate": 1.7934499202253643e-05, "loss": 0.4199, "step": 21620 }, { "epoch": 3.529447777641729, "grad_norm": 1.9335047006607056, "learning_rate": 1.7934306058364685e-05, "loss": 0.5114, "step": 21621 }, { "epoch": 3.5296110362842334, "grad_norm": 2.0729074478149414, "learning_rate": 1.7934112906485883e-05, "loss": 0.5018, "step": 21622 }, { "epoch": 3.529774294926738, "grad_norm": 1.7829475402832031, "learning_rate": 1.793391974661743e-05, "loss": 0.4611, "step": 21623 }, { "epoch": 3.5299375535692423, "grad_norm": 1.8307665586471558, "learning_rate": 1.7933726578759522e-05, "loss": 0.4176, "step": 21624 }, { "epoch": 3.5301008122117463, "grad_norm": 1.740548849105835, "learning_rate": 1.7933533402912354e-05, "loss": 0.4113, "step": 21625 }, { "epoch": 3.5302640708542508, "grad_norm": 2.2148282527923584, "learning_rate": 1.793334021907612e-05, "loss": 0.5166, "step": 21626 }, { "epoch": 3.530427329496755, "grad_norm": 2.1598780155181885, "learning_rate": 1.7933147027251013e-05, "loss": 0.5729, "step": 21627 }, { "epoch": 3.5305905881392596, "grad_norm": 1.9214674234390259, "learning_rate": 1.793295382743723e-05, "loss": 0.4761, "step": 21628 }, { "epoch": 3.530753846781764, "grad_norm": 2.06543231010437, "learning_rate": 1.7932760619634965e-05, "loss": 0.4593, "step": 21629 }, { "epoch": 3.5309171054242685, "grad_norm": 2.4460363388061523, "learning_rate": 1.7932567403844413e-05, "loss": 0.615, "step": 21630 }, { "epoch": 3.531080364066773, "grad_norm": 1.6071645021438599, "learning_rate": 1.7932374180065767e-05, "loss": 0.4356, "step": 21631 }, { "epoch": 3.531243622709277, "grad_norm": 1.9552395343780518, "learning_rate": 1.7932180948299222e-05, "loss": 0.4492, "step": 21632 }, { "epoch": 3.5314068813517814, "grad_norm": 1.9600430727005005, "learning_rate": 1.7931987708544973e-05, "loss": 0.4786, "step": 21633 }, { "epoch": 3.531570139994286, "grad_norm": 1.7287484407424927, "learning_rate": 1.7931794460803217e-05, "loss": 0.4224, "step": 21634 }, { "epoch": 3.5317333986367903, "grad_norm": 1.894500494003296, "learning_rate": 1.793160120507414e-05, "loss": 0.4432, "step": 21635 }, { "epoch": 3.5318966572792947, "grad_norm": 2.0175135135650635, "learning_rate": 1.793140794135795e-05, "loss": 0.4358, "step": 21636 }, { "epoch": 3.532059915921799, "grad_norm": 1.965950846672058, "learning_rate": 1.793121466965483e-05, "loss": 0.5342, "step": 21637 }, { "epoch": 3.5322231745643036, "grad_norm": 1.747358798980713, "learning_rate": 1.793102138996498e-05, "loss": 0.5368, "step": 21638 }, { "epoch": 3.532386433206808, "grad_norm": 1.6710983514785767, "learning_rate": 1.793082810228859e-05, "loss": 0.378, "step": 21639 }, { "epoch": 3.5325496918493124, "grad_norm": 2.0838115215301514, "learning_rate": 1.793063480662586e-05, "loss": 0.4205, "step": 21640 }, { "epoch": 3.532712950491817, "grad_norm": 1.5848270654678345, "learning_rate": 1.7930441502976986e-05, "loss": 0.3861, "step": 21641 }, { "epoch": 3.5328762091343213, "grad_norm": 2.1173973083496094, "learning_rate": 1.7930248191342155e-05, "loss": 0.5438, "step": 21642 }, { "epoch": 3.5330394677768253, "grad_norm": 1.941787600517273, "learning_rate": 1.7930054871721573e-05, "loss": 0.5476, "step": 21643 }, { "epoch": 3.5332027264193298, "grad_norm": 1.8184525966644287, "learning_rate": 1.7929861544115425e-05, "loss": 0.4288, "step": 21644 }, { "epoch": 3.533365985061834, "grad_norm": 2.0609934329986572, "learning_rate": 1.7929668208523904e-05, "loss": 0.499, "step": 21645 }, { "epoch": 3.5335292437043386, "grad_norm": 2.0541367530822754, "learning_rate": 1.7929474864947214e-05, "loss": 0.4529, "step": 21646 }, { "epoch": 3.533692502346843, "grad_norm": 3.0668203830718994, "learning_rate": 1.792928151338554e-05, "loss": 0.5557, "step": 21647 }, { "epoch": 3.5338557609893475, "grad_norm": 1.8305186033248901, "learning_rate": 1.7929088153839087e-05, "loss": 0.3961, "step": 21648 }, { "epoch": 3.5340190196318515, "grad_norm": 1.8534737825393677, "learning_rate": 1.792889478630804e-05, "loss": 0.4624, "step": 21649 }, { "epoch": 3.534182278274356, "grad_norm": 1.8149558305740356, "learning_rate": 1.79287014107926e-05, "loss": 0.541, "step": 21650 }, { "epoch": 3.5343455369168604, "grad_norm": 2.0861122608184814, "learning_rate": 1.7928508027292965e-05, "loss": 0.5733, "step": 21651 }, { "epoch": 3.534508795559365, "grad_norm": 1.7285877466201782, "learning_rate": 1.7928314635809318e-05, "loss": 0.5443, "step": 21652 }, { "epoch": 3.5346720542018693, "grad_norm": 2.4443423748016357, "learning_rate": 1.7928121236341863e-05, "loss": 0.4983, "step": 21653 }, { "epoch": 3.5348353128443737, "grad_norm": 2.068243980407715, "learning_rate": 1.792792782889079e-05, "loss": 0.57, "step": 21654 }, { "epoch": 3.534998571486878, "grad_norm": 1.682749629020691, "learning_rate": 1.7927734413456295e-05, "loss": 0.3675, "step": 21655 }, { "epoch": 3.5351618301293826, "grad_norm": 1.9028257131576538, "learning_rate": 1.7927540990038577e-05, "loss": 0.4093, "step": 21656 }, { "epoch": 3.535325088771887, "grad_norm": 2.4809670448303223, "learning_rate": 1.7927347558637828e-05, "loss": 0.5727, "step": 21657 }, { "epoch": 3.5354883474143914, "grad_norm": 2.0622386932373047, "learning_rate": 1.7927154119254234e-05, "loss": 0.5125, "step": 21658 }, { "epoch": 3.535651606056896, "grad_norm": 1.7839202880859375, "learning_rate": 1.7926960671888008e-05, "loss": 0.4667, "step": 21659 }, { "epoch": 3.5358148646994, "grad_norm": 1.8614294528961182, "learning_rate": 1.792676721653933e-05, "loss": 0.4834, "step": 21660 }, { "epoch": 3.5359781233419043, "grad_norm": 2.1629655361175537, "learning_rate": 1.7926573753208397e-05, "loss": 0.4391, "step": 21661 }, { "epoch": 3.5361413819844087, "grad_norm": 2.2033724784851074, "learning_rate": 1.792638028189541e-05, "loss": 0.4823, "step": 21662 }, { "epoch": 3.536304640626913, "grad_norm": 2.157137632369995, "learning_rate": 1.792618680260056e-05, "loss": 0.4896, "step": 21663 }, { "epoch": 3.5364678992694176, "grad_norm": 1.6557374000549316, "learning_rate": 1.792599331532404e-05, "loss": 0.3655, "step": 21664 }, { "epoch": 3.536631157911922, "grad_norm": 1.8339899778366089, "learning_rate": 1.792579982006605e-05, "loss": 0.4686, "step": 21665 }, { "epoch": 3.5367944165544265, "grad_norm": 2.041252851486206, "learning_rate": 1.7925606316826778e-05, "loss": 0.5275, "step": 21666 }, { "epoch": 3.5369576751969305, "grad_norm": 2.300205707550049, "learning_rate": 1.7925412805606425e-05, "loss": 0.5371, "step": 21667 }, { "epoch": 3.537120933839435, "grad_norm": 2.0601654052734375, "learning_rate": 1.792521928640518e-05, "loss": 0.5168, "step": 21668 }, { "epoch": 3.5372841924819394, "grad_norm": 2.0181725025177, "learning_rate": 1.7925025759223248e-05, "loss": 0.5047, "step": 21669 }, { "epoch": 3.537447451124444, "grad_norm": 1.8391714096069336, "learning_rate": 1.792483222406081e-05, "loss": 0.4286, "step": 21670 }, { "epoch": 3.5376107097669482, "grad_norm": 1.8549611568450928, "learning_rate": 1.7924638680918075e-05, "loss": 0.4344, "step": 21671 }, { "epoch": 3.5377739684094527, "grad_norm": 1.8284976482391357, "learning_rate": 1.7924445129795226e-05, "loss": 0.4585, "step": 21672 }, { "epoch": 3.537937227051957, "grad_norm": 1.7270172834396362, "learning_rate": 1.7924251570692463e-05, "loss": 0.4373, "step": 21673 }, { "epoch": 3.5381004856944616, "grad_norm": 1.8173067569732666, "learning_rate": 1.7924058003609982e-05, "loss": 0.4737, "step": 21674 }, { "epoch": 3.538263744336966, "grad_norm": 1.964009404182434, "learning_rate": 1.792386442854798e-05, "loss": 0.4445, "step": 21675 }, { "epoch": 3.5384270029794704, "grad_norm": 1.9847629070281982, "learning_rate": 1.7923670845506645e-05, "loss": 0.5064, "step": 21676 }, { "epoch": 3.538590261621975, "grad_norm": 2.0680956840515137, "learning_rate": 1.7923477254486173e-05, "loss": 0.4686, "step": 21677 }, { "epoch": 3.538753520264479, "grad_norm": 2.1789932250976562, "learning_rate": 1.7923283655486765e-05, "loss": 0.5615, "step": 21678 }, { "epoch": 3.5389167789069833, "grad_norm": 2.285919189453125, "learning_rate": 1.7923090048508612e-05, "loss": 0.594, "step": 21679 }, { "epoch": 3.5390800375494877, "grad_norm": 2.308199405670166, "learning_rate": 1.792289643355191e-05, "loss": 0.5168, "step": 21680 }, { "epoch": 3.539243296191992, "grad_norm": 1.6913506984710693, "learning_rate": 1.792270281061685e-05, "loss": 0.4467, "step": 21681 }, { "epoch": 3.5394065548344966, "grad_norm": 2.0481886863708496, "learning_rate": 1.7922509179703635e-05, "loss": 0.5017, "step": 21682 }, { "epoch": 3.539569813477001, "grad_norm": 1.7306315898895264, "learning_rate": 1.792231554081245e-05, "loss": 0.4483, "step": 21683 }, { "epoch": 3.539733072119505, "grad_norm": 2.3499722480773926, "learning_rate": 1.7922121893943502e-05, "loss": 0.5371, "step": 21684 }, { "epoch": 3.5398963307620095, "grad_norm": 2.2366199493408203, "learning_rate": 1.7921928239096976e-05, "loss": 0.4761, "step": 21685 }, { "epoch": 3.540059589404514, "grad_norm": 2.1430466175079346, "learning_rate": 1.792173457627307e-05, "loss": 0.4775, "step": 21686 }, { "epoch": 3.5402228480470184, "grad_norm": 1.5870423316955566, "learning_rate": 1.792154090547198e-05, "loss": 0.415, "step": 21687 }, { "epoch": 3.540386106689523, "grad_norm": 2.310187816619873, "learning_rate": 1.7921347226693897e-05, "loss": 0.5152, "step": 21688 }, { "epoch": 3.5405493653320272, "grad_norm": 1.9125274419784546, "learning_rate": 1.7921153539939023e-05, "loss": 0.4707, "step": 21689 }, { "epoch": 3.5407126239745317, "grad_norm": 1.9577605724334717, "learning_rate": 1.7920959845207552e-05, "loss": 0.4405, "step": 21690 }, { "epoch": 3.540875882617036, "grad_norm": 1.9524221420288086, "learning_rate": 1.7920766142499673e-05, "loss": 0.4095, "step": 21691 }, { "epoch": 3.5410391412595406, "grad_norm": 1.5903910398483276, "learning_rate": 1.7920572431815583e-05, "loss": 0.3756, "step": 21692 }, { "epoch": 3.541202399902045, "grad_norm": 1.8112382888793945, "learning_rate": 1.7920378713155482e-05, "loss": 0.4015, "step": 21693 }, { "epoch": 3.5413656585445494, "grad_norm": 2.114095687866211, "learning_rate": 1.792018498651956e-05, "loss": 0.5103, "step": 21694 }, { "epoch": 3.5415289171870534, "grad_norm": 1.9036415815353394, "learning_rate": 1.7919991251908015e-05, "loss": 0.4332, "step": 21695 }, { "epoch": 3.541692175829558, "grad_norm": 2.618079662322998, "learning_rate": 1.7919797509321043e-05, "loss": 0.5671, "step": 21696 }, { "epoch": 3.5418554344720623, "grad_norm": 1.804371953010559, "learning_rate": 1.791960375875883e-05, "loss": 0.4634, "step": 21697 }, { "epoch": 3.5420186931145667, "grad_norm": 1.9165059328079224, "learning_rate": 1.7919410000221584e-05, "loss": 0.504, "step": 21698 }, { "epoch": 3.542181951757071, "grad_norm": 1.8716095685958862, "learning_rate": 1.7919216233709492e-05, "loss": 0.4351, "step": 21699 }, { "epoch": 3.5423452103995756, "grad_norm": 1.7372773885726929, "learning_rate": 1.7919022459222754e-05, "loss": 0.3934, "step": 21700 }, { "epoch": 3.54250846904208, "grad_norm": 1.9441611766815186, "learning_rate": 1.791882867676156e-05, "loss": 0.492, "step": 21701 }, { "epoch": 3.542671727684584, "grad_norm": 1.575247049331665, "learning_rate": 1.791863488632611e-05, "loss": 0.3505, "step": 21702 }, { "epoch": 3.5428349863270885, "grad_norm": 1.9203051328659058, "learning_rate": 1.7918441087916592e-05, "loss": 0.4855, "step": 21703 }, { "epoch": 3.542998244969593, "grad_norm": 2.112823247909546, "learning_rate": 1.791824728153321e-05, "loss": 0.4872, "step": 21704 }, { "epoch": 3.5431615036120974, "grad_norm": 1.9391429424285889, "learning_rate": 1.7918053467176152e-05, "loss": 0.455, "step": 21705 }, { "epoch": 3.543324762254602, "grad_norm": 2.1396899223327637, "learning_rate": 1.7917859644845618e-05, "loss": 0.5053, "step": 21706 }, { "epoch": 3.5434880208971062, "grad_norm": 1.3440672159194946, "learning_rate": 1.7917665814541804e-05, "loss": 0.336, "step": 21707 }, { "epoch": 3.5436512795396107, "grad_norm": 1.5423774719238281, "learning_rate": 1.79174719762649e-05, "loss": 0.3715, "step": 21708 }, { "epoch": 3.543814538182115, "grad_norm": 1.8455897569656372, "learning_rate": 1.79172781300151e-05, "loss": 0.4795, "step": 21709 }, { "epoch": 3.5439777968246196, "grad_norm": 2.166618585586548, "learning_rate": 1.7917084275792607e-05, "loss": 0.7218, "step": 21710 }, { "epoch": 3.544141055467124, "grad_norm": 1.828371524810791, "learning_rate": 1.7916890413597613e-05, "loss": 0.4554, "step": 21711 }, { "epoch": 3.5443043141096284, "grad_norm": 2.109280586242676, "learning_rate": 1.7916696543430315e-05, "loss": 0.4512, "step": 21712 }, { "epoch": 3.5444675727521324, "grad_norm": 2.440739393234253, "learning_rate": 1.79165026652909e-05, "loss": 0.6106, "step": 21713 }, { "epoch": 3.544630831394637, "grad_norm": 1.8263051509857178, "learning_rate": 1.7916308779179572e-05, "loss": 0.3945, "step": 21714 }, { "epoch": 3.5447940900371413, "grad_norm": 1.8276294469833374, "learning_rate": 1.7916114885096525e-05, "loss": 0.42, "step": 21715 }, { "epoch": 3.5449573486796457, "grad_norm": 2.1324996948242188, "learning_rate": 1.7915920983041946e-05, "loss": 0.496, "step": 21716 }, { "epoch": 3.54512060732215, "grad_norm": 2.1053407192230225, "learning_rate": 1.791572707301604e-05, "loss": 0.4848, "step": 21717 }, { "epoch": 3.5452838659646546, "grad_norm": 2.0043864250183105, "learning_rate": 1.7915533155019e-05, "loss": 0.4899, "step": 21718 }, { "epoch": 3.545447124607159, "grad_norm": 2.0873000621795654, "learning_rate": 1.7915339229051022e-05, "loss": 0.41, "step": 21719 }, { "epoch": 3.545610383249663, "grad_norm": 2.350756883621216, "learning_rate": 1.7915145295112297e-05, "loss": 0.5165, "step": 21720 }, { "epoch": 3.5457736418921675, "grad_norm": 2.242382049560547, "learning_rate": 1.791495135320302e-05, "loss": 0.4249, "step": 21721 }, { "epoch": 3.545936900534672, "grad_norm": 1.9794108867645264, "learning_rate": 1.7914757403323394e-05, "loss": 0.4553, "step": 21722 }, { "epoch": 3.5461001591771764, "grad_norm": 1.9659132957458496, "learning_rate": 1.7914563445473606e-05, "loss": 0.57, "step": 21723 }, { "epoch": 3.546263417819681, "grad_norm": 2.2469308376312256, "learning_rate": 1.7914369479653858e-05, "loss": 0.4784, "step": 21724 }, { "epoch": 3.5464266764621852, "grad_norm": 1.9585318565368652, "learning_rate": 1.791417550586434e-05, "loss": 0.5007, "step": 21725 }, { "epoch": 3.5465899351046897, "grad_norm": 2.0572028160095215, "learning_rate": 1.791398152410525e-05, "loss": 0.4983, "step": 21726 }, { "epoch": 3.546753193747194, "grad_norm": 2.0181455612182617, "learning_rate": 1.791378753437678e-05, "loss": 0.4733, "step": 21727 }, { "epoch": 3.5469164523896985, "grad_norm": 1.9279091358184814, "learning_rate": 1.7913593536679132e-05, "loss": 0.4679, "step": 21728 }, { "epoch": 3.547079711032203, "grad_norm": 2.3654625415802, "learning_rate": 1.7913399531012498e-05, "loss": 0.5265, "step": 21729 }, { "epoch": 3.5472429696747074, "grad_norm": 2.062120199203491, "learning_rate": 1.791320551737707e-05, "loss": 0.4767, "step": 21730 }, { "epoch": 3.5474062283172114, "grad_norm": 2.1517417430877686, "learning_rate": 1.7913011495773046e-05, "loss": 0.5365, "step": 21731 }, { "epoch": 3.547569486959716, "grad_norm": 1.8661046028137207, "learning_rate": 1.7912817466200624e-05, "loss": 0.453, "step": 21732 }, { "epoch": 3.5477327456022203, "grad_norm": 1.8333173990249634, "learning_rate": 1.7912623428659995e-05, "loss": 0.4768, "step": 21733 }, { "epoch": 3.5478960042447247, "grad_norm": 2.215090274810791, "learning_rate": 1.7912429383151357e-05, "loss": 0.5331, "step": 21734 }, { "epoch": 3.548059262887229, "grad_norm": 2.455280065536499, "learning_rate": 1.7912235329674903e-05, "loss": 0.5724, "step": 21735 }, { "epoch": 3.5482225215297336, "grad_norm": 1.9033924341201782, "learning_rate": 1.791204126823083e-05, "loss": 0.4631, "step": 21736 }, { "epoch": 3.5483857801722376, "grad_norm": 1.9915063381195068, "learning_rate": 1.7911847198819336e-05, "loss": 0.486, "step": 21737 }, { "epoch": 3.548549038814742, "grad_norm": 1.936903476715088, "learning_rate": 1.7911653121440613e-05, "loss": 0.48, "step": 21738 }, { "epoch": 3.5487122974572465, "grad_norm": 2.1180784702301025, "learning_rate": 1.7911459036094856e-05, "loss": 0.4866, "step": 21739 }, { "epoch": 3.548875556099751, "grad_norm": 2.105349063873291, "learning_rate": 1.7911264942782264e-05, "loss": 0.5026, "step": 21740 }, { "epoch": 3.5490388147422554, "grad_norm": 2.001584053039551, "learning_rate": 1.7911070841503028e-05, "loss": 0.519, "step": 21741 }, { "epoch": 3.54920207338476, "grad_norm": 2.09356951713562, "learning_rate": 1.7910876732257346e-05, "loss": 0.5123, "step": 21742 }, { "epoch": 3.5493653320272642, "grad_norm": 1.9471542835235596, "learning_rate": 1.7910682615045418e-05, "loss": 0.5318, "step": 21743 }, { "epoch": 3.5495285906697687, "grad_norm": 2.349172592163086, "learning_rate": 1.791048848986743e-05, "loss": 0.4898, "step": 21744 }, { "epoch": 3.549691849312273, "grad_norm": 2.184384346008301, "learning_rate": 1.7910294356723583e-05, "loss": 0.5149, "step": 21745 }, { "epoch": 3.5498551079547775, "grad_norm": 1.4552487134933472, "learning_rate": 1.791010021561407e-05, "loss": 0.3999, "step": 21746 }, { "epoch": 3.550018366597282, "grad_norm": 2.101381540298462, "learning_rate": 1.7909906066539092e-05, "loss": 0.4572, "step": 21747 }, { "epoch": 3.550181625239786, "grad_norm": 1.7882421016693115, "learning_rate": 1.7909711909498838e-05, "loss": 0.4399, "step": 21748 }, { "epoch": 3.5503448838822904, "grad_norm": 1.635260820388794, "learning_rate": 1.790951774449351e-05, "loss": 0.4268, "step": 21749 }, { "epoch": 3.550508142524795, "grad_norm": 1.6482477188110352, "learning_rate": 1.7909323571523295e-05, "loss": 0.3835, "step": 21750 }, { "epoch": 3.5506714011672993, "grad_norm": 1.8987377882003784, "learning_rate": 1.7909129390588393e-05, "loss": 0.4595, "step": 21751 }, { "epoch": 3.5508346598098037, "grad_norm": 1.9870256185531616, "learning_rate": 1.7908935201689e-05, "loss": 0.4918, "step": 21752 }, { "epoch": 3.550997918452308, "grad_norm": 2.0126659870147705, "learning_rate": 1.7908741004825315e-05, "loss": 0.4452, "step": 21753 }, { "epoch": 3.5511611770948126, "grad_norm": 1.8838257789611816, "learning_rate": 1.790854679999753e-05, "loss": 0.4515, "step": 21754 }, { "epoch": 3.5513244357373166, "grad_norm": 1.887506127357483, "learning_rate": 1.7908352587205836e-05, "loss": 0.4589, "step": 21755 }, { "epoch": 3.551487694379821, "grad_norm": 1.954933524131775, "learning_rate": 1.7908158366450435e-05, "loss": 0.4318, "step": 21756 }, { "epoch": 3.5516509530223255, "grad_norm": 1.917312741279602, "learning_rate": 1.790796413773152e-05, "loss": 0.4799, "step": 21757 }, { "epoch": 3.55181421166483, "grad_norm": 2.308961868286133, "learning_rate": 1.790776990104929e-05, "loss": 0.4868, "step": 21758 }, { "epoch": 3.5519774703073344, "grad_norm": 2.505998373031616, "learning_rate": 1.7907575656403937e-05, "loss": 0.5611, "step": 21759 }, { "epoch": 3.552140728949839, "grad_norm": 2.2484285831451416, "learning_rate": 1.7907381403795655e-05, "loss": 0.599, "step": 21760 }, { "epoch": 3.5523039875923432, "grad_norm": 1.8783317804336548, "learning_rate": 1.7907187143224643e-05, "loss": 0.4457, "step": 21761 }, { "epoch": 3.5524672462348477, "grad_norm": 2.232701063156128, "learning_rate": 1.79069928746911e-05, "loss": 0.544, "step": 21762 }, { "epoch": 3.552630504877352, "grad_norm": 1.6900278329849243, "learning_rate": 1.7906798598195214e-05, "loss": 0.4101, "step": 21763 }, { "epoch": 3.5527937635198565, "grad_norm": 1.6848167181015015, "learning_rate": 1.7906604313737182e-05, "loss": 0.4135, "step": 21764 }, { "epoch": 3.552957022162361, "grad_norm": 2.0945608615875244, "learning_rate": 1.7906410021317203e-05, "loss": 0.5067, "step": 21765 }, { "epoch": 3.553120280804865, "grad_norm": 2.4566822052001953, "learning_rate": 1.790621572093547e-05, "loss": 0.603, "step": 21766 }, { "epoch": 3.5532835394473694, "grad_norm": 2.033275842666626, "learning_rate": 1.790602141259218e-05, "loss": 0.4914, "step": 21767 }, { "epoch": 3.553446798089874, "grad_norm": 1.779721736907959, "learning_rate": 1.7905827096287532e-05, "loss": 0.4282, "step": 21768 }, { "epoch": 3.5536100567323783, "grad_norm": 1.6036971807479858, "learning_rate": 1.7905632772021716e-05, "loss": 0.428, "step": 21769 }, { "epoch": 3.5537733153748827, "grad_norm": 1.8984403610229492, "learning_rate": 1.790543843979493e-05, "loss": 0.4404, "step": 21770 }, { "epoch": 3.553936574017387, "grad_norm": 2.1132194995880127, "learning_rate": 1.7905244099607368e-05, "loss": 0.512, "step": 21771 }, { "epoch": 3.554099832659891, "grad_norm": 2.0005879402160645, "learning_rate": 1.7905049751459228e-05, "loss": 0.4752, "step": 21772 }, { "epoch": 3.5542630913023956, "grad_norm": 1.9058263301849365, "learning_rate": 1.7904855395350706e-05, "loss": 0.4962, "step": 21773 }, { "epoch": 3.5544263499449, "grad_norm": 2.1389994621276855, "learning_rate": 1.7904661031281996e-05, "loss": 0.5097, "step": 21774 }, { "epoch": 3.5545896085874045, "grad_norm": 2.016444206237793, "learning_rate": 1.7904466659253293e-05, "loss": 0.4492, "step": 21775 }, { "epoch": 3.554752867229909, "grad_norm": 2.152681350708008, "learning_rate": 1.79042722792648e-05, "loss": 0.5079, "step": 21776 }, { "epoch": 3.5549161258724133, "grad_norm": 1.99491286277771, "learning_rate": 1.79040778913167e-05, "loss": 0.4626, "step": 21777 }, { "epoch": 3.555079384514918, "grad_norm": 1.9207217693328857, "learning_rate": 1.79038834954092e-05, "loss": 0.5205, "step": 21778 }, { "epoch": 3.5552426431574222, "grad_norm": 1.8845394849777222, "learning_rate": 1.790368909154249e-05, "loss": 0.4797, "step": 21779 }, { "epoch": 3.5554059017999267, "grad_norm": 1.9803330898284912, "learning_rate": 1.7903494679716767e-05, "loss": 0.4845, "step": 21780 }, { "epoch": 3.555569160442431, "grad_norm": 1.7239718437194824, "learning_rate": 1.7903300259932225e-05, "loss": 0.4212, "step": 21781 }, { "epoch": 3.5557324190849355, "grad_norm": 1.9936513900756836, "learning_rate": 1.7903105832189066e-05, "loss": 0.4369, "step": 21782 }, { "epoch": 3.5558956777274395, "grad_norm": 2.1598148345947266, "learning_rate": 1.7902911396487477e-05, "loss": 0.5011, "step": 21783 }, { "epoch": 3.556058936369944, "grad_norm": 1.8471391201019287, "learning_rate": 1.790271695282766e-05, "loss": 0.4294, "step": 21784 }, { "epoch": 3.5562221950124484, "grad_norm": 1.9795513153076172, "learning_rate": 1.7902522501209812e-05, "loss": 0.4865, "step": 21785 }, { "epoch": 3.556385453654953, "grad_norm": 1.8210233449935913, "learning_rate": 1.7902328041634122e-05, "loss": 0.4026, "step": 21786 }, { "epoch": 3.5565487122974573, "grad_norm": 1.7639062404632568, "learning_rate": 1.7902133574100794e-05, "loss": 0.3892, "step": 21787 }, { "epoch": 3.5567119709399617, "grad_norm": 1.6412688493728638, "learning_rate": 1.7901939098610016e-05, "loss": 0.4225, "step": 21788 }, { "epoch": 3.556875229582466, "grad_norm": 2.381150245666504, "learning_rate": 1.7901744615161986e-05, "loss": 0.5657, "step": 21789 }, { "epoch": 3.55703848822497, "grad_norm": 1.660722255706787, "learning_rate": 1.7901550123756906e-05, "loss": 0.4094, "step": 21790 }, { "epoch": 3.5572017468674746, "grad_norm": 1.9002405405044556, "learning_rate": 1.7901355624394963e-05, "loss": 0.4937, "step": 21791 }, { "epoch": 3.557365005509979, "grad_norm": 2.231276750564575, "learning_rate": 1.790116111707636e-05, "loss": 0.5768, "step": 21792 }, { "epoch": 3.5575282641524835, "grad_norm": 2.256606101989746, "learning_rate": 1.7900966601801287e-05, "loss": 0.6485, "step": 21793 }, { "epoch": 3.557691522794988, "grad_norm": 2.0654640197753906, "learning_rate": 1.7900772078569946e-05, "loss": 0.4711, "step": 21794 }, { "epoch": 3.5578547814374923, "grad_norm": 2.2056949138641357, "learning_rate": 1.790057754738253e-05, "loss": 0.5536, "step": 21795 }, { "epoch": 3.558018040079997, "grad_norm": 2.3299999237060547, "learning_rate": 1.790038300823923e-05, "loss": 0.4997, "step": 21796 }, { "epoch": 3.558181298722501, "grad_norm": 1.983149766921997, "learning_rate": 1.790018846114025e-05, "loss": 0.5096, "step": 21797 }, { "epoch": 3.5583445573650057, "grad_norm": 1.7953990697860718, "learning_rate": 1.789999390608578e-05, "loss": 0.461, "step": 21798 }, { "epoch": 3.55850781600751, "grad_norm": 1.8091925382614136, "learning_rate": 1.789979934307602e-05, "loss": 0.4718, "step": 21799 }, { "epoch": 3.5586710746500145, "grad_norm": 1.809705138206482, "learning_rate": 1.7899604772111163e-05, "loss": 0.43, "step": 21800 }, { "epoch": 3.5588343332925185, "grad_norm": 1.9264963865280151, "learning_rate": 1.7899410193191408e-05, "loss": 0.4816, "step": 21801 }, { "epoch": 3.558997591935023, "grad_norm": 1.8900479078292847, "learning_rate": 1.7899215606316945e-05, "loss": 0.5016, "step": 21802 }, { "epoch": 3.5591608505775274, "grad_norm": 1.6633331775665283, "learning_rate": 1.789902101148798e-05, "loss": 0.3666, "step": 21803 }, { "epoch": 3.559324109220032, "grad_norm": 1.701056957244873, "learning_rate": 1.7898826408704696e-05, "loss": 0.4372, "step": 21804 }, { "epoch": 3.5594873678625363, "grad_norm": 1.9202502965927124, "learning_rate": 1.78986317979673e-05, "loss": 0.5144, "step": 21805 }, { "epoch": 3.5596506265050407, "grad_norm": 1.9996602535247803, "learning_rate": 1.7898437179275986e-05, "loss": 0.5419, "step": 21806 }, { "epoch": 3.559813885147545, "grad_norm": 2.012939691543579, "learning_rate": 1.7898242552630945e-05, "loss": 0.5008, "step": 21807 }, { "epoch": 3.559977143790049, "grad_norm": 2.2351670265197754, "learning_rate": 1.7898047918032376e-05, "loss": 0.5318, "step": 21808 }, { "epoch": 3.5601404024325536, "grad_norm": 2.150667428970337, "learning_rate": 1.7897853275480476e-05, "loss": 0.5227, "step": 21809 }, { "epoch": 3.560303661075058, "grad_norm": 1.770318865776062, "learning_rate": 1.789765862497544e-05, "loss": 0.4273, "step": 21810 }, { "epoch": 3.5604669197175625, "grad_norm": 1.7943822145462036, "learning_rate": 1.789746396651746e-05, "loss": 0.474, "step": 21811 }, { "epoch": 3.560630178360067, "grad_norm": 2.685622453689575, "learning_rate": 1.789726930010674e-05, "loss": 0.5462, "step": 21812 }, { "epoch": 3.5607934370025713, "grad_norm": 2.206141471862793, "learning_rate": 1.789707462574347e-05, "loss": 0.574, "step": 21813 }, { "epoch": 3.5609566956450758, "grad_norm": 1.8622727394104004, "learning_rate": 1.789687994342785e-05, "loss": 0.4504, "step": 21814 }, { "epoch": 3.56111995428758, "grad_norm": 1.8577104806900024, "learning_rate": 1.7896685253160072e-05, "loss": 0.4616, "step": 21815 }, { "epoch": 3.5612832129300847, "grad_norm": 2.570255994796753, "learning_rate": 1.7896490554940334e-05, "loss": 0.5651, "step": 21816 }, { "epoch": 3.561446471572589, "grad_norm": 1.8978209495544434, "learning_rate": 1.789629584876883e-05, "loss": 0.444, "step": 21817 }, { "epoch": 3.5616097302150935, "grad_norm": 2.1560051441192627, "learning_rate": 1.7896101134645762e-05, "loss": 0.6093, "step": 21818 }, { "epoch": 3.5617729888575975, "grad_norm": 1.5103833675384521, "learning_rate": 1.789590641257132e-05, "loss": 0.3874, "step": 21819 }, { "epoch": 3.561936247500102, "grad_norm": 1.976035237312317, "learning_rate": 1.7895711682545704e-05, "loss": 0.4608, "step": 21820 }, { "epoch": 3.5620995061426064, "grad_norm": 1.9082832336425781, "learning_rate": 1.789551694456911e-05, "loss": 0.4821, "step": 21821 }, { "epoch": 3.562262764785111, "grad_norm": 1.777070164680481, "learning_rate": 1.7895322198641726e-05, "loss": 0.4579, "step": 21822 }, { "epoch": 3.5624260234276153, "grad_norm": 2.0965476036071777, "learning_rate": 1.789512744476376e-05, "loss": 0.5242, "step": 21823 }, { "epoch": 3.5625892820701197, "grad_norm": 1.9089587926864624, "learning_rate": 1.7894932682935402e-05, "loss": 0.4402, "step": 21824 }, { "epoch": 3.5627525407126237, "grad_norm": 1.9371379613876343, "learning_rate": 1.7894737913156847e-05, "loss": 0.5137, "step": 21825 }, { "epoch": 3.562915799355128, "grad_norm": 1.842288613319397, "learning_rate": 1.7894543135428297e-05, "loss": 0.4866, "step": 21826 }, { "epoch": 3.5630790579976326, "grad_norm": 1.937748908996582, "learning_rate": 1.789434834974994e-05, "loss": 0.4705, "step": 21827 }, { "epoch": 3.563242316640137, "grad_norm": 1.686930775642395, "learning_rate": 1.789415355612198e-05, "loss": 0.4165, "step": 21828 }, { "epoch": 3.5634055752826415, "grad_norm": 2.349452495574951, "learning_rate": 1.7893958754544607e-05, "loss": 0.5502, "step": 21829 }, { "epoch": 3.563568833925146, "grad_norm": 2.1617016792297363, "learning_rate": 1.789376394501802e-05, "loss": 0.5081, "step": 21830 }, { "epoch": 3.5637320925676503, "grad_norm": 2.03582763671875, "learning_rate": 1.7893569127542412e-05, "loss": 0.5334, "step": 21831 }, { "epoch": 3.5638953512101548, "grad_norm": 1.5998218059539795, "learning_rate": 1.7893374302117986e-05, "loss": 0.4301, "step": 21832 }, { "epoch": 3.564058609852659, "grad_norm": 2.423078775405884, "learning_rate": 1.7893179468744936e-05, "loss": 0.5291, "step": 21833 }, { "epoch": 3.5642218684951636, "grad_norm": 1.9467145204544067, "learning_rate": 1.789298462742345e-05, "loss": 0.4513, "step": 21834 }, { "epoch": 3.564385127137668, "grad_norm": 1.707524299621582, "learning_rate": 1.7892789778153735e-05, "loss": 0.4357, "step": 21835 }, { "epoch": 3.564548385780172, "grad_norm": 2.027599334716797, "learning_rate": 1.7892594920935985e-05, "loss": 0.4532, "step": 21836 }, { "epoch": 3.5647116444226765, "grad_norm": 2.192307233810425, "learning_rate": 1.789240005577039e-05, "loss": 0.5131, "step": 21837 }, { "epoch": 3.564874903065181, "grad_norm": 1.9819724559783936, "learning_rate": 1.7892205182657148e-05, "loss": 0.5017, "step": 21838 }, { "epoch": 3.5650381617076854, "grad_norm": 1.7695872783660889, "learning_rate": 1.7892010301596464e-05, "loss": 0.4067, "step": 21839 }, { "epoch": 3.56520142035019, "grad_norm": 1.8648720979690552, "learning_rate": 1.7891815412588524e-05, "loss": 0.4683, "step": 21840 }, { "epoch": 3.5653646789926943, "grad_norm": 1.664868950843811, "learning_rate": 1.7891620515633524e-05, "loss": 0.3888, "step": 21841 }, { "epoch": 3.5655279376351987, "grad_norm": 2.1154863834381104, "learning_rate": 1.789142561073167e-05, "loss": 0.4819, "step": 21842 }, { "epoch": 3.5656911962777027, "grad_norm": 1.8787294626235962, "learning_rate": 1.7891230697883152e-05, "loss": 0.3863, "step": 21843 }, { "epoch": 3.565854454920207, "grad_norm": 1.8323928117752075, "learning_rate": 1.7891035777088163e-05, "loss": 0.4813, "step": 21844 }, { "epoch": 3.5660177135627116, "grad_norm": 1.8686903715133667, "learning_rate": 1.789084084834691e-05, "loss": 0.521, "step": 21845 }, { "epoch": 3.566180972205216, "grad_norm": 1.9573335647583008, "learning_rate": 1.7890645911659577e-05, "loss": 0.4518, "step": 21846 }, { "epoch": 3.5663442308477205, "grad_norm": 2.1004364490509033, "learning_rate": 1.7890450967026366e-05, "loss": 0.4553, "step": 21847 }, { "epoch": 3.566507489490225, "grad_norm": 2.59836745262146, "learning_rate": 1.7890256014447475e-05, "loss": 0.5981, "step": 21848 }, { "epoch": 3.5666707481327293, "grad_norm": 2.919497489929199, "learning_rate": 1.7890061053923097e-05, "loss": 0.4993, "step": 21849 }, { "epoch": 3.5668340067752338, "grad_norm": 2.1575231552124023, "learning_rate": 1.788986608545343e-05, "loss": 0.503, "step": 21850 }, { "epoch": 3.566997265417738, "grad_norm": 2.232895851135254, "learning_rate": 1.788967110903867e-05, "loss": 0.5445, "step": 21851 }, { "epoch": 3.5671605240602426, "grad_norm": 1.8924622535705566, "learning_rate": 1.7889476124679014e-05, "loss": 0.4485, "step": 21852 }, { "epoch": 3.567323782702747, "grad_norm": 1.8439452648162842, "learning_rate": 1.788928113237465e-05, "loss": 0.48, "step": 21853 }, { "epoch": 3.567487041345251, "grad_norm": 2.121872901916504, "learning_rate": 1.7889086132125792e-05, "loss": 0.4942, "step": 21854 }, { "epoch": 3.5676502999877555, "grad_norm": 1.8981105089187622, "learning_rate": 1.7888891123932622e-05, "loss": 0.4577, "step": 21855 }, { "epoch": 3.56781355863026, "grad_norm": 1.8151183128356934, "learning_rate": 1.7888696107795343e-05, "loss": 0.3827, "step": 21856 }, { "epoch": 3.5679768172727644, "grad_norm": 1.980865240097046, "learning_rate": 1.7888501083714146e-05, "loss": 0.5436, "step": 21857 }, { "epoch": 3.568140075915269, "grad_norm": 2.2292392253875732, "learning_rate": 1.7888306051689236e-05, "loss": 0.4906, "step": 21858 }, { "epoch": 3.5683033345577733, "grad_norm": 1.9866108894348145, "learning_rate": 1.7888111011720797e-05, "loss": 0.4832, "step": 21859 }, { "epoch": 3.5684665932002777, "grad_norm": 2.0010173320770264, "learning_rate": 1.7887915963809036e-05, "loss": 0.4351, "step": 21860 }, { "epoch": 3.5686298518427817, "grad_norm": 1.8890204429626465, "learning_rate": 1.7887720907954145e-05, "loss": 0.4404, "step": 21861 }, { "epoch": 3.568793110485286, "grad_norm": 1.877220630645752, "learning_rate": 1.788752584415632e-05, "loss": 0.4611, "step": 21862 }, { "epoch": 3.5689563691277906, "grad_norm": 1.7451977729797363, "learning_rate": 1.788733077241576e-05, "loss": 0.4968, "step": 21863 }, { "epoch": 3.569119627770295, "grad_norm": 2.042057991027832, "learning_rate": 1.788713569273266e-05, "loss": 0.4634, "step": 21864 }, { "epoch": 3.5692828864127994, "grad_norm": 1.8852342367172241, "learning_rate": 1.7886940605107218e-05, "loss": 0.4067, "step": 21865 }, { "epoch": 3.569446145055304, "grad_norm": 1.892330288887024, "learning_rate": 1.7886745509539628e-05, "loss": 0.4385, "step": 21866 }, { "epoch": 3.5696094036978083, "grad_norm": 1.9815340042114258, "learning_rate": 1.7886550406030084e-05, "loss": 0.4493, "step": 21867 }, { "epoch": 3.5697726623403128, "grad_norm": 1.6243996620178223, "learning_rate": 1.788635529457879e-05, "loss": 0.3522, "step": 21868 }, { "epoch": 3.569935920982817, "grad_norm": 1.7499668598175049, "learning_rate": 1.7886160175185935e-05, "loss": 0.4039, "step": 21869 }, { "epoch": 3.5700991796253216, "grad_norm": 1.5884125232696533, "learning_rate": 1.7885965047851722e-05, "loss": 0.4416, "step": 21870 }, { "epoch": 3.570262438267826, "grad_norm": 1.6384893655776978, "learning_rate": 1.7885769912576342e-05, "loss": 0.4293, "step": 21871 }, { "epoch": 3.57042569691033, "grad_norm": 2.0757851600646973, "learning_rate": 1.7885574769359998e-05, "loss": 0.4339, "step": 21872 }, { "epoch": 3.5705889555528345, "grad_norm": 1.6665027141571045, "learning_rate": 1.788537961820288e-05, "loss": 0.3964, "step": 21873 }, { "epoch": 3.570752214195339, "grad_norm": 2.264808416366577, "learning_rate": 1.7885184459105182e-05, "loss": 0.5632, "step": 21874 }, { "epoch": 3.5709154728378434, "grad_norm": 2.107412099838257, "learning_rate": 1.788498929206711e-05, "loss": 0.5596, "step": 21875 }, { "epoch": 3.571078731480348, "grad_norm": 1.825740098953247, "learning_rate": 1.7884794117088856e-05, "loss": 0.4141, "step": 21876 }, { "epoch": 3.5712419901228523, "grad_norm": 2.2766451835632324, "learning_rate": 1.7884598934170614e-05, "loss": 0.5639, "step": 21877 }, { "epoch": 3.5714052487653563, "grad_norm": 1.9639520645141602, "learning_rate": 1.7884403743312583e-05, "loss": 0.4961, "step": 21878 }, { "epoch": 3.5715685074078607, "grad_norm": 1.9645298719406128, "learning_rate": 1.7884208544514963e-05, "loss": 0.4925, "step": 21879 }, { "epoch": 3.571731766050365, "grad_norm": 1.836526870727539, "learning_rate": 1.7884013337777944e-05, "loss": 0.5053, "step": 21880 }, { "epoch": 3.5718950246928696, "grad_norm": 2.2814979553222656, "learning_rate": 1.7883818123101727e-05, "loss": 0.4692, "step": 21881 }, { "epoch": 3.572058283335374, "grad_norm": 1.8461856842041016, "learning_rate": 1.7883622900486506e-05, "loss": 0.4583, "step": 21882 }, { "epoch": 3.5722215419778784, "grad_norm": 2.1552135944366455, "learning_rate": 1.788342766993248e-05, "loss": 0.474, "step": 21883 }, { "epoch": 3.572384800620383, "grad_norm": 2.1552369594573975, "learning_rate": 1.7883232431439843e-05, "loss": 0.5266, "step": 21884 }, { "epoch": 3.5725480592628873, "grad_norm": 2.1935689449310303, "learning_rate": 1.7883037185008793e-05, "loss": 0.6067, "step": 21885 }, { "epoch": 3.5727113179053918, "grad_norm": 1.939322590827942, "learning_rate": 1.788284193063953e-05, "loss": 0.4514, "step": 21886 }, { "epoch": 3.572874576547896, "grad_norm": 2.0657079219818115, "learning_rate": 1.7882646668332244e-05, "loss": 0.4337, "step": 21887 }, { "epoch": 3.5730378351904006, "grad_norm": 2.7103140354156494, "learning_rate": 1.7882451398087138e-05, "loss": 0.5602, "step": 21888 }, { "epoch": 3.5732010938329046, "grad_norm": 1.7269357442855835, "learning_rate": 1.78822561199044e-05, "loss": 0.4644, "step": 21889 }, { "epoch": 3.573364352475409, "grad_norm": 1.8001269102096558, "learning_rate": 1.7882060833784237e-05, "loss": 0.4151, "step": 21890 }, { "epoch": 3.5735276111179135, "grad_norm": 1.992209553718567, "learning_rate": 1.788186553972684e-05, "loss": 0.4845, "step": 21891 }, { "epoch": 3.573690869760418, "grad_norm": 1.6378120183944702, "learning_rate": 1.7881670237732404e-05, "loss": 0.4096, "step": 21892 }, { "epoch": 3.5738541284029224, "grad_norm": 1.8799859285354614, "learning_rate": 1.7881474927801134e-05, "loss": 0.4123, "step": 21893 }, { "epoch": 3.574017387045427, "grad_norm": 1.676257610321045, "learning_rate": 1.7881279609933216e-05, "loss": 0.4057, "step": 21894 }, { "epoch": 3.5741806456879313, "grad_norm": 1.6666085720062256, "learning_rate": 1.788108428412885e-05, "loss": 0.406, "step": 21895 }, { "epoch": 3.5743439043304353, "grad_norm": 1.7100847959518433, "learning_rate": 1.7880888950388237e-05, "loss": 0.4399, "step": 21896 }, { "epoch": 3.5745071629729397, "grad_norm": 2.001616954803467, "learning_rate": 1.788069360871157e-05, "loss": 0.4405, "step": 21897 }, { "epoch": 3.574670421615444, "grad_norm": 1.6783391237258911, "learning_rate": 1.788049825909905e-05, "loss": 0.4569, "step": 21898 }, { "epoch": 3.5748336802579486, "grad_norm": 1.8999356031417847, "learning_rate": 1.788030290155087e-05, "loss": 0.3987, "step": 21899 }, { "epoch": 3.574996938900453, "grad_norm": 1.5542922019958496, "learning_rate": 1.788010753606722e-05, "loss": 0.3758, "step": 21900 }, { "epoch": 3.5751601975429574, "grad_norm": 1.755906581878662, "learning_rate": 1.787991216264831e-05, "loss": 0.3717, "step": 21901 }, { "epoch": 3.575323456185462, "grad_norm": 2.043030023574829, "learning_rate": 1.787971678129433e-05, "loss": 0.5434, "step": 21902 }, { "epoch": 3.5754867148279663, "grad_norm": 2.2840068340301514, "learning_rate": 1.7879521392005474e-05, "loss": 0.558, "step": 21903 }, { "epoch": 3.5756499734704708, "grad_norm": 2.3171422481536865, "learning_rate": 1.7879325994781943e-05, "loss": 0.4954, "step": 21904 }, { "epoch": 3.575813232112975, "grad_norm": 2.0707130432128906, "learning_rate": 1.7879130589623936e-05, "loss": 0.5345, "step": 21905 }, { "epoch": 3.5759764907554796, "grad_norm": 1.9905937910079956, "learning_rate": 1.7878935176531646e-05, "loss": 0.5119, "step": 21906 }, { "epoch": 3.5761397493979836, "grad_norm": 1.9951069355010986, "learning_rate": 1.7878739755505266e-05, "loss": 0.439, "step": 21907 }, { "epoch": 3.576303008040488, "grad_norm": 1.9053592681884766, "learning_rate": 1.7878544326545003e-05, "loss": 0.4903, "step": 21908 }, { "epoch": 3.5764662666829925, "grad_norm": 2.0357470512390137, "learning_rate": 1.7878348889651043e-05, "loss": 0.4617, "step": 21909 }, { "epoch": 3.576629525325497, "grad_norm": 2.21885085105896, "learning_rate": 1.787815344482359e-05, "loss": 0.6372, "step": 21910 }, { "epoch": 3.5767927839680014, "grad_norm": 1.877608060836792, "learning_rate": 1.787795799206284e-05, "loss": 0.4118, "step": 21911 }, { "epoch": 3.576956042610506, "grad_norm": 1.837758183479309, "learning_rate": 1.787776253136899e-05, "loss": 0.3927, "step": 21912 }, { "epoch": 3.57711930125301, "grad_norm": 1.6679167747497559, "learning_rate": 1.787756706274223e-05, "loss": 0.4456, "step": 21913 }, { "epoch": 3.5772825598955142, "grad_norm": 1.8095427751541138, "learning_rate": 1.7877371586182764e-05, "loss": 0.4842, "step": 21914 }, { "epoch": 3.5774458185380187, "grad_norm": 1.6888880729675293, "learning_rate": 1.7877176101690788e-05, "loss": 0.4199, "step": 21915 }, { "epoch": 3.577609077180523, "grad_norm": 1.9366626739501953, "learning_rate": 1.7876980609266493e-05, "loss": 0.4424, "step": 21916 }, { "epoch": 3.5777723358230276, "grad_norm": 2.172219753265381, "learning_rate": 1.7876785108910087e-05, "loss": 0.5308, "step": 21917 }, { "epoch": 3.577935594465532, "grad_norm": 1.9644886255264282, "learning_rate": 1.7876589600621757e-05, "loss": 0.503, "step": 21918 }, { "epoch": 3.5780988531080364, "grad_norm": 2.2235496044158936, "learning_rate": 1.7876394084401704e-05, "loss": 0.5668, "step": 21919 }, { "epoch": 3.578262111750541, "grad_norm": 1.6160476207733154, "learning_rate": 1.7876198560250127e-05, "loss": 0.4469, "step": 21920 }, { "epoch": 3.5784253703930453, "grad_norm": 1.6632403135299683, "learning_rate": 1.7876003028167218e-05, "loss": 0.4427, "step": 21921 }, { "epoch": 3.5785886290355498, "grad_norm": 2.1309149265289307, "learning_rate": 1.7875807488153173e-05, "loss": 0.493, "step": 21922 }, { "epoch": 3.578751887678054, "grad_norm": 2.063325881958008, "learning_rate": 1.7875611940208197e-05, "loss": 0.5302, "step": 21923 }, { "epoch": 3.578915146320558, "grad_norm": 1.86729097366333, "learning_rate": 1.7875416384332478e-05, "loss": 0.5042, "step": 21924 }, { "epoch": 3.5790784049630626, "grad_norm": 1.755597710609436, "learning_rate": 1.787522082052622e-05, "loss": 0.4285, "step": 21925 }, { "epoch": 3.579241663605567, "grad_norm": 2.2097668647766113, "learning_rate": 1.7875025248789612e-05, "loss": 0.5006, "step": 21926 }, { "epoch": 3.5794049222480715, "grad_norm": 2.170842170715332, "learning_rate": 1.787482966912286e-05, "loss": 0.4987, "step": 21927 }, { "epoch": 3.579568180890576, "grad_norm": 2.0937066078186035, "learning_rate": 1.7874634081526155e-05, "loss": 0.5259, "step": 21928 }, { "epoch": 3.5797314395330804, "grad_norm": 2.074016809463501, "learning_rate": 1.7874438485999695e-05, "loss": 0.5493, "step": 21929 }, { "epoch": 3.579894698175585, "grad_norm": 1.7127869129180908, "learning_rate": 1.787424288254368e-05, "loss": 0.4656, "step": 21930 }, { "epoch": 3.580057956818089, "grad_norm": 1.9170527458190918, "learning_rate": 1.7874047271158303e-05, "loss": 0.4569, "step": 21931 }, { "epoch": 3.5802212154605932, "grad_norm": 2.057943344116211, "learning_rate": 1.787385165184376e-05, "loss": 0.4825, "step": 21932 }, { "epoch": 3.5803844741030977, "grad_norm": 1.8369253873825073, "learning_rate": 1.7873656024600254e-05, "loss": 0.4584, "step": 21933 }, { "epoch": 3.580547732745602, "grad_norm": 2.1961116790771484, "learning_rate": 1.787346038942798e-05, "loss": 0.4766, "step": 21934 }, { "epoch": 3.5807109913881066, "grad_norm": 2.202058792114258, "learning_rate": 1.787326474632713e-05, "loss": 0.4895, "step": 21935 }, { "epoch": 3.580874250030611, "grad_norm": 1.874743938446045, "learning_rate": 1.7873069095297903e-05, "loss": 0.4552, "step": 21936 }, { "epoch": 3.5810375086731154, "grad_norm": 1.8474429845809937, "learning_rate": 1.7872873436340504e-05, "loss": 0.4795, "step": 21937 }, { "epoch": 3.58120076731562, "grad_norm": 1.9237874746322632, "learning_rate": 1.7872677769455117e-05, "loss": 0.4904, "step": 21938 }, { "epoch": 3.5813640259581243, "grad_norm": 2.1012330055236816, "learning_rate": 1.787248209464195e-05, "loss": 0.5151, "step": 21939 }, { "epoch": 3.5815272846006287, "grad_norm": 1.9552347660064697, "learning_rate": 1.7872286411901192e-05, "loss": 0.4952, "step": 21940 }, { "epoch": 3.581690543243133, "grad_norm": 1.781921148300171, "learning_rate": 1.7872090721233048e-05, "loss": 0.44, "step": 21941 }, { "epoch": 3.581853801885637, "grad_norm": 1.6887034177780151, "learning_rate": 1.7871895022637705e-05, "loss": 0.4611, "step": 21942 }, { "epoch": 3.5820170605281416, "grad_norm": 2.0518107414245605, "learning_rate": 1.787169931611537e-05, "loss": 0.5357, "step": 21943 }, { "epoch": 3.582180319170646, "grad_norm": 1.7392672300338745, "learning_rate": 1.7871503601666233e-05, "loss": 0.525, "step": 21944 }, { "epoch": 3.5823435778131505, "grad_norm": 1.809040904045105, "learning_rate": 1.7871307879290495e-05, "loss": 0.4111, "step": 21945 }, { "epoch": 3.582506836455655, "grad_norm": 2.2967939376831055, "learning_rate": 1.7871112148988354e-05, "loss": 0.4626, "step": 21946 }, { "epoch": 3.5826700950981594, "grad_norm": 2.119206428527832, "learning_rate": 1.787091641076e-05, "loss": 0.4915, "step": 21947 }, { "epoch": 3.582833353740664, "grad_norm": 1.6280481815338135, "learning_rate": 1.787072066460564e-05, "loss": 0.3816, "step": 21948 }, { "epoch": 3.582996612383168, "grad_norm": 2.4555723667144775, "learning_rate": 1.787052491052547e-05, "loss": 0.5501, "step": 21949 }, { "epoch": 3.5831598710256722, "grad_norm": 1.6049343347549438, "learning_rate": 1.7870329148519675e-05, "loss": 0.41, "step": 21950 }, { "epoch": 3.5833231296681767, "grad_norm": 1.8840042352676392, "learning_rate": 1.7870133378588468e-05, "loss": 0.468, "step": 21951 }, { "epoch": 3.583486388310681, "grad_norm": 1.867613673210144, "learning_rate": 1.7869937600732034e-05, "loss": 0.4822, "step": 21952 }, { "epoch": 3.5836496469531856, "grad_norm": 2.506969928741455, "learning_rate": 1.7869741814950576e-05, "loss": 0.5632, "step": 21953 }, { "epoch": 3.58381290559569, "grad_norm": 2.0794849395751953, "learning_rate": 1.786954602124429e-05, "loss": 0.4388, "step": 21954 }, { "epoch": 3.5839761642381944, "grad_norm": 2.1662180423736572, "learning_rate": 1.7869350219613375e-05, "loss": 0.5382, "step": 21955 }, { "epoch": 3.584139422880699, "grad_norm": 1.911665678024292, "learning_rate": 1.7869154410058024e-05, "loss": 0.5181, "step": 21956 }, { "epoch": 3.5843026815232033, "grad_norm": 2.2791547775268555, "learning_rate": 1.786895859257844e-05, "loss": 0.4961, "step": 21957 }, { "epoch": 3.5844659401657077, "grad_norm": 2.0318894386291504, "learning_rate": 1.7868762767174813e-05, "loss": 0.4851, "step": 21958 }, { "epoch": 3.584629198808212, "grad_norm": 1.6760984659194946, "learning_rate": 1.7868566933847344e-05, "loss": 0.4746, "step": 21959 }, { "epoch": 3.584792457450716, "grad_norm": 2.365039825439453, "learning_rate": 1.7868371092596233e-05, "loss": 0.5885, "step": 21960 }, { "epoch": 3.5849557160932206, "grad_norm": 1.937918782234192, "learning_rate": 1.786817524342167e-05, "loss": 0.4531, "step": 21961 }, { "epoch": 3.585118974735725, "grad_norm": 1.6597603559494019, "learning_rate": 1.786797938632386e-05, "loss": 0.3623, "step": 21962 }, { "epoch": 3.5852822333782295, "grad_norm": 2.1196014881134033, "learning_rate": 1.7867783521303e-05, "loss": 0.4829, "step": 21963 }, { "epoch": 3.585445492020734, "grad_norm": 1.9540287256240845, "learning_rate": 1.7867587648359278e-05, "loss": 0.4592, "step": 21964 }, { "epoch": 3.5856087506632384, "grad_norm": 2.3246374130249023, "learning_rate": 1.78673917674929e-05, "loss": 0.4783, "step": 21965 }, { "epoch": 3.5857720093057424, "grad_norm": 1.8145066499710083, "learning_rate": 1.7867195878704062e-05, "loss": 0.3899, "step": 21966 }, { "epoch": 3.585935267948247, "grad_norm": 1.8273468017578125, "learning_rate": 1.7866999981992957e-05, "loss": 0.4864, "step": 21967 }, { "epoch": 3.5860985265907512, "grad_norm": 1.577427864074707, "learning_rate": 1.7866804077359787e-05, "loss": 0.3926, "step": 21968 }, { "epoch": 3.5862617852332557, "grad_norm": 1.6128255128860474, "learning_rate": 1.786660816480475e-05, "loss": 0.411, "step": 21969 }, { "epoch": 3.58642504387576, "grad_norm": 2.3314104080200195, "learning_rate": 1.7866412244328034e-05, "loss": 0.5685, "step": 21970 }, { "epoch": 3.5865883025182645, "grad_norm": 1.677990198135376, "learning_rate": 1.7866216315929844e-05, "loss": 0.4395, "step": 21971 }, { "epoch": 3.586751561160769, "grad_norm": 1.8138025999069214, "learning_rate": 1.786602037961038e-05, "loss": 0.4286, "step": 21972 }, { "epoch": 3.5869148198032734, "grad_norm": 2.1595637798309326, "learning_rate": 1.7865824435369836e-05, "loss": 0.5799, "step": 21973 }, { "epoch": 3.587078078445778, "grad_norm": 1.8805086612701416, "learning_rate": 1.7865628483208407e-05, "loss": 0.4834, "step": 21974 }, { "epoch": 3.5872413370882823, "grad_norm": 2.220792055130005, "learning_rate": 1.7865432523126292e-05, "loss": 0.5379, "step": 21975 }, { "epoch": 3.5874045957307867, "grad_norm": 1.7504358291625977, "learning_rate": 1.7865236555123687e-05, "loss": 0.507, "step": 21976 }, { "epoch": 3.5875678543732907, "grad_norm": 2.029754877090454, "learning_rate": 1.7865040579200793e-05, "loss": 0.5585, "step": 21977 }, { "epoch": 3.587731113015795, "grad_norm": 1.897865653038025, "learning_rate": 1.7864844595357808e-05, "loss": 0.4356, "step": 21978 }, { "epoch": 3.5878943716582996, "grad_norm": 2.0741615295410156, "learning_rate": 1.7864648603594922e-05, "loss": 0.5805, "step": 21979 }, { "epoch": 3.588057630300804, "grad_norm": 1.8194315433502197, "learning_rate": 1.786445260391234e-05, "loss": 0.414, "step": 21980 }, { "epoch": 3.5882208889433085, "grad_norm": 1.712816834449768, "learning_rate": 1.7864256596310254e-05, "loss": 0.4191, "step": 21981 }, { "epoch": 3.588384147585813, "grad_norm": 1.705500602722168, "learning_rate": 1.7864060580788863e-05, "loss": 0.4222, "step": 21982 }, { "epoch": 3.5885474062283174, "grad_norm": 1.805416464805603, "learning_rate": 1.786386455734837e-05, "loss": 0.5118, "step": 21983 }, { "epoch": 3.5887106648708214, "grad_norm": 2.2393100261688232, "learning_rate": 1.7863668525988963e-05, "loss": 0.5537, "step": 21984 }, { "epoch": 3.588873923513326, "grad_norm": 2.1463422775268555, "learning_rate": 1.7863472486710843e-05, "loss": 0.4762, "step": 21985 }, { "epoch": 3.5890371821558302, "grad_norm": 1.6486272811889648, "learning_rate": 1.786327643951421e-05, "loss": 0.431, "step": 21986 }, { "epoch": 3.5892004407983347, "grad_norm": 1.8784397840499878, "learning_rate": 1.786308038439926e-05, "loss": 0.4348, "step": 21987 }, { "epoch": 3.589363699440839, "grad_norm": 1.8369419574737549, "learning_rate": 1.786288432136619e-05, "loss": 0.4446, "step": 21988 }, { "epoch": 3.5895269580833435, "grad_norm": 1.8326104879379272, "learning_rate": 1.78626882504152e-05, "loss": 0.4397, "step": 21989 }, { "epoch": 3.589690216725848, "grad_norm": 2.123246908187866, "learning_rate": 1.7862492171546478e-05, "loss": 0.5025, "step": 21990 }, { "epoch": 3.5898534753683524, "grad_norm": 1.9270087480545044, "learning_rate": 1.7862296084760235e-05, "loss": 0.4683, "step": 21991 }, { "epoch": 3.590016734010857, "grad_norm": 1.8469078540802002, "learning_rate": 1.786209999005666e-05, "loss": 0.4982, "step": 21992 }, { "epoch": 3.5901799926533613, "grad_norm": 1.6273964643478394, "learning_rate": 1.7861903887435952e-05, "loss": 0.4068, "step": 21993 }, { "epoch": 3.5903432512958657, "grad_norm": 2.567214012145996, "learning_rate": 1.7861707776898312e-05, "loss": 0.5165, "step": 21994 }, { "epoch": 3.5905065099383697, "grad_norm": 2.0252280235290527, "learning_rate": 1.786151165844393e-05, "loss": 0.4752, "step": 21995 }, { "epoch": 3.590669768580874, "grad_norm": 1.9283732175827026, "learning_rate": 1.786131553207301e-05, "loss": 0.4675, "step": 21996 }, { "epoch": 3.5908330272233786, "grad_norm": 2.2514729499816895, "learning_rate": 1.7861119397785747e-05, "loss": 0.5399, "step": 21997 }, { "epoch": 3.590996285865883, "grad_norm": 2.0396018028259277, "learning_rate": 1.786092325558234e-05, "loss": 0.4513, "step": 21998 }, { "epoch": 3.5911595445083875, "grad_norm": 2.610347270965576, "learning_rate": 1.7860727105462982e-05, "loss": 0.5731, "step": 21999 }, { "epoch": 3.591322803150892, "grad_norm": 2.050309658050537, "learning_rate": 1.7860530947427878e-05, "loss": 0.4945, "step": 22000 }, { "epoch": 3.591486061793396, "grad_norm": 1.7506142854690552, "learning_rate": 1.7860334781477217e-05, "loss": 0.414, "step": 22001 }, { "epoch": 3.5916493204359003, "grad_norm": 2.122093439102173, "learning_rate": 1.7860138607611203e-05, "loss": 0.5221, "step": 22002 }, { "epoch": 3.591812579078405, "grad_norm": 1.7674121856689453, "learning_rate": 1.7859942425830032e-05, "loss": 0.4704, "step": 22003 }, { "epoch": 3.5919758377209092, "grad_norm": 1.7330156564712524, "learning_rate": 1.78597462361339e-05, "loss": 0.4793, "step": 22004 }, { "epoch": 3.5921390963634137, "grad_norm": 1.9280768632888794, "learning_rate": 1.785955003852301e-05, "loss": 0.5163, "step": 22005 }, { "epoch": 3.592302355005918, "grad_norm": 2.0397238731384277, "learning_rate": 1.785935383299755e-05, "loss": 0.539, "step": 22006 }, { "epoch": 3.5924656136484225, "grad_norm": 1.893534541130066, "learning_rate": 1.7859157619557725e-05, "loss": 0.5751, "step": 22007 }, { "epoch": 3.592628872290927, "grad_norm": 1.9998337030410767, "learning_rate": 1.785896139820373e-05, "loss": 0.4515, "step": 22008 }, { "epoch": 3.5927921309334314, "grad_norm": 1.606572151184082, "learning_rate": 1.785876516893576e-05, "loss": 0.4443, "step": 22009 }, { "epoch": 3.592955389575936, "grad_norm": 1.5289106369018555, "learning_rate": 1.785856893175402e-05, "loss": 0.4614, "step": 22010 }, { "epoch": 3.5931186482184403, "grad_norm": 1.744092583656311, "learning_rate": 1.78583726866587e-05, "loss": 0.3826, "step": 22011 }, { "epoch": 3.5932819068609443, "grad_norm": 2.181769371032715, "learning_rate": 1.7858176433650002e-05, "loss": 0.5026, "step": 22012 }, { "epoch": 3.5934451655034487, "grad_norm": 1.8024682998657227, "learning_rate": 1.7857980172728124e-05, "loss": 0.4322, "step": 22013 }, { "epoch": 3.593608424145953, "grad_norm": 1.6423038244247437, "learning_rate": 1.785778390389326e-05, "loss": 0.3901, "step": 22014 }, { "epoch": 3.5937716827884576, "grad_norm": 1.883238673210144, "learning_rate": 1.7857587627145614e-05, "loss": 0.4562, "step": 22015 }, { "epoch": 3.593934941430962, "grad_norm": 3.5575013160705566, "learning_rate": 1.7857391342485373e-05, "loss": 0.6973, "step": 22016 }, { "epoch": 3.5940982000734665, "grad_norm": 1.679741382598877, "learning_rate": 1.7857195049912747e-05, "loss": 0.4695, "step": 22017 }, { "epoch": 3.594261458715971, "grad_norm": 2.076328992843628, "learning_rate": 1.7856998749427924e-05, "loss": 0.5654, "step": 22018 }, { "epoch": 3.594424717358475, "grad_norm": 1.8652887344360352, "learning_rate": 1.7856802441031106e-05, "loss": 0.4588, "step": 22019 }, { "epoch": 3.5945879760009793, "grad_norm": 1.7397220134735107, "learning_rate": 1.785660612472249e-05, "loss": 0.4331, "step": 22020 }, { "epoch": 3.594751234643484, "grad_norm": 1.9583208560943604, "learning_rate": 1.7856409800502272e-05, "loss": 0.4825, "step": 22021 }, { "epoch": 3.594914493285988, "grad_norm": 2.018350124359131, "learning_rate": 1.7856213468370652e-05, "loss": 0.491, "step": 22022 }, { "epoch": 3.5950777519284927, "grad_norm": 1.7894577980041504, "learning_rate": 1.785601712832783e-05, "loss": 0.4777, "step": 22023 }, { "epoch": 3.595241010570997, "grad_norm": 2.092780351638794, "learning_rate": 1.7855820780374e-05, "loss": 0.5027, "step": 22024 }, { "epoch": 3.5954042692135015, "grad_norm": 2.030932903289795, "learning_rate": 1.7855624424509357e-05, "loss": 0.5569, "step": 22025 }, { "epoch": 3.595567527856006, "grad_norm": 1.8963302373886108, "learning_rate": 1.7855428060734104e-05, "loss": 0.4807, "step": 22026 }, { "epoch": 3.5957307864985104, "grad_norm": 2.0286102294921875, "learning_rate": 1.785523168904844e-05, "loss": 0.4472, "step": 22027 }, { "epoch": 3.595894045141015, "grad_norm": 2.1596531867980957, "learning_rate": 1.7855035309452557e-05, "loss": 0.4495, "step": 22028 }, { "epoch": 3.5960573037835193, "grad_norm": 2.5360872745513916, "learning_rate": 1.7854838921946656e-05, "loss": 0.5891, "step": 22029 }, { "epoch": 3.5962205624260233, "grad_norm": 1.9399665594100952, "learning_rate": 1.7854642526530935e-05, "loss": 0.4661, "step": 22030 }, { "epoch": 3.5963838210685277, "grad_norm": 1.8540287017822266, "learning_rate": 1.785444612320559e-05, "loss": 0.4143, "step": 22031 }, { "epoch": 3.596547079711032, "grad_norm": 2.0524580478668213, "learning_rate": 1.785424971197082e-05, "loss": 0.5199, "step": 22032 }, { "epoch": 3.5967103383535366, "grad_norm": 1.9789398908615112, "learning_rate": 1.7854053292826822e-05, "loss": 0.5037, "step": 22033 }, { "epoch": 3.596873596996041, "grad_norm": 2.1529672145843506, "learning_rate": 1.7853856865773797e-05, "loss": 0.4829, "step": 22034 }, { "epoch": 3.5970368556385455, "grad_norm": 1.5289849042892456, "learning_rate": 1.7853660430811937e-05, "loss": 0.4006, "step": 22035 }, { "epoch": 3.59720011428105, "grad_norm": 2.3152832984924316, "learning_rate": 1.7853463987941445e-05, "loss": 0.4843, "step": 22036 }, { "epoch": 3.597363372923554, "grad_norm": 2.1946394443511963, "learning_rate": 1.785326753716252e-05, "loss": 0.5241, "step": 22037 }, { "epoch": 3.5975266315660583, "grad_norm": 1.8342212438583374, "learning_rate": 1.785307107847535e-05, "loss": 0.4027, "step": 22038 }, { "epoch": 3.597689890208563, "grad_norm": 2.0708534717559814, "learning_rate": 1.7852874611880144e-05, "loss": 0.5129, "step": 22039 }, { "epoch": 3.597853148851067, "grad_norm": 1.8897058963775635, "learning_rate": 1.785267813737709e-05, "loss": 0.4244, "step": 22040 }, { "epoch": 3.5980164074935717, "grad_norm": 1.9650834798812866, "learning_rate": 1.7852481654966397e-05, "loss": 0.4629, "step": 22041 }, { "epoch": 3.598179666136076, "grad_norm": 1.4966697692871094, "learning_rate": 1.7852285164648257e-05, "loss": 0.3886, "step": 22042 }, { "epoch": 3.5983429247785805, "grad_norm": 2.144688367843628, "learning_rate": 1.7852088666422865e-05, "loss": 0.503, "step": 22043 }, { "epoch": 3.598506183421085, "grad_norm": 1.5675601959228516, "learning_rate": 1.7851892160290424e-05, "loss": 0.3992, "step": 22044 }, { "epoch": 3.5986694420635894, "grad_norm": 2.095331907272339, "learning_rate": 1.785169564625113e-05, "loss": 0.5021, "step": 22045 }, { "epoch": 3.598832700706094, "grad_norm": 1.7978808879852295, "learning_rate": 1.7851499124305176e-05, "loss": 0.4342, "step": 22046 }, { "epoch": 3.5989959593485983, "grad_norm": 2.0460994243621826, "learning_rate": 1.785130259445277e-05, "loss": 0.494, "step": 22047 }, { "epoch": 3.5991592179911023, "grad_norm": 2.1176719665527344, "learning_rate": 1.78511060566941e-05, "loss": 0.5097, "step": 22048 }, { "epoch": 3.5993224766336067, "grad_norm": 1.7713459730148315, "learning_rate": 1.785090951102937e-05, "loss": 0.4384, "step": 22049 }, { "epoch": 3.599485735276111, "grad_norm": 2.7088472843170166, "learning_rate": 1.7850712957458777e-05, "loss": 0.5602, "step": 22050 }, { "epoch": 3.5996489939186156, "grad_norm": 2.0783345699310303, "learning_rate": 1.7850516395982517e-05, "loss": 0.4862, "step": 22051 }, { "epoch": 3.59981225256112, "grad_norm": 2.0260002613067627, "learning_rate": 1.785031982660079e-05, "loss": 0.4937, "step": 22052 }, { "epoch": 3.5999755112036245, "grad_norm": 1.5685373544692993, "learning_rate": 1.7850123249313794e-05, "loss": 0.4104, "step": 22053 }, { "epoch": 3.6001387698461285, "grad_norm": 2.0836071968078613, "learning_rate": 1.7849926664121726e-05, "loss": 0.4758, "step": 22054 }, { "epoch": 3.600302028488633, "grad_norm": 1.733259677886963, "learning_rate": 1.784973007102478e-05, "loss": 0.4608, "step": 22055 }, { "epoch": 3.6004652871311373, "grad_norm": 1.978702425956726, "learning_rate": 1.7849533470023163e-05, "loss": 0.4832, "step": 22056 }, { "epoch": 3.6006285457736418, "grad_norm": 1.8370130062103271, "learning_rate": 1.7849336861117063e-05, "loss": 0.5083, "step": 22057 }, { "epoch": 3.600791804416146, "grad_norm": 1.939282774925232, "learning_rate": 1.7849140244306685e-05, "loss": 0.4159, "step": 22058 }, { "epoch": 3.6009550630586507, "grad_norm": 1.714128851890564, "learning_rate": 1.7848943619592226e-05, "loss": 0.3979, "step": 22059 }, { "epoch": 3.601118321701155, "grad_norm": 1.689745306968689, "learning_rate": 1.7848746986973883e-05, "loss": 0.425, "step": 22060 }, { "epoch": 3.6012815803436595, "grad_norm": 1.7488118410110474, "learning_rate": 1.7848550346451855e-05, "loss": 0.4456, "step": 22061 }, { "epoch": 3.601444838986164, "grad_norm": 1.9752912521362305, "learning_rate": 1.784835369802633e-05, "loss": 0.4847, "step": 22062 }, { "epoch": 3.6016080976286684, "grad_norm": 1.9434155225753784, "learning_rate": 1.7848157041697523e-05, "loss": 0.4504, "step": 22063 }, { "epoch": 3.601771356271173, "grad_norm": 1.835526943206787, "learning_rate": 1.7847960377465626e-05, "loss": 0.449, "step": 22064 }, { "epoch": 3.601934614913677, "grad_norm": 2.0322976112365723, "learning_rate": 1.784776370533083e-05, "loss": 0.5386, "step": 22065 }, { "epoch": 3.6020978735561813, "grad_norm": 1.6664210557937622, "learning_rate": 1.784756702529334e-05, "loss": 0.3806, "step": 22066 }, { "epoch": 3.6022611321986857, "grad_norm": 1.8364405632019043, "learning_rate": 1.7847370337353354e-05, "loss": 0.3918, "step": 22067 }, { "epoch": 3.60242439084119, "grad_norm": 1.9507808685302734, "learning_rate": 1.7847173641511065e-05, "loss": 0.4519, "step": 22068 }, { "epoch": 3.6025876494836946, "grad_norm": 2.0777316093444824, "learning_rate": 1.7846976937766674e-05, "loss": 0.4636, "step": 22069 }, { "epoch": 3.602750908126199, "grad_norm": 1.8532869815826416, "learning_rate": 1.784678022612038e-05, "loss": 0.4177, "step": 22070 }, { "epoch": 3.6029141667687035, "grad_norm": 1.9416067600250244, "learning_rate": 1.784658350657238e-05, "loss": 0.4954, "step": 22071 }, { "epoch": 3.6030774254112075, "grad_norm": 1.8262828588485718, "learning_rate": 1.7846386779122874e-05, "loss": 0.3963, "step": 22072 }, { "epoch": 3.603240684053712, "grad_norm": 1.9209303855895996, "learning_rate": 1.7846190043772062e-05, "loss": 0.524, "step": 22073 }, { "epoch": 3.6034039426962163, "grad_norm": 1.8653384447097778, "learning_rate": 1.784599330052013e-05, "loss": 0.5063, "step": 22074 }, { "epoch": 3.6035672013387208, "grad_norm": 2.240053415298462, "learning_rate": 1.7845796549367287e-05, "loss": 0.4936, "step": 22075 }, { "epoch": 3.603730459981225, "grad_norm": 1.5715417861938477, "learning_rate": 1.7845599790313735e-05, "loss": 0.3824, "step": 22076 }, { "epoch": 3.6038937186237296, "grad_norm": 1.8503203392028809, "learning_rate": 1.7845403023359663e-05, "loss": 0.3821, "step": 22077 }, { "epoch": 3.604056977266234, "grad_norm": 2.1915974617004395, "learning_rate": 1.784520624850527e-05, "loss": 0.516, "step": 22078 }, { "epoch": 3.6042202359087385, "grad_norm": 1.8449496030807495, "learning_rate": 1.7845009465750757e-05, "loss": 0.4602, "step": 22079 }, { "epoch": 3.604383494551243, "grad_norm": 1.9956400394439697, "learning_rate": 1.7844812675096323e-05, "loss": 0.4607, "step": 22080 }, { "epoch": 3.6045467531937474, "grad_norm": 2.148228645324707, "learning_rate": 1.7844615876542165e-05, "loss": 0.5186, "step": 22081 }, { "epoch": 3.604710011836252, "grad_norm": 2.082609176635742, "learning_rate": 1.784441907008848e-05, "loss": 0.4934, "step": 22082 }, { "epoch": 3.604873270478756, "grad_norm": 2.304389238357544, "learning_rate": 1.7844222255735467e-05, "loss": 0.473, "step": 22083 }, { "epoch": 3.6050365291212603, "grad_norm": 1.6587742567062378, "learning_rate": 1.7844025433483326e-05, "loss": 0.4126, "step": 22084 }, { "epoch": 3.6051997877637647, "grad_norm": 2.3180723190307617, "learning_rate": 1.784382860333225e-05, "loss": 0.43, "step": 22085 }, { "epoch": 3.605363046406269, "grad_norm": 1.8238945007324219, "learning_rate": 1.7843631765282444e-05, "loss": 0.4168, "step": 22086 }, { "epoch": 3.6055263050487736, "grad_norm": 2.1867661476135254, "learning_rate": 1.7843434919334103e-05, "loss": 0.6766, "step": 22087 }, { "epoch": 3.605689563691278, "grad_norm": 1.746467113494873, "learning_rate": 1.7843238065487423e-05, "loss": 0.3883, "step": 22088 }, { "epoch": 3.6058528223337825, "grad_norm": 1.778753399848938, "learning_rate": 1.7843041203742605e-05, "loss": 0.4663, "step": 22089 }, { "epoch": 3.6060160809762865, "grad_norm": 1.9210304021835327, "learning_rate": 1.7842844334099845e-05, "loss": 0.4103, "step": 22090 }, { "epoch": 3.606179339618791, "grad_norm": 1.8654370307922363, "learning_rate": 1.7842647456559347e-05, "loss": 0.4338, "step": 22091 }, { "epoch": 3.6063425982612953, "grad_norm": 1.7468037605285645, "learning_rate": 1.7842450571121303e-05, "loss": 0.4665, "step": 22092 }, { "epoch": 3.6065058569037998, "grad_norm": 2.3079447746276855, "learning_rate": 1.7842253677785913e-05, "loss": 0.5332, "step": 22093 }, { "epoch": 3.606669115546304, "grad_norm": 2.416902780532837, "learning_rate": 1.7842056776553375e-05, "loss": 0.5745, "step": 22094 }, { "epoch": 3.6068323741888086, "grad_norm": 1.966343641281128, "learning_rate": 1.7841859867423887e-05, "loss": 0.4578, "step": 22095 }, { "epoch": 3.606995632831313, "grad_norm": 2.7476131916046143, "learning_rate": 1.784166295039765e-05, "loss": 0.5277, "step": 22096 }, { "epoch": 3.6071588914738175, "grad_norm": 1.704429030418396, "learning_rate": 1.784146602547486e-05, "loss": 0.4329, "step": 22097 }, { "epoch": 3.607322150116322, "grad_norm": 1.7304866313934326, "learning_rate": 1.7841269092655714e-05, "loss": 0.4502, "step": 22098 }, { "epoch": 3.6074854087588264, "grad_norm": 2.238543748855591, "learning_rate": 1.7841072151940415e-05, "loss": 0.5821, "step": 22099 }, { "epoch": 3.607648667401331, "grad_norm": 2.2112176418304443, "learning_rate": 1.784087520332916e-05, "loss": 0.523, "step": 22100 }, { "epoch": 3.607811926043835, "grad_norm": 1.7333790063858032, "learning_rate": 1.784067824682214e-05, "loss": 0.4404, "step": 22101 }, { "epoch": 3.6079751846863393, "grad_norm": 2.297736644744873, "learning_rate": 1.7840481282419566e-05, "loss": 0.5421, "step": 22102 }, { "epoch": 3.6081384433288437, "grad_norm": 1.995438575744629, "learning_rate": 1.7840284310121624e-05, "loss": 0.421, "step": 22103 }, { "epoch": 3.608301701971348, "grad_norm": 1.8134878873825073, "learning_rate": 1.7840087329928518e-05, "loss": 0.445, "step": 22104 }, { "epoch": 3.6084649606138526, "grad_norm": 1.757948398590088, "learning_rate": 1.783989034184045e-05, "loss": 0.394, "step": 22105 }, { "epoch": 3.608628219256357, "grad_norm": 1.8883261680603027, "learning_rate": 1.7839693345857612e-05, "loss": 0.4483, "step": 22106 }, { "epoch": 3.608791477898861, "grad_norm": 1.9345953464508057, "learning_rate": 1.7839496341980205e-05, "loss": 0.5223, "step": 22107 }, { "epoch": 3.6089547365413654, "grad_norm": 1.8664345741271973, "learning_rate": 1.7839299330208426e-05, "loss": 0.4383, "step": 22108 }, { "epoch": 3.60911799518387, "grad_norm": 2.128328800201416, "learning_rate": 1.7839102310542477e-05, "loss": 0.5142, "step": 22109 }, { "epoch": 3.6092812538263743, "grad_norm": 2.186048746109009, "learning_rate": 1.7838905282982554e-05, "loss": 0.5596, "step": 22110 }, { "epoch": 3.6094445124688788, "grad_norm": 1.7043958902359009, "learning_rate": 1.7838708247528853e-05, "loss": 0.4209, "step": 22111 }, { "epoch": 3.609607771111383, "grad_norm": 1.5709689855575562, "learning_rate": 1.7838511204181577e-05, "loss": 0.3956, "step": 22112 }, { "epoch": 3.6097710297538876, "grad_norm": 2.3655128479003906, "learning_rate": 1.7838314152940923e-05, "loss": 0.6252, "step": 22113 }, { "epoch": 3.609934288396392, "grad_norm": 2.2516307830810547, "learning_rate": 1.7838117093807087e-05, "loss": 0.4895, "step": 22114 }, { "epoch": 3.6100975470388965, "grad_norm": 2.0226621627807617, "learning_rate": 1.783792002678027e-05, "loss": 0.5014, "step": 22115 }, { "epoch": 3.610260805681401, "grad_norm": 1.941792368888855, "learning_rate": 1.7837722951860668e-05, "loss": 0.4447, "step": 22116 }, { "epoch": 3.6104240643239054, "grad_norm": 1.9674570560455322, "learning_rate": 1.783752586904848e-05, "loss": 0.4783, "step": 22117 }, { "epoch": 3.6105873229664094, "grad_norm": 2.237872838973999, "learning_rate": 1.783732877834391e-05, "loss": 0.4829, "step": 22118 }, { "epoch": 3.610750581608914, "grad_norm": 1.7767207622528076, "learning_rate": 1.783713167974715e-05, "loss": 0.4344, "step": 22119 }, { "epoch": 3.6109138402514183, "grad_norm": 2.118683099746704, "learning_rate": 1.78369345732584e-05, "loss": 0.5402, "step": 22120 }, { "epoch": 3.6110770988939227, "grad_norm": 1.952714204788208, "learning_rate": 1.783673745887786e-05, "loss": 0.4294, "step": 22121 }, { "epoch": 3.611240357536427, "grad_norm": 2.3240389823913574, "learning_rate": 1.7836540336605727e-05, "loss": 0.5615, "step": 22122 }, { "epoch": 3.6114036161789316, "grad_norm": 2.0729119777679443, "learning_rate": 1.78363432064422e-05, "loss": 0.4915, "step": 22123 }, { "epoch": 3.611566874821436, "grad_norm": 2.4105117321014404, "learning_rate": 1.7836146068387474e-05, "loss": 0.5895, "step": 22124 }, { "epoch": 3.61173013346394, "grad_norm": 2.0153985023498535, "learning_rate": 1.7835948922441754e-05, "loss": 0.4327, "step": 22125 }, { "epoch": 3.6118933921064444, "grad_norm": 2.2740862369537354, "learning_rate": 1.7835751768605237e-05, "loss": 0.4797, "step": 22126 }, { "epoch": 3.612056650748949, "grad_norm": 1.8318463563919067, "learning_rate": 1.7835554606878117e-05, "loss": 0.4778, "step": 22127 }, { "epoch": 3.6122199093914533, "grad_norm": 2.1110849380493164, "learning_rate": 1.7835357437260597e-05, "loss": 0.5754, "step": 22128 }, { "epoch": 3.6123831680339578, "grad_norm": 1.7666465044021606, "learning_rate": 1.7835160259752874e-05, "loss": 0.4478, "step": 22129 }, { "epoch": 3.612546426676462, "grad_norm": 1.9890663623809814, "learning_rate": 1.783496307435515e-05, "loss": 0.4434, "step": 22130 }, { "epoch": 3.6127096853189666, "grad_norm": 1.637001633644104, "learning_rate": 1.7834765881067617e-05, "loss": 0.4293, "step": 22131 }, { "epoch": 3.612872943961471, "grad_norm": 2.089423418045044, "learning_rate": 1.7834568679890476e-05, "loss": 0.5261, "step": 22132 }, { "epoch": 3.6130362026039755, "grad_norm": 2.0863993167877197, "learning_rate": 1.783437147082393e-05, "loss": 0.5515, "step": 22133 }, { "epoch": 3.61319946124648, "grad_norm": 1.9463030099868774, "learning_rate": 1.783417425386817e-05, "loss": 0.5165, "step": 22134 }, { "epoch": 3.6133627198889844, "grad_norm": 2.1839258670806885, "learning_rate": 1.78339770290234e-05, "loss": 0.5465, "step": 22135 }, { "epoch": 3.6135259785314884, "grad_norm": 2.831221580505371, "learning_rate": 1.7833779796289817e-05, "loss": 0.5268, "step": 22136 }, { "epoch": 3.613689237173993, "grad_norm": 2.1998424530029297, "learning_rate": 1.783358255566762e-05, "loss": 0.5313, "step": 22137 }, { "epoch": 3.6138524958164973, "grad_norm": 1.9784510135650635, "learning_rate": 1.7833385307157006e-05, "loss": 0.5346, "step": 22138 }, { "epoch": 3.6140157544590017, "grad_norm": 1.7721282243728638, "learning_rate": 1.7833188050758176e-05, "loss": 0.4394, "step": 22139 }, { "epoch": 3.614179013101506, "grad_norm": 2.1749427318573, "learning_rate": 1.783299078647133e-05, "loss": 0.5245, "step": 22140 }, { "epoch": 3.6143422717440106, "grad_norm": 2.0090317726135254, "learning_rate": 1.783279351429666e-05, "loss": 0.4865, "step": 22141 }, { "epoch": 3.6145055303865146, "grad_norm": 1.8033640384674072, "learning_rate": 1.7832596234234376e-05, "loss": 0.4041, "step": 22142 }, { "epoch": 3.614668789029019, "grad_norm": 1.8352144956588745, "learning_rate": 1.7832398946284667e-05, "loss": 0.4668, "step": 22143 }, { "epoch": 3.6148320476715234, "grad_norm": 2.126009941101074, "learning_rate": 1.783220165044773e-05, "loss": 0.5247, "step": 22144 }, { "epoch": 3.614995306314028, "grad_norm": 1.8437715768814087, "learning_rate": 1.783200434672377e-05, "loss": 0.5007, "step": 22145 }, { "epoch": 3.6151585649565323, "grad_norm": 2.014897108078003, "learning_rate": 1.7831807035112985e-05, "loss": 0.5018, "step": 22146 }, { "epoch": 3.6153218235990368, "grad_norm": 2.2062606811523438, "learning_rate": 1.783160971561557e-05, "loss": 0.5033, "step": 22147 }, { "epoch": 3.615485082241541, "grad_norm": 2.222020387649536, "learning_rate": 1.783141238823173e-05, "loss": 0.4913, "step": 22148 }, { "epoch": 3.6156483408840456, "grad_norm": 1.9956907033920288, "learning_rate": 1.783121505296166e-05, "loss": 0.5266, "step": 22149 }, { "epoch": 3.61581159952655, "grad_norm": 2.015956401824951, "learning_rate": 1.7831017709805555e-05, "loss": 0.5662, "step": 22150 }, { "epoch": 3.6159748581690545, "grad_norm": 1.5607242584228516, "learning_rate": 1.7830820358763617e-05, "loss": 0.3779, "step": 22151 }, { "epoch": 3.616138116811559, "grad_norm": 1.9018778800964355, "learning_rate": 1.783062299983605e-05, "loss": 0.4425, "step": 22152 }, { "epoch": 3.616301375454063, "grad_norm": 1.9425938129425049, "learning_rate": 1.7830425633023042e-05, "loss": 0.4714, "step": 22153 }, { "epoch": 3.6164646340965674, "grad_norm": 2.083599805831909, "learning_rate": 1.78302282583248e-05, "loss": 0.4639, "step": 22154 }, { "epoch": 3.616627892739072, "grad_norm": 1.6850042343139648, "learning_rate": 1.783003087574152e-05, "loss": 0.436, "step": 22155 }, { "epoch": 3.6167911513815763, "grad_norm": 1.9779869318008423, "learning_rate": 1.7829833485273402e-05, "loss": 0.4993, "step": 22156 }, { "epoch": 3.6169544100240807, "grad_norm": 1.6692111492156982, "learning_rate": 1.7829636086920642e-05, "loss": 0.4452, "step": 22157 }, { "epoch": 3.617117668666585, "grad_norm": 2.125345230102539, "learning_rate": 1.782943868068344e-05, "loss": 0.5238, "step": 22158 }, { "epoch": 3.6172809273090896, "grad_norm": 2.2232186794281006, "learning_rate": 1.7829241266561998e-05, "loss": 0.5728, "step": 22159 }, { "epoch": 3.6174441859515936, "grad_norm": 2.0803756713867188, "learning_rate": 1.782904384455651e-05, "loss": 0.47, "step": 22160 }, { "epoch": 3.617607444594098, "grad_norm": 1.8626041412353516, "learning_rate": 1.782884641466718e-05, "loss": 0.4738, "step": 22161 }, { "epoch": 3.6177707032366024, "grad_norm": 1.9737060070037842, "learning_rate": 1.78286489768942e-05, "loss": 0.4395, "step": 22162 }, { "epoch": 3.617933961879107, "grad_norm": 2.3468399047851562, "learning_rate": 1.7828451531237773e-05, "loss": 0.4768, "step": 22163 }, { "epoch": 3.6180972205216113, "grad_norm": 1.862005352973938, "learning_rate": 1.78282540776981e-05, "loss": 0.4766, "step": 22164 }, { "epoch": 3.6182604791641158, "grad_norm": 1.7639528512954712, "learning_rate": 1.7828056616275374e-05, "loss": 0.428, "step": 22165 }, { "epoch": 3.61842373780662, "grad_norm": 2.270991086959839, "learning_rate": 1.78278591469698e-05, "loss": 0.5028, "step": 22166 }, { "epoch": 3.6185869964491246, "grad_norm": 1.881662130355835, "learning_rate": 1.7827661669781574e-05, "loss": 0.4525, "step": 22167 }, { "epoch": 3.618750255091629, "grad_norm": 1.9171619415283203, "learning_rate": 1.782746418471089e-05, "loss": 0.4597, "step": 22168 }, { "epoch": 3.6189135137341335, "grad_norm": 1.6120082139968872, "learning_rate": 1.7827266691757956e-05, "loss": 0.4048, "step": 22169 }, { "epoch": 3.619076772376638, "grad_norm": 1.9992955923080444, "learning_rate": 1.7827069190922964e-05, "loss": 0.5062, "step": 22170 }, { "epoch": 3.619240031019142, "grad_norm": 2.033933401107788, "learning_rate": 1.782687168220612e-05, "loss": 0.5272, "step": 22171 }, { "epoch": 3.6194032896616464, "grad_norm": 1.631243348121643, "learning_rate": 1.782667416560761e-05, "loss": 0.4355, "step": 22172 }, { "epoch": 3.619566548304151, "grad_norm": 1.8545106649398804, "learning_rate": 1.7826476641127648e-05, "loss": 0.4401, "step": 22173 }, { "epoch": 3.6197298069466552, "grad_norm": 1.8558882474899292, "learning_rate": 1.7826279108766425e-05, "loss": 0.5033, "step": 22174 }, { "epoch": 3.6198930655891597, "grad_norm": 1.9172236919403076, "learning_rate": 1.782608156852414e-05, "loss": 0.4508, "step": 22175 }, { "epoch": 3.620056324231664, "grad_norm": 1.9368468523025513, "learning_rate": 1.7825884020400993e-05, "loss": 0.435, "step": 22176 }, { "epoch": 3.6202195828741686, "grad_norm": 2.055910587310791, "learning_rate": 1.7825686464397183e-05, "loss": 0.4306, "step": 22177 }, { "epoch": 3.6203828415166726, "grad_norm": 1.7121984958648682, "learning_rate": 1.782548890051291e-05, "loss": 0.4225, "step": 22178 }, { "epoch": 3.620546100159177, "grad_norm": 1.8627114295959473, "learning_rate": 1.782529132874837e-05, "loss": 0.4981, "step": 22179 }, { "epoch": 3.6207093588016814, "grad_norm": 2.2951090335845947, "learning_rate": 1.7825093749103766e-05, "loss": 0.5711, "step": 22180 }, { "epoch": 3.620872617444186, "grad_norm": 2.0401346683502197, "learning_rate": 1.7824896161579292e-05, "loss": 0.4334, "step": 22181 }, { "epoch": 3.6210358760866903, "grad_norm": 1.9772412776947021, "learning_rate": 1.7824698566175152e-05, "loss": 0.466, "step": 22182 }, { "epoch": 3.6211991347291947, "grad_norm": 2.048539876937866, "learning_rate": 1.782450096289154e-05, "loss": 0.4876, "step": 22183 }, { "epoch": 3.621362393371699, "grad_norm": 1.944333553314209, "learning_rate": 1.782430335172866e-05, "loss": 0.4217, "step": 22184 }, { "epoch": 3.6215256520142036, "grad_norm": 1.6959633827209473, "learning_rate": 1.7824105732686706e-05, "loss": 0.4304, "step": 22185 }, { "epoch": 3.621688910656708, "grad_norm": 1.9982280731201172, "learning_rate": 1.7823908105765883e-05, "loss": 0.5077, "step": 22186 }, { "epoch": 3.6218521692992125, "grad_norm": 1.8711038827896118, "learning_rate": 1.7823710470966386e-05, "loss": 0.4295, "step": 22187 }, { "epoch": 3.622015427941717, "grad_norm": 1.902862310409546, "learning_rate": 1.7823512828288412e-05, "loss": 0.5129, "step": 22188 }, { "epoch": 3.622178686584221, "grad_norm": 1.8664665222167969, "learning_rate": 1.7823315177732165e-05, "loss": 0.4472, "step": 22189 }, { "epoch": 3.6223419452267254, "grad_norm": 1.8363382816314697, "learning_rate": 1.782311751929784e-05, "loss": 0.4686, "step": 22190 }, { "epoch": 3.62250520386923, "grad_norm": 2.128692388534546, "learning_rate": 1.7822919852985638e-05, "loss": 0.4745, "step": 22191 }, { "epoch": 3.6226684625117342, "grad_norm": 2.269441843032837, "learning_rate": 1.7822722178795758e-05, "loss": 0.5619, "step": 22192 }, { "epoch": 3.6228317211542387, "grad_norm": 2.443877935409546, "learning_rate": 1.78225244967284e-05, "loss": 0.5402, "step": 22193 }, { "epoch": 3.622994979796743, "grad_norm": 1.8383476734161377, "learning_rate": 1.782232680678376e-05, "loss": 0.4636, "step": 22194 }, { "epoch": 3.623158238439247, "grad_norm": 1.846387505531311, "learning_rate": 1.7822129108962043e-05, "loss": 0.4923, "step": 22195 }, { "epoch": 3.6233214970817516, "grad_norm": 1.8662346601486206, "learning_rate": 1.782193140326344e-05, "loss": 0.4559, "step": 22196 }, { "epoch": 3.623484755724256, "grad_norm": 2.054503917694092, "learning_rate": 1.7821733689688154e-05, "loss": 0.5138, "step": 22197 }, { "epoch": 3.6236480143667604, "grad_norm": 1.7322131395339966, "learning_rate": 1.7821535968236387e-05, "loss": 0.445, "step": 22198 }, { "epoch": 3.623811273009265, "grad_norm": 1.728078842163086, "learning_rate": 1.7821338238908332e-05, "loss": 0.4545, "step": 22199 }, { "epoch": 3.6239745316517693, "grad_norm": 2.1443793773651123, "learning_rate": 1.7821140501704195e-05, "loss": 0.4942, "step": 22200 }, { "epoch": 3.6241377902942737, "grad_norm": 1.8059443235397339, "learning_rate": 1.782094275662417e-05, "loss": 0.4857, "step": 22201 }, { "epoch": 3.624301048936778, "grad_norm": 2.458599090576172, "learning_rate": 1.7820745003668456e-05, "loss": 0.4937, "step": 22202 }, { "epoch": 3.6244643075792826, "grad_norm": 1.8071177005767822, "learning_rate": 1.7820547242837256e-05, "loss": 0.4319, "step": 22203 }, { "epoch": 3.624627566221787, "grad_norm": 1.7743791341781616, "learning_rate": 1.782034947413077e-05, "loss": 0.3942, "step": 22204 }, { "epoch": 3.6247908248642915, "grad_norm": 2.0775487422943115, "learning_rate": 1.7820151697549184e-05, "loss": 0.5325, "step": 22205 }, { "epoch": 3.6249540835067955, "grad_norm": 1.7206403017044067, "learning_rate": 1.781995391309272e-05, "loss": 0.3907, "step": 22206 }, { "epoch": 3.6251173421493, "grad_norm": 1.651721715927124, "learning_rate": 1.7819756120761556e-05, "loss": 0.4146, "step": 22207 }, { "epoch": 3.6252806007918044, "grad_norm": 1.8328087329864502, "learning_rate": 1.7819558320555902e-05, "loss": 0.3805, "step": 22208 }, { "epoch": 3.625443859434309, "grad_norm": 1.9612901210784912, "learning_rate": 1.7819360512475955e-05, "loss": 0.4528, "step": 22209 }, { "epoch": 3.6256071180768132, "grad_norm": 2.0912911891937256, "learning_rate": 1.7819162696521914e-05, "loss": 0.4795, "step": 22210 }, { "epoch": 3.6257703767193177, "grad_norm": 1.9684643745422363, "learning_rate": 1.781896487269398e-05, "loss": 0.5266, "step": 22211 }, { "epoch": 3.625933635361822, "grad_norm": 1.9553793668746948, "learning_rate": 1.7818767040992345e-05, "loss": 0.508, "step": 22212 }, { "epoch": 3.626096894004326, "grad_norm": 2.0514461994171143, "learning_rate": 1.781856920141722e-05, "loss": 0.5137, "step": 22213 }, { "epoch": 3.6262601526468305, "grad_norm": 2.0570685863494873, "learning_rate": 1.7818371353968797e-05, "loss": 0.4886, "step": 22214 }, { "epoch": 3.626423411289335, "grad_norm": 2.071004629135132, "learning_rate": 1.7818173498647274e-05, "loss": 0.4716, "step": 22215 }, { "epoch": 3.6265866699318394, "grad_norm": 1.7154748439788818, "learning_rate": 1.781797563545285e-05, "loss": 0.436, "step": 22216 }, { "epoch": 3.626749928574344, "grad_norm": 2.4866549968719482, "learning_rate": 1.781777776438573e-05, "loss": 0.4776, "step": 22217 }, { "epoch": 3.6269131872168483, "grad_norm": 2.0467584133148193, "learning_rate": 1.781757988544611e-05, "loss": 0.4926, "step": 22218 }, { "epoch": 3.6270764458593527, "grad_norm": 1.9851776361465454, "learning_rate": 1.7817381998634187e-05, "loss": 0.4014, "step": 22219 }, { "epoch": 3.627239704501857, "grad_norm": 2.0743985176086426, "learning_rate": 1.7817184103950166e-05, "loss": 0.5022, "step": 22220 }, { "epoch": 3.6274029631443616, "grad_norm": 2.0754120349884033, "learning_rate": 1.781698620139424e-05, "loss": 0.488, "step": 22221 }, { "epoch": 3.627566221786866, "grad_norm": 1.805215835571289, "learning_rate": 1.781678829096661e-05, "loss": 0.4371, "step": 22222 }, { "epoch": 3.6277294804293705, "grad_norm": 2.1818981170654297, "learning_rate": 1.781659037266748e-05, "loss": 0.4265, "step": 22223 }, { "epoch": 3.6278927390718745, "grad_norm": 2.271246910095215, "learning_rate": 1.781639244649704e-05, "loss": 0.5478, "step": 22224 }, { "epoch": 3.628055997714379, "grad_norm": 2.062424421310425, "learning_rate": 1.78161945124555e-05, "loss": 0.5436, "step": 22225 }, { "epoch": 3.6282192563568834, "grad_norm": 1.7606080770492554, "learning_rate": 1.7815996570543053e-05, "loss": 0.4673, "step": 22226 }, { "epoch": 3.628382514999388, "grad_norm": 2.1525027751922607, "learning_rate": 1.7815798620759897e-05, "loss": 0.5059, "step": 22227 }, { "epoch": 3.6285457736418922, "grad_norm": 1.930280327796936, "learning_rate": 1.7815600663106237e-05, "loss": 0.4512, "step": 22228 }, { "epoch": 3.6287090322843967, "grad_norm": 2.0866823196411133, "learning_rate": 1.7815402697582265e-05, "loss": 0.55, "step": 22229 }, { "epoch": 3.6288722909269007, "grad_norm": 1.9048620462417603, "learning_rate": 1.781520472418819e-05, "loss": 0.4701, "step": 22230 }, { "epoch": 3.629035549569405, "grad_norm": 1.678763747215271, "learning_rate": 1.78150067429242e-05, "loss": 0.4262, "step": 22231 }, { "epoch": 3.6291988082119095, "grad_norm": 2.023594856262207, "learning_rate": 1.7814808753790506e-05, "loss": 0.4842, "step": 22232 }, { "epoch": 3.629362066854414, "grad_norm": 1.8978159427642822, "learning_rate": 1.7814610756787297e-05, "loss": 0.4543, "step": 22233 }, { "epoch": 3.6295253254969184, "grad_norm": 2.1696882247924805, "learning_rate": 1.7814412751914782e-05, "loss": 0.5312, "step": 22234 }, { "epoch": 3.629688584139423, "grad_norm": 1.9322112798690796, "learning_rate": 1.7814214739173152e-05, "loss": 0.4967, "step": 22235 }, { "epoch": 3.6298518427819273, "grad_norm": 1.673377275466919, "learning_rate": 1.7814016718562613e-05, "loss": 0.4286, "step": 22236 }, { "epoch": 3.6300151014244317, "grad_norm": 2.2200803756713867, "learning_rate": 1.7813818690083358e-05, "loss": 0.5197, "step": 22237 }, { "epoch": 3.630178360066936, "grad_norm": 1.9119254350662231, "learning_rate": 1.7813620653735587e-05, "loss": 0.4453, "step": 22238 }, { "epoch": 3.6303416187094406, "grad_norm": 1.8180071115493774, "learning_rate": 1.781342260951951e-05, "loss": 0.4767, "step": 22239 }, { "epoch": 3.630504877351945, "grad_norm": 1.9431490898132324, "learning_rate": 1.7813224557435313e-05, "loss": 0.473, "step": 22240 }, { "epoch": 3.630668135994449, "grad_norm": 1.8145979642868042, "learning_rate": 1.78130264974832e-05, "loss": 0.4461, "step": 22241 }, { "epoch": 3.6308313946369535, "grad_norm": 1.83763587474823, "learning_rate": 1.7812828429663375e-05, "loss": 0.4947, "step": 22242 }, { "epoch": 3.630994653279458, "grad_norm": 1.7229279279708862, "learning_rate": 1.7812630353976032e-05, "loss": 0.4451, "step": 22243 }, { "epoch": 3.6311579119219624, "grad_norm": 2.1747677326202393, "learning_rate": 1.781243227042137e-05, "loss": 0.5481, "step": 22244 }, { "epoch": 3.631321170564467, "grad_norm": 2.0306737422943115, "learning_rate": 1.7812234178999595e-05, "loss": 0.4907, "step": 22245 }, { "epoch": 3.6314844292069712, "grad_norm": 1.8663150072097778, "learning_rate": 1.7812036079710903e-05, "loss": 0.4264, "step": 22246 }, { "epoch": 3.6316476878494757, "grad_norm": 2.4992425441741943, "learning_rate": 1.7811837972555487e-05, "loss": 0.9484, "step": 22247 }, { "epoch": 3.6318109464919797, "grad_norm": 1.8206520080566406, "learning_rate": 1.7811639857533558e-05, "loss": 0.4333, "step": 22248 }, { "epoch": 3.631974205134484, "grad_norm": 1.79257333278656, "learning_rate": 1.7811441734645308e-05, "loss": 0.4398, "step": 22249 }, { "epoch": 3.6321374637769885, "grad_norm": 2.1300365924835205, "learning_rate": 1.7811243603890934e-05, "loss": 0.5692, "step": 22250 }, { "epoch": 3.632300722419493, "grad_norm": 1.6796329021453857, "learning_rate": 1.7811045465270647e-05, "loss": 0.4661, "step": 22251 }, { "epoch": 3.6324639810619974, "grad_norm": 1.7673457860946655, "learning_rate": 1.7810847318784632e-05, "loss": 0.4401, "step": 22252 }, { "epoch": 3.632627239704502, "grad_norm": 1.828446865081787, "learning_rate": 1.78106491644331e-05, "loss": 0.4708, "step": 22253 }, { "epoch": 3.6327904983470063, "grad_norm": 2.198716640472412, "learning_rate": 1.7810451002216246e-05, "loss": 0.5201, "step": 22254 }, { "epoch": 3.6329537569895107, "grad_norm": 1.7453234195709229, "learning_rate": 1.781025283213427e-05, "loss": 0.4748, "step": 22255 }, { "epoch": 3.633117015632015, "grad_norm": 1.8886045217514038, "learning_rate": 1.7810054654187372e-05, "loss": 0.3954, "step": 22256 }, { "epoch": 3.6332802742745196, "grad_norm": 2.3775596618652344, "learning_rate": 1.7809856468375752e-05, "loss": 0.557, "step": 22257 }, { "epoch": 3.633443532917024, "grad_norm": 1.8000128269195557, "learning_rate": 1.7809658274699603e-05, "loss": 0.4983, "step": 22258 }, { "epoch": 3.633606791559528, "grad_norm": 2.3628523349761963, "learning_rate": 1.7809460073159134e-05, "loss": 0.5121, "step": 22259 }, { "epoch": 3.6337700502020325, "grad_norm": 2.1220734119415283, "learning_rate": 1.7809261863754542e-05, "loss": 0.4777, "step": 22260 }, { "epoch": 3.633933308844537, "grad_norm": 2.3694796562194824, "learning_rate": 1.7809063646486026e-05, "loss": 0.4881, "step": 22261 }, { "epoch": 3.6340965674870414, "grad_norm": 2.1046969890594482, "learning_rate": 1.7808865421353784e-05, "loss": 0.4546, "step": 22262 }, { "epoch": 3.634259826129546, "grad_norm": 1.8847228288650513, "learning_rate": 1.7808667188358014e-05, "loss": 0.4604, "step": 22263 }, { "epoch": 3.6344230847720502, "grad_norm": 2.217400550842285, "learning_rate": 1.7808468947498922e-05, "loss": 0.5473, "step": 22264 }, { "epoch": 3.6345863434145547, "grad_norm": 1.6362099647521973, "learning_rate": 1.78082706987767e-05, "loss": 0.4248, "step": 22265 }, { "epoch": 3.6347496020570587, "grad_norm": 2.1573314666748047, "learning_rate": 1.7808072442191554e-05, "loss": 0.5425, "step": 22266 }, { "epoch": 3.634912860699563, "grad_norm": 1.7252026796340942, "learning_rate": 1.780787417774368e-05, "loss": 0.435, "step": 22267 }, { "epoch": 3.6350761193420675, "grad_norm": 2.2214579582214355, "learning_rate": 1.7807675905433278e-05, "loss": 0.6017, "step": 22268 }, { "epoch": 3.635239377984572, "grad_norm": 2.0013840198516846, "learning_rate": 1.780747762526055e-05, "loss": 0.4777, "step": 22269 }, { "epoch": 3.6354026366270764, "grad_norm": 2.143167495727539, "learning_rate": 1.7807279337225694e-05, "loss": 0.5463, "step": 22270 }, { "epoch": 3.635565895269581, "grad_norm": 1.908907175064087, "learning_rate": 1.7807081041328908e-05, "loss": 0.4845, "step": 22271 }, { "epoch": 3.6357291539120853, "grad_norm": 1.8023905754089355, "learning_rate": 1.7806882737570393e-05, "loss": 0.4402, "step": 22272 }, { "epoch": 3.6358924125545897, "grad_norm": 2.0850412845611572, "learning_rate": 1.7806684425950355e-05, "loss": 0.4904, "step": 22273 }, { "epoch": 3.636055671197094, "grad_norm": 2.0633718967437744, "learning_rate": 1.7806486106468983e-05, "loss": 0.4987, "step": 22274 }, { "epoch": 3.6362189298395986, "grad_norm": 2.2465248107910156, "learning_rate": 1.7806287779126482e-05, "loss": 0.5995, "step": 22275 }, { "epoch": 3.636382188482103, "grad_norm": 2.034846305847168, "learning_rate": 1.780608944392305e-05, "loss": 0.4566, "step": 22276 }, { "epoch": 3.636545447124607, "grad_norm": 1.908725619316101, "learning_rate": 1.780589110085889e-05, "loss": 0.4832, "step": 22277 }, { "epoch": 3.6367087057671115, "grad_norm": 2.408667802810669, "learning_rate": 1.78056927499342e-05, "loss": 0.4763, "step": 22278 }, { "epoch": 3.636871964409616, "grad_norm": 1.7818105220794678, "learning_rate": 1.780549439114918e-05, "loss": 0.3638, "step": 22279 }, { "epoch": 3.6370352230521203, "grad_norm": 1.72776198387146, "learning_rate": 1.7805296024504026e-05, "loss": 0.433, "step": 22280 }, { "epoch": 3.637198481694625, "grad_norm": 1.917036533355713, "learning_rate": 1.7805097649998947e-05, "loss": 0.5271, "step": 22281 }, { "epoch": 3.6373617403371292, "grad_norm": 1.8326754570007324, "learning_rate": 1.780489926763413e-05, "loss": 0.4484, "step": 22282 }, { "epoch": 3.637524998979633, "grad_norm": 1.765150547027588, "learning_rate": 1.7804700877409784e-05, "loss": 0.3895, "step": 22283 }, { "epoch": 3.6376882576221377, "grad_norm": 1.7073445320129395, "learning_rate": 1.780450247932611e-05, "loss": 0.4395, "step": 22284 }, { "epoch": 3.637851516264642, "grad_norm": 1.826615810394287, "learning_rate": 1.7804304073383298e-05, "loss": 0.4891, "step": 22285 }, { "epoch": 3.6380147749071465, "grad_norm": 1.996622920036316, "learning_rate": 1.780410565958156e-05, "loss": 0.5103, "step": 22286 }, { "epoch": 3.638178033549651, "grad_norm": 1.962227463722229, "learning_rate": 1.7803907237921082e-05, "loss": 0.5039, "step": 22287 }, { "epoch": 3.6383412921921554, "grad_norm": 2.1577606201171875, "learning_rate": 1.7803708808402077e-05, "loss": 0.5376, "step": 22288 }, { "epoch": 3.63850455083466, "grad_norm": 2.4692487716674805, "learning_rate": 1.7803510371024738e-05, "loss": 0.5061, "step": 22289 }, { "epoch": 3.6386678094771643, "grad_norm": 2.4384024143218994, "learning_rate": 1.7803311925789267e-05, "loss": 0.5243, "step": 22290 }, { "epoch": 3.6388310681196687, "grad_norm": 1.7909634113311768, "learning_rate": 1.7803113472695862e-05, "loss": 0.4336, "step": 22291 }, { "epoch": 3.638994326762173, "grad_norm": 2.271132230758667, "learning_rate": 1.7802915011744725e-05, "loss": 0.5585, "step": 22292 }, { "epoch": 3.6391575854046776, "grad_norm": 1.8109623193740845, "learning_rate": 1.7802716542936053e-05, "loss": 0.4237, "step": 22293 }, { "epoch": 3.6393208440471816, "grad_norm": 1.6990612745285034, "learning_rate": 1.7802518066270045e-05, "loss": 0.4262, "step": 22294 }, { "epoch": 3.639484102689686, "grad_norm": 2.457380533218384, "learning_rate": 1.780231958174691e-05, "loss": 0.5035, "step": 22295 }, { "epoch": 3.6396473613321905, "grad_norm": 2.0693557262420654, "learning_rate": 1.780212108936684e-05, "loss": 0.4414, "step": 22296 }, { "epoch": 3.639810619974695, "grad_norm": 1.7942826747894287, "learning_rate": 1.7801922589130033e-05, "loss": 0.4274, "step": 22297 }, { "epoch": 3.6399738786171993, "grad_norm": 1.8830196857452393, "learning_rate": 1.780172408103669e-05, "loss": 0.4614, "step": 22298 }, { "epoch": 3.640137137259704, "grad_norm": 1.6464533805847168, "learning_rate": 1.7801525565087017e-05, "loss": 0.4309, "step": 22299 }, { "epoch": 3.640300395902208, "grad_norm": 1.4244880676269531, "learning_rate": 1.780132704128121e-05, "loss": 0.3813, "step": 22300 }, { "epoch": 3.640463654544712, "grad_norm": 2.055643320083618, "learning_rate": 1.7801128509619468e-05, "loss": 0.4624, "step": 22301 }, { "epoch": 3.6406269131872167, "grad_norm": 2.4181535243988037, "learning_rate": 1.7800929970101986e-05, "loss": 0.5133, "step": 22302 }, { "epoch": 3.640790171829721, "grad_norm": 1.919787883758545, "learning_rate": 1.780073142272898e-05, "loss": 0.4753, "step": 22303 }, { "epoch": 3.6409534304722255, "grad_norm": 2.297555685043335, "learning_rate": 1.7800532867500632e-05, "loss": 0.5284, "step": 22304 }, { "epoch": 3.64111668911473, "grad_norm": 2.0335564613342285, "learning_rate": 1.7800334304417152e-05, "loss": 0.4397, "step": 22305 }, { "epoch": 3.6412799477572344, "grad_norm": 1.8258469104766846, "learning_rate": 1.7800135733478736e-05, "loss": 0.4991, "step": 22306 }, { "epoch": 3.641443206399739, "grad_norm": 2.048990488052368, "learning_rate": 1.7799937154685587e-05, "loss": 0.4505, "step": 22307 }, { "epoch": 3.6416064650422433, "grad_norm": 1.853342056274414, "learning_rate": 1.77997385680379e-05, "loss": 0.4023, "step": 22308 }, { "epoch": 3.6417697236847477, "grad_norm": 1.9720091819763184, "learning_rate": 1.7799539973535883e-05, "loss": 0.4642, "step": 22309 }, { "epoch": 3.641932982327252, "grad_norm": 2.387601852416992, "learning_rate": 1.779934137117973e-05, "loss": 0.5654, "step": 22310 }, { "epoch": 3.6420962409697566, "grad_norm": 2.167843818664551, "learning_rate": 1.7799142760969645e-05, "loss": 0.5213, "step": 22311 }, { "epoch": 3.6422594996122606, "grad_norm": 1.9190088510513306, "learning_rate": 1.779894414290582e-05, "loss": 0.4547, "step": 22312 }, { "epoch": 3.642422758254765, "grad_norm": 1.851482629776001, "learning_rate": 1.7798745516988463e-05, "loss": 0.4549, "step": 22313 }, { "epoch": 3.6425860168972695, "grad_norm": 1.85097336769104, "learning_rate": 1.779854688321777e-05, "loss": 0.4382, "step": 22314 }, { "epoch": 3.642749275539774, "grad_norm": 1.9080549478530884, "learning_rate": 1.7798348241593942e-05, "loss": 0.4861, "step": 22315 }, { "epoch": 3.6429125341822783, "grad_norm": 1.521224856376648, "learning_rate": 1.779814959211718e-05, "loss": 0.4074, "step": 22316 }, { "epoch": 3.6430757928247828, "grad_norm": 1.8452973365783691, "learning_rate": 1.7797950934787683e-05, "loss": 0.4679, "step": 22317 }, { "epoch": 3.643239051467287, "grad_norm": 1.942610740661621, "learning_rate": 1.7797752269605654e-05, "loss": 0.5473, "step": 22318 }, { "epoch": 3.643402310109791, "grad_norm": 1.8627897500991821, "learning_rate": 1.779755359657129e-05, "loss": 0.4079, "step": 22319 }, { "epoch": 3.6435655687522956, "grad_norm": 2.263631820678711, "learning_rate": 1.779735491568479e-05, "loss": 0.5387, "step": 22320 }, { "epoch": 3.6437288273948, "grad_norm": 2.0295004844665527, "learning_rate": 1.7797156226946353e-05, "loss": 0.5077, "step": 22321 }, { "epoch": 3.6438920860373045, "grad_norm": 1.5275533199310303, "learning_rate": 1.7796957530356187e-05, "loss": 0.4102, "step": 22322 }, { "epoch": 3.644055344679809, "grad_norm": 1.9492062330245972, "learning_rate": 1.7796758825914485e-05, "loss": 0.4273, "step": 22323 }, { "epoch": 3.6442186033223134, "grad_norm": 2.069120168685913, "learning_rate": 1.779656011362145e-05, "loss": 0.5276, "step": 22324 }, { "epoch": 3.644381861964818, "grad_norm": 1.7003391981124878, "learning_rate": 1.7796361393477278e-05, "loss": 0.4072, "step": 22325 }, { "epoch": 3.6445451206073223, "grad_norm": 2.3023457527160645, "learning_rate": 1.7796162665482172e-05, "loss": 0.6193, "step": 22326 }, { "epoch": 3.6447083792498267, "grad_norm": 2.2725491523742676, "learning_rate": 1.7795963929636335e-05, "loss": 0.5449, "step": 22327 }, { "epoch": 3.644871637892331, "grad_norm": 2.356009006500244, "learning_rate": 1.7795765185939965e-05, "loss": 0.4779, "step": 22328 }, { "epoch": 3.6450348965348356, "grad_norm": 2.1241252422332764, "learning_rate": 1.7795566434393257e-05, "loss": 0.4648, "step": 22329 }, { "epoch": 3.6451981551773396, "grad_norm": 1.9269254207611084, "learning_rate": 1.779536767499642e-05, "loss": 0.488, "step": 22330 }, { "epoch": 3.645361413819844, "grad_norm": 1.9518089294433594, "learning_rate": 1.7795168907749652e-05, "loss": 0.5054, "step": 22331 }, { "epoch": 3.6455246724623485, "grad_norm": 2.0756514072418213, "learning_rate": 1.7794970132653145e-05, "loss": 0.5183, "step": 22332 }, { "epoch": 3.645687931104853, "grad_norm": 1.8090118169784546, "learning_rate": 1.779477134970711e-05, "loss": 0.5157, "step": 22333 }, { "epoch": 3.6458511897473573, "grad_norm": 2.7770121097564697, "learning_rate": 1.779457255891174e-05, "loss": 0.5653, "step": 22334 }, { "epoch": 3.6460144483898618, "grad_norm": 1.6087220907211304, "learning_rate": 1.7794373760267235e-05, "loss": 0.4332, "step": 22335 }, { "epoch": 3.6461777070323658, "grad_norm": 1.946505069732666, "learning_rate": 1.7794174953773802e-05, "loss": 0.4716, "step": 22336 }, { "epoch": 3.64634096567487, "grad_norm": 1.8234583139419556, "learning_rate": 1.7793976139431635e-05, "loss": 0.482, "step": 22337 }, { "epoch": 3.6465042243173746, "grad_norm": 2.2445878982543945, "learning_rate": 1.7793777317240936e-05, "loss": 0.4583, "step": 22338 }, { "epoch": 3.646667482959879, "grad_norm": 2.0013158321380615, "learning_rate": 1.7793578487201904e-05, "loss": 0.5389, "step": 22339 }, { "epoch": 3.6468307416023835, "grad_norm": 1.5991144180297852, "learning_rate": 1.7793379649314743e-05, "loss": 0.4149, "step": 22340 }, { "epoch": 3.646994000244888, "grad_norm": 2.046417474746704, "learning_rate": 1.779318080357965e-05, "loss": 0.4767, "step": 22341 }, { "epoch": 3.6471572588873924, "grad_norm": 1.9243831634521484, "learning_rate": 1.7792981949996828e-05, "loss": 0.4914, "step": 22342 }, { "epoch": 3.647320517529897, "grad_norm": 1.743822693824768, "learning_rate": 1.7792783088566473e-05, "loss": 0.395, "step": 22343 }, { "epoch": 3.6474837761724013, "grad_norm": 1.7007439136505127, "learning_rate": 1.7792584219288786e-05, "loss": 0.4583, "step": 22344 }, { "epoch": 3.6476470348149057, "grad_norm": 1.9168442487716675, "learning_rate": 1.7792385342163968e-05, "loss": 0.4878, "step": 22345 }, { "epoch": 3.64781029345741, "grad_norm": 2.2556300163269043, "learning_rate": 1.7792186457192224e-05, "loss": 0.5378, "step": 22346 }, { "epoch": 3.647973552099914, "grad_norm": 1.6213366985321045, "learning_rate": 1.7791987564373746e-05, "loss": 0.4135, "step": 22347 }, { "epoch": 3.6481368107424186, "grad_norm": 1.9111948013305664, "learning_rate": 1.779178866370874e-05, "loss": 0.4888, "step": 22348 }, { "epoch": 3.648300069384923, "grad_norm": 1.8221899271011353, "learning_rate": 1.779158975519741e-05, "loss": 0.4583, "step": 22349 }, { "epoch": 3.6484633280274275, "grad_norm": 1.926403284072876, "learning_rate": 1.7791390838839946e-05, "loss": 0.4812, "step": 22350 }, { "epoch": 3.648626586669932, "grad_norm": 1.9495692253112793, "learning_rate": 1.7791191914636553e-05, "loss": 0.4692, "step": 22351 }, { "epoch": 3.6487898453124363, "grad_norm": 2.4718642234802246, "learning_rate": 1.7790992982587436e-05, "loss": 0.4655, "step": 22352 }, { "epoch": 3.6489531039549408, "grad_norm": 1.7724136114120483, "learning_rate": 1.779079404269279e-05, "loss": 0.4285, "step": 22353 }, { "epoch": 3.6491163625974448, "grad_norm": 2.0617635250091553, "learning_rate": 1.7790595094952815e-05, "loss": 0.47, "step": 22354 }, { "epoch": 3.649279621239949, "grad_norm": 1.6019126176834106, "learning_rate": 1.7790396139367712e-05, "loss": 0.4044, "step": 22355 }, { "epoch": 3.6494428798824536, "grad_norm": 1.995518684387207, "learning_rate": 1.779019717593768e-05, "loss": 0.4714, "step": 22356 }, { "epoch": 3.649606138524958, "grad_norm": 1.846318244934082, "learning_rate": 1.7789998204662925e-05, "loss": 0.453, "step": 22357 }, { "epoch": 3.6497693971674625, "grad_norm": 1.6230682134628296, "learning_rate": 1.7789799225543648e-05, "loss": 0.3676, "step": 22358 }, { "epoch": 3.649932655809967, "grad_norm": 1.803182601928711, "learning_rate": 1.778960023858004e-05, "loss": 0.4344, "step": 22359 }, { "epoch": 3.6500959144524714, "grad_norm": 1.904961347579956, "learning_rate": 1.7789401243772307e-05, "loss": 0.412, "step": 22360 }, { "epoch": 3.650259173094976, "grad_norm": 1.9557011127471924, "learning_rate": 1.778920224112065e-05, "loss": 0.4697, "step": 22361 }, { "epoch": 3.6504224317374803, "grad_norm": 1.7860610485076904, "learning_rate": 1.7789003230625266e-05, "loss": 0.3976, "step": 22362 }, { "epoch": 3.6505856903799847, "grad_norm": 1.8320955038070679, "learning_rate": 1.778880421228636e-05, "loss": 0.4135, "step": 22363 }, { "epoch": 3.650748949022489, "grad_norm": 2.3296947479248047, "learning_rate": 1.7788605186104128e-05, "loss": 0.6629, "step": 22364 }, { "epoch": 3.650912207664993, "grad_norm": 2.1047706604003906, "learning_rate": 1.7788406152078776e-05, "loss": 0.4405, "step": 22365 }, { "epoch": 3.6510754663074976, "grad_norm": 1.9450560808181763, "learning_rate": 1.77882071102105e-05, "loss": 0.4525, "step": 22366 }, { "epoch": 3.651238724950002, "grad_norm": 1.7828776836395264, "learning_rate": 1.77880080604995e-05, "loss": 0.4189, "step": 22367 }, { "epoch": 3.6514019835925065, "grad_norm": 1.8950464725494385, "learning_rate": 1.7787809002945978e-05, "loss": 0.4963, "step": 22368 }, { "epoch": 3.651565242235011, "grad_norm": 1.665479302406311, "learning_rate": 1.7787609937550137e-05, "loss": 0.4504, "step": 22369 }, { "epoch": 3.6517285008775153, "grad_norm": 2.010477066040039, "learning_rate": 1.778741086431217e-05, "loss": 0.4557, "step": 22370 }, { "epoch": 3.6518917595200193, "grad_norm": 2.015735387802124, "learning_rate": 1.778721178323229e-05, "loss": 0.527, "step": 22371 }, { "epoch": 3.6520550181625238, "grad_norm": 2.0763766765594482, "learning_rate": 1.7787012694310685e-05, "loss": 0.4385, "step": 22372 }, { "epoch": 3.652218276805028, "grad_norm": 1.9208872318267822, "learning_rate": 1.778681359754756e-05, "loss": 0.5189, "step": 22373 }, { "epoch": 3.6523815354475326, "grad_norm": 1.872377634048462, "learning_rate": 1.7786614492943114e-05, "loss": 0.456, "step": 22374 }, { "epoch": 3.652544794090037, "grad_norm": 2.065157890319824, "learning_rate": 1.778641538049755e-05, "loss": 0.4551, "step": 22375 }, { "epoch": 3.6527080527325415, "grad_norm": 1.736631155014038, "learning_rate": 1.7786216260211072e-05, "loss": 0.4353, "step": 22376 }, { "epoch": 3.652871311375046, "grad_norm": 1.9423978328704834, "learning_rate": 1.7786017132083874e-05, "loss": 0.449, "step": 22377 }, { "epoch": 3.6530345700175504, "grad_norm": 1.984809160232544, "learning_rate": 1.778581799611616e-05, "loss": 0.4289, "step": 22378 }, { "epoch": 3.653197828660055, "grad_norm": 2.4081368446350098, "learning_rate": 1.7785618852308127e-05, "loss": 0.5765, "step": 22379 }, { "epoch": 3.6533610873025593, "grad_norm": 2.1571428775787354, "learning_rate": 1.7785419700659982e-05, "loss": 0.4846, "step": 22380 }, { "epoch": 3.6535243459450637, "grad_norm": 2.3857364654541016, "learning_rate": 1.7785220541171913e-05, "loss": 0.5634, "step": 22381 }, { "epoch": 3.6536876045875677, "grad_norm": 1.5948424339294434, "learning_rate": 1.7785021373844138e-05, "loss": 0.396, "step": 22382 }, { "epoch": 3.653850863230072, "grad_norm": 2.7234880924224854, "learning_rate": 1.7784822198676843e-05, "loss": 0.4107, "step": 22383 }, { "epoch": 3.6540141218725766, "grad_norm": 2.0047192573547363, "learning_rate": 1.7784623015670237e-05, "loss": 0.4675, "step": 22384 }, { "epoch": 3.654177380515081, "grad_norm": 1.882136344909668, "learning_rate": 1.7784423824824514e-05, "loss": 0.4111, "step": 22385 }, { "epoch": 3.6543406391575854, "grad_norm": 2.2260327339172363, "learning_rate": 1.778422462613988e-05, "loss": 0.4977, "step": 22386 }, { "epoch": 3.65450389780009, "grad_norm": 2.075655698776245, "learning_rate": 1.7784025419616537e-05, "loss": 0.5796, "step": 22387 }, { "epoch": 3.6546671564425943, "grad_norm": 1.9760347604751587, "learning_rate": 1.7783826205254682e-05, "loss": 0.4874, "step": 22388 }, { "epoch": 3.6548304150850983, "grad_norm": 2.213862657546997, "learning_rate": 1.778362698305451e-05, "loss": 0.4633, "step": 22389 }, { "epoch": 3.6549936737276028, "grad_norm": 1.9600203037261963, "learning_rate": 1.7783427753016234e-05, "loss": 0.4419, "step": 22390 }, { "epoch": 3.655156932370107, "grad_norm": 1.7172023057937622, "learning_rate": 1.7783228515140044e-05, "loss": 0.4301, "step": 22391 }, { "epoch": 3.6553201910126116, "grad_norm": 1.7866120338439941, "learning_rate": 1.778302926942615e-05, "loss": 0.4129, "step": 22392 }, { "epoch": 3.655483449655116, "grad_norm": 1.918962001800537, "learning_rate": 1.7782830015874743e-05, "loss": 0.4875, "step": 22393 }, { "epoch": 3.6556467082976205, "grad_norm": 2.038156032562256, "learning_rate": 1.778263075448603e-05, "loss": 0.5635, "step": 22394 }, { "epoch": 3.655809966940125, "grad_norm": 1.8593599796295166, "learning_rate": 1.778243148526021e-05, "loss": 0.3974, "step": 22395 }, { "epoch": 3.6559732255826294, "grad_norm": 1.6222929954528809, "learning_rate": 1.7782232208197485e-05, "loss": 0.4098, "step": 22396 }, { "epoch": 3.656136484225134, "grad_norm": 1.7496681213378906, "learning_rate": 1.7782032923298053e-05, "loss": 0.4315, "step": 22397 }, { "epoch": 3.6562997428676383, "grad_norm": 1.5291106700897217, "learning_rate": 1.7781833630562114e-05, "loss": 0.4144, "step": 22398 }, { "epoch": 3.6564630015101427, "grad_norm": 1.897107720375061, "learning_rate": 1.7781634329989873e-05, "loss": 0.5369, "step": 22399 }, { "epoch": 3.6566262601526467, "grad_norm": 1.947068691253662, "learning_rate": 1.7781435021581527e-05, "loss": 0.4723, "step": 22400 }, { "epoch": 3.656789518795151, "grad_norm": 1.7083923816680908, "learning_rate": 1.778123570533728e-05, "loss": 0.4462, "step": 22401 }, { "epoch": 3.6569527774376556, "grad_norm": 1.8946540355682373, "learning_rate": 1.778103638125733e-05, "loss": 0.404, "step": 22402 }, { "epoch": 3.65711603608016, "grad_norm": 2.404752492904663, "learning_rate": 1.7780837049341877e-05, "loss": 0.5855, "step": 22403 }, { "epoch": 3.6572792947226644, "grad_norm": 1.85878324508667, "learning_rate": 1.7780637709591123e-05, "loss": 0.5048, "step": 22404 }, { "epoch": 3.657442553365169, "grad_norm": 2.274066209793091, "learning_rate": 1.778043836200527e-05, "loss": 0.5943, "step": 22405 }, { "epoch": 3.6576058120076733, "grad_norm": 1.9421266317367554, "learning_rate": 1.7780239006584515e-05, "loss": 0.4488, "step": 22406 }, { "epoch": 3.6577690706501773, "grad_norm": 2.154812812805176, "learning_rate": 1.7780039643329065e-05, "loss": 0.5418, "step": 22407 }, { "epoch": 3.6579323292926818, "grad_norm": 2.1577842235565186, "learning_rate": 1.7779840272239118e-05, "loss": 0.5304, "step": 22408 }, { "epoch": 3.658095587935186, "grad_norm": 1.8961687088012695, "learning_rate": 1.7779640893314873e-05, "loss": 0.5198, "step": 22409 }, { "epoch": 3.6582588465776906, "grad_norm": 1.7120375633239746, "learning_rate": 1.7779441506556528e-05, "loss": 0.4467, "step": 22410 }, { "epoch": 3.658422105220195, "grad_norm": 2.0129330158233643, "learning_rate": 1.777924211196429e-05, "loss": 0.5211, "step": 22411 }, { "epoch": 3.6585853638626995, "grad_norm": 1.6782236099243164, "learning_rate": 1.7779042709538357e-05, "loss": 0.5075, "step": 22412 }, { "epoch": 3.658748622505204, "grad_norm": 1.8590404987335205, "learning_rate": 1.777884329927893e-05, "loss": 0.4129, "step": 22413 }, { "epoch": 3.6589118811477084, "grad_norm": 1.7141211032867432, "learning_rate": 1.777864388118621e-05, "loss": 0.4479, "step": 22414 }, { "epoch": 3.659075139790213, "grad_norm": 1.753636121749878, "learning_rate": 1.7778444455260398e-05, "loss": 0.4402, "step": 22415 }, { "epoch": 3.6592383984327173, "grad_norm": 1.8484007120132446, "learning_rate": 1.7778245021501696e-05, "loss": 0.5257, "step": 22416 }, { "epoch": 3.6594016570752217, "grad_norm": 1.925479769706726, "learning_rate": 1.77780455799103e-05, "loss": 0.477, "step": 22417 }, { "epoch": 3.6595649157177257, "grad_norm": 1.9635902643203735, "learning_rate": 1.7777846130486415e-05, "loss": 0.3926, "step": 22418 }, { "epoch": 3.65972817436023, "grad_norm": 1.9456751346588135, "learning_rate": 1.7777646673230243e-05, "loss": 0.4573, "step": 22419 }, { "epoch": 3.6598914330027346, "grad_norm": 2.1104753017425537, "learning_rate": 1.777744720814198e-05, "loss": 0.5288, "step": 22420 }, { "epoch": 3.660054691645239, "grad_norm": 1.8694359064102173, "learning_rate": 1.7777247735221832e-05, "loss": 0.4177, "step": 22421 }, { "epoch": 3.6602179502877434, "grad_norm": 2.0288069248199463, "learning_rate": 1.7777048254469996e-05, "loss": 0.495, "step": 22422 }, { "epoch": 3.660381208930248, "grad_norm": 2.119198799133301, "learning_rate": 1.7776848765886676e-05, "loss": 0.5076, "step": 22423 }, { "epoch": 3.660544467572752, "grad_norm": 2.2290356159210205, "learning_rate": 1.777664926947207e-05, "loss": 0.4749, "step": 22424 }, { "epoch": 3.6607077262152563, "grad_norm": 2.026521921157837, "learning_rate": 1.777644976522638e-05, "loss": 0.4234, "step": 22425 }, { "epoch": 3.6608709848577607, "grad_norm": 2.111849546432495, "learning_rate": 1.777625025314981e-05, "loss": 0.4879, "step": 22426 }, { "epoch": 3.661034243500265, "grad_norm": 1.4521069526672363, "learning_rate": 1.7776050733242555e-05, "loss": 0.3782, "step": 22427 }, { "epoch": 3.6611975021427696, "grad_norm": 1.8426274061203003, "learning_rate": 1.7775851205504823e-05, "loss": 0.45, "step": 22428 }, { "epoch": 3.661360760785274, "grad_norm": 2.6842174530029297, "learning_rate": 1.7775651669936806e-05, "loss": 0.5022, "step": 22429 }, { "epoch": 3.6615240194277785, "grad_norm": 1.9781715869903564, "learning_rate": 1.777545212653871e-05, "loss": 0.5294, "step": 22430 }, { "epoch": 3.661687278070283, "grad_norm": 1.615718126296997, "learning_rate": 1.777525257531074e-05, "loss": 0.4012, "step": 22431 }, { "epoch": 3.6618505367127874, "grad_norm": 1.9443203210830688, "learning_rate": 1.777505301625309e-05, "loss": 0.4456, "step": 22432 }, { "epoch": 3.662013795355292, "grad_norm": 1.9159280061721802, "learning_rate": 1.7774853449365964e-05, "loss": 0.3981, "step": 22433 }, { "epoch": 3.6621770539977962, "grad_norm": 2.354854106903076, "learning_rate": 1.7774653874649565e-05, "loss": 0.5839, "step": 22434 }, { "epoch": 3.6623403126403002, "grad_norm": 2.157796621322632, "learning_rate": 1.7774454292104087e-05, "loss": 0.4864, "step": 22435 }, { "epoch": 3.6625035712828047, "grad_norm": 1.9007441997528076, "learning_rate": 1.777425470172974e-05, "loss": 0.4494, "step": 22436 }, { "epoch": 3.662666829925309, "grad_norm": 2.5063092708587646, "learning_rate": 1.7774055103526718e-05, "loss": 0.5535, "step": 22437 }, { "epoch": 3.6628300885678136, "grad_norm": 1.9301866292953491, "learning_rate": 1.7773855497495223e-05, "loss": 0.4317, "step": 22438 }, { "epoch": 3.662993347210318, "grad_norm": 1.6268908977508545, "learning_rate": 1.7773655883635463e-05, "loss": 0.3988, "step": 22439 }, { "epoch": 3.6631566058528224, "grad_norm": 1.7507965564727783, "learning_rate": 1.7773456261947627e-05, "loss": 0.4644, "step": 22440 }, { "epoch": 3.663319864495327, "grad_norm": 2.1462371349334717, "learning_rate": 1.7773256632431927e-05, "loss": 0.4874, "step": 22441 }, { "epoch": 3.663483123137831, "grad_norm": 2.112705707550049, "learning_rate": 1.7773056995088557e-05, "loss": 0.519, "step": 22442 }, { "epoch": 3.6636463817803353, "grad_norm": 1.8613348007202148, "learning_rate": 1.7772857349917726e-05, "loss": 0.4648, "step": 22443 }, { "epoch": 3.6638096404228397, "grad_norm": 2.042346239089966, "learning_rate": 1.7772657696919623e-05, "loss": 0.4543, "step": 22444 }, { "epoch": 3.663972899065344, "grad_norm": 2.0664069652557373, "learning_rate": 1.777245803609446e-05, "loss": 0.5116, "step": 22445 }, { "epoch": 3.6641361577078486, "grad_norm": 1.8013904094696045, "learning_rate": 1.777225836744243e-05, "loss": 0.4477, "step": 22446 }, { "epoch": 3.664299416350353, "grad_norm": 2.0006022453308105, "learning_rate": 1.777205869096374e-05, "loss": 0.5061, "step": 22447 }, { "epoch": 3.6644626749928575, "grad_norm": 2.3036210536956787, "learning_rate": 1.7771859006658593e-05, "loss": 0.509, "step": 22448 }, { "epoch": 3.664625933635362, "grad_norm": 1.9798282384872437, "learning_rate": 1.7771659314527178e-05, "loss": 0.5161, "step": 22449 }, { "epoch": 3.6647891922778664, "grad_norm": 2.077794075012207, "learning_rate": 1.777145961456971e-05, "loss": 0.5175, "step": 22450 }, { "epoch": 3.664952450920371, "grad_norm": 1.925161361694336, "learning_rate": 1.7771259906786383e-05, "loss": 0.4679, "step": 22451 }, { "epoch": 3.6651157095628752, "grad_norm": 1.9558348655700684, "learning_rate": 1.7771060191177397e-05, "loss": 0.4853, "step": 22452 }, { "epoch": 3.6652789682053792, "grad_norm": 1.717094898223877, "learning_rate": 1.7770860467742957e-05, "loss": 0.4585, "step": 22453 }, { "epoch": 3.6654422268478837, "grad_norm": 2.006680965423584, "learning_rate": 1.777066073648327e-05, "loss": 0.4667, "step": 22454 }, { "epoch": 3.665605485490388, "grad_norm": 2.1161680221557617, "learning_rate": 1.777046099739852e-05, "loss": 0.5265, "step": 22455 }, { "epoch": 3.6657687441328926, "grad_norm": 2.048006296157837, "learning_rate": 1.7770261250488918e-05, "loss": 0.4952, "step": 22456 }, { "epoch": 3.665932002775397, "grad_norm": 1.8958491086959839, "learning_rate": 1.777006149575467e-05, "loss": 0.422, "step": 22457 }, { "epoch": 3.6660952614179014, "grad_norm": 1.744854211807251, "learning_rate": 1.776986173319597e-05, "loss": 0.3807, "step": 22458 }, { "epoch": 3.6662585200604054, "grad_norm": 1.8040794134140015, "learning_rate": 1.7769661962813017e-05, "loss": 0.4121, "step": 22459 }, { "epoch": 3.66642177870291, "grad_norm": 1.8944091796875, "learning_rate": 1.7769462184606022e-05, "loss": 0.469, "step": 22460 }, { "epoch": 3.6665850373454143, "grad_norm": 1.6642768383026123, "learning_rate": 1.776926239857518e-05, "loss": 0.4425, "step": 22461 }, { "epoch": 3.6667482959879187, "grad_norm": 2.1072235107421875, "learning_rate": 1.7769062604720692e-05, "loss": 0.4772, "step": 22462 }, { "epoch": 3.666911554630423, "grad_norm": 2.1657180786132812, "learning_rate": 1.776886280304276e-05, "loss": 0.4969, "step": 22463 }, { "epoch": 3.6670748132729276, "grad_norm": 2.055373430252075, "learning_rate": 1.7768662993541584e-05, "loss": 0.5909, "step": 22464 }, { "epoch": 3.667238071915432, "grad_norm": 2.167182683944702, "learning_rate": 1.7768463176217368e-05, "loss": 0.5353, "step": 22465 }, { "epoch": 3.6674013305579365, "grad_norm": 2.0124292373657227, "learning_rate": 1.776826335107031e-05, "loss": 0.5107, "step": 22466 }, { "epoch": 3.667564589200441, "grad_norm": 1.7322502136230469, "learning_rate": 1.7768063518100617e-05, "loss": 0.4071, "step": 22467 }, { "epoch": 3.6677278478429454, "grad_norm": 2.538423776626587, "learning_rate": 1.7767863677308482e-05, "loss": 0.5448, "step": 22468 }, { "epoch": 3.66789110648545, "grad_norm": 2.8923802375793457, "learning_rate": 1.776766382869411e-05, "loss": 0.468, "step": 22469 }, { "epoch": 3.668054365127954, "grad_norm": 2.2023158073425293, "learning_rate": 1.7767463972257706e-05, "loss": 0.5131, "step": 22470 }, { "epoch": 3.6682176237704582, "grad_norm": 1.8018794059753418, "learning_rate": 1.7767264107999465e-05, "loss": 0.4232, "step": 22471 }, { "epoch": 3.6683808824129627, "grad_norm": 2.0618350505828857, "learning_rate": 1.7767064235919594e-05, "loss": 0.4443, "step": 22472 }, { "epoch": 3.668544141055467, "grad_norm": 2.17514967918396, "learning_rate": 1.7766864356018287e-05, "loss": 0.6798, "step": 22473 }, { "epoch": 3.6687073996979715, "grad_norm": 1.6413689851760864, "learning_rate": 1.7766664468295753e-05, "loss": 0.4282, "step": 22474 }, { "epoch": 3.668870658340476, "grad_norm": 2.1025607585906982, "learning_rate": 1.7766464572752188e-05, "loss": 0.484, "step": 22475 }, { "epoch": 3.6690339169829804, "grad_norm": 2.1742453575134277, "learning_rate": 1.7766264669387795e-05, "loss": 0.5037, "step": 22476 }, { "epoch": 3.6691971756254844, "grad_norm": 2.2033839225769043, "learning_rate": 1.776606475820278e-05, "loss": 0.5292, "step": 22477 }, { "epoch": 3.669360434267989, "grad_norm": 1.9608436822891235, "learning_rate": 1.7765864839197334e-05, "loss": 0.4632, "step": 22478 }, { "epoch": 3.6695236929104933, "grad_norm": 1.3851292133331299, "learning_rate": 1.776566491237167e-05, "loss": 0.3487, "step": 22479 }, { "epoch": 3.6696869515529977, "grad_norm": 2.291963577270508, "learning_rate": 1.7765464977725977e-05, "loss": 0.4364, "step": 22480 }, { "epoch": 3.669850210195502, "grad_norm": 1.9384984970092773, "learning_rate": 1.776526503526047e-05, "loss": 0.4983, "step": 22481 }, { "epoch": 3.6700134688380066, "grad_norm": 2.194282293319702, "learning_rate": 1.7765065084975336e-05, "loss": 0.5706, "step": 22482 }, { "epoch": 3.670176727480511, "grad_norm": 2.1957879066467285, "learning_rate": 1.7764865126870788e-05, "loss": 0.4858, "step": 22483 }, { "epoch": 3.6703399861230155, "grad_norm": 1.7065232992172241, "learning_rate": 1.776466516094702e-05, "loss": 0.4222, "step": 22484 }, { "epoch": 3.67050324476552, "grad_norm": 2.1453592777252197, "learning_rate": 1.776446518720424e-05, "loss": 0.471, "step": 22485 }, { "epoch": 3.6706665034080244, "grad_norm": 1.8088390827178955, "learning_rate": 1.776426520564264e-05, "loss": 0.4137, "step": 22486 }, { "epoch": 3.670829762050529, "grad_norm": 1.9937180280685425, "learning_rate": 1.7764065216262435e-05, "loss": 0.4993, "step": 22487 }, { "epoch": 3.670993020693033, "grad_norm": 2.6800708770751953, "learning_rate": 1.7763865219063813e-05, "loss": 0.5063, "step": 22488 }, { "epoch": 3.6711562793355372, "grad_norm": 1.7987886667251587, "learning_rate": 1.7763665214046982e-05, "loss": 0.3915, "step": 22489 }, { "epoch": 3.6713195379780417, "grad_norm": 2.0353641510009766, "learning_rate": 1.776346520121214e-05, "loss": 0.464, "step": 22490 }, { "epoch": 3.671482796620546, "grad_norm": 1.889338731765747, "learning_rate": 1.7763265180559493e-05, "loss": 0.4652, "step": 22491 }, { "epoch": 3.6716460552630505, "grad_norm": 1.9534378051757812, "learning_rate": 1.7763065152089242e-05, "loss": 0.385, "step": 22492 }, { "epoch": 3.671809313905555, "grad_norm": 2.0139963626861572, "learning_rate": 1.776286511580158e-05, "loss": 0.4986, "step": 22493 }, { "epoch": 3.6719725725480594, "grad_norm": 2.384521484375, "learning_rate": 1.776266507169672e-05, "loss": 0.5316, "step": 22494 }, { "epoch": 3.6721358311905634, "grad_norm": 1.6144839525222778, "learning_rate": 1.7762465019774857e-05, "loss": 0.3799, "step": 22495 }, { "epoch": 3.672299089833068, "grad_norm": 2.4623074531555176, "learning_rate": 1.7762264960036195e-05, "loss": 0.4479, "step": 22496 }, { "epoch": 3.6724623484755723, "grad_norm": 1.698807954788208, "learning_rate": 1.7762064892480936e-05, "loss": 0.4604, "step": 22497 }, { "epoch": 3.6726256071180767, "grad_norm": 1.9938009977340698, "learning_rate": 1.7761864817109274e-05, "loss": 0.4221, "step": 22498 }, { "epoch": 3.672788865760581, "grad_norm": 1.8456218242645264, "learning_rate": 1.776166473392142e-05, "loss": 0.437, "step": 22499 }, { "epoch": 3.6729521244030856, "grad_norm": 2.289736270904541, "learning_rate": 1.776146464291757e-05, "loss": 0.5156, "step": 22500 }, { "epoch": 3.67311538304559, "grad_norm": 2.5736966133117676, "learning_rate": 1.7761264544097926e-05, "loss": 0.5502, "step": 22501 }, { "epoch": 3.6732786416880945, "grad_norm": 1.8314142227172852, "learning_rate": 1.7761064437462695e-05, "loss": 0.5292, "step": 22502 }, { "epoch": 3.673441900330599, "grad_norm": 2.2922091484069824, "learning_rate": 1.7760864323012074e-05, "loss": 0.4886, "step": 22503 }, { "epoch": 3.6736051589731034, "grad_norm": 2.0926976203918457, "learning_rate": 1.7760664200746262e-05, "loss": 0.4608, "step": 22504 }, { "epoch": 3.673768417615608, "grad_norm": 1.9876596927642822, "learning_rate": 1.776046407066546e-05, "loss": 0.5044, "step": 22505 }, { "epoch": 3.673931676258112, "grad_norm": 1.8399171829223633, "learning_rate": 1.776026393276988e-05, "loss": 0.4471, "step": 22506 }, { "epoch": 3.6740949349006162, "grad_norm": 2.137864351272583, "learning_rate": 1.776006378705971e-05, "loss": 0.5032, "step": 22507 }, { "epoch": 3.6742581935431207, "grad_norm": 2.187730073928833, "learning_rate": 1.775986363353516e-05, "loss": 0.455, "step": 22508 }, { "epoch": 3.674421452185625, "grad_norm": 1.9397547245025635, "learning_rate": 1.775966347219643e-05, "loss": 0.4989, "step": 22509 }, { "epoch": 3.6745847108281295, "grad_norm": 1.9493346214294434, "learning_rate": 1.7759463303043723e-05, "loss": 0.406, "step": 22510 }, { "epoch": 3.674747969470634, "grad_norm": 1.8702490329742432, "learning_rate": 1.7759263126077236e-05, "loss": 0.4304, "step": 22511 }, { "epoch": 3.674911228113138, "grad_norm": 2.0156259536743164, "learning_rate": 1.775906294129717e-05, "loss": 0.5141, "step": 22512 }, { "epoch": 3.6750744867556424, "grad_norm": 2.3772201538085938, "learning_rate": 1.7758862748703735e-05, "loss": 0.5494, "step": 22513 }, { "epoch": 3.675237745398147, "grad_norm": 1.8794108629226685, "learning_rate": 1.7758662548297128e-05, "loss": 0.4797, "step": 22514 }, { "epoch": 3.6754010040406513, "grad_norm": 2.0986545085906982, "learning_rate": 1.7758462340077544e-05, "loss": 0.4692, "step": 22515 }, { "epoch": 3.6755642626831557, "grad_norm": 1.876260757446289, "learning_rate": 1.7758262124045195e-05, "loss": 0.466, "step": 22516 }, { "epoch": 3.67572752132566, "grad_norm": 2.182234525680542, "learning_rate": 1.7758061900200276e-05, "loss": 0.5359, "step": 22517 }, { "epoch": 3.6758907799681646, "grad_norm": 1.914815902709961, "learning_rate": 1.775786166854299e-05, "loss": 0.5056, "step": 22518 }, { "epoch": 3.676054038610669, "grad_norm": 1.866990089416504, "learning_rate": 1.7757661429073543e-05, "loss": 0.3933, "step": 22519 }, { "epoch": 3.6762172972531735, "grad_norm": 2.0504894256591797, "learning_rate": 1.775746118179213e-05, "loss": 0.4718, "step": 22520 }, { "epoch": 3.676380555895678, "grad_norm": 1.6681385040283203, "learning_rate": 1.7757260926698958e-05, "loss": 0.4168, "step": 22521 }, { "epoch": 3.6765438145381824, "grad_norm": 2.3603148460388184, "learning_rate": 1.7757060663794225e-05, "loss": 0.5924, "step": 22522 }, { "epoch": 3.6767070731806863, "grad_norm": 1.9641660451889038, "learning_rate": 1.7756860393078135e-05, "loss": 0.4884, "step": 22523 }, { "epoch": 3.676870331823191, "grad_norm": 2.067687511444092, "learning_rate": 1.7756660114550885e-05, "loss": 0.5201, "step": 22524 }, { "epoch": 3.6770335904656952, "grad_norm": 2.045755386352539, "learning_rate": 1.7756459828212685e-05, "loss": 0.4442, "step": 22525 }, { "epoch": 3.6771968491081997, "grad_norm": 2.1903908252716064, "learning_rate": 1.775625953406373e-05, "loss": 0.5501, "step": 22526 }, { "epoch": 3.677360107750704, "grad_norm": 2.1149380207061768, "learning_rate": 1.775605923210422e-05, "loss": 0.542, "step": 22527 }, { "epoch": 3.6775233663932085, "grad_norm": 2.0194122791290283, "learning_rate": 1.7755858922334365e-05, "loss": 0.4819, "step": 22528 }, { "epoch": 3.677686625035713, "grad_norm": 1.787771463394165, "learning_rate": 1.7755658604754364e-05, "loss": 0.3653, "step": 22529 }, { "epoch": 3.677849883678217, "grad_norm": 1.6377027034759521, "learning_rate": 1.7755458279364414e-05, "loss": 0.4167, "step": 22530 }, { "epoch": 3.6780131423207214, "grad_norm": 2.0914101600646973, "learning_rate": 1.7755257946164718e-05, "loss": 0.5067, "step": 22531 }, { "epoch": 3.678176400963226, "grad_norm": 2.125473737716675, "learning_rate": 1.775505760515548e-05, "loss": 0.5069, "step": 22532 }, { "epoch": 3.6783396596057303, "grad_norm": 1.8771744966506958, "learning_rate": 1.7754857256336904e-05, "loss": 0.4536, "step": 22533 }, { "epoch": 3.6785029182482347, "grad_norm": 2.2294349670410156, "learning_rate": 1.7754656899709185e-05, "loss": 0.4504, "step": 22534 }, { "epoch": 3.678666176890739, "grad_norm": 2.0039944648742676, "learning_rate": 1.7754456535272527e-05, "loss": 0.4657, "step": 22535 }, { "epoch": 3.6788294355332436, "grad_norm": 1.9394651651382446, "learning_rate": 1.775425616302714e-05, "loss": 0.4413, "step": 22536 }, { "epoch": 3.678992694175748, "grad_norm": 2.0772604942321777, "learning_rate": 1.7754055782973214e-05, "loss": 0.5325, "step": 22537 }, { "epoch": 3.6791559528182525, "grad_norm": 1.9925020933151245, "learning_rate": 1.775385539511096e-05, "loss": 0.4957, "step": 22538 }, { "epoch": 3.679319211460757, "grad_norm": 2.1672537326812744, "learning_rate": 1.775365499944057e-05, "loss": 0.5006, "step": 22539 }, { "epoch": 3.6794824701032613, "grad_norm": 1.791269302368164, "learning_rate": 1.7753454595962257e-05, "loss": 0.4712, "step": 22540 }, { "epoch": 3.6796457287457653, "grad_norm": 2.09488844871521, "learning_rate": 1.7753254184676214e-05, "loss": 0.519, "step": 22541 }, { "epoch": 3.67980898738827, "grad_norm": 1.9440083503723145, "learning_rate": 1.7753053765582648e-05, "loss": 0.4561, "step": 22542 }, { "epoch": 3.679972246030774, "grad_norm": 1.7787131071090698, "learning_rate": 1.7752853338681755e-05, "loss": 0.4239, "step": 22543 }, { "epoch": 3.6801355046732787, "grad_norm": 2.1386358737945557, "learning_rate": 1.7752652903973747e-05, "loss": 0.5606, "step": 22544 }, { "epoch": 3.680298763315783, "grad_norm": 2.4518816471099854, "learning_rate": 1.7752452461458815e-05, "loss": 0.5091, "step": 22545 }, { "epoch": 3.6804620219582875, "grad_norm": 2.559586524963379, "learning_rate": 1.7752252011137167e-05, "loss": 0.5034, "step": 22546 }, { "epoch": 3.680625280600792, "grad_norm": 2.028212547302246, "learning_rate": 1.7752051553009002e-05, "loss": 0.4142, "step": 22547 }, { "epoch": 3.680788539243296, "grad_norm": 1.633283019065857, "learning_rate": 1.7751851087074524e-05, "loss": 0.4085, "step": 22548 }, { "epoch": 3.6809517978858004, "grad_norm": 2.151711940765381, "learning_rate": 1.7751650613333936e-05, "loss": 0.6361, "step": 22549 }, { "epoch": 3.681115056528305, "grad_norm": 1.7081454992294312, "learning_rate": 1.7751450131787435e-05, "loss": 0.4502, "step": 22550 }, { "epoch": 3.6812783151708093, "grad_norm": 2.2623963356018066, "learning_rate": 1.775124964243523e-05, "loss": 0.5264, "step": 22551 }, { "epoch": 3.6814415738133137, "grad_norm": 1.9153774976730347, "learning_rate": 1.775104914527751e-05, "loss": 0.5116, "step": 22552 }, { "epoch": 3.681604832455818, "grad_norm": 1.6309800148010254, "learning_rate": 1.7750848640314493e-05, "loss": 0.3567, "step": 22553 }, { "epoch": 3.6817680910983226, "grad_norm": 1.9512583017349243, "learning_rate": 1.775064812754637e-05, "loss": 0.4636, "step": 22554 }, { "epoch": 3.681931349740827, "grad_norm": 1.8953527212142944, "learning_rate": 1.7750447606973348e-05, "loss": 0.4616, "step": 22555 }, { "epoch": 3.6820946083833315, "grad_norm": 1.8328437805175781, "learning_rate": 1.7750247078595628e-05, "loss": 0.4551, "step": 22556 }, { "epoch": 3.682257867025836, "grad_norm": 2.305293083190918, "learning_rate": 1.775004654241341e-05, "loss": 0.4716, "step": 22557 }, { "epoch": 3.6824211256683403, "grad_norm": 1.8306286334991455, "learning_rate": 1.7749845998426897e-05, "loss": 0.4781, "step": 22558 }, { "epoch": 3.6825843843108443, "grad_norm": 2.080498695373535, "learning_rate": 1.7749645446636292e-05, "loss": 0.5334, "step": 22559 }, { "epoch": 3.6827476429533488, "grad_norm": 1.9007569551467896, "learning_rate": 1.7749444887041797e-05, "loss": 0.4659, "step": 22560 }, { "epoch": 3.682910901595853, "grad_norm": 2.405712604522705, "learning_rate": 1.7749244319643614e-05, "loss": 0.4864, "step": 22561 }, { "epoch": 3.6830741602383577, "grad_norm": 2.207479476928711, "learning_rate": 1.774904374444194e-05, "loss": 0.5654, "step": 22562 }, { "epoch": 3.683237418880862, "grad_norm": 2.221914052963257, "learning_rate": 1.774884316143698e-05, "loss": 0.4848, "step": 22563 }, { "epoch": 3.6834006775233665, "grad_norm": 2.192126750946045, "learning_rate": 1.7748642570628942e-05, "loss": 0.5374, "step": 22564 }, { "epoch": 3.6835639361658705, "grad_norm": 2.0213143825531006, "learning_rate": 1.7748441972018022e-05, "loss": 0.4733, "step": 22565 }, { "epoch": 3.683727194808375, "grad_norm": 2.1391613483428955, "learning_rate": 1.774824136560442e-05, "loss": 0.46, "step": 22566 }, { "epoch": 3.6838904534508794, "grad_norm": 1.7485368251800537, "learning_rate": 1.7748040751388345e-05, "loss": 0.4107, "step": 22567 }, { "epoch": 3.684053712093384, "grad_norm": 2.1267013549804688, "learning_rate": 1.7747840129369994e-05, "loss": 0.427, "step": 22568 }, { "epoch": 3.6842169707358883, "grad_norm": 1.7924203872680664, "learning_rate": 1.7747639499549566e-05, "loss": 0.4086, "step": 22569 }, { "epoch": 3.6843802293783927, "grad_norm": 1.8756608963012695, "learning_rate": 1.7747438861927272e-05, "loss": 0.4992, "step": 22570 }, { "epoch": 3.684543488020897, "grad_norm": 1.9620383977890015, "learning_rate": 1.7747238216503308e-05, "loss": 0.4446, "step": 22571 }, { "epoch": 3.6847067466634016, "grad_norm": 2.055100202560425, "learning_rate": 1.7747037563277875e-05, "loss": 0.5561, "step": 22572 }, { "epoch": 3.684870005305906, "grad_norm": 1.8024566173553467, "learning_rate": 1.774683690225118e-05, "loss": 0.4315, "step": 22573 }, { "epoch": 3.6850332639484105, "grad_norm": 2.2819857597351074, "learning_rate": 1.774663623342342e-05, "loss": 0.5001, "step": 22574 }, { "epoch": 3.685196522590915, "grad_norm": 1.87857186794281, "learning_rate": 1.77464355567948e-05, "loss": 0.4897, "step": 22575 }, { "epoch": 3.685359781233419, "grad_norm": 2.427971839904785, "learning_rate": 1.7746234872365523e-05, "loss": 0.5397, "step": 22576 }, { "epoch": 3.6855230398759233, "grad_norm": 2.119966983795166, "learning_rate": 1.774603418013579e-05, "loss": 0.5154, "step": 22577 }, { "epoch": 3.6856862985184278, "grad_norm": 2.1360254287719727, "learning_rate": 1.77458334801058e-05, "loss": 0.4962, "step": 22578 }, { "epoch": 3.685849557160932, "grad_norm": 2.040832042694092, "learning_rate": 1.7745632772275757e-05, "loss": 0.4861, "step": 22579 }, { "epoch": 3.6860128158034366, "grad_norm": 1.6377930641174316, "learning_rate": 1.7745432056645862e-05, "loss": 0.4424, "step": 22580 }, { "epoch": 3.686176074445941, "grad_norm": 2.075577974319458, "learning_rate": 1.7745231333216323e-05, "loss": 0.6306, "step": 22581 }, { "epoch": 3.6863393330884455, "grad_norm": 2.302476644515991, "learning_rate": 1.7745030601987338e-05, "loss": 0.4603, "step": 22582 }, { "epoch": 3.6865025917309495, "grad_norm": 1.5639688968658447, "learning_rate": 1.774482986295911e-05, "loss": 0.3968, "step": 22583 }, { "epoch": 3.686665850373454, "grad_norm": 2.0373947620391846, "learning_rate": 1.774462911613184e-05, "loss": 0.4622, "step": 22584 }, { "epoch": 3.6868291090159584, "grad_norm": 2.100177049636841, "learning_rate": 1.774442836150573e-05, "loss": 0.4961, "step": 22585 }, { "epoch": 3.686992367658463, "grad_norm": 1.770605444908142, "learning_rate": 1.774422759908098e-05, "loss": 0.3465, "step": 22586 }, { "epoch": 3.6871556263009673, "grad_norm": 2.1638691425323486, "learning_rate": 1.7744026828857798e-05, "loss": 0.4659, "step": 22587 }, { "epoch": 3.6873188849434717, "grad_norm": 1.6470953226089478, "learning_rate": 1.7743826050836382e-05, "loss": 0.4085, "step": 22588 }, { "epoch": 3.687482143585976, "grad_norm": 2.0982425212860107, "learning_rate": 1.7743625265016937e-05, "loss": 0.4897, "step": 22589 }, { "epoch": 3.6876454022284806, "grad_norm": 1.8215526342391968, "learning_rate": 1.7743424471399662e-05, "loss": 0.4507, "step": 22590 }, { "epoch": 3.687808660870985, "grad_norm": 2.2261295318603516, "learning_rate": 1.7743223669984758e-05, "loss": 0.4476, "step": 22591 }, { "epoch": 3.6879719195134895, "grad_norm": 2.1864657402038574, "learning_rate": 1.7743022860772434e-05, "loss": 0.7628, "step": 22592 }, { "epoch": 3.688135178155994, "grad_norm": 1.7968361377716064, "learning_rate": 1.7742822043762888e-05, "loss": 0.4528, "step": 22593 }, { "epoch": 3.688298436798498, "grad_norm": 2.06701922416687, "learning_rate": 1.774262121895632e-05, "loss": 0.4862, "step": 22594 }, { "epoch": 3.6884616954410023, "grad_norm": 2.0207715034484863, "learning_rate": 1.7742420386352933e-05, "loss": 0.502, "step": 22595 }, { "epoch": 3.6886249540835068, "grad_norm": 2.4895875453948975, "learning_rate": 1.7742219545952934e-05, "loss": 0.5312, "step": 22596 }, { "epoch": 3.688788212726011, "grad_norm": 1.7755155563354492, "learning_rate": 1.7742018697756522e-05, "loss": 0.4557, "step": 22597 }, { "epoch": 3.6889514713685156, "grad_norm": 1.941685438156128, "learning_rate": 1.7741817841763897e-05, "loss": 0.4654, "step": 22598 }, { "epoch": 3.68911473001102, "grad_norm": 1.6946049928665161, "learning_rate": 1.7741616977975266e-05, "loss": 0.4276, "step": 22599 }, { "epoch": 3.689277988653524, "grad_norm": 2.0887205600738525, "learning_rate": 1.7741416106390828e-05, "loss": 0.5046, "step": 22600 }, { "epoch": 3.6894412472960285, "grad_norm": 2.5414185523986816, "learning_rate": 1.7741215227010783e-05, "loss": 0.673, "step": 22601 }, { "epoch": 3.689604505938533, "grad_norm": 2.0199458599090576, "learning_rate": 1.7741014339835338e-05, "loss": 0.423, "step": 22602 }, { "epoch": 3.6897677645810374, "grad_norm": 1.9886295795440674, "learning_rate": 1.7740813444864696e-05, "loss": 0.4234, "step": 22603 }, { "epoch": 3.689931023223542, "grad_norm": 1.8028677701950073, "learning_rate": 1.7740612542099054e-05, "loss": 0.4323, "step": 22604 }, { "epoch": 3.6900942818660463, "grad_norm": 2.013157367706299, "learning_rate": 1.7740411631538618e-05, "loss": 0.4666, "step": 22605 }, { "epoch": 3.6902575405085507, "grad_norm": 2.001412868499756, "learning_rate": 1.7740210713183592e-05, "loss": 0.4749, "step": 22606 }, { "epoch": 3.690420799151055, "grad_norm": 1.8571668863296509, "learning_rate": 1.7740009787034172e-05, "loss": 0.4663, "step": 22607 }, { "epoch": 3.6905840577935596, "grad_norm": 1.830897331237793, "learning_rate": 1.773980885309057e-05, "loss": 0.4332, "step": 22608 }, { "epoch": 3.690747316436064, "grad_norm": 2.101346254348755, "learning_rate": 1.7739607911352975e-05, "loss": 0.4841, "step": 22609 }, { "epoch": 3.6909105750785685, "grad_norm": 2.3275065422058105, "learning_rate": 1.77394069618216e-05, "loss": 0.448, "step": 22610 }, { "epoch": 3.6910738337210725, "grad_norm": 2.2216296195983887, "learning_rate": 1.7739206004496645e-05, "loss": 0.482, "step": 22611 }, { "epoch": 3.691237092363577, "grad_norm": 2.2711174488067627, "learning_rate": 1.773900503937831e-05, "loss": 0.5387, "step": 22612 }, { "epoch": 3.6914003510060813, "grad_norm": 2.159762144088745, "learning_rate": 1.7738804066466802e-05, "loss": 0.4821, "step": 22613 }, { "epoch": 3.6915636096485858, "grad_norm": 1.9215916395187378, "learning_rate": 1.7738603085762318e-05, "loss": 0.5127, "step": 22614 }, { "epoch": 3.69172686829109, "grad_norm": 1.6510564088821411, "learning_rate": 1.7738402097265063e-05, "loss": 0.4794, "step": 22615 }, { "epoch": 3.6918901269335946, "grad_norm": 1.948182463645935, "learning_rate": 1.773820110097524e-05, "loss": 0.4496, "step": 22616 }, { "epoch": 3.692053385576099, "grad_norm": 1.8772952556610107, "learning_rate": 1.773800009689305e-05, "loss": 0.4904, "step": 22617 }, { "epoch": 3.692216644218603, "grad_norm": 2.0522282123565674, "learning_rate": 1.7737799085018698e-05, "loss": 0.5406, "step": 22618 }, { "epoch": 3.6923799028611075, "grad_norm": 2.0267105102539062, "learning_rate": 1.773759806535238e-05, "loss": 0.4597, "step": 22619 }, { "epoch": 3.692543161503612, "grad_norm": 1.8477516174316406, "learning_rate": 1.7737397037894305e-05, "loss": 0.4283, "step": 22620 }, { "epoch": 3.6927064201461164, "grad_norm": 1.7658097743988037, "learning_rate": 1.7737196002644673e-05, "loss": 0.4767, "step": 22621 }, { "epoch": 3.692869678788621, "grad_norm": 2.2970738410949707, "learning_rate": 1.7736994959603687e-05, "loss": 0.53, "step": 22622 }, { "epoch": 3.6930329374311253, "grad_norm": 1.883893370628357, "learning_rate": 1.773679390877155e-05, "loss": 0.3717, "step": 22623 }, { "epoch": 3.6931961960736297, "grad_norm": 2.14288330078125, "learning_rate": 1.7736592850148464e-05, "loss": 0.5507, "step": 22624 }, { "epoch": 3.693359454716134, "grad_norm": 1.9903130531311035, "learning_rate": 1.773639178373463e-05, "loss": 0.529, "step": 22625 }, { "epoch": 3.6935227133586386, "grad_norm": 2.0320825576782227, "learning_rate": 1.773619070953025e-05, "loss": 0.4839, "step": 22626 }, { "epoch": 3.693685972001143, "grad_norm": 1.794758677482605, "learning_rate": 1.7735989627535527e-05, "loss": 0.4401, "step": 22627 }, { "epoch": 3.6938492306436475, "grad_norm": 2.2599589824676514, "learning_rate": 1.773578853775067e-05, "loss": 0.524, "step": 22628 }, { "epoch": 3.6940124892861514, "grad_norm": 1.856946349143982, "learning_rate": 1.773558744017587e-05, "loss": 0.5072, "step": 22629 }, { "epoch": 3.694175747928656, "grad_norm": 2.3810856342315674, "learning_rate": 1.7735386334811343e-05, "loss": 0.5525, "step": 22630 }, { "epoch": 3.6943390065711603, "grad_norm": 1.4796767234802246, "learning_rate": 1.773518522165728e-05, "loss": 0.3474, "step": 22631 }, { "epoch": 3.6945022652136648, "grad_norm": 2.010093927383423, "learning_rate": 1.7734984100713883e-05, "loss": 0.4851, "step": 22632 }, { "epoch": 3.694665523856169, "grad_norm": 2.0061802864074707, "learning_rate": 1.7734782971981365e-05, "loss": 0.4303, "step": 22633 }, { "epoch": 3.6948287824986736, "grad_norm": 1.9469337463378906, "learning_rate": 1.773458183545992e-05, "loss": 0.5491, "step": 22634 }, { "epoch": 3.694992041141178, "grad_norm": 1.521757960319519, "learning_rate": 1.7734380691149753e-05, "loss": 0.3973, "step": 22635 }, { "epoch": 3.695155299783682, "grad_norm": 1.967618703842163, "learning_rate": 1.773417953905107e-05, "loss": 0.4849, "step": 22636 }, { "epoch": 3.6953185584261865, "grad_norm": 1.852314829826355, "learning_rate": 1.7733978379164066e-05, "loss": 0.4632, "step": 22637 }, { "epoch": 3.695481817068691, "grad_norm": 1.8488118648529053, "learning_rate": 1.7733777211488948e-05, "loss": 0.4799, "step": 22638 }, { "epoch": 3.6956450757111954, "grad_norm": 1.8985193967819214, "learning_rate": 1.773357603602592e-05, "loss": 0.5196, "step": 22639 }, { "epoch": 3.6958083343537, "grad_norm": 1.6303659677505493, "learning_rate": 1.7733374852775184e-05, "loss": 0.4193, "step": 22640 }, { "epoch": 3.6959715929962043, "grad_norm": 2.2391350269317627, "learning_rate": 1.773317366173694e-05, "loss": 0.4844, "step": 22641 }, { "epoch": 3.6961348516387087, "grad_norm": 1.5273574590682983, "learning_rate": 1.773297246291139e-05, "loss": 0.4386, "step": 22642 }, { "epoch": 3.696298110281213, "grad_norm": 1.8074885606765747, "learning_rate": 1.7732771256298744e-05, "loss": 0.4549, "step": 22643 }, { "epoch": 3.6964613689237176, "grad_norm": 1.811329960823059, "learning_rate": 1.7732570041899198e-05, "loss": 0.4867, "step": 22644 }, { "epoch": 3.696624627566222, "grad_norm": 1.9575586318969727, "learning_rate": 1.7732368819712956e-05, "loss": 0.4213, "step": 22645 }, { "epoch": 3.6967878862087264, "grad_norm": 1.8156191110610962, "learning_rate": 1.7732167589740217e-05, "loss": 0.4297, "step": 22646 }, { "epoch": 3.6969511448512304, "grad_norm": 1.925270915031433, "learning_rate": 1.773196635198119e-05, "loss": 0.4888, "step": 22647 }, { "epoch": 3.697114403493735, "grad_norm": 1.7146767377853394, "learning_rate": 1.7731765106436073e-05, "loss": 0.4685, "step": 22648 }, { "epoch": 3.6972776621362393, "grad_norm": 1.9461886882781982, "learning_rate": 1.7731563853105073e-05, "loss": 0.4988, "step": 22649 }, { "epoch": 3.6974409207787438, "grad_norm": 2.043883800506592, "learning_rate": 1.773136259198839e-05, "loss": 0.4777, "step": 22650 }, { "epoch": 3.697604179421248, "grad_norm": 2.363006353378296, "learning_rate": 1.7731161323086227e-05, "loss": 0.5872, "step": 22651 }, { "epoch": 3.6977674380637526, "grad_norm": 1.5712478160858154, "learning_rate": 1.7730960046398785e-05, "loss": 0.3609, "step": 22652 }, { "epoch": 3.6979306967062566, "grad_norm": 1.642033338546753, "learning_rate": 1.773075876192627e-05, "loss": 0.476, "step": 22653 }, { "epoch": 3.698093955348761, "grad_norm": 1.8878151178359985, "learning_rate": 1.773055746966888e-05, "loss": 0.4799, "step": 22654 }, { "epoch": 3.6982572139912655, "grad_norm": 1.8766729831695557, "learning_rate": 1.7730356169626827e-05, "loss": 0.4479, "step": 22655 }, { "epoch": 3.69842047263377, "grad_norm": 2.3328702449798584, "learning_rate": 1.7730154861800302e-05, "loss": 0.5183, "step": 22656 }, { "epoch": 3.6985837312762744, "grad_norm": 2.1131885051727295, "learning_rate": 1.7729953546189514e-05, "loss": 0.517, "step": 22657 }, { "epoch": 3.698746989918779, "grad_norm": 2.158393144607544, "learning_rate": 1.7729752222794666e-05, "loss": 0.5048, "step": 22658 }, { "epoch": 3.6989102485612833, "grad_norm": 1.8549941778182983, "learning_rate": 1.7729550891615958e-05, "loss": 0.4591, "step": 22659 }, { "epoch": 3.6990735072037877, "grad_norm": 1.9977953433990479, "learning_rate": 1.7729349552653596e-05, "loss": 0.4901, "step": 22660 }, { "epoch": 3.699236765846292, "grad_norm": 2.3179643154144287, "learning_rate": 1.7729148205907778e-05, "loss": 0.4662, "step": 22661 }, { "epoch": 3.6994000244887966, "grad_norm": 2.3281564712524414, "learning_rate": 1.7728946851378716e-05, "loss": 0.504, "step": 22662 }, { "epoch": 3.699563283131301, "grad_norm": 1.9731714725494385, "learning_rate": 1.77287454890666e-05, "loss": 0.5237, "step": 22663 }, { "epoch": 3.699726541773805, "grad_norm": 1.5387974977493286, "learning_rate": 1.772854411897164e-05, "loss": 0.3668, "step": 22664 }, { "epoch": 3.6998898004163094, "grad_norm": 2.0729525089263916, "learning_rate": 1.772834274109404e-05, "loss": 0.5371, "step": 22665 }, { "epoch": 3.700053059058814, "grad_norm": 1.7668805122375488, "learning_rate": 1.7728141355434004e-05, "loss": 0.4169, "step": 22666 }, { "epoch": 3.7002163177013183, "grad_norm": 1.9354339838027954, "learning_rate": 1.7727939961991727e-05, "loss": 0.4733, "step": 22667 }, { "epoch": 3.7003795763438228, "grad_norm": 1.6906572580337524, "learning_rate": 1.772773856076742e-05, "loss": 0.3791, "step": 22668 }, { "epoch": 3.700542834986327, "grad_norm": 1.6497420072555542, "learning_rate": 1.772753715176128e-05, "loss": 0.4116, "step": 22669 }, { "epoch": 3.7007060936288316, "grad_norm": 2.3594419956207275, "learning_rate": 1.7727335734973512e-05, "loss": 0.635, "step": 22670 }, { "epoch": 3.7008693522713356, "grad_norm": 1.6846617460250854, "learning_rate": 1.772713431040432e-05, "loss": 0.4379, "step": 22671 }, { "epoch": 3.70103261091384, "grad_norm": 1.671708106994629, "learning_rate": 1.7726932878053905e-05, "loss": 0.4898, "step": 22672 }, { "epoch": 3.7011958695563445, "grad_norm": 2.010401725769043, "learning_rate": 1.772673143792247e-05, "loss": 0.4403, "step": 22673 }, { "epoch": 3.701359128198849, "grad_norm": 1.9795032739639282, "learning_rate": 1.7726529990010218e-05, "loss": 0.4586, "step": 22674 }, { "epoch": 3.7015223868413534, "grad_norm": 1.9454597234725952, "learning_rate": 1.7726328534317355e-05, "loss": 0.4821, "step": 22675 }, { "epoch": 3.701685645483858, "grad_norm": 1.5449669361114502, "learning_rate": 1.772612707084408e-05, "loss": 0.3788, "step": 22676 }, { "epoch": 3.7018489041263622, "grad_norm": 1.430256724357605, "learning_rate": 1.7725925599590596e-05, "loss": 0.4245, "step": 22677 }, { "epoch": 3.7020121627688667, "grad_norm": 2.009056568145752, "learning_rate": 1.772572412055711e-05, "loss": 0.4926, "step": 22678 }, { "epoch": 3.702175421411371, "grad_norm": 1.9216012954711914, "learning_rate": 1.7725522633743822e-05, "loss": 0.4912, "step": 22679 }, { "epoch": 3.7023386800538756, "grad_norm": 1.8228888511657715, "learning_rate": 1.772532113915093e-05, "loss": 0.513, "step": 22680 }, { "epoch": 3.70250193869638, "grad_norm": 2.07669734954834, "learning_rate": 1.7725119636778644e-05, "loss": 0.5081, "step": 22681 }, { "epoch": 3.702665197338884, "grad_norm": 1.9309055805206299, "learning_rate": 1.772491812662717e-05, "loss": 0.5021, "step": 22682 }, { "epoch": 3.7028284559813884, "grad_norm": 2.0777156352996826, "learning_rate": 1.7724716608696697e-05, "loss": 0.5106, "step": 22683 }, { "epoch": 3.702991714623893, "grad_norm": 1.5553944110870361, "learning_rate": 1.772451508298744e-05, "loss": 0.3609, "step": 22684 }, { "epoch": 3.7031549732663973, "grad_norm": 1.8316936492919922, "learning_rate": 1.77243135494996e-05, "loss": 0.5044, "step": 22685 }, { "epoch": 3.7033182319089017, "grad_norm": 2.0514676570892334, "learning_rate": 1.7724112008233375e-05, "loss": 0.4847, "step": 22686 }, { "epoch": 3.703481490551406, "grad_norm": 2.196439266204834, "learning_rate": 1.7723910459188974e-05, "loss": 0.5142, "step": 22687 }, { "epoch": 3.70364474919391, "grad_norm": 1.8026695251464844, "learning_rate": 1.7723708902366597e-05, "loss": 0.4015, "step": 22688 }, { "epoch": 3.7038080078364146, "grad_norm": 2.194547176361084, "learning_rate": 1.7723507337766442e-05, "loss": 0.5653, "step": 22689 }, { "epoch": 3.703971266478919, "grad_norm": 1.7894165515899658, "learning_rate": 1.7723305765388726e-05, "loss": 0.4649, "step": 22690 }, { "epoch": 3.7041345251214235, "grad_norm": 1.6918275356292725, "learning_rate": 1.7723104185233638e-05, "loss": 0.4515, "step": 22691 }, { "epoch": 3.704297783763928, "grad_norm": 1.883306622505188, "learning_rate": 1.7722902597301385e-05, "loss": 0.4075, "step": 22692 }, { "epoch": 3.7044610424064324, "grad_norm": 1.6573477983474731, "learning_rate": 1.7722701001592173e-05, "loss": 0.4045, "step": 22693 }, { "epoch": 3.704624301048937, "grad_norm": 1.6838510036468506, "learning_rate": 1.7722499398106203e-05, "loss": 0.4219, "step": 22694 }, { "epoch": 3.7047875596914412, "grad_norm": 1.8255748748779297, "learning_rate": 1.7722297786843677e-05, "loss": 0.5058, "step": 22695 }, { "epoch": 3.7049508183339457, "grad_norm": 1.7398850917816162, "learning_rate": 1.77220961678048e-05, "loss": 0.4224, "step": 22696 }, { "epoch": 3.70511407697645, "grad_norm": 1.7798130512237549, "learning_rate": 1.7721894540989776e-05, "loss": 0.4288, "step": 22697 }, { "epoch": 3.7052773356189546, "grad_norm": 2.1852502822875977, "learning_rate": 1.7721692906398805e-05, "loss": 0.4953, "step": 22698 }, { "epoch": 3.7054405942614586, "grad_norm": 2.096400022506714, "learning_rate": 1.772149126403209e-05, "loss": 0.4746, "step": 22699 }, { "epoch": 3.705603852903963, "grad_norm": 2.0927107334136963, "learning_rate": 1.7721289613889835e-05, "loss": 0.5098, "step": 22700 }, { "epoch": 3.7057671115464674, "grad_norm": 1.5189509391784668, "learning_rate": 1.7721087955972247e-05, "loss": 0.3961, "step": 22701 }, { "epoch": 3.705930370188972, "grad_norm": 2.100198745727539, "learning_rate": 1.7720886290279523e-05, "loss": 0.6052, "step": 22702 }, { "epoch": 3.7060936288314763, "grad_norm": 1.6478521823883057, "learning_rate": 1.7720684616811866e-05, "loss": 0.3824, "step": 22703 }, { "epoch": 3.7062568874739807, "grad_norm": 2.2588982582092285, "learning_rate": 1.7720482935569488e-05, "loss": 0.5826, "step": 22704 }, { "epoch": 3.706420146116485, "grad_norm": 1.9390804767608643, "learning_rate": 1.7720281246552583e-05, "loss": 0.4794, "step": 22705 }, { "epoch": 3.706583404758989, "grad_norm": 2.1972291469573975, "learning_rate": 1.7720079549761357e-05, "loss": 0.4908, "step": 22706 }, { "epoch": 3.7067466634014936, "grad_norm": 2.338203191757202, "learning_rate": 1.771987784519601e-05, "loss": 0.471, "step": 22707 }, { "epoch": 3.706909922043998, "grad_norm": 1.8888522386550903, "learning_rate": 1.771967613285675e-05, "loss": 0.4485, "step": 22708 }, { "epoch": 3.7070731806865025, "grad_norm": 2.257686138153076, "learning_rate": 1.7719474412743777e-05, "loss": 0.4875, "step": 22709 }, { "epoch": 3.707236439329007, "grad_norm": 2.049229145050049, "learning_rate": 1.7719272684857297e-05, "loss": 0.4958, "step": 22710 }, { "epoch": 3.7073996979715114, "grad_norm": 1.9203836917877197, "learning_rate": 1.771907094919751e-05, "loss": 0.4439, "step": 22711 }, { "epoch": 3.707562956614016, "grad_norm": 2.5923569202423096, "learning_rate": 1.771886920576462e-05, "loss": 0.5692, "step": 22712 }, { "epoch": 3.7077262152565202, "grad_norm": 2.011335611343384, "learning_rate": 1.7718667454558835e-05, "loss": 0.4346, "step": 22713 }, { "epoch": 3.7078894738990247, "grad_norm": 2.0316061973571777, "learning_rate": 1.771846569558035e-05, "loss": 0.4536, "step": 22714 }, { "epoch": 3.708052732541529, "grad_norm": 1.878449559211731, "learning_rate": 1.7718263928829372e-05, "loss": 0.4353, "step": 22715 }, { "epoch": 3.7082159911840336, "grad_norm": 1.920636534690857, "learning_rate": 1.7718062154306108e-05, "loss": 0.5171, "step": 22716 }, { "epoch": 3.7083792498265375, "grad_norm": 1.9491304159164429, "learning_rate": 1.7717860372010754e-05, "loss": 0.4733, "step": 22717 }, { "epoch": 3.708542508469042, "grad_norm": 2.1780471801757812, "learning_rate": 1.771765858194352e-05, "loss": 0.4818, "step": 22718 }, { "epoch": 3.7087057671115464, "grad_norm": 2.0583291053771973, "learning_rate": 1.77174567841046e-05, "loss": 0.4878, "step": 22719 }, { "epoch": 3.708869025754051, "grad_norm": 1.7924336194992065, "learning_rate": 1.771725497849421e-05, "loss": 0.4772, "step": 22720 }, { "epoch": 3.7090322843965553, "grad_norm": 1.9000600576400757, "learning_rate": 1.771705316511254e-05, "loss": 0.4663, "step": 22721 }, { "epoch": 3.7091955430390597, "grad_norm": 2.13399076461792, "learning_rate": 1.7716851343959804e-05, "loss": 0.5418, "step": 22722 }, { "epoch": 3.709358801681564, "grad_norm": 1.9658676385879517, "learning_rate": 1.7716649515036198e-05, "loss": 0.492, "step": 22723 }, { "epoch": 3.709522060324068, "grad_norm": 2.0932908058166504, "learning_rate": 1.7716447678341932e-05, "loss": 0.568, "step": 22724 }, { "epoch": 3.7096853189665726, "grad_norm": 1.6947450637817383, "learning_rate": 1.7716245833877202e-05, "loss": 0.4206, "step": 22725 }, { "epoch": 3.709848577609077, "grad_norm": 1.8194360733032227, "learning_rate": 1.7716043981642212e-05, "loss": 0.4097, "step": 22726 }, { "epoch": 3.7100118362515815, "grad_norm": 1.9109816551208496, "learning_rate": 1.7715842121637173e-05, "loss": 0.5003, "step": 22727 }, { "epoch": 3.710175094894086, "grad_norm": 2.0135700702667236, "learning_rate": 1.771564025386228e-05, "loss": 0.4753, "step": 22728 }, { "epoch": 3.7103383535365904, "grad_norm": 1.9731465578079224, "learning_rate": 1.7715438378317742e-05, "loss": 0.4787, "step": 22729 }, { "epoch": 3.710501612179095, "grad_norm": 1.7874786853790283, "learning_rate": 1.7715236495003755e-05, "loss": 0.4452, "step": 22730 }, { "epoch": 3.7106648708215992, "grad_norm": 1.9445605278015137, "learning_rate": 1.771503460392053e-05, "loss": 0.4964, "step": 22731 }, { "epoch": 3.7108281294641037, "grad_norm": 1.8239285945892334, "learning_rate": 1.7714832705068266e-05, "loss": 0.4338, "step": 22732 }, { "epoch": 3.710991388106608, "grad_norm": 2.079432964324951, "learning_rate": 1.771463079844717e-05, "loss": 0.5362, "step": 22733 }, { "epoch": 3.7111546467491126, "grad_norm": 2.317857503890991, "learning_rate": 1.7714428884057443e-05, "loss": 0.624, "step": 22734 }, { "epoch": 3.7113179053916165, "grad_norm": 2.0485095977783203, "learning_rate": 1.7714226961899283e-05, "loss": 0.523, "step": 22735 }, { "epoch": 3.711481164034121, "grad_norm": 1.7843348979949951, "learning_rate": 1.7714025031972904e-05, "loss": 0.457, "step": 22736 }, { "epoch": 3.7116444226766254, "grad_norm": 2.416715621948242, "learning_rate": 1.77138230942785e-05, "loss": 0.554, "step": 22737 }, { "epoch": 3.71180768131913, "grad_norm": 1.966399073600769, "learning_rate": 1.771362114881628e-05, "loss": 0.4677, "step": 22738 }, { "epoch": 3.7119709399616343, "grad_norm": 1.9134879112243652, "learning_rate": 1.7713419195586444e-05, "loss": 0.5015, "step": 22739 }, { "epoch": 3.7121341986041387, "grad_norm": 1.673136830329895, "learning_rate": 1.77132172345892e-05, "loss": 0.4173, "step": 22740 }, { "epoch": 3.7122974572466427, "grad_norm": 2.21917724609375, "learning_rate": 1.7713015265824745e-05, "loss": 0.6096, "step": 22741 }, { "epoch": 3.712460715889147, "grad_norm": 1.671455979347229, "learning_rate": 1.7712813289293288e-05, "loss": 0.4165, "step": 22742 }, { "epoch": 3.7126239745316516, "grad_norm": 2.1379213333129883, "learning_rate": 1.771261130499503e-05, "loss": 0.5162, "step": 22743 }, { "epoch": 3.712787233174156, "grad_norm": 2.270228147506714, "learning_rate": 1.771240931293017e-05, "loss": 0.4412, "step": 22744 }, { "epoch": 3.7129504918166605, "grad_norm": 1.8042283058166504, "learning_rate": 1.7712207313098924e-05, "loss": 0.4725, "step": 22745 }, { "epoch": 3.713113750459165, "grad_norm": 2.0378379821777344, "learning_rate": 1.771200530550148e-05, "loss": 0.4724, "step": 22746 }, { "epoch": 3.7132770091016694, "grad_norm": 2.285029888153076, "learning_rate": 1.771180329013805e-05, "loss": 0.4322, "step": 22747 }, { "epoch": 3.713440267744174, "grad_norm": 1.887068510055542, "learning_rate": 1.771160126700884e-05, "loss": 0.454, "step": 22748 }, { "epoch": 3.7136035263866782, "grad_norm": 2.5014660358428955, "learning_rate": 1.7711399236114048e-05, "loss": 0.5755, "step": 22749 }, { "epoch": 3.7137667850291827, "grad_norm": 1.8802212476730347, "learning_rate": 1.771119719745388e-05, "loss": 0.4628, "step": 22750 }, { "epoch": 3.713930043671687, "grad_norm": 2.420943021774292, "learning_rate": 1.7710995151028533e-05, "loss": 0.5615, "step": 22751 }, { "epoch": 3.714093302314191, "grad_norm": 2.041447877883911, "learning_rate": 1.771079309683822e-05, "loss": 0.5199, "step": 22752 }, { "epoch": 3.7142565609566955, "grad_norm": 2.1768717765808105, "learning_rate": 1.7710591034883143e-05, "loss": 0.5106, "step": 22753 }, { "epoch": 3.7144198195992, "grad_norm": 1.7220244407653809, "learning_rate": 1.7710388965163496e-05, "loss": 0.4368, "step": 22754 }, { "epoch": 3.7145830782417044, "grad_norm": 1.8661932945251465, "learning_rate": 1.7710186887679494e-05, "loss": 0.4545, "step": 22755 }, { "epoch": 3.714746336884209, "grad_norm": 1.7738341093063354, "learning_rate": 1.7709984802431334e-05, "loss": 0.4958, "step": 22756 }, { "epoch": 3.7149095955267133, "grad_norm": 1.8474464416503906, "learning_rate": 1.7709782709419223e-05, "loss": 0.4276, "step": 22757 }, { "epoch": 3.7150728541692177, "grad_norm": 1.9281634092330933, "learning_rate": 1.7709580608643364e-05, "loss": 0.4576, "step": 22758 }, { "epoch": 3.7152361128117217, "grad_norm": 1.855699896812439, "learning_rate": 1.7709378500103956e-05, "loss": 0.4273, "step": 22759 }, { "epoch": 3.715399371454226, "grad_norm": 1.8748750686645508, "learning_rate": 1.770917638380121e-05, "loss": 0.4719, "step": 22760 }, { "epoch": 3.7155626300967306, "grad_norm": 2.5992395877838135, "learning_rate": 1.7708974259735323e-05, "loss": 0.4418, "step": 22761 }, { "epoch": 3.715725888739235, "grad_norm": 1.6984690427780151, "learning_rate": 1.77087721279065e-05, "loss": 0.4212, "step": 22762 }, { "epoch": 3.7158891473817395, "grad_norm": 2.657233715057373, "learning_rate": 1.7708569988314946e-05, "loss": 0.4912, "step": 22763 }, { "epoch": 3.716052406024244, "grad_norm": 2.342334747314453, "learning_rate": 1.7708367840960864e-05, "loss": 0.4806, "step": 22764 }, { "epoch": 3.7162156646667484, "grad_norm": 1.6562262773513794, "learning_rate": 1.770816568584446e-05, "loss": 0.4114, "step": 22765 }, { "epoch": 3.716378923309253, "grad_norm": 1.597888708114624, "learning_rate": 1.770796352296593e-05, "loss": 0.4424, "step": 22766 }, { "epoch": 3.7165421819517572, "grad_norm": 1.7062016725540161, "learning_rate": 1.770776135232549e-05, "loss": 0.4089, "step": 22767 }, { "epoch": 3.7167054405942617, "grad_norm": 2.050788164138794, "learning_rate": 1.7707559173923333e-05, "loss": 0.481, "step": 22768 }, { "epoch": 3.716868699236766, "grad_norm": 2.225301504135132, "learning_rate": 1.7707356987759664e-05, "loss": 0.5278, "step": 22769 }, { "epoch": 3.71703195787927, "grad_norm": 1.9455546140670776, "learning_rate": 1.770715479383469e-05, "loss": 0.5306, "step": 22770 }, { "epoch": 3.7171952165217745, "grad_norm": 2.1466095447540283, "learning_rate": 1.770695259214861e-05, "loss": 0.4876, "step": 22771 }, { "epoch": 3.717358475164279, "grad_norm": 2.3048605918884277, "learning_rate": 1.7706750382701637e-05, "loss": 0.5856, "step": 22772 }, { "epoch": 3.7175217338067834, "grad_norm": 1.9004733562469482, "learning_rate": 1.7706548165493967e-05, "loss": 0.4125, "step": 22773 }, { "epoch": 3.717684992449288, "grad_norm": 2.0992767810821533, "learning_rate": 1.77063459405258e-05, "loss": 0.528, "step": 22774 }, { "epoch": 3.7178482510917923, "grad_norm": 2.0954127311706543, "learning_rate": 1.770614370779735e-05, "loss": 0.4385, "step": 22775 }, { "epoch": 3.7180115097342963, "grad_norm": 1.6401571035385132, "learning_rate": 1.7705941467308814e-05, "loss": 0.3831, "step": 22776 }, { "epoch": 3.7181747683768007, "grad_norm": 1.9294123649597168, "learning_rate": 1.7705739219060395e-05, "loss": 0.4242, "step": 22777 }, { "epoch": 3.718338027019305, "grad_norm": 2.094940662384033, "learning_rate": 1.77055369630523e-05, "loss": 0.4986, "step": 22778 }, { "epoch": 3.7185012856618096, "grad_norm": 2.0022380352020264, "learning_rate": 1.7705334699284734e-05, "loss": 0.435, "step": 22779 }, { "epoch": 3.718664544304314, "grad_norm": 1.7763264179229736, "learning_rate": 1.7705132427757895e-05, "loss": 0.4332, "step": 22780 }, { "epoch": 3.7188278029468185, "grad_norm": 2.2970869541168213, "learning_rate": 1.770493014847199e-05, "loss": 0.5555, "step": 22781 }, { "epoch": 3.718991061589323, "grad_norm": 2.0729012489318848, "learning_rate": 1.770472786142722e-05, "loss": 0.5011, "step": 22782 }, { "epoch": 3.7191543202318273, "grad_norm": 1.6910687685012817, "learning_rate": 1.7704525566623796e-05, "loss": 0.382, "step": 22783 }, { "epoch": 3.719317578874332, "grad_norm": 1.664824366569519, "learning_rate": 1.7704323264061914e-05, "loss": 0.3869, "step": 22784 }, { "epoch": 3.7194808375168362, "grad_norm": 2.1328492164611816, "learning_rate": 1.770412095374178e-05, "loss": 0.4966, "step": 22785 }, { "epoch": 3.7196440961593407, "grad_norm": 1.7778578996658325, "learning_rate": 1.77039186356636e-05, "loss": 0.456, "step": 22786 }, { "epoch": 3.7198073548018447, "grad_norm": 1.8645472526550293, "learning_rate": 1.7703716309827574e-05, "loss": 0.4219, "step": 22787 }, { "epoch": 3.719970613444349, "grad_norm": 1.8806356191635132, "learning_rate": 1.770351397623391e-05, "loss": 0.4382, "step": 22788 }, { "epoch": 3.7201338720868535, "grad_norm": 1.797041416168213, "learning_rate": 1.7703311634882806e-05, "loss": 0.4445, "step": 22789 }, { "epoch": 3.720297130729358, "grad_norm": 3.21651029586792, "learning_rate": 1.7703109285774474e-05, "loss": 0.4848, "step": 22790 }, { "epoch": 3.7204603893718624, "grad_norm": 1.8864994049072266, "learning_rate": 1.770290692890911e-05, "loss": 0.4466, "step": 22791 }, { "epoch": 3.720623648014367, "grad_norm": 1.7413454055786133, "learning_rate": 1.770270456428692e-05, "loss": 0.4686, "step": 22792 }, { "epoch": 3.7207869066568713, "grad_norm": 2.0666263103485107, "learning_rate": 1.7702502191908112e-05, "loss": 0.4652, "step": 22793 }, { "epoch": 3.7209501652993753, "grad_norm": 2.0566697120666504, "learning_rate": 1.7702299811772882e-05, "loss": 0.478, "step": 22794 }, { "epoch": 3.7211134239418797, "grad_norm": 2.3022544384002686, "learning_rate": 1.7702097423881443e-05, "loss": 0.4455, "step": 22795 }, { "epoch": 3.721276682584384, "grad_norm": 2.0006942749023438, "learning_rate": 1.7701895028233987e-05, "loss": 0.4834, "step": 22796 }, { "epoch": 3.7214399412268886, "grad_norm": 2.424396276473999, "learning_rate": 1.770169262483073e-05, "loss": 0.4935, "step": 22797 }, { "epoch": 3.721603199869393, "grad_norm": 2.2348108291625977, "learning_rate": 1.7701490213671873e-05, "loss": 0.4966, "step": 22798 }, { "epoch": 3.7217664585118975, "grad_norm": 1.9795523881912231, "learning_rate": 1.770128779475761e-05, "loss": 0.4629, "step": 22799 }, { "epoch": 3.721929717154402, "grad_norm": 1.8332141637802124, "learning_rate": 1.7701085368088157e-05, "loss": 0.4019, "step": 22800 }, { "epoch": 3.7220929757969063, "grad_norm": 1.9693727493286133, "learning_rate": 1.7700882933663712e-05, "loss": 0.4525, "step": 22801 }, { "epoch": 3.722256234439411, "grad_norm": 2.1263041496276855, "learning_rate": 1.770068049148448e-05, "loss": 0.4436, "step": 22802 }, { "epoch": 3.722419493081915, "grad_norm": 1.914694905281067, "learning_rate": 1.7700478041550667e-05, "loss": 0.4326, "step": 22803 }, { "epoch": 3.7225827517244197, "grad_norm": 2.1791810989379883, "learning_rate": 1.7700275583862472e-05, "loss": 0.5423, "step": 22804 }, { "epoch": 3.7227460103669237, "grad_norm": 1.923614740371704, "learning_rate": 1.77000731184201e-05, "loss": 0.4263, "step": 22805 }, { "epoch": 3.722909269009428, "grad_norm": 2.047898530960083, "learning_rate": 1.769987064522376e-05, "loss": 0.4808, "step": 22806 }, { "epoch": 3.7230725276519325, "grad_norm": 2.170731544494629, "learning_rate": 1.769966816427365e-05, "loss": 0.5192, "step": 22807 }, { "epoch": 3.723235786294437, "grad_norm": 2.0533971786499023, "learning_rate": 1.769946567556998e-05, "loss": 0.4515, "step": 22808 }, { "epoch": 3.7233990449369414, "grad_norm": 1.9000937938690186, "learning_rate": 1.7699263179112947e-05, "loss": 0.4788, "step": 22809 }, { "epoch": 3.723562303579446, "grad_norm": 2.052455186843872, "learning_rate": 1.7699060674902756e-05, "loss": 0.4141, "step": 22810 }, { "epoch": 3.7237255622219503, "grad_norm": 1.8414310216903687, "learning_rate": 1.7698858162939618e-05, "loss": 0.4028, "step": 22811 }, { "epoch": 3.7238888208644543, "grad_norm": 1.927718997001648, "learning_rate": 1.769865564322373e-05, "loss": 0.4603, "step": 22812 }, { "epoch": 3.7240520795069587, "grad_norm": 1.8579682111740112, "learning_rate": 1.7698453115755294e-05, "loss": 0.4805, "step": 22813 }, { "epoch": 3.724215338149463, "grad_norm": 2.1244590282440186, "learning_rate": 1.769825058053452e-05, "loss": 0.4198, "step": 22814 }, { "epoch": 3.7243785967919676, "grad_norm": 1.7332881689071655, "learning_rate": 1.7698048037561612e-05, "loss": 0.3946, "step": 22815 }, { "epoch": 3.724541855434472, "grad_norm": 2.184218406677246, "learning_rate": 1.769784548683677e-05, "loss": 0.575, "step": 22816 }, { "epoch": 3.7247051140769765, "grad_norm": 1.7889639139175415, "learning_rate": 1.76976429283602e-05, "loss": 0.3882, "step": 22817 }, { "epoch": 3.724868372719481, "grad_norm": 1.8887100219726562, "learning_rate": 1.7697440362132105e-05, "loss": 0.4916, "step": 22818 }, { "epoch": 3.7250316313619853, "grad_norm": 2.4295334815979004, "learning_rate": 1.7697237788152687e-05, "loss": 0.5491, "step": 22819 }, { "epoch": 3.72519489000449, "grad_norm": 1.5273540019989014, "learning_rate": 1.769703520642216e-05, "loss": 0.4346, "step": 22820 }, { "epoch": 3.725358148646994, "grad_norm": 2.0985546112060547, "learning_rate": 1.7696832616940713e-05, "loss": 0.5258, "step": 22821 }, { "epoch": 3.7255214072894987, "grad_norm": 2.1624417304992676, "learning_rate": 1.769663001970856e-05, "loss": 0.5136, "step": 22822 }, { "epoch": 3.7256846659320026, "grad_norm": 1.80424165725708, "learning_rate": 1.7696427414725904e-05, "loss": 0.4395, "step": 22823 }, { "epoch": 3.725847924574507, "grad_norm": 1.9322447776794434, "learning_rate": 1.7696224801992947e-05, "loss": 0.5415, "step": 22824 }, { "epoch": 3.7260111832170115, "grad_norm": 1.7078863382339478, "learning_rate": 1.7696022181509892e-05, "loss": 0.447, "step": 22825 }, { "epoch": 3.726174441859516, "grad_norm": 2.136220932006836, "learning_rate": 1.7695819553276947e-05, "loss": 0.4521, "step": 22826 }, { "epoch": 3.7263377005020204, "grad_norm": 2.08801007270813, "learning_rate": 1.7695616917294314e-05, "loss": 0.5452, "step": 22827 }, { "epoch": 3.726500959144525, "grad_norm": 1.6181789636611938, "learning_rate": 1.7695414273562194e-05, "loss": 0.4262, "step": 22828 }, { "epoch": 3.726664217787029, "grad_norm": 1.8421907424926758, "learning_rate": 1.7695211622080796e-05, "loss": 0.5199, "step": 22829 }, { "epoch": 3.7268274764295333, "grad_norm": 1.8081350326538086, "learning_rate": 1.769500896285032e-05, "loss": 0.4654, "step": 22830 }, { "epoch": 3.7269907350720377, "grad_norm": 1.9364638328552246, "learning_rate": 1.7694806295870975e-05, "loss": 0.4904, "step": 22831 }, { "epoch": 3.727153993714542, "grad_norm": 1.7142043113708496, "learning_rate": 1.769460362114296e-05, "loss": 0.4519, "step": 22832 }, { "epoch": 3.7273172523570466, "grad_norm": 1.8020275831222534, "learning_rate": 1.7694400938666484e-05, "loss": 0.4466, "step": 22833 }, { "epoch": 3.727480510999551, "grad_norm": 1.695197582244873, "learning_rate": 1.7694198248441745e-05, "loss": 0.4547, "step": 22834 }, { "epoch": 3.7276437696420555, "grad_norm": 2.3513026237487793, "learning_rate": 1.7693995550468952e-05, "loss": 0.6308, "step": 22835 }, { "epoch": 3.72780702828456, "grad_norm": 1.7846788167953491, "learning_rate": 1.7693792844748308e-05, "loss": 0.3682, "step": 22836 }, { "epoch": 3.7279702869270643, "grad_norm": 2.136815309524536, "learning_rate": 1.7693590131280016e-05, "loss": 0.5737, "step": 22837 }, { "epoch": 3.7281335455695688, "grad_norm": 1.8786089420318604, "learning_rate": 1.769338741006428e-05, "loss": 0.4482, "step": 22838 }, { "epoch": 3.728296804212073, "grad_norm": 1.8632042407989502, "learning_rate": 1.7693184681101305e-05, "loss": 0.432, "step": 22839 }, { "epoch": 3.728460062854577, "grad_norm": 1.9512913227081299, "learning_rate": 1.7692981944391293e-05, "loss": 0.5029, "step": 22840 }, { "epoch": 3.7286233214970816, "grad_norm": 2.1620447635650635, "learning_rate": 1.7692779199934453e-05, "loss": 0.5201, "step": 22841 }, { "epoch": 3.728786580139586, "grad_norm": 1.8350428342819214, "learning_rate": 1.7692576447730986e-05, "loss": 0.453, "step": 22842 }, { "epoch": 3.7289498387820905, "grad_norm": 2.160250186920166, "learning_rate": 1.76923736877811e-05, "loss": 0.5167, "step": 22843 }, { "epoch": 3.729113097424595, "grad_norm": 1.8119174242019653, "learning_rate": 1.769217092008499e-05, "loss": 0.4956, "step": 22844 }, { "epoch": 3.7292763560670994, "grad_norm": 2.0382466316223145, "learning_rate": 1.7691968144642868e-05, "loss": 0.5246, "step": 22845 }, { "epoch": 3.729439614709604, "grad_norm": 1.9365278482437134, "learning_rate": 1.769176536145494e-05, "loss": 0.4575, "step": 22846 }, { "epoch": 3.729602873352108, "grad_norm": 1.951371431350708, "learning_rate": 1.76915625705214e-05, "loss": 0.4352, "step": 22847 }, { "epoch": 3.7297661319946123, "grad_norm": 1.5835156440734863, "learning_rate": 1.7691359771842462e-05, "loss": 0.4399, "step": 22848 }, { "epoch": 3.7299293906371167, "grad_norm": 1.8726884126663208, "learning_rate": 1.7691156965418325e-05, "loss": 0.4255, "step": 22849 }, { "epoch": 3.730092649279621, "grad_norm": 1.9562766551971436, "learning_rate": 1.7690954151249196e-05, "loss": 0.4819, "step": 22850 }, { "epoch": 3.7302559079221256, "grad_norm": 2.106698989868164, "learning_rate": 1.769075132933528e-05, "loss": 0.4315, "step": 22851 }, { "epoch": 3.73041916656463, "grad_norm": 1.7830629348754883, "learning_rate": 1.7690548499676778e-05, "loss": 0.4325, "step": 22852 }, { "epoch": 3.7305824252071345, "grad_norm": 2.118077278137207, "learning_rate": 1.7690345662273895e-05, "loss": 0.5196, "step": 22853 }, { "epoch": 3.730745683849639, "grad_norm": 2.109145402908325, "learning_rate": 1.7690142817126836e-05, "loss": 0.4816, "step": 22854 }, { "epoch": 3.7309089424921433, "grad_norm": 2.0305912494659424, "learning_rate": 1.7689939964235803e-05, "loss": 0.4226, "step": 22855 }, { "epoch": 3.7310722011346478, "grad_norm": 2.0506958961486816, "learning_rate": 1.7689737103601005e-05, "loss": 0.5225, "step": 22856 }, { "epoch": 3.731235459777152, "grad_norm": 2.029242992401123, "learning_rate": 1.768953423522265e-05, "loss": 0.4589, "step": 22857 }, { "epoch": 3.731398718419656, "grad_norm": 1.5539193153381348, "learning_rate": 1.7689331359100927e-05, "loss": 0.4335, "step": 22858 }, { "epoch": 3.7315619770621606, "grad_norm": 2.0961239337921143, "learning_rate": 1.768912847523605e-05, "loss": 0.4947, "step": 22859 }, { "epoch": 3.731725235704665, "grad_norm": 1.9994629621505737, "learning_rate": 1.7688925583628226e-05, "loss": 0.4932, "step": 22860 }, { "epoch": 3.7318884943471695, "grad_norm": 1.8786498308181763, "learning_rate": 1.7688722684277656e-05, "loss": 0.5094, "step": 22861 }, { "epoch": 3.732051752989674, "grad_norm": 1.7633424997329712, "learning_rate": 1.7688519777184544e-05, "loss": 0.4451, "step": 22862 }, { "epoch": 3.7322150116321784, "grad_norm": 1.9364328384399414, "learning_rate": 1.768831686234909e-05, "loss": 0.5108, "step": 22863 }, { "epoch": 3.732378270274683, "grad_norm": 1.8507157564163208, "learning_rate": 1.7688113939771508e-05, "loss": 0.4439, "step": 22864 }, { "epoch": 3.732541528917187, "grad_norm": 1.969465970993042, "learning_rate": 1.7687911009451997e-05, "loss": 0.4608, "step": 22865 }, { "epoch": 3.7327047875596913, "grad_norm": 1.723888874053955, "learning_rate": 1.768770807139076e-05, "loss": 0.3769, "step": 22866 }, { "epoch": 3.7328680462021957, "grad_norm": 2.2447011470794678, "learning_rate": 1.7687505125588006e-05, "loss": 0.5012, "step": 22867 }, { "epoch": 3.7330313048447, "grad_norm": 2.09597110748291, "learning_rate": 1.7687302172043933e-05, "loss": 0.5348, "step": 22868 }, { "epoch": 3.7331945634872046, "grad_norm": 1.9004772901535034, "learning_rate": 1.7687099210758747e-05, "loss": 0.4421, "step": 22869 }, { "epoch": 3.733357822129709, "grad_norm": 1.9539334774017334, "learning_rate": 1.768689624173266e-05, "loss": 0.4679, "step": 22870 }, { "epoch": 3.7335210807722135, "grad_norm": 2.1005115509033203, "learning_rate": 1.7686693264965867e-05, "loss": 0.4686, "step": 22871 }, { "epoch": 3.733684339414718, "grad_norm": 1.8601828813552856, "learning_rate": 1.7686490280458575e-05, "loss": 0.4256, "step": 22872 }, { "epoch": 3.7338475980572223, "grad_norm": 1.8900108337402344, "learning_rate": 1.768628728821099e-05, "loss": 0.3878, "step": 22873 }, { "epoch": 3.7340108566997268, "grad_norm": 2.0130884647369385, "learning_rate": 1.768608428822332e-05, "loss": 0.4505, "step": 22874 }, { "epoch": 3.734174115342231, "grad_norm": 2.0302186012268066, "learning_rate": 1.768588128049576e-05, "loss": 0.5073, "step": 22875 }, { "epoch": 3.734337373984735, "grad_norm": 2.0449979305267334, "learning_rate": 1.768567826502852e-05, "loss": 0.5379, "step": 22876 }, { "epoch": 3.7345006326272396, "grad_norm": 1.6892672777175903, "learning_rate": 1.7685475241821808e-05, "loss": 0.5049, "step": 22877 }, { "epoch": 3.734663891269744, "grad_norm": 1.9955275058746338, "learning_rate": 1.768527221087582e-05, "loss": 0.4429, "step": 22878 }, { "epoch": 3.7348271499122485, "grad_norm": 2.1159310340881348, "learning_rate": 1.7685069172190766e-05, "loss": 0.5437, "step": 22879 }, { "epoch": 3.734990408554753, "grad_norm": 1.9398870468139648, "learning_rate": 1.7684866125766853e-05, "loss": 0.4657, "step": 22880 }, { "epoch": 3.7351536671972574, "grad_norm": 1.933645486831665, "learning_rate": 1.7684663071604278e-05, "loss": 0.4488, "step": 22881 }, { "epoch": 3.7353169258397614, "grad_norm": 1.873692512512207, "learning_rate": 1.768446000970325e-05, "loss": 0.4796, "step": 22882 }, { "epoch": 3.735480184482266, "grad_norm": 1.7155770063400269, "learning_rate": 1.7684256940063974e-05, "loss": 0.4456, "step": 22883 }, { "epoch": 3.7356434431247703, "grad_norm": 1.951156735420227, "learning_rate": 1.7684053862686652e-05, "loss": 0.5407, "step": 22884 }, { "epoch": 3.7358067017672747, "grad_norm": 2.1994519233703613, "learning_rate": 1.768385077757149e-05, "loss": 0.4378, "step": 22885 }, { "epoch": 3.735969960409779, "grad_norm": 1.974714994430542, "learning_rate": 1.768364768471869e-05, "loss": 0.5072, "step": 22886 }, { "epoch": 3.7361332190522836, "grad_norm": 2.410322904586792, "learning_rate": 1.7683444584128463e-05, "loss": 0.5941, "step": 22887 }, { "epoch": 3.736296477694788, "grad_norm": 1.8280802965164185, "learning_rate": 1.7683241475801008e-05, "loss": 0.4082, "step": 22888 }, { "epoch": 3.7364597363372924, "grad_norm": 2.342724084854126, "learning_rate": 1.768303835973653e-05, "loss": 0.5863, "step": 22889 }, { "epoch": 3.736622994979797, "grad_norm": 1.7929778099060059, "learning_rate": 1.7682835235935236e-05, "loss": 0.459, "step": 22890 }, { "epoch": 3.7367862536223013, "grad_norm": 2.1280555725097656, "learning_rate": 1.7682632104397326e-05, "loss": 0.5799, "step": 22891 }, { "epoch": 3.7369495122648058, "grad_norm": 2.0665156841278076, "learning_rate": 1.768242896512301e-05, "loss": 0.4956, "step": 22892 }, { "epoch": 3.7371127709073098, "grad_norm": 1.6387856006622314, "learning_rate": 1.768222581811249e-05, "loss": 0.426, "step": 22893 }, { "epoch": 3.737276029549814, "grad_norm": 1.8131228685379028, "learning_rate": 1.768202266336597e-05, "loss": 0.4229, "step": 22894 }, { "epoch": 3.7374392881923186, "grad_norm": 1.843874216079712, "learning_rate": 1.7681819500883656e-05, "loss": 0.6006, "step": 22895 }, { "epoch": 3.737602546834823, "grad_norm": 2.203575372695923, "learning_rate": 1.768161633066575e-05, "loss": 0.5195, "step": 22896 }, { "epoch": 3.7377658054773275, "grad_norm": 2.1657025814056396, "learning_rate": 1.768141315271246e-05, "loss": 0.5436, "step": 22897 }, { "epoch": 3.737929064119832, "grad_norm": 1.64663827419281, "learning_rate": 1.7681209967023988e-05, "loss": 0.447, "step": 22898 }, { "epoch": 3.7380923227623364, "grad_norm": 1.8429611921310425, "learning_rate": 1.768100677360054e-05, "loss": 0.4035, "step": 22899 }, { "epoch": 3.7382555814048404, "grad_norm": 2.0315117835998535, "learning_rate": 1.768080357244232e-05, "loss": 0.5347, "step": 22900 }, { "epoch": 3.738418840047345, "grad_norm": 1.944069266319275, "learning_rate": 1.7680600363549534e-05, "loss": 0.4673, "step": 22901 }, { "epoch": 3.7385820986898493, "grad_norm": 1.7927143573760986, "learning_rate": 1.7680397146922384e-05, "loss": 0.4104, "step": 22902 }, { "epoch": 3.7387453573323537, "grad_norm": 1.6668224334716797, "learning_rate": 1.7680193922561077e-05, "loss": 0.3841, "step": 22903 }, { "epoch": 3.738908615974858, "grad_norm": 2.0732734203338623, "learning_rate": 1.7679990690465815e-05, "loss": 0.5378, "step": 22904 }, { "epoch": 3.7390718746173626, "grad_norm": 2.027411460876465, "learning_rate": 1.7679787450636806e-05, "loss": 0.4668, "step": 22905 }, { "epoch": 3.739235133259867, "grad_norm": 2.065343141555786, "learning_rate": 1.7679584203074257e-05, "loss": 0.5155, "step": 22906 }, { "epoch": 3.7393983919023714, "grad_norm": 1.8813180923461914, "learning_rate": 1.7679380947778362e-05, "loss": 0.4211, "step": 22907 }, { "epoch": 3.739561650544876, "grad_norm": 2.1671295166015625, "learning_rate": 1.7679177684749334e-05, "loss": 0.4861, "step": 22908 }, { "epoch": 3.7397249091873803, "grad_norm": 2.0184051990509033, "learning_rate": 1.7678974413987377e-05, "loss": 0.5024, "step": 22909 }, { "epoch": 3.7398881678298848, "grad_norm": 1.9485032558441162, "learning_rate": 1.7678771135492696e-05, "loss": 0.57, "step": 22910 }, { "epoch": 3.7400514264723888, "grad_norm": 2.5055594444274902, "learning_rate": 1.7678567849265492e-05, "loss": 0.5299, "step": 22911 }, { "epoch": 3.740214685114893, "grad_norm": 1.894830346107483, "learning_rate": 1.767836455530598e-05, "loss": 0.4665, "step": 22912 }, { "epoch": 3.7403779437573976, "grad_norm": 1.9159787893295288, "learning_rate": 1.7678161253614346e-05, "loss": 0.4248, "step": 22913 }, { "epoch": 3.740541202399902, "grad_norm": 2.1855523586273193, "learning_rate": 1.7677957944190813e-05, "loss": 0.5616, "step": 22914 }, { "epoch": 3.7407044610424065, "grad_norm": 1.7926640510559082, "learning_rate": 1.7677754627035573e-05, "loss": 0.4297, "step": 22915 }, { "epoch": 3.740867719684911, "grad_norm": 2.0009617805480957, "learning_rate": 1.767755130214884e-05, "loss": 0.4811, "step": 22916 }, { "epoch": 3.741030978327415, "grad_norm": 1.7172850370407104, "learning_rate": 1.7677347969530814e-05, "loss": 0.4719, "step": 22917 }, { "epoch": 3.7411942369699194, "grad_norm": 2.171311616897583, "learning_rate": 1.76771446291817e-05, "loss": 0.4619, "step": 22918 }, { "epoch": 3.741357495612424, "grad_norm": 2.034564733505249, "learning_rate": 1.7676941281101703e-05, "loss": 0.5113, "step": 22919 }, { "epoch": 3.7415207542549282, "grad_norm": 2.035005569458008, "learning_rate": 1.7676737925291027e-05, "loss": 0.5146, "step": 22920 }, { "epoch": 3.7416840128974327, "grad_norm": 1.5793389081954956, "learning_rate": 1.7676534561749885e-05, "loss": 0.4117, "step": 22921 }, { "epoch": 3.741847271539937, "grad_norm": 1.9760212898254395, "learning_rate": 1.767633119047847e-05, "loss": 0.483, "step": 22922 }, { "epoch": 3.7420105301824416, "grad_norm": 1.9455811977386475, "learning_rate": 1.767612781147699e-05, "loss": 0.4418, "step": 22923 }, { "epoch": 3.742173788824946, "grad_norm": 2.031787157058716, "learning_rate": 1.767592442474565e-05, "loss": 0.503, "step": 22924 }, { "epoch": 3.7423370474674504, "grad_norm": 1.6996452808380127, "learning_rate": 1.767572103028466e-05, "loss": 0.4283, "step": 22925 }, { "epoch": 3.742500306109955, "grad_norm": 1.5792951583862305, "learning_rate": 1.767551762809422e-05, "loss": 0.4284, "step": 22926 }, { "epoch": 3.7426635647524593, "grad_norm": 2.0644593238830566, "learning_rate": 1.7675314218174537e-05, "loss": 0.4779, "step": 22927 }, { "epoch": 3.7428268233949633, "grad_norm": 1.957065224647522, "learning_rate": 1.767511080052581e-05, "loss": 0.4047, "step": 22928 }, { "epoch": 3.7429900820374677, "grad_norm": 1.8435401916503906, "learning_rate": 1.767490737514825e-05, "loss": 0.4716, "step": 22929 }, { "epoch": 3.743153340679972, "grad_norm": 1.9670809507369995, "learning_rate": 1.767470394204206e-05, "loss": 0.4867, "step": 22930 }, { "epoch": 3.7433165993224766, "grad_norm": 1.9158556461334229, "learning_rate": 1.767450050120745e-05, "loss": 0.5296, "step": 22931 }, { "epoch": 3.743479857964981, "grad_norm": 1.7503457069396973, "learning_rate": 1.7674297052644615e-05, "loss": 0.4596, "step": 22932 }, { "epoch": 3.7436431166074855, "grad_norm": 1.9555561542510986, "learning_rate": 1.767409359635377e-05, "loss": 0.3949, "step": 22933 }, { "epoch": 3.74380637524999, "grad_norm": 2.063776969909668, "learning_rate": 1.767389013233511e-05, "loss": 0.4214, "step": 22934 }, { "epoch": 3.743969633892494, "grad_norm": 2.133620500564575, "learning_rate": 1.7673686660588847e-05, "loss": 0.4912, "step": 22935 }, { "epoch": 3.7441328925349984, "grad_norm": 2.0229508876800537, "learning_rate": 1.7673483181115183e-05, "loss": 0.5157, "step": 22936 }, { "epoch": 3.744296151177503, "grad_norm": 2.162501573562622, "learning_rate": 1.7673279693914322e-05, "loss": 0.5356, "step": 22937 }, { "epoch": 3.7444594098200072, "grad_norm": 1.9676910638809204, "learning_rate": 1.7673076198986474e-05, "loss": 0.4971, "step": 22938 }, { "epoch": 3.7446226684625117, "grad_norm": 1.943848967552185, "learning_rate": 1.7672872696331835e-05, "loss": 0.5262, "step": 22939 }, { "epoch": 3.744785927105016, "grad_norm": 1.7848647832870483, "learning_rate": 1.767266918595062e-05, "loss": 0.4264, "step": 22940 }, { "epoch": 3.7449491857475206, "grad_norm": 1.9555575847625732, "learning_rate": 1.7672465667843027e-05, "loss": 0.5711, "step": 22941 }, { "epoch": 3.745112444390025, "grad_norm": 2.0696961879730225, "learning_rate": 1.7672262142009264e-05, "loss": 0.4904, "step": 22942 }, { "epoch": 3.7452757030325294, "grad_norm": 2.004783868789673, "learning_rate": 1.7672058608449534e-05, "loss": 0.4921, "step": 22943 }, { "epoch": 3.745438961675034, "grad_norm": 2.1718807220458984, "learning_rate": 1.7671855067164044e-05, "loss": 0.4822, "step": 22944 }, { "epoch": 3.7456022203175383, "grad_norm": 1.5948879718780518, "learning_rate": 1.7671651518153e-05, "loss": 0.4128, "step": 22945 }, { "epoch": 3.7457654789600423, "grad_norm": 1.9243508577346802, "learning_rate": 1.76714479614166e-05, "loss": 0.4512, "step": 22946 }, { "epoch": 3.7459287376025467, "grad_norm": 1.9413063526153564, "learning_rate": 1.7671244396955057e-05, "loss": 0.526, "step": 22947 }, { "epoch": 3.746091996245051, "grad_norm": 2.3264923095703125, "learning_rate": 1.767104082476857e-05, "loss": 0.4234, "step": 22948 }, { "epoch": 3.7462552548875556, "grad_norm": 1.5960274934768677, "learning_rate": 1.7670837244857352e-05, "loss": 0.3867, "step": 22949 }, { "epoch": 3.74641851353006, "grad_norm": 1.4678224325180054, "learning_rate": 1.7670633657221602e-05, "loss": 0.3959, "step": 22950 }, { "epoch": 3.7465817721725645, "grad_norm": 1.7932617664337158, "learning_rate": 1.7670430061861525e-05, "loss": 0.4515, "step": 22951 }, { "epoch": 3.746745030815069, "grad_norm": 2.1297247409820557, "learning_rate": 1.7670226458777324e-05, "loss": 0.55, "step": 22952 }, { "epoch": 3.746908289457573, "grad_norm": 1.8476495742797852, "learning_rate": 1.767002284796921e-05, "loss": 0.4464, "step": 22953 }, { "epoch": 3.7470715481000774, "grad_norm": 2.1117706298828125, "learning_rate": 1.7669819229437383e-05, "loss": 0.4933, "step": 22954 }, { "epoch": 3.747234806742582, "grad_norm": 2.1615593433380127, "learning_rate": 1.766961560318205e-05, "loss": 0.4687, "step": 22955 }, { "epoch": 3.7473980653850862, "grad_norm": 2.1025640964508057, "learning_rate": 1.7669411969203417e-05, "loss": 0.5349, "step": 22956 }, { "epoch": 3.7475613240275907, "grad_norm": 1.7057194709777832, "learning_rate": 1.766920832750169e-05, "loss": 0.3887, "step": 22957 }, { "epoch": 3.747724582670095, "grad_norm": 1.6856813430786133, "learning_rate": 1.766900467807707e-05, "loss": 0.3328, "step": 22958 }, { "epoch": 3.7478878413125996, "grad_norm": 1.9372121095657349, "learning_rate": 1.7668801020929766e-05, "loss": 0.4374, "step": 22959 }, { "epoch": 3.748051099955104, "grad_norm": 1.6950373649597168, "learning_rate": 1.7668597356059977e-05, "loss": 0.4527, "step": 22960 }, { "epoch": 3.7482143585976084, "grad_norm": 1.9496501684188843, "learning_rate": 1.766839368346792e-05, "loss": 0.5186, "step": 22961 }, { "epoch": 3.748377617240113, "grad_norm": 2.173851490020752, "learning_rate": 1.7668190003153786e-05, "loss": 0.4556, "step": 22962 }, { "epoch": 3.7485408758826173, "grad_norm": 1.9086487293243408, "learning_rate": 1.766798631511779e-05, "loss": 0.3927, "step": 22963 }, { "epoch": 3.7487041345251213, "grad_norm": 2.1022486686706543, "learning_rate": 1.7667782619360135e-05, "loss": 0.5182, "step": 22964 }, { "epoch": 3.7488673931676257, "grad_norm": 2.178920269012451, "learning_rate": 1.7667578915881022e-05, "loss": 0.5514, "step": 22965 }, { "epoch": 3.74903065181013, "grad_norm": 2.051586627960205, "learning_rate": 1.7667375204680658e-05, "loss": 0.4846, "step": 22966 }, { "epoch": 3.7491939104526346, "grad_norm": 2.3657562732696533, "learning_rate": 1.7667171485759253e-05, "loss": 0.5198, "step": 22967 }, { "epoch": 3.749357169095139, "grad_norm": 2.0947704315185547, "learning_rate": 1.7666967759117007e-05, "loss": 0.5299, "step": 22968 }, { "epoch": 3.7495204277376435, "grad_norm": 1.8193367719650269, "learning_rate": 1.7666764024754128e-05, "loss": 0.4563, "step": 22969 }, { "epoch": 3.7496836863801475, "grad_norm": 2.2598154544830322, "learning_rate": 1.7666560282670814e-05, "loss": 0.5368, "step": 22970 }, { "epoch": 3.749846945022652, "grad_norm": 1.9097322225570679, "learning_rate": 1.7666356532867282e-05, "loss": 0.4248, "step": 22971 }, { "epoch": 3.7500102036651564, "grad_norm": 2.3599371910095215, "learning_rate": 1.7666152775343727e-05, "loss": 0.5079, "step": 22972 }, { "epoch": 3.750173462307661, "grad_norm": 2.014474868774414, "learning_rate": 1.766594901010036e-05, "loss": 0.5055, "step": 22973 }, { "epoch": 3.7503367209501652, "grad_norm": 1.8077329397201538, "learning_rate": 1.7665745237137386e-05, "loss": 0.5158, "step": 22974 }, { "epoch": 3.7504999795926697, "grad_norm": 1.9669840335845947, "learning_rate": 1.7665541456455004e-05, "loss": 0.5328, "step": 22975 }, { "epoch": 3.750663238235174, "grad_norm": 2.105314254760742, "learning_rate": 1.7665337668053428e-05, "loss": 0.5351, "step": 22976 }, { "epoch": 3.7508264968776786, "grad_norm": 1.923561930656433, "learning_rate": 1.7665133871932858e-05, "loss": 0.5067, "step": 22977 }, { "epoch": 3.750989755520183, "grad_norm": 2.709108352661133, "learning_rate": 1.76649300680935e-05, "loss": 0.4969, "step": 22978 }, { "epoch": 3.7511530141626874, "grad_norm": 2.0788612365722656, "learning_rate": 1.766472625653556e-05, "loss": 0.476, "step": 22979 }, { "epoch": 3.751316272805192, "grad_norm": 1.7332451343536377, "learning_rate": 1.766452243725924e-05, "loss": 0.4102, "step": 22980 }, { "epoch": 3.751479531447696, "grad_norm": 2.068096399307251, "learning_rate": 1.766431861026475e-05, "loss": 0.4654, "step": 22981 }, { "epoch": 3.7516427900902003, "grad_norm": 1.6915090084075928, "learning_rate": 1.7664114775552295e-05, "loss": 0.4149, "step": 22982 }, { "epoch": 3.7518060487327047, "grad_norm": 1.9414936304092407, "learning_rate": 1.7663910933122075e-05, "loss": 0.4345, "step": 22983 }, { "epoch": 3.751969307375209, "grad_norm": 2.179302215576172, "learning_rate": 1.76637070829743e-05, "loss": 0.5868, "step": 22984 }, { "epoch": 3.7521325660177136, "grad_norm": 2.175558567047119, "learning_rate": 1.7663503225109176e-05, "loss": 0.5467, "step": 22985 }, { "epoch": 3.752295824660218, "grad_norm": 1.7516133785247803, "learning_rate": 1.7663299359526903e-05, "loss": 0.4422, "step": 22986 }, { "epoch": 3.7524590833027225, "grad_norm": 2.5464015007019043, "learning_rate": 1.7663095486227693e-05, "loss": 0.5746, "step": 22987 }, { "epoch": 3.7526223419452265, "grad_norm": 1.7735238075256348, "learning_rate": 1.7662891605211745e-05, "loss": 0.4328, "step": 22988 }, { "epoch": 3.752785600587731, "grad_norm": 1.6567797660827637, "learning_rate": 1.766268771647927e-05, "loss": 0.4568, "step": 22989 }, { "epoch": 3.7529488592302354, "grad_norm": 1.9365907907485962, "learning_rate": 1.7662483820030468e-05, "loss": 0.4596, "step": 22990 }, { "epoch": 3.75311211787274, "grad_norm": 1.6345409154891968, "learning_rate": 1.7662279915865546e-05, "loss": 0.4146, "step": 22991 }, { "epoch": 3.7532753765152442, "grad_norm": 2.5289628505706787, "learning_rate": 1.7662076003984713e-05, "loss": 0.5804, "step": 22992 }, { "epoch": 3.7534386351577487, "grad_norm": 1.9465453624725342, "learning_rate": 1.766187208438817e-05, "loss": 0.5215, "step": 22993 }, { "epoch": 3.753601893800253, "grad_norm": 1.9351845979690552, "learning_rate": 1.7661668157076124e-05, "loss": 0.5121, "step": 22994 }, { "epoch": 3.7537651524427575, "grad_norm": 1.6497153043746948, "learning_rate": 1.7661464222048777e-05, "loss": 0.4508, "step": 22995 }, { "epoch": 3.753928411085262, "grad_norm": 1.9541358947753906, "learning_rate": 1.7661260279306343e-05, "loss": 0.3989, "step": 22996 }, { "epoch": 3.7540916697277664, "grad_norm": 1.887636661529541, "learning_rate": 1.766105632884902e-05, "loss": 0.5102, "step": 22997 }, { "epoch": 3.754254928370271, "grad_norm": 2.033665418624878, "learning_rate": 1.7660852370677014e-05, "loss": 0.5942, "step": 22998 }, { "epoch": 3.754418187012775, "grad_norm": 1.8582154512405396, "learning_rate": 1.7660648404790533e-05, "loss": 0.5053, "step": 22999 }, { "epoch": 3.7545814456552793, "grad_norm": 2.255333662033081, "learning_rate": 1.766044443118978e-05, "loss": 0.7726, "step": 23000 }, { "epoch": 3.7547447042977837, "grad_norm": 2.3654942512512207, "learning_rate": 1.7660240449874963e-05, "loss": 0.5554, "step": 23001 }, { "epoch": 3.754907962940288, "grad_norm": 1.9736225605010986, "learning_rate": 1.7660036460846285e-05, "loss": 0.467, "step": 23002 }, { "epoch": 3.7550712215827926, "grad_norm": 2.0581250190734863, "learning_rate": 1.7659832464103955e-05, "loss": 0.4315, "step": 23003 }, { "epoch": 3.755234480225297, "grad_norm": 1.7204099893569946, "learning_rate": 1.765962845964817e-05, "loss": 0.4012, "step": 23004 }, { "epoch": 3.755397738867801, "grad_norm": 1.6438791751861572, "learning_rate": 1.765942444747915e-05, "loss": 0.4222, "step": 23005 }, { "epoch": 3.7555609975103055, "grad_norm": 1.8704490661621094, "learning_rate": 1.7659220427597082e-05, "loss": 0.5131, "step": 23006 }, { "epoch": 3.75572425615281, "grad_norm": 1.8886982202529907, "learning_rate": 1.7659016400002187e-05, "loss": 0.4702, "step": 23007 }, { "epoch": 3.7558875147953144, "grad_norm": 1.8003798723220825, "learning_rate": 1.765881236469466e-05, "loss": 0.4628, "step": 23008 }, { "epoch": 3.756050773437819, "grad_norm": 1.6949044466018677, "learning_rate": 1.7658608321674717e-05, "loss": 0.4111, "step": 23009 }, { "epoch": 3.7562140320803232, "grad_norm": 2.12235426902771, "learning_rate": 1.7658404270942554e-05, "loss": 0.5319, "step": 23010 }, { "epoch": 3.7563772907228277, "grad_norm": 2.0816540718078613, "learning_rate": 1.7658200212498377e-05, "loss": 0.5419, "step": 23011 }, { "epoch": 3.756540549365332, "grad_norm": 1.8868941068649292, "learning_rate": 1.76579961463424e-05, "loss": 0.3852, "step": 23012 }, { "epoch": 3.7567038080078365, "grad_norm": 1.9055956602096558, "learning_rate": 1.7657792072474816e-05, "loss": 0.4359, "step": 23013 }, { "epoch": 3.756867066650341, "grad_norm": 2.5588831901550293, "learning_rate": 1.7657587990895845e-05, "loss": 0.5742, "step": 23014 }, { "epoch": 3.7570303252928454, "grad_norm": 2.609138011932373, "learning_rate": 1.7657383901605682e-05, "loss": 0.583, "step": 23015 }, { "epoch": 3.7571935839353494, "grad_norm": 2.6295864582061768, "learning_rate": 1.7657179804604535e-05, "loss": 0.5656, "step": 23016 }, { "epoch": 3.757356842577854, "grad_norm": 1.8823881149291992, "learning_rate": 1.765697569989261e-05, "loss": 0.463, "step": 23017 }, { "epoch": 3.7575201012203583, "grad_norm": 2.1124207973480225, "learning_rate": 1.7656771587470113e-05, "loss": 0.5249, "step": 23018 }, { "epoch": 3.7576833598628627, "grad_norm": 1.8459844589233398, "learning_rate": 1.7656567467337248e-05, "loss": 0.4795, "step": 23019 }, { "epoch": 3.757846618505367, "grad_norm": 2.026918888092041, "learning_rate": 1.7656363339494222e-05, "loss": 0.4212, "step": 23020 }, { "epoch": 3.7580098771478716, "grad_norm": 1.8679625988006592, "learning_rate": 1.765615920394124e-05, "loss": 0.4615, "step": 23021 }, { "epoch": 3.758173135790376, "grad_norm": 2.0778074264526367, "learning_rate": 1.7655955060678508e-05, "loss": 0.5032, "step": 23022 }, { "epoch": 3.75833639443288, "grad_norm": 1.7817494869232178, "learning_rate": 1.7655750909706234e-05, "loss": 0.509, "step": 23023 }, { "epoch": 3.7584996530753845, "grad_norm": 2.0154364109039307, "learning_rate": 1.7655546751024615e-05, "loss": 0.5059, "step": 23024 }, { "epoch": 3.758662911717889, "grad_norm": 2.343985080718994, "learning_rate": 1.7655342584633865e-05, "loss": 0.4996, "step": 23025 }, { "epoch": 3.7588261703603933, "grad_norm": 1.923092246055603, "learning_rate": 1.765513841053419e-05, "loss": 0.4053, "step": 23026 }, { "epoch": 3.758989429002898, "grad_norm": 1.925881028175354, "learning_rate": 1.765493422872579e-05, "loss": 0.5042, "step": 23027 }, { "epoch": 3.7591526876454022, "grad_norm": 2.0156002044677734, "learning_rate": 1.7654730039208875e-05, "loss": 0.4717, "step": 23028 }, { "epoch": 3.7593159462879067, "grad_norm": 2.0685815811157227, "learning_rate": 1.7654525841983645e-05, "loss": 0.5089, "step": 23029 }, { "epoch": 3.759479204930411, "grad_norm": 2.111213207244873, "learning_rate": 1.7654321637050314e-05, "loss": 0.4725, "step": 23030 }, { "epoch": 3.7596424635729155, "grad_norm": 1.813098669052124, "learning_rate": 1.7654117424409078e-05, "loss": 0.468, "step": 23031 }, { "epoch": 3.75980572221542, "grad_norm": 1.853060245513916, "learning_rate": 1.7653913204060153e-05, "loss": 0.4476, "step": 23032 }, { "epoch": 3.7599689808579244, "grad_norm": 2.0746426582336426, "learning_rate": 1.7653708976003738e-05, "loss": 0.4682, "step": 23033 }, { "epoch": 3.7601322395004284, "grad_norm": 2.1526689529418945, "learning_rate": 1.765350474024004e-05, "loss": 0.5183, "step": 23034 }, { "epoch": 3.760295498142933, "grad_norm": 1.9676839113235474, "learning_rate": 1.7653300496769264e-05, "loss": 0.5885, "step": 23035 }, { "epoch": 3.7604587567854373, "grad_norm": 2.129847764968872, "learning_rate": 1.7653096245591617e-05, "loss": 0.5168, "step": 23036 }, { "epoch": 3.7606220154279417, "grad_norm": 1.6065518856048584, "learning_rate": 1.7652891986707305e-05, "loss": 0.3917, "step": 23037 }, { "epoch": 3.760785274070446, "grad_norm": 2.123716115951538, "learning_rate": 1.7652687720116528e-05, "loss": 0.5459, "step": 23038 }, { "epoch": 3.7609485327129506, "grad_norm": 1.60530686378479, "learning_rate": 1.76524834458195e-05, "loss": 0.3722, "step": 23039 }, { "epoch": 3.761111791355455, "grad_norm": 1.9155726432800293, "learning_rate": 1.7652279163816422e-05, "loss": 0.46, "step": 23040 }, { "epoch": 3.761275049997959, "grad_norm": 2.030151128768921, "learning_rate": 1.7652074874107504e-05, "loss": 0.574, "step": 23041 }, { "epoch": 3.7614383086404635, "grad_norm": 1.8910614252090454, "learning_rate": 1.765187057669295e-05, "loss": 0.4668, "step": 23042 }, { "epoch": 3.761601567282968, "grad_norm": 1.5854413509368896, "learning_rate": 1.765166627157296e-05, "loss": 0.3576, "step": 23043 }, { "epoch": 3.7617648259254723, "grad_norm": 2.425570487976074, "learning_rate": 1.7651461958747745e-05, "loss": 0.5594, "step": 23044 }, { "epoch": 3.761928084567977, "grad_norm": 2.2216649055480957, "learning_rate": 1.765125763821751e-05, "loss": 0.4958, "step": 23045 }, { "epoch": 3.762091343210481, "grad_norm": 1.916029930114746, "learning_rate": 1.765105330998246e-05, "loss": 0.372, "step": 23046 }, { "epoch": 3.7622546018529857, "grad_norm": 2.031527042388916, "learning_rate": 1.7650848974042802e-05, "loss": 0.4842, "step": 23047 }, { "epoch": 3.76241786049549, "grad_norm": 2.0351288318634033, "learning_rate": 1.765064463039874e-05, "loss": 0.4682, "step": 23048 }, { "epoch": 3.7625811191379945, "grad_norm": 1.760978102684021, "learning_rate": 1.7650440279050484e-05, "loss": 0.3907, "step": 23049 }, { "epoch": 3.762744377780499, "grad_norm": 1.8539109230041504, "learning_rate": 1.7650235919998234e-05, "loss": 0.5013, "step": 23050 }, { "epoch": 3.7629076364230034, "grad_norm": 1.840098261833191, "learning_rate": 1.76500315532422e-05, "loss": 0.3867, "step": 23051 }, { "epoch": 3.7630708950655074, "grad_norm": 2.1360018253326416, "learning_rate": 1.7649827178782582e-05, "loss": 0.4907, "step": 23052 }, { "epoch": 3.763234153708012, "grad_norm": 1.8714721202850342, "learning_rate": 1.7649622796619597e-05, "loss": 0.4589, "step": 23053 }, { "epoch": 3.7633974123505163, "grad_norm": 1.917851209640503, "learning_rate": 1.7649418406753438e-05, "loss": 0.4724, "step": 23054 }, { "epoch": 3.7635606709930207, "grad_norm": 1.9018161296844482, "learning_rate": 1.7649214009184323e-05, "loss": 0.4467, "step": 23055 }, { "epoch": 3.763723929635525, "grad_norm": 1.883683204650879, "learning_rate": 1.7649009603912443e-05, "loss": 0.4746, "step": 23056 }, { "epoch": 3.7638871882780296, "grad_norm": 1.661158800125122, "learning_rate": 1.7648805190938018e-05, "loss": 0.4448, "step": 23057 }, { "epoch": 3.7640504469205336, "grad_norm": 2.5011982917785645, "learning_rate": 1.7648600770261248e-05, "loss": 0.4685, "step": 23058 }, { "epoch": 3.764213705563038, "grad_norm": 1.508231282234192, "learning_rate": 1.764839634188234e-05, "loss": 0.3801, "step": 23059 }, { "epoch": 3.7643769642055425, "grad_norm": 2.324399948120117, "learning_rate": 1.7648191905801493e-05, "loss": 0.4561, "step": 23060 }, { "epoch": 3.764540222848047, "grad_norm": 1.86802339553833, "learning_rate": 1.7647987462018924e-05, "loss": 0.4197, "step": 23061 }, { "epoch": 3.7647034814905513, "grad_norm": 1.732555866241455, "learning_rate": 1.7647783010534834e-05, "loss": 0.3925, "step": 23062 }, { "epoch": 3.764866740133056, "grad_norm": 2.1063528060913086, "learning_rate": 1.7647578551349426e-05, "loss": 0.5284, "step": 23063 }, { "epoch": 3.76502999877556, "grad_norm": 2.4516429901123047, "learning_rate": 1.764737408446291e-05, "loss": 0.6139, "step": 23064 }, { "epoch": 3.7651932574180647, "grad_norm": 1.8367981910705566, "learning_rate": 1.764716960987549e-05, "loss": 0.4337, "step": 23065 }, { "epoch": 3.765356516060569, "grad_norm": 2.1013505458831787, "learning_rate": 1.7646965127587373e-05, "loss": 0.5639, "step": 23066 }, { "epoch": 3.7655197747030735, "grad_norm": 1.4257094860076904, "learning_rate": 1.764676063759876e-05, "loss": 0.3796, "step": 23067 }, { "epoch": 3.765683033345578, "grad_norm": 1.7353764772415161, "learning_rate": 1.7646556139909864e-05, "loss": 0.4261, "step": 23068 }, { "epoch": 3.765846291988082, "grad_norm": 1.765100121498108, "learning_rate": 1.7646351634520892e-05, "loss": 0.4673, "step": 23069 }, { "epoch": 3.7660095506305864, "grad_norm": 2.3388094902038574, "learning_rate": 1.764614712143204e-05, "loss": 0.4993, "step": 23070 }, { "epoch": 3.766172809273091, "grad_norm": 1.8814085721969604, "learning_rate": 1.7645942600643523e-05, "loss": 0.4376, "step": 23071 }, { "epoch": 3.7663360679155953, "grad_norm": 1.902081847190857, "learning_rate": 1.7645738072155542e-05, "loss": 0.4536, "step": 23072 }, { "epoch": 3.7664993265580997, "grad_norm": 2.06219220161438, "learning_rate": 1.7645533535968306e-05, "loss": 0.4529, "step": 23073 }, { "epoch": 3.766662585200604, "grad_norm": 2.4254660606384277, "learning_rate": 1.764532899208202e-05, "loss": 0.5059, "step": 23074 }, { "epoch": 3.7668258438431086, "grad_norm": 1.8951719999313354, "learning_rate": 1.764512444049689e-05, "loss": 0.4179, "step": 23075 }, { "epoch": 3.7669891024856126, "grad_norm": 2.520554542541504, "learning_rate": 1.764491988121312e-05, "loss": 0.5606, "step": 23076 }, { "epoch": 3.767152361128117, "grad_norm": 2.028925657272339, "learning_rate": 1.764471531423092e-05, "loss": 0.4286, "step": 23077 }, { "epoch": 3.7673156197706215, "grad_norm": 2.3706583976745605, "learning_rate": 1.764451073955049e-05, "loss": 0.4595, "step": 23078 }, { "epoch": 3.767478878413126, "grad_norm": 1.6433601379394531, "learning_rate": 1.764430615717204e-05, "loss": 0.3757, "step": 23079 }, { "epoch": 3.7676421370556303, "grad_norm": 2.227065324783325, "learning_rate": 1.764410156709578e-05, "loss": 0.5196, "step": 23080 }, { "epoch": 3.7678053956981348, "grad_norm": 1.9733439683914185, "learning_rate": 1.764389696932191e-05, "loss": 0.4531, "step": 23081 }, { "epoch": 3.767968654340639, "grad_norm": 2.321364402770996, "learning_rate": 1.7643692363850636e-05, "loss": 0.5727, "step": 23082 }, { "epoch": 3.7681319129831436, "grad_norm": 1.8841944932937622, "learning_rate": 1.764348775068217e-05, "loss": 0.4483, "step": 23083 }, { "epoch": 3.768295171625648, "grad_norm": 2.33467173576355, "learning_rate": 1.7643283129816708e-05, "loss": 0.5179, "step": 23084 }, { "epoch": 3.7684584302681525, "grad_norm": 2.234750986099243, "learning_rate": 1.7643078501254468e-05, "loss": 0.5065, "step": 23085 }, { "epoch": 3.768621688910657, "grad_norm": 2.5131845474243164, "learning_rate": 1.7642873864995646e-05, "loss": 0.579, "step": 23086 }, { "epoch": 3.768784947553161, "grad_norm": 1.871673583984375, "learning_rate": 1.764266922104045e-05, "loss": 0.4621, "step": 23087 }, { "epoch": 3.7689482061956654, "grad_norm": 1.986013412475586, "learning_rate": 1.764246456938909e-05, "loss": 0.4593, "step": 23088 }, { "epoch": 3.76911146483817, "grad_norm": 2.2046456336975098, "learning_rate": 1.7642259910041773e-05, "loss": 0.5434, "step": 23089 }, { "epoch": 3.7692747234806743, "grad_norm": 1.9305312633514404, "learning_rate": 1.76420552429987e-05, "loss": 0.4592, "step": 23090 }, { "epoch": 3.7694379821231787, "grad_norm": 1.7664475440979004, "learning_rate": 1.764185056826008e-05, "loss": 0.4716, "step": 23091 }, { "epoch": 3.769601240765683, "grad_norm": 2.1075079441070557, "learning_rate": 1.7641645885826118e-05, "loss": 0.5131, "step": 23092 }, { "epoch": 3.7697644994081876, "grad_norm": 1.9660072326660156, "learning_rate": 1.764144119569702e-05, "loss": 0.4721, "step": 23093 }, { "epoch": 3.7699277580506916, "grad_norm": 2.0931239128112793, "learning_rate": 1.7641236497872995e-05, "loss": 0.5263, "step": 23094 }, { "epoch": 3.770091016693196, "grad_norm": 2.2069900035858154, "learning_rate": 1.7641031792354247e-05, "loss": 0.5225, "step": 23095 }, { "epoch": 3.7702542753357005, "grad_norm": 1.7628740072250366, "learning_rate": 1.7640827079140978e-05, "loss": 0.4286, "step": 23096 }, { "epoch": 3.770417533978205, "grad_norm": 1.946232557296753, "learning_rate": 1.76406223582334e-05, "loss": 0.4378, "step": 23097 }, { "epoch": 3.7705807926207093, "grad_norm": 1.766902208328247, "learning_rate": 1.764041762963172e-05, "loss": 0.4717, "step": 23098 }, { "epoch": 3.7707440512632138, "grad_norm": 1.8260222673416138, "learning_rate": 1.7640212893336143e-05, "loss": 0.4234, "step": 23099 }, { "epoch": 3.770907309905718, "grad_norm": 1.3892287015914917, "learning_rate": 1.7640008149346866e-05, "loss": 0.3486, "step": 23100 }, { "epoch": 3.7710705685482226, "grad_norm": 2.159170389175415, "learning_rate": 1.7639803397664108e-05, "loss": 0.5182, "step": 23101 }, { "epoch": 3.771233827190727, "grad_norm": 1.9814512729644775, "learning_rate": 1.763959863828807e-05, "loss": 0.4569, "step": 23102 }, { "epoch": 3.7713970858332315, "grad_norm": 1.8173273801803589, "learning_rate": 1.7639393871218957e-05, "loss": 0.5046, "step": 23103 }, { "epoch": 3.771560344475736, "grad_norm": 2.0256717205047607, "learning_rate": 1.763918909645698e-05, "loss": 0.4438, "step": 23104 }, { "epoch": 3.77172360311824, "grad_norm": 1.7308858633041382, "learning_rate": 1.7638984314002335e-05, "loss": 0.4158, "step": 23105 }, { "epoch": 3.7718868617607444, "grad_norm": 2.0139307975769043, "learning_rate": 1.7638779523855238e-05, "loss": 0.4627, "step": 23106 }, { "epoch": 3.772050120403249, "grad_norm": 1.962444543838501, "learning_rate": 1.7638574726015893e-05, "loss": 0.5279, "step": 23107 }, { "epoch": 3.7722133790457533, "grad_norm": 1.9312843084335327, "learning_rate": 1.7638369920484503e-05, "loss": 0.4522, "step": 23108 }, { "epoch": 3.7723766376882577, "grad_norm": 2.1055119037628174, "learning_rate": 1.763816510726128e-05, "loss": 0.4959, "step": 23109 }, { "epoch": 3.772539896330762, "grad_norm": 2.0531375408172607, "learning_rate": 1.7637960286346423e-05, "loss": 0.5793, "step": 23110 }, { "epoch": 3.772703154973266, "grad_norm": 1.67898428440094, "learning_rate": 1.7637755457740145e-05, "loss": 0.41, "step": 23111 }, { "epoch": 3.7728664136157706, "grad_norm": 1.7889186143875122, "learning_rate": 1.7637550621442647e-05, "loss": 0.4997, "step": 23112 }, { "epoch": 3.773029672258275, "grad_norm": 1.746947169303894, "learning_rate": 1.763734577745414e-05, "loss": 0.4095, "step": 23113 }, { "epoch": 3.7731929309007795, "grad_norm": 1.751565933227539, "learning_rate": 1.7637140925774824e-05, "loss": 0.4296, "step": 23114 }, { "epoch": 3.773356189543284, "grad_norm": 2.234745740890503, "learning_rate": 1.7636936066404912e-05, "loss": 0.5486, "step": 23115 }, { "epoch": 3.7735194481857883, "grad_norm": 2.341308355331421, "learning_rate": 1.7636731199344605e-05, "loss": 0.5477, "step": 23116 }, { "epoch": 3.7736827068282928, "grad_norm": 2.135298490524292, "learning_rate": 1.7636526324594116e-05, "loss": 0.4876, "step": 23117 }, { "epoch": 3.773845965470797, "grad_norm": 2.1215498447418213, "learning_rate": 1.7636321442153645e-05, "loss": 0.5222, "step": 23118 }, { "epoch": 3.7740092241133016, "grad_norm": 1.8242312669754028, "learning_rate": 1.7636116552023402e-05, "loss": 0.456, "step": 23119 }, { "epoch": 3.774172482755806, "grad_norm": 1.824111819267273, "learning_rate": 1.7635911654203588e-05, "loss": 0.4566, "step": 23120 }, { "epoch": 3.7743357413983105, "grad_norm": 1.8999475240707397, "learning_rate": 1.7635706748694415e-05, "loss": 0.4565, "step": 23121 }, { "epoch": 3.7744990000408145, "grad_norm": 2.345292091369629, "learning_rate": 1.7635501835496084e-05, "loss": 0.6159, "step": 23122 }, { "epoch": 3.774662258683319, "grad_norm": 2.014683723449707, "learning_rate": 1.7635296914608808e-05, "loss": 0.4246, "step": 23123 }, { "epoch": 3.7748255173258234, "grad_norm": 2.0633339881896973, "learning_rate": 1.763509198603279e-05, "loss": 0.5113, "step": 23124 }, { "epoch": 3.774988775968328, "grad_norm": 1.8681708574295044, "learning_rate": 1.7634887049768238e-05, "loss": 0.4992, "step": 23125 }, { "epoch": 3.7751520346108323, "grad_norm": 2.0618724822998047, "learning_rate": 1.7634682105815354e-05, "loss": 0.596, "step": 23126 }, { "epoch": 3.7753152932533367, "grad_norm": 2.2131268978118896, "learning_rate": 1.7634477154174348e-05, "loss": 0.5833, "step": 23127 }, { "epoch": 3.775478551895841, "grad_norm": 1.8606270551681519, "learning_rate": 1.7634272194845423e-05, "loss": 0.4997, "step": 23128 }, { "epoch": 3.775641810538345, "grad_norm": 1.8724889755249023, "learning_rate": 1.7634067227828794e-05, "loss": 0.4807, "step": 23129 }, { "epoch": 3.7758050691808496, "grad_norm": 2.103142738342285, "learning_rate": 1.7633862253124656e-05, "loss": 0.5511, "step": 23130 }, { "epoch": 3.775968327823354, "grad_norm": 2.164583206176758, "learning_rate": 1.7633657270733224e-05, "loss": 0.5598, "step": 23131 }, { "epoch": 3.7761315864658584, "grad_norm": 1.983074426651001, "learning_rate": 1.76334522806547e-05, "loss": 0.5364, "step": 23132 }, { "epoch": 3.776294845108363, "grad_norm": 1.9760761260986328, "learning_rate": 1.7633247282889287e-05, "loss": 0.4783, "step": 23133 }, { "epoch": 3.7764581037508673, "grad_norm": 2.1759073734283447, "learning_rate": 1.7633042277437202e-05, "loss": 0.442, "step": 23134 }, { "epoch": 3.7766213623933718, "grad_norm": 1.9528584480285645, "learning_rate": 1.7632837264298643e-05, "loss": 0.4743, "step": 23135 }, { "epoch": 3.776784621035876, "grad_norm": 1.863526701927185, "learning_rate": 1.763263224347382e-05, "loss": 0.4749, "step": 23136 }, { "epoch": 3.7769478796783806, "grad_norm": 1.9090877771377563, "learning_rate": 1.763242721496294e-05, "loss": 0.5309, "step": 23137 }, { "epoch": 3.777111138320885, "grad_norm": 1.8497302532196045, "learning_rate": 1.7632222178766202e-05, "loss": 0.4844, "step": 23138 }, { "epoch": 3.7772743969633895, "grad_norm": 1.952409267425537, "learning_rate": 1.7632017134883827e-05, "loss": 0.4911, "step": 23139 }, { "epoch": 3.7774376556058935, "grad_norm": 1.7797707319259644, "learning_rate": 1.7631812083316003e-05, "loss": 0.4401, "step": 23140 }, { "epoch": 3.777600914248398, "grad_norm": 1.781078577041626, "learning_rate": 1.7631607024062954e-05, "loss": 0.4819, "step": 23141 }, { "epoch": 3.7777641728909024, "grad_norm": 2.1003568172454834, "learning_rate": 1.7631401957124877e-05, "loss": 0.47, "step": 23142 }, { "epoch": 3.777927431533407, "grad_norm": 2.2372689247131348, "learning_rate": 1.7631196882501975e-05, "loss": 0.4883, "step": 23143 }, { "epoch": 3.7780906901759113, "grad_norm": 1.980507493019104, "learning_rate": 1.7630991800194465e-05, "loss": 0.4835, "step": 23144 }, { "epoch": 3.7782539488184157, "grad_norm": 2.214053153991699, "learning_rate": 1.7630786710202546e-05, "loss": 0.5055, "step": 23145 }, { "epoch": 3.7784172074609197, "grad_norm": 1.7017847299575806, "learning_rate": 1.763058161252643e-05, "loss": 0.407, "step": 23146 }, { "epoch": 3.778580466103424, "grad_norm": 1.8757593631744385, "learning_rate": 1.7630376507166318e-05, "loss": 0.5611, "step": 23147 }, { "epoch": 3.7787437247459286, "grad_norm": 1.6240699291229248, "learning_rate": 1.7630171394122418e-05, "loss": 0.4545, "step": 23148 }, { "epoch": 3.778906983388433, "grad_norm": 1.8678607940673828, "learning_rate": 1.762996627339494e-05, "loss": 0.4379, "step": 23149 }, { "epoch": 3.7790702420309374, "grad_norm": 2.354095220565796, "learning_rate": 1.7629761144984087e-05, "loss": 0.5744, "step": 23150 }, { "epoch": 3.779233500673442, "grad_norm": 1.7697378396987915, "learning_rate": 1.7629556008890066e-05, "loss": 0.448, "step": 23151 }, { "epoch": 3.7793967593159463, "grad_norm": 1.5937011241912842, "learning_rate": 1.7629350865113083e-05, "loss": 0.4381, "step": 23152 }, { "epoch": 3.7795600179584508, "grad_norm": 2.264492988586426, "learning_rate": 1.7629145713653346e-05, "loss": 0.5484, "step": 23153 }, { "epoch": 3.779723276600955, "grad_norm": 1.8976449966430664, "learning_rate": 1.7628940554511064e-05, "loss": 0.4752, "step": 23154 }, { "epoch": 3.7798865352434596, "grad_norm": 1.9514788389205933, "learning_rate": 1.762873538768644e-05, "loss": 0.412, "step": 23155 }, { "epoch": 3.780049793885964, "grad_norm": 2.078070640563965, "learning_rate": 1.762853021317968e-05, "loss": 0.4928, "step": 23156 }, { "epoch": 3.780213052528468, "grad_norm": 2.179572582244873, "learning_rate": 1.7628325030990994e-05, "loss": 0.5807, "step": 23157 }, { "epoch": 3.7803763111709725, "grad_norm": 1.7503794431686401, "learning_rate": 1.7628119841120583e-05, "loss": 0.4299, "step": 23158 }, { "epoch": 3.780539569813477, "grad_norm": 1.9791467189788818, "learning_rate": 1.762791464356866e-05, "loss": 0.5072, "step": 23159 }, { "epoch": 3.7807028284559814, "grad_norm": 1.7282094955444336, "learning_rate": 1.762770943833543e-05, "loss": 0.4332, "step": 23160 }, { "epoch": 3.780866087098486, "grad_norm": 2.1570489406585693, "learning_rate": 1.7627504225421096e-05, "loss": 0.4808, "step": 23161 }, { "epoch": 3.7810293457409903, "grad_norm": 1.6922965049743652, "learning_rate": 1.762729900482587e-05, "loss": 0.4225, "step": 23162 }, { "epoch": 3.7811926043834947, "grad_norm": 2.13996958732605, "learning_rate": 1.7627093776549954e-05, "loss": 0.5535, "step": 23163 }, { "epoch": 3.7813558630259987, "grad_norm": 2.0478434562683105, "learning_rate": 1.7626888540593557e-05, "loss": 0.5231, "step": 23164 }, { "epoch": 3.781519121668503, "grad_norm": 1.9815922975540161, "learning_rate": 1.7626683296956885e-05, "loss": 0.5173, "step": 23165 }, { "epoch": 3.7816823803110076, "grad_norm": 1.903427243232727, "learning_rate": 1.7626478045640146e-05, "loss": 0.4408, "step": 23166 }, { "epoch": 3.781845638953512, "grad_norm": 1.7167600393295288, "learning_rate": 1.7626272786643545e-05, "loss": 0.4586, "step": 23167 }, { "epoch": 3.7820088975960164, "grad_norm": 1.891153335571289, "learning_rate": 1.762606751996729e-05, "loss": 0.4375, "step": 23168 }, { "epoch": 3.782172156238521, "grad_norm": 2.1676816940307617, "learning_rate": 1.7625862245611585e-05, "loss": 0.5066, "step": 23169 }, { "epoch": 3.7823354148810253, "grad_norm": 2.254204750061035, "learning_rate": 1.762565696357664e-05, "loss": 0.5685, "step": 23170 }, { "epoch": 3.7824986735235298, "grad_norm": 2.1580991744995117, "learning_rate": 1.762545167386266e-05, "loss": 0.5186, "step": 23171 }, { "epoch": 3.782661932166034, "grad_norm": 2.2281925678253174, "learning_rate": 1.7625246376469852e-05, "loss": 0.5168, "step": 23172 }, { "epoch": 3.7828251908085386, "grad_norm": 2.194695234298706, "learning_rate": 1.7625041071398423e-05, "loss": 0.4804, "step": 23173 }, { "epoch": 3.782988449451043, "grad_norm": 1.9719855785369873, "learning_rate": 1.7624835758648583e-05, "loss": 0.4776, "step": 23174 }, { "epoch": 3.783151708093547, "grad_norm": 1.8949910402297974, "learning_rate": 1.762463043822053e-05, "loss": 0.4856, "step": 23175 }, { "epoch": 3.7833149667360515, "grad_norm": 2.253570795059204, "learning_rate": 1.762442511011448e-05, "loss": 0.4877, "step": 23176 }, { "epoch": 3.783478225378556, "grad_norm": 2.1772735118865967, "learning_rate": 1.7624219774330634e-05, "loss": 0.4766, "step": 23177 }, { "epoch": 3.7836414840210604, "grad_norm": 1.9598804712295532, "learning_rate": 1.76240144308692e-05, "loss": 0.5039, "step": 23178 }, { "epoch": 3.783804742663565, "grad_norm": 1.6073509454727173, "learning_rate": 1.7623809079730388e-05, "loss": 0.4543, "step": 23179 }, { "epoch": 3.7839680013060693, "grad_norm": 2.332367181777954, "learning_rate": 1.76236037209144e-05, "loss": 0.537, "step": 23180 }, { "epoch": 3.7841312599485737, "grad_norm": 1.9252774715423584, "learning_rate": 1.7623398354421447e-05, "loss": 0.4864, "step": 23181 }, { "epoch": 3.7842945185910777, "grad_norm": 1.9332387447357178, "learning_rate": 1.7623192980251737e-05, "loss": 0.4834, "step": 23182 }, { "epoch": 3.784457777233582, "grad_norm": 1.8366119861602783, "learning_rate": 1.762298759840547e-05, "loss": 0.5064, "step": 23183 }, { "epoch": 3.7846210358760866, "grad_norm": 2.3226418495178223, "learning_rate": 1.7622782208882852e-05, "loss": 0.5305, "step": 23184 }, { "epoch": 3.784784294518591, "grad_norm": 2.1441264152526855, "learning_rate": 1.76225768116841e-05, "loss": 0.567, "step": 23185 }, { "epoch": 3.7849475531610954, "grad_norm": 1.898820161819458, "learning_rate": 1.7622371406809416e-05, "loss": 0.4702, "step": 23186 }, { "epoch": 3.7851108118036, "grad_norm": 2.0110533237457275, "learning_rate": 1.7622165994259003e-05, "loss": 0.52, "step": 23187 }, { "epoch": 3.7852740704461043, "grad_norm": 2.000692129135132, "learning_rate": 1.7621960574033072e-05, "loss": 0.5055, "step": 23188 }, { "epoch": 3.7854373290886087, "grad_norm": 1.9572569131851196, "learning_rate": 1.762175514613183e-05, "loss": 0.4101, "step": 23189 }, { "epoch": 3.785600587731113, "grad_norm": 1.6161555051803589, "learning_rate": 1.762154971055548e-05, "loss": 0.4692, "step": 23190 }, { "epoch": 3.7857638463736176, "grad_norm": 1.9226027727127075, "learning_rate": 1.7621344267304232e-05, "loss": 0.4726, "step": 23191 }, { "epoch": 3.785927105016122, "grad_norm": 1.8084712028503418, "learning_rate": 1.7621138816378293e-05, "loss": 0.4484, "step": 23192 }, { "epoch": 3.786090363658626, "grad_norm": 1.5638514757156372, "learning_rate": 1.762093335777787e-05, "loss": 0.4145, "step": 23193 }, { "epoch": 3.7862536223011305, "grad_norm": 2.1185710430145264, "learning_rate": 1.7620727891503166e-05, "loss": 0.5298, "step": 23194 }, { "epoch": 3.786416880943635, "grad_norm": 1.8376624584197998, "learning_rate": 1.7620522417554397e-05, "loss": 0.4569, "step": 23195 }, { "epoch": 3.7865801395861394, "grad_norm": 1.8944997787475586, "learning_rate": 1.7620316935931757e-05, "loss": 0.4158, "step": 23196 }, { "epoch": 3.786743398228644, "grad_norm": 1.7095611095428467, "learning_rate": 1.7620111446635463e-05, "loss": 0.4515, "step": 23197 }, { "epoch": 3.7869066568711482, "grad_norm": 1.9853966236114502, "learning_rate": 1.761990594966572e-05, "loss": 0.4848, "step": 23198 }, { "epoch": 3.7870699155136522, "grad_norm": 2.3281607627868652, "learning_rate": 1.7619700445022735e-05, "loss": 0.5227, "step": 23199 }, { "epoch": 3.7872331741561567, "grad_norm": 1.784834384918213, "learning_rate": 1.761949493270671e-05, "loss": 0.4553, "step": 23200 }, { "epoch": 3.787396432798661, "grad_norm": 1.738325834274292, "learning_rate": 1.7619289412717856e-05, "loss": 0.4818, "step": 23201 }, { "epoch": 3.7875596914411656, "grad_norm": 1.7561290264129639, "learning_rate": 1.761908388505638e-05, "loss": 0.431, "step": 23202 }, { "epoch": 3.78772295008367, "grad_norm": 1.9559969902038574, "learning_rate": 1.761887834972249e-05, "loss": 0.4899, "step": 23203 }, { "epoch": 3.7878862087261744, "grad_norm": 2.1665639877319336, "learning_rate": 1.761867280671639e-05, "loss": 0.5373, "step": 23204 }, { "epoch": 3.788049467368679, "grad_norm": 2.304389715194702, "learning_rate": 1.761846725603829e-05, "loss": 0.5482, "step": 23205 }, { "epoch": 3.7882127260111833, "grad_norm": 2.0280635356903076, "learning_rate": 1.7618261697688394e-05, "loss": 0.4705, "step": 23206 }, { "epoch": 3.7883759846536877, "grad_norm": 1.9993411302566528, "learning_rate": 1.761805613166691e-05, "loss": 0.4472, "step": 23207 }, { "epoch": 3.788539243296192, "grad_norm": 1.8128271102905273, "learning_rate": 1.761785055797405e-05, "loss": 0.4521, "step": 23208 }, { "epoch": 3.7887025019386966, "grad_norm": 1.7695531845092773, "learning_rate": 1.761764497661001e-05, "loss": 0.4248, "step": 23209 }, { "epoch": 3.7888657605812006, "grad_norm": 1.9949537515640259, "learning_rate": 1.7617439387575007e-05, "loss": 0.4788, "step": 23210 }, { "epoch": 3.789029019223705, "grad_norm": 1.5193517208099365, "learning_rate": 1.7617233790869247e-05, "loss": 0.4021, "step": 23211 }, { "epoch": 3.7891922778662095, "grad_norm": 2.3501763343811035, "learning_rate": 1.7617028186492932e-05, "loss": 0.5606, "step": 23212 }, { "epoch": 3.789355536508714, "grad_norm": 1.9682022333145142, "learning_rate": 1.761682257444627e-05, "loss": 0.5036, "step": 23213 }, { "epoch": 3.7895187951512184, "grad_norm": 2.3735523223876953, "learning_rate": 1.761661695472947e-05, "loss": 0.5348, "step": 23214 }, { "epoch": 3.789682053793723, "grad_norm": 1.8617825508117676, "learning_rate": 1.761641132734274e-05, "loss": 0.417, "step": 23215 }, { "epoch": 3.7898453124362272, "grad_norm": 1.6194939613342285, "learning_rate": 1.7616205692286285e-05, "loss": 0.4026, "step": 23216 }, { "epoch": 3.7900085710787312, "grad_norm": 2.0996246337890625, "learning_rate": 1.7616000049560313e-05, "loss": 0.501, "step": 23217 }, { "epoch": 3.7901718297212357, "grad_norm": 1.8855726718902588, "learning_rate": 1.7615794399165034e-05, "loss": 0.4829, "step": 23218 }, { "epoch": 3.79033508836374, "grad_norm": 2.052504301071167, "learning_rate": 1.7615588741100647e-05, "loss": 0.4802, "step": 23219 }, { "epoch": 3.7904983470062446, "grad_norm": 2.2296135425567627, "learning_rate": 1.761538307536737e-05, "loss": 0.4834, "step": 23220 }, { "epoch": 3.790661605648749, "grad_norm": 2.0178723335266113, "learning_rate": 1.7615177401965398e-05, "loss": 0.4817, "step": 23221 }, { "epoch": 3.7908248642912534, "grad_norm": 1.9970539808273315, "learning_rate": 1.761497172089495e-05, "loss": 0.5088, "step": 23222 }, { "epoch": 3.790988122933758, "grad_norm": 2.440004587173462, "learning_rate": 1.7614766032156227e-05, "loss": 0.5269, "step": 23223 }, { "epoch": 3.7911513815762623, "grad_norm": 2.275466203689575, "learning_rate": 1.7614560335749433e-05, "loss": 0.5135, "step": 23224 }, { "epoch": 3.7913146402187667, "grad_norm": 1.876333475112915, "learning_rate": 1.761435463167478e-05, "loss": 0.4918, "step": 23225 }, { "epoch": 3.791477898861271, "grad_norm": 2.149351119995117, "learning_rate": 1.761414891993247e-05, "loss": 0.4908, "step": 23226 }, { "epoch": 3.7916411575037756, "grad_norm": 2.0334012508392334, "learning_rate": 1.761394320052272e-05, "loss": 0.5023, "step": 23227 }, { "epoch": 3.7918044161462796, "grad_norm": 1.905901551246643, "learning_rate": 1.761373747344573e-05, "loss": 0.4476, "step": 23228 }, { "epoch": 3.791967674788784, "grad_norm": 2.023758888244629, "learning_rate": 1.761353173870171e-05, "loss": 0.5131, "step": 23229 }, { "epoch": 3.7921309334312885, "grad_norm": 1.6801875829696655, "learning_rate": 1.761332599629086e-05, "loss": 0.4373, "step": 23230 }, { "epoch": 3.792294192073793, "grad_norm": 1.9611430168151855, "learning_rate": 1.76131202462134e-05, "loss": 0.561, "step": 23231 }, { "epoch": 3.7924574507162974, "grad_norm": 1.8625599145889282, "learning_rate": 1.7612914488469525e-05, "loss": 0.4429, "step": 23232 }, { "epoch": 3.792620709358802, "grad_norm": 2.02951979637146, "learning_rate": 1.7612708723059447e-05, "loss": 0.4472, "step": 23233 }, { "epoch": 3.792783968001306, "grad_norm": 1.8997238874435425, "learning_rate": 1.7612502949983375e-05, "loss": 0.4312, "step": 23234 }, { "epoch": 3.7929472266438102, "grad_norm": 2.0283803939819336, "learning_rate": 1.7612297169241515e-05, "loss": 0.4138, "step": 23235 }, { "epoch": 3.7931104852863147, "grad_norm": 2.0352330207824707, "learning_rate": 1.7612091380834072e-05, "loss": 0.515, "step": 23236 }, { "epoch": 3.793273743928819, "grad_norm": 1.9340440034866333, "learning_rate": 1.7611885584761256e-05, "loss": 0.4263, "step": 23237 }, { "epoch": 3.7934370025713235, "grad_norm": 1.9438564777374268, "learning_rate": 1.7611679781023272e-05, "loss": 0.499, "step": 23238 }, { "epoch": 3.793600261213828, "grad_norm": 2.045564651489258, "learning_rate": 1.761147396962033e-05, "loss": 0.4561, "step": 23239 }, { "epoch": 3.7937635198563324, "grad_norm": 1.862033724784851, "learning_rate": 1.7611268150552637e-05, "loss": 0.4783, "step": 23240 }, { "epoch": 3.793926778498837, "grad_norm": 2.312305450439453, "learning_rate": 1.7611062323820395e-05, "loss": 0.5045, "step": 23241 }, { "epoch": 3.7940900371413413, "grad_norm": 1.992712378501892, "learning_rate": 1.761085648942382e-05, "loss": 0.4486, "step": 23242 }, { "epoch": 3.7942532957838457, "grad_norm": 2.5504531860351562, "learning_rate": 1.761065064736311e-05, "loss": 0.5766, "step": 23243 }, { "epoch": 3.79441655442635, "grad_norm": 2.0018951892852783, "learning_rate": 1.761044479763848e-05, "loss": 0.5185, "step": 23244 }, { "epoch": 3.794579813068854, "grad_norm": 2.165015459060669, "learning_rate": 1.7610238940250133e-05, "loss": 0.5251, "step": 23245 }, { "epoch": 3.7947430717113586, "grad_norm": 2.097231388092041, "learning_rate": 1.7610033075198277e-05, "loss": 0.4994, "step": 23246 }, { "epoch": 3.794906330353863, "grad_norm": 2.2826666831970215, "learning_rate": 1.760982720248312e-05, "loss": 0.51, "step": 23247 }, { "epoch": 3.7950695889963675, "grad_norm": 2.007692337036133, "learning_rate": 1.7609621322104867e-05, "loss": 0.4247, "step": 23248 }, { "epoch": 3.795232847638872, "grad_norm": 1.9986295700073242, "learning_rate": 1.7609415434063734e-05, "loss": 0.4951, "step": 23249 }, { "epoch": 3.7953961062813764, "grad_norm": 1.760358452796936, "learning_rate": 1.7609209538359917e-05, "loss": 0.4569, "step": 23250 }, { "epoch": 3.795559364923881, "grad_norm": 2.166672468185425, "learning_rate": 1.7609003634993626e-05, "loss": 0.4979, "step": 23251 }, { "epoch": 3.795722623566385, "grad_norm": 1.6280664205551147, "learning_rate": 1.7608797723965076e-05, "loss": 0.4185, "step": 23252 }, { "epoch": 3.7958858822088892, "grad_norm": 1.6182948350906372, "learning_rate": 1.7608591805274465e-05, "loss": 0.4353, "step": 23253 }, { "epoch": 3.7960491408513937, "grad_norm": 2.069044351577759, "learning_rate": 1.7608385878922e-05, "loss": 0.5025, "step": 23254 }, { "epoch": 3.796212399493898, "grad_norm": 1.6750248670578003, "learning_rate": 1.7608179944907897e-05, "loss": 0.4441, "step": 23255 }, { "epoch": 3.7963756581364025, "grad_norm": 1.7688930034637451, "learning_rate": 1.760797400323236e-05, "loss": 0.4501, "step": 23256 }, { "epoch": 3.796538916778907, "grad_norm": 1.4388426542282104, "learning_rate": 1.7607768053895595e-05, "loss": 0.3667, "step": 23257 }, { "epoch": 3.7967021754214114, "grad_norm": 1.6962838172912598, "learning_rate": 1.7607562096897808e-05, "loss": 0.3991, "step": 23258 }, { "epoch": 3.796865434063916, "grad_norm": 2.071742534637451, "learning_rate": 1.7607356132239207e-05, "loss": 0.4132, "step": 23259 }, { "epoch": 3.7970286927064203, "grad_norm": 1.6939693689346313, "learning_rate": 1.760715015992e-05, "loss": 0.4208, "step": 23260 }, { "epoch": 3.7971919513489247, "grad_norm": 2.0750393867492676, "learning_rate": 1.76069441799404e-05, "loss": 0.4917, "step": 23261 }, { "epoch": 3.797355209991429, "grad_norm": 1.7868205308914185, "learning_rate": 1.7606738192300605e-05, "loss": 0.3902, "step": 23262 }, { "epoch": 3.797518468633933, "grad_norm": 1.9756014347076416, "learning_rate": 1.7606532197000828e-05, "loss": 0.4843, "step": 23263 }, { "epoch": 3.7976817272764376, "grad_norm": 1.8060498237609863, "learning_rate": 1.7606326194041274e-05, "loss": 0.3848, "step": 23264 }, { "epoch": 3.797844985918942, "grad_norm": 1.9385877847671509, "learning_rate": 1.760612018342215e-05, "loss": 0.4613, "step": 23265 }, { "epoch": 3.7980082445614465, "grad_norm": 1.8318513631820679, "learning_rate": 1.7605914165143668e-05, "loss": 0.473, "step": 23266 }, { "epoch": 3.798171503203951, "grad_norm": 2.0423192977905273, "learning_rate": 1.760570813920603e-05, "loss": 0.4462, "step": 23267 }, { "epoch": 3.7983347618464554, "grad_norm": 2.1512136459350586, "learning_rate": 1.7605502105609452e-05, "loss": 0.4825, "step": 23268 }, { "epoch": 3.79849802048896, "grad_norm": 2.1728322505950928, "learning_rate": 1.7605296064354126e-05, "loss": 0.5092, "step": 23269 }, { "epoch": 3.798661279131464, "grad_norm": 2.3964273929595947, "learning_rate": 1.7605090015440277e-05, "loss": 0.475, "step": 23270 }, { "epoch": 3.7988245377739682, "grad_norm": 2.1865622997283936, "learning_rate": 1.76048839588681e-05, "loss": 0.521, "step": 23271 }, { "epoch": 3.7989877964164727, "grad_norm": 2.891416311264038, "learning_rate": 1.760467789463781e-05, "loss": 0.4956, "step": 23272 }, { "epoch": 3.799151055058977, "grad_norm": 2.033639430999756, "learning_rate": 1.7604471822749608e-05, "loss": 0.467, "step": 23273 }, { "epoch": 3.7993143137014815, "grad_norm": 2.2464065551757812, "learning_rate": 1.7604265743203707e-05, "loss": 0.5273, "step": 23274 }, { "epoch": 3.799477572343986, "grad_norm": 2.347275972366333, "learning_rate": 1.7604059656000313e-05, "loss": 0.5658, "step": 23275 }, { "epoch": 3.7996408309864904, "grad_norm": 1.730177879333496, "learning_rate": 1.760385356113963e-05, "loss": 0.4111, "step": 23276 }, { "epoch": 3.799804089628995, "grad_norm": 1.7329071760177612, "learning_rate": 1.760364745862187e-05, "loss": 0.4175, "step": 23277 }, { "epoch": 3.7999673482714993, "grad_norm": 2.0184497833251953, "learning_rate": 1.7603441348447237e-05, "loss": 0.4635, "step": 23278 }, { "epoch": 3.8001306069140037, "grad_norm": 2.2603235244750977, "learning_rate": 1.7603235230615945e-05, "loss": 0.5303, "step": 23279 }, { "epoch": 3.800293865556508, "grad_norm": 2.190737724304199, "learning_rate": 1.7603029105128193e-05, "loss": 0.5873, "step": 23280 }, { "epoch": 3.800457124199012, "grad_norm": 2.609707832336426, "learning_rate": 1.7602822971984195e-05, "loss": 0.5195, "step": 23281 }, { "epoch": 3.8006203828415166, "grad_norm": 1.7887520790100098, "learning_rate": 1.7602616831184155e-05, "loss": 0.4313, "step": 23282 }, { "epoch": 3.800783641484021, "grad_norm": 2.3848588466644287, "learning_rate": 1.7602410682728285e-05, "loss": 0.5513, "step": 23283 }, { "epoch": 3.8009469001265255, "grad_norm": 2.096498966217041, "learning_rate": 1.7602204526616785e-05, "loss": 0.4834, "step": 23284 }, { "epoch": 3.80111015876903, "grad_norm": 1.740481972694397, "learning_rate": 1.760199836284987e-05, "loss": 0.43, "step": 23285 }, { "epoch": 3.8012734174115343, "grad_norm": 2.159165143966675, "learning_rate": 1.760179219142774e-05, "loss": 0.5779, "step": 23286 }, { "epoch": 3.8014366760540383, "grad_norm": 1.8075733184814453, "learning_rate": 1.7601586012350614e-05, "loss": 0.4935, "step": 23287 }, { "epoch": 3.801599934696543, "grad_norm": 1.8435183763504028, "learning_rate": 1.760137982561869e-05, "loss": 0.419, "step": 23288 }, { "epoch": 3.801763193339047, "grad_norm": 1.9511033296585083, "learning_rate": 1.760117363123218e-05, "loss": 0.446, "step": 23289 }, { "epoch": 3.8019264519815517, "grad_norm": 2.371065855026245, "learning_rate": 1.760096742919129e-05, "loss": 0.5341, "step": 23290 }, { "epoch": 3.802089710624056, "grad_norm": 1.808161973953247, "learning_rate": 1.7600761219496224e-05, "loss": 0.4694, "step": 23291 }, { "epoch": 3.8022529692665605, "grad_norm": 1.6421139240264893, "learning_rate": 1.76005550021472e-05, "loss": 0.429, "step": 23292 }, { "epoch": 3.802416227909065, "grad_norm": 1.84360933303833, "learning_rate": 1.7600348777144414e-05, "loss": 0.4676, "step": 23293 }, { "epoch": 3.8025794865515694, "grad_norm": 1.8932040929794312, "learning_rate": 1.760014254448808e-05, "loss": 0.447, "step": 23294 }, { "epoch": 3.802742745194074, "grad_norm": 2.2862226963043213, "learning_rate": 1.7599936304178404e-05, "loss": 0.5176, "step": 23295 }, { "epoch": 3.8029060038365783, "grad_norm": 1.8714359998703003, "learning_rate": 1.7599730056215594e-05, "loss": 0.4183, "step": 23296 }, { "epoch": 3.8030692624790827, "grad_norm": 2.2572083473205566, "learning_rate": 1.7599523800599857e-05, "loss": 0.5389, "step": 23297 }, { "epoch": 3.8032325211215867, "grad_norm": 2.0451269149780273, "learning_rate": 1.7599317537331406e-05, "loss": 0.4833, "step": 23298 }, { "epoch": 3.803395779764091, "grad_norm": 1.9774980545043945, "learning_rate": 1.7599111266410438e-05, "loss": 0.5284, "step": 23299 }, { "epoch": 3.8035590384065956, "grad_norm": 2.145592451095581, "learning_rate": 1.759890498783717e-05, "loss": 0.5146, "step": 23300 }, { "epoch": 3.8037222970491, "grad_norm": 1.9176217317581177, "learning_rate": 1.7598698701611806e-05, "loss": 0.4391, "step": 23301 }, { "epoch": 3.8038855556916045, "grad_norm": 1.7923957109451294, "learning_rate": 1.7598492407734558e-05, "loss": 0.4411, "step": 23302 }, { "epoch": 3.804048814334109, "grad_norm": 2.23480224609375, "learning_rate": 1.7598286106205628e-05, "loss": 0.4892, "step": 23303 }, { "epoch": 3.8042120729766133, "grad_norm": 2.3656320571899414, "learning_rate": 1.7598079797025226e-05, "loss": 0.4398, "step": 23304 }, { "epoch": 3.8043753316191173, "grad_norm": 1.7731746435165405, "learning_rate": 1.7597873480193558e-05, "loss": 0.4041, "step": 23305 }, { "epoch": 3.8045385902616218, "grad_norm": 2.306223154067993, "learning_rate": 1.7597667155710835e-05, "loss": 0.5161, "step": 23306 }, { "epoch": 3.804701848904126, "grad_norm": 1.901265025138855, "learning_rate": 1.7597460823577263e-05, "loss": 0.5043, "step": 23307 }, { "epoch": 3.8048651075466307, "grad_norm": 1.7783185243606567, "learning_rate": 1.759725448379305e-05, "loss": 0.4366, "step": 23308 }, { "epoch": 3.805028366189135, "grad_norm": 2.0795717239379883, "learning_rate": 1.7597048136358402e-05, "loss": 0.4667, "step": 23309 }, { "epoch": 3.8051916248316395, "grad_norm": 2.126870632171631, "learning_rate": 1.759684178127353e-05, "loss": 0.5599, "step": 23310 }, { "epoch": 3.805354883474144, "grad_norm": 1.7640999555587769, "learning_rate": 1.759663541853864e-05, "loss": 0.4769, "step": 23311 }, { "epoch": 3.8055181421166484, "grad_norm": 1.9200185537338257, "learning_rate": 1.759642904815394e-05, "loss": 0.4656, "step": 23312 }, { "epoch": 3.805681400759153, "grad_norm": 2.150055408477783, "learning_rate": 1.759622267011964e-05, "loss": 0.5488, "step": 23313 }, { "epoch": 3.8058446594016573, "grad_norm": 2.0350844860076904, "learning_rate": 1.7596016284435943e-05, "loss": 0.5009, "step": 23314 }, { "epoch": 3.8060079180441617, "grad_norm": 2.0638387203216553, "learning_rate": 1.7595809891103058e-05, "loss": 0.4973, "step": 23315 }, { "epoch": 3.8061711766866657, "grad_norm": 1.9913930892944336, "learning_rate": 1.75956034901212e-05, "loss": 0.4874, "step": 23316 }, { "epoch": 3.80633443532917, "grad_norm": 1.9954760074615479, "learning_rate": 1.759539708149057e-05, "loss": 0.4418, "step": 23317 }, { "epoch": 3.8064976939716746, "grad_norm": 1.8441153764724731, "learning_rate": 1.7595190665211375e-05, "loss": 0.4612, "step": 23318 }, { "epoch": 3.806660952614179, "grad_norm": 2.1182854175567627, "learning_rate": 1.7594984241283826e-05, "loss": 0.5279, "step": 23319 }, { "epoch": 3.8068242112566835, "grad_norm": 1.8089312314987183, "learning_rate": 1.759477780970813e-05, "loss": 0.4237, "step": 23320 }, { "epoch": 3.806987469899188, "grad_norm": 1.6935189962387085, "learning_rate": 1.7594571370484493e-05, "loss": 0.4458, "step": 23321 }, { "epoch": 3.8071507285416923, "grad_norm": 1.6396667957305908, "learning_rate": 1.759436492361313e-05, "loss": 0.4362, "step": 23322 }, { "epoch": 3.8073139871841963, "grad_norm": 1.9515340328216553, "learning_rate": 1.7594158469094236e-05, "loss": 0.4957, "step": 23323 }, { "epoch": 3.8074772458267008, "grad_norm": 1.7872902154922485, "learning_rate": 1.759395200692803e-05, "loss": 0.4897, "step": 23324 }, { "epoch": 3.807640504469205, "grad_norm": 2.083979606628418, "learning_rate": 1.7593745537114715e-05, "loss": 0.5492, "step": 23325 }, { "epoch": 3.8078037631117096, "grad_norm": 1.9911032915115356, "learning_rate": 1.7593539059654505e-05, "loss": 0.4681, "step": 23326 }, { "epoch": 3.807967021754214, "grad_norm": 2.0113401412963867, "learning_rate": 1.7593332574547595e-05, "loss": 0.4559, "step": 23327 }, { "epoch": 3.8081302803967185, "grad_norm": 2.1025583744049072, "learning_rate": 1.7593126081794208e-05, "loss": 0.5165, "step": 23328 }, { "epoch": 3.808293539039223, "grad_norm": 1.8550424575805664, "learning_rate": 1.7592919581394542e-05, "loss": 0.4215, "step": 23329 }, { "epoch": 3.8084567976817274, "grad_norm": 1.2572836875915527, "learning_rate": 1.759271307334881e-05, "loss": 0.3397, "step": 23330 }, { "epoch": 3.808620056324232, "grad_norm": 1.41861891746521, "learning_rate": 1.7592506557657217e-05, "loss": 0.3957, "step": 23331 }, { "epoch": 3.8087833149667363, "grad_norm": 2.0480592250823975, "learning_rate": 1.759230003431997e-05, "loss": 0.4837, "step": 23332 }, { "epoch": 3.8089465736092407, "grad_norm": 1.9567341804504395, "learning_rate": 1.7592093503337282e-05, "loss": 0.4777, "step": 23333 }, { "epoch": 3.8091098322517447, "grad_norm": 2.1066203117370605, "learning_rate": 1.7591886964709356e-05, "loss": 0.4693, "step": 23334 }, { "epoch": 3.809273090894249, "grad_norm": 1.7629594802856445, "learning_rate": 1.7591680418436402e-05, "loss": 0.459, "step": 23335 }, { "epoch": 3.8094363495367536, "grad_norm": 2.0367085933685303, "learning_rate": 1.759147386451863e-05, "loss": 0.4429, "step": 23336 }, { "epoch": 3.809599608179258, "grad_norm": 1.8601701259613037, "learning_rate": 1.7591267302956243e-05, "loss": 0.4561, "step": 23337 }, { "epoch": 3.8097628668217625, "grad_norm": 1.8036785125732422, "learning_rate": 1.7591060733749455e-05, "loss": 0.4283, "step": 23338 }, { "epoch": 3.809926125464267, "grad_norm": 2.5021207332611084, "learning_rate": 1.7590854156898466e-05, "loss": 0.5328, "step": 23339 }, { "epoch": 3.810089384106771, "grad_norm": 2.23898983001709, "learning_rate": 1.7590647572403493e-05, "loss": 0.5306, "step": 23340 }, { "epoch": 3.8102526427492753, "grad_norm": 1.7448630332946777, "learning_rate": 1.7590440980264738e-05, "loss": 0.4762, "step": 23341 }, { "epoch": 3.8104159013917798, "grad_norm": 2.01836895942688, "learning_rate": 1.759023438048241e-05, "loss": 0.512, "step": 23342 }, { "epoch": 3.810579160034284, "grad_norm": 1.937567114830017, "learning_rate": 1.759002777305672e-05, "loss": 0.5234, "step": 23343 }, { "epoch": 3.8107424186767886, "grad_norm": 2.0559608936309814, "learning_rate": 1.7589821157987873e-05, "loss": 0.4827, "step": 23344 }, { "epoch": 3.810905677319293, "grad_norm": 1.8365212678909302, "learning_rate": 1.7589614535276078e-05, "loss": 0.4283, "step": 23345 }, { "epoch": 3.8110689359617975, "grad_norm": 1.9810618162155151, "learning_rate": 1.7589407904921544e-05, "loss": 0.5001, "step": 23346 }, { "epoch": 3.811232194604302, "grad_norm": 2.322038412094116, "learning_rate": 1.758920126692448e-05, "loss": 0.6077, "step": 23347 }, { "epoch": 3.8113954532468064, "grad_norm": 1.8285366296768188, "learning_rate": 1.7588994621285088e-05, "loss": 0.4302, "step": 23348 }, { "epoch": 3.811558711889311, "grad_norm": 2.3299174308776855, "learning_rate": 1.7588787968003585e-05, "loss": 0.5086, "step": 23349 }, { "epoch": 3.8117219705318153, "grad_norm": 1.702500581741333, "learning_rate": 1.758858130708017e-05, "loss": 0.4174, "step": 23350 }, { "epoch": 3.8118852291743193, "grad_norm": 2.086087703704834, "learning_rate": 1.758837463851506e-05, "loss": 0.4889, "step": 23351 }, { "epoch": 3.8120484878168237, "grad_norm": 2.1959288120269775, "learning_rate": 1.7588167962308458e-05, "loss": 0.5213, "step": 23352 }, { "epoch": 3.812211746459328, "grad_norm": 1.7742990255355835, "learning_rate": 1.758796127846057e-05, "loss": 0.4477, "step": 23353 }, { "epoch": 3.8123750051018326, "grad_norm": 2.181635856628418, "learning_rate": 1.758775458697161e-05, "loss": 0.4744, "step": 23354 }, { "epoch": 3.812538263744337, "grad_norm": 2.0940968990325928, "learning_rate": 1.7587547887841777e-05, "loss": 0.5938, "step": 23355 }, { "epoch": 3.8127015223868415, "grad_norm": 2.0444271564483643, "learning_rate": 1.7587341181071293e-05, "loss": 0.5322, "step": 23356 }, { "epoch": 3.812864781029346, "grad_norm": 2.3496861457824707, "learning_rate": 1.7587134466660357e-05, "loss": 0.5527, "step": 23357 }, { "epoch": 3.81302803967185, "grad_norm": 1.6577463150024414, "learning_rate": 1.7586927744609178e-05, "loss": 0.4397, "step": 23358 }, { "epoch": 3.8131912983143543, "grad_norm": 2.0550224781036377, "learning_rate": 1.758672101491796e-05, "loss": 0.4888, "step": 23359 }, { "epoch": 3.8133545569568588, "grad_norm": 2.2219302654266357, "learning_rate": 1.758651427758692e-05, "loss": 0.5628, "step": 23360 }, { "epoch": 3.813517815599363, "grad_norm": 1.8279508352279663, "learning_rate": 1.7586307532616263e-05, "loss": 0.4123, "step": 23361 }, { "epoch": 3.8136810742418676, "grad_norm": 1.714410662651062, "learning_rate": 1.7586100780006194e-05, "loss": 0.3412, "step": 23362 }, { "epoch": 3.813844332884372, "grad_norm": 1.7590383291244507, "learning_rate": 1.7585894019756926e-05, "loss": 0.4302, "step": 23363 }, { "epoch": 3.8140075915268765, "grad_norm": 1.875018835067749, "learning_rate": 1.7585687251868663e-05, "loss": 0.4809, "step": 23364 }, { "epoch": 3.814170850169381, "grad_norm": 2.087392807006836, "learning_rate": 1.7585480476341617e-05, "loss": 0.4323, "step": 23365 }, { "epoch": 3.8143341088118854, "grad_norm": 2.2417173385620117, "learning_rate": 1.7585273693175994e-05, "loss": 0.4688, "step": 23366 }, { "epoch": 3.81449736745439, "grad_norm": 2.1690540313720703, "learning_rate": 1.7585066902371997e-05, "loss": 0.4928, "step": 23367 }, { "epoch": 3.8146606260968943, "grad_norm": 1.936016321182251, "learning_rate": 1.7584860103929844e-05, "loss": 0.4946, "step": 23368 }, { "epoch": 3.8148238847393983, "grad_norm": 2.024317979812622, "learning_rate": 1.758465329784974e-05, "loss": 0.5257, "step": 23369 }, { "epoch": 3.8149871433819027, "grad_norm": 2.0702285766601562, "learning_rate": 1.758444648413189e-05, "loss": 0.4725, "step": 23370 }, { "epoch": 3.815150402024407, "grad_norm": 2.204270839691162, "learning_rate": 1.7584239662776505e-05, "loss": 0.4891, "step": 23371 }, { "epoch": 3.8153136606669116, "grad_norm": 1.8587909936904907, "learning_rate": 1.7584032833783794e-05, "loss": 0.4372, "step": 23372 }, { "epoch": 3.815476919309416, "grad_norm": 2.056553363800049, "learning_rate": 1.7583825997153963e-05, "loss": 0.556, "step": 23373 }, { "epoch": 3.8156401779519205, "grad_norm": 1.8579577207565308, "learning_rate": 1.7583619152887222e-05, "loss": 0.4704, "step": 23374 }, { "epoch": 3.8158034365944244, "grad_norm": 1.7864689826965332, "learning_rate": 1.7583412300983774e-05, "loss": 0.4824, "step": 23375 }, { "epoch": 3.815966695236929, "grad_norm": 1.7588355541229248, "learning_rate": 1.758320544144384e-05, "loss": 0.4247, "step": 23376 }, { "epoch": 3.8161299538794333, "grad_norm": 1.6972771883010864, "learning_rate": 1.7582998574267615e-05, "loss": 0.4574, "step": 23377 }, { "epoch": 3.8162932125219378, "grad_norm": 1.911988377571106, "learning_rate": 1.758279169945531e-05, "loss": 0.3949, "step": 23378 }, { "epoch": 3.816456471164442, "grad_norm": 1.6938303709030151, "learning_rate": 1.7582584817007137e-05, "loss": 0.4402, "step": 23379 }, { "epoch": 3.8166197298069466, "grad_norm": 2.0764756202697754, "learning_rate": 1.7582377926923306e-05, "loss": 0.5258, "step": 23380 }, { "epoch": 3.816782988449451, "grad_norm": 2.413095712661743, "learning_rate": 1.7582171029204022e-05, "loss": 0.4751, "step": 23381 }, { "epoch": 3.8169462470919555, "grad_norm": 1.7862907648086548, "learning_rate": 1.7581964123849492e-05, "loss": 0.3883, "step": 23382 }, { "epoch": 3.81710950573446, "grad_norm": 1.7963197231292725, "learning_rate": 1.758175721085993e-05, "loss": 0.4681, "step": 23383 }, { "epoch": 3.8172727643769644, "grad_norm": 2.1921842098236084, "learning_rate": 1.7581550290235536e-05, "loss": 0.5805, "step": 23384 }, { "epoch": 3.817436023019469, "grad_norm": 1.5406447649002075, "learning_rate": 1.7581343361976523e-05, "loss": 0.4068, "step": 23385 }, { "epoch": 3.817599281661973, "grad_norm": 1.7790805101394653, "learning_rate": 1.75811364260831e-05, "loss": 0.4422, "step": 23386 }, { "epoch": 3.8177625403044773, "grad_norm": 1.9424426555633545, "learning_rate": 1.758092948255548e-05, "loss": 0.4654, "step": 23387 }, { "epoch": 3.8179257989469817, "grad_norm": 1.8414098024368286, "learning_rate": 1.7580722531393857e-05, "loss": 0.4159, "step": 23388 }, { "epoch": 3.818089057589486, "grad_norm": 2.2706761360168457, "learning_rate": 1.7580515572598457e-05, "loss": 0.5222, "step": 23389 }, { "epoch": 3.8182523162319906, "grad_norm": 1.8468170166015625, "learning_rate": 1.7580308606169473e-05, "loss": 0.4625, "step": 23390 }, { "epoch": 3.818415574874495, "grad_norm": 1.71596360206604, "learning_rate": 1.7580101632107123e-05, "loss": 0.4016, "step": 23391 }, { "epoch": 3.8185788335169994, "grad_norm": 1.9894027709960938, "learning_rate": 1.757989465041161e-05, "loss": 0.487, "step": 23392 }, { "epoch": 3.8187420921595034, "grad_norm": 2.0321760177612305, "learning_rate": 1.7579687661083152e-05, "loss": 0.4818, "step": 23393 }, { "epoch": 3.818905350802008, "grad_norm": 2.292989492416382, "learning_rate": 1.7579480664121944e-05, "loss": 0.4564, "step": 23394 }, { "epoch": 3.8190686094445123, "grad_norm": 2.223860263824463, "learning_rate": 1.7579273659528204e-05, "loss": 0.5048, "step": 23395 }, { "epoch": 3.8192318680870168, "grad_norm": 2.2345542907714844, "learning_rate": 1.7579066647302134e-05, "loss": 0.4919, "step": 23396 }, { "epoch": 3.819395126729521, "grad_norm": 2.16670560836792, "learning_rate": 1.757885962744395e-05, "loss": 0.5535, "step": 23397 }, { "epoch": 3.8195583853720256, "grad_norm": 1.471172571182251, "learning_rate": 1.7578652599953857e-05, "loss": 0.3996, "step": 23398 }, { "epoch": 3.81972164401453, "grad_norm": 2.056392192840576, "learning_rate": 1.757844556483206e-05, "loss": 0.4068, "step": 23399 }, { "epoch": 3.8198849026570345, "grad_norm": 1.8516055345535278, "learning_rate": 1.757823852207877e-05, "loss": 0.4601, "step": 23400 }, { "epoch": 3.820048161299539, "grad_norm": 2.168494701385498, "learning_rate": 1.7578031471694198e-05, "loss": 0.4405, "step": 23401 }, { "epoch": 3.8202114199420434, "grad_norm": 2.177993059158325, "learning_rate": 1.7577824413678547e-05, "loss": 0.4871, "step": 23402 }, { "epoch": 3.820374678584548, "grad_norm": 2.0240638256073, "learning_rate": 1.7577617348032034e-05, "loss": 0.5133, "step": 23403 }, { "epoch": 3.820537937227052, "grad_norm": 1.9041190147399902, "learning_rate": 1.7577410274754858e-05, "loss": 0.4751, "step": 23404 }, { "epoch": 3.8207011958695563, "grad_norm": 1.9322415590286255, "learning_rate": 1.757720319384723e-05, "loss": 0.4724, "step": 23405 }, { "epoch": 3.8208644545120607, "grad_norm": 1.566978096961975, "learning_rate": 1.7576996105309367e-05, "loss": 0.3786, "step": 23406 }, { "epoch": 3.821027713154565, "grad_norm": 1.8230193853378296, "learning_rate": 1.7576789009141465e-05, "loss": 0.5019, "step": 23407 }, { "epoch": 3.8211909717970696, "grad_norm": 1.9165247678756714, "learning_rate": 1.7576581905343744e-05, "loss": 0.4445, "step": 23408 }, { "epoch": 3.821354230439574, "grad_norm": 1.3583645820617676, "learning_rate": 1.75763747939164e-05, "loss": 0.3809, "step": 23409 }, { "epoch": 3.8215174890820784, "grad_norm": 2.2053823471069336, "learning_rate": 1.7576167674859656e-05, "loss": 0.5045, "step": 23410 }, { "epoch": 3.8216807477245824, "grad_norm": 1.6710975170135498, "learning_rate": 1.7575960548173706e-05, "loss": 0.454, "step": 23411 }, { "epoch": 3.821844006367087, "grad_norm": 1.8806122541427612, "learning_rate": 1.757575341385877e-05, "loss": 0.4463, "step": 23412 }, { "epoch": 3.8220072650095913, "grad_norm": 1.9281738996505737, "learning_rate": 1.7575546271915053e-05, "loss": 0.4898, "step": 23413 }, { "epoch": 3.8221705236520958, "grad_norm": 2.0836546421051025, "learning_rate": 1.7575339122342763e-05, "loss": 0.5004, "step": 23414 }, { "epoch": 3.8223337822946, "grad_norm": 1.7159970998764038, "learning_rate": 1.757513196514211e-05, "loss": 0.4299, "step": 23415 }, { "epoch": 3.8224970409371046, "grad_norm": 2.384687900543213, "learning_rate": 1.7574924800313296e-05, "loss": 0.5688, "step": 23416 }, { "epoch": 3.822660299579609, "grad_norm": 2.023350954055786, "learning_rate": 1.7574717627856538e-05, "loss": 0.4914, "step": 23417 }, { "epoch": 3.8228235582221135, "grad_norm": 1.9561930894851685, "learning_rate": 1.757451044777204e-05, "loss": 0.5272, "step": 23418 }, { "epoch": 3.822986816864618, "grad_norm": 1.7856265306472778, "learning_rate": 1.757430326006001e-05, "loss": 0.4569, "step": 23419 }, { "epoch": 3.8231500755071224, "grad_norm": 1.9370397329330444, "learning_rate": 1.7574096064720663e-05, "loss": 0.4752, "step": 23420 }, { "epoch": 3.823313334149627, "grad_norm": 1.8288588523864746, "learning_rate": 1.75738888617542e-05, "loss": 0.498, "step": 23421 }, { "epoch": 3.823476592792131, "grad_norm": 2.266710042953491, "learning_rate": 1.7573681651160836e-05, "loss": 0.5462, "step": 23422 }, { "epoch": 3.8236398514346353, "grad_norm": 1.6976370811462402, "learning_rate": 1.7573474432940775e-05, "loss": 0.443, "step": 23423 }, { "epoch": 3.8238031100771397, "grad_norm": 2.304675340652466, "learning_rate": 1.7573267207094226e-05, "loss": 0.6216, "step": 23424 }, { "epoch": 3.823966368719644, "grad_norm": 1.9542509317398071, "learning_rate": 1.7573059973621402e-05, "loss": 0.4072, "step": 23425 }, { "epoch": 3.8241296273621486, "grad_norm": 2.1385080814361572, "learning_rate": 1.7572852732522507e-05, "loss": 0.4388, "step": 23426 }, { "epoch": 3.824292886004653, "grad_norm": 2.112006425857544, "learning_rate": 1.7572645483797747e-05, "loss": 0.5233, "step": 23427 }, { "epoch": 3.824456144647157, "grad_norm": 1.7921377420425415, "learning_rate": 1.7572438227447342e-05, "loss": 0.516, "step": 23428 }, { "epoch": 3.8246194032896614, "grad_norm": 2.148597240447998, "learning_rate": 1.757223096347149e-05, "loss": 0.4727, "step": 23429 }, { "epoch": 3.824782661932166, "grad_norm": 1.8627310991287231, "learning_rate": 1.7572023691870403e-05, "loss": 0.4399, "step": 23430 }, { "epoch": 3.8249459205746703, "grad_norm": 2.3950412273406982, "learning_rate": 1.757181641264429e-05, "loss": 0.5086, "step": 23431 }, { "epoch": 3.8251091792171747, "grad_norm": 1.8297827243804932, "learning_rate": 1.7571609125793362e-05, "loss": 0.4011, "step": 23432 }, { "epoch": 3.825272437859679, "grad_norm": 2.2940750122070312, "learning_rate": 1.7571401831317825e-05, "loss": 0.5672, "step": 23433 }, { "epoch": 3.8254356965021836, "grad_norm": 2.0520572662353516, "learning_rate": 1.757119452921789e-05, "loss": 0.5659, "step": 23434 }, { "epoch": 3.825598955144688, "grad_norm": 1.6039937734603882, "learning_rate": 1.757098721949376e-05, "loss": 0.4291, "step": 23435 }, { "epoch": 3.8257622137871925, "grad_norm": 2.1462814807891846, "learning_rate": 1.757077990214565e-05, "loss": 0.4819, "step": 23436 }, { "epoch": 3.825925472429697, "grad_norm": 1.760806679725647, "learning_rate": 1.7570572577173767e-05, "loss": 0.4964, "step": 23437 }, { "epoch": 3.8260887310722014, "grad_norm": 1.8883804082870483, "learning_rate": 1.757036524457832e-05, "loss": 0.5478, "step": 23438 }, { "epoch": 3.8262519897147054, "grad_norm": 1.837489128112793, "learning_rate": 1.7570157904359516e-05, "loss": 0.4323, "step": 23439 }, { "epoch": 3.82641524835721, "grad_norm": 2.1400580406188965, "learning_rate": 1.7569950556517566e-05, "loss": 0.5585, "step": 23440 }, { "epoch": 3.8265785069997142, "grad_norm": 2.0638208389282227, "learning_rate": 1.756974320105268e-05, "loss": 0.5204, "step": 23441 }, { "epoch": 3.8267417656422187, "grad_norm": 1.961835265159607, "learning_rate": 1.756953583796506e-05, "loss": 0.4727, "step": 23442 }, { "epoch": 3.826905024284723, "grad_norm": 1.71251380443573, "learning_rate": 1.7569328467254922e-05, "loss": 0.3904, "step": 23443 }, { "epoch": 3.8270682829272276, "grad_norm": 2.0133726596832275, "learning_rate": 1.756912108892247e-05, "loss": 0.4434, "step": 23444 }, { "epoch": 3.827231541569732, "grad_norm": 1.9177758693695068, "learning_rate": 1.7568913702967916e-05, "loss": 0.4724, "step": 23445 }, { "epoch": 3.827394800212236, "grad_norm": 2.4783077239990234, "learning_rate": 1.756870630939147e-05, "loss": 0.4822, "step": 23446 }, { "epoch": 3.8275580588547404, "grad_norm": 1.7390193939208984, "learning_rate": 1.7568498908193335e-05, "loss": 0.4397, "step": 23447 }, { "epoch": 3.827721317497245, "grad_norm": 1.592681646347046, "learning_rate": 1.7568291499373727e-05, "loss": 0.3716, "step": 23448 }, { "epoch": 3.8278845761397493, "grad_norm": 1.9556621313095093, "learning_rate": 1.7568084082932852e-05, "loss": 0.4216, "step": 23449 }, { "epoch": 3.8280478347822537, "grad_norm": 1.8532615900039673, "learning_rate": 1.7567876658870917e-05, "loss": 0.4606, "step": 23450 }, { "epoch": 3.828211093424758, "grad_norm": 1.6895580291748047, "learning_rate": 1.7567669227188128e-05, "loss": 0.4087, "step": 23451 }, { "epoch": 3.8283743520672626, "grad_norm": 2.0819952487945557, "learning_rate": 1.75674617878847e-05, "loss": 0.5567, "step": 23452 }, { "epoch": 3.828537610709767, "grad_norm": 2.1826279163360596, "learning_rate": 1.7567254340960846e-05, "loss": 0.5245, "step": 23453 }, { "epoch": 3.8287008693522715, "grad_norm": 2.295776605606079, "learning_rate": 1.7567046886416763e-05, "loss": 0.5271, "step": 23454 }, { "epoch": 3.828864127994776, "grad_norm": 1.9180749654769897, "learning_rate": 1.756683942425267e-05, "loss": 0.4478, "step": 23455 }, { "epoch": 3.8290273866372804, "grad_norm": 2.0917580127716064, "learning_rate": 1.7566631954468767e-05, "loss": 0.49, "step": 23456 }, { "epoch": 3.8291906452797844, "grad_norm": 2.0696675777435303, "learning_rate": 1.756642447706527e-05, "loss": 0.5922, "step": 23457 }, { "epoch": 3.829353903922289, "grad_norm": 2.0585434436798096, "learning_rate": 1.7566216992042388e-05, "loss": 0.4786, "step": 23458 }, { "epoch": 3.8295171625647932, "grad_norm": 2.0347511768341064, "learning_rate": 1.756600949940032e-05, "loss": 0.4697, "step": 23459 }, { "epoch": 3.8296804212072977, "grad_norm": 2.0399091243743896, "learning_rate": 1.7565801999139292e-05, "loss": 0.4507, "step": 23460 }, { "epoch": 3.829843679849802, "grad_norm": 1.8976694345474243, "learning_rate": 1.7565594491259496e-05, "loss": 0.4291, "step": 23461 }, { "epoch": 3.8300069384923066, "grad_norm": 2.2400858402252197, "learning_rate": 1.756538697576115e-05, "loss": 0.5564, "step": 23462 }, { "epoch": 3.8301701971348106, "grad_norm": 2.041041851043701, "learning_rate": 1.7565179452644465e-05, "loss": 0.5351, "step": 23463 }, { "epoch": 3.830333455777315, "grad_norm": 1.9312193393707275, "learning_rate": 1.7564971921909643e-05, "loss": 0.4464, "step": 23464 }, { "epoch": 3.8304967144198194, "grad_norm": 1.9743212461471558, "learning_rate": 1.7564764383556898e-05, "loss": 0.5481, "step": 23465 }, { "epoch": 3.830659973062324, "grad_norm": 1.824891209602356, "learning_rate": 1.7564556837586433e-05, "loss": 0.4341, "step": 23466 }, { "epoch": 3.8308232317048283, "grad_norm": 1.9005969762802124, "learning_rate": 1.7564349283998466e-05, "loss": 0.4805, "step": 23467 }, { "epoch": 3.8309864903473327, "grad_norm": 1.5953189134597778, "learning_rate": 1.7564141722793203e-05, "loss": 0.433, "step": 23468 }, { "epoch": 3.831149748989837, "grad_norm": 1.6667755842208862, "learning_rate": 1.7563934153970847e-05, "loss": 0.4206, "step": 23469 }, { "epoch": 3.8313130076323416, "grad_norm": 2.074453592300415, "learning_rate": 1.756372657753161e-05, "loss": 0.4388, "step": 23470 }, { "epoch": 3.831476266274846, "grad_norm": 2.127742052078247, "learning_rate": 1.756351899347571e-05, "loss": 0.4467, "step": 23471 }, { "epoch": 3.8316395249173505, "grad_norm": 1.9836597442626953, "learning_rate": 1.756331140180334e-05, "loss": 0.4678, "step": 23472 }, { "epoch": 3.831802783559855, "grad_norm": 1.9707601070404053, "learning_rate": 1.756310380251472e-05, "loss": 0.4396, "step": 23473 }, { "epoch": 3.831966042202359, "grad_norm": 2.1409506797790527, "learning_rate": 1.756289619561006e-05, "loss": 0.5095, "step": 23474 }, { "epoch": 3.8321293008448634, "grad_norm": 2.0448741912841797, "learning_rate": 1.756268858108956e-05, "loss": 0.4768, "step": 23475 }, { "epoch": 3.832292559487368, "grad_norm": 2.0169663429260254, "learning_rate": 1.756248095895344e-05, "loss": 0.4498, "step": 23476 }, { "epoch": 3.8324558181298722, "grad_norm": 1.8448891639709473, "learning_rate": 1.75622733292019e-05, "loss": 0.4102, "step": 23477 }, { "epoch": 3.8326190767723767, "grad_norm": 2.113919258117676, "learning_rate": 1.7562065691835153e-05, "loss": 0.531, "step": 23478 }, { "epoch": 3.832782335414881, "grad_norm": 1.78950834274292, "learning_rate": 1.7561858046853408e-05, "loss": 0.5151, "step": 23479 }, { "epoch": 3.8329455940573856, "grad_norm": 1.6712921857833862, "learning_rate": 1.7561650394256874e-05, "loss": 0.4181, "step": 23480 }, { "epoch": 3.8331088526998895, "grad_norm": 2.1528000831604004, "learning_rate": 1.756144273404576e-05, "loss": 0.4829, "step": 23481 }, { "epoch": 3.833272111342394, "grad_norm": 2.057407855987549, "learning_rate": 1.7561235066220276e-05, "loss": 0.5044, "step": 23482 }, { "epoch": 3.8334353699848984, "grad_norm": 2.076446056365967, "learning_rate": 1.7561027390780633e-05, "loss": 0.475, "step": 23483 }, { "epoch": 3.833598628627403, "grad_norm": 1.7345067262649536, "learning_rate": 1.7560819707727034e-05, "loss": 0.4445, "step": 23484 }, { "epoch": 3.8337618872699073, "grad_norm": 2.0256707668304443, "learning_rate": 1.756061201705969e-05, "loss": 0.51, "step": 23485 }, { "epoch": 3.8339251459124117, "grad_norm": 2.2300288677215576, "learning_rate": 1.7560404318778813e-05, "loss": 0.4794, "step": 23486 }, { "epoch": 3.834088404554916, "grad_norm": 1.9660104513168335, "learning_rate": 1.756019661288461e-05, "loss": 0.4876, "step": 23487 }, { "epoch": 3.8342516631974206, "grad_norm": 1.969786286354065, "learning_rate": 1.7559988899377295e-05, "loss": 0.4507, "step": 23488 }, { "epoch": 3.834414921839925, "grad_norm": 1.8753588199615479, "learning_rate": 1.7559781178257073e-05, "loss": 0.484, "step": 23489 }, { "epoch": 3.8345781804824295, "grad_norm": 2.0159547328948975, "learning_rate": 1.755957344952415e-05, "loss": 0.4932, "step": 23490 }, { "epoch": 3.834741439124934, "grad_norm": 1.9845551252365112, "learning_rate": 1.7559365713178737e-05, "loss": 0.5383, "step": 23491 }, { "epoch": 3.834904697767438, "grad_norm": 1.9596607685089111, "learning_rate": 1.7559157969221047e-05, "loss": 0.4765, "step": 23492 }, { "epoch": 3.8350679564099424, "grad_norm": 2.2926650047302246, "learning_rate": 1.7558950217651288e-05, "loss": 0.5596, "step": 23493 }, { "epoch": 3.835231215052447, "grad_norm": 2.164003372192383, "learning_rate": 1.7558742458469668e-05, "loss": 0.522, "step": 23494 }, { "epoch": 3.8353944736949512, "grad_norm": 2.1127686500549316, "learning_rate": 1.7558534691676396e-05, "loss": 0.6593, "step": 23495 }, { "epoch": 3.8355577323374557, "grad_norm": 2.3489081859588623, "learning_rate": 1.7558326917271683e-05, "loss": 0.4734, "step": 23496 }, { "epoch": 3.83572099097996, "grad_norm": 2.0833542346954346, "learning_rate": 1.7558119135255735e-05, "loss": 0.5594, "step": 23497 }, { "epoch": 3.8358842496224645, "grad_norm": 1.9318429231643677, "learning_rate": 1.7557911345628765e-05, "loss": 0.4638, "step": 23498 }, { "epoch": 3.8360475082649685, "grad_norm": 2.259003162384033, "learning_rate": 1.7557703548390978e-05, "loss": 0.5162, "step": 23499 }, { "epoch": 3.836210766907473, "grad_norm": 1.7967538833618164, "learning_rate": 1.7557495743542586e-05, "loss": 0.4614, "step": 23500 }, { "epoch": 3.8363740255499774, "grad_norm": 1.7553751468658447, "learning_rate": 1.7557287931083796e-05, "loss": 0.4329, "step": 23501 }, { "epoch": 3.836537284192482, "grad_norm": 1.9720690250396729, "learning_rate": 1.7557080111014822e-05, "loss": 0.4527, "step": 23502 }, { "epoch": 3.8367005428349863, "grad_norm": 1.6270229816436768, "learning_rate": 1.755687228333587e-05, "loss": 0.4138, "step": 23503 }, { "epoch": 3.8368638014774907, "grad_norm": 1.4562962055206299, "learning_rate": 1.7556664448047146e-05, "loss": 0.3805, "step": 23504 }, { "epoch": 3.837027060119995, "grad_norm": 2.092674970626831, "learning_rate": 1.755645660514887e-05, "loss": 0.527, "step": 23505 }, { "epoch": 3.8371903187624996, "grad_norm": 1.743095874786377, "learning_rate": 1.7556248754641237e-05, "loss": 0.4221, "step": 23506 }, { "epoch": 3.837353577405004, "grad_norm": 1.8080140352249146, "learning_rate": 1.7556040896524468e-05, "loss": 0.4103, "step": 23507 }, { "epoch": 3.8375168360475085, "grad_norm": 1.4884861707687378, "learning_rate": 1.7555833030798764e-05, "loss": 0.373, "step": 23508 }, { "epoch": 3.837680094690013, "grad_norm": 1.810793399810791, "learning_rate": 1.755562515746434e-05, "loss": 0.42, "step": 23509 }, { "epoch": 3.837843353332517, "grad_norm": 1.8517518043518066, "learning_rate": 1.7555417276521405e-05, "loss": 0.4654, "step": 23510 }, { "epoch": 3.8380066119750214, "grad_norm": 1.806821584701538, "learning_rate": 1.7555209387970166e-05, "loss": 0.3851, "step": 23511 }, { "epoch": 3.838169870617526, "grad_norm": 2.214494228363037, "learning_rate": 1.7555001491810833e-05, "loss": 0.4814, "step": 23512 }, { "epoch": 3.8383331292600302, "grad_norm": 2.4347598552703857, "learning_rate": 1.7554793588043615e-05, "loss": 0.4972, "step": 23513 }, { "epoch": 3.8384963879025347, "grad_norm": 2.033979892730713, "learning_rate": 1.755458567666872e-05, "loss": 0.5138, "step": 23514 }, { "epoch": 3.838659646545039, "grad_norm": 2.1468307971954346, "learning_rate": 1.7554377757686363e-05, "loss": 0.4933, "step": 23515 }, { "epoch": 3.838822905187543, "grad_norm": 1.850946068763733, "learning_rate": 1.7554169831096748e-05, "loss": 0.4233, "step": 23516 }, { "epoch": 3.8389861638300475, "grad_norm": 2.0513205528259277, "learning_rate": 1.7553961896900086e-05, "loss": 0.4835, "step": 23517 }, { "epoch": 3.839149422472552, "grad_norm": 1.965533971786499, "learning_rate": 1.755375395509659e-05, "loss": 0.4618, "step": 23518 }, { "epoch": 3.8393126811150564, "grad_norm": 1.847693920135498, "learning_rate": 1.755354600568646e-05, "loss": 0.4367, "step": 23519 }, { "epoch": 3.839475939757561, "grad_norm": 2.066594123840332, "learning_rate": 1.755333804866991e-05, "loss": 0.5205, "step": 23520 }, { "epoch": 3.8396391984000653, "grad_norm": 2.139829158782959, "learning_rate": 1.7553130084047158e-05, "loss": 0.5022, "step": 23521 }, { "epoch": 3.8398024570425697, "grad_norm": 1.8536452054977417, "learning_rate": 1.75529221118184e-05, "loss": 0.4666, "step": 23522 }, { "epoch": 3.839965715685074, "grad_norm": 2.176578998565674, "learning_rate": 1.7552714131983854e-05, "loss": 0.5675, "step": 23523 }, { "epoch": 3.8401289743275786, "grad_norm": 1.612905740737915, "learning_rate": 1.7552506144543728e-05, "loss": 0.3826, "step": 23524 }, { "epoch": 3.840292232970083, "grad_norm": 2.053821325302124, "learning_rate": 1.7552298149498228e-05, "loss": 0.5202, "step": 23525 }, { "epoch": 3.8404554916125875, "grad_norm": 1.8639824390411377, "learning_rate": 1.7552090146847565e-05, "loss": 0.4664, "step": 23526 }, { "epoch": 3.8406187502550915, "grad_norm": 2.1709225177764893, "learning_rate": 1.7551882136591948e-05, "loss": 0.509, "step": 23527 }, { "epoch": 3.840782008897596, "grad_norm": 1.6034737825393677, "learning_rate": 1.7551674118731592e-05, "loss": 0.4396, "step": 23528 }, { "epoch": 3.8409452675401003, "grad_norm": 1.9860646724700928, "learning_rate": 1.75514660932667e-05, "loss": 0.5504, "step": 23529 }, { "epoch": 3.841108526182605, "grad_norm": 2.5073366165161133, "learning_rate": 1.7551258060197484e-05, "loss": 0.5818, "step": 23530 }, { "epoch": 3.8412717848251092, "grad_norm": 2.159803867340088, "learning_rate": 1.7551050019524155e-05, "loss": 0.4919, "step": 23531 }, { "epoch": 3.8414350434676137, "grad_norm": 1.9529757499694824, "learning_rate": 1.755084197124692e-05, "loss": 0.4724, "step": 23532 }, { "epoch": 3.841598302110118, "grad_norm": 2.1311559677124023, "learning_rate": 1.7550633915365986e-05, "loss": 0.4981, "step": 23533 }, { "epoch": 3.841761560752622, "grad_norm": 2.229048252105713, "learning_rate": 1.7550425851881566e-05, "loss": 0.4921, "step": 23534 }, { "epoch": 3.8419248193951265, "grad_norm": 2.2974023818969727, "learning_rate": 1.7550217780793873e-05, "loss": 0.4882, "step": 23535 }, { "epoch": 3.842088078037631, "grad_norm": 2.1003949642181396, "learning_rate": 1.755000970210311e-05, "loss": 0.5382, "step": 23536 }, { "epoch": 3.8422513366801354, "grad_norm": 1.7828983068466187, "learning_rate": 1.754980161580949e-05, "loss": 0.4151, "step": 23537 }, { "epoch": 3.84241459532264, "grad_norm": 2.247868776321411, "learning_rate": 1.754959352191322e-05, "loss": 0.5407, "step": 23538 }, { "epoch": 3.8425778539651443, "grad_norm": 2.009378433227539, "learning_rate": 1.7549385420414514e-05, "loss": 0.4356, "step": 23539 }, { "epoch": 3.8427411126076487, "grad_norm": 2.1189355850219727, "learning_rate": 1.7549177311313575e-05, "loss": 0.509, "step": 23540 }, { "epoch": 3.842904371250153, "grad_norm": 2.211669445037842, "learning_rate": 1.754896919461062e-05, "loss": 0.5566, "step": 23541 }, { "epoch": 3.8430676298926576, "grad_norm": 1.8523517847061157, "learning_rate": 1.7548761070305856e-05, "loss": 0.4484, "step": 23542 }, { "epoch": 3.843230888535162, "grad_norm": 1.339125156402588, "learning_rate": 1.754855293839949e-05, "loss": 0.3826, "step": 23543 }, { "epoch": 3.8433941471776665, "grad_norm": 2.1330699920654297, "learning_rate": 1.754834479889173e-05, "loss": 0.5174, "step": 23544 }, { "epoch": 3.8435574058201705, "grad_norm": 2.0621352195739746, "learning_rate": 1.7548136651782794e-05, "loss": 0.4925, "step": 23545 }, { "epoch": 3.843720664462675, "grad_norm": 2.5001718997955322, "learning_rate": 1.7547928497072884e-05, "loss": 0.53, "step": 23546 }, { "epoch": 3.8438839231051793, "grad_norm": 1.7431737184524536, "learning_rate": 1.7547720334762213e-05, "loss": 0.4642, "step": 23547 }, { "epoch": 3.844047181747684, "grad_norm": 1.7969703674316406, "learning_rate": 1.754751216485099e-05, "loss": 0.464, "step": 23548 }, { "epoch": 3.844210440390188, "grad_norm": 2.4851431846618652, "learning_rate": 1.754730398733942e-05, "loss": 0.5361, "step": 23549 }, { "epoch": 3.8443736990326927, "grad_norm": 1.9482324123382568, "learning_rate": 1.7547095802227723e-05, "loss": 0.5252, "step": 23550 }, { "epoch": 3.8445369576751967, "grad_norm": 1.8438940048217773, "learning_rate": 1.7546887609516097e-05, "loss": 0.4596, "step": 23551 }, { "epoch": 3.844700216317701, "grad_norm": 2.0327095985412598, "learning_rate": 1.754667940920476e-05, "loss": 0.4388, "step": 23552 }, { "epoch": 3.8448634749602055, "grad_norm": 1.9615267515182495, "learning_rate": 1.754647120129392e-05, "loss": 0.5006, "step": 23553 }, { "epoch": 3.84502673360271, "grad_norm": 1.757108449935913, "learning_rate": 1.7546262985783782e-05, "loss": 0.4357, "step": 23554 }, { "epoch": 3.8451899922452144, "grad_norm": 1.8228504657745361, "learning_rate": 1.7546054762674563e-05, "loss": 0.4544, "step": 23555 }, { "epoch": 3.845353250887719, "grad_norm": 1.869665265083313, "learning_rate": 1.7545846531966466e-05, "loss": 0.4782, "step": 23556 }, { "epoch": 3.8455165095302233, "grad_norm": 2.073820114135742, "learning_rate": 1.7545638293659703e-05, "loss": 0.5492, "step": 23557 }, { "epoch": 3.8456797681727277, "grad_norm": 2.089278221130371, "learning_rate": 1.7545430047754486e-05, "loss": 0.5303, "step": 23558 }, { "epoch": 3.845843026815232, "grad_norm": 1.8619471788406372, "learning_rate": 1.7545221794251028e-05, "loss": 0.4387, "step": 23559 }, { "epoch": 3.8460062854577366, "grad_norm": 1.5729259252548218, "learning_rate": 1.7545013533149525e-05, "loss": 0.4305, "step": 23560 }, { "epoch": 3.846169544100241, "grad_norm": 1.8736692667007446, "learning_rate": 1.7544805264450197e-05, "loss": 0.4708, "step": 23561 }, { "epoch": 3.8463328027427455, "grad_norm": 1.8589248657226562, "learning_rate": 1.7544596988153255e-05, "loss": 0.4711, "step": 23562 }, { "epoch": 3.8464960613852495, "grad_norm": 1.9077802896499634, "learning_rate": 1.7544388704258905e-05, "loss": 0.4683, "step": 23563 }, { "epoch": 3.846659320027754, "grad_norm": 1.940738320350647, "learning_rate": 1.7544180412767357e-05, "loss": 0.448, "step": 23564 }, { "epoch": 3.8468225786702583, "grad_norm": 1.9188824892044067, "learning_rate": 1.754397211367882e-05, "loss": 0.5137, "step": 23565 }, { "epoch": 3.846985837312763, "grad_norm": 2.681131601333618, "learning_rate": 1.754376380699351e-05, "loss": 0.4925, "step": 23566 }, { "epoch": 3.847149095955267, "grad_norm": 2.1279070377349854, "learning_rate": 1.7543555492711628e-05, "loss": 0.4638, "step": 23567 }, { "epoch": 3.8473123545977717, "grad_norm": 2.0773138999938965, "learning_rate": 1.7543347170833386e-05, "loss": 0.5522, "step": 23568 }, { "epoch": 3.8474756132402756, "grad_norm": 2.099576234817505, "learning_rate": 1.7543138841359e-05, "loss": 0.5397, "step": 23569 }, { "epoch": 3.84763887188278, "grad_norm": 1.6876417398452759, "learning_rate": 1.754293050428867e-05, "loss": 0.4985, "step": 23570 }, { "epoch": 3.8478021305252845, "grad_norm": 1.776253342628479, "learning_rate": 1.7542722159622614e-05, "loss": 0.4996, "step": 23571 }, { "epoch": 3.847965389167789, "grad_norm": 1.8267242908477783, "learning_rate": 1.754251380736104e-05, "loss": 0.4806, "step": 23572 }, { "epoch": 3.8481286478102934, "grad_norm": 1.6326017379760742, "learning_rate": 1.7542305447504152e-05, "loss": 0.3842, "step": 23573 }, { "epoch": 3.848291906452798, "grad_norm": 1.8651149272918701, "learning_rate": 1.754209708005217e-05, "loss": 0.5102, "step": 23574 }, { "epoch": 3.8484551650953023, "grad_norm": 2.382760524749756, "learning_rate": 1.7541888705005298e-05, "loss": 0.6008, "step": 23575 }, { "epoch": 3.8486184237378067, "grad_norm": 2.0915656089782715, "learning_rate": 1.7541680322363742e-05, "loss": 0.4812, "step": 23576 }, { "epoch": 3.848781682380311, "grad_norm": 1.8552953004837036, "learning_rate": 1.754147193212772e-05, "loss": 0.5237, "step": 23577 }, { "epoch": 3.8489449410228156, "grad_norm": 2.1469719409942627, "learning_rate": 1.7541263534297432e-05, "loss": 0.542, "step": 23578 }, { "epoch": 3.84910819966532, "grad_norm": 2.234879970550537, "learning_rate": 1.75410551288731e-05, "loss": 0.5778, "step": 23579 }, { "epoch": 3.849271458307824, "grad_norm": 1.7521555423736572, "learning_rate": 1.7540846715854926e-05, "loss": 0.4319, "step": 23580 }, { "epoch": 3.8494347169503285, "grad_norm": 2.0447192192077637, "learning_rate": 1.7540638295243115e-05, "loss": 0.4147, "step": 23581 }, { "epoch": 3.849597975592833, "grad_norm": 2.372156858444214, "learning_rate": 1.7540429867037892e-05, "loss": 0.4733, "step": 23582 }, { "epoch": 3.8497612342353373, "grad_norm": 2.0444114208221436, "learning_rate": 1.754022143123945e-05, "loss": 0.5559, "step": 23583 }, { "epoch": 3.8499244928778418, "grad_norm": 2.088841676712036, "learning_rate": 1.7540012987848015e-05, "loss": 0.4607, "step": 23584 }, { "epoch": 3.850087751520346, "grad_norm": 1.9084527492523193, "learning_rate": 1.7539804536863782e-05, "loss": 0.4256, "step": 23585 }, { "epoch": 3.8502510101628507, "grad_norm": 1.7953357696533203, "learning_rate": 1.7539596078286972e-05, "loss": 0.503, "step": 23586 }, { "epoch": 3.8504142688053546, "grad_norm": 2.1419875621795654, "learning_rate": 1.753938761211779e-05, "loss": 0.5035, "step": 23587 }, { "epoch": 3.850577527447859, "grad_norm": 2.294633388519287, "learning_rate": 1.7539179138356448e-05, "loss": 0.5158, "step": 23588 }, { "epoch": 3.8507407860903635, "grad_norm": 1.6144318580627441, "learning_rate": 1.7538970657003153e-05, "loss": 0.414, "step": 23589 }, { "epoch": 3.850904044732868, "grad_norm": 1.9486913681030273, "learning_rate": 1.7538762168058118e-05, "loss": 0.4144, "step": 23590 }, { "epoch": 3.8510673033753724, "grad_norm": 1.6307954788208008, "learning_rate": 1.7538553671521547e-05, "loss": 0.4041, "step": 23591 }, { "epoch": 3.851230562017877, "grad_norm": 2.069077730178833, "learning_rate": 1.753834516739366e-05, "loss": 0.4339, "step": 23592 }, { "epoch": 3.8513938206603813, "grad_norm": 2.2452802658081055, "learning_rate": 1.7538136655674658e-05, "loss": 0.5374, "step": 23593 }, { "epoch": 3.8515570793028857, "grad_norm": 1.956985354423523, "learning_rate": 1.7537928136364756e-05, "loss": 0.5323, "step": 23594 }, { "epoch": 3.85172033794539, "grad_norm": 2.055391788482666, "learning_rate": 1.7537719609464165e-05, "loss": 0.5192, "step": 23595 }, { "epoch": 3.8518835965878946, "grad_norm": 1.9883946180343628, "learning_rate": 1.7537511074973085e-05, "loss": 0.4312, "step": 23596 }, { "epoch": 3.852046855230399, "grad_norm": 2.0991051197052, "learning_rate": 1.753730253289174e-05, "loss": 0.5426, "step": 23597 }, { "epoch": 3.852210113872903, "grad_norm": 1.6478782892227173, "learning_rate": 1.753709398322033e-05, "loss": 0.3692, "step": 23598 }, { "epoch": 3.8523733725154075, "grad_norm": 2.4694247245788574, "learning_rate": 1.753688542595907e-05, "loss": 0.4779, "step": 23599 }, { "epoch": 3.852536631157912, "grad_norm": 1.833362340927124, "learning_rate": 1.7536676861108167e-05, "loss": 0.4299, "step": 23600 }, { "epoch": 3.8526998898004163, "grad_norm": 1.6668012142181396, "learning_rate": 1.7536468288667832e-05, "loss": 0.4418, "step": 23601 }, { "epoch": 3.8528631484429208, "grad_norm": 1.9023789167404175, "learning_rate": 1.753625970863828e-05, "loss": 0.4733, "step": 23602 }, { "epoch": 3.853026407085425, "grad_norm": 2.039609670639038, "learning_rate": 1.753605112101971e-05, "loss": 0.4934, "step": 23603 }, { "epoch": 3.853189665727929, "grad_norm": 2.1168105602264404, "learning_rate": 1.7535842525812342e-05, "loss": 0.5496, "step": 23604 }, { "epoch": 3.8533529243704336, "grad_norm": 1.6611520051956177, "learning_rate": 1.7535633923016382e-05, "loss": 0.4116, "step": 23605 }, { "epoch": 3.853516183012938, "grad_norm": 2.1097514629364014, "learning_rate": 1.7535425312632038e-05, "loss": 0.5411, "step": 23606 }, { "epoch": 3.8536794416554425, "grad_norm": 2.0802042484283447, "learning_rate": 1.7535216694659528e-05, "loss": 0.4752, "step": 23607 }, { "epoch": 3.853842700297947, "grad_norm": 1.8701859712600708, "learning_rate": 1.753500806909905e-05, "loss": 0.4907, "step": 23608 }, { "epoch": 3.8540059589404514, "grad_norm": 2.10382342338562, "learning_rate": 1.7534799435950827e-05, "loss": 0.4663, "step": 23609 }, { "epoch": 3.854169217582956, "grad_norm": 1.5181539058685303, "learning_rate": 1.7534590795215062e-05, "loss": 0.3493, "step": 23610 }, { "epoch": 3.8543324762254603, "grad_norm": 1.7792092561721802, "learning_rate": 1.7534382146891963e-05, "loss": 0.4039, "step": 23611 }, { "epoch": 3.8544957348679647, "grad_norm": 1.9791640043258667, "learning_rate": 1.753417349098175e-05, "loss": 0.5091, "step": 23612 }, { "epoch": 3.854658993510469, "grad_norm": 1.6073198318481445, "learning_rate": 1.7533964827484616e-05, "loss": 0.3703, "step": 23613 }, { "epoch": 3.8548222521529736, "grad_norm": 2.2645459175109863, "learning_rate": 1.753375615640079e-05, "loss": 0.5386, "step": 23614 }, { "epoch": 3.8549855107954776, "grad_norm": 2.012178897857666, "learning_rate": 1.753354747773047e-05, "loss": 0.4945, "step": 23615 }, { "epoch": 3.855148769437982, "grad_norm": 1.9413560628890991, "learning_rate": 1.7533338791473872e-05, "loss": 0.4734, "step": 23616 }, { "epoch": 3.8553120280804865, "grad_norm": 2.7093558311462402, "learning_rate": 1.7533130097631198e-05, "loss": 0.5729, "step": 23617 }, { "epoch": 3.855475286722991, "grad_norm": 2.087099313735962, "learning_rate": 1.753292139620267e-05, "loss": 0.524, "step": 23618 }, { "epoch": 3.8556385453654953, "grad_norm": 1.7487802505493164, "learning_rate": 1.7532712687188488e-05, "loss": 0.4476, "step": 23619 }, { "epoch": 3.8558018040079998, "grad_norm": 2.0163729190826416, "learning_rate": 1.753250397058887e-05, "loss": 0.4733, "step": 23620 }, { "epoch": 3.855965062650504, "grad_norm": 2.0556254386901855, "learning_rate": 1.753229524640402e-05, "loss": 0.472, "step": 23621 }, { "epoch": 3.856128321293008, "grad_norm": 2.1560165882110596, "learning_rate": 1.7532086514634152e-05, "loss": 0.5262, "step": 23622 }, { "epoch": 3.8562915799355126, "grad_norm": 1.8605068922042847, "learning_rate": 1.7531877775279476e-05, "loss": 0.4921, "step": 23623 }, { "epoch": 3.856454838578017, "grad_norm": 1.7367748022079468, "learning_rate": 1.75316690283402e-05, "loss": 0.4035, "step": 23624 }, { "epoch": 3.8566180972205215, "grad_norm": 1.6484689712524414, "learning_rate": 1.7531460273816533e-05, "loss": 0.3998, "step": 23625 }, { "epoch": 3.856781355863026, "grad_norm": 1.6900889873504639, "learning_rate": 1.7531251511708692e-05, "loss": 0.4648, "step": 23626 }, { "epoch": 3.8569446145055304, "grad_norm": 1.740208387374878, "learning_rate": 1.7531042742016878e-05, "loss": 0.4288, "step": 23627 }, { "epoch": 3.857107873148035, "grad_norm": 1.9729090929031372, "learning_rate": 1.7530833964741306e-05, "loss": 0.4181, "step": 23628 }, { "epoch": 3.8572711317905393, "grad_norm": 1.9846611022949219, "learning_rate": 1.753062517988219e-05, "loss": 0.5061, "step": 23629 }, { "epoch": 3.8574343904330437, "grad_norm": 2.3044958114624023, "learning_rate": 1.7530416387439736e-05, "loss": 0.4887, "step": 23630 }, { "epoch": 3.857597649075548, "grad_norm": 1.7479583024978638, "learning_rate": 1.7530207587414153e-05, "loss": 0.4539, "step": 23631 }, { "epoch": 3.8577609077180526, "grad_norm": 1.9237650632858276, "learning_rate": 1.752999877980565e-05, "loss": 0.5636, "step": 23632 }, { "epoch": 3.8579241663605566, "grad_norm": 1.849743127822876, "learning_rate": 1.752978996461445e-05, "loss": 0.4653, "step": 23633 }, { "epoch": 3.858087425003061, "grad_norm": 1.7669486999511719, "learning_rate": 1.7529581141840743e-05, "loss": 0.4617, "step": 23634 }, { "epoch": 3.8582506836455654, "grad_norm": 2.210994243621826, "learning_rate": 1.7529372311484754e-05, "loss": 0.6148, "step": 23635 }, { "epoch": 3.85841394228807, "grad_norm": 2.290788173675537, "learning_rate": 1.7529163473546692e-05, "loss": 0.5843, "step": 23636 }, { "epoch": 3.8585772009305743, "grad_norm": 2.4183082580566406, "learning_rate": 1.752895462802676e-05, "loss": 0.533, "step": 23637 }, { "epoch": 3.8587404595730788, "grad_norm": 2.593114137649536, "learning_rate": 1.7528745774925175e-05, "loss": 0.5411, "step": 23638 }, { "epoch": 3.858903718215583, "grad_norm": 2.06288743019104, "learning_rate": 1.7528536914242144e-05, "loss": 0.4919, "step": 23639 }, { "epoch": 3.859066976858087, "grad_norm": 2.3200972080230713, "learning_rate": 1.7528328045977876e-05, "loss": 0.5533, "step": 23640 }, { "epoch": 3.8592302355005916, "grad_norm": 1.610063076019287, "learning_rate": 1.7528119170132586e-05, "loss": 0.3881, "step": 23641 }, { "epoch": 3.859393494143096, "grad_norm": 1.8472261428833008, "learning_rate": 1.7527910286706483e-05, "loss": 0.4175, "step": 23642 }, { "epoch": 3.8595567527856005, "grad_norm": 1.8443464040756226, "learning_rate": 1.7527701395699774e-05, "loss": 0.4943, "step": 23643 }, { "epoch": 3.859720011428105, "grad_norm": 1.9211493730545044, "learning_rate": 1.7527492497112675e-05, "loss": 0.4108, "step": 23644 }, { "epoch": 3.8598832700706094, "grad_norm": 1.9613606929779053, "learning_rate": 1.752728359094539e-05, "loss": 0.4898, "step": 23645 }, { "epoch": 3.860046528713114, "grad_norm": 1.9427756071090698, "learning_rate": 1.752707467719813e-05, "loss": 0.5275, "step": 23646 }, { "epoch": 3.8602097873556183, "grad_norm": 2.0978026390075684, "learning_rate": 1.7526865755871115e-05, "loss": 0.4816, "step": 23647 }, { "epoch": 3.8603730459981227, "grad_norm": 1.8217517137527466, "learning_rate": 1.7526656826964544e-05, "loss": 0.4495, "step": 23648 }, { "epoch": 3.860536304640627, "grad_norm": 1.836165428161621, "learning_rate": 1.7526447890478633e-05, "loss": 0.3962, "step": 23649 }, { "epoch": 3.8606995632831316, "grad_norm": 1.9526872634887695, "learning_rate": 1.752623894641359e-05, "loss": 0.4136, "step": 23650 }, { "epoch": 3.8608628219256356, "grad_norm": 1.9747471809387207, "learning_rate": 1.7526029994769626e-05, "loss": 0.5104, "step": 23651 }, { "epoch": 3.86102608056814, "grad_norm": 1.7567038536071777, "learning_rate": 1.752582103554695e-05, "loss": 0.4443, "step": 23652 }, { "epoch": 3.8611893392106444, "grad_norm": 1.7209205627441406, "learning_rate": 1.752561206874578e-05, "loss": 0.4442, "step": 23653 }, { "epoch": 3.861352597853149, "grad_norm": 1.5424864292144775, "learning_rate": 1.7525403094366312e-05, "loss": 0.3879, "step": 23654 }, { "epoch": 3.8615158564956533, "grad_norm": 1.7298856973648071, "learning_rate": 1.7525194112408773e-05, "loss": 0.4097, "step": 23655 }, { "epoch": 3.8616791151381578, "grad_norm": 1.54424250125885, "learning_rate": 1.7524985122873362e-05, "loss": 0.3618, "step": 23656 }, { "epoch": 3.8618423737806618, "grad_norm": 2.163322925567627, "learning_rate": 1.7524776125760296e-05, "loss": 0.5734, "step": 23657 }, { "epoch": 3.862005632423166, "grad_norm": 1.9678411483764648, "learning_rate": 1.752456712106978e-05, "loss": 0.4623, "step": 23658 }, { "epoch": 3.8621688910656706, "grad_norm": 1.929163932800293, "learning_rate": 1.7524358108802025e-05, "loss": 0.4591, "step": 23659 }, { "epoch": 3.862332149708175, "grad_norm": 2.0534732341766357, "learning_rate": 1.7524149088957244e-05, "loss": 0.5115, "step": 23660 }, { "epoch": 3.8624954083506795, "grad_norm": 2.0425615310668945, "learning_rate": 1.752394006153565e-05, "loss": 0.5402, "step": 23661 }, { "epoch": 3.862658666993184, "grad_norm": 2.187140703201294, "learning_rate": 1.752373102653745e-05, "loss": 0.5125, "step": 23662 }, { "epoch": 3.8628219256356884, "grad_norm": 2.009028911590576, "learning_rate": 1.752352198396285e-05, "loss": 0.5289, "step": 23663 }, { "epoch": 3.862985184278193, "grad_norm": 2.2310190200805664, "learning_rate": 1.7523312933812067e-05, "loss": 0.4759, "step": 23664 }, { "epoch": 3.8631484429206973, "grad_norm": 1.5696316957473755, "learning_rate": 1.7523103876085313e-05, "loss": 0.3649, "step": 23665 }, { "epoch": 3.8633117015632017, "grad_norm": 1.907735824584961, "learning_rate": 1.7522894810782795e-05, "loss": 0.4507, "step": 23666 }, { "epoch": 3.863474960205706, "grad_norm": 2.127683162689209, "learning_rate": 1.7522685737904725e-05, "loss": 0.4934, "step": 23667 }, { "epoch": 3.86363821884821, "grad_norm": 2.0817837715148926, "learning_rate": 1.752247665745131e-05, "loss": 0.468, "step": 23668 }, { "epoch": 3.8638014774907146, "grad_norm": 1.9406670331954956, "learning_rate": 1.7522267569422764e-05, "loss": 0.4229, "step": 23669 }, { "epoch": 3.863964736133219, "grad_norm": 1.6175017356872559, "learning_rate": 1.7522058473819295e-05, "loss": 0.3858, "step": 23670 }, { "epoch": 3.8641279947757234, "grad_norm": 1.7821271419525146, "learning_rate": 1.7521849370641116e-05, "loss": 0.4497, "step": 23671 }, { "epoch": 3.864291253418228, "grad_norm": 1.9895728826522827, "learning_rate": 1.7521640259888436e-05, "loss": 0.4661, "step": 23672 }, { "epoch": 3.8644545120607323, "grad_norm": 1.6456637382507324, "learning_rate": 1.752143114156147e-05, "loss": 0.3854, "step": 23673 }, { "epoch": 3.8646177707032368, "grad_norm": 1.9426958560943604, "learning_rate": 1.752122201566042e-05, "loss": 0.4512, "step": 23674 }, { "epoch": 3.8647810293457407, "grad_norm": 2.3677351474761963, "learning_rate": 1.7521012882185502e-05, "loss": 0.5093, "step": 23675 }, { "epoch": 3.864944287988245, "grad_norm": 2.283442735671997, "learning_rate": 1.752080374113693e-05, "loss": 0.5438, "step": 23676 }, { "epoch": 3.8651075466307496, "grad_norm": 1.9110376834869385, "learning_rate": 1.7520594592514905e-05, "loss": 0.4807, "step": 23677 }, { "epoch": 3.865270805273254, "grad_norm": 2.1381821632385254, "learning_rate": 1.7520385436319646e-05, "loss": 0.5463, "step": 23678 }, { "epoch": 3.8654340639157585, "grad_norm": 1.9586560726165771, "learning_rate": 1.7520176272551363e-05, "loss": 0.4088, "step": 23679 }, { "epoch": 3.865597322558263, "grad_norm": 2.0603134632110596, "learning_rate": 1.7519967101210264e-05, "loss": 0.4819, "step": 23680 }, { "epoch": 3.8657605812007674, "grad_norm": 1.9893784523010254, "learning_rate": 1.7519757922296555e-05, "loss": 0.485, "step": 23681 }, { "epoch": 3.865923839843272, "grad_norm": 1.8171089887619019, "learning_rate": 1.7519548735810456e-05, "loss": 0.4117, "step": 23682 }, { "epoch": 3.8660870984857763, "grad_norm": 2.40132737159729, "learning_rate": 1.7519339541752173e-05, "loss": 0.5102, "step": 23683 }, { "epoch": 3.8662503571282807, "grad_norm": 1.839218258857727, "learning_rate": 1.7519130340121918e-05, "loss": 0.456, "step": 23684 }, { "epoch": 3.866413615770785, "grad_norm": 2.1397321224212646, "learning_rate": 1.75189211309199e-05, "loss": 0.4603, "step": 23685 }, { "epoch": 3.866576874413289, "grad_norm": 1.8774031400680542, "learning_rate": 1.7518711914146328e-05, "loss": 0.4567, "step": 23686 }, { "epoch": 3.8667401330557936, "grad_norm": 2.1550452709198, "learning_rate": 1.7518502689801417e-05, "loss": 0.4408, "step": 23687 }, { "epoch": 3.866903391698298, "grad_norm": 2.044003963470459, "learning_rate": 1.7518293457885376e-05, "loss": 0.5162, "step": 23688 }, { "epoch": 3.8670666503408024, "grad_norm": 2.6846282482147217, "learning_rate": 1.7518084218398417e-05, "loss": 0.5876, "step": 23689 }, { "epoch": 3.867229908983307, "grad_norm": 1.9405380487442017, "learning_rate": 1.7517874971340747e-05, "loss": 0.4822, "step": 23690 }, { "epoch": 3.8673931676258113, "grad_norm": 2.0313262939453125, "learning_rate": 1.751766571671258e-05, "loss": 0.4381, "step": 23691 }, { "epoch": 3.8675564262683153, "grad_norm": 2.028313159942627, "learning_rate": 1.7517456454514125e-05, "loss": 0.4435, "step": 23692 }, { "epoch": 3.8677196849108197, "grad_norm": 2.612182140350342, "learning_rate": 1.7517247184745595e-05, "loss": 0.5364, "step": 23693 }, { "epoch": 3.867882943553324, "grad_norm": 1.677525520324707, "learning_rate": 1.7517037907407196e-05, "loss": 0.3874, "step": 23694 }, { "epoch": 3.8680462021958286, "grad_norm": 2.1837642192840576, "learning_rate": 1.751682862249914e-05, "loss": 0.4957, "step": 23695 }, { "epoch": 3.868209460838333, "grad_norm": 2.1324429512023926, "learning_rate": 1.7516619330021644e-05, "loss": 0.5638, "step": 23696 }, { "epoch": 3.8683727194808375, "grad_norm": 1.6722798347473145, "learning_rate": 1.7516410029974915e-05, "loss": 0.3959, "step": 23697 }, { "epoch": 3.868535978123342, "grad_norm": 1.9445017576217651, "learning_rate": 1.751620072235916e-05, "loss": 0.4683, "step": 23698 }, { "epoch": 3.8686992367658464, "grad_norm": 2.2142763137817383, "learning_rate": 1.7515991407174594e-05, "loss": 0.5062, "step": 23699 }, { "epoch": 3.868862495408351, "grad_norm": 2.188197135925293, "learning_rate": 1.7515782084421426e-05, "loss": 0.5347, "step": 23700 }, { "epoch": 3.8690257540508552, "grad_norm": 1.813198208808899, "learning_rate": 1.751557275409987e-05, "loss": 0.4867, "step": 23701 }, { "epoch": 3.8691890126933597, "grad_norm": 2.3293659687042236, "learning_rate": 1.7515363416210134e-05, "loss": 0.5505, "step": 23702 }, { "epoch": 3.8693522713358637, "grad_norm": 1.9422156810760498, "learning_rate": 1.7515154070752427e-05, "loss": 0.4594, "step": 23703 }, { "epoch": 3.869515529978368, "grad_norm": 1.8738781213760376, "learning_rate": 1.7514944717726962e-05, "loss": 0.4517, "step": 23704 }, { "epoch": 3.8696787886208726, "grad_norm": 1.8071722984313965, "learning_rate": 1.751473535713395e-05, "loss": 0.4667, "step": 23705 }, { "epoch": 3.869842047263377, "grad_norm": 2.149587869644165, "learning_rate": 1.7514525988973604e-05, "loss": 0.485, "step": 23706 }, { "epoch": 3.8700053059058814, "grad_norm": 1.852715015411377, "learning_rate": 1.7514316613246128e-05, "loss": 0.4698, "step": 23707 }, { "epoch": 3.870168564548386, "grad_norm": 1.8915460109710693, "learning_rate": 1.751410722995174e-05, "loss": 0.4271, "step": 23708 }, { "epoch": 3.8703318231908903, "grad_norm": 2.034020185470581, "learning_rate": 1.7513897839090645e-05, "loss": 0.546, "step": 23709 }, { "epoch": 3.8704950818333943, "grad_norm": 2.4875636100769043, "learning_rate": 1.7513688440663062e-05, "loss": 0.5598, "step": 23710 }, { "epoch": 3.8706583404758987, "grad_norm": 2.1364529132843018, "learning_rate": 1.751347903466919e-05, "loss": 0.4452, "step": 23711 }, { "epoch": 3.870821599118403, "grad_norm": 1.6522787809371948, "learning_rate": 1.7513269621109252e-05, "loss": 0.3954, "step": 23712 }, { "epoch": 3.8709848577609076, "grad_norm": 2.1054317951202393, "learning_rate": 1.7513060199983453e-05, "loss": 0.5178, "step": 23713 }, { "epoch": 3.871148116403412, "grad_norm": 2.04706072807312, "learning_rate": 1.7512850771292003e-05, "loss": 0.5747, "step": 23714 }, { "epoch": 3.8713113750459165, "grad_norm": 2.201352596282959, "learning_rate": 1.7512641335035115e-05, "loss": 0.4385, "step": 23715 }, { "epoch": 3.871474633688421, "grad_norm": 1.627254843711853, "learning_rate": 1.7512431891212997e-05, "loss": 0.413, "step": 23716 }, { "epoch": 3.8716378923309254, "grad_norm": 2.1553800106048584, "learning_rate": 1.7512222439825864e-05, "loss": 0.5299, "step": 23717 }, { "epoch": 3.87180115097343, "grad_norm": 1.937456488609314, "learning_rate": 1.751201298087392e-05, "loss": 0.3859, "step": 23718 }, { "epoch": 3.8719644096159342, "grad_norm": 1.9290194511413574, "learning_rate": 1.751180351435739e-05, "loss": 0.5099, "step": 23719 }, { "epoch": 3.8721276682584387, "grad_norm": 1.8672878742218018, "learning_rate": 1.7511594040276468e-05, "loss": 0.4387, "step": 23720 }, { "epoch": 3.8722909269009427, "grad_norm": 1.673200011253357, "learning_rate": 1.7511384558631377e-05, "loss": 0.4703, "step": 23721 }, { "epoch": 3.872454185543447, "grad_norm": 2.0821259021759033, "learning_rate": 1.7511175069422327e-05, "loss": 0.5099, "step": 23722 }, { "epoch": 3.8726174441859516, "grad_norm": 2.091996669769287, "learning_rate": 1.7510965572649522e-05, "loss": 0.4665, "step": 23723 }, { "epoch": 3.872780702828456, "grad_norm": 1.8486050367355347, "learning_rate": 1.7510756068313175e-05, "loss": 0.4293, "step": 23724 }, { "epoch": 3.8729439614709604, "grad_norm": 2.090474843978882, "learning_rate": 1.75105465564135e-05, "loss": 0.5226, "step": 23725 }, { "epoch": 3.873107220113465, "grad_norm": 1.9527180194854736, "learning_rate": 1.7510337036950703e-05, "loss": 0.4633, "step": 23726 }, { "epoch": 3.8732704787559693, "grad_norm": 1.7230197191238403, "learning_rate": 1.7510127509925006e-05, "loss": 0.4188, "step": 23727 }, { "epoch": 3.8734337373984733, "grad_norm": 1.8560165166854858, "learning_rate": 1.750991797533661e-05, "loss": 0.5064, "step": 23728 }, { "epoch": 3.8735969960409777, "grad_norm": 2.125929355621338, "learning_rate": 1.7509708433185725e-05, "loss": 0.508, "step": 23729 }, { "epoch": 3.873760254683482, "grad_norm": 1.8666541576385498, "learning_rate": 1.7509498883472566e-05, "loss": 0.4774, "step": 23730 }, { "epoch": 3.8739235133259866, "grad_norm": 1.8192458152770996, "learning_rate": 1.7509289326197347e-05, "loss": 0.4435, "step": 23731 }, { "epoch": 3.874086771968491, "grad_norm": 1.8827069997787476, "learning_rate": 1.7509079761360275e-05, "loss": 0.5551, "step": 23732 }, { "epoch": 3.8742500306109955, "grad_norm": 1.7554839849472046, "learning_rate": 1.750887018896156e-05, "loss": 0.3799, "step": 23733 }, { "epoch": 3.8744132892535, "grad_norm": 1.8863712549209595, "learning_rate": 1.7508660609001417e-05, "loss": 0.4615, "step": 23734 }, { "epoch": 3.8745765478960044, "grad_norm": 2.1859617233276367, "learning_rate": 1.7508451021480054e-05, "loss": 0.4359, "step": 23735 }, { "epoch": 3.874739806538509, "grad_norm": 2.0907485485076904, "learning_rate": 1.7508241426397678e-05, "loss": 0.5032, "step": 23736 }, { "epoch": 3.8749030651810132, "grad_norm": 2.197258472442627, "learning_rate": 1.750803182375451e-05, "loss": 0.5509, "step": 23737 }, { "epoch": 3.8750663238235177, "grad_norm": 1.9746301174163818, "learning_rate": 1.7507822213550757e-05, "loss": 0.4613, "step": 23738 }, { "epoch": 3.8752295824660217, "grad_norm": 2.309138774871826, "learning_rate": 1.7507612595786625e-05, "loss": 0.4752, "step": 23739 }, { "epoch": 3.875392841108526, "grad_norm": 1.8547136783599854, "learning_rate": 1.750740297046233e-05, "loss": 0.4938, "step": 23740 }, { "epoch": 3.8755560997510305, "grad_norm": 2.019937515258789, "learning_rate": 1.7507193337578084e-05, "loss": 0.5796, "step": 23741 }, { "epoch": 3.875719358393535, "grad_norm": 1.8511327505111694, "learning_rate": 1.7506983697134096e-05, "loss": 0.466, "step": 23742 }, { "epoch": 3.8758826170360394, "grad_norm": 1.9204258918762207, "learning_rate": 1.7506774049130576e-05, "loss": 0.5289, "step": 23743 }, { "epoch": 3.876045875678544, "grad_norm": 2.156984567642212, "learning_rate": 1.750656439356774e-05, "loss": 0.5283, "step": 23744 }, { "epoch": 3.876209134321048, "grad_norm": 2.3308475017547607, "learning_rate": 1.750635473044579e-05, "loss": 0.5547, "step": 23745 }, { "epoch": 3.8763723929635523, "grad_norm": 1.9862765073776245, "learning_rate": 1.750614505976495e-05, "loss": 0.5108, "step": 23746 }, { "epoch": 3.8765356516060567, "grad_norm": 2.3168301582336426, "learning_rate": 1.750593538152542e-05, "loss": 0.5446, "step": 23747 }, { "epoch": 3.876698910248561, "grad_norm": 1.890107274055481, "learning_rate": 1.7505725695727414e-05, "loss": 0.4467, "step": 23748 }, { "epoch": 3.8768621688910656, "grad_norm": 1.6770411729812622, "learning_rate": 1.7505516002371144e-05, "loss": 0.4496, "step": 23749 }, { "epoch": 3.87702542753357, "grad_norm": 1.9209520816802979, "learning_rate": 1.7505306301456823e-05, "loss": 0.427, "step": 23750 }, { "epoch": 3.8771886861760745, "grad_norm": 2.0333566665649414, "learning_rate": 1.7505096592984658e-05, "loss": 0.5009, "step": 23751 }, { "epoch": 3.877351944818579, "grad_norm": 2.005866765975952, "learning_rate": 1.7504886876954868e-05, "loss": 0.496, "step": 23752 }, { "epoch": 3.8775152034610834, "grad_norm": 1.9821094274520874, "learning_rate": 1.7504677153367654e-05, "loss": 0.5216, "step": 23753 }, { "epoch": 3.877678462103588, "grad_norm": 1.9488263130187988, "learning_rate": 1.7504467422223235e-05, "loss": 0.4962, "step": 23754 }, { "epoch": 3.8778417207460922, "grad_norm": 2.0755462646484375, "learning_rate": 1.7504257683521816e-05, "loss": 0.5825, "step": 23755 }, { "epoch": 3.8780049793885962, "grad_norm": 2.0226359367370605, "learning_rate": 1.7504047937263612e-05, "loss": 0.4738, "step": 23756 }, { "epoch": 3.8781682380311007, "grad_norm": 1.8912899494171143, "learning_rate": 1.7503838183448838e-05, "loss": 0.46, "step": 23757 }, { "epoch": 3.878331496673605, "grad_norm": 1.636673927307129, "learning_rate": 1.75036284220777e-05, "loss": 0.3945, "step": 23758 }, { "epoch": 3.8784947553161095, "grad_norm": 2.042353868484497, "learning_rate": 1.7503418653150407e-05, "loss": 0.4688, "step": 23759 }, { "epoch": 3.878658013958614, "grad_norm": 2.315147638320923, "learning_rate": 1.7503208876667175e-05, "loss": 0.4741, "step": 23760 }, { "epoch": 3.8788212726011184, "grad_norm": 1.496518850326538, "learning_rate": 1.7502999092628212e-05, "loss": 0.3956, "step": 23761 }, { "epoch": 3.878984531243623, "grad_norm": 1.815963864326477, "learning_rate": 1.7502789301033735e-05, "loss": 0.4883, "step": 23762 }, { "epoch": 3.879147789886127, "grad_norm": 2.064023971557617, "learning_rate": 1.750257950188395e-05, "loss": 0.5147, "step": 23763 }, { "epoch": 3.8793110485286313, "grad_norm": 2.2077057361602783, "learning_rate": 1.7502369695179065e-05, "loss": 0.4768, "step": 23764 }, { "epoch": 3.8794743071711357, "grad_norm": 1.8766721487045288, "learning_rate": 1.7502159880919298e-05, "loss": 0.4134, "step": 23765 }, { "epoch": 3.87963756581364, "grad_norm": 2.4206438064575195, "learning_rate": 1.750195005910486e-05, "loss": 0.5626, "step": 23766 }, { "epoch": 3.8798008244561446, "grad_norm": 1.6061855554580688, "learning_rate": 1.750174022973596e-05, "loss": 0.427, "step": 23767 }, { "epoch": 3.879964083098649, "grad_norm": 2.0147016048431396, "learning_rate": 1.7501530392812807e-05, "loss": 0.5413, "step": 23768 }, { "epoch": 3.8801273417411535, "grad_norm": 2.1233999729156494, "learning_rate": 1.7501320548335617e-05, "loss": 0.432, "step": 23769 }, { "epoch": 3.880290600383658, "grad_norm": 2.2661075592041016, "learning_rate": 1.7501110696304598e-05, "loss": 0.5655, "step": 23770 }, { "epoch": 3.8804538590261624, "grad_norm": 1.9298876523971558, "learning_rate": 1.7500900836719962e-05, "loss": 0.4625, "step": 23771 }, { "epoch": 3.880617117668667, "grad_norm": 1.9598796367645264, "learning_rate": 1.750069096958192e-05, "loss": 0.4096, "step": 23772 }, { "epoch": 3.8807803763111712, "grad_norm": 1.8881205320358276, "learning_rate": 1.7500481094890686e-05, "loss": 0.4949, "step": 23773 }, { "epoch": 3.8809436349536752, "grad_norm": 1.9786773920059204, "learning_rate": 1.7500271212646473e-05, "loss": 0.4, "step": 23774 }, { "epoch": 3.8811068935961797, "grad_norm": 1.9449987411499023, "learning_rate": 1.7500061322849483e-05, "loss": 0.4643, "step": 23775 }, { "epoch": 3.881270152238684, "grad_norm": 2.2612080574035645, "learning_rate": 1.7499851425499938e-05, "loss": 0.5544, "step": 23776 }, { "epoch": 3.8814334108811885, "grad_norm": 2.20436954498291, "learning_rate": 1.7499641520598043e-05, "loss": 0.6113, "step": 23777 }, { "epoch": 3.881596669523693, "grad_norm": 1.9579681158065796, "learning_rate": 1.749943160814401e-05, "loss": 0.4414, "step": 23778 }, { "epoch": 3.8817599281661974, "grad_norm": 1.999233365058899, "learning_rate": 1.749922168813805e-05, "loss": 0.4711, "step": 23779 }, { "epoch": 3.8819231868087014, "grad_norm": 1.9500572681427002, "learning_rate": 1.7499011760580377e-05, "loss": 0.4962, "step": 23780 }, { "epoch": 3.882086445451206, "grad_norm": 2.407575845718384, "learning_rate": 1.7498801825471204e-05, "loss": 0.5832, "step": 23781 }, { "epoch": 3.8822497040937103, "grad_norm": 1.9885153770446777, "learning_rate": 1.7498591882810736e-05, "loss": 0.4743, "step": 23782 }, { "epoch": 3.8824129627362147, "grad_norm": 1.97114098072052, "learning_rate": 1.749838193259919e-05, "loss": 0.4311, "step": 23783 }, { "epoch": 3.882576221378719, "grad_norm": 2.176661968231201, "learning_rate": 1.7498171974836773e-05, "loss": 0.5293, "step": 23784 }, { "epoch": 3.8827394800212236, "grad_norm": 1.6616418361663818, "learning_rate": 1.74979620095237e-05, "loss": 0.3938, "step": 23785 }, { "epoch": 3.882902738663728, "grad_norm": 2.3082776069641113, "learning_rate": 1.749775203666018e-05, "loss": 0.5646, "step": 23786 }, { "epoch": 3.8830659973062325, "grad_norm": 1.7994616031646729, "learning_rate": 1.749754205624643e-05, "loss": 0.4702, "step": 23787 }, { "epoch": 3.883229255948737, "grad_norm": 2.309176445007324, "learning_rate": 1.7497332068282653e-05, "loss": 0.5779, "step": 23788 }, { "epoch": 3.8833925145912414, "grad_norm": 1.9905999898910522, "learning_rate": 1.7497122072769067e-05, "loss": 0.5224, "step": 23789 }, { "epoch": 3.883555773233746, "grad_norm": 2.122649908065796, "learning_rate": 1.749691206970588e-05, "loss": 0.4747, "step": 23790 }, { "epoch": 3.88371903187625, "grad_norm": 2.522921562194824, "learning_rate": 1.7496702059093305e-05, "loss": 0.5258, "step": 23791 }, { "epoch": 3.883882290518754, "grad_norm": 1.7685426473617554, "learning_rate": 1.749649204093155e-05, "loss": 0.4149, "step": 23792 }, { "epoch": 3.8840455491612587, "grad_norm": 1.65902841091156, "learning_rate": 1.7496282015220834e-05, "loss": 0.4423, "step": 23793 }, { "epoch": 3.884208807803763, "grad_norm": 1.9052081108093262, "learning_rate": 1.7496071981961363e-05, "loss": 0.4636, "step": 23794 }, { "epoch": 3.8843720664462675, "grad_norm": 2.179473400115967, "learning_rate": 1.749586194115335e-05, "loss": 0.5322, "step": 23795 }, { "epoch": 3.884535325088772, "grad_norm": 2.0521812438964844, "learning_rate": 1.7495651892797004e-05, "loss": 0.4988, "step": 23796 }, { "epoch": 3.8846985837312764, "grad_norm": 1.6222355365753174, "learning_rate": 1.749544183689254e-05, "loss": 0.4594, "step": 23797 }, { "epoch": 3.8848618423737804, "grad_norm": 2.0306456089019775, "learning_rate": 1.7495231773440163e-05, "loss": 0.5106, "step": 23798 }, { "epoch": 3.885025101016285, "grad_norm": 1.9592825174331665, "learning_rate": 1.7495021702440095e-05, "loss": 0.5024, "step": 23799 }, { "epoch": 3.8851883596587893, "grad_norm": 1.8153568506240845, "learning_rate": 1.7494811623892543e-05, "loss": 0.5317, "step": 23800 }, { "epoch": 3.8853516183012937, "grad_norm": 1.8985090255737305, "learning_rate": 1.7494601537797715e-05, "loss": 0.4862, "step": 23801 }, { "epoch": 3.885514876943798, "grad_norm": 1.9085984230041504, "learning_rate": 1.7494391444155826e-05, "loss": 0.5349, "step": 23802 }, { "epoch": 3.8856781355863026, "grad_norm": 1.8529717922210693, "learning_rate": 1.7494181342967083e-05, "loss": 0.4215, "step": 23803 }, { "epoch": 3.885841394228807, "grad_norm": 2.23519229888916, "learning_rate": 1.7493971234231708e-05, "loss": 0.4814, "step": 23804 }, { "epoch": 3.8860046528713115, "grad_norm": 2.180138349533081, "learning_rate": 1.7493761117949902e-05, "loss": 0.5361, "step": 23805 }, { "epoch": 3.886167911513816, "grad_norm": 1.7080305814743042, "learning_rate": 1.7493550994121883e-05, "loss": 0.4442, "step": 23806 }, { "epoch": 3.8863311701563203, "grad_norm": 1.962238073348999, "learning_rate": 1.7493340862747858e-05, "loss": 0.4571, "step": 23807 }, { "epoch": 3.886494428798825, "grad_norm": 1.793537974357605, "learning_rate": 1.7493130723828043e-05, "loss": 0.4898, "step": 23808 }, { "epoch": 3.886657687441329, "grad_norm": 2.2871015071868896, "learning_rate": 1.7492920577362646e-05, "loss": 0.6683, "step": 23809 }, { "epoch": 3.886820946083833, "grad_norm": 1.6083706617355347, "learning_rate": 1.7492710423351878e-05, "loss": 0.4464, "step": 23810 }, { "epoch": 3.8869842047263377, "grad_norm": 1.7411103248596191, "learning_rate": 1.7492500261795955e-05, "loss": 0.4321, "step": 23811 }, { "epoch": 3.887147463368842, "grad_norm": 1.384132981300354, "learning_rate": 1.7492290092695086e-05, "loss": 0.3926, "step": 23812 }, { "epoch": 3.8873107220113465, "grad_norm": 1.9964163303375244, "learning_rate": 1.7492079916049484e-05, "loss": 0.566, "step": 23813 }, { "epoch": 3.887473980653851, "grad_norm": 1.9131020307540894, "learning_rate": 1.7491869731859353e-05, "loss": 0.4284, "step": 23814 }, { "epoch": 3.8876372392963554, "grad_norm": 1.9458223581314087, "learning_rate": 1.749165954012492e-05, "loss": 0.4718, "step": 23815 }, { "epoch": 3.8878004979388594, "grad_norm": 1.451594352722168, "learning_rate": 1.7491449340846384e-05, "loss": 0.3643, "step": 23816 }, { "epoch": 3.887963756581364, "grad_norm": 2.279939889907837, "learning_rate": 1.749123913402396e-05, "loss": 0.5407, "step": 23817 }, { "epoch": 3.8881270152238683, "grad_norm": 1.7385236024856567, "learning_rate": 1.749102891965786e-05, "loss": 0.3792, "step": 23818 }, { "epoch": 3.8882902738663727, "grad_norm": 1.9615660905838013, "learning_rate": 1.7490818697748294e-05, "loss": 0.4379, "step": 23819 }, { "epoch": 3.888453532508877, "grad_norm": 2.097761631011963, "learning_rate": 1.749060846829548e-05, "loss": 0.4974, "step": 23820 }, { "epoch": 3.8886167911513816, "grad_norm": 1.9149850606918335, "learning_rate": 1.7490398231299625e-05, "loss": 0.4604, "step": 23821 }, { "epoch": 3.888780049793886, "grad_norm": 2.0552475452423096, "learning_rate": 1.7490187986760937e-05, "loss": 0.5192, "step": 23822 }, { "epoch": 3.8889433084363905, "grad_norm": 1.790759801864624, "learning_rate": 1.748997773467963e-05, "loss": 0.4299, "step": 23823 }, { "epoch": 3.889106567078895, "grad_norm": 1.9428870677947998, "learning_rate": 1.7489767475055924e-05, "loss": 0.4173, "step": 23824 }, { "epoch": 3.8892698257213993, "grad_norm": 1.731713056564331, "learning_rate": 1.7489557207890025e-05, "loss": 0.4269, "step": 23825 }, { "epoch": 3.889433084363904, "grad_norm": 2.1512997150421143, "learning_rate": 1.748934693318214e-05, "loss": 0.4896, "step": 23826 }, { "epoch": 3.8895963430064078, "grad_norm": 2.0283117294311523, "learning_rate": 1.7489136650932484e-05, "loss": 0.5322, "step": 23827 }, { "epoch": 3.889759601648912, "grad_norm": 1.66854727268219, "learning_rate": 1.748892636114127e-05, "loss": 0.4219, "step": 23828 }, { "epoch": 3.8899228602914167, "grad_norm": 1.9000486135482788, "learning_rate": 1.7488716063808708e-05, "loss": 0.4679, "step": 23829 }, { "epoch": 3.890086118933921, "grad_norm": 2.688830852508545, "learning_rate": 1.7488505758935016e-05, "loss": 0.5283, "step": 23830 }, { "epoch": 3.8902493775764255, "grad_norm": 2.1695680618286133, "learning_rate": 1.7488295446520394e-05, "loss": 0.4974, "step": 23831 }, { "epoch": 3.89041263621893, "grad_norm": 2.2009339332580566, "learning_rate": 1.7488085126565064e-05, "loss": 0.4609, "step": 23832 }, { "epoch": 3.890575894861434, "grad_norm": 1.7478480339050293, "learning_rate": 1.7487874799069234e-05, "loss": 0.3665, "step": 23833 }, { "epoch": 3.8907391535039384, "grad_norm": 1.693917989730835, "learning_rate": 1.7487664464033116e-05, "loss": 0.4602, "step": 23834 }, { "epoch": 3.890902412146443, "grad_norm": 2.192431926727295, "learning_rate": 1.748745412145692e-05, "loss": 0.5258, "step": 23835 }, { "epoch": 3.8910656707889473, "grad_norm": 1.637237787246704, "learning_rate": 1.7487243771340862e-05, "loss": 0.44, "step": 23836 }, { "epoch": 3.8912289294314517, "grad_norm": 2.172325849533081, "learning_rate": 1.7487033413685153e-05, "loss": 0.4977, "step": 23837 }, { "epoch": 3.891392188073956, "grad_norm": 2.144050359725952, "learning_rate": 1.748682304849e-05, "loss": 0.4961, "step": 23838 }, { "epoch": 3.8915554467164606, "grad_norm": 1.5756083726882935, "learning_rate": 1.748661267575562e-05, "loss": 0.4016, "step": 23839 }, { "epoch": 3.891718705358965, "grad_norm": 1.7350856065750122, "learning_rate": 1.7486402295482224e-05, "loss": 0.3882, "step": 23840 }, { "epoch": 3.8918819640014695, "grad_norm": 1.8631932735443115, "learning_rate": 1.748619190767002e-05, "loss": 0.5221, "step": 23841 }, { "epoch": 3.892045222643974, "grad_norm": 1.843705415725708, "learning_rate": 1.7485981512319225e-05, "loss": 0.4536, "step": 23842 }, { "epoch": 3.8922084812864783, "grad_norm": 1.9040157794952393, "learning_rate": 1.748577110943005e-05, "loss": 0.4499, "step": 23843 }, { "epoch": 3.8923717399289823, "grad_norm": 2.528174638748169, "learning_rate": 1.74855606990027e-05, "loss": 0.5044, "step": 23844 }, { "epoch": 3.8925349985714868, "grad_norm": 1.913366675376892, "learning_rate": 1.7485350281037394e-05, "loss": 0.4963, "step": 23845 }, { "epoch": 3.892698257213991, "grad_norm": 1.8456485271453857, "learning_rate": 1.7485139855534346e-05, "loss": 0.3952, "step": 23846 }, { "epoch": 3.8928615158564956, "grad_norm": 1.7815072536468506, "learning_rate": 1.748492942249376e-05, "loss": 0.4404, "step": 23847 }, { "epoch": 3.893024774499, "grad_norm": 2.025709390640259, "learning_rate": 1.7484718981915854e-05, "loss": 0.4734, "step": 23848 }, { "epoch": 3.8931880331415045, "grad_norm": 1.8652702569961548, "learning_rate": 1.7484508533800838e-05, "loss": 0.5247, "step": 23849 }, { "epoch": 3.893351291784009, "grad_norm": 2.0388827323913574, "learning_rate": 1.7484298078148926e-05, "loss": 0.4634, "step": 23850 }, { "epoch": 3.893514550426513, "grad_norm": 1.685713291168213, "learning_rate": 1.7484087614960325e-05, "loss": 0.4486, "step": 23851 }, { "epoch": 3.8936778090690174, "grad_norm": 1.46418035030365, "learning_rate": 1.748387714423525e-05, "loss": 0.3355, "step": 23852 }, { "epoch": 3.893841067711522, "grad_norm": 1.7546802759170532, "learning_rate": 1.7483666665973912e-05, "loss": 0.4202, "step": 23853 }, { "epoch": 3.8940043263540263, "grad_norm": 1.4761430025100708, "learning_rate": 1.7483456180176524e-05, "loss": 0.3544, "step": 23854 }, { "epoch": 3.8941675849965307, "grad_norm": 1.7844141721725464, "learning_rate": 1.74832456868433e-05, "loss": 0.4694, "step": 23855 }, { "epoch": 3.894330843639035, "grad_norm": 2.381570339202881, "learning_rate": 1.748303518597445e-05, "loss": 0.4632, "step": 23856 }, { "epoch": 3.8944941022815396, "grad_norm": 1.7313669919967651, "learning_rate": 1.7482824677570183e-05, "loss": 0.3823, "step": 23857 }, { "epoch": 3.894657360924044, "grad_norm": 1.6822295188903809, "learning_rate": 1.7482614161630714e-05, "loss": 0.4182, "step": 23858 }, { "epoch": 3.8948206195665485, "grad_norm": 1.7117778062820435, "learning_rate": 1.7482403638156254e-05, "loss": 0.4324, "step": 23859 }, { "epoch": 3.894983878209053, "grad_norm": 1.9198715686798096, "learning_rate": 1.7482193107147012e-05, "loss": 0.4672, "step": 23860 }, { "epoch": 3.8951471368515573, "grad_norm": 2.0141613483428955, "learning_rate": 1.748198256860321e-05, "loss": 0.5589, "step": 23861 }, { "epoch": 3.8953103954940613, "grad_norm": 2.041499376296997, "learning_rate": 1.748177202252505e-05, "loss": 0.4754, "step": 23862 }, { "epoch": 3.8954736541365658, "grad_norm": 1.827560544013977, "learning_rate": 1.7481561468912753e-05, "loss": 0.4524, "step": 23863 }, { "epoch": 3.89563691277907, "grad_norm": 2.029911756515503, "learning_rate": 1.7481350907766522e-05, "loss": 0.5111, "step": 23864 }, { "epoch": 3.8958001714215746, "grad_norm": 2.243804454803467, "learning_rate": 1.748114033908657e-05, "loss": 0.5654, "step": 23865 }, { "epoch": 3.895963430064079, "grad_norm": 2.0367565155029297, "learning_rate": 1.7480929762873112e-05, "loss": 0.4779, "step": 23866 }, { "epoch": 3.8961266887065835, "grad_norm": 2.453911781311035, "learning_rate": 1.7480719179126362e-05, "loss": 0.5874, "step": 23867 }, { "epoch": 3.896289947349088, "grad_norm": 1.937495231628418, "learning_rate": 1.748050858784653e-05, "loss": 0.4618, "step": 23868 }, { "epoch": 3.896453205991592, "grad_norm": 1.7666090726852417, "learning_rate": 1.7480297989033824e-05, "loss": 0.4343, "step": 23869 }, { "epoch": 3.8966164646340964, "grad_norm": 2.123525619506836, "learning_rate": 1.7480087382688466e-05, "loss": 0.5195, "step": 23870 }, { "epoch": 3.896779723276601, "grad_norm": 1.9825831651687622, "learning_rate": 1.7479876768810657e-05, "loss": 0.4489, "step": 23871 }, { "epoch": 3.8969429819191053, "grad_norm": 2.4854183197021484, "learning_rate": 1.7479666147400614e-05, "loss": 0.6297, "step": 23872 }, { "epoch": 3.8971062405616097, "grad_norm": 1.5996588468551636, "learning_rate": 1.7479455518458555e-05, "loss": 0.3756, "step": 23873 }, { "epoch": 3.897269499204114, "grad_norm": 1.8512053489685059, "learning_rate": 1.747924488198468e-05, "loss": 0.4803, "step": 23874 }, { "epoch": 3.8974327578466186, "grad_norm": 1.9689805507659912, "learning_rate": 1.7479034237979212e-05, "loss": 0.4713, "step": 23875 }, { "epoch": 3.897596016489123, "grad_norm": 2.095067024230957, "learning_rate": 1.7478823586442355e-05, "loss": 0.581, "step": 23876 }, { "epoch": 3.8977592751316275, "grad_norm": 1.7630119323730469, "learning_rate": 1.7478612927374326e-05, "loss": 0.4142, "step": 23877 }, { "epoch": 3.897922533774132, "grad_norm": 1.7824336290359497, "learning_rate": 1.7478402260775336e-05, "loss": 0.4387, "step": 23878 }, { "epoch": 3.8980857924166363, "grad_norm": 1.8317912817001343, "learning_rate": 1.7478191586645597e-05, "loss": 0.4415, "step": 23879 }, { "epoch": 3.8982490510591403, "grad_norm": 1.994340419769287, "learning_rate": 1.747798090498532e-05, "loss": 0.4957, "step": 23880 }, { "epoch": 3.8984123097016448, "grad_norm": 1.8859870433807373, "learning_rate": 1.7477770215794717e-05, "loss": 0.4022, "step": 23881 }, { "epoch": 3.898575568344149, "grad_norm": 2.099963426589966, "learning_rate": 1.7477559519074006e-05, "loss": 0.5351, "step": 23882 }, { "epoch": 3.8987388269866536, "grad_norm": 1.8907684087753296, "learning_rate": 1.7477348814823392e-05, "loss": 0.4801, "step": 23883 }, { "epoch": 3.898902085629158, "grad_norm": 1.7323837280273438, "learning_rate": 1.747713810304309e-05, "loss": 0.4506, "step": 23884 }, { "epoch": 3.8990653442716625, "grad_norm": 1.6165430545806885, "learning_rate": 1.747692738373331e-05, "loss": 0.3844, "step": 23885 }, { "epoch": 3.8992286029141665, "grad_norm": 2.121844530105591, "learning_rate": 1.747671665689427e-05, "loss": 0.5511, "step": 23886 }, { "epoch": 3.899391861556671, "grad_norm": 1.959507703781128, "learning_rate": 1.7476505922526173e-05, "loss": 0.4867, "step": 23887 }, { "epoch": 3.8995551201991754, "grad_norm": 1.8857344388961792, "learning_rate": 1.7476295180629238e-05, "loss": 0.4467, "step": 23888 }, { "epoch": 3.89971837884168, "grad_norm": 1.8292381763458252, "learning_rate": 1.7476084431203677e-05, "loss": 0.4384, "step": 23889 }, { "epoch": 3.8998816374841843, "grad_norm": 1.56513512134552, "learning_rate": 1.7475873674249704e-05, "loss": 0.3845, "step": 23890 }, { "epoch": 3.9000448961266887, "grad_norm": 2.170381784439087, "learning_rate": 1.7475662909767523e-05, "loss": 0.471, "step": 23891 }, { "epoch": 3.900208154769193, "grad_norm": 2.025282144546509, "learning_rate": 1.7475452137757357e-05, "loss": 0.4669, "step": 23892 }, { "epoch": 3.9003714134116976, "grad_norm": 2.774904251098633, "learning_rate": 1.747524135821941e-05, "loss": 0.5405, "step": 23893 }, { "epoch": 3.900534672054202, "grad_norm": 2.1773300170898438, "learning_rate": 1.7475030571153893e-05, "loss": 0.4867, "step": 23894 }, { "epoch": 3.9006979306967065, "grad_norm": 1.8798518180847168, "learning_rate": 1.7474819776561025e-05, "loss": 0.4265, "step": 23895 }, { "epoch": 3.900861189339211, "grad_norm": 2.009694814682007, "learning_rate": 1.7474608974441016e-05, "loss": 0.5017, "step": 23896 }, { "epoch": 3.901024447981715, "grad_norm": 1.8760039806365967, "learning_rate": 1.7474398164794076e-05, "loss": 0.4576, "step": 23897 }, { "epoch": 3.9011877066242193, "grad_norm": 2.5337209701538086, "learning_rate": 1.747418734762042e-05, "loss": 0.5707, "step": 23898 }, { "epoch": 3.9013509652667238, "grad_norm": 1.7248964309692383, "learning_rate": 1.7473976522920262e-05, "loss": 0.397, "step": 23899 }, { "epoch": 3.901514223909228, "grad_norm": 1.7351590394973755, "learning_rate": 1.7473765690693812e-05, "loss": 0.3972, "step": 23900 }, { "epoch": 3.9016774825517326, "grad_norm": 2.1231534481048584, "learning_rate": 1.7473554850941278e-05, "loss": 0.5064, "step": 23901 }, { "epoch": 3.901840741194237, "grad_norm": 2.19242525100708, "learning_rate": 1.7473344003662877e-05, "loss": 0.4792, "step": 23902 }, { "epoch": 3.9020039998367415, "grad_norm": 1.9903329610824585, "learning_rate": 1.7473133148858825e-05, "loss": 0.4794, "step": 23903 }, { "epoch": 3.9021672584792455, "grad_norm": 1.9561694860458374, "learning_rate": 1.7472922286529325e-05, "loss": 0.4849, "step": 23904 }, { "epoch": 3.90233051712175, "grad_norm": 1.8164950609207153, "learning_rate": 1.7472711416674595e-05, "loss": 0.4485, "step": 23905 }, { "epoch": 3.9024937757642544, "grad_norm": 1.9468854665756226, "learning_rate": 1.7472500539294848e-05, "loss": 0.5162, "step": 23906 }, { "epoch": 3.902657034406759, "grad_norm": 1.8991676568984985, "learning_rate": 1.747228965439029e-05, "loss": 0.4382, "step": 23907 }, { "epoch": 3.9028202930492633, "grad_norm": 2.01000714302063, "learning_rate": 1.7472078761961144e-05, "loss": 0.5271, "step": 23908 }, { "epoch": 3.9029835516917677, "grad_norm": 1.9040812253952026, "learning_rate": 1.7471867862007617e-05, "loss": 0.532, "step": 23909 }, { "epoch": 3.903146810334272, "grad_norm": 2.2548062801361084, "learning_rate": 1.747165695452992e-05, "loss": 0.5504, "step": 23910 }, { "epoch": 3.9033100689767766, "grad_norm": 2.352060317993164, "learning_rate": 1.7471446039528262e-05, "loss": 0.4857, "step": 23911 }, { "epoch": 3.903473327619281, "grad_norm": 1.7890734672546387, "learning_rate": 1.7471235117002865e-05, "loss": 0.4396, "step": 23912 }, { "epoch": 3.9036365862617854, "grad_norm": 2.0777344703674316, "learning_rate": 1.7471024186953936e-05, "loss": 0.4909, "step": 23913 }, { "epoch": 3.90379984490429, "grad_norm": 1.879359483718872, "learning_rate": 1.7470813249381686e-05, "loss": 0.466, "step": 23914 }, { "epoch": 3.903963103546794, "grad_norm": 2.233551263809204, "learning_rate": 1.7470602304286328e-05, "loss": 0.5638, "step": 23915 }, { "epoch": 3.9041263621892983, "grad_norm": 2.0354557037353516, "learning_rate": 1.7470391351668076e-05, "loss": 0.5261, "step": 23916 }, { "epoch": 3.9042896208318028, "grad_norm": 1.893904209136963, "learning_rate": 1.7470180391527142e-05, "loss": 0.5317, "step": 23917 }, { "epoch": 3.904452879474307, "grad_norm": 1.887716293334961, "learning_rate": 1.746996942386374e-05, "loss": 0.4745, "step": 23918 }, { "epoch": 3.9046161381168116, "grad_norm": 2.457451581954956, "learning_rate": 1.746975844867808e-05, "loss": 0.5192, "step": 23919 }, { "epoch": 3.904779396759316, "grad_norm": 1.6762768030166626, "learning_rate": 1.7469547465970373e-05, "loss": 0.4538, "step": 23920 }, { "epoch": 3.90494265540182, "grad_norm": 1.7850768566131592, "learning_rate": 1.7469336475740836e-05, "loss": 0.5206, "step": 23921 }, { "epoch": 3.9051059140443245, "grad_norm": 1.7130217552185059, "learning_rate": 1.746912547798968e-05, "loss": 0.4271, "step": 23922 }, { "epoch": 3.905269172686829, "grad_norm": 1.7696126699447632, "learning_rate": 1.7468914472717117e-05, "loss": 0.4903, "step": 23923 }, { "epoch": 3.9054324313293334, "grad_norm": 1.8045097589492798, "learning_rate": 1.746870345992336e-05, "loss": 0.4188, "step": 23924 }, { "epoch": 3.905595689971838, "grad_norm": 2.283768653869629, "learning_rate": 1.7468492439608614e-05, "loss": 0.4976, "step": 23925 }, { "epoch": 3.9057589486143423, "grad_norm": 2.0377895832061768, "learning_rate": 1.7468281411773102e-05, "loss": 0.524, "step": 23926 }, { "epoch": 3.9059222072568467, "grad_norm": 2.279463291168213, "learning_rate": 1.746807037641703e-05, "loss": 0.5925, "step": 23927 }, { "epoch": 3.906085465899351, "grad_norm": 1.9054603576660156, "learning_rate": 1.7467859333540616e-05, "loss": 0.5242, "step": 23928 }, { "epoch": 3.9062487245418556, "grad_norm": 2.318444013595581, "learning_rate": 1.7467648283144073e-05, "loss": 0.5348, "step": 23929 }, { "epoch": 3.90641198318436, "grad_norm": 2.084970235824585, "learning_rate": 1.7467437225227605e-05, "loss": 0.5447, "step": 23930 }, { "epoch": 3.9065752418268644, "grad_norm": 2.356926918029785, "learning_rate": 1.746722615979143e-05, "loss": 0.4589, "step": 23931 }, { "epoch": 3.9067385004693684, "grad_norm": 1.6220520734786987, "learning_rate": 1.7467015086835763e-05, "loss": 0.4546, "step": 23932 }, { "epoch": 3.906901759111873, "grad_norm": 1.6936687231063843, "learning_rate": 1.7466804006360812e-05, "loss": 0.4483, "step": 23933 }, { "epoch": 3.9070650177543773, "grad_norm": 2.3618946075439453, "learning_rate": 1.746659291836679e-05, "loss": 0.8283, "step": 23934 }, { "epoch": 3.9072282763968817, "grad_norm": 1.827668309211731, "learning_rate": 1.7466381822853915e-05, "loss": 0.4685, "step": 23935 }, { "epoch": 3.907391535039386, "grad_norm": 2.4176347255706787, "learning_rate": 1.7466170719822395e-05, "loss": 0.5913, "step": 23936 }, { "epoch": 3.9075547936818906, "grad_norm": 2.12162446975708, "learning_rate": 1.746595960927244e-05, "loss": 0.5589, "step": 23937 }, { "epoch": 3.907718052324395, "grad_norm": 1.6132596731185913, "learning_rate": 1.7465748491204265e-05, "loss": 0.4007, "step": 23938 }, { "epoch": 3.907881310966899, "grad_norm": 1.7571297883987427, "learning_rate": 1.7465537365618087e-05, "loss": 0.4361, "step": 23939 }, { "epoch": 3.9080445696094035, "grad_norm": 1.960942029953003, "learning_rate": 1.7465326232514113e-05, "loss": 0.5175, "step": 23940 }, { "epoch": 3.908207828251908, "grad_norm": 1.8142931461334229, "learning_rate": 1.7465115091892556e-05, "loss": 0.438, "step": 23941 }, { "epoch": 3.9083710868944124, "grad_norm": 2.2404773235321045, "learning_rate": 1.746490394375363e-05, "loss": 0.4899, "step": 23942 }, { "epoch": 3.908534345536917, "grad_norm": 1.8379714488983154, "learning_rate": 1.7464692788097554e-05, "loss": 0.4874, "step": 23943 }, { "epoch": 3.9086976041794212, "grad_norm": 1.65809965133667, "learning_rate": 1.7464481624924527e-05, "loss": 0.3911, "step": 23944 }, { "epoch": 3.9088608628219257, "grad_norm": 1.8126819133758545, "learning_rate": 1.7464270454234773e-05, "loss": 0.4417, "step": 23945 }, { "epoch": 3.90902412146443, "grad_norm": 1.9984692335128784, "learning_rate": 1.7464059276028497e-05, "loss": 0.4707, "step": 23946 }, { "epoch": 3.9091873801069346, "grad_norm": 1.8024210929870605, "learning_rate": 1.7463848090305918e-05, "loss": 0.4519, "step": 23947 }, { "epoch": 3.909350638749439, "grad_norm": 1.64905846118927, "learning_rate": 1.7463636897067243e-05, "loss": 0.439, "step": 23948 }, { "epoch": 3.9095138973919434, "grad_norm": 2.1868960857391357, "learning_rate": 1.7463425696312692e-05, "loss": 0.5366, "step": 23949 }, { "epoch": 3.9096771560344474, "grad_norm": 2.0600368976593018, "learning_rate": 1.7463214488042472e-05, "loss": 0.5454, "step": 23950 }, { "epoch": 3.909840414676952, "grad_norm": 2.056631088256836, "learning_rate": 1.7463003272256796e-05, "loss": 0.5315, "step": 23951 }, { "epoch": 3.9100036733194563, "grad_norm": 2.4596338272094727, "learning_rate": 1.7462792048955878e-05, "loss": 0.626, "step": 23952 }, { "epoch": 3.9101669319619607, "grad_norm": 2.0179765224456787, "learning_rate": 1.746258081813993e-05, "loss": 0.4447, "step": 23953 }, { "epoch": 3.910330190604465, "grad_norm": 2.0815253257751465, "learning_rate": 1.7462369579809166e-05, "loss": 0.5715, "step": 23954 }, { "epoch": 3.9104934492469696, "grad_norm": 2.0564374923706055, "learning_rate": 1.74621583339638e-05, "loss": 0.4511, "step": 23955 }, { "epoch": 3.910656707889474, "grad_norm": 1.9918384552001953, "learning_rate": 1.746194708060404e-05, "loss": 0.4438, "step": 23956 }, { "epoch": 3.910819966531978, "grad_norm": 1.8449569940567017, "learning_rate": 1.7461735819730095e-05, "loss": 0.3823, "step": 23957 }, { "epoch": 3.9109832251744825, "grad_norm": 1.605424165725708, "learning_rate": 1.7461524551342193e-05, "loss": 0.414, "step": 23958 }, { "epoch": 3.911146483816987, "grad_norm": 1.9942340850830078, "learning_rate": 1.7461313275440536e-05, "loss": 0.4794, "step": 23959 }, { "epoch": 3.9113097424594914, "grad_norm": 2.084458827972412, "learning_rate": 1.7461101992025334e-05, "loss": 0.5042, "step": 23960 }, { "epoch": 3.911473001101996, "grad_norm": 1.7471436262130737, "learning_rate": 1.7460890701096807e-05, "loss": 0.4599, "step": 23961 }, { "epoch": 3.9116362597445002, "grad_norm": 2.066012382507324, "learning_rate": 1.7460679402655166e-05, "loss": 0.4021, "step": 23962 }, { "epoch": 3.9117995183870047, "grad_norm": 2.1312599182128906, "learning_rate": 1.7460468096700624e-05, "loss": 0.5514, "step": 23963 }, { "epoch": 3.911962777029509, "grad_norm": 2.1156513690948486, "learning_rate": 1.7460256783233395e-05, "loss": 0.4537, "step": 23964 }, { "epoch": 3.9121260356720136, "grad_norm": 2.206556797027588, "learning_rate": 1.7460045462253684e-05, "loss": 0.5406, "step": 23965 }, { "epoch": 3.912289294314518, "grad_norm": 2.033421039581299, "learning_rate": 1.745983413376171e-05, "loss": 0.4694, "step": 23966 }, { "epoch": 3.9124525529570224, "grad_norm": 1.8892335891723633, "learning_rate": 1.7459622797757685e-05, "loss": 0.5085, "step": 23967 }, { "epoch": 3.9126158115995264, "grad_norm": 1.9789210557937622, "learning_rate": 1.7459411454241822e-05, "loss": 0.4874, "step": 23968 }, { "epoch": 3.912779070242031, "grad_norm": 2.0771327018737793, "learning_rate": 1.7459200103214336e-05, "loss": 0.5752, "step": 23969 }, { "epoch": 3.9129423288845353, "grad_norm": 1.855369210243225, "learning_rate": 1.7458988744675433e-05, "loss": 0.4508, "step": 23970 }, { "epoch": 3.9131055875270397, "grad_norm": 2.1267387866973877, "learning_rate": 1.7458777378625337e-05, "loss": 0.4956, "step": 23971 }, { "epoch": 3.913268846169544, "grad_norm": 1.6483250856399536, "learning_rate": 1.7458566005064247e-05, "loss": 0.4275, "step": 23972 }, { "epoch": 3.9134321048120486, "grad_norm": 1.9137766361236572, "learning_rate": 1.7458354623992387e-05, "loss": 0.4959, "step": 23973 }, { "epoch": 3.9135953634545526, "grad_norm": 1.762290596961975, "learning_rate": 1.7458143235409964e-05, "loss": 0.4051, "step": 23974 }, { "epoch": 3.913758622097057, "grad_norm": 1.984047293663025, "learning_rate": 1.7457931839317195e-05, "loss": 0.4432, "step": 23975 }, { "epoch": 3.9139218807395615, "grad_norm": 2.509962558746338, "learning_rate": 1.745772043571429e-05, "loss": 0.488, "step": 23976 }, { "epoch": 3.914085139382066, "grad_norm": 1.8473174571990967, "learning_rate": 1.745750902460146e-05, "loss": 0.432, "step": 23977 }, { "epoch": 3.9142483980245704, "grad_norm": 2.131563663482666, "learning_rate": 1.7457297605978922e-05, "loss": 0.464, "step": 23978 }, { "epoch": 3.914411656667075, "grad_norm": 1.90422523021698, "learning_rate": 1.7457086179846888e-05, "loss": 0.5037, "step": 23979 }, { "epoch": 3.9145749153095792, "grad_norm": 1.5636345148086548, "learning_rate": 1.7456874746205568e-05, "loss": 0.436, "step": 23980 }, { "epoch": 3.9147381739520837, "grad_norm": 1.9329279661178589, "learning_rate": 1.7456663305055183e-05, "loss": 0.4706, "step": 23981 }, { "epoch": 3.914901432594588, "grad_norm": 1.917812466621399, "learning_rate": 1.745645185639593e-05, "loss": 0.3995, "step": 23982 }, { "epoch": 3.9150646912370926, "grad_norm": 1.7279753684997559, "learning_rate": 1.745624040022804e-05, "loss": 0.4827, "step": 23983 }, { "epoch": 3.915227949879597, "grad_norm": 1.824454665184021, "learning_rate": 1.7456028936551716e-05, "loss": 0.5234, "step": 23984 }, { "epoch": 3.915391208522101, "grad_norm": 2.1512248516082764, "learning_rate": 1.7455817465367175e-05, "loss": 0.4284, "step": 23985 }, { "epoch": 3.9155544671646054, "grad_norm": 2.1519880294799805, "learning_rate": 1.7455605986674622e-05, "loss": 0.493, "step": 23986 }, { "epoch": 3.91571772580711, "grad_norm": 1.7704869508743286, "learning_rate": 1.745539450047428e-05, "loss": 0.5065, "step": 23987 }, { "epoch": 3.9158809844496143, "grad_norm": 2.191248893737793, "learning_rate": 1.745518300676636e-05, "loss": 0.5124, "step": 23988 }, { "epoch": 3.9160442430921187, "grad_norm": 1.885573387145996, "learning_rate": 1.7454971505551068e-05, "loss": 0.4761, "step": 23989 }, { "epoch": 3.916207501734623, "grad_norm": 1.9792579412460327, "learning_rate": 1.7454759996828622e-05, "loss": 0.4833, "step": 23990 }, { "epoch": 3.9163707603771276, "grad_norm": 2.3809688091278076, "learning_rate": 1.745454848059924e-05, "loss": 0.5825, "step": 23991 }, { "epoch": 3.9165340190196316, "grad_norm": 1.7451552152633667, "learning_rate": 1.7454336956863124e-05, "loss": 0.4941, "step": 23992 }, { "epoch": 3.916697277662136, "grad_norm": 2.389880895614624, "learning_rate": 1.745412542562049e-05, "loss": 0.5475, "step": 23993 }, { "epoch": 3.9168605363046405, "grad_norm": 1.729797601699829, "learning_rate": 1.745391388687156e-05, "loss": 0.4123, "step": 23994 }, { "epoch": 3.917023794947145, "grad_norm": 2.0040953159332275, "learning_rate": 1.745370234061654e-05, "loss": 0.5087, "step": 23995 }, { "epoch": 3.9171870535896494, "grad_norm": 2.3683366775512695, "learning_rate": 1.745349078685564e-05, "loss": 0.5224, "step": 23996 }, { "epoch": 3.917350312232154, "grad_norm": 2.011293649673462, "learning_rate": 1.7453279225589082e-05, "loss": 0.4904, "step": 23997 }, { "epoch": 3.9175135708746582, "grad_norm": 2.195147752761841, "learning_rate": 1.745306765681707e-05, "loss": 0.8397, "step": 23998 }, { "epoch": 3.9176768295171627, "grad_norm": 1.892170786857605, "learning_rate": 1.7452856080539823e-05, "loss": 0.515, "step": 23999 }, { "epoch": 3.917840088159667, "grad_norm": 1.714804768562317, "learning_rate": 1.745264449675755e-05, "loss": 0.4137, "step": 24000 }, { "epoch": 3.9180033468021715, "grad_norm": 2.012085437774658, "learning_rate": 1.7452432905470465e-05, "loss": 0.4828, "step": 24001 }, { "epoch": 3.918166605444676, "grad_norm": 1.903132677078247, "learning_rate": 1.7452221306678785e-05, "loss": 0.5174, "step": 24002 }, { "epoch": 3.91832986408718, "grad_norm": 1.6091960668563843, "learning_rate": 1.7452009700382717e-05, "loss": 0.4279, "step": 24003 }, { "epoch": 3.9184931227296844, "grad_norm": 1.6562248468399048, "learning_rate": 1.745179808658248e-05, "loss": 0.4364, "step": 24004 }, { "epoch": 3.918656381372189, "grad_norm": 2.0175023078918457, "learning_rate": 1.7451586465278284e-05, "loss": 0.4702, "step": 24005 }, { "epoch": 3.9188196400146933, "grad_norm": 2.057021141052246, "learning_rate": 1.7451374836470337e-05, "loss": 0.5309, "step": 24006 }, { "epoch": 3.9189828986571977, "grad_norm": 2.3321962356567383, "learning_rate": 1.7451163200158865e-05, "loss": 0.6011, "step": 24007 }, { "epoch": 3.919146157299702, "grad_norm": 1.9632493257522583, "learning_rate": 1.7450951556344073e-05, "loss": 0.486, "step": 24008 }, { "epoch": 3.919309415942206, "grad_norm": 1.8041930198669434, "learning_rate": 1.745073990502617e-05, "loss": 0.4452, "step": 24009 }, { "epoch": 3.9194726745847106, "grad_norm": 1.9169249534606934, "learning_rate": 1.7450528246205374e-05, "loss": 0.5573, "step": 24010 }, { "epoch": 3.919635933227215, "grad_norm": 1.8610533475875854, "learning_rate": 1.74503165798819e-05, "loss": 0.4614, "step": 24011 }, { "epoch": 3.9197991918697195, "grad_norm": 1.8388113975524902, "learning_rate": 1.7450104906055963e-05, "loss": 0.4289, "step": 24012 }, { "epoch": 3.919962450512224, "grad_norm": 2.123126983642578, "learning_rate": 1.744989322472777e-05, "loss": 0.4027, "step": 24013 }, { "epoch": 3.9201257091547284, "grad_norm": 2.2270874977111816, "learning_rate": 1.7449681535897537e-05, "loss": 0.6047, "step": 24014 }, { "epoch": 3.920288967797233, "grad_norm": 1.670896291732788, "learning_rate": 1.7449469839565472e-05, "loss": 0.4524, "step": 24015 }, { "epoch": 3.9204522264397372, "grad_norm": 1.7772091627120972, "learning_rate": 1.7449258135731796e-05, "loss": 0.4361, "step": 24016 }, { "epoch": 3.9206154850822417, "grad_norm": 2.2124319076538086, "learning_rate": 1.7449046424396718e-05, "loss": 0.5523, "step": 24017 }, { "epoch": 3.920778743724746, "grad_norm": 2.313669204711914, "learning_rate": 1.7448834705560455e-05, "loss": 0.5735, "step": 24018 }, { "epoch": 3.9209420023672505, "grad_norm": 2.2359588146209717, "learning_rate": 1.7448622979223214e-05, "loss": 0.5295, "step": 24019 }, { "epoch": 3.9211052610097545, "grad_norm": 1.5773550271987915, "learning_rate": 1.7448411245385215e-05, "loss": 0.4301, "step": 24020 }, { "epoch": 3.921268519652259, "grad_norm": 2.120574951171875, "learning_rate": 1.7448199504046665e-05, "loss": 0.5011, "step": 24021 }, { "epoch": 3.9214317782947634, "grad_norm": 2.1078503131866455, "learning_rate": 1.7447987755207783e-05, "loss": 0.596, "step": 24022 }, { "epoch": 3.921595036937268, "grad_norm": 1.8106762170791626, "learning_rate": 1.7447775998868778e-05, "loss": 0.4635, "step": 24023 }, { "epoch": 3.9217582955797723, "grad_norm": 2.000761032104492, "learning_rate": 1.7447564235029864e-05, "loss": 0.4747, "step": 24024 }, { "epoch": 3.9219215542222767, "grad_norm": 1.804222583770752, "learning_rate": 1.744735246369125e-05, "loss": 0.4652, "step": 24025 }, { "epoch": 3.922084812864781, "grad_norm": 1.9302278757095337, "learning_rate": 1.7447140684853162e-05, "loss": 0.4366, "step": 24026 }, { "epoch": 3.922248071507285, "grad_norm": 1.6722393035888672, "learning_rate": 1.7446928898515803e-05, "loss": 0.3958, "step": 24027 }, { "epoch": 3.9224113301497896, "grad_norm": 1.7779250144958496, "learning_rate": 1.7446717104679388e-05, "loss": 0.4783, "step": 24028 }, { "epoch": 3.922574588792294, "grad_norm": 2.1050548553466797, "learning_rate": 1.7446505303344132e-05, "loss": 0.583, "step": 24029 }, { "epoch": 3.9227378474347985, "grad_norm": 1.871662974357605, "learning_rate": 1.7446293494510246e-05, "loss": 0.4595, "step": 24030 }, { "epoch": 3.922901106077303, "grad_norm": 1.4116753339767456, "learning_rate": 1.7446081678177943e-05, "loss": 0.381, "step": 24031 }, { "epoch": 3.9230643647198074, "grad_norm": 2.1021010875701904, "learning_rate": 1.7445869854347436e-05, "loss": 0.5105, "step": 24032 }, { "epoch": 3.923227623362312, "grad_norm": 1.9684844017028809, "learning_rate": 1.7445658023018947e-05, "loss": 0.4416, "step": 24033 }, { "epoch": 3.9233908820048162, "grad_norm": 1.7304447889328003, "learning_rate": 1.7445446184192674e-05, "loss": 0.4421, "step": 24034 }, { "epoch": 3.9235541406473207, "grad_norm": 1.8597270250320435, "learning_rate": 1.7445234337868842e-05, "loss": 0.5007, "step": 24035 }, { "epoch": 3.923717399289825, "grad_norm": 1.9052553176879883, "learning_rate": 1.7445022484047664e-05, "loss": 0.4563, "step": 24036 }, { "epoch": 3.9238806579323295, "grad_norm": 1.9475653171539307, "learning_rate": 1.744481062272935e-05, "loss": 0.414, "step": 24037 }, { "epoch": 3.9240439165748335, "grad_norm": 1.5312649011611938, "learning_rate": 1.744459875391411e-05, "loss": 0.3573, "step": 24038 }, { "epoch": 3.924207175217338, "grad_norm": 1.8824281692504883, "learning_rate": 1.7444386877602163e-05, "loss": 0.4885, "step": 24039 }, { "epoch": 3.9243704338598424, "grad_norm": 2.009709596633911, "learning_rate": 1.744417499379372e-05, "loss": 0.5429, "step": 24040 }, { "epoch": 3.924533692502347, "grad_norm": 2.1415462493896484, "learning_rate": 1.7443963102488998e-05, "loss": 0.4767, "step": 24041 }, { "epoch": 3.9246969511448513, "grad_norm": 1.9670565128326416, "learning_rate": 1.74437512036882e-05, "loss": 0.4779, "step": 24042 }, { "epoch": 3.9248602097873557, "grad_norm": 1.8450887203216553, "learning_rate": 1.7443539297391553e-05, "loss": 0.4925, "step": 24043 }, { "epoch": 3.92502346842986, "grad_norm": 2.3261637687683105, "learning_rate": 1.7443327383599263e-05, "loss": 0.5877, "step": 24044 }, { "epoch": 3.925186727072364, "grad_norm": 1.9591796398162842, "learning_rate": 1.744311546231154e-05, "loss": 0.5242, "step": 24045 }, { "epoch": 3.9253499857148686, "grad_norm": 2.083953619003296, "learning_rate": 1.7442903533528608e-05, "loss": 0.4239, "step": 24046 }, { "epoch": 3.925513244357373, "grad_norm": 2.171262502670288, "learning_rate": 1.744269159725067e-05, "loss": 0.4964, "step": 24047 }, { "epoch": 3.9256765029998775, "grad_norm": 2.3785293102264404, "learning_rate": 1.7442479653477943e-05, "loss": 0.4528, "step": 24048 }, { "epoch": 3.925839761642382, "grad_norm": 2.261870861053467, "learning_rate": 1.7442267702210642e-05, "loss": 0.5964, "step": 24049 }, { "epoch": 3.9260030202848863, "grad_norm": 2.0390501022338867, "learning_rate": 1.744205574344898e-05, "loss": 0.4244, "step": 24050 }, { "epoch": 3.926166278927391, "grad_norm": 2.004124164581299, "learning_rate": 1.744184377719317e-05, "loss": 0.5583, "step": 24051 }, { "epoch": 3.9263295375698952, "grad_norm": 1.7860928773880005, "learning_rate": 1.7441631803443426e-05, "loss": 0.4224, "step": 24052 }, { "epoch": 3.9264927962123997, "grad_norm": 2.6709847450256348, "learning_rate": 1.744141982219996e-05, "loss": 0.4857, "step": 24053 }, { "epoch": 3.926656054854904, "grad_norm": 2.0079245567321777, "learning_rate": 1.7441207833462986e-05, "loss": 0.4962, "step": 24054 }, { "epoch": 3.9268193134974085, "grad_norm": 2.125631093978882, "learning_rate": 1.7440995837232716e-05, "loss": 0.5166, "step": 24055 }, { "epoch": 3.9269825721399125, "grad_norm": 1.8991504907608032, "learning_rate": 1.7440783833509366e-05, "loss": 0.4802, "step": 24056 }, { "epoch": 3.927145830782417, "grad_norm": 2.0452351570129395, "learning_rate": 1.7440571822293152e-05, "loss": 0.4576, "step": 24057 }, { "epoch": 3.9273090894249214, "grad_norm": 1.6810939311981201, "learning_rate": 1.744035980358428e-05, "loss": 0.3791, "step": 24058 }, { "epoch": 3.927472348067426, "grad_norm": 1.9768201112747192, "learning_rate": 1.744014777738297e-05, "loss": 0.4575, "step": 24059 }, { "epoch": 3.9276356067099303, "grad_norm": 2.420397996902466, "learning_rate": 1.743993574368943e-05, "loss": 0.5039, "step": 24060 }, { "epoch": 3.9277988653524347, "grad_norm": 2.1433568000793457, "learning_rate": 1.7439723702503883e-05, "loss": 0.5264, "step": 24061 }, { "epoch": 3.9279621239949387, "grad_norm": 1.8679838180541992, "learning_rate": 1.743951165382653e-05, "loss": 0.5356, "step": 24062 }, { "epoch": 3.928125382637443, "grad_norm": 1.8804478645324707, "learning_rate": 1.7439299597657593e-05, "loss": 0.4757, "step": 24063 }, { "epoch": 3.9282886412799476, "grad_norm": 1.842559576034546, "learning_rate": 1.7439087533997283e-05, "loss": 0.4983, "step": 24064 }, { "epoch": 3.928451899922452, "grad_norm": 1.841646671295166, "learning_rate": 1.743887546284581e-05, "loss": 0.4529, "step": 24065 }, { "epoch": 3.9286151585649565, "grad_norm": 2.047738552093506, "learning_rate": 1.74386633842034e-05, "loss": 0.455, "step": 24066 }, { "epoch": 3.928778417207461, "grad_norm": 2.146733045578003, "learning_rate": 1.7438451298070252e-05, "loss": 0.5292, "step": 24067 }, { "epoch": 3.9289416758499653, "grad_norm": 1.936222791671753, "learning_rate": 1.7438239204446586e-05, "loss": 0.484, "step": 24068 }, { "epoch": 3.92910493449247, "grad_norm": 2.361083507537842, "learning_rate": 1.7438027103332617e-05, "loss": 0.5395, "step": 24069 }, { "epoch": 3.929268193134974, "grad_norm": 1.6604249477386475, "learning_rate": 1.7437814994728553e-05, "loss": 0.4037, "step": 24070 }, { "epoch": 3.9294314517774787, "grad_norm": 1.8501794338226318, "learning_rate": 1.7437602878634618e-05, "loss": 0.4474, "step": 24071 }, { "epoch": 3.929594710419983, "grad_norm": 1.6228623390197754, "learning_rate": 1.7437390755051013e-05, "loss": 0.3774, "step": 24072 }, { "epoch": 3.929757969062487, "grad_norm": 1.940098762512207, "learning_rate": 1.743717862397796e-05, "loss": 0.5356, "step": 24073 }, { "epoch": 3.9299212277049915, "grad_norm": 1.6857881546020508, "learning_rate": 1.743696648541567e-05, "loss": 0.4516, "step": 24074 }, { "epoch": 3.930084486347496, "grad_norm": 1.9735428094863892, "learning_rate": 1.7436754339364353e-05, "loss": 0.4941, "step": 24075 }, { "epoch": 3.9302477449900004, "grad_norm": 2.1944198608398438, "learning_rate": 1.7436542185824232e-05, "loss": 0.5532, "step": 24076 }, { "epoch": 3.930411003632505, "grad_norm": 2.0201752185821533, "learning_rate": 1.743633002479551e-05, "loss": 0.5233, "step": 24077 }, { "epoch": 3.9305742622750093, "grad_norm": 1.898734211921692, "learning_rate": 1.743611785627841e-05, "loss": 0.4515, "step": 24078 }, { "epoch": 3.9307375209175137, "grad_norm": 1.8797656297683716, "learning_rate": 1.743590568027314e-05, "loss": 0.5134, "step": 24079 }, { "epoch": 3.9309007795600177, "grad_norm": 2.2178730964660645, "learning_rate": 1.743569349677991e-05, "loss": 0.5618, "step": 24080 }, { "epoch": 3.931064038202522, "grad_norm": 2.013435125350952, "learning_rate": 1.7435481305798946e-05, "loss": 0.4922, "step": 24081 }, { "epoch": 3.9312272968450266, "grad_norm": 1.6835421323776245, "learning_rate": 1.743526910733045e-05, "loss": 0.3887, "step": 24082 }, { "epoch": 3.931390555487531, "grad_norm": 2.194446563720703, "learning_rate": 1.7435056901374642e-05, "loss": 0.5546, "step": 24083 }, { "epoch": 3.9315538141300355, "grad_norm": 2.097532033920288, "learning_rate": 1.7434844687931735e-05, "loss": 0.5083, "step": 24084 }, { "epoch": 3.93171707277254, "grad_norm": 1.730328917503357, "learning_rate": 1.7434632467001937e-05, "loss": 0.4079, "step": 24085 }, { "epoch": 3.9318803314150443, "grad_norm": 2.066235303878784, "learning_rate": 1.7434420238585466e-05, "loss": 0.4947, "step": 24086 }, { "epoch": 3.9320435900575488, "grad_norm": 2.08664870262146, "learning_rate": 1.743420800268254e-05, "loss": 0.5271, "step": 24087 }, { "epoch": 3.932206848700053, "grad_norm": 1.877664566040039, "learning_rate": 1.743399575929337e-05, "loss": 0.4955, "step": 24088 }, { "epoch": 3.9323701073425577, "grad_norm": 2.272752046585083, "learning_rate": 1.7433783508418163e-05, "loss": 0.4916, "step": 24089 }, { "epoch": 3.932533365985062, "grad_norm": 1.7456473112106323, "learning_rate": 1.743357125005714e-05, "loss": 0.4462, "step": 24090 }, { "epoch": 3.932696624627566, "grad_norm": 1.9134600162506104, "learning_rate": 1.7433358984210512e-05, "loss": 0.575, "step": 24091 }, { "epoch": 3.9328598832700705, "grad_norm": 2.0419700145721436, "learning_rate": 1.7433146710878496e-05, "loss": 0.4864, "step": 24092 }, { "epoch": 3.933023141912575, "grad_norm": 1.8402296304702759, "learning_rate": 1.7432934430061297e-05, "loss": 0.4579, "step": 24093 }, { "epoch": 3.9331864005550794, "grad_norm": 2.407306671142578, "learning_rate": 1.7432722141759143e-05, "loss": 0.5792, "step": 24094 }, { "epoch": 3.933349659197584, "grad_norm": 2.161846399307251, "learning_rate": 1.7432509845972236e-05, "loss": 0.5113, "step": 24095 }, { "epoch": 3.9335129178400883, "grad_norm": 1.713871717453003, "learning_rate": 1.7432297542700793e-05, "loss": 0.4341, "step": 24096 }, { "epoch": 3.9336761764825927, "grad_norm": 2.233642816543579, "learning_rate": 1.743208523194503e-05, "loss": 0.5375, "step": 24097 }, { "epoch": 3.9338394351250967, "grad_norm": 2.202204704284668, "learning_rate": 1.7431872913705155e-05, "loss": 0.5475, "step": 24098 }, { "epoch": 3.934002693767601, "grad_norm": 1.9731138944625854, "learning_rate": 1.743166058798139e-05, "loss": 0.4809, "step": 24099 }, { "epoch": 3.9341659524101056, "grad_norm": 1.8628696203231812, "learning_rate": 1.7431448254773943e-05, "loss": 0.5214, "step": 24100 }, { "epoch": 3.93432921105261, "grad_norm": 1.963911533355713, "learning_rate": 1.743123591408303e-05, "loss": 0.4294, "step": 24101 }, { "epoch": 3.9344924696951145, "grad_norm": 1.9568452835083008, "learning_rate": 1.7431023565908868e-05, "loss": 0.4131, "step": 24102 }, { "epoch": 3.934655728337619, "grad_norm": 1.7930219173431396, "learning_rate": 1.7430811210251663e-05, "loss": 0.4577, "step": 24103 }, { "epoch": 3.9348189869801233, "grad_norm": 2.4444525241851807, "learning_rate": 1.7430598847111633e-05, "loss": 0.6907, "step": 24104 }, { "epoch": 3.9349822456226278, "grad_norm": 2.0228118896484375, "learning_rate": 1.7430386476488993e-05, "loss": 0.4808, "step": 24105 }, { "epoch": 3.935145504265132, "grad_norm": 1.8385319709777832, "learning_rate": 1.7430174098383958e-05, "loss": 0.4738, "step": 24106 }, { "epoch": 3.9353087629076366, "grad_norm": 1.4172794818878174, "learning_rate": 1.7429961712796735e-05, "loss": 0.3981, "step": 24107 }, { "epoch": 3.935472021550141, "grad_norm": 1.7863861322402954, "learning_rate": 1.7429749319727545e-05, "loss": 0.4117, "step": 24108 }, { "epoch": 3.935635280192645, "grad_norm": 2.081463575363159, "learning_rate": 1.74295369191766e-05, "loss": 0.4418, "step": 24109 }, { "epoch": 3.9357985388351495, "grad_norm": 2.249937057495117, "learning_rate": 1.742932451114411e-05, "loss": 0.5264, "step": 24110 }, { "epoch": 3.935961797477654, "grad_norm": 1.975319266319275, "learning_rate": 1.7429112095630296e-05, "loss": 0.4411, "step": 24111 }, { "epoch": 3.9361250561201584, "grad_norm": 2.1206676959991455, "learning_rate": 1.7428899672635368e-05, "loss": 0.5467, "step": 24112 }, { "epoch": 3.936288314762663, "grad_norm": 1.9975751638412476, "learning_rate": 1.7428687242159543e-05, "loss": 0.5032, "step": 24113 }, { "epoch": 3.9364515734051673, "grad_norm": 1.6268293857574463, "learning_rate": 1.7428474804203028e-05, "loss": 0.4129, "step": 24114 }, { "epoch": 3.9366148320476713, "grad_norm": 2.0420329570770264, "learning_rate": 1.7428262358766036e-05, "loss": 0.4622, "step": 24115 }, { "epoch": 3.9367780906901757, "grad_norm": 2.3096256256103516, "learning_rate": 1.742804990584879e-05, "loss": 0.5888, "step": 24116 }, { "epoch": 3.93694134933268, "grad_norm": 2.1145129203796387, "learning_rate": 1.7427837445451504e-05, "loss": 0.4386, "step": 24117 }, { "epoch": 3.9371046079751846, "grad_norm": 2.0815577507019043, "learning_rate": 1.7427624977574383e-05, "loss": 0.4839, "step": 24118 }, { "epoch": 3.937267866617689, "grad_norm": 1.9010491371154785, "learning_rate": 1.7427412502217643e-05, "loss": 0.4143, "step": 24119 }, { "epoch": 3.9374311252601935, "grad_norm": 2.2649219036102295, "learning_rate": 1.7427200019381504e-05, "loss": 0.5305, "step": 24120 }, { "epoch": 3.937594383902698, "grad_norm": 2.4437620639801025, "learning_rate": 1.742698752906618e-05, "loss": 0.4661, "step": 24121 }, { "epoch": 3.9377576425452023, "grad_norm": 1.9650262594223022, "learning_rate": 1.7426775031271876e-05, "loss": 0.4882, "step": 24122 }, { "epoch": 3.9379209011877068, "grad_norm": 1.9459922313690186, "learning_rate": 1.7426562525998813e-05, "loss": 0.441, "step": 24123 }, { "epoch": 3.938084159830211, "grad_norm": 2.1399471759796143, "learning_rate": 1.7426350013247205e-05, "loss": 0.5141, "step": 24124 }, { "epoch": 3.9382474184727156, "grad_norm": 2.101796865463257, "learning_rate": 1.7426137493017265e-05, "loss": 0.5169, "step": 24125 }, { "epoch": 3.9384106771152196, "grad_norm": 2.1498076915740967, "learning_rate": 1.7425924965309204e-05, "loss": 0.4533, "step": 24126 }, { "epoch": 3.938573935757724, "grad_norm": 2.0029213428497314, "learning_rate": 1.742571243012324e-05, "loss": 0.4806, "step": 24127 }, { "epoch": 3.9387371944002285, "grad_norm": 2.038111448287964, "learning_rate": 1.7425499887459585e-05, "loss": 0.4401, "step": 24128 }, { "epoch": 3.938900453042733, "grad_norm": 2.0913126468658447, "learning_rate": 1.7425287337318456e-05, "loss": 0.512, "step": 24129 }, { "epoch": 3.9390637116852374, "grad_norm": 2.5753939151763916, "learning_rate": 1.742507477970006e-05, "loss": 0.5469, "step": 24130 }, { "epoch": 3.939226970327742, "grad_norm": 2.0984981060028076, "learning_rate": 1.7424862214604622e-05, "loss": 0.4564, "step": 24131 }, { "epoch": 3.9393902289702463, "grad_norm": 2.0352065563201904, "learning_rate": 1.7424649642032343e-05, "loss": 0.5197, "step": 24132 }, { "epoch": 3.9395534876127503, "grad_norm": 1.8862227201461792, "learning_rate": 1.7424437061983445e-05, "loss": 0.4742, "step": 24133 }, { "epoch": 3.9397167462552547, "grad_norm": 1.997671127319336, "learning_rate": 1.7424224474458144e-05, "loss": 0.4527, "step": 24134 }, { "epoch": 3.939880004897759, "grad_norm": 2.06223464012146, "learning_rate": 1.742401187945665e-05, "loss": 0.4466, "step": 24135 }, { "epoch": 3.9400432635402636, "grad_norm": 2.138505697250366, "learning_rate": 1.742379927697918e-05, "loss": 0.4661, "step": 24136 }, { "epoch": 3.940206522182768, "grad_norm": 1.7650582790374756, "learning_rate": 1.742358666702594e-05, "loss": 0.4515, "step": 24137 }, { "epoch": 3.9403697808252724, "grad_norm": 2.27907395362854, "learning_rate": 1.7423374049597156e-05, "loss": 0.4717, "step": 24138 }, { "epoch": 3.940533039467777, "grad_norm": 1.8639899492263794, "learning_rate": 1.7423161424693032e-05, "loss": 0.4786, "step": 24139 }, { "epoch": 3.9406962981102813, "grad_norm": 2.2449796199798584, "learning_rate": 1.742294879231379e-05, "loss": 0.5061, "step": 24140 }, { "epoch": 3.9408595567527858, "grad_norm": 2.14555025100708, "learning_rate": 1.7422736152459636e-05, "loss": 0.5227, "step": 24141 }, { "epoch": 3.94102281539529, "grad_norm": 2.304199457168579, "learning_rate": 1.7422523505130793e-05, "loss": 0.4952, "step": 24142 }, { "epoch": 3.9411860740377946, "grad_norm": 1.7609738111495972, "learning_rate": 1.742231085032747e-05, "loss": 0.4367, "step": 24143 }, { "epoch": 3.9413493326802986, "grad_norm": 2.1839473247528076, "learning_rate": 1.7422098188049885e-05, "loss": 0.4663, "step": 24144 }, { "epoch": 3.941512591322803, "grad_norm": 1.8194465637207031, "learning_rate": 1.7421885518298243e-05, "loss": 0.4399, "step": 24145 }, { "epoch": 3.9416758499653075, "grad_norm": 2.2848057746887207, "learning_rate": 1.7421672841072766e-05, "loss": 0.5071, "step": 24146 }, { "epoch": 3.941839108607812, "grad_norm": 2.483560085296631, "learning_rate": 1.7421460156373666e-05, "loss": 0.4952, "step": 24147 }, { "epoch": 3.9420023672503164, "grad_norm": 1.8269479274749756, "learning_rate": 1.742124746420116e-05, "loss": 0.405, "step": 24148 }, { "epoch": 3.942165625892821, "grad_norm": 1.7403037548065186, "learning_rate": 1.7421034764555456e-05, "loss": 0.3963, "step": 24149 }, { "epoch": 3.942328884535325, "grad_norm": 1.999984622001648, "learning_rate": 1.7420822057436777e-05, "loss": 0.465, "step": 24150 }, { "epoch": 3.9424921431778293, "grad_norm": 1.9641478061676025, "learning_rate": 1.742060934284533e-05, "loss": 0.5091, "step": 24151 }, { "epoch": 3.9426554018203337, "grad_norm": 1.7906384468078613, "learning_rate": 1.742039662078133e-05, "loss": 0.401, "step": 24152 }, { "epoch": 3.942818660462838, "grad_norm": 2.267810344696045, "learning_rate": 1.7420183891244994e-05, "loss": 0.5001, "step": 24153 }, { "epoch": 3.9429819191053426, "grad_norm": 2.1192026138305664, "learning_rate": 1.7419971154236534e-05, "loss": 0.395, "step": 24154 }, { "epoch": 3.943145177747847, "grad_norm": 1.798912763595581, "learning_rate": 1.7419758409756163e-05, "loss": 0.433, "step": 24155 }, { "epoch": 3.9433084363903514, "grad_norm": 1.6733181476593018, "learning_rate": 1.74195456578041e-05, "loss": 0.4076, "step": 24156 }, { "epoch": 3.943471695032856, "grad_norm": 2.360536575317383, "learning_rate": 1.7419332898380557e-05, "loss": 0.5572, "step": 24157 }, { "epoch": 3.9436349536753603, "grad_norm": 1.9929001331329346, "learning_rate": 1.741912013148575e-05, "loss": 0.4843, "step": 24158 }, { "epoch": 3.9437982123178648, "grad_norm": 1.9346442222595215, "learning_rate": 1.7418907357119884e-05, "loss": 0.5293, "step": 24159 }, { "epoch": 3.943961470960369, "grad_norm": 1.917750358581543, "learning_rate": 1.7418694575283185e-05, "loss": 0.4201, "step": 24160 }, { "epoch": 3.944124729602873, "grad_norm": 1.768137812614441, "learning_rate": 1.7418481785975862e-05, "loss": 0.4554, "step": 24161 }, { "epoch": 3.9442879882453776, "grad_norm": 2.4611682891845703, "learning_rate": 1.7418268989198132e-05, "loss": 0.4964, "step": 24162 }, { "epoch": 3.944451246887882, "grad_norm": 1.9302871227264404, "learning_rate": 1.74180561849502e-05, "loss": 0.4581, "step": 24163 }, { "epoch": 3.9446145055303865, "grad_norm": 1.8337887525558472, "learning_rate": 1.7417843373232294e-05, "loss": 0.4616, "step": 24164 }, { "epoch": 3.944777764172891, "grad_norm": 2.236107110977173, "learning_rate": 1.741763055404462e-05, "loss": 0.5244, "step": 24165 }, { "epoch": 3.9449410228153954, "grad_norm": 1.6151769161224365, "learning_rate": 1.7417417727387392e-05, "loss": 0.4487, "step": 24166 }, { "epoch": 3.9451042814579, "grad_norm": 1.8871803283691406, "learning_rate": 1.7417204893260828e-05, "loss": 0.4724, "step": 24167 }, { "epoch": 3.945267540100404, "grad_norm": 1.6240501403808594, "learning_rate": 1.741699205166514e-05, "loss": 0.401, "step": 24168 }, { "epoch": 3.9454307987429083, "grad_norm": 1.7709084749221802, "learning_rate": 1.7416779202600545e-05, "loss": 0.377, "step": 24169 }, { "epoch": 3.9455940573854127, "grad_norm": 1.6000335216522217, "learning_rate": 1.7416566346067253e-05, "loss": 0.3768, "step": 24170 }, { "epoch": 3.945757316027917, "grad_norm": 2.2481095790863037, "learning_rate": 1.741635348206548e-05, "loss": 0.5365, "step": 24171 }, { "epoch": 3.9459205746704216, "grad_norm": 2.1291613578796387, "learning_rate": 1.7416140610595444e-05, "loss": 0.4443, "step": 24172 }, { "epoch": 3.946083833312926, "grad_norm": 1.9031213521957397, "learning_rate": 1.7415927731657357e-05, "loss": 0.4904, "step": 24173 }, { "epoch": 3.9462470919554304, "grad_norm": 1.8492456674575806, "learning_rate": 1.741571484525143e-05, "loss": 0.4672, "step": 24174 }, { "epoch": 3.946410350597935, "grad_norm": 2.0967438220977783, "learning_rate": 1.741550195137788e-05, "loss": 0.426, "step": 24175 }, { "epoch": 3.9465736092404393, "grad_norm": 1.78412926197052, "learning_rate": 1.7415289050036922e-05, "loss": 0.4293, "step": 24176 }, { "epoch": 3.9467368678829438, "grad_norm": 2.287393569946289, "learning_rate": 1.741507614122877e-05, "loss": 0.4643, "step": 24177 }, { "epoch": 3.946900126525448, "grad_norm": 1.9953323602676392, "learning_rate": 1.741486322495364e-05, "loss": 0.4513, "step": 24178 }, { "epoch": 3.947063385167952, "grad_norm": 1.954750418663025, "learning_rate": 1.7414650301211744e-05, "loss": 0.4528, "step": 24179 }, { "epoch": 3.9472266438104566, "grad_norm": 2.208622455596924, "learning_rate": 1.7414437370003295e-05, "loss": 0.4202, "step": 24180 }, { "epoch": 3.947389902452961, "grad_norm": 1.8523790836334229, "learning_rate": 1.7414224431328512e-05, "loss": 0.5007, "step": 24181 }, { "epoch": 3.9475531610954655, "grad_norm": 2.3156285285949707, "learning_rate": 1.7414011485187604e-05, "loss": 0.4943, "step": 24182 }, { "epoch": 3.94771641973797, "grad_norm": 2.192162275314331, "learning_rate": 1.7413798531580792e-05, "loss": 0.4992, "step": 24183 }, { "epoch": 3.9478796783804744, "grad_norm": 2.152275323867798, "learning_rate": 1.7413585570508282e-05, "loss": 0.5692, "step": 24184 }, { "epoch": 3.948042937022979, "grad_norm": 1.8932667970657349, "learning_rate": 1.7413372601970298e-05, "loss": 0.4165, "step": 24185 }, { "epoch": 3.948206195665483, "grad_norm": 1.7344772815704346, "learning_rate": 1.7413159625967047e-05, "loss": 0.4423, "step": 24186 }, { "epoch": 3.9483694543079872, "grad_norm": 1.9147145748138428, "learning_rate": 1.741294664249875e-05, "loss": 0.4668, "step": 24187 }, { "epoch": 3.9485327129504917, "grad_norm": 1.9114477634429932, "learning_rate": 1.741273365156561e-05, "loss": 0.4603, "step": 24188 }, { "epoch": 3.948695971592996, "grad_norm": 1.6364765167236328, "learning_rate": 1.7412520653167856e-05, "loss": 0.4054, "step": 24189 }, { "epoch": 3.9488592302355006, "grad_norm": 1.8055148124694824, "learning_rate": 1.7412307647305694e-05, "loss": 0.4046, "step": 24190 }, { "epoch": 3.949022488878005, "grad_norm": 1.7951594591140747, "learning_rate": 1.7412094633979342e-05, "loss": 0.4316, "step": 24191 }, { "epoch": 3.9491857475205094, "grad_norm": 1.6891149282455444, "learning_rate": 1.741188161318901e-05, "loss": 0.4448, "step": 24192 }, { "epoch": 3.949349006163014, "grad_norm": 2.0485565662384033, "learning_rate": 1.7411668584934916e-05, "loss": 0.5004, "step": 24193 }, { "epoch": 3.9495122648055183, "grad_norm": 1.947776198387146, "learning_rate": 1.7411455549217272e-05, "loss": 0.4463, "step": 24194 }, { "epoch": 3.9496755234480228, "grad_norm": 1.8145724534988403, "learning_rate": 1.7411242506036295e-05, "loss": 0.4132, "step": 24195 }, { "epoch": 3.949838782090527, "grad_norm": 2.2644882202148438, "learning_rate": 1.74110294553922e-05, "loss": 0.449, "step": 24196 }, { "epoch": 3.950002040733031, "grad_norm": 2.0079457759857178, "learning_rate": 1.74108163972852e-05, "loss": 0.4333, "step": 24197 }, { "epoch": 3.9501652993755356, "grad_norm": 1.6250948905944824, "learning_rate": 1.741060333171551e-05, "loss": 0.4162, "step": 24198 }, { "epoch": 3.95032855801804, "grad_norm": 1.9688539505004883, "learning_rate": 1.7410390258683345e-05, "loss": 0.3986, "step": 24199 }, { "epoch": 3.9504918166605445, "grad_norm": 1.8572533130645752, "learning_rate": 1.7410177178188917e-05, "loss": 0.4695, "step": 24200 }, { "epoch": 3.950655075303049, "grad_norm": 1.821720004081726, "learning_rate": 1.7409964090232444e-05, "loss": 0.4375, "step": 24201 }, { "epoch": 3.9508183339455534, "grad_norm": 1.952287197113037, "learning_rate": 1.7409750994814142e-05, "loss": 0.48, "step": 24202 }, { "epoch": 3.9509815925880574, "grad_norm": 2.083766222000122, "learning_rate": 1.7409537891934216e-05, "loss": 0.4832, "step": 24203 }, { "epoch": 3.951144851230562, "grad_norm": 2.289121150970459, "learning_rate": 1.740932478159289e-05, "loss": 0.5237, "step": 24204 }, { "epoch": 3.9513081098730662, "grad_norm": 2.0107712745666504, "learning_rate": 1.740911166379038e-05, "loss": 0.4775, "step": 24205 }, { "epoch": 3.9514713685155707, "grad_norm": 1.860302209854126, "learning_rate": 1.740889853852689e-05, "loss": 0.4559, "step": 24206 }, { "epoch": 3.951634627158075, "grad_norm": 1.8303136825561523, "learning_rate": 1.7408685405802647e-05, "loss": 0.4463, "step": 24207 }, { "epoch": 3.9517978858005796, "grad_norm": 2.4042348861694336, "learning_rate": 1.7408472265617855e-05, "loss": 0.5677, "step": 24208 }, { "epoch": 3.951961144443084, "grad_norm": 2.1850063800811768, "learning_rate": 1.7408259117972738e-05, "loss": 0.4802, "step": 24209 }, { "epoch": 3.9521244030855884, "grad_norm": 1.9369573593139648, "learning_rate": 1.74080459628675e-05, "loss": 0.4976, "step": 24210 }, { "epoch": 3.952287661728093, "grad_norm": 1.7975178956985474, "learning_rate": 1.740783280030237e-05, "loss": 0.4773, "step": 24211 }, { "epoch": 3.9524509203705973, "grad_norm": 2.208322525024414, "learning_rate": 1.7407619630277546e-05, "loss": 0.5327, "step": 24212 }, { "epoch": 3.9526141790131017, "grad_norm": 2.03696608543396, "learning_rate": 1.7407406452793254e-05, "loss": 0.5126, "step": 24213 }, { "epoch": 3.9527774376556057, "grad_norm": 1.8990023136138916, "learning_rate": 1.7407193267849706e-05, "loss": 0.4808, "step": 24214 }, { "epoch": 3.95294069629811, "grad_norm": 1.6853413581848145, "learning_rate": 1.740698007544712e-05, "loss": 0.3813, "step": 24215 }, { "epoch": 3.9531039549406146, "grad_norm": 2.0250704288482666, "learning_rate": 1.7406766875585704e-05, "loss": 0.4906, "step": 24216 }, { "epoch": 3.953267213583119, "grad_norm": 2.187098979949951, "learning_rate": 1.7406553668265673e-05, "loss": 0.5369, "step": 24217 }, { "epoch": 3.9534304722256235, "grad_norm": 1.6413333415985107, "learning_rate": 1.7406340453487246e-05, "loss": 0.4385, "step": 24218 }, { "epoch": 3.953593730868128, "grad_norm": 2.164850950241089, "learning_rate": 1.7406127231250637e-05, "loss": 0.5859, "step": 24219 }, { "epoch": 3.9537569895106324, "grad_norm": 2.0592103004455566, "learning_rate": 1.7405914001556058e-05, "loss": 0.4741, "step": 24220 }, { "epoch": 3.9539202481531364, "grad_norm": 2.1305336952209473, "learning_rate": 1.7405700764403726e-05, "loss": 0.5419, "step": 24221 }, { "epoch": 3.954083506795641, "grad_norm": 2.1256885528564453, "learning_rate": 1.740548751979386e-05, "loss": 0.4727, "step": 24222 }, { "epoch": 3.9542467654381452, "grad_norm": 1.8486944437026978, "learning_rate": 1.7405274267726667e-05, "loss": 0.4926, "step": 24223 }, { "epoch": 3.9544100240806497, "grad_norm": 2.3249752521514893, "learning_rate": 1.7405061008202363e-05, "loss": 0.5369, "step": 24224 }, { "epoch": 3.954573282723154, "grad_norm": 1.999398112297058, "learning_rate": 1.7404847741221166e-05, "loss": 0.4528, "step": 24225 }, { "epoch": 3.9547365413656586, "grad_norm": 1.9744844436645508, "learning_rate": 1.740463446678329e-05, "loss": 0.5115, "step": 24226 }, { "epoch": 3.954899800008163, "grad_norm": 2.1741700172424316, "learning_rate": 1.740442118488895e-05, "loss": 0.5412, "step": 24227 }, { "epoch": 3.9550630586506674, "grad_norm": 1.9782332181930542, "learning_rate": 1.7404207895538358e-05, "loss": 0.4578, "step": 24228 }, { "epoch": 3.955226317293172, "grad_norm": 1.812188982963562, "learning_rate": 1.7403994598731732e-05, "loss": 0.4171, "step": 24229 }, { "epoch": 3.9553895759356763, "grad_norm": 1.6337260007858276, "learning_rate": 1.7403781294469282e-05, "loss": 0.45, "step": 24230 }, { "epoch": 3.9555528345781807, "grad_norm": 1.9043667316436768, "learning_rate": 1.7403567982751227e-05, "loss": 0.4416, "step": 24231 }, { "epoch": 3.9557160932206847, "grad_norm": 1.9158737659454346, "learning_rate": 1.7403354663577782e-05, "loss": 0.4612, "step": 24232 }, { "epoch": 3.955879351863189, "grad_norm": 1.7599722146987915, "learning_rate": 1.7403141336949163e-05, "loss": 0.4549, "step": 24233 }, { "epoch": 3.9560426105056936, "grad_norm": 1.8745893239974976, "learning_rate": 1.740292800286558e-05, "loss": 0.4672, "step": 24234 }, { "epoch": 3.956205869148198, "grad_norm": 2.2925918102264404, "learning_rate": 1.740271466132725e-05, "loss": 0.551, "step": 24235 }, { "epoch": 3.9563691277907025, "grad_norm": 1.837674856185913, "learning_rate": 1.740250131233439e-05, "loss": 0.4574, "step": 24236 }, { "epoch": 3.956532386433207, "grad_norm": 1.7448351383209229, "learning_rate": 1.7402287955887213e-05, "loss": 0.4306, "step": 24237 }, { "epoch": 3.956695645075711, "grad_norm": 1.7246249914169312, "learning_rate": 1.7402074591985932e-05, "loss": 0.4404, "step": 24238 }, { "epoch": 3.9568589037182154, "grad_norm": 2.045313596725464, "learning_rate": 1.7401861220630768e-05, "loss": 0.4594, "step": 24239 }, { "epoch": 3.95702216236072, "grad_norm": 1.8418339490890503, "learning_rate": 1.7401647841821928e-05, "loss": 0.399, "step": 24240 }, { "epoch": 3.9571854210032242, "grad_norm": 1.9700590372085571, "learning_rate": 1.740143445555963e-05, "loss": 0.4716, "step": 24241 }, { "epoch": 3.9573486796457287, "grad_norm": 1.82032310962677, "learning_rate": 1.7401221061844094e-05, "loss": 0.431, "step": 24242 }, { "epoch": 3.957511938288233, "grad_norm": 1.8091892004013062, "learning_rate": 1.7401007660675525e-05, "loss": 0.4413, "step": 24243 }, { "epoch": 3.9576751969307375, "grad_norm": 2.065617799758911, "learning_rate": 1.7400794252054147e-05, "loss": 0.482, "step": 24244 }, { "epoch": 3.957838455573242, "grad_norm": 1.7960723638534546, "learning_rate": 1.740058083598017e-05, "loss": 0.4601, "step": 24245 }, { "epoch": 3.9580017142157464, "grad_norm": 1.8826239109039307, "learning_rate": 1.740036741245381e-05, "loss": 0.4642, "step": 24246 }, { "epoch": 3.958164972858251, "grad_norm": 2.506391763687134, "learning_rate": 1.7400153981475282e-05, "loss": 0.526, "step": 24247 }, { "epoch": 3.9583282315007553, "grad_norm": 2.096041440963745, "learning_rate": 1.73999405430448e-05, "loss": 0.4924, "step": 24248 }, { "epoch": 3.9584914901432593, "grad_norm": 1.9373530149459839, "learning_rate": 1.7399727097162583e-05, "loss": 0.4623, "step": 24249 }, { "epoch": 3.9586547487857637, "grad_norm": 1.9089000225067139, "learning_rate": 1.739951364382884e-05, "loss": 0.5059, "step": 24250 }, { "epoch": 3.958818007428268, "grad_norm": 2.2436060905456543, "learning_rate": 1.7399300183043787e-05, "loss": 0.589, "step": 24251 }, { "epoch": 3.9589812660707726, "grad_norm": 1.9677025079727173, "learning_rate": 1.7399086714807644e-05, "loss": 0.4503, "step": 24252 }, { "epoch": 3.959144524713277, "grad_norm": 2.0867855548858643, "learning_rate": 1.7398873239120623e-05, "loss": 0.5109, "step": 24253 }, { "epoch": 3.9593077833557815, "grad_norm": 1.999489188194275, "learning_rate": 1.7398659755982937e-05, "loss": 0.5078, "step": 24254 }, { "epoch": 3.959471041998286, "grad_norm": 2.0186235904693604, "learning_rate": 1.73984462653948e-05, "loss": 0.5048, "step": 24255 }, { "epoch": 3.95963430064079, "grad_norm": 2.374739408493042, "learning_rate": 1.7398232767356437e-05, "loss": 0.5091, "step": 24256 }, { "epoch": 3.9597975592832944, "grad_norm": 2.2981491088867188, "learning_rate": 1.739801926186805e-05, "loss": 0.5836, "step": 24257 }, { "epoch": 3.959960817925799, "grad_norm": 1.8147159814834595, "learning_rate": 1.739780574892986e-05, "loss": 0.411, "step": 24258 }, { "epoch": 3.9601240765683032, "grad_norm": 1.9806959629058838, "learning_rate": 1.7397592228542082e-05, "loss": 0.501, "step": 24259 }, { "epoch": 3.9602873352108077, "grad_norm": 1.6052563190460205, "learning_rate": 1.739737870070493e-05, "loss": 0.3861, "step": 24260 }, { "epoch": 3.960450593853312, "grad_norm": 1.8665270805358887, "learning_rate": 1.739716516541862e-05, "loss": 0.4194, "step": 24261 }, { "epoch": 3.9606138524958165, "grad_norm": 1.9710499048233032, "learning_rate": 1.7396951622683368e-05, "loss": 0.4997, "step": 24262 }, { "epoch": 3.960777111138321, "grad_norm": 2.5728511810302734, "learning_rate": 1.7396738072499386e-05, "loss": 0.5469, "step": 24263 }, { "epoch": 3.9609403697808254, "grad_norm": 1.9924226999282837, "learning_rate": 1.739652451486689e-05, "loss": 0.5239, "step": 24264 }, { "epoch": 3.96110362842333, "grad_norm": 2.2492690086364746, "learning_rate": 1.73963109497861e-05, "loss": 0.5711, "step": 24265 }, { "epoch": 3.9612668870658343, "grad_norm": 2.2109014987945557, "learning_rate": 1.7396097377257222e-05, "loss": 0.4889, "step": 24266 }, { "epoch": 3.9614301457083383, "grad_norm": 2.276160955429077, "learning_rate": 1.7395883797280476e-05, "loss": 0.5465, "step": 24267 }, { "epoch": 3.9615934043508427, "grad_norm": 1.7127834558486938, "learning_rate": 1.739567020985608e-05, "loss": 0.4016, "step": 24268 }, { "epoch": 3.961756662993347, "grad_norm": 2.105912685394287, "learning_rate": 1.7395456614984246e-05, "loss": 0.5103, "step": 24269 }, { "epoch": 3.9619199216358516, "grad_norm": 1.954716682434082, "learning_rate": 1.739524301266519e-05, "loss": 0.4142, "step": 24270 }, { "epoch": 3.962083180278356, "grad_norm": 1.8605499267578125, "learning_rate": 1.739502940289912e-05, "loss": 0.4651, "step": 24271 }, { "epoch": 3.9622464389208605, "grad_norm": 1.9070380926132202, "learning_rate": 1.7394815785686267e-05, "loss": 0.463, "step": 24272 }, { "epoch": 3.962409697563365, "grad_norm": 1.994922161102295, "learning_rate": 1.7394602161026828e-05, "loss": 0.5004, "step": 24273 }, { "epoch": 3.962572956205869, "grad_norm": 1.9371230602264404, "learning_rate": 1.7394388528921028e-05, "loss": 0.4605, "step": 24274 }, { "epoch": 3.9627362148483734, "grad_norm": 2.1142845153808594, "learning_rate": 1.7394174889369083e-05, "loss": 0.48, "step": 24275 }, { "epoch": 3.962899473490878, "grad_norm": 1.6627027988433838, "learning_rate": 1.7393961242371203e-05, "loss": 0.4398, "step": 24276 }, { "epoch": 3.9630627321333822, "grad_norm": 1.9429537057876587, "learning_rate": 1.7393747587927613e-05, "loss": 0.5053, "step": 24277 }, { "epoch": 3.9632259907758867, "grad_norm": 1.590939998626709, "learning_rate": 1.7393533926038515e-05, "loss": 0.4169, "step": 24278 }, { "epoch": 3.963389249418391, "grad_norm": 2.2193689346313477, "learning_rate": 1.7393320256704132e-05, "loss": 0.5161, "step": 24279 }, { "epoch": 3.9635525080608955, "grad_norm": 1.654755711555481, "learning_rate": 1.7393106579924676e-05, "loss": 0.4213, "step": 24280 }, { "epoch": 3.9637157667034, "grad_norm": 1.9729654788970947, "learning_rate": 1.7392892895700366e-05, "loss": 0.5632, "step": 24281 }, { "epoch": 3.9638790253459044, "grad_norm": 1.6520642042160034, "learning_rate": 1.7392679204031412e-05, "loss": 0.3805, "step": 24282 }, { "epoch": 3.964042283988409, "grad_norm": 2.259885787963867, "learning_rate": 1.7392465504918035e-05, "loss": 0.4538, "step": 24283 }, { "epoch": 3.9642055426309133, "grad_norm": 2.3589887619018555, "learning_rate": 1.7392251798360447e-05, "loss": 0.5408, "step": 24284 }, { "epoch": 3.9643688012734173, "grad_norm": 2.2771124839782715, "learning_rate": 1.7392038084358863e-05, "loss": 0.5753, "step": 24285 }, { "epoch": 3.9645320599159217, "grad_norm": 2.0903961658477783, "learning_rate": 1.7391824362913493e-05, "loss": 0.4347, "step": 24286 }, { "epoch": 3.964695318558426, "grad_norm": 2.2098093032836914, "learning_rate": 1.7391610634024566e-05, "loss": 0.5434, "step": 24287 }, { "epoch": 3.9648585772009306, "grad_norm": 1.879234790802002, "learning_rate": 1.7391396897692284e-05, "loss": 0.4314, "step": 24288 }, { "epoch": 3.965021835843435, "grad_norm": 1.8875634670257568, "learning_rate": 1.739118315391687e-05, "loss": 0.4528, "step": 24289 }, { "epoch": 3.9651850944859395, "grad_norm": 1.610109567642212, "learning_rate": 1.7390969402698537e-05, "loss": 0.4197, "step": 24290 }, { "epoch": 3.9653483531284435, "grad_norm": 1.6539840698242188, "learning_rate": 1.7390755644037496e-05, "loss": 0.3915, "step": 24291 }, { "epoch": 3.965511611770948, "grad_norm": 1.5055443048477173, "learning_rate": 1.7390541877933967e-05, "loss": 0.3841, "step": 24292 }, { "epoch": 3.9656748704134523, "grad_norm": 2.8165900707244873, "learning_rate": 1.7390328104388166e-05, "loss": 0.7969, "step": 24293 }, { "epoch": 3.965838129055957, "grad_norm": 1.9053763151168823, "learning_rate": 1.7390114323400306e-05, "loss": 0.4989, "step": 24294 }, { "epoch": 3.9660013876984612, "grad_norm": 2.048948287963867, "learning_rate": 1.7389900534970605e-05, "loss": 0.5027, "step": 24295 }, { "epoch": 3.9661646463409657, "grad_norm": 1.5039234161376953, "learning_rate": 1.7389686739099275e-05, "loss": 0.3912, "step": 24296 }, { "epoch": 3.96632790498347, "grad_norm": 2.2070720195770264, "learning_rate": 1.738947293578653e-05, "loss": 0.4579, "step": 24297 }, { "epoch": 3.9664911636259745, "grad_norm": 1.9988046884536743, "learning_rate": 1.738925912503259e-05, "loss": 0.5035, "step": 24298 }, { "epoch": 3.966654422268479, "grad_norm": 1.6885360479354858, "learning_rate": 1.7389045306837667e-05, "loss": 0.4699, "step": 24299 }, { "epoch": 3.9668176809109834, "grad_norm": 1.7768465280532837, "learning_rate": 1.738883148120198e-05, "loss": 0.4667, "step": 24300 }, { "epoch": 3.966980939553488, "grad_norm": 1.6761037111282349, "learning_rate": 1.7388617648125736e-05, "loss": 0.5023, "step": 24301 }, { "epoch": 3.967144198195992, "grad_norm": 1.7355824708938599, "learning_rate": 1.738840380760916e-05, "loss": 0.4402, "step": 24302 }, { "epoch": 3.9673074568384963, "grad_norm": 2.145308256149292, "learning_rate": 1.7388189959652463e-05, "loss": 0.5012, "step": 24303 }, { "epoch": 3.9674707154810007, "grad_norm": 1.7650024890899658, "learning_rate": 1.7387976104255856e-05, "loss": 0.3981, "step": 24304 }, { "epoch": 3.967633974123505, "grad_norm": 2.0239083766937256, "learning_rate": 1.7387762241419568e-05, "loss": 0.4449, "step": 24305 }, { "epoch": 3.9677972327660096, "grad_norm": 1.875516653060913, "learning_rate": 1.73875483711438e-05, "loss": 0.4326, "step": 24306 }, { "epoch": 3.967960491408514, "grad_norm": 1.8567819595336914, "learning_rate": 1.7387334493428772e-05, "loss": 0.4321, "step": 24307 }, { "epoch": 3.9681237500510185, "grad_norm": 1.8574730157852173, "learning_rate": 1.73871206082747e-05, "loss": 0.4719, "step": 24308 }, { "epoch": 3.9682870086935225, "grad_norm": 1.867383599281311, "learning_rate": 1.73869067156818e-05, "loss": 0.4594, "step": 24309 }, { "epoch": 3.968450267336027, "grad_norm": 1.9351527690887451, "learning_rate": 1.738669281565029e-05, "loss": 0.5259, "step": 24310 }, { "epoch": 3.9686135259785313, "grad_norm": 2.1169233322143555, "learning_rate": 1.738647890818038e-05, "loss": 0.4814, "step": 24311 }, { "epoch": 3.968776784621036, "grad_norm": 1.951130986213684, "learning_rate": 1.738626499327229e-05, "loss": 0.4724, "step": 24312 }, { "epoch": 3.96894004326354, "grad_norm": 1.7409924268722534, "learning_rate": 1.7386051070926228e-05, "loss": 0.4557, "step": 24313 }, { "epoch": 3.9691033019060447, "grad_norm": 2.252248764038086, "learning_rate": 1.7385837141142418e-05, "loss": 0.549, "step": 24314 }, { "epoch": 3.969266560548549, "grad_norm": 1.7955716848373413, "learning_rate": 1.7385623203921073e-05, "loss": 0.4821, "step": 24315 }, { "epoch": 3.9694298191910535, "grad_norm": 1.7694470882415771, "learning_rate": 1.738540925926241e-05, "loss": 0.3668, "step": 24316 }, { "epoch": 3.969593077833558, "grad_norm": 2.298520088195801, "learning_rate": 1.7385195307166633e-05, "loss": 0.6178, "step": 24317 }, { "epoch": 3.9697563364760624, "grad_norm": 2.0401546955108643, "learning_rate": 1.7384981347633972e-05, "loss": 0.5113, "step": 24318 }, { "epoch": 3.969919595118567, "grad_norm": 2.2914421558380127, "learning_rate": 1.7384767380664636e-05, "loss": 0.6145, "step": 24319 }, { "epoch": 3.970082853761071, "grad_norm": 1.7706561088562012, "learning_rate": 1.7384553406258842e-05, "loss": 0.4261, "step": 24320 }, { "epoch": 3.9702461124035753, "grad_norm": 1.8738723993301392, "learning_rate": 1.73843394244168e-05, "loss": 0.4206, "step": 24321 }, { "epoch": 3.9704093710460797, "grad_norm": 1.7487070560455322, "learning_rate": 1.7384125435138735e-05, "loss": 0.42, "step": 24322 }, { "epoch": 3.970572629688584, "grad_norm": 2.1061651706695557, "learning_rate": 1.7383911438424858e-05, "loss": 0.4789, "step": 24323 }, { "epoch": 3.9707358883310886, "grad_norm": 1.9911842346191406, "learning_rate": 1.738369743427538e-05, "loss": 0.5304, "step": 24324 }, { "epoch": 3.970899146973593, "grad_norm": 1.8183162212371826, "learning_rate": 1.7383483422690526e-05, "loss": 0.4544, "step": 24325 }, { "epoch": 3.9710624056160975, "grad_norm": 1.9101407527923584, "learning_rate": 1.73832694036705e-05, "loss": 0.4543, "step": 24326 }, { "epoch": 3.9712256642586015, "grad_norm": 2.0530800819396973, "learning_rate": 1.7383055377215528e-05, "loss": 0.5767, "step": 24327 }, { "epoch": 3.971388922901106, "grad_norm": 1.6591376066207886, "learning_rate": 1.738284134332582e-05, "loss": 0.4249, "step": 24328 }, { "epoch": 3.9715521815436103, "grad_norm": 2.027940511703491, "learning_rate": 1.7382627302001594e-05, "loss": 0.4754, "step": 24329 }, { "epoch": 3.9717154401861148, "grad_norm": 1.8928015232086182, "learning_rate": 1.738241325324306e-05, "loss": 0.4882, "step": 24330 }, { "epoch": 3.971878698828619, "grad_norm": 1.704742193222046, "learning_rate": 1.7382199197050443e-05, "loss": 0.4705, "step": 24331 }, { "epoch": 3.9720419574711237, "grad_norm": 2.065239191055298, "learning_rate": 1.7381985133423952e-05, "loss": 0.4845, "step": 24332 }, { "epoch": 3.972205216113628, "grad_norm": 1.951247215270996, "learning_rate": 1.7381771062363804e-05, "loss": 0.4803, "step": 24333 }, { "epoch": 3.9723684747561325, "grad_norm": 1.7541269063949585, "learning_rate": 1.7381556983870215e-05, "loss": 0.4078, "step": 24334 }, { "epoch": 3.972531733398637, "grad_norm": 2.0277464389801025, "learning_rate": 1.73813428979434e-05, "loss": 0.5218, "step": 24335 }, { "epoch": 3.9726949920411414, "grad_norm": 2.1211068630218506, "learning_rate": 1.738112880458357e-05, "loss": 0.5294, "step": 24336 }, { "epoch": 3.972858250683646, "grad_norm": 2.2684552669525146, "learning_rate": 1.738091470379095e-05, "loss": 0.5387, "step": 24337 }, { "epoch": 3.97302150932615, "grad_norm": 2.005481004714966, "learning_rate": 1.738070059556575e-05, "loss": 0.4713, "step": 24338 }, { "epoch": 3.9731847679686543, "grad_norm": 1.6347213983535767, "learning_rate": 1.7380486479908186e-05, "loss": 0.4207, "step": 24339 }, { "epoch": 3.9733480266111587, "grad_norm": 2.038572311401367, "learning_rate": 1.7380272356818474e-05, "loss": 0.4799, "step": 24340 }, { "epoch": 3.973511285253663, "grad_norm": 1.839708924293518, "learning_rate": 1.738005822629683e-05, "loss": 0.5028, "step": 24341 }, { "epoch": 3.9736745438961676, "grad_norm": 1.8200266361236572, "learning_rate": 1.737984408834347e-05, "loss": 0.4426, "step": 24342 }, { "epoch": 3.973837802538672, "grad_norm": 2.3344309329986572, "learning_rate": 1.7379629942958607e-05, "loss": 0.4865, "step": 24343 }, { "epoch": 3.974001061181176, "grad_norm": 1.9259926080703735, "learning_rate": 1.737941579014246e-05, "loss": 0.4958, "step": 24344 }, { "epoch": 3.9741643198236805, "grad_norm": 1.7932831048965454, "learning_rate": 1.7379201629895242e-05, "loss": 0.453, "step": 24345 }, { "epoch": 3.974327578466185, "grad_norm": 1.7807080745697021, "learning_rate": 1.737898746221717e-05, "loss": 0.4245, "step": 24346 }, { "epoch": 3.9744908371086893, "grad_norm": 2.301236152648926, "learning_rate": 1.7378773287108465e-05, "loss": 0.5392, "step": 24347 }, { "epoch": 3.9746540957511938, "grad_norm": 1.5338387489318848, "learning_rate": 1.737855910456933e-05, "loss": 0.4294, "step": 24348 }, { "epoch": 3.974817354393698, "grad_norm": 2.406687021255493, "learning_rate": 1.737834491459999e-05, "loss": 0.4793, "step": 24349 }, { "epoch": 3.9749806130362026, "grad_norm": 1.734614610671997, "learning_rate": 1.737813071720066e-05, "loss": 0.4173, "step": 24350 }, { "epoch": 3.975143871678707, "grad_norm": 2.3590641021728516, "learning_rate": 1.7377916512371553e-05, "loss": 0.5759, "step": 24351 }, { "epoch": 3.9753071303212115, "grad_norm": 1.664637804031372, "learning_rate": 1.7377702300112887e-05, "loss": 0.3855, "step": 24352 }, { "epoch": 3.975470388963716, "grad_norm": 1.8504213094711304, "learning_rate": 1.7377488080424875e-05, "loss": 0.4058, "step": 24353 }, { "epoch": 3.9756336476062204, "grad_norm": 1.7201083898544312, "learning_rate": 1.7377273853307737e-05, "loss": 0.4487, "step": 24354 }, { "epoch": 3.9757969062487244, "grad_norm": 1.9447314739227295, "learning_rate": 1.7377059618761686e-05, "loss": 0.51, "step": 24355 }, { "epoch": 3.975960164891229, "grad_norm": 1.716626763343811, "learning_rate": 1.7376845376786936e-05, "loss": 0.3566, "step": 24356 }, { "epoch": 3.9761234235337333, "grad_norm": 2.4007985591888428, "learning_rate": 1.7376631127383706e-05, "loss": 0.5204, "step": 24357 }, { "epoch": 3.9762866821762377, "grad_norm": 2.3345224857330322, "learning_rate": 1.7376416870552212e-05, "loss": 0.4801, "step": 24358 }, { "epoch": 3.976449940818742, "grad_norm": 2.04022216796875, "learning_rate": 1.7376202606292665e-05, "loss": 0.5397, "step": 24359 }, { "epoch": 3.9766131994612466, "grad_norm": 2.138425827026367, "learning_rate": 1.7375988334605283e-05, "loss": 0.5135, "step": 24360 }, { "epoch": 3.976776458103751, "grad_norm": 1.5205320119857788, "learning_rate": 1.7375774055490287e-05, "loss": 0.4311, "step": 24361 }, { "epoch": 3.976939716746255, "grad_norm": 2.1140151023864746, "learning_rate": 1.7375559768947888e-05, "loss": 0.4843, "step": 24362 }, { "epoch": 3.9771029753887595, "grad_norm": 1.8151525259017944, "learning_rate": 1.7375345474978302e-05, "loss": 0.4668, "step": 24363 }, { "epoch": 3.977266234031264, "grad_norm": 1.6179347038269043, "learning_rate": 1.737513117358174e-05, "loss": 0.3967, "step": 24364 }, { "epoch": 3.9774294926737683, "grad_norm": 1.7295762300491333, "learning_rate": 1.737491686475843e-05, "loss": 0.4391, "step": 24365 }, { "epoch": 3.9775927513162728, "grad_norm": 2.305208921432495, "learning_rate": 1.7374702548508576e-05, "loss": 0.5267, "step": 24366 }, { "epoch": 3.977756009958777, "grad_norm": 1.7960538864135742, "learning_rate": 1.73744882248324e-05, "loss": 0.4228, "step": 24367 }, { "epoch": 3.9779192686012816, "grad_norm": 1.8690743446350098, "learning_rate": 1.7374273893730114e-05, "loss": 0.4243, "step": 24368 }, { "epoch": 3.978082527243786, "grad_norm": 1.7247693538665771, "learning_rate": 1.737405955520194e-05, "loss": 0.3477, "step": 24369 }, { "epoch": 3.9782457858862905, "grad_norm": 1.8762426376342773, "learning_rate": 1.7373845209248088e-05, "loss": 0.4621, "step": 24370 }, { "epoch": 3.978409044528795, "grad_norm": 2.006074905395508, "learning_rate": 1.7373630855868776e-05, "loss": 0.5273, "step": 24371 }, { "epoch": 3.9785723031712994, "grad_norm": 1.4970735311508179, "learning_rate": 1.7373416495064218e-05, "loss": 0.4126, "step": 24372 }, { "epoch": 3.9787355618138034, "grad_norm": 2.0130813121795654, "learning_rate": 1.7373202126834633e-05, "loss": 0.4465, "step": 24373 }, { "epoch": 3.978898820456308, "grad_norm": 2.0299856662750244, "learning_rate": 1.7372987751180238e-05, "loss": 0.4549, "step": 24374 }, { "epoch": 3.9790620790988123, "grad_norm": 1.933975338935852, "learning_rate": 1.737277336810124e-05, "loss": 0.4508, "step": 24375 }, { "epoch": 3.9792253377413167, "grad_norm": 1.8053960800170898, "learning_rate": 1.7372558977597867e-05, "loss": 0.4945, "step": 24376 }, { "epoch": 3.979388596383821, "grad_norm": 2.3825573921203613, "learning_rate": 1.7372344579670328e-05, "loss": 0.5519, "step": 24377 }, { "epoch": 3.9795518550263256, "grad_norm": 1.7804111242294312, "learning_rate": 1.7372130174318836e-05, "loss": 0.4399, "step": 24378 }, { "epoch": 3.9797151136688296, "grad_norm": 1.9016287326812744, "learning_rate": 1.737191576154362e-05, "loss": 0.5019, "step": 24379 }, { "epoch": 3.979878372311334, "grad_norm": 1.7409647703170776, "learning_rate": 1.7371701341344877e-05, "loss": 0.4173, "step": 24380 }, { "epoch": 3.9800416309538384, "grad_norm": 1.961897850036621, "learning_rate": 1.7371486913722836e-05, "loss": 0.4439, "step": 24381 }, { "epoch": 3.980204889596343, "grad_norm": 2.2111973762512207, "learning_rate": 1.737127247867771e-05, "loss": 0.5398, "step": 24382 }, { "epoch": 3.9803681482388473, "grad_norm": 1.8496570587158203, "learning_rate": 1.7371058036209712e-05, "loss": 0.4487, "step": 24383 }, { "epoch": 3.9805314068813518, "grad_norm": 1.9298979043960571, "learning_rate": 1.737084358631907e-05, "loss": 0.4698, "step": 24384 }, { "epoch": 3.980694665523856, "grad_norm": 2.276386022567749, "learning_rate": 1.737062912900598e-05, "loss": 0.5444, "step": 24385 }, { "epoch": 3.9808579241663606, "grad_norm": 1.663077473640442, "learning_rate": 1.7370414664270675e-05, "loss": 0.4089, "step": 24386 }, { "epoch": 3.981021182808865, "grad_norm": 2.376558303833008, "learning_rate": 1.737020019211336e-05, "loss": 0.4691, "step": 24387 }, { "epoch": 3.9811844414513695, "grad_norm": 1.981323003768921, "learning_rate": 1.7369985712534253e-05, "loss": 0.4857, "step": 24388 }, { "epoch": 3.981347700093874, "grad_norm": 2.1202056407928467, "learning_rate": 1.736977122553358e-05, "loss": 0.5224, "step": 24389 }, { "epoch": 3.981510958736378, "grad_norm": 1.5358221530914307, "learning_rate": 1.7369556731111545e-05, "loss": 0.4293, "step": 24390 }, { "epoch": 3.9816742173788824, "grad_norm": 1.7984302043914795, "learning_rate": 1.7369342229268368e-05, "loss": 0.3936, "step": 24391 }, { "epoch": 3.981837476021387, "grad_norm": 1.9791234731674194, "learning_rate": 1.7369127720004267e-05, "loss": 0.4173, "step": 24392 }, { "epoch": 3.9820007346638913, "grad_norm": 2.3530020713806152, "learning_rate": 1.7368913203319455e-05, "loss": 0.5254, "step": 24393 }, { "epoch": 3.9821639933063957, "grad_norm": 1.7741944789886475, "learning_rate": 1.736869867921415e-05, "loss": 0.4228, "step": 24394 }, { "epoch": 3.9823272519489, "grad_norm": 1.9740064144134521, "learning_rate": 1.7368484147688567e-05, "loss": 0.4856, "step": 24395 }, { "epoch": 3.9824905105914046, "grad_norm": 1.9654587507247925, "learning_rate": 1.7368269608742924e-05, "loss": 0.4912, "step": 24396 }, { "epoch": 3.9826537692339086, "grad_norm": 1.7639669179916382, "learning_rate": 1.7368055062377435e-05, "loss": 0.4607, "step": 24397 }, { "epoch": 3.982817027876413, "grad_norm": 1.854423999786377, "learning_rate": 1.7367840508592313e-05, "loss": 0.4398, "step": 24398 }, { "epoch": 3.9829802865189174, "grad_norm": 1.9766119718551636, "learning_rate": 1.736762594738778e-05, "loss": 0.4432, "step": 24399 }, { "epoch": 3.983143545161422, "grad_norm": 1.4722696542739868, "learning_rate": 1.736741137876405e-05, "loss": 0.3717, "step": 24400 }, { "epoch": 3.9833068038039263, "grad_norm": 2.61065673828125, "learning_rate": 1.7367196802721338e-05, "loss": 0.6119, "step": 24401 }, { "epoch": 3.9834700624464308, "grad_norm": 2.206887722015381, "learning_rate": 1.736698221925986e-05, "loss": 0.5318, "step": 24402 }, { "epoch": 3.983633321088935, "grad_norm": 2.1657726764678955, "learning_rate": 1.7366767628379836e-05, "loss": 0.5093, "step": 24403 }, { "epoch": 3.9837965797314396, "grad_norm": 1.9097493886947632, "learning_rate": 1.736655303008148e-05, "loss": 0.4582, "step": 24404 }, { "epoch": 3.983959838373944, "grad_norm": 1.7262955904006958, "learning_rate": 1.7366338424365e-05, "loss": 0.3875, "step": 24405 }, { "epoch": 3.9841230970164485, "grad_norm": 2.115410804748535, "learning_rate": 1.7366123811230626e-05, "loss": 0.5975, "step": 24406 }, { "epoch": 3.984286355658953, "grad_norm": 2.1130874156951904, "learning_rate": 1.7365909190678562e-05, "loss": 0.5202, "step": 24407 }, { "epoch": 3.984449614301457, "grad_norm": 1.9270232915878296, "learning_rate": 1.7365694562709034e-05, "loss": 0.5031, "step": 24408 }, { "epoch": 3.9846128729439614, "grad_norm": 1.8460789918899536, "learning_rate": 1.7365479927322253e-05, "loss": 0.549, "step": 24409 }, { "epoch": 3.984776131586466, "grad_norm": 1.8677235841751099, "learning_rate": 1.7365265284518434e-05, "loss": 0.4856, "step": 24410 }, { "epoch": 3.9849393902289703, "grad_norm": 1.7832587957382202, "learning_rate": 1.7365050634297794e-05, "loss": 0.418, "step": 24411 }, { "epoch": 3.9851026488714747, "grad_norm": 2.1345813274383545, "learning_rate": 1.736483597666055e-05, "loss": 0.5113, "step": 24412 }, { "epoch": 3.985265907513979, "grad_norm": 1.9418556690216064, "learning_rate": 1.7364621311606918e-05, "loss": 0.4476, "step": 24413 }, { "epoch": 3.9854291661564836, "grad_norm": 2.0685722827911377, "learning_rate": 1.7364406639137115e-05, "loss": 0.5186, "step": 24414 }, { "epoch": 3.9855924247989876, "grad_norm": 1.4968986511230469, "learning_rate": 1.736419195925136e-05, "loss": 0.3884, "step": 24415 }, { "epoch": 3.985755683441492, "grad_norm": 2.0165815353393555, "learning_rate": 1.736397727194986e-05, "loss": 0.4488, "step": 24416 }, { "epoch": 3.9859189420839964, "grad_norm": 1.8845634460449219, "learning_rate": 1.7363762577232842e-05, "loss": 0.4588, "step": 24417 }, { "epoch": 3.986082200726501, "grad_norm": 2.0281124114990234, "learning_rate": 1.736354787510051e-05, "loss": 0.4832, "step": 24418 }, { "epoch": 3.9862454593690053, "grad_norm": 2.1418023109436035, "learning_rate": 1.7363333165553095e-05, "loss": 0.532, "step": 24419 }, { "epoch": 3.9864087180115098, "grad_norm": 2.096243143081665, "learning_rate": 1.7363118448590803e-05, "loss": 0.4929, "step": 24420 }, { "epoch": 3.986571976654014, "grad_norm": 2.044294834136963, "learning_rate": 1.7362903724213852e-05, "loss": 0.5058, "step": 24421 }, { "epoch": 3.9867352352965186, "grad_norm": 1.6723741292953491, "learning_rate": 1.7362688992422456e-05, "loss": 0.3954, "step": 24422 }, { "epoch": 3.986898493939023, "grad_norm": 1.9871855974197388, "learning_rate": 1.7362474253216838e-05, "loss": 0.4891, "step": 24423 }, { "epoch": 3.9870617525815275, "grad_norm": 2.170292377471924, "learning_rate": 1.7362259506597208e-05, "loss": 0.5206, "step": 24424 }, { "epoch": 3.987225011224032, "grad_norm": 2.219327926635742, "learning_rate": 1.7362044752563787e-05, "loss": 0.4902, "step": 24425 }, { "epoch": 3.987388269866536, "grad_norm": 1.927240014076233, "learning_rate": 1.736182999111679e-05, "loss": 0.4126, "step": 24426 }, { "epoch": 3.9875515285090404, "grad_norm": 2.412748336791992, "learning_rate": 1.736161522225643e-05, "loss": 0.5496, "step": 24427 }, { "epoch": 3.987714787151545, "grad_norm": 1.960607647895813, "learning_rate": 1.7361400445982924e-05, "loss": 0.487, "step": 24428 }, { "epoch": 3.9878780457940493, "grad_norm": 2.4444024562835693, "learning_rate": 1.736118566229649e-05, "loss": 0.5617, "step": 24429 }, { "epoch": 3.9880413044365537, "grad_norm": 2.3578743934631348, "learning_rate": 1.7360970871197347e-05, "loss": 0.5302, "step": 24430 }, { "epoch": 3.988204563079058, "grad_norm": 2.194303512573242, "learning_rate": 1.7360756072685704e-05, "loss": 0.5677, "step": 24431 }, { "epoch": 3.988367821721562, "grad_norm": 2.177809238433838, "learning_rate": 1.7360541266761785e-05, "loss": 0.5089, "step": 24432 }, { "epoch": 3.9885310803640666, "grad_norm": 1.9641271829605103, "learning_rate": 1.73603264534258e-05, "loss": 0.5269, "step": 24433 }, { "epoch": 3.988694339006571, "grad_norm": 1.8683840036392212, "learning_rate": 1.7360111632677972e-05, "loss": 0.5168, "step": 24434 }, { "epoch": 3.9888575976490754, "grad_norm": 2.424187660217285, "learning_rate": 1.735989680451851e-05, "loss": 0.5865, "step": 24435 }, { "epoch": 3.98902085629158, "grad_norm": 1.8201355934143066, "learning_rate": 1.7359681968947633e-05, "loss": 0.4036, "step": 24436 }, { "epoch": 3.9891841149340843, "grad_norm": 2.268847942352295, "learning_rate": 1.7359467125965565e-05, "loss": 0.5191, "step": 24437 }, { "epoch": 3.9893473735765888, "grad_norm": 1.8152672052383423, "learning_rate": 1.735925227557251e-05, "loss": 0.4752, "step": 24438 }, { "epoch": 3.989510632219093, "grad_norm": 1.9066208600997925, "learning_rate": 1.735903741776869e-05, "loss": 0.4628, "step": 24439 }, { "epoch": 3.9896738908615976, "grad_norm": 2.1365668773651123, "learning_rate": 1.735882255255432e-05, "loss": 0.5055, "step": 24440 }, { "epoch": 3.989837149504102, "grad_norm": 2.0543227195739746, "learning_rate": 1.7358607679929623e-05, "loss": 0.4552, "step": 24441 }, { "epoch": 3.9900004081466065, "grad_norm": 1.7443686723709106, "learning_rate": 1.7358392799894806e-05, "loss": 0.4538, "step": 24442 }, { "epoch": 3.9901636667891105, "grad_norm": 1.8020973205566406, "learning_rate": 1.735817791245009e-05, "loss": 0.4366, "step": 24443 }, { "epoch": 3.990326925431615, "grad_norm": 2.307813882827759, "learning_rate": 1.735796301759569e-05, "loss": 0.5046, "step": 24444 }, { "epoch": 3.9904901840741194, "grad_norm": 1.8166894912719727, "learning_rate": 1.7357748115331826e-05, "loss": 0.4801, "step": 24445 }, { "epoch": 3.990653442716624, "grad_norm": 2.2313737869262695, "learning_rate": 1.735753320565871e-05, "loss": 0.6776, "step": 24446 }, { "epoch": 3.9908167013591282, "grad_norm": 1.7836766242980957, "learning_rate": 1.735731828857656e-05, "loss": 0.4315, "step": 24447 }, { "epoch": 3.9909799600016327, "grad_norm": 1.5175795555114746, "learning_rate": 1.735710336408559e-05, "loss": 0.3985, "step": 24448 }, { "epoch": 3.991143218644137, "grad_norm": 1.8845019340515137, "learning_rate": 1.7356888432186022e-05, "loss": 0.461, "step": 24449 }, { "epoch": 3.991306477286641, "grad_norm": 2.1381094455718994, "learning_rate": 1.7356673492878073e-05, "loss": 0.5057, "step": 24450 }, { "epoch": 3.9914697359291456, "grad_norm": 1.5866352319717407, "learning_rate": 1.735645854616195e-05, "loss": 0.4001, "step": 24451 }, { "epoch": 3.99163299457165, "grad_norm": 2.046919822692871, "learning_rate": 1.7356243592037876e-05, "loss": 0.4666, "step": 24452 }, { "epoch": 3.9917962532141544, "grad_norm": 1.876767873764038, "learning_rate": 1.735602863050607e-05, "loss": 0.4366, "step": 24453 }, { "epoch": 3.991959511856659, "grad_norm": 1.674424648284912, "learning_rate": 1.7355813661566738e-05, "loss": 0.4374, "step": 24454 }, { "epoch": 3.9921227704991633, "grad_norm": 1.6610066890716553, "learning_rate": 1.7355598685220108e-05, "loss": 0.3985, "step": 24455 }, { "epoch": 3.9922860291416677, "grad_norm": 1.924430012702942, "learning_rate": 1.735538370146639e-05, "loss": 0.5119, "step": 24456 }, { "epoch": 3.992449287784172, "grad_norm": 1.8450233936309814, "learning_rate": 1.735516871030581e-05, "loss": 0.506, "step": 24457 }, { "epoch": 3.9926125464266766, "grad_norm": 1.8077973127365112, "learning_rate": 1.735495371173857e-05, "loss": 0.4911, "step": 24458 }, { "epoch": 3.992775805069181, "grad_norm": 2.617914915084839, "learning_rate": 1.7354738705764896e-05, "loss": 0.5521, "step": 24459 }, { "epoch": 3.9929390637116855, "grad_norm": 1.6506218910217285, "learning_rate": 1.7354523692385e-05, "loss": 0.4059, "step": 24460 }, { "epoch": 3.9931023223541895, "grad_norm": 1.9090757369995117, "learning_rate": 1.73543086715991e-05, "loss": 0.4846, "step": 24461 }, { "epoch": 3.993265580996694, "grad_norm": 2.1261653900146484, "learning_rate": 1.7354093643407414e-05, "loss": 0.4171, "step": 24462 }, { "epoch": 3.9934288396391984, "grad_norm": 2.0884463787078857, "learning_rate": 1.735387860781016e-05, "loss": 0.4864, "step": 24463 }, { "epoch": 3.993592098281703, "grad_norm": 1.8367763757705688, "learning_rate": 1.735366356480755e-05, "loss": 0.4321, "step": 24464 }, { "epoch": 3.9937553569242072, "grad_norm": 2.252291679382324, "learning_rate": 1.7353448514399806e-05, "loss": 0.4181, "step": 24465 }, { "epoch": 3.9939186155667117, "grad_norm": 2.2002789974212646, "learning_rate": 1.735323345658714e-05, "loss": 0.553, "step": 24466 }, { "epoch": 3.9940818742092157, "grad_norm": 2.130370855331421, "learning_rate": 1.7353018391369768e-05, "loss": 0.4572, "step": 24467 }, { "epoch": 3.99424513285172, "grad_norm": 1.586480975151062, "learning_rate": 1.735280331874791e-05, "loss": 0.4175, "step": 24468 }, { "epoch": 3.9944083914942246, "grad_norm": 1.7675541639328003, "learning_rate": 1.7352588238721777e-05, "loss": 0.4292, "step": 24469 }, { "epoch": 3.994571650136729, "grad_norm": 2.40487003326416, "learning_rate": 1.7352373151291593e-05, "loss": 0.4896, "step": 24470 }, { "epoch": 3.9947349087792334, "grad_norm": 2.460209846496582, "learning_rate": 1.735215805645757e-05, "loss": 0.5956, "step": 24471 }, { "epoch": 3.994898167421738, "grad_norm": 2.12536883354187, "learning_rate": 1.7351942954219926e-05, "loss": 0.4511, "step": 24472 }, { "epoch": 3.9950614260642423, "grad_norm": 1.8756248950958252, "learning_rate": 1.7351727844578882e-05, "loss": 0.4795, "step": 24473 }, { "epoch": 3.9952246847067467, "grad_norm": 1.996233344078064, "learning_rate": 1.7351512727534645e-05, "loss": 0.452, "step": 24474 }, { "epoch": 3.995387943349251, "grad_norm": 1.6581991910934448, "learning_rate": 1.7351297603087434e-05, "loss": 0.4153, "step": 24475 }, { "epoch": 3.9955512019917556, "grad_norm": 1.9583762884140015, "learning_rate": 1.735108247123747e-05, "loss": 0.4973, "step": 24476 }, { "epoch": 3.99571446063426, "grad_norm": 2.1821134090423584, "learning_rate": 1.7350867331984975e-05, "loss": 0.4983, "step": 24477 }, { "epoch": 3.995877719276764, "grad_norm": 2.0296599864959717, "learning_rate": 1.7350652185330153e-05, "loss": 0.5137, "step": 24478 }, { "epoch": 3.9960409779192685, "grad_norm": 1.9506287574768066, "learning_rate": 1.7350437031273226e-05, "loss": 0.4231, "step": 24479 }, { "epoch": 3.996204236561773, "grad_norm": 2.244903802871704, "learning_rate": 1.7350221869814413e-05, "loss": 0.6763, "step": 24480 }, { "epoch": 3.9963674952042774, "grad_norm": 2.0388123989105225, "learning_rate": 1.7350006700953924e-05, "loss": 0.4704, "step": 24481 }, { "epoch": 3.996530753846782, "grad_norm": 1.947852611541748, "learning_rate": 1.7349791524691984e-05, "loss": 0.4582, "step": 24482 }, { "epoch": 3.9966940124892862, "grad_norm": 1.9512743949890137, "learning_rate": 1.7349576341028805e-05, "loss": 0.5029, "step": 24483 }, { "epoch": 3.9968572711317907, "grad_norm": 1.7676732540130615, "learning_rate": 1.7349361149964605e-05, "loss": 0.4542, "step": 24484 }, { "epoch": 3.9970205297742947, "grad_norm": 1.8225833177566528, "learning_rate": 1.73491459514996e-05, "loss": 0.4427, "step": 24485 }, { "epoch": 3.997183788416799, "grad_norm": 1.6507623195648193, "learning_rate": 1.7348930745634008e-05, "loss": 0.4236, "step": 24486 }, { "epoch": 3.9973470470593035, "grad_norm": 1.8688877820968628, "learning_rate": 1.7348715532368044e-05, "loss": 0.4433, "step": 24487 }, { "epoch": 3.997510305701808, "grad_norm": 2.501437187194824, "learning_rate": 1.7348500311701926e-05, "loss": 0.5849, "step": 24488 }, { "epoch": 3.9976735643443124, "grad_norm": 1.945715308189392, "learning_rate": 1.7348285083635867e-05, "loss": 0.4932, "step": 24489 }, { "epoch": 3.997836822986817, "grad_norm": 1.7216291427612305, "learning_rate": 1.734806984817009e-05, "loss": 0.4082, "step": 24490 }, { "epoch": 3.9980000816293213, "grad_norm": 1.9080226421356201, "learning_rate": 1.7347854605304806e-05, "loss": 0.4949, "step": 24491 }, { "epoch": 3.9981633402718257, "grad_norm": 1.734437108039856, "learning_rate": 1.734763935504024e-05, "loss": 0.4482, "step": 24492 }, { "epoch": 3.99832659891433, "grad_norm": 2.2727537155151367, "learning_rate": 1.7347424097376597e-05, "loss": 0.5212, "step": 24493 }, { "epoch": 3.9984898575568346, "grad_norm": 2.075669765472412, "learning_rate": 1.73472088323141e-05, "loss": 0.4725, "step": 24494 }, { "epoch": 3.998653116199339, "grad_norm": 2.044912338256836, "learning_rate": 1.7346993559852968e-05, "loss": 0.4593, "step": 24495 }, { "epoch": 3.998816374841843, "grad_norm": 1.6806361675262451, "learning_rate": 1.7346778279993417e-05, "loss": 0.3742, "step": 24496 }, { "epoch": 3.9989796334843475, "grad_norm": 2.2259833812713623, "learning_rate": 1.734656299273566e-05, "loss": 0.5029, "step": 24497 }, { "epoch": 3.999142892126852, "grad_norm": 2.1004698276519775, "learning_rate": 1.7346347698079915e-05, "loss": 0.5054, "step": 24498 }, { "epoch": 3.9993061507693564, "grad_norm": 2.064424991607666, "learning_rate": 1.73461323960264e-05, "loss": 0.4483, "step": 24499 }, { "epoch": 3.999469409411861, "grad_norm": 2.1474928855895996, "learning_rate": 1.734591708657533e-05, "loss": 0.4355, "step": 24500 }, { "epoch": 3.9996326680543652, "grad_norm": 1.8174575567245483, "learning_rate": 1.7345701769726932e-05, "loss": 0.4746, "step": 24501 }, { "epoch": 3.9997959266968697, "grad_norm": 2.172358274459839, "learning_rate": 1.7345486445481407e-05, "loss": 0.5197, "step": 24502 }, { "epoch": 3.9999591853393737, "grad_norm": 1.9918277263641357, "learning_rate": 1.734527111383898e-05, "loss": 0.4528, "step": 24503 }, { "epoch": 4.0, "grad_norm": 4.5611982345581055, "learning_rate": 1.734505577479987e-05, "loss": 0.696, "step": 24504 }, { "epoch": 4.000163258642504, "grad_norm": 1.3759816884994507, "learning_rate": 1.7344840428364287e-05, "loss": 0.3633, "step": 24505 }, { "epoch": 4.000326517285009, "grad_norm": 1.7194640636444092, "learning_rate": 1.734462507453245e-05, "loss": 0.37, "step": 24506 }, { "epoch": 4.000489775927513, "grad_norm": 1.7291855812072754, "learning_rate": 1.7344409713304582e-05, "loss": 0.4138, "step": 24507 }, { "epoch": 4.000653034570018, "grad_norm": 1.4867291450500488, "learning_rate": 1.7344194344680897e-05, "loss": 0.3688, "step": 24508 }, { "epoch": 4.000816293212522, "grad_norm": 1.700166940689087, "learning_rate": 1.7343978968661605e-05, "loss": 0.4473, "step": 24509 }, { "epoch": 4.000979551855027, "grad_norm": 1.624963641166687, "learning_rate": 1.734376358524693e-05, "loss": 0.3469, "step": 24510 }, { "epoch": 4.001142810497531, "grad_norm": 1.5989969968795776, "learning_rate": 1.7343548194437086e-05, "loss": 0.376, "step": 24511 }, { "epoch": 4.0013060691400355, "grad_norm": 2.0313849449157715, "learning_rate": 1.7343332796232293e-05, "loss": 0.5074, "step": 24512 }, { "epoch": 4.001469327782539, "grad_norm": 1.2318141460418701, "learning_rate": 1.7343117390632764e-05, "loss": 0.3068, "step": 24513 }, { "epoch": 4.0016325864250435, "grad_norm": 1.3704378604888916, "learning_rate": 1.7342901977638718e-05, "loss": 0.3175, "step": 24514 }, { "epoch": 4.001795845067548, "grad_norm": 2.041414737701416, "learning_rate": 1.7342686557250373e-05, "loss": 0.4192, "step": 24515 }, { "epoch": 4.001959103710052, "grad_norm": 1.3992640972137451, "learning_rate": 1.7342471129467945e-05, "loss": 0.3107, "step": 24516 }, { "epoch": 4.002122362352557, "grad_norm": 1.7178468704223633, "learning_rate": 1.7342255694291648e-05, "loss": 0.3306, "step": 24517 }, { "epoch": 4.002285620995061, "grad_norm": 2.13381028175354, "learning_rate": 1.7342040251721702e-05, "loss": 0.4188, "step": 24518 }, { "epoch": 4.002448879637566, "grad_norm": 1.8871780633926392, "learning_rate": 1.7341824801758328e-05, "loss": 0.3699, "step": 24519 }, { "epoch": 4.00261213828007, "grad_norm": 2.090540885925293, "learning_rate": 1.734160934440173e-05, "loss": 0.3842, "step": 24520 }, { "epoch": 4.002775396922575, "grad_norm": 1.67766273021698, "learning_rate": 1.734139387965214e-05, "loss": 0.3454, "step": 24521 }, { "epoch": 4.002938655565079, "grad_norm": 2.0989153385162354, "learning_rate": 1.7341178407509766e-05, "loss": 0.3982, "step": 24522 }, { "epoch": 4.003101914207583, "grad_norm": 1.9006186723709106, "learning_rate": 1.7340962927974828e-05, "loss": 0.3566, "step": 24523 }, { "epoch": 4.003265172850088, "grad_norm": 1.8775607347488403, "learning_rate": 1.7340747441047542e-05, "loss": 0.3657, "step": 24524 }, { "epoch": 4.003428431492592, "grad_norm": 1.9743586778640747, "learning_rate": 1.7340531946728125e-05, "loss": 0.3803, "step": 24525 }, { "epoch": 4.003591690135097, "grad_norm": 2.221385955810547, "learning_rate": 1.7340316445016797e-05, "loss": 0.3654, "step": 24526 }, { "epoch": 4.003754948777601, "grad_norm": 2.087578058242798, "learning_rate": 1.7340100935913768e-05, "loss": 0.3804, "step": 24527 }, { "epoch": 4.003918207420106, "grad_norm": 1.9461127519607544, "learning_rate": 1.7339885419419262e-05, "loss": 0.3845, "step": 24528 }, { "epoch": 4.00408146606261, "grad_norm": 1.8183393478393555, "learning_rate": 1.7339669895533493e-05, "loss": 0.3471, "step": 24529 }, { "epoch": 4.0042447247051145, "grad_norm": 1.8416218757629395, "learning_rate": 1.7339454364256676e-05, "loss": 0.3387, "step": 24530 }, { "epoch": 4.004407983347618, "grad_norm": 2.294296979904175, "learning_rate": 1.7339238825589033e-05, "loss": 0.4357, "step": 24531 }, { "epoch": 4.0045712419901225, "grad_norm": 2.0158588886260986, "learning_rate": 1.7339023279530777e-05, "loss": 0.355, "step": 24532 }, { "epoch": 4.004734500632627, "grad_norm": 2.3471436500549316, "learning_rate": 1.7338807726082128e-05, "loss": 0.3065, "step": 24533 }, { "epoch": 4.004897759275131, "grad_norm": 1.8976552486419678, "learning_rate": 1.73385921652433e-05, "loss": 0.3801, "step": 24534 }, { "epoch": 4.005061017917636, "grad_norm": 2.0956292152404785, "learning_rate": 1.733837659701451e-05, "loss": 0.3829, "step": 24535 }, { "epoch": 4.00522427656014, "grad_norm": 1.8197894096374512, "learning_rate": 1.733816102139598e-05, "loss": 0.3494, "step": 24536 }, { "epoch": 4.005387535202645, "grad_norm": 1.6919931173324585, "learning_rate": 1.7337945438387924e-05, "loss": 0.3075, "step": 24537 }, { "epoch": 4.005550793845149, "grad_norm": 1.9417668581008911, "learning_rate": 1.7337729847990556e-05, "loss": 0.3617, "step": 24538 }, { "epoch": 4.0057140524876536, "grad_norm": 2.113297700881958, "learning_rate": 1.73375142502041e-05, "loss": 0.3538, "step": 24539 }, { "epoch": 4.005877311130158, "grad_norm": 1.769315481185913, "learning_rate": 1.7337298645028764e-05, "loss": 0.3461, "step": 24540 }, { "epoch": 4.006040569772662, "grad_norm": 1.93052077293396, "learning_rate": 1.7337083032464774e-05, "loss": 0.3614, "step": 24541 }, { "epoch": 4.006203828415167, "grad_norm": 1.90720534324646, "learning_rate": 1.7336867412512343e-05, "loss": 0.3456, "step": 24542 }, { "epoch": 4.006367087057671, "grad_norm": 1.728916049003601, "learning_rate": 1.7336651785171687e-05, "loss": 0.341, "step": 24543 }, { "epoch": 4.006530345700176, "grad_norm": 2.078834056854248, "learning_rate": 1.7336436150443027e-05, "loss": 0.3401, "step": 24544 }, { "epoch": 4.00669360434268, "grad_norm": 2.1512889862060547, "learning_rate": 1.7336220508326572e-05, "loss": 0.3677, "step": 24545 }, { "epoch": 4.006856862985185, "grad_norm": 1.69480299949646, "learning_rate": 1.733600485882255e-05, "loss": 0.3417, "step": 24546 }, { "epoch": 4.007020121627689, "grad_norm": 2.191192150115967, "learning_rate": 1.7335789201931175e-05, "loss": 0.3727, "step": 24547 }, { "epoch": 4.0071833802701935, "grad_norm": 2.095205545425415, "learning_rate": 1.733557353765266e-05, "loss": 0.4073, "step": 24548 }, { "epoch": 4.007346638912697, "grad_norm": 2.0722243785858154, "learning_rate": 1.7335357865987223e-05, "loss": 0.393, "step": 24549 }, { "epoch": 4.0075098975552015, "grad_norm": 1.3379467725753784, "learning_rate": 1.7335142186935083e-05, "loss": 0.2813, "step": 24550 }, { "epoch": 4.007673156197706, "grad_norm": 1.8682538270950317, "learning_rate": 1.7334926500496458e-05, "loss": 0.3512, "step": 24551 }, { "epoch": 4.00783641484021, "grad_norm": 2.306736707687378, "learning_rate": 1.733471080667156e-05, "loss": 0.3828, "step": 24552 }, { "epoch": 4.007999673482715, "grad_norm": 3.018357753753662, "learning_rate": 1.7334495105460614e-05, "loss": 0.4901, "step": 24553 }, { "epoch": 4.008162932125219, "grad_norm": 1.6184693574905396, "learning_rate": 1.7334279396863833e-05, "loss": 0.35, "step": 24554 }, { "epoch": 4.008326190767724, "grad_norm": 1.8301939964294434, "learning_rate": 1.7334063680881434e-05, "loss": 0.3957, "step": 24555 }, { "epoch": 4.008489449410228, "grad_norm": 2.5403106212615967, "learning_rate": 1.7333847957513635e-05, "loss": 0.4318, "step": 24556 }, { "epoch": 4.0086527080527325, "grad_norm": 1.9763051271438599, "learning_rate": 1.7333632226760654e-05, "loss": 0.3896, "step": 24557 }, { "epoch": 4.008815966695237, "grad_norm": 2.589986801147461, "learning_rate": 1.7333416488622705e-05, "loss": 0.4166, "step": 24558 }, { "epoch": 4.008979225337741, "grad_norm": 1.8117579221725464, "learning_rate": 1.7333200743100008e-05, "loss": 0.3448, "step": 24559 }, { "epoch": 4.009142483980246, "grad_norm": 1.7785673141479492, "learning_rate": 1.7332984990192782e-05, "loss": 0.3693, "step": 24560 }, { "epoch": 4.00930574262275, "grad_norm": 1.8072937726974487, "learning_rate": 1.7332769229901242e-05, "loss": 0.3315, "step": 24561 }, { "epoch": 4.009469001265255, "grad_norm": 2.0655672550201416, "learning_rate": 1.7332553462225604e-05, "loss": 0.4151, "step": 24562 }, { "epoch": 4.009632259907759, "grad_norm": 1.6965813636779785, "learning_rate": 1.7332337687166085e-05, "loss": 0.3538, "step": 24563 }, { "epoch": 4.009795518550264, "grad_norm": 2.1637299060821533, "learning_rate": 1.7332121904722905e-05, "loss": 0.3841, "step": 24564 }, { "epoch": 4.009958777192768, "grad_norm": 2.0500307083129883, "learning_rate": 1.733190611489628e-05, "loss": 0.452, "step": 24565 }, { "epoch": 4.010122035835272, "grad_norm": 1.8412173986434937, "learning_rate": 1.7331690317686427e-05, "loss": 0.3724, "step": 24566 }, { "epoch": 4.010285294477776, "grad_norm": 1.948043704032898, "learning_rate": 1.7331474513093566e-05, "loss": 0.3529, "step": 24567 }, { "epoch": 4.0104485531202805, "grad_norm": 2.349623680114746, "learning_rate": 1.733125870111791e-05, "loss": 0.4088, "step": 24568 }, { "epoch": 4.010611811762785, "grad_norm": 2.1374878883361816, "learning_rate": 1.733104288175968e-05, "loss": 0.4111, "step": 24569 }, { "epoch": 4.010775070405289, "grad_norm": 2.5201361179351807, "learning_rate": 1.7330827055019094e-05, "loss": 0.4154, "step": 24570 }, { "epoch": 4.010938329047794, "grad_norm": 2.0899972915649414, "learning_rate": 1.7330611220896363e-05, "loss": 0.3619, "step": 24571 }, { "epoch": 4.011101587690298, "grad_norm": 1.712929368019104, "learning_rate": 1.7330395379391707e-05, "loss": 0.2779, "step": 24572 }, { "epoch": 4.011264846332803, "grad_norm": 1.8819975852966309, "learning_rate": 1.733017953050535e-05, "loss": 0.3775, "step": 24573 }, { "epoch": 4.011428104975307, "grad_norm": 1.7801523208618164, "learning_rate": 1.73299636742375e-05, "loss": 0.3434, "step": 24574 }, { "epoch": 4.0115913636178115, "grad_norm": 2.1001601219177246, "learning_rate": 1.7329747810588382e-05, "loss": 0.3648, "step": 24575 }, { "epoch": 4.011754622260316, "grad_norm": 1.810539960861206, "learning_rate": 1.7329531939558207e-05, "loss": 0.3482, "step": 24576 }, { "epoch": 4.01191788090282, "grad_norm": 1.9592523574829102, "learning_rate": 1.7329316061147197e-05, "loss": 0.3451, "step": 24577 }, { "epoch": 4.012081139545325, "grad_norm": 1.7880312204360962, "learning_rate": 1.7329100175355566e-05, "loss": 0.3193, "step": 24578 }, { "epoch": 4.012244398187829, "grad_norm": 1.9177886247634888, "learning_rate": 1.7328884282183537e-05, "loss": 0.349, "step": 24579 }, { "epoch": 4.012407656830334, "grad_norm": 2.0683035850524902, "learning_rate": 1.732866838163132e-05, "loss": 0.3759, "step": 24580 }, { "epoch": 4.012570915472838, "grad_norm": 2.150045394897461, "learning_rate": 1.7328452473699138e-05, "loss": 0.3872, "step": 24581 }, { "epoch": 4.012734174115343, "grad_norm": 1.9486820697784424, "learning_rate": 1.7328236558387205e-05, "loss": 0.4071, "step": 24582 }, { "epoch": 4.012897432757847, "grad_norm": 1.9701430797576904, "learning_rate": 1.7328020635695735e-05, "loss": 0.3987, "step": 24583 }, { "epoch": 4.013060691400351, "grad_norm": 1.9580442905426025, "learning_rate": 1.732780470562496e-05, "loss": 0.3386, "step": 24584 }, { "epoch": 4.013223950042855, "grad_norm": 2.1431097984313965, "learning_rate": 1.732758876817508e-05, "loss": 0.3574, "step": 24585 }, { "epoch": 4.0133872086853595, "grad_norm": 2.454213857650757, "learning_rate": 1.732737282334632e-05, "loss": 0.3866, "step": 24586 }, { "epoch": 4.013550467327864, "grad_norm": 2.153994560241699, "learning_rate": 1.73271568711389e-05, "loss": 0.3988, "step": 24587 }, { "epoch": 4.013713725970368, "grad_norm": 1.6797188520431519, "learning_rate": 1.7326940911553037e-05, "loss": 0.3173, "step": 24588 }, { "epoch": 4.013876984612873, "grad_norm": 1.8389464616775513, "learning_rate": 1.7326724944588944e-05, "loss": 0.3325, "step": 24589 }, { "epoch": 4.014040243255377, "grad_norm": 2.586357831954956, "learning_rate": 1.732650897024684e-05, "loss": 0.4559, "step": 24590 }, { "epoch": 4.014203501897882, "grad_norm": 1.9408403635025024, "learning_rate": 1.7326292988526943e-05, "loss": 0.4042, "step": 24591 }, { "epoch": 4.014366760540386, "grad_norm": 2.136228322982788, "learning_rate": 1.7326076999429476e-05, "loss": 0.3603, "step": 24592 }, { "epoch": 4.0145300191828905, "grad_norm": 1.9144153594970703, "learning_rate": 1.732586100295465e-05, "loss": 0.3441, "step": 24593 }, { "epoch": 4.014693277825395, "grad_norm": 1.8993300199508667, "learning_rate": 1.732564499910268e-05, "loss": 0.3545, "step": 24594 }, { "epoch": 4.014856536467899, "grad_norm": 2.3168680667877197, "learning_rate": 1.732542898787379e-05, "loss": 0.4105, "step": 24595 }, { "epoch": 4.015019795110404, "grad_norm": 1.8773047924041748, "learning_rate": 1.732521296926819e-05, "loss": 0.3436, "step": 24596 }, { "epoch": 4.015183053752908, "grad_norm": 2.1263744831085205, "learning_rate": 1.732499694328611e-05, "loss": 0.393, "step": 24597 }, { "epoch": 4.015346312395413, "grad_norm": 2.6076672077178955, "learning_rate": 1.7324780909927755e-05, "loss": 0.4299, "step": 24598 }, { "epoch": 4.015509571037917, "grad_norm": 2.451051950454712, "learning_rate": 1.7324564869193354e-05, "loss": 0.4832, "step": 24599 }, { "epoch": 4.015672829680422, "grad_norm": 2.180182933807373, "learning_rate": 1.732434882108311e-05, "loss": 0.3754, "step": 24600 }, { "epoch": 4.015836088322925, "grad_norm": 1.949605107307434, "learning_rate": 1.732413276559725e-05, "loss": 0.3651, "step": 24601 }, { "epoch": 4.01599934696543, "grad_norm": 2.273303985595703, "learning_rate": 1.7323916702735998e-05, "loss": 0.4165, "step": 24602 }, { "epoch": 4.016162605607934, "grad_norm": 2.075350046157837, "learning_rate": 1.7323700632499558e-05, "loss": 0.3777, "step": 24603 }, { "epoch": 4.0163258642504385, "grad_norm": 1.8600677251815796, "learning_rate": 1.7323484554888154e-05, "loss": 0.3672, "step": 24604 }, { "epoch": 4.016489122892943, "grad_norm": 1.9140207767486572, "learning_rate": 1.7323268469902e-05, "loss": 0.3507, "step": 24605 }, { "epoch": 4.016652381535447, "grad_norm": 1.9570472240447998, "learning_rate": 1.732305237754132e-05, "loss": 0.334, "step": 24606 }, { "epoch": 4.016815640177952, "grad_norm": 2.031806230545044, "learning_rate": 1.7322836277806327e-05, "loss": 0.3487, "step": 24607 }, { "epoch": 4.016978898820456, "grad_norm": 2.0917775630950928, "learning_rate": 1.732262017069724e-05, "loss": 0.4027, "step": 24608 }, { "epoch": 4.017142157462961, "grad_norm": 1.7361488342285156, "learning_rate": 1.7322404056214276e-05, "loss": 0.3489, "step": 24609 }, { "epoch": 4.017305416105465, "grad_norm": 1.8589004278182983, "learning_rate": 1.7322187934357652e-05, "loss": 0.364, "step": 24610 }, { "epoch": 4.0174686747479695, "grad_norm": 1.861515760421753, "learning_rate": 1.732197180512759e-05, "loss": 0.3618, "step": 24611 }, { "epoch": 4.017631933390474, "grad_norm": 2.140676259994507, "learning_rate": 1.7321755668524302e-05, "loss": 0.4271, "step": 24612 }, { "epoch": 4.017795192032978, "grad_norm": 1.5923500061035156, "learning_rate": 1.732153952454801e-05, "loss": 0.3011, "step": 24613 }, { "epoch": 4.017958450675483, "grad_norm": 1.9693456888198853, "learning_rate": 1.7321323373198928e-05, "loss": 0.3374, "step": 24614 }, { "epoch": 4.018121709317987, "grad_norm": 1.7563300132751465, "learning_rate": 1.7321107214477275e-05, "loss": 0.3965, "step": 24615 }, { "epoch": 4.018284967960492, "grad_norm": 2.526695489883423, "learning_rate": 1.732089104838327e-05, "loss": 0.4475, "step": 24616 }, { "epoch": 4.018448226602996, "grad_norm": 1.8868001699447632, "learning_rate": 1.732067487491713e-05, "loss": 0.4394, "step": 24617 }, { "epoch": 4.018611485245501, "grad_norm": 1.9299815893173218, "learning_rate": 1.732045869407907e-05, "loss": 0.2966, "step": 24618 }, { "epoch": 4.018774743888004, "grad_norm": 2.2604501247406006, "learning_rate": 1.7320242505869315e-05, "loss": 0.4205, "step": 24619 }, { "epoch": 4.018938002530509, "grad_norm": 2.2705891132354736, "learning_rate": 1.7320026310288072e-05, "loss": 0.3973, "step": 24620 }, { "epoch": 4.019101261173013, "grad_norm": 1.9252784252166748, "learning_rate": 1.7319810107335568e-05, "loss": 0.3922, "step": 24621 }, { "epoch": 4.0192645198155175, "grad_norm": 1.6891261339187622, "learning_rate": 1.7319593897012015e-05, "loss": 0.3127, "step": 24622 }, { "epoch": 4.019427778458022, "grad_norm": 2.6053121089935303, "learning_rate": 1.7319377679317635e-05, "loss": 0.4202, "step": 24623 }, { "epoch": 4.019591037100526, "grad_norm": 2.2606394290924072, "learning_rate": 1.731916145425264e-05, "loss": 0.4249, "step": 24624 }, { "epoch": 4.019754295743031, "grad_norm": 2.2602834701538086, "learning_rate": 1.7318945221817255e-05, "loss": 0.3683, "step": 24625 }, { "epoch": 4.019917554385535, "grad_norm": 2.3031423091888428, "learning_rate": 1.7318728982011693e-05, "loss": 0.4262, "step": 24626 }, { "epoch": 4.02008081302804, "grad_norm": 2.1445329189300537, "learning_rate": 1.7318512734836176e-05, "loss": 0.3857, "step": 24627 }, { "epoch": 4.020244071670544, "grad_norm": 2.1533782482147217, "learning_rate": 1.7318296480290912e-05, "loss": 0.3706, "step": 24628 }, { "epoch": 4.0204073303130485, "grad_norm": 2.273571252822876, "learning_rate": 1.731808021837613e-05, "loss": 0.4322, "step": 24629 }, { "epoch": 4.020570588955553, "grad_norm": 2.0520105361938477, "learning_rate": 1.7317863949092043e-05, "loss": 0.4011, "step": 24630 }, { "epoch": 4.020733847598057, "grad_norm": 2.314700126647949, "learning_rate": 1.7317647672438867e-05, "loss": 0.4292, "step": 24631 }, { "epoch": 4.020897106240562, "grad_norm": 1.946603536605835, "learning_rate": 1.7317431388416824e-05, "loss": 0.3799, "step": 24632 }, { "epoch": 4.021060364883066, "grad_norm": 2.683526039123535, "learning_rate": 1.7317215097026125e-05, "loss": 0.4756, "step": 24633 }, { "epoch": 4.021223623525571, "grad_norm": 1.562991976737976, "learning_rate": 1.7316998798267e-05, "loss": 0.3068, "step": 24634 }, { "epoch": 4.021386882168075, "grad_norm": 2.5822298526763916, "learning_rate": 1.731678249213965e-05, "loss": 0.3786, "step": 24635 }, { "epoch": 4.02155014081058, "grad_norm": 1.8502166271209717, "learning_rate": 1.731656617864431e-05, "loss": 0.3372, "step": 24636 }, { "epoch": 4.021713399453083, "grad_norm": 1.966315746307373, "learning_rate": 1.7316349857781185e-05, "loss": 0.3699, "step": 24637 }, { "epoch": 4.021876658095588, "grad_norm": 1.7790768146514893, "learning_rate": 1.73161335295505e-05, "loss": 0.3668, "step": 24638 }, { "epoch": 4.022039916738092, "grad_norm": 2.235748291015625, "learning_rate": 1.731591719395247e-05, "loss": 0.4813, "step": 24639 }, { "epoch": 4.0222031753805965, "grad_norm": 1.5489590167999268, "learning_rate": 1.731570085098731e-05, "loss": 0.3028, "step": 24640 }, { "epoch": 4.022366434023101, "grad_norm": 1.9135938882827759, "learning_rate": 1.7315484500655246e-05, "loss": 0.374, "step": 24641 }, { "epoch": 4.022529692665605, "grad_norm": 1.74357271194458, "learning_rate": 1.731526814295649e-05, "loss": 0.3401, "step": 24642 }, { "epoch": 4.02269295130811, "grad_norm": 1.9861558675765991, "learning_rate": 1.731505177789126e-05, "loss": 0.3473, "step": 24643 }, { "epoch": 4.022856209950614, "grad_norm": 2.2482621669769287, "learning_rate": 1.7314835405459772e-05, "loss": 0.4181, "step": 24644 }, { "epoch": 4.023019468593119, "grad_norm": 1.5671042203903198, "learning_rate": 1.7314619025662253e-05, "loss": 0.2899, "step": 24645 }, { "epoch": 4.023182727235623, "grad_norm": 1.7376095056533813, "learning_rate": 1.731440263849891e-05, "loss": 0.3576, "step": 24646 }, { "epoch": 4.0233459858781275, "grad_norm": 1.9044655561447144, "learning_rate": 1.7314186243969967e-05, "loss": 0.4455, "step": 24647 }, { "epoch": 4.023509244520632, "grad_norm": 2.134673833847046, "learning_rate": 1.731396984207564e-05, "loss": 0.392, "step": 24648 }, { "epoch": 4.023672503163136, "grad_norm": 1.7459988594055176, "learning_rate": 1.7313753432816148e-05, "loss": 0.3426, "step": 24649 }, { "epoch": 4.023835761805641, "grad_norm": 1.904217004776001, "learning_rate": 1.7313537016191706e-05, "loss": 0.3616, "step": 24650 }, { "epoch": 4.023999020448145, "grad_norm": 1.920271635055542, "learning_rate": 1.7313320592202537e-05, "loss": 0.3805, "step": 24651 }, { "epoch": 4.02416227909065, "grad_norm": 2.350644826889038, "learning_rate": 1.7313104160848856e-05, "loss": 0.4581, "step": 24652 }, { "epoch": 4.024325537733154, "grad_norm": 1.8792189359664917, "learning_rate": 1.7312887722130878e-05, "loss": 0.3188, "step": 24653 }, { "epoch": 4.024488796375658, "grad_norm": 1.79855477809906, "learning_rate": 1.7312671276048825e-05, "loss": 0.3774, "step": 24654 }, { "epoch": 4.024652055018162, "grad_norm": 1.8494254350662231, "learning_rate": 1.7312454822602916e-05, "loss": 0.3533, "step": 24655 }, { "epoch": 4.024815313660667, "grad_norm": 1.9269497394561768, "learning_rate": 1.7312238361793366e-05, "loss": 0.3195, "step": 24656 }, { "epoch": 4.024978572303171, "grad_norm": 1.892053484916687, "learning_rate": 1.7312021893620393e-05, "loss": 0.3664, "step": 24657 }, { "epoch": 4.0251418309456755, "grad_norm": 2.3914568424224854, "learning_rate": 1.7311805418084217e-05, "loss": 0.3686, "step": 24658 }, { "epoch": 4.02530508958818, "grad_norm": 2.049710512161255, "learning_rate": 1.7311588935185054e-05, "loss": 0.409, "step": 24659 }, { "epoch": 4.025468348230684, "grad_norm": 2.157951831817627, "learning_rate": 1.7311372444923122e-05, "loss": 0.4005, "step": 24660 }, { "epoch": 4.025631606873189, "grad_norm": 2.1454057693481445, "learning_rate": 1.7311155947298644e-05, "loss": 0.3722, "step": 24661 }, { "epoch": 4.025794865515693, "grad_norm": 2.4769630432128906, "learning_rate": 1.731093944231183e-05, "loss": 0.472, "step": 24662 }, { "epoch": 4.025958124158198, "grad_norm": 2.483635187149048, "learning_rate": 1.73107229299629e-05, "loss": 0.4367, "step": 24663 }, { "epoch": 4.026121382800702, "grad_norm": 2.2231996059417725, "learning_rate": 1.731050641025208e-05, "loss": 0.3662, "step": 24664 }, { "epoch": 4.0262846414432065, "grad_norm": 1.9716413021087646, "learning_rate": 1.731028988317958e-05, "loss": 0.3226, "step": 24665 }, { "epoch": 4.026447900085711, "grad_norm": 1.997195839881897, "learning_rate": 1.731007334874562e-05, "loss": 0.3798, "step": 24666 }, { "epoch": 4.026611158728215, "grad_norm": 1.780727505683899, "learning_rate": 1.7309856806950414e-05, "loss": 0.3517, "step": 24667 }, { "epoch": 4.02677441737072, "grad_norm": 2.0639500617980957, "learning_rate": 1.730964025779419e-05, "loss": 0.3847, "step": 24668 }, { "epoch": 4.026937676013224, "grad_norm": 2.2725112438201904, "learning_rate": 1.7309423701277154e-05, "loss": 0.3995, "step": 24669 }, { "epoch": 4.027100934655729, "grad_norm": 2.1561477184295654, "learning_rate": 1.7309207137399535e-05, "loss": 0.4312, "step": 24670 }, { "epoch": 4.027264193298233, "grad_norm": 2.3023884296417236, "learning_rate": 1.7308990566161544e-05, "loss": 0.3766, "step": 24671 }, { "epoch": 4.027427451940737, "grad_norm": 1.759785532951355, "learning_rate": 1.7308773987563406e-05, "loss": 0.287, "step": 24672 }, { "epoch": 4.027590710583241, "grad_norm": 2.24411940574646, "learning_rate": 1.730855740160533e-05, "loss": 0.3493, "step": 24673 }, { "epoch": 4.027753969225746, "grad_norm": 1.991821527481079, "learning_rate": 1.7308340808287537e-05, "loss": 0.3301, "step": 24674 }, { "epoch": 4.02791722786825, "grad_norm": 1.9293133020401, "learning_rate": 1.7308124207610252e-05, "loss": 0.4383, "step": 24675 }, { "epoch": 4.0280804865107545, "grad_norm": 2.1214895248413086, "learning_rate": 1.7307907599573684e-05, "loss": 0.3951, "step": 24676 }, { "epoch": 4.028243745153259, "grad_norm": 2.4013400077819824, "learning_rate": 1.7307690984178057e-05, "loss": 0.4236, "step": 24677 }, { "epoch": 4.028407003795763, "grad_norm": 1.7576632499694824, "learning_rate": 1.7307474361423587e-05, "loss": 0.2969, "step": 24678 }, { "epoch": 4.028570262438268, "grad_norm": 1.5853466987609863, "learning_rate": 1.730725773131049e-05, "loss": 0.3754, "step": 24679 }, { "epoch": 4.028733521080772, "grad_norm": 1.9320746660232544, "learning_rate": 1.7307041093838986e-05, "loss": 0.3232, "step": 24680 }, { "epoch": 4.028896779723277, "grad_norm": 2.106745958328247, "learning_rate": 1.73068244490093e-05, "loss": 0.4258, "step": 24681 }, { "epoch": 4.029060038365781, "grad_norm": 1.9843603372573853, "learning_rate": 1.7306607796821638e-05, "loss": 0.3605, "step": 24682 }, { "epoch": 4.0292232970082855, "grad_norm": 1.9216686487197876, "learning_rate": 1.7306391137276227e-05, "loss": 0.3344, "step": 24683 }, { "epoch": 4.02938655565079, "grad_norm": 2.011704683303833, "learning_rate": 1.730617447037328e-05, "loss": 0.356, "step": 24684 }, { "epoch": 4.029549814293294, "grad_norm": 2.22025465965271, "learning_rate": 1.7305957796113015e-05, "loss": 0.3894, "step": 24685 }, { "epoch": 4.029713072935799, "grad_norm": 2.0677242279052734, "learning_rate": 1.7305741114495654e-05, "loss": 0.3589, "step": 24686 }, { "epoch": 4.029876331578303, "grad_norm": 1.738715410232544, "learning_rate": 1.7305524425521415e-05, "loss": 0.3246, "step": 24687 }, { "epoch": 4.030039590220808, "grad_norm": 2.7755722999572754, "learning_rate": 1.7305307729190516e-05, "loss": 0.3803, "step": 24688 }, { "epoch": 4.030202848863311, "grad_norm": 2.646418333053589, "learning_rate": 1.7305091025503173e-05, "loss": 0.5098, "step": 24689 }, { "epoch": 4.030366107505816, "grad_norm": 2.0748143196105957, "learning_rate": 1.7304874314459603e-05, "loss": 0.3495, "step": 24690 }, { "epoch": 4.03052936614832, "grad_norm": 2.734858989715576, "learning_rate": 1.730465759606003e-05, "loss": 0.4437, "step": 24691 }, { "epoch": 4.030692624790825, "grad_norm": 2.40895414352417, "learning_rate": 1.7304440870304665e-05, "loss": 0.4105, "step": 24692 }, { "epoch": 4.030855883433329, "grad_norm": 1.8893777132034302, "learning_rate": 1.7304224137193732e-05, "loss": 0.3488, "step": 24693 }, { "epoch": 4.0310191420758335, "grad_norm": 1.3743106126785278, "learning_rate": 1.730400739672745e-05, "loss": 0.2895, "step": 24694 }, { "epoch": 4.031182400718338, "grad_norm": 1.8491483926773071, "learning_rate": 1.730379064890603e-05, "loss": 0.3249, "step": 24695 }, { "epoch": 4.031345659360842, "grad_norm": 1.9048960208892822, "learning_rate": 1.73035738937297e-05, "loss": 0.331, "step": 24696 }, { "epoch": 4.031508918003347, "grad_norm": 2.1846072673797607, "learning_rate": 1.7303357131198665e-05, "loss": 0.3807, "step": 24697 }, { "epoch": 4.031672176645851, "grad_norm": 1.9766566753387451, "learning_rate": 1.730314036131316e-05, "loss": 0.354, "step": 24698 }, { "epoch": 4.031835435288356, "grad_norm": 1.9705876111984253, "learning_rate": 1.7302923584073384e-05, "loss": 0.3653, "step": 24699 }, { "epoch": 4.03199869393086, "grad_norm": 2.249220848083496, "learning_rate": 1.7302706799479575e-05, "loss": 0.3827, "step": 24700 }, { "epoch": 4.0321619525733645, "grad_norm": 1.9181371927261353, "learning_rate": 1.7302490007531937e-05, "loss": 0.3563, "step": 24701 }, { "epoch": 4.032325211215869, "grad_norm": 1.9823635816574097, "learning_rate": 1.73022732082307e-05, "loss": 0.3463, "step": 24702 }, { "epoch": 4.032488469858373, "grad_norm": 2.399610996246338, "learning_rate": 1.730205640157607e-05, "loss": 0.4319, "step": 24703 }, { "epoch": 4.032651728500878, "grad_norm": 2.1875104904174805, "learning_rate": 1.730183958756827e-05, "loss": 0.3649, "step": 24704 }, { "epoch": 4.032814987143382, "grad_norm": 1.7815759181976318, "learning_rate": 1.7301622766207526e-05, "loss": 0.3105, "step": 24705 }, { "epoch": 4.032978245785887, "grad_norm": 2.324524402618408, "learning_rate": 1.7301405937494044e-05, "loss": 0.3886, "step": 24706 }, { "epoch": 4.03314150442839, "grad_norm": 2.5875864028930664, "learning_rate": 1.7301189101428055e-05, "loss": 0.4357, "step": 24707 }, { "epoch": 4.033304763070895, "grad_norm": 1.9198929071426392, "learning_rate": 1.7300972258009762e-05, "loss": 0.3627, "step": 24708 }, { "epoch": 4.033468021713399, "grad_norm": 1.949167013168335, "learning_rate": 1.7300755407239396e-05, "loss": 0.3626, "step": 24709 }, { "epoch": 4.033631280355904, "grad_norm": 1.521552324295044, "learning_rate": 1.730053854911717e-05, "loss": 0.2922, "step": 24710 }, { "epoch": 4.033794538998408, "grad_norm": 2.1226284503936768, "learning_rate": 1.7300321683643306e-05, "loss": 0.3699, "step": 24711 }, { "epoch": 4.0339577976409124, "grad_norm": 2.1836769580841064, "learning_rate": 1.7300104810818018e-05, "loss": 0.4108, "step": 24712 }, { "epoch": 4.034121056283417, "grad_norm": 1.9227032661437988, "learning_rate": 1.7299887930641528e-05, "loss": 0.3776, "step": 24713 }, { "epoch": 4.034284314925921, "grad_norm": 2.111098289489746, "learning_rate": 1.7299671043114052e-05, "loss": 0.3681, "step": 24714 }, { "epoch": 4.034447573568426, "grad_norm": 1.8446873426437378, "learning_rate": 1.7299454148235807e-05, "loss": 0.3532, "step": 24715 }, { "epoch": 4.03461083221093, "grad_norm": 2.2721445560455322, "learning_rate": 1.7299237246007018e-05, "loss": 0.4344, "step": 24716 }, { "epoch": 4.034774090853435, "grad_norm": 2.137983560562134, "learning_rate": 1.7299020336427896e-05, "loss": 0.4172, "step": 24717 }, { "epoch": 4.034937349495939, "grad_norm": 1.8754804134368896, "learning_rate": 1.7298803419498664e-05, "loss": 0.3935, "step": 24718 }, { "epoch": 4.0351006081384435, "grad_norm": 1.9859044551849365, "learning_rate": 1.7298586495219536e-05, "loss": 0.439, "step": 24719 }, { "epoch": 4.035263866780948, "grad_norm": 1.897135615348816, "learning_rate": 1.7298369563590734e-05, "loss": 0.3497, "step": 24720 }, { "epoch": 4.035427125423452, "grad_norm": 1.8634828329086304, "learning_rate": 1.729815262461248e-05, "loss": 0.362, "step": 24721 }, { "epoch": 4.035590384065957, "grad_norm": 2.029602289199829, "learning_rate": 1.7297935678284985e-05, "loss": 0.3696, "step": 24722 }, { "epoch": 4.035753642708461, "grad_norm": 1.593232274055481, "learning_rate": 1.729771872460847e-05, "loss": 0.3407, "step": 24723 }, { "epoch": 4.035916901350966, "grad_norm": 2.1311795711517334, "learning_rate": 1.7297501763583154e-05, "loss": 0.3976, "step": 24724 }, { "epoch": 4.036080159993469, "grad_norm": 1.9210264682769775, "learning_rate": 1.729728479520926e-05, "loss": 0.347, "step": 24725 }, { "epoch": 4.036243418635974, "grad_norm": 2.0872080326080322, "learning_rate": 1.7297067819486996e-05, "loss": 0.3569, "step": 24726 }, { "epoch": 4.036406677278478, "grad_norm": 2.0486934185028076, "learning_rate": 1.729685083641659e-05, "loss": 0.3632, "step": 24727 }, { "epoch": 4.036569935920983, "grad_norm": 2.4224720001220703, "learning_rate": 1.729663384599826e-05, "loss": 0.4726, "step": 24728 }, { "epoch": 4.036733194563487, "grad_norm": 2.448580265045166, "learning_rate": 1.7296416848232213e-05, "loss": 0.3904, "step": 24729 }, { "epoch": 4.036896453205991, "grad_norm": 2.267240285873413, "learning_rate": 1.7296199843118685e-05, "loss": 0.3537, "step": 24730 }, { "epoch": 4.037059711848496, "grad_norm": 2.0370020866394043, "learning_rate": 1.7295982830657882e-05, "loss": 0.3655, "step": 24731 }, { "epoch": 4.037222970491, "grad_norm": 1.9939885139465332, "learning_rate": 1.7295765810850026e-05, "loss": 0.3437, "step": 24732 }, { "epoch": 4.037386229133505, "grad_norm": 2.0842177867889404, "learning_rate": 1.7295548783695336e-05, "loss": 0.3733, "step": 24733 }, { "epoch": 4.037549487776009, "grad_norm": 2.3683815002441406, "learning_rate": 1.729533174919403e-05, "loss": 0.3871, "step": 24734 }, { "epoch": 4.037712746418514, "grad_norm": 2.446010112762451, "learning_rate": 1.7295114707346326e-05, "loss": 0.3775, "step": 24735 }, { "epoch": 4.037876005061018, "grad_norm": 2.1147778034210205, "learning_rate": 1.7294897658152448e-05, "loss": 0.3444, "step": 24736 }, { "epoch": 4.0380392637035225, "grad_norm": 2.0689666271209717, "learning_rate": 1.729468060161261e-05, "loss": 0.3595, "step": 24737 }, { "epoch": 4.038202522346027, "grad_norm": 2.185894012451172, "learning_rate": 1.7294463537727026e-05, "loss": 0.3851, "step": 24738 }, { "epoch": 4.038365780988531, "grad_norm": 1.9434367418289185, "learning_rate": 1.729424646649592e-05, "loss": 0.3569, "step": 24739 }, { "epoch": 4.038529039631036, "grad_norm": 2.540726900100708, "learning_rate": 1.7294029387919508e-05, "loss": 0.3889, "step": 24740 }, { "epoch": 4.03869229827354, "grad_norm": 1.8011360168457031, "learning_rate": 1.7293812301998013e-05, "loss": 0.3743, "step": 24741 }, { "epoch": 4.038855556916044, "grad_norm": 2.1382229328155518, "learning_rate": 1.7293595208731652e-05, "loss": 0.3937, "step": 24742 }, { "epoch": 4.039018815558548, "grad_norm": 2.296064615249634, "learning_rate": 1.729337810812064e-05, "loss": 0.436, "step": 24743 }, { "epoch": 4.039182074201053, "grad_norm": 1.9865866899490356, "learning_rate": 1.72931610001652e-05, "loss": 0.3864, "step": 24744 }, { "epoch": 4.039345332843557, "grad_norm": 1.9893217086791992, "learning_rate": 1.729294388486555e-05, "loss": 0.4068, "step": 24745 }, { "epoch": 4.039508591486062, "grad_norm": 1.7360905408859253, "learning_rate": 1.7292726762221902e-05, "loss": 0.3387, "step": 24746 }, { "epoch": 4.039671850128566, "grad_norm": 1.8136422634124756, "learning_rate": 1.7292509632234488e-05, "loss": 0.3524, "step": 24747 }, { "epoch": 4.03983510877107, "grad_norm": 1.4694567918777466, "learning_rate": 1.729229249490351e-05, "loss": 0.3302, "step": 24748 }, { "epoch": 4.039998367413575, "grad_norm": 1.9864834547042847, "learning_rate": 1.72920753502292e-05, "loss": 0.3851, "step": 24749 }, { "epoch": 4.040161626056079, "grad_norm": 1.7674410343170166, "learning_rate": 1.7291858198211772e-05, "loss": 0.3421, "step": 24750 }, { "epoch": 4.040324884698584, "grad_norm": 2.1639182567596436, "learning_rate": 1.7291641038851444e-05, "loss": 0.4164, "step": 24751 }, { "epoch": 4.040488143341088, "grad_norm": 1.9563337564468384, "learning_rate": 1.7291423872148438e-05, "loss": 0.3438, "step": 24752 }, { "epoch": 4.040651401983593, "grad_norm": 1.9036000967025757, "learning_rate": 1.7291206698102967e-05, "loss": 0.3997, "step": 24753 }, { "epoch": 4.040814660626097, "grad_norm": 1.9593783617019653, "learning_rate": 1.7290989516715256e-05, "loss": 0.3997, "step": 24754 }, { "epoch": 4.0409779192686015, "grad_norm": 2.1661806106567383, "learning_rate": 1.729077232798552e-05, "loss": 0.4143, "step": 24755 }, { "epoch": 4.041141177911106, "grad_norm": 2.0632424354553223, "learning_rate": 1.7290555131913974e-05, "loss": 0.3647, "step": 24756 }, { "epoch": 4.04130443655361, "grad_norm": 1.8265914916992188, "learning_rate": 1.729033792850084e-05, "loss": 0.3541, "step": 24757 }, { "epoch": 4.041467695196115, "grad_norm": 2.1008870601654053, "learning_rate": 1.7290120717746345e-05, "loss": 0.4301, "step": 24758 }, { "epoch": 4.041630953838619, "grad_norm": 1.8179280757904053, "learning_rate": 1.7289903499650695e-05, "loss": 0.324, "step": 24759 }, { "epoch": 4.041794212481123, "grad_norm": 1.9136911630630493, "learning_rate": 1.7289686274214116e-05, "loss": 0.3503, "step": 24760 }, { "epoch": 4.041957471123627, "grad_norm": 2.1811389923095703, "learning_rate": 1.7289469041436824e-05, "loss": 0.3261, "step": 24761 }, { "epoch": 4.042120729766132, "grad_norm": 1.9623682498931885, "learning_rate": 1.728925180131904e-05, "loss": 0.3285, "step": 24762 }, { "epoch": 4.042283988408636, "grad_norm": 1.8438504934310913, "learning_rate": 1.7289034553860983e-05, "loss": 0.392, "step": 24763 }, { "epoch": 4.042447247051141, "grad_norm": 1.8615411520004272, "learning_rate": 1.7288817299062864e-05, "loss": 0.4135, "step": 24764 }, { "epoch": 4.042610505693645, "grad_norm": 2.1381888389587402, "learning_rate": 1.7288600036924913e-05, "loss": 0.3817, "step": 24765 }, { "epoch": 4.042773764336149, "grad_norm": 2.6915080547332764, "learning_rate": 1.7288382767447344e-05, "loss": 0.4407, "step": 24766 }, { "epoch": 4.042937022978654, "grad_norm": 1.994687795639038, "learning_rate": 1.728816549063037e-05, "loss": 0.3301, "step": 24767 }, { "epoch": 4.043100281621158, "grad_norm": 1.7807897329330444, "learning_rate": 1.728794820647422e-05, "loss": 0.3496, "step": 24768 }, { "epoch": 4.043263540263663, "grad_norm": 2.2903904914855957, "learning_rate": 1.7287730914979107e-05, "loss": 0.4108, "step": 24769 }, { "epoch": 4.043426798906167, "grad_norm": 1.821264624595642, "learning_rate": 1.728751361614525e-05, "loss": 0.3353, "step": 24770 }, { "epoch": 4.043590057548672, "grad_norm": 2.886610746383667, "learning_rate": 1.728729630997287e-05, "loss": 0.3988, "step": 24771 }, { "epoch": 4.043753316191176, "grad_norm": 2.1982431411743164, "learning_rate": 1.7287078996462186e-05, "loss": 0.3957, "step": 24772 }, { "epoch": 4.0439165748336805, "grad_norm": 2.406987190246582, "learning_rate": 1.7286861675613413e-05, "loss": 0.3116, "step": 24773 }, { "epoch": 4.044079833476185, "grad_norm": 2.291510820388794, "learning_rate": 1.7286644347426774e-05, "loss": 0.3858, "step": 24774 }, { "epoch": 4.044243092118689, "grad_norm": 1.7403621673583984, "learning_rate": 1.7286427011902485e-05, "loss": 0.3136, "step": 24775 }, { "epoch": 4.044406350761194, "grad_norm": 2.006904125213623, "learning_rate": 1.7286209669040766e-05, "loss": 0.3961, "step": 24776 }, { "epoch": 4.044569609403698, "grad_norm": 2.53971266746521, "learning_rate": 1.7285992318841834e-05, "loss": 0.3685, "step": 24777 }, { "epoch": 4.044732868046202, "grad_norm": 2.0507779121398926, "learning_rate": 1.7285774961305913e-05, "loss": 0.3705, "step": 24778 }, { "epoch": 4.044896126688706, "grad_norm": 1.7721582651138306, "learning_rate": 1.7285557596433217e-05, "loss": 0.3177, "step": 24779 }, { "epoch": 4.045059385331211, "grad_norm": 2.0602402687072754, "learning_rate": 1.7285340224223965e-05, "loss": 0.3402, "step": 24780 }, { "epoch": 4.045222643973715, "grad_norm": 1.7405812740325928, "learning_rate": 1.728512284467838e-05, "loss": 0.3311, "step": 24781 }, { "epoch": 4.0453859026162196, "grad_norm": 2.5344505310058594, "learning_rate": 1.7284905457796678e-05, "loss": 0.4188, "step": 24782 }, { "epoch": 4.045549161258724, "grad_norm": 1.6988818645477295, "learning_rate": 1.7284688063579077e-05, "loss": 0.3388, "step": 24783 }, { "epoch": 4.045712419901228, "grad_norm": 1.701586127281189, "learning_rate": 1.7284470662025795e-05, "loss": 0.3197, "step": 24784 }, { "epoch": 4.045875678543733, "grad_norm": 2.8404228687286377, "learning_rate": 1.7284253253137055e-05, "loss": 0.4484, "step": 24785 }, { "epoch": 4.046038937186237, "grad_norm": 5.009791851043701, "learning_rate": 1.7284035836913074e-05, "loss": 0.4244, "step": 24786 }, { "epoch": 4.046202195828742, "grad_norm": 2.340726137161255, "learning_rate": 1.728381841335407e-05, "loss": 0.383, "step": 24787 }, { "epoch": 4.046365454471246, "grad_norm": 2.102886199951172, "learning_rate": 1.7283600982460266e-05, "loss": 0.4119, "step": 24788 }, { "epoch": 4.046528713113751, "grad_norm": 1.7743016481399536, "learning_rate": 1.7283383544231874e-05, "loss": 0.3406, "step": 24789 }, { "epoch": 4.046691971756255, "grad_norm": 2.4615986347198486, "learning_rate": 1.7283166098669118e-05, "loss": 0.3937, "step": 24790 }, { "epoch": 4.0468552303987595, "grad_norm": 2.014356851577759, "learning_rate": 1.7282948645772215e-05, "loss": 0.383, "step": 24791 }, { "epoch": 4.047018489041264, "grad_norm": 2.0650973320007324, "learning_rate": 1.7282731185541386e-05, "loss": 0.4327, "step": 24792 }, { "epoch": 4.047181747683768, "grad_norm": 1.7267487049102783, "learning_rate": 1.7282513717976847e-05, "loss": 0.3505, "step": 24793 }, { "epoch": 4.047345006326273, "grad_norm": 2.062802314758301, "learning_rate": 1.7282296243078823e-05, "loss": 0.3653, "step": 24794 }, { "epoch": 4.047508264968776, "grad_norm": 2.559295892715454, "learning_rate": 1.7282078760847523e-05, "loss": 0.4519, "step": 24795 }, { "epoch": 4.047671523611281, "grad_norm": 2.5510215759277344, "learning_rate": 1.7281861271283174e-05, "loss": 0.3731, "step": 24796 }, { "epoch": 4.047834782253785, "grad_norm": 1.7441152334213257, "learning_rate": 1.7281643774385993e-05, "loss": 0.3465, "step": 24797 }, { "epoch": 4.04799804089629, "grad_norm": 1.7466269731521606, "learning_rate": 1.7281426270156196e-05, "loss": 0.3386, "step": 24798 }, { "epoch": 4.048161299538794, "grad_norm": 1.8663915395736694, "learning_rate": 1.728120875859401e-05, "loss": 0.4158, "step": 24799 }, { "epoch": 4.0483245581812985, "grad_norm": 2.4010226726531982, "learning_rate": 1.7280991239699643e-05, "loss": 0.353, "step": 24800 }, { "epoch": 4.048487816823803, "grad_norm": 2.252702474594116, "learning_rate": 1.7280773713473322e-05, "loss": 0.4047, "step": 24801 }, { "epoch": 4.048651075466307, "grad_norm": 2.53143310546875, "learning_rate": 1.7280556179915265e-05, "loss": 0.4026, "step": 24802 }, { "epoch": 4.048814334108812, "grad_norm": 1.8167204856872559, "learning_rate": 1.7280338639025686e-05, "loss": 0.342, "step": 24803 }, { "epoch": 4.048977592751316, "grad_norm": 1.84067964553833, "learning_rate": 1.7280121090804813e-05, "loss": 0.357, "step": 24804 }, { "epoch": 4.049140851393821, "grad_norm": 2.9518728256225586, "learning_rate": 1.7279903535252857e-05, "loss": 0.4309, "step": 24805 }, { "epoch": 4.049304110036325, "grad_norm": 1.997244954109192, "learning_rate": 1.727968597237004e-05, "loss": 0.3564, "step": 24806 }, { "epoch": 4.04946736867883, "grad_norm": 1.8510701656341553, "learning_rate": 1.727946840215658e-05, "loss": 0.3805, "step": 24807 }, { "epoch": 4.049630627321334, "grad_norm": 2.0673961639404297, "learning_rate": 1.7279250824612702e-05, "loss": 0.3175, "step": 24808 }, { "epoch": 4.0497938859638385, "grad_norm": 2.2811694145202637, "learning_rate": 1.7279033239738617e-05, "loss": 0.3426, "step": 24809 }, { "epoch": 4.049957144606343, "grad_norm": 1.8980231285095215, "learning_rate": 1.727881564753455e-05, "loss": 0.3915, "step": 24810 }, { "epoch": 4.050120403248847, "grad_norm": 2.057246446609497, "learning_rate": 1.7278598048000715e-05, "loss": 0.3543, "step": 24811 }, { "epoch": 4.050283661891352, "grad_norm": 2.419642210006714, "learning_rate": 1.7278380441137333e-05, "loss": 0.3778, "step": 24812 }, { "epoch": 4.050446920533855, "grad_norm": 2.0714118480682373, "learning_rate": 1.7278162826944625e-05, "loss": 0.3751, "step": 24813 }, { "epoch": 4.05061017917636, "grad_norm": 1.8369131088256836, "learning_rate": 1.7277945205422812e-05, "loss": 0.3372, "step": 24814 }, { "epoch": 4.050773437818864, "grad_norm": 2.529573917388916, "learning_rate": 1.7277727576572108e-05, "loss": 0.4016, "step": 24815 }, { "epoch": 4.050936696461369, "grad_norm": 2.26982045173645, "learning_rate": 1.7277509940392732e-05, "loss": 0.4043, "step": 24816 }, { "epoch": 4.051099955103873, "grad_norm": 2.0788304805755615, "learning_rate": 1.7277292296884905e-05, "loss": 0.3667, "step": 24817 }, { "epoch": 4.0512632137463775, "grad_norm": 2.2905054092407227, "learning_rate": 1.727707464604885e-05, "loss": 0.4109, "step": 24818 }, { "epoch": 4.051426472388882, "grad_norm": 1.7574743032455444, "learning_rate": 1.7276856987884782e-05, "loss": 0.361, "step": 24819 }, { "epoch": 4.051589731031386, "grad_norm": 1.8098764419555664, "learning_rate": 1.727663932239292e-05, "loss": 0.3716, "step": 24820 }, { "epoch": 4.051752989673891, "grad_norm": 2.7594797611236572, "learning_rate": 1.7276421649573484e-05, "loss": 0.4355, "step": 24821 }, { "epoch": 4.051916248316395, "grad_norm": 1.9880701303482056, "learning_rate": 1.7276203969426695e-05, "loss": 0.3353, "step": 24822 }, { "epoch": 4.0520795069589, "grad_norm": 1.9498682022094727, "learning_rate": 1.727598628195277e-05, "loss": 0.3805, "step": 24823 }, { "epoch": 4.052242765601404, "grad_norm": 2.243654251098633, "learning_rate": 1.7275768587151927e-05, "loss": 0.433, "step": 24824 }, { "epoch": 4.052406024243909, "grad_norm": 2.0582962036132812, "learning_rate": 1.7275550885024385e-05, "loss": 0.3859, "step": 24825 }, { "epoch": 4.052569282886413, "grad_norm": 2.4037866592407227, "learning_rate": 1.727533317557037e-05, "loss": 0.4174, "step": 24826 }, { "epoch": 4.0527325415289175, "grad_norm": 2.454770565032959, "learning_rate": 1.7275115458790094e-05, "loss": 0.412, "step": 24827 }, { "epoch": 4.052895800171422, "grad_norm": 2.1050193309783936, "learning_rate": 1.727489773468378e-05, "loss": 0.3865, "step": 24828 }, { "epoch": 4.053059058813926, "grad_norm": 2.114612340927124, "learning_rate": 1.7274680003251645e-05, "loss": 0.3794, "step": 24829 }, { "epoch": 4.05322231745643, "grad_norm": 2.133859634399414, "learning_rate": 1.727446226449391e-05, "loss": 0.4052, "step": 24830 }, { "epoch": 4.053385576098934, "grad_norm": 1.902106523513794, "learning_rate": 1.7274244518410793e-05, "loss": 0.3487, "step": 24831 }, { "epoch": 4.053548834741439, "grad_norm": 2.162571907043457, "learning_rate": 1.727402676500251e-05, "loss": 0.4139, "step": 24832 }, { "epoch": 4.053712093383943, "grad_norm": 1.8209052085876465, "learning_rate": 1.727380900426929e-05, "loss": 0.3157, "step": 24833 }, { "epoch": 4.053875352026448, "grad_norm": 1.7274692058563232, "learning_rate": 1.7273591236211345e-05, "loss": 0.3445, "step": 24834 }, { "epoch": 4.054038610668952, "grad_norm": 1.9556397199630737, "learning_rate": 1.7273373460828894e-05, "loss": 0.3789, "step": 24835 }, { "epoch": 4.0542018693114565, "grad_norm": 2.013370990753174, "learning_rate": 1.7273155678122156e-05, "loss": 0.3708, "step": 24836 }, { "epoch": 4.054365127953961, "grad_norm": 2.0310893058776855, "learning_rate": 1.7272937888091355e-05, "loss": 0.3385, "step": 24837 }, { "epoch": 4.054528386596465, "grad_norm": 2.139716625213623, "learning_rate": 1.7272720090736707e-05, "loss": 0.4223, "step": 24838 }, { "epoch": 4.05469164523897, "grad_norm": 2.3322455883026123, "learning_rate": 1.727250228605843e-05, "loss": 0.4032, "step": 24839 }, { "epoch": 4.054854903881474, "grad_norm": 2.351181745529175, "learning_rate": 1.7272284474056748e-05, "loss": 0.4102, "step": 24840 }, { "epoch": 4.055018162523979, "grad_norm": 2.0578737258911133, "learning_rate": 1.7272066654731878e-05, "loss": 0.3906, "step": 24841 }, { "epoch": 4.055181421166483, "grad_norm": 2.4004430770874023, "learning_rate": 1.7271848828084035e-05, "loss": 0.6118, "step": 24842 }, { "epoch": 4.055344679808988, "grad_norm": 2.0039803981781006, "learning_rate": 1.7271630994113443e-05, "loss": 0.4042, "step": 24843 }, { "epoch": 4.055507938451492, "grad_norm": 2.3204150199890137, "learning_rate": 1.7271413152820323e-05, "loss": 0.4117, "step": 24844 }, { "epoch": 4.0556711970939965, "grad_norm": 1.863194227218628, "learning_rate": 1.727119530420489e-05, "loss": 0.3643, "step": 24845 }, { "epoch": 4.055834455736501, "grad_norm": 2.479419231414795, "learning_rate": 1.7270977448267364e-05, "loss": 0.4367, "step": 24846 }, { "epoch": 4.055997714379005, "grad_norm": 1.846832275390625, "learning_rate": 1.727075958500797e-05, "loss": 0.3453, "step": 24847 }, { "epoch": 4.056160973021509, "grad_norm": 1.7635730504989624, "learning_rate": 1.727054171442692e-05, "loss": 0.3613, "step": 24848 }, { "epoch": 4.056324231664013, "grad_norm": 1.881688117980957, "learning_rate": 1.7270323836524435e-05, "loss": 0.3662, "step": 24849 }, { "epoch": 4.056487490306518, "grad_norm": 2.299379348754883, "learning_rate": 1.727010595130074e-05, "loss": 0.4463, "step": 24850 }, { "epoch": 4.056650748949022, "grad_norm": 2.110992193222046, "learning_rate": 1.7269888058756046e-05, "loss": 0.3517, "step": 24851 }, { "epoch": 4.056814007591527, "grad_norm": 2.0924575328826904, "learning_rate": 1.726967015889058e-05, "loss": 0.431, "step": 24852 }, { "epoch": 4.056977266234031, "grad_norm": 1.7908846139907837, "learning_rate": 1.7269452251704554e-05, "loss": 0.3399, "step": 24853 }, { "epoch": 4.0571405248765355, "grad_norm": 2.1638429164886475, "learning_rate": 1.7269234337198196e-05, "loss": 0.4242, "step": 24854 }, { "epoch": 4.05730378351904, "grad_norm": 2.1716182231903076, "learning_rate": 1.7269016415371717e-05, "loss": 0.4266, "step": 24855 }, { "epoch": 4.057467042161544, "grad_norm": 1.7898828983306885, "learning_rate": 1.7268798486225346e-05, "loss": 0.3402, "step": 24856 }, { "epoch": 4.057630300804049, "grad_norm": 2.0002665519714355, "learning_rate": 1.7268580549759292e-05, "loss": 0.4194, "step": 24857 }, { "epoch": 4.057793559446553, "grad_norm": 2.1119282245635986, "learning_rate": 1.7268362605973782e-05, "loss": 0.3991, "step": 24858 }, { "epoch": 4.057956818089058, "grad_norm": 2.190427303314209, "learning_rate": 1.726814465486903e-05, "loss": 0.3923, "step": 24859 }, { "epoch": 4.058120076731562, "grad_norm": 1.7605504989624023, "learning_rate": 1.7267926696445257e-05, "loss": 0.3311, "step": 24860 }, { "epoch": 4.058283335374067, "grad_norm": 1.6075176000595093, "learning_rate": 1.726770873070269e-05, "loss": 0.3384, "step": 24861 }, { "epoch": 4.058446594016571, "grad_norm": 2.516188144683838, "learning_rate": 1.7267490757641535e-05, "loss": 0.441, "step": 24862 }, { "epoch": 4.0586098526590755, "grad_norm": 1.9389798641204834, "learning_rate": 1.7267272777262027e-05, "loss": 0.3536, "step": 24863 }, { "epoch": 4.05877311130158, "grad_norm": 1.8702726364135742, "learning_rate": 1.726705478956437e-05, "loss": 0.3243, "step": 24864 }, { "epoch": 4.058936369944084, "grad_norm": 2.1207332611083984, "learning_rate": 1.7266836794548794e-05, "loss": 0.3536, "step": 24865 }, { "epoch": 4.059099628586588, "grad_norm": 2.313127040863037, "learning_rate": 1.7266618792215514e-05, "loss": 0.4451, "step": 24866 }, { "epoch": 4.059262887229092, "grad_norm": 2.3276233673095703, "learning_rate": 1.7266400782564753e-05, "loss": 0.3704, "step": 24867 }, { "epoch": 4.059426145871597, "grad_norm": 2.6542656421661377, "learning_rate": 1.7266182765596725e-05, "loss": 0.4514, "step": 24868 }, { "epoch": 4.059589404514101, "grad_norm": 2.2893824577331543, "learning_rate": 1.7265964741311657e-05, "loss": 0.393, "step": 24869 }, { "epoch": 4.059752663156606, "grad_norm": 2.0564498901367188, "learning_rate": 1.7265746709709762e-05, "loss": 0.3705, "step": 24870 }, { "epoch": 4.05991592179911, "grad_norm": 2.178050994873047, "learning_rate": 1.7265528670791262e-05, "loss": 0.3919, "step": 24871 }, { "epoch": 4.0600791804416145, "grad_norm": 2.0790395736694336, "learning_rate": 1.7265310624556375e-05, "loss": 0.4789, "step": 24872 }, { "epoch": 4.060242439084119, "grad_norm": 1.8969407081604004, "learning_rate": 1.7265092571005326e-05, "loss": 0.3347, "step": 24873 }, { "epoch": 4.060405697726623, "grad_norm": 2.364386796951294, "learning_rate": 1.726487451013833e-05, "loss": 0.4405, "step": 24874 }, { "epoch": 4.060568956369128, "grad_norm": 1.898151159286499, "learning_rate": 1.7264656441955602e-05, "loss": 0.3591, "step": 24875 }, { "epoch": 4.060732215011632, "grad_norm": 2.246483564376831, "learning_rate": 1.726443836645737e-05, "loss": 0.3828, "step": 24876 }, { "epoch": 4.060895473654137, "grad_norm": 2.005389928817749, "learning_rate": 1.726422028364385e-05, "loss": 0.3807, "step": 24877 }, { "epoch": 4.061058732296641, "grad_norm": 2.864320993423462, "learning_rate": 1.7264002193515266e-05, "loss": 0.4817, "step": 24878 }, { "epoch": 4.061221990939146, "grad_norm": 2.328063726425171, "learning_rate": 1.726378409607183e-05, "loss": 0.39, "step": 24879 }, { "epoch": 4.06138524958165, "grad_norm": 2.0169694423675537, "learning_rate": 1.7263565991313767e-05, "loss": 0.364, "step": 24880 }, { "epoch": 4.0615485082241545, "grad_norm": 2.118955373764038, "learning_rate": 1.7263347879241294e-05, "loss": 0.4199, "step": 24881 }, { "epoch": 4.061711766866659, "grad_norm": 2.1841537952423096, "learning_rate": 1.7263129759854633e-05, "loss": 0.3513, "step": 24882 }, { "epoch": 4.0618750255091625, "grad_norm": 1.9254682064056396, "learning_rate": 1.7262911633154e-05, "loss": 0.3607, "step": 24883 }, { "epoch": 4.062038284151667, "grad_norm": 1.8166953325271606, "learning_rate": 1.7262693499139617e-05, "loss": 0.3539, "step": 24884 }, { "epoch": 4.062201542794171, "grad_norm": 1.859126091003418, "learning_rate": 1.7262475357811706e-05, "loss": 0.3155, "step": 24885 }, { "epoch": 4.062364801436676, "grad_norm": 1.6969562768936157, "learning_rate": 1.7262257209170485e-05, "loss": 0.3129, "step": 24886 }, { "epoch": 4.06252806007918, "grad_norm": 2.660111427307129, "learning_rate": 1.7262039053216168e-05, "loss": 0.3758, "step": 24887 }, { "epoch": 4.062691318721685, "grad_norm": 1.8065404891967773, "learning_rate": 1.7261820889948982e-05, "loss": 0.3201, "step": 24888 }, { "epoch": 4.062854577364189, "grad_norm": 2.1819467544555664, "learning_rate": 1.7261602719369147e-05, "loss": 0.3433, "step": 24889 }, { "epoch": 4.0630178360066935, "grad_norm": 1.8432414531707764, "learning_rate": 1.7261384541476877e-05, "loss": 0.3378, "step": 24890 }, { "epoch": 4.063181094649198, "grad_norm": 2.0533628463745117, "learning_rate": 1.7261166356272395e-05, "loss": 0.3508, "step": 24891 }, { "epoch": 4.063344353291702, "grad_norm": 2.144139528274536, "learning_rate": 1.7260948163755918e-05, "loss": 0.3437, "step": 24892 }, { "epoch": 4.063507611934207, "grad_norm": 2.8164565563201904, "learning_rate": 1.7260729963927673e-05, "loss": 0.4398, "step": 24893 }, { "epoch": 4.063670870576711, "grad_norm": 1.9442954063415527, "learning_rate": 1.7260511756787874e-05, "loss": 0.3457, "step": 24894 }, { "epoch": 4.063834129219216, "grad_norm": 2.626861572265625, "learning_rate": 1.726029354233674e-05, "loss": 0.4474, "step": 24895 }, { "epoch": 4.06399738786172, "grad_norm": 2.0091888904571533, "learning_rate": 1.7260075320574492e-05, "loss": 0.3521, "step": 24896 }, { "epoch": 4.064160646504225, "grad_norm": 2.2273404598236084, "learning_rate": 1.725985709150135e-05, "loss": 0.3638, "step": 24897 }, { "epoch": 4.064323905146729, "grad_norm": 1.8602041006088257, "learning_rate": 1.7259638855117535e-05, "loss": 0.3554, "step": 24898 }, { "epoch": 4.0644871637892335, "grad_norm": 1.793160080909729, "learning_rate": 1.7259420611423266e-05, "loss": 0.3538, "step": 24899 }, { "epoch": 4.064650422431738, "grad_norm": 2.4451045989990234, "learning_rate": 1.7259202360418765e-05, "loss": 0.4549, "step": 24900 }, { "epoch": 4.0648136810742415, "grad_norm": 1.7887550592422485, "learning_rate": 1.7258984102104243e-05, "loss": 0.3741, "step": 24901 }, { "epoch": 4.064976939716746, "grad_norm": 1.916051983833313, "learning_rate": 1.7258765836479925e-05, "loss": 0.3159, "step": 24902 }, { "epoch": 4.06514019835925, "grad_norm": 2.156728982925415, "learning_rate": 1.725854756354604e-05, "loss": 0.4085, "step": 24903 }, { "epoch": 4.065303457001755, "grad_norm": 2.0999715328216553, "learning_rate": 1.7258329283302793e-05, "loss": 0.3844, "step": 24904 }, { "epoch": 4.065466715644259, "grad_norm": 2.3290774822235107, "learning_rate": 1.7258110995750415e-05, "loss": 0.3863, "step": 24905 }, { "epoch": 4.065629974286764, "grad_norm": 2.086743116378784, "learning_rate": 1.7257892700889116e-05, "loss": 0.3439, "step": 24906 }, { "epoch": 4.065793232929268, "grad_norm": 2.0123326778411865, "learning_rate": 1.725767439871912e-05, "loss": 0.3763, "step": 24907 }, { "epoch": 4.0659564915717725, "grad_norm": 1.9237772226333618, "learning_rate": 1.7257456089240655e-05, "loss": 0.343, "step": 24908 }, { "epoch": 4.066119750214277, "grad_norm": 1.7933930158615112, "learning_rate": 1.7257237772453928e-05, "loss": 0.3574, "step": 24909 }, { "epoch": 4.066283008856781, "grad_norm": 2.2475802898406982, "learning_rate": 1.7257019448359167e-05, "loss": 0.3964, "step": 24910 }, { "epoch": 4.066446267499286, "grad_norm": 1.4040933847427368, "learning_rate": 1.7256801116956588e-05, "loss": 0.3085, "step": 24911 }, { "epoch": 4.06660952614179, "grad_norm": 2.2686619758605957, "learning_rate": 1.7256582778246412e-05, "loss": 0.4202, "step": 24912 }, { "epoch": 4.066772784784295, "grad_norm": 2.0942487716674805, "learning_rate": 1.7256364432228863e-05, "loss": 0.3905, "step": 24913 }, { "epoch": 4.066936043426799, "grad_norm": 1.8199082612991333, "learning_rate": 1.7256146078904153e-05, "loss": 0.3228, "step": 24914 }, { "epoch": 4.067099302069304, "grad_norm": 2.030118942260742, "learning_rate": 1.7255927718272503e-05, "loss": 0.3666, "step": 24915 }, { "epoch": 4.067262560711808, "grad_norm": 2.1851580142974854, "learning_rate": 1.725570935033414e-05, "loss": 0.4395, "step": 24916 }, { "epoch": 4.0674258193543125, "grad_norm": 2.2017219066619873, "learning_rate": 1.7255490975089283e-05, "loss": 0.3948, "step": 24917 }, { "epoch": 4.067589077996816, "grad_norm": 1.9982240200042725, "learning_rate": 1.7255272592538145e-05, "loss": 0.3511, "step": 24918 }, { "epoch": 4.0677523366393205, "grad_norm": 2.302027940750122, "learning_rate": 1.7255054202680948e-05, "loss": 0.4289, "step": 24919 }, { "epoch": 4.067915595281825, "grad_norm": 1.9297579526901245, "learning_rate": 1.7254835805517913e-05, "loss": 0.3423, "step": 24920 }, { "epoch": 4.068078853924329, "grad_norm": 2.016429901123047, "learning_rate": 1.725461740104926e-05, "loss": 0.4012, "step": 24921 }, { "epoch": 4.068242112566834, "grad_norm": 2.00941801071167, "learning_rate": 1.725439898927521e-05, "loss": 0.3937, "step": 24922 }, { "epoch": 4.068405371209338, "grad_norm": 2.0969045162200928, "learning_rate": 1.7254180570195987e-05, "loss": 0.3756, "step": 24923 }, { "epoch": 4.068568629851843, "grad_norm": 1.6961246728897095, "learning_rate": 1.72539621438118e-05, "loss": 0.3253, "step": 24924 }, { "epoch": 4.068731888494347, "grad_norm": 2.0494918823242188, "learning_rate": 1.7253743710122877e-05, "loss": 0.3295, "step": 24925 }, { "epoch": 4.0688951471368515, "grad_norm": 1.867377519607544, "learning_rate": 1.7253525269129437e-05, "loss": 0.3841, "step": 24926 }, { "epoch": 4.069058405779356, "grad_norm": 1.9927972555160522, "learning_rate": 1.7253306820831697e-05, "loss": 0.3492, "step": 24927 }, { "epoch": 4.06922166442186, "grad_norm": 2.4677910804748535, "learning_rate": 1.7253088365229883e-05, "loss": 0.3946, "step": 24928 }, { "epoch": 4.069384923064365, "grad_norm": 1.9015475511550903, "learning_rate": 1.7252869902324206e-05, "loss": 0.3884, "step": 24929 }, { "epoch": 4.069548181706869, "grad_norm": 1.8782198429107666, "learning_rate": 1.7252651432114892e-05, "loss": 0.3446, "step": 24930 }, { "epoch": 4.069711440349374, "grad_norm": 2.171586751937866, "learning_rate": 1.7252432954602163e-05, "loss": 0.3795, "step": 24931 }, { "epoch": 4.069874698991878, "grad_norm": 1.7535488605499268, "learning_rate": 1.725221446978623e-05, "loss": 0.3366, "step": 24932 }, { "epoch": 4.070037957634383, "grad_norm": 2.222627639770508, "learning_rate": 1.7251995977667325e-05, "loss": 0.3943, "step": 24933 }, { "epoch": 4.070201216276887, "grad_norm": 2.3204362392425537, "learning_rate": 1.7251777478245658e-05, "loss": 0.3798, "step": 24934 }, { "epoch": 4.0703644749193915, "grad_norm": 1.8638291358947754, "learning_rate": 1.7251558971521455e-05, "loss": 0.3607, "step": 24935 }, { "epoch": 4.070527733561895, "grad_norm": 2.0233712196350098, "learning_rate": 1.7251340457494934e-05, "loss": 0.3541, "step": 24936 }, { "epoch": 4.0706909922043994, "grad_norm": 2.725470542907715, "learning_rate": 1.7251121936166315e-05, "loss": 0.4312, "step": 24937 }, { "epoch": 4.070854250846904, "grad_norm": 1.9300581216812134, "learning_rate": 1.7250903407535817e-05, "loss": 0.3749, "step": 24938 }, { "epoch": 4.071017509489408, "grad_norm": 2.6058452129364014, "learning_rate": 1.7250684871603662e-05, "loss": 0.3964, "step": 24939 }, { "epoch": 4.071180768131913, "grad_norm": 2.076022148132324, "learning_rate": 1.725046632837007e-05, "loss": 0.311, "step": 24940 }, { "epoch": 4.071344026774417, "grad_norm": 1.9389511346817017, "learning_rate": 1.725024777783526e-05, "loss": 0.3772, "step": 24941 }, { "epoch": 4.071507285416922, "grad_norm": 2.346208095550537, "learning_rate": 1.7250029219999453e-05, "loss": 0.411, "step": 24942 }, { "epoch": 4.071670544059426, "grad_norm": 2.600252151489258, "learning_rate": 1.724981065486287e-05, "loss": 0.4043, "step": 24943 }, { "epoch": 4.0718338027019305, "grad_norm": 2.006709337234497, "learning_rate": 1.7249592082425725e-05, "loss": 0.3488, "step": 24944 }, { "epoch": 4.071997061344435, "grad_norm": 2.2301032543182373, "learning_rate": 1.7249373502688244e-05, "loss": 0.403, "step": 24945 }, { "epoch": 4.072160319986939, "grad_norm": 2.317821979522705, "learning_rate": 1.7249154915650644e-05, "loss": 0.3742, "step": 24946 }, { "epoch": 4.072323578629444, "grad_norm": 2.7385220527648926, "learning_rate": 1.724893632131315e-05, "loss": 0.4432, "step": 24947 }, { "epoch": 4.072486837271948, "grad_norm": 1.921575903892517, "learning_rate": 1.7248717719675983e-05, "loss": 0.3975, "step": 24948 }, { "epoch": 4.072650095914453, "grad_norm": 1.9601876735687256, "learning_rate": 1.7248499110739353e-05, "loss": 0.3916, "step": 24949 }, { "epoch": 4.072813354556957, "grad_norm": 1.9393489360809326, "learning_rate": 1.724828049450349e-05, "loss": 0.3895, "step": 24950 }, { "epoch": 4.072976613199462, "grad_norm": 2.2744061946868896, "learning_rate": 1.7248061870968606e-05, "loss": 0.4465, "step": 24951 }, { "epoch": 4.073139871841966, "grad_norm": 1.7046661376953125, "learning_rate": 1.724784324013493e-05, "loss": 0.3295, "step": 24952 }, { "epoch": 4.0733031304844705, "grad_norm": 1.7933449745178223, "learning_rate": 1.7247624602002677e-05, "loss": 0.3565, "step": 24953 }, { "epoch": 4.073466389126974, "grad_norm": 2.1121954917907715, "learning_rate": 1.7247405956572065e-05, "loss": 0.395, "step": 24954 }, { "epoch": 4.0736296477694784, "grad_norm": 1.891863465309143, "learning_rate": 1.724718730384332e-05, "loss": 0.4066, "step": 24955 }, { "epoch": 4.073792906411983, "grad_norm": 2.6306638717651367, "learning_rate": 1.7246968643816657e-05, "loss": 0.44, "step": 24956 }, { "epoch": 4.073956165054487, "grad_norm": 1.7807729244232178, "learning_rate": 1.72467499764923e-05, "loss": 0.3508, "step": 24957 }, { "epoch": 4.074119423696992, "grad_norm": 1.8651620149612427, "learning_rate": 1.7246531301870467e-05, "loss": 0.3475, "step": 24958 }, { "epoch": 4.074282682339496, "grad_norm": 2.0970466136932373, "learning_rate": 1.724631261995138e-05, "loss": 0.4077, "step": 24959 }, { "epoch": 4.074445940982001, "grad_norm": 2.540329933166504, "learning_rate": 1.724609393073526e-05, "loss": 0.4559, "step": 24960 }, { "epoch": 4.074609199624505, "grad_norm": 1.9987214803695679, "learning_rate": 1.724587523422232e-05, "loss": 0.354, "step": 24961 }, { "epoch": 4.0747724582670095, "grad_norm": 2.0053343772888184, "learning_rate": 1.724565653041279e-05, "loss": 0.4237, "step": 24962 }, { "epoch": 4.074935716909514, "grad_norm": 2.715850830078125, "learning_rate": 1.7245437819306886e-05, "loss": 0.432, "step": 24963 }, { "epoch": 4.075098975552018, "grad_norm": 2.4086978435516357, "learning_rate": 1.7245219100904823e-05, "loss": 0.4847, "step": 24964 }, { "epoch": 4.075262234194523, "grad_norm": 1.8317116498947144, "learning_rate": 1.7245000375206832e-05, "loss": 0.3413, "step": 24965 }, { "epoch": 4.075425492837027, "grad_norm": 2.008922576904297, "learning_rate": 1.7244781642213123e-05, "loss": 0.3415, "step": 24966 }, { "epoch": 4.075588751479532, "grad_norm": 2.1344234943389893, "learning_rate": 1.7244562901923925e-05, "loss": 0.4071, "step": 24967 }, { "epoch": 4.075752010122036, "grad_norm": 1.9832804203033447, "learning_rate": 1.7244344154339453e-05, "loss": 0.362, "step": 24968 }, { "epoch": 4.075915268764541, "grad_norm": 1.7619253396987915, "learning_rate": 1.7244125399459926e-05, "loss": 0.3563, "step": 24969 }, { "epoch": 4.076078527407045, "grad_norm": 1.8672451972961426, "learning_rate": 1.724390663728557e-05, "loss": 0.3528, "step": 24970 }, { "epoch": 4.076241786049549, "grad_norm": 2.19864821434021, "learning_rate": 1.7243687867816598e-05, "loss": 0.4504, "step": 24971 }, { "epoch": 4.076405044692053, "grad_norm": 2.0363051891326904, "learning_rate": 1.7243469091053237e-05, "loss": 0.3666, "step": 24972 }, { "epoch": 4.076568303334557, "grad_norm": 2.550018548965454, "learning_rate": 1.7243250306995702e-05, "loss": 0.3967, "step": 24973 }, { "epoch": 4.076731561977062, "grad_norm": 2.051201343536377, "learning_rate": 1.724303151564422e-05, "loss": 0.4129, "step": 24974 }, { "epoch": 4.076894820619566, "grad_norm": 1.8301246166229248, "learning_rate": 1.7242812716999004e-05, "loss": 0.3809, "step": 24975 }, { "epoch": 4.077058079262071, "grad_norm": 1.7928414344787598, "learning_rate": 1.7242593911060277e-05, "loss": 0.3361, "step": 24976 }, { "epoch": 4.077221337904575, "grad_norm": 2.0786821842193604, "learning_rate": 1.724237509782826e-05, "loss": 0.4406, "step": 24977 }, { "epoch": 4.07738459654708, "grad_norm": 1.9438196420669556, "learning_rate": 1.7242156277303176e-05, "loss": 0.3713, "step": 24978 }, { "epoch": 4.077547855189584, "grad_norm": 2.3515024185180664, "learning_rate": 1.724193744948524e-05, "loss": 0.4121, "step": 24979 }, { "epoch": 4.0777111138320885, "grad_norm": 2.08453106880188, "learning_rate": 1.7241718614374678e-05, "loss": 0.3568, "step": 24980 }, { "epoch": 4.077874372474593, "grad_norm": 2.0954973697662354, "learning_rate": 1.7241499771971704e-05, "loss": 0.3627, "step": 24981 }, { "epoch": 4.078037631117097, "grad_norm": 2.229264974594116, "learning_rate": 1.7241280922276543e-05, "loss": 0.4248, "step": 24982 }, { "epoch": 4.078200889759602, "grad_norm": 2.2371015548706055, "learning_rate": 1.7241062065289414e-05, "loss": 0.4358, "step": 24983 }, { "epoch": 4.078364148402106, "grad_norm": 1.995811939239502, "learning_rate": 1.7240843201010538e-05, "loss": 0.4015, "step": 24984 }, { "epoch": 4.078527407044611, "grad_norm": 1.9160773754119873, "learning_rate": 1.7240624329440134e-05, "loss": 0.3376, "step": 24985 }, { "epoch": 4.078690665687115, "grad_norm": 2.040519952774048, "learning_rate": 1.7240405450578423e-05, "loss": 0.3925, "step": 24986 }, { "epoch": 4.07885392432962, "grad_norm": 2.044238567352295, "learning_rate": 1.7240186564425625e-05, "loss": 0.356, "step": 24987 }, { "epoch": 4.079017182972124, "grad_norm": 2.301969528198242, "learning_rate": 1.7239967670981964e-05, "loss": 0.4549, "step": 24988 }, { "epoch": 4.079180441614628, "grad_norm": 2.185957670211792, "learning_rate": 1.7239748770247654e-05, "loss": 0.3724, "step": 24989 }, { "epoch": 4.079343700257132, "grad_norm": 2.0429019927978516, "learning_rate": 1.723952986222292e-05, "loss": 0.4024, "step": 24990 }, { "epoch": 4.079506958899636, "grad_norm": 1.683681607246399, "learning_rate": 1.723931094690798e-05, "loss": 0.3087, "step": 24991 }, { "epoch": 4.079670217542141, "grad_norm": 1.8554762601852417, "learning_rate": 1.723909202430306e-05, "loss": 0.3511, "step": 24992 }, { "epoch": 4.079833476184645, "grad_norm": 1.9679067134857178, "learning_rate": 1.7238873094408373e-05, "loss": 0.3618, "step": 24993 }, { "epoch": 4.07999673482715, "grad_norm": 2.1518077850341797, "learning_rate": 1.7238654157224144e-05, "loss": 0.4727, "step": 24994 }, { "epoch": 4.080159993469654, "grad_norm": 1.9059648513793945, "learning_rate": 1.723843521275059e-05, "loss": 0.3164, "step": 24995 }, { "epoch": 4.080323252112159, "grad_norm": 2.139193296432495, "learning_rate": 1.7238216260987935e-05, "loss": 0.4101, "step": 24996 }, { "epoch": 4.080486510754663, "grad_norm": 2.4150540828704834, "learning_rate": 1.72379973019364e-05, "loss": 0.3707, "step": 24997 }, { "epoch": 4.0806497693971675, "grad_norm": 2.0461690425872803, "learning_rate": 1.72377783355962e-05, "loss": 0.3794, "step": 24998 }, { "epoch": 4.080813028039672, "grad_norm": 2.313077211380005, "learning_rate": 1.7237559361967563e-05, "loss": 0.3826, "step": 24999 }, { "epoch": 4.080976286682176, "grad_norm": 1.8375853300094604, "learning_rate": 1.72373403810507e-05, "loss": 0.3719, "step": 25000 }, { "epoch": 4.081139545324681, "grad_norm": 1.5931862592697144, "learning_rate": 1.7237121392845843e-05, "loss": 0.2914, "step": 25001 }, { "epoch": 4.081302803967185, "grad_norm": 2.3382160663604736, "learning_rate": 1.7236902397353204e-05, "loss": 0.3662, "step": 25002 }, { "epoch": 4.08146606260969, "grad_norm": 2.158010959625244, "learning_rate": 1.723668339457301e-05, "loss": 0.3751, "step": 25003 }, { "epoch": 4.081629321252194, "grad_norm": 2.395658016204834, "learning_rate": 1.7236464384505475e-05, "loss": 0.3663, "step": 25004 }, { "epoch": 4.081792579894699, "grad_norm": 1.962337851524353, "learning_rate": 1.7236245367150818e-05, "loss": 0.3748, "step": 25005 }, { "epoch": 4.081955838537203, "grad_norm": 1.9032280445098877, "learning_rate": 1.723602634250927e-05, "loss": 0.3856, "step": 25006 }, { "epoch": 4.082119097179707, "grad_norm": 2.0551795959472656, "learning_rate": 1.7235807310581043e-05, "loss": 0.3656, "step": 25007 }, { "epoch": 4.082282355822211, "grad_norm": 2.0258169174194336, "learning_rate": 1.723558827136636e-05, "loss": 0.3498, "step": 25008 }, { "epoch": 4.082445614464715, "grad_norm": 2.267683506011963, "learning_rate": 1.723536922486544e-05, "loss": 0.4107, "step": 25009 }, { "epoch": 4.08260887310722, "grad_norm": 1.745848536491394, "learning_rate": 1.7235150171078508e-05, "loss": 0.3007, "step": 25010 }, { "epoch": 4.082772131749724, "grad_norm": 1.8379435539245605, "learning_rate": 1.7234931110005783e-05, "loss": 0.3255, "step": 25011 }, { "epoch": 4.082935390392229, "grad_norm": 1.7178928852081299, "learning_rate": 1.7234712041647477e-05, "loss": 0.3504, "step": 25012 }, { "epoch": 4.083098649034733, "grad_norm": 2.738872528076172, "learning_rate": 1.7234492966003828e-05, "loss": 0.4256, "step": 25013 }, { "epoch": 4.083261907677238, "grad_norm": 1.9682362079620361, "learning_rate": 1.7234273883075038e-05, "loss": 0.3621, "step": 25014 }, { "epoch": 4.083425166319742, "grad_norm": 2.2125937938690186, "learning_rate": 1.723405479286134e-05, "loss": 0.3621, "step": 25015 }, { "epoch": 4.0835884249622465, "grad_norm": 2.603576898574829, "learning_rate": 1.723383569536295e-05, "loss": 0.4756, "step": 25016 }, { "epoch": 4.083751683604751, "grad_norm": 2.2420976161956787, "learning_rate": 1.723361659058009e-05, "loss": 0.4412, "step": 25017 }, { "epoch": 4.083914942247255, "grad_norm": 1.7094924449920654, "learning_rate": 1.723339747851298e-05, "loss": 0.3248, "step": 25018 }, { "epoch": 4.08407820088976, "grad_norm": 2.0897915363311768, "learning_rate": 1.7233178359161837e-05, "loss": 0.3748, "step": 25019 }, { "epoch": 4.084241459532264, "grad_norm": 2.479527711868286, "learning_rate": 1.723295923252689e-05, "loss": 0.3768, "step": 25020 }, { "epoch": 4.084404718174769, "grad_norm": 1.9956508874893188, "learning_rate": 1.7232740098608355e-05, "loss": 0.3934, "step": 25021 }, { "epoch": 4.084567976817273, "grad_norm": 1.8879554271697998, "learning_rate": 1.723252095740645e-05, "loss": 0.3366, "step": 25022 }, { "epoch": 4.084731235459778, "grad_norm": 2.3619070053100586, "learning_rate": 1.7232301808921398e-05, "loss": 0.3895, "step": 25023 }, { "epoch": 4.084894494102281, "grad_norm": 1.8417766094207764, "learning_rate": 1.7232082653153422e-05, "loss": 0.4146, "step": 25024 }, { "epoch": 4.0850577527447856, "grad_norm": 1.6038168668746948, "learning_rate": 1.723186349010274e-05, "loss": 0.3195, "step": 25025 }, { "epoch": 4.08522101138729, "grad_norm": 1.9812475442886353, "learning_rate": 1.7231644319769574e-05, "loss": 0.3445, "step": 25026 }, { "epoch": 4.085384270029794, "grad_norm": 2.00785231590271, "learning_rate": 1.7231425142154142e-05, "loss": 0.3626, "step": 25027 }, { "epoch": 4.085547528672299, "grad_norm": 1.940775990486145, "learning_rate": 1.723120595725667e-05, "loss": 0.3243, "step": 25028 }, { "epoch": 4.085710787314803, "grad_norm": 1.8214948177337646, "learning_rate": 1.7230986765077373e-05, "loss": 0.3623, "step": 25029 }, { "epoch": 4.085874045957308, "grad_norm": 1.91639244556427, "learning_rate": 1.7230767565616477e-05, "loss": 0.337, "step": 25030 }, { "epoch": 4.086037304599812, "grad_norm": 1.97590970993042, "learning_rate": 1.7230548358874194e-05, "loss": 0.3507, "step": 25031 }, { "epoch": 4.086200563242317, "grad_norm": 1.9283443689346313, "learning_rate": 1.7230329144850758e-05, "loss": 0.3486, "step": 25032 }, { "epoch": 4.086363821884821, "grad_norm": 2.1988790035247803, "learning_rate": 1.7230109923546376e-05, "loss": 0.4054, "step": 25033 }, { "epoch": 4.0865270805273255, "grad_norm": 2.400758981704712, "learning_rate": 1.722989069496128e-05, "loss": 0.4165, "step": 25034 }, { "epoch": 4.08669033916983, "grad_norm": 2.020846366882324, "learning_rate": 1.7229671459095682e-05, "loss": 0.3589, "step": 25035 }, { "epoch": 4.086853597812334, "grad_norm": 1.9956165552139282, "learning_rate": 1.722945221594981e-05, "loss": 0.3644, "step": 25036 }, { "epoch": 4.087016856454839, "grad_norm": 2.445406436920166, "learning_rate": 1.722923296552388e-05, "loss": 0.4328, "step": 25037 }, { "epoch": 4.087180115097343, "grad_norm": 2.1012508869171143, "learning_rate": 1.7229013707818115e-05, "loss": 0.3686, "step": 25038 }, { "epoch": 4.087343373739848, "grad_norm": 2.1535089015960693, "learning_rate": 1.722879444283274e-05, "loss": 0.3697, "step": 25039 }, { "epoch": 4.087506632382352, "grad_norm": 2.4596452713012695, "learning_rate": 1.722857517056796e-05, "loss": 0.3555, "step": 25040 }, { "epoch": 4.087669891024857, "grad_norm": 2.321694850921631, "learning_rate": 1.7228355891024016e-05, "loss": 0.4088, "step": 25041 }, { "epoch": 4.08783314966736, "grad_norm": 2.0178322792053223, "learning_rate": 1.7228136604201114e-05, "loss": 0.3307, "step": 25042 }, { "epoch": 4.0879964083098645, "grad_norm": 2.3728058338165283, "learning_rate": 1.7227917310099485e-05, "loss": 0.3705, "step": 25043 }, { "epoch": 4.088159666952369, "grad_norm": 2.2711877822875977, "learning_rate": 1.7227698008719343e-05, "loss": 0.4169, "step": 25044 }, { "epoch": 4.088322925594873, "grad_norm": 1.9586373567581177, "learning_rate": 1.722747870006091e-05, "loss": 0.3473, "step": 25045 }, { "epoch": 4.088486184237378, "grad_norm": 1.831554651260376, "learning_rate": 1.7227259384124408e-05, "loss": 0.3339, "step": 25046 }, { "epoch": 4.088649442879882, "grad_norm": 1.6377136707305908, "learning_rate": 1.7227040060910057e-05, "loss": 0.3438, "step": 25047 }, { "epoch": 4.088812701522387, "grad_norm": 1.934842824935913, "learning_rate": 1.722682073041808e-05, "loss": 0.3739, "step": 25048 }, { "epoch": 4.088975960164891, "grad_norm": 1.7281274795532227, "learning_rate": 1.7226601392648697e-05, "loss": 0.3106, "step": 25049 }, { "epoch": 4.089139218807396, "grad_norm": 1.6250110864639282, "learning_rate": 1.722638204760213e-05, "loss": 0.3084, "step": 25050 }, { "epoch": 4.0893024774499, "grad_norm": 1.858460545539856, "learning_rate": 1.7226162695278596e-05, "loss": 0.3332, "step": 25051 }, { "epoch": 4.0894657360924045, "grad_norm": 1.836417555809021, "learning_rate": 1.7225943335678318e-05, "loss": 0.2969, "step": 25052 }, { "epoch": 4.089628994734909, "grad_norm": 2.0809576511383057, "learning_rate": 1.7225723968801516e-05, "loss": 0.3715, "step": 25053 }, { "epoch": 4.089792253377413, "grad_norm": 2.2319254875183105, "learning_rate": 1.7225504594648412e-05, "loss": 0.3948, "step": 25054 }, { "epoch": 4.089955512019918, "grad_norm": 1.902354121208191, "learning_rate": 1.7225285213219228e-05, "loss": 0.3028, "step": 25055 }, { "epoch": 4.090118770662422, "grad_norm": 2.4332783222198486, "learning_rate": 1.7225065824514183e-05, "loss": 0.3842, "step": 25056 }, { "epoch": 4.090282029304927, "grad_norm": 2.2251291275024414, "learning_rate": 1.7224846428533498e-05, "loss": 0.4249, "step": 25057 }, { "epoch": 4.090445287947431, "grad_norm": 2.211780071258545, "learning_rate": 1.72246270252774e-05, "loss": 0.3952, "step": 25058 }, { "epoch": 4.090608546589935, "grad_norm": 2.151787757873535, "learning_rate": 1.7224407614746098e-05, "loss": 0.3488, "step": 25059 }, { "epoch": 4.090771805232439, "grad_norm": 2.4580893516540527, "learning_rate": 1.722418819693982e-05, "loss": 0.4378, "step": 25060 }, { "epoch": 4.0909350638749435, "grad_norm": 1.7865103483200073, "learning_rate": 1.722396877185879e-05, "loss": 0.361, "step": 25061 }, { "epoch": 4.091098322517448, "grad_norm": 1.8272013664245605, "learning_rate": 1.7223749339503223e-05, "loss": 0.3338, "step": 25062 }, { "epoch": 4.091261581159952, "grad_norm": 2.0591540336608887, "learning_rate": 1.7223529899873344e-05, "loss": 0.3284, "step": 25063 }, { "epoch": 4.091424839802457, "grad_norm": 2.1701793670654297, "learning_rate": 1.722331045296937e-05, "loss": 0.4267, "step": 25064 }, { "epoch": 4.091588098444961, "grad_norm": 1.755298376083374, "learning_rate": 1.7223090998791524e-05, "loss": 0.3497, "step": 25065 }, { "epoch": 4.091751357087466, "grad_norm": 2.1619884967803955, "learning_rate": 1.722287153734003e-05, "loss": 0.353, "step": 25066 }, { "epoch": 4.09191461572997, "grad_norm": 2.077294111251831, "learning_rate": 1.7222652068615106e-05, "loss": 0.3492, "step": 25067 }, { "epoch": 4.092077874372475, "grad_norm": 1.9058653116226196, "learning_rate": 1.722243259261697e-05, "loss": 0.3625, "step": 25068 }, { "epoch": 4.092241133014979, "grad_norm": 1.983702540397644, "learning_rate": 1.7222213109345846e-05, "loss": 0.4222, "step": 25069 }, { "epoch": 4.0924043916574835, "grad_norm": 1.6303913593292236, "learning_rate": 1.722199361880196e-05, "loss": 0.3461, "step": 25070 }, { "epoch": 4.092567650299988, "grad_norm": 2.794731855392456, "learning_rate": 1.7221774120985525e-05, "loss": 0.4039, "step": 25071 }, { "epoch": 4.092730908942492, "grad_norm": 1.8530166149139404, "learning_rate": 1.7221554615896767e-05, "loss": 0.3431, "step": 25072 }, { "epoch": 4.092894167584997, "grad_norm": 2.152099847793579, "learning_rate": 1.7221335103535906e-05, "loss": 0.3768, "step": 25073 }, { "epoch": 4.093057426227501, "grad_norm": 2.0691497325897217, "learning_rate": 1.722111558390316e-05, "loss": 0.3818, "step": 25074 }, { "epoch": 4.093220684870006, "grad_norm": 1.7514393329620361, "learning_rate": 1.7220896056998753e-05, "loss": 0.3023, "step": 25075 }, { "epoch": 4.09338394351251, "grad_norm": 1.918279767036438, "learning_rate": 1.7220676522822905e-05, "loss": 0.3725, "step": 25076 }, { "epoch": 4.093547202155014, "grad_norm": 2.3174750804901123, "learning_rate": 1.722045698137584e-05, "loss": 0.3751, "step": 25077 }, { "epoch": 4.093710460797518, "grad_norm": 1.4996663331985474, "learning_rate": 1.7220237432657775e-05, "loss": 0.2994, "step": 25078 }, { "epoch": 4.0938737194400225, "grad_norm": 2.0937576293945312, "learning_rate": 1.7220017876668934e-05, "loss": 0.4006, "step": 25079 }, { "epoch": 4.094036978082527, "grad_norm": 2.0138118267059326, "learning_rate": 1.7219798313409538e-05, "loss": 0.3688, "step": 25080 }, { "epoch": 4.094200236725031, "grad_norm": 1.9365736246109009, "learning_rate": 1.7219578742879804e-05, "loss": 0.398, "step": 25081 }, { "epoch": 4.094363495367536, "grad_norm": 2.161644697189331, "learning_rate": 1.721935916507996e-05, "loss": 0.3694, "step": 25082 }, { "epoch": 4.09452675401004, "grad_norm": 1.8957457542419434, "learning_rate": 1.721913958001022e-05, "loss": 0.3236, "step": 25083 }, { "epoch": 4.094690012652545, "grad_norm": 1.748063564300537, "learning_rate": 1.721891998767081e-05, "loss": 0.3456, "step": 25084 }, { "epoch": 4.094853271295049, "grad_norm": 1.6197854280471802, "learning_rate": 1.721870038806195e-05, "loss": 0.3367, "step": 25085 }, { "epoch": 4.095016529937554, "grad_norm": 2.045865535736084, "learning_rate": 1.7218480781183857e-05, "loss": 0.3543, "step": 25086 }, { "epoch": 4.095179788580058, "grad_norm": 2.6702535152435303, "learning_rate": 1.7218261167036758e-05, "loss": 0.4161, "step": 25087 }, { "epoch": 4.0953430472225625, "grad_norm": 2.1248879432678223, "learning_rate": 1.7218041545620873e-05, "loss": 0.3502, "step": 25088 }, { "epoch": 4.095506305865067, "grad_norm": 1.734336256980896, "learning_rate": 1.7217821916936422e-05, "loss": 0.3354, "step": 25089 }, { "epoch": 4.095669564507571, "grad_norm": 2.3195385932922363, "learning_rate": 1.7217602280983622e-05, "loss": 0.3832, "step": 25090 }, { "epoch": 4.095832823150076, "grad_norm": 2.371894121170044, "learning_rate": 1.7217382637762703e-05, "loss": 0.3721, "step": 25091 }, { "epoch": 4.09599608179258, "grad_norm": 2.4948151111602783, "learning_rate": 1.721716298727388e-05, "loss": 0.4174, "step": 25092 }, { "epoch": 4.096159340435085, "grad_norm": 1.743330478668213, "learning_rate": 1.7216943329517377e-05, "loss": 0.348, "step": 25093 }, { "epoch": 4.096322599077588, "grad_norm": 1.754714012145996, "learning_rate": 1.7216723664493414e-05, "loss": 0.3481, "step": 25094 }, { "epoch": 4.096485857720093, "grad_norm": 1.9079902172088623, "learning_rate": 1.721650399220221e-05, "loss": 0.3656, "step": 25095 }, { "epoch": 4.096649116362597, "grad_norm": 2.4600095748901367, "learning_rate": 1.721628431264399e-05, "loss": 0.4136, "step": 25096 }, { "epoch": 4.0968123750051015, "grad_norm": 2.5057523250579834, "learning_rate": 1.7216064625818974e-05, "loss": 0.4552, "step": 25097 }, { "epoch": 4.096975633647606, "grad_norm": 2.1979241371154785, "learning_rate": 1.721584493172738e-05, "loss": 0.3746, "step": 25098 }, { "epoch": 4.09713889229011, "grad_norm": 1.8577485084533691, "learning_rate": 1.7215625230369438e-05, "loss": 0.3605, "step": 25099 }, { "epoch": 4.097302150932615, "grad_norm": 1.7315596342086792, "learning_rate": 1.7215405521745358e-05, "loss": 0.3387, "step": 25100 }, { "epoch": 4.097465409575119, "grad_norm": 1.9493974447250366, "learning_rate": 1.721518580585537e-05, "loss": 0.4019, "step": 25101 }, { "epoch": 4.097628668217624, "grad_norm": 2.0684003829956055, "learning_rate": 1.7214966082699686e-05, "loss": 0.361, "step": 25102 }, { "epoch": 4.097791926860128, "grad_norm": 1.9021624326705933, "learning_rate": 1.721474635227854e-05, "loss": 0.3336, "step": 25103 }, { "epoch": 4.097955185502633, "grad_norm": 2.0287506580352783, "learning_rate": 1.7214526614592138e-05, "loss": 0.4175, "step": 25104 }, { "epoch": 4.098118444145137, "grad_norm": 1.8469064235687256, "learning_rate": 1.721430686964072e-05, "loss": 0.355, "step": 25105 }, { "epoch": 4.0982817027876415, "grad_norm": 2.4125208854675293, "learning_rate": 1.721408711742449e-05, "loss": 0.4875, "step": 25106 }, { "epoch": 4.098444961430146, "grad_norm": 1.6568846702575684, "learning_rate": 1.7213867357943677e-05, "loss": 0.346, "step": 25107 }, { "epoch": 4.09860822007265, "grad_norm": 2.0248961448669434, "learning_rate": 1.7213647591198503e-05, "loss": 0.3744, "step": 25108 }, { "epoch": 4.098771478715155, "grad_norm": 1.7717660665512085, "learning_rate": 1.721342781718919e-05, "loss": 0.3444, "step": 25109 }, { "epoch": 4.098934737357659, "grad_norm": 1.6337199211120605, "learning_rate": 1.721320803591595e-05, "loss": 0.314, "step": 25110 }, { "epoch": 4.099097996000164, "grad_norm": 2.3689234256744385, "learning_rate": 1.7212988247379018e-05, "loss": 0.4297, "step": 25111 }, { "epoch": 4.099261254642667, "grad_norm": 2.481860399246216, "learning_rate": 1.721276845157861e-05, "loss": 0.3891, "step": 25112 }, { "epoch": 4.099424513285172, "grad_norm": 2.320225954055786, "learning_rate": 1.721254864851494e-05, "loss": 0.4292, "step": 25113 }, { "epoch": 4.099587771927676, "grad_norm": 2.1493828296661377, "learning_rate": 1.7212328838188237e-05, "loss": 0.4029, "step": 25114 }, { "epoch": 4.0997510305701805, "grad_norm": 2.4779751300811768, "learning_rate": 1.7212109020598722e-05, "loss": 0.4385, "step": 25115 }, { "epoch": 4.099914289212685, "grad_norm": 2.1802570819854736, "learning_rate": 1.7211889195746617e-05, "loss": 0.3559, "step": 25116 }, { "epoch": 4.100077547855189, "grad_norm": 1.9444886445999146, "learning_rate": 1.7211669363632137e-05, "loss": 0.4043, "step": 25117 }, { "epoch": 4.100240806497694, "grad_norm": 1.7789685726165771, "learning_rate": 1.721144952425551e-05, "loss": 0.336, "step": 25118 }, { "epoch": 4.100404065140198, "grad_norm": 2.1575446128845215, "learning_rate": 1.7211229677616957e-05, "loss": 0.4325, "step": 25119 }, { "epoch": 4.100567323782703, "grad_norm": 2.9046552181243896, "learning_rate": 1.7211009823716695e-05, "loss": 0.4385, "step": 25120 }, { "epoch": 4.100730582425207, "grad_norm": 2.125131130218506, "learning_rate": 1.721078996255495e-05, "loss": 0.4058, "step": 25121 }, { "epoch": 4.100893841067712, "grad_norm": 2.0659334659576416, "learning_rate": 1.721057009413194e-05, "loss": 0.3996, "step": 25122 }, { "epoch": 4.101057099710216, "grad_norm": 2.2152554988861084, "learning_rate": 1.721035021844789e-05, "loss": 0.444, "step": 25123 }, { "epoch": 4.1012203583527205, "grad_norm": 2.4139115810394287, "learning_rate": 1.7210130335503013e-05, "loss": 0.4283, "step": 25124 }, { "epoch": 4.101383616995225, "grad_norm": 1.9138742685317993, "learning_rate": 1.7209910445297543e-05, "loss": 0.3889, "step": 25125 }, { "epoch": 4.101546875637729, "grad_norm": 1.8021811246871948, "learning_rate": 1.7209690547831696e-05, "loss": 0.3358, "step": 25126 }, { "epoch": 4.101710134280234, "grad_norm": 1.8763912916183472, "learning_rate": 1.720947064310569e-05, "loss": 0.3454, "step": 25127 }, { "epoch": 4.101873392922738, "grad_norm": 2.27821683883667, "learning_rate": 1.7209250731119746e-05, "loss": 0.4486, "step": 25128 }, { "epoch": 4.102036651565243, "grad_norm": 1.9154284000396729, "learning_rate": 1.7209030811874093e-05, "loss": 0.3515, "step": 25129 }, { "epoch": 4.102199910207746, "grad_norm": 2.3683011531829834, "learning_rate": 1.7208810885368948e-05, "loss": 0.3224, "step": 25130 }, { "epoch": 4.102363168850251, "grad_norm": 1.9582182168960571, "learning_rate": 1.720859095160453e-05, "loss": 0.4159, "step": 25131 }, { "epoch": 4.102526427492755, "grad_norm": 2.3163483142852783, "learning_rate": 1.720837101058106e-05, "loss": 0.3991, "step": 25132 }, { "epoch": 4.1026896861352595, "grad_norm": 1.9458767175674438, "learning_rate": 1.7208151062298767e-05, "loss": 0.3547, "step": 25133 }, { "epoch": 4.102852944777764, "grad_norm": 2.3299131393432617, "learning_rate": 1.7207931106757867e-05, "loss": 0.3976, "step": 25134 }, { "epoch": 4.103016203420268, "grad_norm": 1.7647358179092407, "learning_rate": 1.7207711143958584e-05, "loss": 0.3782, "step": 25135 }, { "epoch": 4.103179462062773, "grad_norm": 2.955258846282959, "learning_rate": 1.7207491173901136e-05, "loss": 0.4646, "step": 25136 }, { "epoch": 4.103342720705277, "grad_norm": 1.860377550125122, "learning_rate": 1.7207271196585743e-05, "loss": 0.3841, "step": 25137 }, { "epoch": 4.103505979347782, "grad_norm": 2.1353695392608643, "learning_rate": 1.7207051212012634e-05, "loss": 0.4167, "step": 25138 }, { "epoch": 4.103669237990286, "grad_norm": 2.4408466815948486, "learning_rate": 1.7206831220182024e-05, "loss": 0.3973, "step": 25139 }, { "epoch": 4.103832496632791, "grad_norm": 2.0049498081207275, "learning_rate": 1.720661122109414e-05, "loss": 0.3579, "step": 25140 }, { "epoch": 4.103995755275295, "grad_norm": 2.1930031776428223, "learning_rate": 1.72063912147492e-05, "loss": 0.4159, "step": 25141 }, { "epoch": 4.1041590139177995, "grad_norm": 1.7147715091705322, "learning_rate": 1.720617120114742e-05, "loss": 0.3203, "step": 25142 }, { "epoch": 4.104322272560304, "grad_norm": 2.131957530975342, "learning_rate": 1.7205951180289032e-05, "loss": 0.3802, "step": 25143 }, { "epoch": 4.104485531202808, "grad_norm": 1.874054193496704, "learning_rate": 1.7205731152174255e-05, "loss": 0.3828, "step": 25144 }, { "epoch": 4.104648789845313, "grad_norm": 2.1755993366241455, "learning_rate": 1.7205511116803306e-05, "loss": 0.3867, "step": 25145 }, { "epoch": 4.104812048487817, "grad_norm": 2.056381940841675, "learning_rate": 1.7205291074176408e-05, "loss": 0.3618, "step": 25146 }, { "epoch": 4.104975307130321, "grad_norm": 2.0319418907165527, "learning_rate": 1.7205071024293785e-05, "loss": 0.3371, "step": 25147 }, { "epoch": 4.105138565772825, "grad_norm": 1.7954508066177368, "learning_rate": 1.720485096715566e-05, "loss": 0.4086, "step": 25148 }, { "epoch": 4.10530182441533, "grad_norm": 2.2589287757873535, "learning_rate": 1.720463090276225e-05, "loss": 0.4106, "step": 25149 }, { "epoch": 4.105465083057834, "grad_norm": 2.215886354446411, "learning_rate": 1.7204410831113778e-05, "loss": 0.3916, "step": 25150 }, { "epoch": 4.1056283417003385, "grad_norm": 2.4691965579986572, "learning_rate": 1.7204190752210465e-05, "loss": 0.4028, "step": 25151 }, { "epoch": 4.105791600342843, "grad_norm": 2.15065860748291, "learning_rate": 1.7203970666052535e-05, "loss": 0.4341, "step": 25152 }, { "epoch": 4.105954858985347, "grad_norm": 2.2761294841766357, "learning_rate": 1.720375057264021e-05, "loss": 0.3907, "step": 25153 }, { "epoch": 4.106118117627852, "grad_norm": 1.823081135749817, "learning_rate": 1.720353047197371e-05, "loss": 0.3432, "step": 25154 }, { "epoch": 4.106281376270356, "grad_norm": 1.7082163095474243, "learning_rate": 1.720331036405326e-05, "loss": 0.3289, "step": 25155 }, { "epoch": 4.106444634912861, "grad_norm": 2.0164315700531006, "learning_rate": 1.720309024887907e-05, "loss": 0.3416, "step": 25156 }, { "epoch": 4.106607893555365, "grad_norm": 2.29935622215271, "learning_rate": 1.7202870126451376e-05, "loss": 0.3539, "step": 25157 }, { "epoch": 4.10677115219787, "grad_norm": 2.8391904830932617, "learning_rate": 1.7202649996770392e-05, "loss": 0.9294, "step": 25158 }, { "epoch": 4.106934410840374, "grad_norm": 2.8116455078125, "learning_rate": 1.7202429859836337e-05, "loss": 0.3845, "step": 25159 }, { "epoch": 4.1070976694828785, "grad_norm": 2.3290727138519287, "learning_rate": 1.7202209715649444e-05, "loss": 0.3654, "step": 25160 }, { "epoch": 4.107260928125383, "grad_norm": 2.4160430431365967, "learning_rate": 1.7201989564209926e-05, "loss": 0.4425, "step": 25161 }, { "epoch": 4.107424186767887, "grad_norm": 2.3499765396118164, "learning_rate": 1.7201769405518006e-05, "loss": 0.3561, "step": 25162 }, { "epoch": 4.107587445410392, "grad_norm": 2.0794219970703125, "learning_rate": 1.7201549239573903e-05, "loss": 0.4339, "step": 25163 }, { "epoch": 4.107750704052896, "grad_norm": 1.9483829736709595, "learning_rate": 1.7201329066377845e-05, "loss": 0.382, "step": 25164 }, { "epoch": 4.1079139626954, "grad_norm": 2.068620443344116, "learning_rate": 1.720110888593005e-05, "loss": 0.3933, "step": 25165 }, { "epoch": 4.108077221337904, "grad_norm": 2.119382858276367, "learning_rate": 1.7200888698230743e-05, "loss": 0.3713, "step": 25166 }, { "epoch": 4.108240479980409, "grad_norm": 2.111302614212036, "learning_rate": 1.720066850328014e-05, "loss": 0.3838, "step": 25167 }, { "epoch": 4.108403738622913, "grad_norm": 2.545625686645508, "learning_rate": 1.7200448301078464e-05, "loss": 0.4336, "step": 25168 }, { "epoch": 4.1085669972654175, "grad_norm": 2.2438712120056152, "learning_rate": 1.720022809162594e-05, "loss": 0.3545, "step": 25169 }, { "epoch": 4.108730255907922, "grad_norm": 2.386364698410034, "learning_rate": 1.7200007874922788e-05, "loss": 0.4216, "step": 25170 }, { "epoch": 4.108893514550426, "grad_norm": 2.0989534854888916, "learning_rate": 1.719978765096923e-05, "loss": 0.3614, "step": 25171 }, { "epoch": 4.109056773192931, "grad_norm": 2.016679286956787, "learning_rate": 1.719956741976549e-05, "loss": 0.367, "step": 25172 }, { "epoch": 4.109220031835435, "grad_norm": 2.065474510192871, "learning_rate": 1.7199347181311785e-05, "loss": 0.3611, "step": 25173 }, { "epoch": 4.10938329047794, "grad_norm": 2.14770245552063, "learning_rate": 1.7199126935608342e-05, "loss": 0.3806, "step": 25174 }, { "epoch": 4.109546549120444, "grad_norm": 1.8175256252288818, "learning_rate": 1.7198906682655377e-05, "loss": 0.3337, "step": 25175 }, { "epoch": 4.109709807762949, "grad_norm": 2.3415613174438477, "learning_rate": 1.7198686422453115e-05, "loss": 0.4235, "step": 25176 }, { "epoch": 4.109873066405453, "grad_norm": 1.9998314380645752, "learning_rate": 1.719846615500178e-05, "loss": 0.4047, "step": 25177 }, { "epoch": 4.1100363250479575, "grad_norm": 2.4329025745391846, "learning_rate": 1.719824588030159e-05, "loss": 0.4247, "step": 25178 }, { "epoch": 4.110199583690462, "grad_norm": 2.6937026977539062, "learning_rate": 1.719802559835277e-05, "loss": 0.3839, "step": 25179 }, { "epoch": 4.110362842332966, "grad_norm": 2.222360372543335, "learning_rate": 1.7197805309155537e-05, "loss": 0.4143, "step": 25180 }, { "epoch": 4.110526100975471, "grad_norm": 2.6072704792022705, "learning_rate": 1.7197585012710117e-05, "loss": 0.4023, "step": 25181 }, { "epoch": 4.110689359617975, "grad_norm": 1.7540305852890015, "learning_rate": 1.7197364709016732e-05, "loss": 0.3212, "step": 25182 }, { "epoch": 4.110852618260479, "grad_norm": 2.205554962158203, "learning_rate": 1.7197144398075604e-05, "loss": 0.3767, "step": 25183 }, { "epoch": 4.111015876902983, "grad_norm": 2.111877202987671, "learning_rate": 1.719692407988695e-05, "loss": 0.4822, "step": 25184 }, { "epoch": 4.111179135545488, "grad_norm": 1.971250057220459, "learning_rate": 1.7196703754451e-05, "loss": 0.351, "step": 25185 }, { "epoch": 4.111342394187992, "grad_norm": 2.476287603378296, "learning_rate": 1.7196483421767968e-05, "loss": 0.4158, "step": 25186 }, { "epoch": 4.1115056528304965, "grad_norm": 2.558724880218506, "learning_rate": 1.719626308183808e-05, "loss": 0.4036, "step": 25187 }, { "epoch": 4.111668911473001, "grad_norm": 2.10551381111145, "learning_rate": 1.7196042734661553e-05, "loss": 0.3875, "step": 25188 }, { "epoch": 4.111832170115505, "grad_norm": 2.2387380599975586, "learning_rate": 1.7195822380238615e-05, "loss": 0.3553, "step": 25189 }, { "epoch": 4.11199542875801, "grad_norm": 1.8900258541107178, "learning_rate": 1.7195602018569487e-05, "loss": 0.3188, "step": 25190 }, { "epoch": 4.112158687400514, "grad_norm": 2.4153895378112793, "learning_rate": 1.7195381649654392e-05, "loss": 0.3856, "step": 25191 }, { "epoch": 4.112321946043019, "grad_norm": 1.8032464981079102, "learning_rate": 1.7195161273493546e-05, "loss": 0.3247, "step": 25192 }, { "epoch": 4.112485204685523, "grad_norm": 1.6360008716583252, "learning_rate": 1.7194940890087174e-05, "loss": 0.3462, "step": 25193 }, { "epoch": 4.112648463328028, "grad_norm": 2.2995424270629883, "learning_rate": 1.7194720499435503e-05, "loss": 0.4611, "step": 25194 }, { "epoch": 4.112811721970532, "grad_norm": 1.9291337728500366, "learning_rate": 1.7194500101538748e-05, "loss": 0.3796, "step": 25195 }, { "epoch": 4.1129749806130365, "grad_norm": 2.0466370582580566, "learning_rate": 1.719427969639713e-05, "loss": 0.3669, "step": 25196 }, { "epoch": 4.113138239255541, "grad_norm": 1.9638433456420898, "learning_rate": 1.719405928401088e-05, "loss": 0.4041, "step": 25197 }, { "epoch": 4.113301497898045, "grad_norm": 2.039158344268799, "learning_rate": 1.719383886438021e-05, "loss": 0.3676, "step": 25198 }, { "epoch": 4.11346475654055, "grad_norm": 1.799828290939331, "learning_rate": 1.7193618437505348e-05, "loss": 0.3664, "step": 25199 }, { "epoch": 4.113628015183053, "grad_norm": 2.066012382507324, "learning_rate": 1.7193398003386514e-05, "loss": 0.3951, "step": 25200 }, { "epoch": 4.113791273825558, "grad_norm": 2.102524995803833, "learning_rate": 1.719317756202393e-05, "loss": 0.3794, "step": 25201 }, { "epoch": 4.113954532468062, "grad_norm": 2.038663864135742, "learning_rate": 1.7192957113417815e-05, "loss": 0.3685, "step": 25202 }, { "epoch": 4.114117791110567, "grad_norm": 2.1306049823760986, "learning_rate": 1.7192736657568398e-05, "loss": 0.3742, "step": 25203 }, { "epoch": 4.114281049753071, "grad_norm": 2.3522226810455322, "learning_rate": 1.7192516194475897e-05, "loss": 0.414, "step": 25204 }, { "epoch": 4.1144443083955755, "grad_norm": 2.0202677249908447, "learning_rate": 1.7192295724140533e-05, "loss": 0.3724, "step": 25205 }, { "epoch": 4.11460756703808, "grad_norm": 1.8449894189834595, "learning_rate": 1.719207524656253e-05, "loss": 0.3761, "step": 25206 }, { "epoch": 4.114770825680584, "grad_norm": 2.084780216217041, "learning_rate": 1.7191854761742108e-05, "loss": 0.4027, "step": 25207 }, { "epoch": 4.114934084323089, "grad_norm": 1.5628814697265625, "learning_rate": 1.719163426967949e-05, "loss": 0.3538, "step": 25208 }, { "epoch": 4.115097342965593, "grad_norm": 1.825577735900879, "learning_rate": 1.71914137703749e-05, "loss": 0.3655, "step": 25209 }, { "epoch": 4.115260601608098, "grad_norm": 2.232879161834717, "learning_rate": 1.7191193263828557e-05, "loss": 0.4071, "step": 25210 }, { "epoch": 4.115423860250602, "grad_norm": 1.9818192720413208, "learning_rate": 1.719097275004068e-05, "loss": 0.358, "step": 25211 }, { "epoch": 4.115587118893107, "grad_norm": 2.1048123836517334, "learning_rate": 1.7190752229011502e-05, "loss": 0.3757, "step": 25212 }, { "epoch": 4.115750377535611, "grad_norm": 2.1376497745513916, "learning_rate": 1.7190531700741235e-05, "loss": 0.4767, "step": 25213 }, { "epoch": 4.1159136361781155, "grad_norm": 2.3248047828674316, "learning_rate": 1.7190311165230108e-05, "loss": 0.456, "step": 25214 }, { "epoch": 4.11607689482062, "grad_norm": 1.8481496572494507, "learning_rate": 1.7190090622478336e-05, "loss": 0.4028, "step": 25215 }, { "epoch": 4.116240153463124, "grad_norm": 1.9638482332229614, "learning_rate": 1.7189870072486146e-05, "loss": 0.4016, "step": 25216 }, { "epoch": 4.116403412105629, "grad_norm": 2.263165235519409, "learning_rate": 1.7189649515253757e-05, "loss": 0.3727, "step": 25217 }, { "epoch": 4.116566670748132, "grad_norm": 2.0705161094665527, "learning_rate": 1.7189428950781393e-05, "loss": 0.3119, "step": 25218 }, { "epoch": 4.116729929390637, "grad_norm": 2.3823459148406982, "learning_rate": 1.718920837906928e-05, "loss": 0.3558, "step": 25219 }, { "epoch": 4.116893188033141, "grad_norm": 1.9986616373062134, "learning_rate": 1.7188987800117634e-05, "loss": 0.355, "step": 25220 }, { "epoch": 4.117056446675646, "grad_norm": 2.2767884731292725, "learning_rate": 1.718876721392668e-05, "loss": 0.4483, "step": 25221 }, { "epoch": 4.11721970531815, "grad_norm": 2.2881546020507812, "learning_rate": 1.7188546620496634e-05, "loss": 0.4223, "step": 25222 }, { "epoch": 4.1173829639606545, "grad_norm": 1.7043484449386597, "learning_rate": 1.7188326019827727e-05, "loss": 0.2684, "step": 25223 }, { "epoch": 4.117546222603159, "grad_norm": 1.8612561225891113, "learning_rate": 1.718810541192018e-05, "loss": 0.3481, "step": 25224 }, { "epoch": 4.117709481245663, "grad_norm": 1.759475588798523, "learning_rate": 1.718788479677421e-05, "loss": 0.3345, "step": 25225 }, { "epoch": 4.117872739888168, "grad_norm": 2.0268173217773438, "learning_rate": 1.7187664174390043e-05, "loss": 0.3482, "step": 25226 }, { "epoch": 4.118035998530672, "grad_norm": 1.7169201374053955, "learning_rate": 1.71874435447679e-05, "loss": 0.3287, "step": 25227 }, { "epoch": 4.118199257173177, "grad_norm": 1.9505585432052612, "learning_rate": 1.7187222907908e-05, "loss": 0.4083, "step": 25228 }, { "epoch": 4.118362515815681, "grad_norm": 2.155693531036377, "learning_rate": 1.7187002263810575e-05, "loss": 0.4677, "step": 25229 }, { "epoch": 4.118525774458186, "grad_norm": 2.8166630268096924, "learning_rate": 1.7186781612475837e-05, "loss": 0.4531, "step": 25230 }, { "epoch": 4.11868903310069, "grad_norm": 2.720813274383545, "learning_rate": 1.7186560953904012e-05, "loss": 0.4613, "step": 25231 }, { "epoch": 4.1188522917431944, "grad_norm": 2.5879125595092773, "learning_rate": 1.718634028809532e-05, "loss": 0.366, "step": 25232 }, { "epoch": 4.119015550385699, "grad_norm": 2.030890703201294, "learning_rate": 1.7186119615049986e-05, "loss": 0.4542, "step": 25233 }, { "epoch": 4.119178809028203, "grad_norm": 2.181844472885132, "learning_rate": 1.7185898934768236e-05, "loss": 0.4079, "step": 25234 }, { "epoch": 4.119342067670708, "grad_norm": 1.9738162755966187, "learning_rate": 1.7185678247250283e-05, "loss": 0.3587, "step": 25235 }, { "epoch": 4.119505326313211, "grad_norm": 2.029005289077759, "learning_rate": 1.7185457552496355e-05, "loss": 0.3621, "step": 25236 }, { "epoch": 4.119668584955716, "grad_norm": 2.1329104900360107, "learning_rate": 1.718523685050667e-05, "loss": 0.3583, "step": 25237 }, { "epoch": 4.11983184359822, "grad_norm": 1.9952616691589355, "learning_rate": 1.718501614128146e-05, "loss": 0.3731, "step": 25238 }, { "epoch": 4.119995102240725, "grad_norm": 2.2001781463623047, "learning_rate": 1.7184795424820936e-05, "loss": 0.3977, "step": 25239 }, { "epoch": 4.120158360883229, "grad_norm": 2.0518975257873535, "learning_rate": 1.718457470112533e-05, "loss": 0.3247, "step": 25240 }, { "epoch": 4.1203216195257335, "grad_norm": 2.314006805419922, "learning_rate": 1.7184353970194854e-05, "loss": 0.4094, "step": 25241 }, { "epoch": 4.120484878168238, "grad_norm": 2.21172833442688, "learning_rate": 1.7184133232029735e-05, "loss": 0.3985, "step": 25242 }, { "epoch": 4.120648136810742, "grad_norm": 2.9010732173919678, "learning_rate": 1.7183912486630196e-05, "loss": 0.3511, "step": 25243 }, { "epoch": 4.120811395453247, "grad_norm": 2.4823150634765625, "learning_rate": 1.7183691733996463e-05, "loss": 0.4213, "step": 25244 }, { "epoch": 4.120974654095751, "grad_norm": 2.029151201248169, "learning_rate": 1.718347097412875e-05, "loss": 0.3697, "step": 25245 }, { "epoch": 4.121137912738256, "grad_norm": 2.0989716053009033, "learning_rate": 1.7183250207027285e-05, "loss": 0.3356, "step": 25246 }, { "epoch": 4.12130117138076, "grad_norm": 2.188054323196411, "learning_rate": 1.718302943269229e-05, "loss": 0.3786, "step": 25247 }, { "epoch": 4.121464430023265, "grad_norm": 1.7447090148925781, "learning_rate": 1.7182808651123987e-05, "loss": 0.3866, "step": 25248 }, { "epoch": 4.121627688665769, "grad_norm": 1.8088637590408325, "learning_rate": 1.7182587862322598e-05, "loss": 0.3663, "step": 25249 }, { "epoch": 4.1217909473082734, "grad_norm": 1.6351381540298462, "learning_rate": 1.7182367066288344e-05, "loss": 0.3796, "step": 25250 }, { "epoch": 4.121954205950778, "grad_norm": 2.3096516132354736, "learning_rate": 1.7182146263021445e-05, "loss": 0.4279, "step": 25251 }, { "epoch": 4.122117464593282, "grad_norm": 1.9846665859222412, "learning_rate": 1.718192545252213e-05, "loss": 0.3456, "step": 25252 }, { "epoch": 4.122280723235786, "grad_norm": 1.9196209907531738, "learning_rate": 1.7181704634790617e-05, "loss": 0.3812, "step": 25253 }, { "epoch": 4.12244398187829, "grad_norm": 2.0930824279785156, "learning_rate": 1.7181483809827128e-05, "loss": 0.351, "step": 25254 }, { "epoch": 4.122607240520795, "grad_norm": 1.617262363433838, "learning_rate": 1.718126297763189e-05, "loss": 0.3219, "step": 25255 }, { "epoch": 4.122770499163299, "grad_norm": 2.1248393058776855, "learning_rate": 1.718104213820512e-05, "loss": 0.3723, "step": 25256 }, { "epoch": 4.122933757805804, "grad_norm": 1.9667118787765503, "learning_rate": 1.7180821291547044e-05, "loss": 0.3614, "step": 25257 }, { "epoch": 4.123097016448308, "grad_norm": 2.130584478378296, "learning_rate": 1.7180600437657878e-05, "loss": 0.4048, "step": 25258 }, { "epoch": 4.1232602750908125, "grad_norm": 1.8355967998504639, "learning_rate": 1.7180379576537856e-05, "loss": 0.3261, "step": 25259 }, { "epoch": 4.123423533733317, "grad_norm": 1.9297746419906616, "learning_rate": 1.718015870818719e-05, "loss": 0.3256, "step": 25260 }, { "epoch": 4.123586792375821, "grad_norm": 2.1309640407562256, "learning_rate": 1.7179937832606105e-05, "loss": 0.3809, "step": 25261 }, { "epoch": 4.123750051018326, "grad_norm": 2.3657774925231934, "learning_rate": 1.717971694979483e-05, "loss": 0.439, "step": 25262 }, { "epoch": 4.12391330966083, "grad_norm": 2.0432968139648438, "learning_rate": 1.7179496059753576e-05, "loss": 0.3648, "step": 25263 }, { "epoch": 4.124076568303335, "grad_norm": 2.4348602294921875, "learning_rate": 1.7179275162482574e-05, "loss": 0.3939, "step": 25264 }, { "epoch": 4.124239826945839, "grad_norm": 1.9914543628692627, "learning_rate": 1.7179054257982045e-05, "loss": 0.3868, "step": 25265 }, { "epoch": 4.124403085588344, "grad_norm": 2.189256191253662, "learning_rate": 1.7178833346252208e-05, "loss": 0.4335, "step": 25266 }, { "epoch": 4.124566344230848, "grad_norm": 1.8637467622756958, "learning_rate": 1.717861242729329e-05, "loss": 0.3761, "step": 25267 }, { "epoch": 4.124729602873352, "grad_norm": 2.336961507797241, "learning_rate": 1.7178391501105508e-05, "loss": 0.4277, "step": 25268 }, { "epoch": 4.124892861515857, "grad_norm": 1.7939109802246094, "learning_rate": 1.717817056768909e-05, "loss": 0.3619, "step": 25269 }, { "epoch": 4.12505612015836, "grad_norm": 2.1457808017730713, "learning_rate": 1.717794962704425e-05, "loss": 0.3632, "step": 25270 }, { "epoch": 4.125219378800865, "grad_norm": 2.085277557373047, "learning_rate": 1.7177728679171224e-05, "loss": 0.3947, "step": 25271 }, { "epoch": 4.125382637443369, "grad_norm": 2.104033946990967, "learning_rate": 1.7177507724070227e-05, "loss": 0.3513, "step": 25272 }, { "epoch": 4.125545896085874, "grad_norm": 1.8225528001785278, "learning_rate": 1.7177286761741477e-05, "loss": 0.3481, "step": 25273 }, { "epoch": 4.125709154728378, "grad_norm": 2.30619478225708, "learning_rate": 1.7177065792185205e-05, "loss": 0.3808, "step": 25274 }, { "epoch": 4.125872413370883, "grad_norm": 2.349799633026123, "learning_rate": 1.717684481540163e-05, "loss": 0.343, "step": 25275 }, { "epoch": 4.126035672013387, "grad_norm": 2.211833953857422, "learning_rate": 1.7176623831390973e-05, "loss": 0.3898, "step": 25276 }, { "epoch": 4.1261989306558915, "grad_norm": 2.1654253005981445, "learning_rate": 1.7176402840153453e-05, "loss": 0.4334, "step": 25277 }, { "epoch": 4.126362189298396, "grad_norm": 1.8671026229858398, "learning_rate": 1.7176181841689302e-05, "loss": 0.3255, "step": 25278 }, { "epoch": 4.1265254479409, "grad_norm": 1.808140754699707, "learning_rate": 1.717596083599874e-05, "loss": 0.4087, "step": 25279 }, { "epoch": 4.126688706583405, "grad_norm": 2.0535695552825928, "learning_rate": 1.7175739823081985e-05, "loss": 0.3871, "step": 25280 }, { "epoch": 4.126851965225909, "grad_norm": 1.804795742034912, "learning_rate": 1.7175518802939262e-05, "loss": 0.3872, "step": 25281 }, { "epoch": 4.127015223868414, "grad_norm": 2.288583993911743, "learning_rate": 1.7175297775570793e-05, "loss": 0.3754, "step": 25282 }, { "epoch": 4.127178482510918, "grad_norm": 2.1676409244537354, "learning_rate": 1.71750767409768e-05, "loss": 0.3479, "step": 25283 }, { "epoch": 4.127341741153423, "grad_norm": 2.1954517364501953, "learning_rate": 1.7174855699157506e-05, "loss": 0.3695, "step": 25284 }, { "epoch": 4.127504999795927, "grad_norm": 1.922972559928894, "learning_rate": 1.717463465011314e-05, "loss": 0.4002, "step": 25285 }, { "epoch": 4.127668258438431, "grad_norm": 1.9890490770339966, "learning_rate": 1.717441359384391e-05, "loss": 0.5281, "step": 25286 }, { "epoch": 4.127831517080936, "grad_norm": 1.8704947233200073, "learning_rate": 1.7174192530350056e-05, "loss": 0.3397, "step": 25287 }, { "epoch": 4.12799477572344, "grad_norm": 2.1467974185943604, "learning_rate": 1.717397145963179e-05, "loss": 0.4197, "step": 25288 }, { "epoch": 4.128158034365944, "grad_norm": 2.0836470127105713, "learning_rate": 1.717375038168933e-05, "loss": 0.3844, "step": 25289 }, { "epoch": 4.128321293008448, "grad_norm": 1.437562346458435, "learning_rate": 1.7173529296522914e-05, "loss": 0.3062, "step": 25290 }, { "epoch": 4.128484551650953, "grad_norm": 1.925621747970581, "learning_rate": 1.717330820413275e-05, "loss": 0.3478, "step": 25291 }, { "epoch": 4.128647810293457, "grad_norm": 1.8061813116073608, "learning_rate": 1.717308710451907e-05, "loss": 0.3814, "step": 25292 }, { "epoch": 4.128811068935962, "grad_norm": 2.2377333641052246, "learning_rate": 1.717286599768209e-05, "loss": 0.3926, "step": 25293 }, { "epoch": 4.128974327578466, "grad_norm": 2.1693694591522217, "learning_rate": 1.717264488362204e-05, "loss": 0.3858, "step": 25294 }, { "epoch": 4.1291375862209705, "grad_norm": 2.0287914276123047, "learning_rate": 1.7172423762339134e-05, "loss": 0.3868, "step": 25295 }, { "epoch": 4.129300844863475, "grad_norm": 2.391197681427002, "learning_rate": 1.7172202633833603e-05, "loss": 0.4343, "step": 25296 }, { "epoch": 4.129464103505979, "grad_norm": 2.4699034690856934, "learning_rate": 1.7171981498105662e-05, "loss": 0.373, "step": 25297 }, { "epoch": 4.129627362148484, "grad_norm": 2.1788058280944824, "learning_rate": 1.717176035515554e-05, "loss": 0.3526, "step": 25298 }, { "epoch": 4.129790620790988, "grad_norm": 2.2769837379455566, "learning_rate": 1.7171539204983457e-05, "loss": 0.3812, "step": 25299 }, { "epoch": 4.129953879433493, "grad_norm": 2.17185115814209, "learning_rate": 1.7171318047589637e-05, "loss": 0.3569, "step": 25300 }, { "epoch": 4.130117138075997, "grad_norm": 1.7832629680633545, "learning_rate": 1.71710968829743e-05, "loss": 0.3097, "step": 25301 }, { "epoch": 4.130280396718502, "grad_norm": 1.9738765954971313, "learning_rate": 1.717087571113767e-05, "loss": 0.3321, "step": 25302 }, { "epoch": 4.130443655361006, "grad_norm": 2.4624578952789307, "learning_rate": 1.717065453207997e-05, "loss": 0.4539, "step": 25303 }, { "epoch": 4.13060691400351, "grad_norm": 2.1414849758148193, "learning_rate": 1.7170433345801425e-05, "loss": 0.3797, "step": 25304 }, { "epoch": 4.130770172646015, "grad_norm": 1.6124200820922852, "learning_rate": 1.7170212152302253e-05, "loss": 0.3326, "step": 25305 }, { "epoch": 4.130933431288518, "grad_norm": 2.070373296737671, "learning_rate": 1.7169990951582684e-05, "loss": 0.3807, "step": 25306 }, { "epoch": 4.131096689931023, "grad_norm": 2.0880255699157715, "learning_rate": 1.716976974364293e-05, "loss": 0.4126, "step": 25307 }, { "epoch": 4.131259948573527, "grad_norm": 2.074585437774658, "learning_rate": 1.7169548528483223e-05, "loss": 0.3556, "step": 25308 }, { "epoch": 4.131423207216032, "grad_norm": 2.2751686573028564, "learning_rate": 1.716932730610378e-05, "loss": 0.4977, "step": 25309 }, { "epoch": 4.131586465858536, "grad_norm": 1.8111857175827026, "learning_rate": 1.716910607650483e-05, "loss": 0.3826, "step": 25310 }, { "epoch": 4.131749724501041, "grad_norm": 2.1766412258148193, "learning_rate": 1.716888483968659e-05, "loss": 0.3719, "step": 25311 }, { "epoch": 4.131912983143545, "grad_norm": 2.240419387817383, "learning_rate": 1.7168663595649285e-05, "loss": 0.4637, "step": 25312 }, { "epoch": 4.1320762417860495, "grad_norm": 1.7185472249984741, "learning_rate": 1.716844234439314e-05, "loss": 0.3194, "step": 25313 }, { "epoch": 4.132239500428554, "grad_norm": 1.9932383298873901, "learning_rate": 1.716822108591837e-05, "loss": 0.417, "step": 25314 }, { "epoch": 4.132402759071058, "grad_norm": 2.1696677207946777, "learning_rate": 1.7167999820225208e-05, "loss": 0.4216, "step": 25315 }, { "epoch": 4.132566017713563, "grad_norm": 1.6221587657928467, "learning_rate": 1.7167778547313868e-05, "loss": 0.3441, "step": 25316 }, { "epoch": 4.132729276356067, "grad_norm": 2.0579733848571777, "learning_rate": 1.716755726718458e-05, "loss": 0.3255, "step": 25317 }, { "epoch": 4.132892534998572, "grad_norm": 2.110161304473877, "learning_rate": 1.7167335979837566e-05, "loss": 0.3678, "step": 25318 }, { "epoch": 4.133055793641076, "grad_norm": 1.9781880378723145, "learning_rate": 1.716711468527304e-05, "loss": 0.3688, "step": 25319 }, { "epoch": 4.1332190522835806, "grad_norm": 2.221745014190674, "learning_rate": 1.7166893383491237e-05, "loss": 0.4121, "step": 25320 }, { "epoch": 4.133382310926085, "grad_norm": 2.2028543949127197, "learning_rate": 1.7166672074492373e-05, "loss": 0.4161, "step": 25321 }, { "epoch": 4.133545569568589, "grad_norm": 2.710237741470337, "learning_rate": 1.716645075827667e-05, "loss": 0.3799, "step": 25322 }, { "epoch": 4.133708828211093, "grad_norm": 1.6337746381759644, "learning_rate": 1.7166229434844355e-05, "loss": 0.3075, "step": 25323 }, { "epoch": 4.133872086853597, "grad_norm": 1.753018856048584, "learning_rate": 1.7166008104195647e-05, "loss": 0.3071, "step": 25324 }, { "epoch": 4.134035345496102, "grad_norm": 2.3773508071899414, "learning_rate": 1.7165786766330775e-05, "loss": 0.3534, "step": 25325 }, { "epoch": 4.134198604138606, "grad_norm": 2.476109027862549, "learning_rate": 1.7165565421249954e-05, "loss": 0.5004, "step": 25326 }, { "epoch": 4.134361862781111, "grad_norm": 2.2959415912628174, "learning_rate": 1.716534406895341e-05, "loss": 0.3863, "step": 25327 }, { "epoch": 4.134525121423615, "grad_norm": 2.231226921081543, "learning_rate": 1.716512270944137e-05, "loss": 0.3894, "step": 25328 }, { "epoch": 4.13468838006612, "grad_norm": 2.317232131958008, "learning_rate": 1.716490134271405e-05, "loss": 0.4039, "step": 25329 }, { "epoch": 4.134851638708624, "grad_norm": 2.396653652191162, "learning_rate": 1.7164679968771674e-05, "loss": 0.4285, "step": 25330 }, { "epoch": 4.1350148973511285, "grad_norm": 1.8554128408432007, "learning_rate": 1.7164458587614474e-05, "loss": 0.3248, "step": 25331 }, { "epoch": 4.135178155993633, "grad_norm": 2.258493661880493, "learning_rate": 1.716423719924266e-05, "loss": 0.3745, "step": 25332 }, { "epoch": 4.135341414636137, "grad_norm": 2.5584640502929688, "learning_rate": 1.7164015803656463e-05, "loss": 0.3799, "step": 25333 }, { "epoch": 4.135504673278642, "grad_norm": 2.0060811042785645, "learning_rate": 1.7163794400856106e-05, "loss": 0.3233, "step": 25334 }, { "epoch": 4.135667931921146, "grad_norm": 2.063408851623535, "learning_rate": 1.7163572990841808e-05, "loss": 0.3612, "step": 25335 }, { "epoch": 4.135831190563651, "grad_norm": 1.9185891151428223, "learning_rate": 1.7163351573613797e-05, "loss": 0.3537, "step": 25336 }, { "epoch": 4.135994449206155, "grad_norm": 2.616485357284546, "learning_rate": 1.7163130149172287e-05, "loss": 0.4228, "step": 25337 }, { "epoch": 4.1361577078486595, "grad_norm": 2.5092999935150146, "learning_rate": 1.7162908717517513e-05, "loss": 0.4473, "step": 25338 }, { "epoch": 4.136320966491164, "grad_norm": 1.9151109457015991, "learning_rate": 1.7162687278649688e-05, "loss": 0.3561, "step": 25339 }, { "epoch": 4.136484225133668, "grad_norm": 2.106084108352661, "learning_rate": 1.7162465832569038e-05, "loss": 0.3612, "step": 25340 }, { "epoch": 4.136647483776172, "grad_norm": 1.820441484451294, "learning_rate": 1.7162244379275787e-05, "loss": 0.3623, "step": 25341 }, { "epoch": 4.136810742418676, "grad_norm": 1.9398490190505981, "learning_rate": 1.716202291877016e-05, "loss": 0.3661, "step": 25342 }, { "epoch": 4.136974001061181, "grad_norm": 1.8176711797714233, "learning_rate": 1.7161801451052378e-05, "loss": 0.3438, "step": 25343 }, { "epoch": 4.137137259703685, "grad_norm": 2.336008071899414, "learning_rate": 1.7161579976122663e-05, "loss": 0.426, "step": 25344 }, { "epoch": 4.13730051834619, "grad_norm": 2.357694387435913, "learning_rate": 1.716135849398124e-05, "loss": 0.3572, "step": 25345 }, { "epoch": 4.137463776988694, "grad_norm": 1.8609325885772705, "learning_rate": 1.716113700462833e-05, "loss": 0.3263, "step": 25346 }, { "epoch": 4.137627035631199, "grad_norm": 2.4218599796295166, "learning_rate": 1.7160915508064156e-05, "loss": 0.4277, "step": 25347 }, { "epoch": 4.137790294273703, "grad_norm": 2.2662246227264404, "learning_rate": 1.716069400428894e-05, "loss": 0.3821, "step": 25348 }, { "epoch": 4.1379535529162075, "grad_norm": 2.0792503356933594, "learning_rate": 1.7160472493302915e-05, "loss": 0.3557, "step": 25349 }, { "epoch": 4.138116811558712, "grad_norm": 1.9760617017745972, "learning_rate": 1.7160250975106286e-05, "loss": 0.3895, "step": 25350 }, { "epoch": 4.138280070201216, "grad_norm": 2.572277069091797, "learning_rate": 1.7160029449699293e-05, "loss": 0.433, "step": 25351 }, { "epoch": 4.138443328843721, "grad_norm": 2.381326675415039, "learning_rate": 1.715980791708215e-05, "loss": 0.4002, "step": 25352 }, { "epoch": 4.138606587486225, "grad_norm": 2.6028084754943848, "learning_rate": 1.715958637725508e-05, "loss": 0.4048, "step": 25353 }, { "epoch": 4.13876984612873, "grad_norm": 1.8989028930664062, "learning_rate": 1.7159364830218312e-05, "loss": 0.3374, "step": 25354 }, { "epoch": 4.138933104771234, "grad_norm": 2.3835713863372803, "learning_rate": 1.7159143275972066e-05, "loss": 0.3998, "step": 25355 }, { "epoch": 4.1390963634137385, "grad_norm": 2.080070734024048, "learning_rate": 1.7158921714516564e-05, "loss": 0.3654, "step": 25356 }, { "epoch": 4.139259622056243, "grad_norm": 1.8027634620666504, "learning_rate": 1.715870014585203e-05, "loss": 0.358, "step": 25357 }, { "epoch": 4.139422880698747, "grad_norm": 3.4387266635894775, "learning_rate": 1.7158478569978684e-05, "loss": 0.3752, "step": 25358 }, { "epoch": 4.139586139341251, "grad_norm": 1.9790955781936646, "learning_rate": 1.7158256986896753e-05, "loss": 0.3437, "step": 25359 }, { "epoch": 4.139749397983755, "grad_norm": 2.460087776184082, "learning_rate": 1.7158035396606457e-05, "loss": 0.4022, "step": 25360 }, { "epoch": 4.13991265662626, "grad_norm": 1.6707205772399902, "learning_rate": 1.715781379910803e-05, "loss": 0.3255, "step": 25361 }, { "epoch": 4.140075915268764, "grad_norm": 2.2906599044799805, "learning_rate": 1.715759219440168e-05, "loss": 0.3961, "step": 25362 }, { "epoch": 4.140239173911269, "grad_norm": 2.0772032737731934, "learning_rate": 1.7157370582487635e-05, "loss": 0.4224, "step": 25363 }, { "epoch": 4.140402432553773, "grad_norm": 2.143916130065918, "learning_rate": 1.7157148963366123e-05, "loss": 0.4826, "step": 25364 }, { "epoch": 4.140565691196278, "grad_norm": 1.8260514736175537, "learning_rate": 1.715692733703736e-05, "loss": 0.3414, "step": 25365 }, { "epoch": 4.140728949838782, "grad_norm": 2.3340866565704346, "learning_rate": 1.7156705703501576e-05, "loss": 0.4325, "step": 25366 }, { "epoch": 4.1408922084812865, "grad_norm": 1.9806978702545166, "learning_rate": 1.7156484062758992e-05, "loss": 0.3787, "step": 25367 }, { "epoch": 4.141055467123791, "grad_norm": 2.3863048553466797, "learning_rate": 1.715626241480983e-05, "loss": 0.4444, "step": 25368 }, { "epoch": 4.141218725766295, "grad_norm": 2.230276346206665, "learning_rate": 1.715604075965431e-05, "loss": 0.3768, "step": 25369 }, { "epoch": 4.1413819844088, "grad_norm": 2.11407208442688, "learning_rate": 1.7155819097292662e-05, "loss": 0.3774, "step": 25370 }, { "epoch": 4.141545243051304, "grad_norm": 2.0080769062042236, "learning_rate": 1.7155597427725107e-05, "loss": 0.3918, "step": 25371 }, { "epoch": 4.141708501693809, "grad_norm": 2.1280364990234375, "learning_rate": 1.7155375750951866e-05, "loss": 0.4065, "step": 25372 }, { "epoch": 4.141871760336313, "grad_norm": 1.756449818611145, "learning_rate": 1.7155154066973163e-05, "loss": 0.3353, "step": 25373 }, { "epoch": 4.1420350189788175, "grad_norm": 1.9760353565216064, "learning_rate": 1.7154932375789225e-05, "loss": 0.3579, "step": 25374 }, { "epoch": 4.142198277621322, "grad_norm": 2.1496729850769043, "learning_rate": 1.7154710677400265e-05, "loss": 0.3684, "step": 25375 }, { "epoch": 4.1423615362638255, "grad_norm": 2.2555291652679443, "learning_rate": 1.715448897180652e-05, "loss": 0.4273, "step": 25376 }, { "epoch": 4.14252479490633, "grad_norm": 1.7480087280273438, "learning_rate": 1.7154267259008204e-05, "loss": 0.3397, "step": 25377 }, { "epoch": 4.142688053548834, "grad_norm": 3.393967390060425, "learning_rate": 1.7154045539005545e-05, "loss": 0.3869, "step": 25378 }, { "epoch": 4.142851312191339, "grad_norm": 2.0990960597991943, "learning_rate": 1.715382381179876e-05, "loss": 0.369, "step": 25379 }, { "epoch": 4.143014570833843, "grad_norm": 1.7294139862060547, "learning_rate": 1.715360207738808e-05, "loss": 0.3551, "step": 25380 }, { "epoch": 4.143177829476348, "grad_norm": 1.9208239316940308, "learning_rate": 1.7153380335773723e-05, "loss": 0.4203, "step": 25381 }, { "epoch": 4.143341088118852, "grad_norm": 2.0005011558532715, "learning_rate": 1.7153158586955915e-05, "loss": 0.3997, "step": 25382 }, { "epoch": 4.143504346761357, "grad_norm": 1.5573039054870605, "learning_rate": 1.7152936830934878e-05, "loss": 0.3308, "step": 25383 }, { "epoch": 4.143667605403861, "grad_norm": 2.0122013092041016, "learning_rate": 1.715271506771083e-05, "loss": 0.3797, "step": 25384 }, { "epoch": 4.1438308640463655, "grad_norm": 2.121051549911499, "learning_rate": 1.7152493297284007e-05, "loss": 0.4048, "step": 25385 }, { "epoch": 4.14399412268887, "grad_norm": 1.7929105758666992, "learning_rate": 1.715227151965462e-05, "loss": 0.3274, "step": 25386 }, { "epoch": 4.144157381331374, "grad_norm": 2.017913818359375, "learning_rate": 1.7152049734822903e-05, "loss": 0.3962, "step": 25387 }, { "epoch": 4.144320639973879, "grad_norm": 1.939210295677185, "learning_rate": 1.715182794278907e-05, "loss": 0.3957, "step": 25388 }, { "epoch": 4.144483898616383, "grad_norm": 2.454991102218628, "learning_rate": 1.7151606143553348e-05, "loss": 0.5041, "step": 25389 }, { "epoch": 4.144647157258888, "grad_norm": 1.9020767211914062, "learning_rate": 1.715138433711596e-05, "loss": 0.3506, "step": 25390 }, { "epoch": 4.144810415901392, "grad_norm": 1.9452577829360962, "learning_rate": 1.715116252347713e-05, "loss": 0.3914, "step": 25391 }, { "epoch": 4.1449736745438965, "grad_norm": 1.9459024667739868, "learning_rate": 1.7150940702637084e-05, "loss": 0.3889, "step": 25392 }, { "epoch": 4.145136933186401, "grad_norm": 1.9284553527832031, "learning_rate": 1.715071887459604e-05, "loss": 0.3425, "step": 25393 }, { "epoch": 4.1453001918289045, "grad_norm": 1.8374738693237305, "learning_rate": 1.7150497039354228e-05, "loss": 0.3658, "step": 25394 }, { "epoch": 4.145463450471409, "grad_norm": 2.333786964416504, "learning_rate": 1.7150275196911862e-05, "loss": 0.3951, "step": 25395 }, { "epoch": 4.145626709113913, "grad_norm": 1.852095603942871, "learning_rate": 1.7150053347269175e-05, "loss": 0.3907, "step": 25396 }, { "epoch": 4.145789967756418, "grad_norm": 1.6789909601211548, "learning_rate": 1.7149831490426386e-05, "loss": 0.3323, "step": 25397 }, { "epoch": 4.145953226398922, "grad_norm": 2.4758059978485107, "learning_rate": 1.7149609626383718e-05, "loss": 0.3699, "step": 25398 }, { "epoch": 4.146116485041427, "grad_norm": 1.958249568939209, "learning_rate": 1.7149387755141392e-05, "loss": 0.3989, "step": 25399 }, { "epoch": 4.146279743683931, "grad_norm": 1.8899706602096558, "learning_rate": 1.7149165876699635e-05, "loss": 0.374, "step": 25400 }, { "epoch": 4.146443002326436, "grad_norm": 2.1135222911834717, "learning_rate": 1.7148943991058674e-05, "loss": 0.3968, "step": 25401 }, { "epoch": 4.14660626096894, "grad_norm": 2.4859201908111572, "learning_rate": 1.7148722098218726e-05, "loss": 0.4908, "step": 25402 }, { "epoch": 4.1467695196114445, "grad_norm": 2.206111431121826, "learning_rate": 1.7148500198180014e-05, "loss": 0.3775, "step": 25403 }, { "epoch": 4.146932778253949, "grad_norm": 1.9324865341186523, "learning_rate": 1.714827829094277e-05, "loss": 0.3477, "step": 25404 }, { "epoch": 4.147096036896453, "grad_norm": 1.7711317539215088, "learning_rate": 1.7148056376507208e-05, "loss": 0.3689, "step": 25405 }, { "epoch": 4.147259295538958, "grad_norm": 1.5608036518096924, "learning_rate": 1.7147834454873554e-05, "loss": 0.3267, "step": 25406 }, { "epoch": 4.147422554181462, "grad_norm": 1.7616467475891113, "learning_rate": 1.7147612526042034e-05, "loss": 0.3368, "step": 25407 }, { "epoch": 4.147585812823967, "grad_norm": 2.365966558456421, "learning_rate": 1.714739059001287e-05, "loss": 0.3376, "step": 25408 }, { "epoch": 4.147749071466471, "grad_norm": 1.8229016065597534, "learning_rate": 1.7147168646786284e-05, "loss": 0.3529, "step": 25409 }, { "epoch": 4.1479123301089755, "grad_norm": 1.8825401067733765, "learning_rate": 1.7146946696362502e-05, "loss": 0.3596, "step": 25410 }, { "epoch": 4.14807558875148, "grad_norm": 2.3157341480255127, "learning_rate": 1.714672473874175e-05, "loss": 0.4191, "step": 25411 }, { "epoch": 4.1482388473939835, "grad_norm": 1.837491512298584, "learning_rate": 1.7146502773924244e-05, "loss": 0.3008, "step": 25412 }, { "epoch": 4.148402106036488, "grad_norm": 2.586055040359497, "learning_rate": 1.7146280801910212e-05, "loss": 0.5228, "step": 25413 }, { "epoch": 4.148565364678992, "grad_norm": 1.9120925664901733, "learning_rate": 1.714605882269988e-05, "loss": 0.3716, "step": 25414 }, { "epoch": 4.148728623321497, "grad_norm": 2.239976406097412, "learning_rate": 1.7145836836293467e-05, "loss": 0.3307, "step": 25415 }, { "epoch": 4.148891881964001, "grad_norm": 2.37294340133667, "learning_rate": 1.7145614842691198e-05, "loss": 0.4343, "step": 25416 }, { "epoch": 4.149055140606506, "grad_norm": 2.380359411239624, "learning_rate": 1.71453928418933e-05, "loss": 0.4116, "step": 25417 }, { "epoch": 4.14921839924901, "grad_norm": 2.3046703338623047, "learning_rate": 1.7145170833899987e-05, "loss": 0.4612, "step": 25418 }, { "epoch": 4.149381657891515, "grad_norm": 1.6573445796966553, "learning_rate": 1.7144948818711493e-05, "loss": 0.3394, "step": 25419 }, { "epoch": 4.149544916534019, "grad_norm": 2.056800365447998, "learning_rate": 1.7144726796328034e-05, "loss": 0.3787, "step": 25420 }, { "epoch": 4.1497081751765235, "grad_norm": 2.3740386962890625, "learning_rate": 1.7144504766749842e-05, "loss": 0.3675, "step": 25421 }, { "epoch": 4.149871433819028, "grad_norm": 1.8478751182556152, "learning_rate": 1.7144282729977133e-05, "loss": 0.4024, "step": 25422 }, { "epoch": 4.150034692461532, "grad_norm": 1.7827448844909668, "learning_rate": 1.7144060686010132e-05, "loss": 0.4273, "step": 25423 }, { "epoch": 4.150197951104037, "grad_norm": 2.207472562789917, "learning_rate": 1.7143838634849067e-05, "loss": 0.3935, "step": 25424 }, { "epoch": 4.150361209746541, "grad_norm": 2.097263813018799, "learning_rate": 1.7143616576494154e-05, "loss": 0.3974, "step": 25425 }, { "epoch": 4.150524468389046, "grad_norm": 1.8716285228729248, "learning_rate": 1.7143394510945625e-05, "loss": 0.3762, "step": 25426 }, { "epoch": 4.15068772703155, "grad_norm": 1.765696406364441, "learning_rate": 1.7143172438203695e-05, "loss": 0.3539, "step": 25427 }, { "epoch": 4.1508509856740545, "grad_norm": 1.3865559101104736, "learning_rate": 1.71429503582686e-05, "loss": 0.2788, "step": 25428 }, { "epoch": 4.151014244316558, "grad_norm": 2.104179620742798, "learning_rate": 1.7142728271140546e-05, "loss": 0.392, "step": 25429 }, { "epoch": 4.1511775029590625, "grad_norm": 1.798256754875183, "learning_rate": 1.7142506176819775e-05, "loss": 0.3477, "step": 25430 }, { "epoch": 4.151340761601567, "grad_norm": 1.9960448741912842, "learning_rate": 1.7142284075306497e-05, "loss": 0.3904, "step": 25431 }, { "epoch": 4.151504020244071, "grad_norm": 2.4323415756225586, "learning_rate": 1.714206196660094e-05, "loss": 0.4619, "step": 25432 }, { "epoch": 4.151667278886576, "grad_norm": 2.126086473464966, "learning_rate": 1.7141839850703333e-05, "loss": 0.4082, "step": 25433 }, { "epoch": 4.15183053752908, "grad_norm": 3.0234451293945312, "learning_rate": 1.7141617727613894e-05, "loss": 0.4578, "step": 25434 }, { "epoch": 4.151993796171585, "grad_norm": 1.9178599119186401, "learning_rate": 1.7141395597332847e-05, "loss": 0.3869, "step": 25435 }, { "epoch": 4.152157054814089, "grad_norm": 2.2852861881256104, "learning_rate": 1.7141173459860415e-05, "loss": 0.4122, "step": 25436 }, { "epoch": 4.152320313456594, "grad_norm": 1.9641289710998535, "learning_rate": 1.7140951315196826e-05, "loss": 0.3895, "step": 25437 }, { "epoch": 4.152483572099098, "grad_norm": 2.0146119594573975, "learning_rate": 1.71407291633423e-05, "loss": 0.3588, "step": 25438 }, { "epoch": 4.1526468307416025, "grad_norm": 1.7816472053527832, "learning_rate": 1.714050700429706e-05, "loss": 0.3728, "step": 25439 }, { "epoch": 4.152810089384107, "grad_norm": 1.7523082494735718, "learning_rate": 1.7140284838061336e-05, "loss": 0.3305, "step": 25440 }, { "epoch": 4.152973348026611, "grad_norm": 2.275407075881958, "learning_rate": 1.714006266463534e-05, "loss": 0.4319, "step": 25441 }, { "epoch": 4.153136606669116, "grad_norm": 2.5294976234436035, "learning_rate": 1.713984048401931e-05, "loss": 0.3986, "step": 25442 }, { "epoch": 4.15329986531162, "grad_norm": 1.975624918937683, "learning_rate": 1.713961829621346e-05, "loss": 0.3662, "step": 25443 }, { "epoch": 4.153463123954125, "grad_norm": 1.8288111686706543, "learning_rate": 1.7139396101218014e-05, "loss": 0.3551, "step": 25444 }, { "epoch": 4.153626382596629, "grad_norm": 1.7447900772094727, "learning_rate": 1.7139173899033197e-05, "loss": 0.3245, "step": 25445 }, { "epoch": 4.1537896412391335, "grad_norm": 2.087604522705078, "learning_rate": 1.7138951689659236e-05, "loss": 0.426, "step": 25446 }, { "epoch": 4.153952899881637, "grad_norm": 1.4674605131149292, "learning_rate": 1.7138729473096356e-05, "loss": 0.3152, "step": 25447 }, { "epoch": 4.1541161585241415, "grad_norm": 1.9004815816879272, "learning_rate": 1.7138507249344773e-05, "loss": 0.3658, "step": 25448 }, { "epoch": 4.154279417166646, "grad_norm": 2.049609899520874, "learning_rate": 1.7138285018404715e-05, "loss": 0.3644, "step": 25449 }, { "epoch": 4.15444267580915, "grad_norm": 2.432077169418335, "learning_rate": 1.7138062780276404e-05, "loss": 0.4082, "step": 25450 }, { "epoch": 4.154605934451655, "grad_norm": 2.051542282104492, "learning_rate": 1.7137840534960073e-05, "loss": 0.3494, "step": 25451 }, { "epoch": 4.154769193094159, "grad_norm": 2.3131799697875977, "learning_rate": 1.7137618282455936e-05, "loss": 0.4312, "step": 25452 }, { "epoch": 4.154932451736664, "grad_norm": 1.8581349849700928, "learning_rate": 1.7137396022764216e-05, "loss": 0.3428, "step": 25453 }, { "epoch": 4.155095710379168, "grad_norm": 2.0407729148864746, "learning_rate": 1.713717375588514e-05, "loss": 0.379, "step": 25454 }, { "epoch": 4.155258969021673, "grad_norm": 2.3008577823638916, "learning_rate": 1.7136951481818935e-05, "loss": 0.3753, "step": 25455 }, { "epoch": 4.155422227664177, "grad_norm": 1.9454352855682373, "learning_rate": 1.713672920056582e-05, "loss": 0.3933, "step": 25456 }, { "epoch": 4.1555854863066815, "grad_norm": 2.80598521232605, "learning_rate": 1.713650691212602e-05, "loss": 0.4619, "step": 25457 }, { "epoch": 4.155748744949186, "grad_norm": 2.22538161277771, "learning_rate": 1.713628461649976e-05, "loss": 0.3942, "step": 25458 }, { "epoch": 4.15591200359169, "grad_norm": 1.599011778831482, "learning_rate": 1.7136062313687264e-05, "loss": 0.2785, "step": 25459 }, { "epoch": 4.156075262234195, "grad_norm": 2.094789981842041, "learning_rate": 1.7135840003688753e-05, "loss": 0.3855, "step": 25460 }, { "epoch": 4.156238520876699, "grad_norm": 2.2778849601745605, "learning_rate": 1.7135617686504455e-05, "loss": 0.3785, "step": 25461 }, { "epoch": 4.156401779519204, "grad_norm": 2.008981943130493, "learning_rate": 1.7135395362134593e-05, "loss": 0.3608, "step": 25462 }, { "epoch": 4.156565038161708, "grad_norm": 2.01694393157959, "learning_rate": 1.7135173030579388e-05, "loss": 0.3324, "step": 25463 }, { "epoch": 4.1567282968042125, "grad_norm": 2.293530225753784, "learning_rate": 1.7134950691839063e-05, "loss": 0.4528, "step": 25464 }, { "epoch": 4.156891555446716, "grad_norm": 1.9001562595367432, "learning_rate": 1.713472834591385e-05, "loss": 0.3537, "step": 25465 }, { "epoch": 4.1570548140892205, "grad_norm": 1.981797456741333, "learning_rate": 1.7134505992803964e-05, "loss": 0.3716, "step": 25466 }, { "epoch": 4.157218072731725, "grad_norm": 2.226858377456665, "learning_rate": 1.7134283632509636e-05, "loss": 0.4445, "step": 25467 }, { "epoch": 4.157381331374229, "grad_norm": 2.4941024780273438, "learning_rate": 1.7134061265031082e-05, "loss": 0.409, "step": 25468 }, { "epoch": 4.157544590016734, "grad_norm": 1.6532987356185913, "learning_rate": 1.713383889036853e-05, "loss": 0.3311, "step": 25469 }, { "epoch": 4.157707848659238, "grad_norm": 2.061728000640869, "learning_rate": 1.7133616508522205e-05, "loss": 0.4088, "step": 25470 }, { "epoch": 4.157871107301743, "grad_norm": 2.38454008102417, "learning_rate": 1.7133394119492335e-05, "loss": 0.3427, "step": 25471 }, { "epoch": 4.158034365944247, "grad_norm": 2.194411277770996, "learning_rate": 1.7133171723279134e-05, "loss": 0.4538, "step": 25472 }, { "epoch": 4.158197624586752, "grad_norm": 2.2205564975738525, "learning_rate": 1.713294931988283e-05, "loss": 0.3528, "step": 25473 }, { "epoch": 4.158360883229256, "grad_norm": 1.825300931930542, "learning_rate": 1.7132726909303652e-05, "loss": 0.4149, "step": 25474 }, { "epoch": 4.1585241418717604, "grad_norm": 2.396306276321411, "learning_rate": 1.713250449154182e-05, "loss": 0.4359, "step": 25475 }, { "epoch": 4.158687400514265, "grad_norm": 1.7281224727630615, "learning_rate": 1.713228206659755e-05, "loss": 0.3618, "step": 25476 }, { "epoch": 4.158850659156769, "grad_norm": 1.9299124479293823, "learning_rate": 1.7132059634471082e-05, "loss": 0.3746, "step": 25477 }, { "epoch": 4.159013917799274, "grad_norm": 2.1479742527008057, "learning_rate": 1.713183719516263e-05, "loss": 0.3285, "step": 25478 }, { "epoch": 4.159177176441778, "grad_norm": 2.033843994140625, "learning_rate": 1.713161474867242e-05, "loss": 0.3913, "step": 25479 }, { "epoch": 4.159340435084283, "grad_norm": 2.323502540588379, "learning_rate": 1.7131392295000676e-05, "loss": 0.4155, "step": 25480 }, { "epoch": 4.159503693726787, "grad_norm": 2.6608970165252686, "learning_rate": 1.713116983414762e-05, "loss": 0.5165, "step": 25481 }, { "epoch": 4.159666952369291, "grad_norm": 1.8379576206207275, "learning_rate": 1.713094736611348e-05, "loss": 0.3807, "step": 25482 }, { "epoch": 4.159830211011795, "grad_norm": 2.1602094173431396, "learning_rate": 1.7130724890898474e-05, "loss": 0.4393, "step": 25483 }, { "epoch": 4.1599934696542995, "grad_norm": 2.3825814723968506, "learning_rate": 1.7130502408502835e-05, "loss": 0.4149, "step": 25484 }, { "epoch": 4.160156728296804, "grad_norm": 2.003232479095459, "learning_rate": 1.7130279918926776e-05, "loss": 0.4205, "step": 25485 }, { "epoch": 4.160319986939308, "grad_norm": 1.995881199836731, "learning_rate": 1.713005742217053e-05, "loss": 0.3916, "step": 25486 }, { "epoch": 4.160483245581813, "grad_norm": 1.5205100774765015, "learning_rate": 1.712983491823432e-05, "loss": 0.3127, "step": 25487 }, { "epoch": 4.160646504224317, "grad_norm": 2.117269277572632, "learning_rate": 1.712961240711837e-05, "loss": 0.3775, "step": 25488 }, { "epoch": 4.160809762866822, "grad_norm": 2.090365409851074, "learning_rate": 1.7129389888822895e-05, "loss": 0.39, "step": 25489 }, { "epoch": 4.160973021509326, "grad_norm": 1.7338083982467651, "learning_rate": 1.712916736334813e-05, "loss": 0.3479, "step": 25490 }, { "epoch": 4.161136280151831, "grad_norm": 2.3502016067504883, "learning_rate": 1.71289448306943e-05, "loss": 0.4883, "step": 25491 }, { "epoch": 4.161299538794335, "grad_norm": 2.148895502090454, "learning_rate": 1.7128722290861618e-05, "loss": 0.4058, "step": 25492 }, { "epoch": 4.1614627974368394, "grad_norm": 2.2354626655578613, "learning_rate": 1.7128499743850315e-05, "loss": 0.4307, "step": 25493 }, { "epoch": 4.161626056079344, "grad_norm": 1.5892105102539062, "learning_rate": 1.7128277189660618e-05, "loss": 0.2964, "step": 25494 }, { "epoch": 4.161789314721848, "grad_norm": 2.020585775375366, "learning_rate": 1.7128054628292744e-05, "loss": 0.346, "step": 25495 }, { "epoch": 4.161952573364353, "grad_norm": 1.8840124607086182, "learning_rate": 1.7127832059746926e-05, "loss": 0.3515, "step": 25496 }, { "epoch": 4.162115832006857, "grad_norm": 2.248872995376587, "learning_rate": 1.7127609484023377e-05, "loss": 0.4696, "step": 25497 }, { "epoch": 4.162279090649362, "grad_norm": 2.0203840732574463, "learning_rate": 1.7127386901122333e-05, "loss": 0.3537, "step": 25498 }, { "epoch": 4.162442349291865, "grad_norm": 2.0319316387176514, "learning_rate": 1.7127164311044006e-05, "loss": 0.3456, "step": 25499 }, { "epoch": 4.16260560793437, "grad_norm": 1.8202115297317505, "learning_rate": 1.7126941713788633e-05, "loss": 0.3751, "step": 25500 }, { "epoch": 4.162768866576874, "grad_norm": 2.2546873092651367, "learning_rate": 1.7126719109356424e-05, "loss": 0.3203, "step": 25501 }, { "epoch": 4.1629321252193785, "grad_norm": 2.2763149738311768, "learning_rate": 1.7126496497747617e-05, "loss": 0.474, "step": 25502 }, { "epoch": 4.163095383861883, "grad_norm": 2.1424827575683594, "learning_rate": 1.712627387896243e-05, "loss": 0.3687, "step": 25503 }, { "epoch": 4.163258642504387, "grad_norm": 2.5817790031433105, "learning_rate": 1.7126051253001082e-05, "loss": 0.388, "step": 25504 }, { "epoch": 4.163421901146892, "grad_norm": 2.42244553565979, "learning_rate": 1.7125828619863804e-05, "loss": 0.4002, "step": 25505 }, { "epoch": 4.163585159789396, "grad_norm": 1.690605640411377, "learning_rate": 1.7125605979550823e-05, "loss": 0.3363, "step": 25506 }, { "epoch": 4.163748418431901, "grad_norm": 2.109422445297241, "learning_rate": 1.7125383332062354e-05, "loss": 0.473, "step": 25507 }, { "epoch": 4.163911677074405, "grad_norm": 2.1488044261932373, "learning_rate": 1.7125160677398625e-05, "loss": 0.3754, "step": 25508 }, { "epoch": 4.16407493571691, "grad_norm": 1.4553139209747314, "learning_rate": 1.7124938015559863e-05, "loss": 0.3187, "step": 25509 }, { "epoch": 4.164238194359414, "grad_norm": 1.9472662210464478, "learning_rate": 1.7124715346546295e-05, "loss": 0.3626, "step": 25510 }, { "epoch": 4.164401453001918, "grad_norm": 2.564880132675171, "learning_rate": 1.7124492670358134e-05, "loss": 0.4098, "step": 25511 }, { "epoch": 4.164564711644423, "grad_norm": 1.6167759895324707, "learning_rate": 1.7124269986995614e-05, "loss": 0.319, "step": 25512 }, { "epoch": 4.164727970286927, "grad_norm": 2.323418140411377, "learning_rate": 1.7124047296458956e-05, "loss": 0.4144, "step": 25513 }, { "epoch": 4.164891228929432, "grad_norm": 2.3526525497436523, "learning_rate": 1.7123824598748382e-05, "loss": 0.364, "step": 25514 }, { "epoch": 4.165054487571936, "grad_norm": 2.2588837146759033, "learning_rate": 1.7123601893864117e-05, "loss": 0.4112, "step": 25515 }, { "epoch": 4.165217746214441, "grad_norm": 2.3934550285339355, "learning_rate": 1.7123379181806395e-05, "loss": 0.4463, "step": 25516 }, { "epoch": 4.165381004856945, "grad_norm": 1.840232253074646, "learning_rate": 1.7123156462575425e-05, "loss": 0.3175, "step": 25517 }, { "epoch": 4.165544263499449, "grad_norm": 2.0280985832214355, "learning_rate": 1.7122933736171437e-05, "loss": 0.3625, "step": 25518 }, { "epoch": 4.165707522141953, "grad_norm": 2.828864336013794, "learning_rate": 1.712271100259466e-05, "loss": 0.397, "step": 25519 }, { "epoch": 4.1658707807844575, "grad_norm": 2.4171977043151855, "learning_rate": 1.7122488261845316e-05, "loss": 0.4435, "step": 25520 }, { "epoch": 4.166034039426962, "grad_norm": 1.956419587135315, "learning_rate": 1.712226551392363e-05, "loss": 0.3471, "step": 25521 }, { "epoch": 4.166197298069466, "grad_norm": 2.9425251483917236, "learning_rate": 1.712204275882982e-05, "loss": 0.449, "step": 25522 }, { "epoch": 4.166360556711971, "grad_norm": 1.8814737796783447, "learning_rate": 1.7121819996564113e-05, "loss": 0.318, "step": 25523 }, { "epoch": 4.166523815354475, "grad_norm": 1.950499415397644, "learning_rate": 1.712159722712674e-05, "loss": 0.3605, "step": 25524 }, { "epoch": 4.16668707399698, "grad_norm": 2.0930984020233154, "learning_rate": 1.712137445051792e-05, "loss": 0.3404, "step": 25525 }, { "epoch": 4.166850332639484, "grad_norm": 2.1028616428375244, "learning_rate": 1.7121151666737875e-05, "loss": 0.4112, "step": 25526 }, { "epoch": 4.167013591281989, "grad_norm": 2.149148941040039, "learning_rate": 1.7120928875786833e-05, "loss": 0.419, "step": 25527 }, { "epoch": 4.167176849924493, "grad_norm": 2.1349828243255615, "learning_rate": 1.712070607766502e-05, "loss": 0.4155, "step": 25528 }, { "epoch": 4.167340108566997, "grad_norm": 1.8916363716125488, "learning_rate": 1.7120483272372653e-05, "loss": 0.346, "step": 25529 }, { "epoch": 4.167503367209502, "grad_norm": 1.9483143091201782, "learning_rate": 1.712026045990997e-05, "loss": 0.3529, "step": 25530 }, { "epoch": 4.167666625852006, "grad_norm": 2.00018310546875, "learning_rate": 1.712003764027718e-05, "loss": 0.377, "step": 25531 }, { "epoch": 4.167829884494511, "grad_norm": 1.9009016752243042, "learning_rate": 1.711981481347451e-05, "loss": 0.3182, "step": 25532 }, { "epoch": 4.167993143137015, "grad_norm": 1.7948051691055298, "learning_rate": 1.71195919795022e-05, "loss": 0.3088, "step": 25533 }, { "epoch": 4.16815640177952, "grad_norm": 2.298161745071411, "learning_rate": 1.7119369138360453e-05, "loss": 0.371, "step": 25534 }, { "epoch": 4.168319660422023, "grad_norm": 2.2728075981140137, "learning_rate": 1.7119146290049505e-05, "loss": 0.4488, "step": 25535 }, { "epoch": 4.168482919064528, "grad_norm": 1.7924296855926514, "learning_rate": 1.711892343456958e-05, "loss": 0.3314, "step": 25536 }, { "epoch": 4.168646177707032, "grad_norm": 2.118797540664673, "learning_rate": 1.71187005719209e-05, "loss": 0.3976, "step": 25537 }, { "epoch": 4.1688094363495365, "grad_norm": 1.8555253744125366, "learning_rate": 1.711847770210369e-05, "loss": 0.3531, "step": 25538 }, { "epoch": 4.168972694992041, "grad_norm": 1.7309784889221191, "learning_rate": 1.7118254825118178e-05, "loss": 0.3127, "step": 25539 }, { "epoch": 4.169135953634545, "grad_norm": 1.8944131135940552, "learning_rate": 1.7118031940964584e-05, "loss": 0.4041, "step": 25540 }, { "epoch": 4.16929921227705, "grad_norm": 2.511810541152954, "learning_rate": 1.711780904964313e-05, "loss": 0.4417, "step": 25541 }, { "epoch": 4.169462470919554, "grad_norm": 1.9818696975708008, "learning_rate": 1.7117586151154048e-05, "loss": 0.3822, "step": 25542 }, { "epoch": 4.169625729562059, "grad_norm": 1.767699122428894, "learning_rate": 1.7117363245497557e-05, "loss": 0.3205, "step": 25543 }, { "epoch": 4.169788988204563, "grad_norm": 2.1306073665618896, "learning_rate": 1.7117140332673886e-05, "loss": 0.3702, "step": 25544 }, { "epoch": 4.169952246847068, "grad_norm": 2.1005361080169678, "learning_rate": 1.7116917412683256e-05, "loss": 0.2826, "step": 25545 }, { "epoch": 4.170115505489572, "grad_norm": 1.610560655593872, "learning_rate": 1.7116694485525887e-05, "loss": 0.3674, "step": 25546 }, { "epoch": 4.170278764132076, "grad_norm": 2.248108386993408, "learning_rate": 1.7116471551202013e-05, "loss": 0.3988, "step": 25547 }, { "epoch": 4.170442022774581, "grad_norm": 2.337987184524536, "learning_rate": 1.7116248609711854e-05, "loss": 0.3601, "step": 25548 }, { "epoch": 4.170605281417085, "grad_norm": 2.1203694343566895, "learning_rate": 1.7116025661055634e-05, "loss": 0.4413, "step": 25549 }, { "epoch": 4.17076854005959, "grad_norm": 1.9658702611923218, "learning_rate": 1.7115802705233576e-05, "loss": 0.3507, "step": 25550 }, { "epoch": 4.170931798702094, "grad_norm": 1.6229349374771118, "learning_rate": 1.711557974224591e-05, "loss": 0.3139, "step": 25551 }, { "epoch": 4.171095057344598, "grad_norm": 1.8419355154037476, "learning_rate": 1.7115356772092858e-05, "loss": 0.3634, "step": 25552 }, { "epoch": 4.171258315987102, "grad_norm": 1.8499208688735962, "learning_rate": 1.7115133794774638e-05, "loss": 0.3481, "step": 25553 }, { "epoch": 4.171421574629607, "grad_norm": 2.02693247795105, "learning_rate": 1.7114910810291482e-05, "loss": 0.3852, "step": 25554 }, { "epoch": 4.171584833272111, "grad_norm": 2.2694251537323, "learning_rate": 1.7114687818643617e-05, "loss": 0.4017, "step": 25555 }, { "epoch": 4.1717480919146155, "grad_norm": 1.9232734441757202, "learning_rate": 1.711446481983126e-05, "loss": 0.3453, "step": 25556 }, { "epoch": 4.17191135055712, "grad_norm": 2.1343982219696045, "learning_rate": 1.7114241813854638e-05, "loss": 0.3846, "step": 25557 }, { "epoch": 4.172074609199624, "grad_norm": 1.7873051166534424, "learning_rate": 1.7114018800713975e-05, "loss": 0.343, "step": 25558 }, { "epoch": 4.172237867842129, "grad_norm": 2.189865827560425, "learning_rate": 1.7113795780409498e-05, "loss": 0.3758, "step": 25559 }, { "epoch": 4.172401126484633, "grad_norm": 2.018261432647705, "learning_rate": 1.7113572752941436e-05, "loss": 0.3334, "step": 25560 }, { "epoch": 4.172564385127138, "grad_norm": 2.1814541816711426, "learning_rate": 1.7113349718310002e-05, "loss": 0.3766, "step": 25561 }, { "epoch": 4.172727643769642, "grad_norm": 2.124894618988037, "learning_rate": 1.7113126676515432e-05, "loss": 0.3665, "step": 25562 }, { "epoch": 4.1728909024121466, "grad_norm": 1.8839887380599976, "learning_rate": 1.711290362755794e-05, "loss": 0.2948, "step": 25563 }, { "epoch": 4.173054161054651, "grad_norm": 2.2965941429138184, "learning_rate": 1.7112680571437756e-05, "loss": 0.3834, "step": 25564 }, { "epoch": 4.173217419697155, "grad_norm": 2.2807486057281494, "learning_rate": 1.7112457508155107e-05, "loss": 0.3971, "step": 25565 }, { "epoch": 4.17338067833966, "grad_norm": 1.970916509628296, "learning_rate": 1.7112234437710213e-05, "loss": 0.4032, "step": 25566 }, { "epoch": 4.173543936982164, "grad_norm": 1.915055274963379, "learning_rate": 1.7112011360103303e-05, "loss": 0.3351, "step": 25567 }, { "epoch": 4.173707195624669, "grad_norm": 2.3545546531677246, "learning_rate": 1.71117882753346e-05, "loss": 0.4476, "step": 25568 }, { "epoch": 4.173870454267173, "grad_norm": 2.400235176086426, "learning_rate": 1.711156518340433e-05, "loss": 0.3978, "step": 25569 }, { "epoch": 4.174033712909677, "grad_norm": 2.106283187866211, "learning_rate": 1.711134208431271e-05, "loss": 0.3234, "step": 25570 }, { "epoch": 4.174196971552181, "grad_norm": 1.9396848678588867, "learning_rate": 1.7111118978059975e-05, "loss": 0.3549, "step": 25571 }, { "epoch": 4.174360230194686, "grad_norm": 5.104525566101074, "learning_rate": 1.7110895864646342e-05, "loss": 0.3689, "step": 25572 }, { "epoch": 4.17452348883719, "grad_norm": 1.8286994695663452, "learning_rate": 1.711067274407204e-05, "loss": 0.3376, "step": 25573 }, { "epoch": 4.1746867474796945, "grad_norm": 1.9581868648529053, "learning_rate": 1.711044961633729e-05, "loss": 0.3787, "step": 25574 }, { "epoch": 4.174850006122199, "grad_norm": 1.9934550523757935, "learning_rate": 1.7110226481442325e-05, "loss": 0.3197, "step": 25575 }, { "epoch": 4.175013264764703, "grad_norm": 2.0821070671081543, "learning_rate": 1.7110003339387358e-05, "loss": 0.4097, "step": 25576 }, { "epoch": 4.175176523407208, "grad_norm": 1.6680946350097656, "learning_rate": 1.7109780190172624e-05, "loss": 0.3429, "step": 25577 }, { "epoch": 4.175339782049712, "grad_norm": 1.8047115802764893, "learning_rate": 1.7109557033798338e-05, "loss": 0.3485, "step": 25578 }, { "epoch": 4.175503040692217, "grad_norm": 2.620866537094116, "learning_rate": 1.7109333870264736e-05, "loss": 0.456, "step": 25579 }, { "epoch": 4.175666299334721, "grad_norm": 2.096012830734253, "learning_rate": 1.7109110699572032e-05, "loss": 0.3999, "step": 25580 }, { "epoch": 4.1758295579772255, "grad_norm": 1.8714673519134521, "learning_rate": 1.7108887521720456e-05, "loss": 0.3169, "step": 25581 }, { "epoch": 4.17599281661973, "grad_norm": 2.4796302318573, "learning_rate": 1.7108664336710236e-05, "loss": 0.3779, "step": 25582 }, { "epoch": 4.176156075262234, "grad_norm": 2.1797683238983154, "learning_rate": 1.7108441144541587e-05, "loss": 0.3892, "step": 25583 }, { "epoch": 4.176319333904739, "grad_norm": 2.1566503047943115, "learning_rate": 1.710821794521474e-05, "loss": 0.459, "step": 25584 }, { "epoch": 4.176482592547243, "grad_norm": 1.5113762617111206, "learning_rate": 1.710799473872993e-05, "loss": 0.2979, "step": 25585 }, { "epoch": 4.176645851189748, "grad_norm": 2.508298873901367, "learning_rate": 1.710777152508736e-05, "loss": 0.3802, "step": 25586 }, { "epoch": 4.176809109832252, "grad_norm": 2.1749813556671143, "learning_rate": 1.710754830428727e-05, "loss": 0.3727, "step": 25587 }, { "epoch": 4.176972368474756, "grad_norm": 1.8591651916503906, "learning_rate": 1.710732507632988e-05, "loss": 0.3519, "step": 25588 }, { "epoch": 4.17713562711726, "grad_norm": 2.2913997173309326, "learning_rate": 1.7107101841215413e-05, "loss": 0.4214, "step": 25589 }, { "epoch": 4.177298885759765, "grad_norm": 1.6835144758224487, "learning_rate": 1.7106878598944102e-05, "loss": 0.3255, "step": 25590 }, { "epoch": 4.177462144402269, "grad_norm": 1.9851913452148438, "learning_rate": 1.7106655349516162e-05, "loss": 0.3942, "step": 25591 }, { "epoch": 4.1776254030447735, "grad_norm": 2.0171844959259033, "learning_rate": 1.710643209293182e-05, "loss": 0.3712, "step": 25592 }, { "epoch": 4.177788661687278, "grad_norm": 1.8030389547348022, "learning_rate": 1.7106208829191307e-05, "loss": 0.3538, "step": 25593 }, { "epoch": 4.177951920329782, "grad_norm": 1.9539517164230347, "learning_rate": 1.7105985558294843e-05, "loss": 0.3975, "step": 25594 }, { "epoch": 4.178115178972287, "grad_norm": 2.2388224601745605, "learning_rate": 1.7105762280242654e-05, "loss": 0.3858, "step": 25595 }, { "epoch": 4.178278437614791, "grad_norm": 2.088470697402954, "learning_rate": 1.710553899503496e-05, "loss": 0.4112, "step": 25596 }, { "epoch": 4.178441696257296, "grad_norm": 2.001708745956421, "learning_rate": 1.7105315702671995e-05, "loss": 0.3909, "step": 25597 }, { "epoch": 4.1786049548998, "grad_norm": 1.4923081398010254, "learning_rate": 1.710509240315398e-05, "loss": 0.2519, "step": 25598 }, { "epoch": 4.1787682135423045, "grad_norm": 2.15568470954895, "learning_rate": 1.7104869096481133e-05, "loss": 0.4107, "step": 25599 }, { "epoch": 4.178931472184809, "grad_norm": 1.771964192390442, "learning_rate": 1.710464578265369e-05, "loss": 0.3429, "step": 25600 }, { "epoch": 4.179094730827313, "grad_norm": 1.8516032695770264, "learning_rate": 1.7104422461671868e-05, "loss": 0.3754, "step": 25601 }, { "epoch": 4.179257989469818, "grad_norm": 1.6583765745162964, "learning_rate": 1.7104199133535895e-05, "loss": 0.3033, "step": 25602 }, { "epoch": 4.179421248112322, "grad_norm": 2.298504114151001, "learning_rate": 1.7103975798245997e-05, "loss": 0.3706, "step": 25603 }, { "epoch": 4.179584506754827, "grad_norm": 1.790515661239624, "learning_rate": 1.7103752455802394e-05, "loss": 0.368, "step": 25604 }, { "epoch": 4.17974776539733, "grad_norm": 2.405294895172119, "learning_rate": 1.7103529106205313e-05, "loss": 0.3773, "step": 25605 }, { "epoch": 4.179911024039835, "grad_norm": 1.9888763427734375, "learning_rate": 1.7103305749454985e-05, "loss": 0.3152, "step": 25606 }, { "epoch": 4.180074282682339, "grad_norm": 1.750461220741272, "learning_rate": 1.7103082385551627e-05, "loss": 0.3573, "step": 25607 }, { "epoch": 4.180237541324844, "grad_norm": 2.8523776531219482, "learning_rate": 1.7102859014495468e-05, "loss": 0.4223, "step": 25608 }, { "epoch": 4.180400799967348, "grad_norm": 2.16450572013855, "learning_rate": 1.7102635636286732e-05, "loss": 0.3746, "step": 25609 }, { "epoch": 4.1805640586098525, "grad_norm": 2.5021536350250244, "learning_rate": 1.710241225092564e-05, "loss": 0.4166, "step": 25610 }, { "epoch": 4.180727317252357, "grad_norm": 2.748575448989868, "learning_rate": 1.7102188858412426e-05, "loss": 0.3741, "step": 25611 }, { "epoch": 4.180890575894861, "grad_norm": 1.988253116607666, "learning_rate": 1.7101965458747306e-05, "loss": 0.4081, "step": 25612 }, { "epoch": 4.181053834537366, "grad_norm": 2.186087131500244, "learning_rate": 1.7101742051930513e-05, "loss": 0.4048, "step": 25613 }, { "epoch": 4.18121709317987, "grad_norm": 2.3266873359680176, "learning_rate": 1.7101518637962266e-05, "loss": 0.4363, "step": 25614 }, { "epoch": 4.181380351822375, "grad_norm": 2.317164659500122, "learning_rate": 1.7101295216842787e-05, "loss": 0.3837, "step": 25615 }, { "epoch": 4.181543610464879, "grad_norm": 2.0535964965820312, "learning_rate": 1.710107178857231e-05, "loss": 0.3774, "step": 25616 }, { "epoch": 4.1817068691073835, "grad_norm": 1.8595194816589355, "learning_rate": 1.7100848353151057e-05, "loss": 0.3887, "step": 25617 }, { "epoch": 4.181870127749888, "grad_norm": 2.228429079055786, "learning_rate": 1.710062491057925e-05, "loss": 0.3895, "step": 25618 }, { "epoch": 4.182033386392392, "grad_norm": 1.933612585067749, "learning_rate": 1.7100401460857114e-05, "loss": 0.3517, "step": 25619 }, { "epoch": 4.182196645034897, "grad_norm": 2.0053977966308594, "learning_rate": 1.7100178003984876e-05, "loss": 0.35, "step": 25620 }, { "epoch": 4.182359903677401, "grad_norm": 1.8542416095733643, "learning_rate": 1.7099954539962758e-05, "loss": 0.3429, "step": 25621 }, { "epoch": 4.182523162319906, "grad_norm": 2.2233192920684814, "learning_rate": 1.709973106879099e-05, "loss": 0.427, "step": 25622 }, { "epoch": 4.182686420962409, "grad_norm": 1.9218955039978027, "learning_rate": 1.70995075904698e-05, "loss": 0.3761, "step": 25623 }, { "epoch": 4.182849679604914, "grad_norm": 2.562157154083252, "learning_rate": 1.7099284104999404e-05, "loss": 0.4163, "step": 25624 }, { "epoch": 4.183012938247418, "grad_norm": 2.128577947616577, "learning_rate": 1.709906061238003e-05, "loss": 0.3466, "step": 25625 }, { "epoch": 4.183176196889923, "grad_norm": 2.0938875675201416, "learning_rate": 1.70988371126119e-05, "loss": 0.3669, "step": 25626 }, { "epoch": 4.183339455532427, "grad_norm": 2.060293674468994, "learning_rate": 1.709861360569525e-05, "loss": 0.4108, "step": 25627 }, { "epoch": 4.1835027141749315, "grad_norm": 2.1736714839935303, "learning_rate": 1.7098390091630294e-05, "loss": 0.4175, "step": 25628 }, { "epoch": 4.183665972817436, "grad_norm": 2.0806119441986084, "learning_rate": 1.7098166570417264e-05, "loss": 0.3701, "step": 25629 }, { "epoch": 4.18382923145994, "grad_norm": 2.4597795009613037, "learning_rate": 1.7097943042056377e-05, "loss": 0.3945, "step": 25630 }, { "epoch": 4.183992490102445, "grad_norm": 2.1955628395080566, "learning_rate": 1.7097719506547867e-05, "loss": 0.45, "step": 25631 }, { "epoch": 4.184155748744949, "grad_norm": 1.9222183227539062, "learning_rate": 1.7097495963891954e-05, "loss": 0.3336, "step": 25632 }, { "epoch": 4.184319007387454, "grad_norm": 1.7746680974960327, "learning_rate": 1.7097272414088865e-05, "loss": 0.32, "step": 25633 }, { "epoch": 4.184482266029958, "grad_norm": 1.9217358827590942, "learning_rate": 1.7097048857138827e-05, "loss": 0.4124, "step": 25634 }, { "epoch": 4.1846455246724625, "grad_norm": 1.8295433521270752, "learning_rate": 1.709682529304206e-05, "loss": 0.3914, "step": 25635 }, { "epoch": 4.184808783314967, "grad_norm": 1.9674489498138428, "learning_rate": 1.709660172179879e-05, "loss": 0.3647, "step": 25636 }, { "epoch": 4.184972041957471, "grad_norm": 2.043728828430176, "learning_rate": 1.7096378143409245e-05, "loss": 0.323, "step": 25637 }, { "epoch": 4.185135300599976, "grad_norm": 2.0240681171417236, "learning_rate": 1.709615455787365e-05, "loss": 0.3958, "step": 25638 }, { "epoch": 4.18529855924248, "grad_norm": 1.845848798751831, "learning_rate": 1.7095930965192233e-05, "loss": 0.332, "step": 25639 }, { "epoch": 4.185461817884985, "grad_norm": 1.869615912437439, "learning_rate": 1.709570736536521e-05, "loss": 0.3992, "step": 25640 }, { "epoch": 4.185625076527488, "grad_norm": 1.9437466859817505, "learning_rate": 1.7095483758392816e-05, "loss": 0.3668, "step": 25641 }, { "epoch": 4.185788335169993, "grad_norm": 1.930391550064087, "learning_rate": 1.7095260144275267e-05, "loss": 0.378, "step": 25642 }, { "epoch": 4.185951593812497, "grad_norm": 2.6754724979400635, "learning_rate": 1.7095036523012796e-05, "loss": 0.4861, "step": 25643 }, { "epoch": 4.186114852455002, "grad_norm": 2.2974979877471924, "learning_rate": 1.7094812894605625e-05, "loss": 0.4382, "step": 25644 }, { "epoch": 4.186278111097506, "grad_norm": 2.8615548610687256, "learning_rate": 1.7094589259053975e-05, "loss": 0.4851, "step": 25645 }, { "epoch": 4.1864413697400105, "grad_norm": 2.1919710636138916, "learning_rate": 1.709436561635808e-05, "loss": 0.4176, "step": 25646 }, { "epoch": 4.186604628382515, "grad_norm": 2.111375331878662, "learning_rate": 1.709414196651816e-05, "loss": 0.4275, "step": 25647 }, { "epoch": 4.186767887025019, "grad_norm": 2.0478665828704834, "learning_rate": 1.709391830953444e-05, "loss": 0.3846, "step": 25648 }, { "epoch": 4.186931145667524, "grad_norm": 1.7904512882232666, "learning_rate": 1.7093694645407148e-05, "loss": 0.3178, "step": 25649 }, { "epoch": 4.187094404310028, "grad_norm": 1.8514615297317505, "learning_rate": 1.7093470974136505e-05, "loss": 0.3601, "step": 25650 }, { "epoch": 4.187257662952533, "grad_norm": 1.911241888999939, "learning_rate": 1.709324729572274e-05, "loss": 0.3729, "step": 25651 }, { "epoch": 4.187420921595037, "grad_norm": 2.2337160110473633, "learning_rate": 1.709302361016608e-05, "loss": 0.3879, "step": 25652 }, { "epoch": 4.1875841802375415, "grad_norm": 2.177539348602295, "learning_rate": 1.709279991746674e-05, "loss": 0.3873, "step": 25653 }, { "epoch": 4.187747438880046, "grad_norm": 2.2643508911132812, "learning_rate": 1.7092576217624957e-05, "loss": 0.4006, "step": 25654 }, { "epoch": 4.18791069752255, "grad_norm": 2.2002203464508057, "learning_rate": 1.709235251064095e-05, "loss": 0.379, "step": 25655 }, { "epoch": 4.188073956165055, "grad_norm": 1.9178472757339478, "learning_rate": 1.7092128796514945e-05, "loss": 0.4021, "step": 25656 }, { "epoch": 4.188237214807559, "grad_norm": 2.1757333278656006, "learning_rate": 1.709190507524717e-05, "loss": 0.4263, "step": 25657 }, { "epoch": 4.188400473450063, "grad_norm": 2.17771577835083, "learning_rate": 1.709168134683785e-05, "loss": 0.3973, "step": 25658 }, { "epoch": 4.188563732092567, "grad_norm": 1.697224736213684, "learning_rate": 1.7091457611287205e-05, "loss": 0.3335, "step": 25659 }, { "epoch": 4.188726990735072, "grad_norm": 1.8360209465026855, "learning_rate": 1.7091233868595465e-05, "loss": 0.3041, "step": 25660 }, { "epoch": 4.188890249377576, "grad_norm": 1.6748135089874268, "learning_rate": 1.709101011876286e-05, "loss": 0.3153, "step": 25661 }, { "epoch": 4.189053508020081, "grad_norm": 1.8794196844100952, "learning_rate": 1.7090786361789602e-05, "loss": 0.3926, "step": 25662 }, { "epoch": 4.189216766662585, "grad_norm": 1.9131416082382202, "learning_rate": 1.709056259767593e-05, "loss": 0.3016, "step": 25663 }, { "epoch": 4.1893800253050895, "grad_norm": 1.9915518760681152, "learning_rate": 1.709033882642206e-05, "loss": 0.3182, "step": 25664 }, { "epoch": 4.189543283947594, "grad_norm": 2.842235565185547, "learning_rate": 1.709011504802822e-05, "loss": 0.4648, "step": 25665 }, { "epoch": 4.189706542590098, "grad_norm": 2.1514265537261963, "learning_rate": 1.708989126249464e-05, "loss": 0.4353, "step": 25666 }, { "epoch": 4.189869801232603, "grad_norm": 1.9770790338516235, "learning_rate": 1.7089667469821535e-05, "loss": 0.3525, "step": 25667 }, { "epoch": 4.190033059875107, "grad_norm": 2.6361873149871826, "learning_rate": 1.708944367000914e-05, "loss": 0.4108, "step": 25668 }, { "epoch": 4.190196318517612, "grad_norm": 1.8374794721603394, "learning_rate": 1.708921986305768e-05, "loss": 0.4268, "step": 25669 }, { "epoch": 4.190359577160116, "grad_norm": 2.1249823570251465, "learning_rate": 1.7088996048967375e-05, "loss": 0.3836, "step": 25670 }, { "epoch": 4.1905228358026205, "grad_norm": 2.223564386367798, "learning_rate": 1.708877222773845e-05, "loss": 0.4148, "step": 25671 }, { "epoch": 4.190686094445125, "grad_norm": 2.5835719108581543, "learning_rate": 1.7088548399371138e-05, "loss": 0.3961, "step": 25672 }, { "epoch": 4.190849353087629, "grad_norm": 2.8466708660125732, "learning_rate": 1.7088324563865658e-05, "loss": 0.356, "step": 25673 }, { "epoch": 4.191012611730134, "grad_norm": 1.6233305931091309, "learning_rate": 1.7088100721222235e-05, "loss": 0.3319, "step": 25674 }, { "epoch": 4.191175870372638, "grad_norm": 2.3448030948638916, "learning_rate": 1.7087876871441097e-05, "loss": 0.471, "step": 25675 }, { "epoch": 4.191339129015142, "grad_norm": 1.7877507209777832, "learning_rate": 1.7087653014522472e-05, "loss": 0.368, "step": 25676 }, { "epoch": 4.191502387657646, "grad_norm": 2.332414388656616, "learning_rate": 1.7087429150466578e-05, "loss": 0.4207, "step": 25677 }, { "epoch": 4.191665646300151, "grad_norm": 2.1518099308013916, "learning_rate": 1.7087205279273646e-05, "loss": 0.3782, "step": 25678 }, { "epoch": 4.191828904942655, "grad_norm": 2.138836622238159, "learning_rate": 1.7086981400943905e-05, "loss": 0.3607, "step": 25679 }, { "epoch": 4.19199216358516, "grad_norm": 1.8257930278778076, "learning_rate": 1.708675751547757e-05, "loss": 0.3333, "step": 25680 }, { "epoch": 4.192155422227664, "grad_norm": 1.7137181758880615, "learning_rate": 1.7086533622874873e-05, "loss": 0.3321, "step": 25681 }, { "epoch": 4.1923186808701685, "grad_norm": 1.8390636444091797, "learning_rate": 1.708630972313604e-05, "loss": 0.3516, "step": 25682 }, { "epoch": 4.192481939512673, "grad_norm": 1.9343422651290894, "learning_rate": 1.7086085816261295e-05, "loss": 0.347, "step": 25683 }, { "epoch": 4.192645198155177, "grad_norm": 2.290801763534546, "learning_rate": 1.7085861902250864e-05, "loss": 0.4165, "step": 25684 }, { "epoch": 4.192808456797682, "grad_norm": 2.147106885910034, "learning_rate": 1.708563798110497e-05, "loss": 0.3751, "step": 25685 }, { "epoch": 4.192971715440186, "grad_norm": 2.2474606037139893, "learning_rate": 1.708541405282384e-05, "loss": 0.4036, "step": 25686 }, { "epoch": 4.193134974082691, "grad_norm": 2.1214163303375244, "learning_rate": 1.70851901174077e-05, "loss": 0.3911, "step": 25687 }, { "epoch": 4.193298232725195, "grad_norm": 2.361937999725342, "learning_rate": 1.708496617485678e-05, "loss": 0.4131, "step": 25688 }, { "epoch": 4.1934614913676995, "grad_norm": 2.2648537158966064, "learning_rate": 1.7084742225171296e-05, "loss": 0.4226, "step": 25689 }, { "epoch": 4.193624750010204, "grad_norm": 2.2932424545288086, "learning_rate": 1.708451826835148e-05, "loss": 0.3948, "step": 25690 }, { "epoch": 4.193788008652708, "grad_norm": 2.066694974899292, "learning_rate": 1.7084294304397558e-05, "loss": 0.408, "step": 25691 }, { "epoch": 4.193951267295213, "grad_norm": 1.7955377101898193, "learning_rate": 1.7084070333309748e-05, "loss": 0.374, "step": 25692 }, { "epoch": 4.194114525937717, "grad_norm": 2.04184627532959, "learning_rate": 1.7083846355088287e-05, "loss": 0.3742, "step": 25693 }, { "epoch": 4.194277784580221, "grad_norm": 2.4184036254882812, "learning_rate": 1.708362236973339e-05, "loss": 0.4107, "step": 25694 }, { "epoch": 4.194441043222725, "grad_norm": 2.0751521587371826, "learning_rate": 1.708339837724529e-05, "loss": 0.362, "step": 25695 }, { "epoch": 4.19460430186523, "grad_norm": 2.2401413917541504, "learning_rate": 1.7083174377624206e-05, "loss": 0.3682, "step": 25696 }, { "epoch": 4.194767560507734, "grad_norm": 2.171555757522583, "learning_rate": 1.7082950370870373e-05, "loss": 0.4173, "step": 25697 }, { "epoch": 4.194930819150239, "grad_norm": 2.0228500366210938, "learning_rate": 1.7082726356984007e-05, "loss": 0.4948, "step": 25698 }, { "epoch": 4.195094077792743, "grad_norm": 2.053769826889038, "learning_rate": 1.708250233596534e-05, "loss": 0.4124, "step": 25699 }, { "epoch": 4.1952573364352475, "grad_norm": 1.9549363851547241, "learning_rate": 1.7082278307814593e-05, "loss": 0.3294, "step": 25700 }, { "epoch": 4.195420595077752, "grad_norm": 1.9849607944488525, "learning_rate": 1.7082054272531995e-05, "loss": 0.3794, "step": 25701 }, { "epoch": 4.195583853720256, "grad_norm": 2.4292948246002197, "learning_rate": 1.708183023011777e-05, "loss": 0.4674, "step": 25702 }, { "epoch": 4.195747112362761, "grad_norm": 1.8471693992614746, "learning_rate": 1.7081606180572143e-05, "loss": 0.361, "step": 25703 }, { "epoch": 4.195910371005265, "grad_norm": 2.0157582759857178, "learning_rate": 1.708138212389534e-05, "loss": 0.3529, "step": 25704 }, { "epoch": 4.19607362964777, "grad_norm": 2.0885322093963623, "learning_rate": 1.708115806008759e-05, "loss": 0.3793, "step": 25705 }, { "epoch": 4.196236888290274, "grad_norm": 1.845259666442871, "learning_rate": 1.7080933989149112e-05, "loss": 0.4034, "step": 25706 }, { "epoch": 4.1964001469327785, "grad_norm": 1.9294476509094238, "learning_rate": 1.708070991108014e-05, "loss": 0.3689, "step": 25707 }, { "epoch": 4.196563405575283, "grad_norm": 1.9270447492599487, "learning_rate": 1.7080485825880887e-05, "loss": 0.349, "step": 25708 }, { "epoch": 4.196726664217787, "grad_norm": 2.5288479328155518, "learning_rate": 1.7080261733551593e-05, "loss": 0.4442, "step": 25709 }, { "epoch": 4.196889922860292, "grad_norm": 2.646789789199829, "learning_rate": 1.7080037634092476e-05, "loss": 0.426, "step": 25710 }, { "epoch": 4.197053181502795, "grad_norm": 2.2363014221191406, "learning_rate": 1.7079813527503765e-05, "loss": 0.4075, "step": 25711 }, { "epoch": 4.1972164401453, "grad_norm": 2.015103816986084, "learning_rate": 1.707958941378568e-05, "loss": 0.3359, "step": 25712 }, { "epoch": 4.197379698787804, "grad_norm": 2.2365827560424805, "learning_rate": 1.707936529293845e-05, "loss": 0.4637, "step": 25713 }, { "epoch": 4.197542957430309, "grad_norm": 2.037757158279419, "learning_rate": 1.7079141164962302e-05, "loss": 0.4079, "step": 25714 }, { "epoch": 4.197706216072813, "grad_norm": 2.4160826206207275, "learning_rate": 1.7078917029857464e-05, "loss": 0.7387, "step": 25715 }, { "epoch": 4.197869474715318, "grad_norm": 2.0933890342712402, "learning_rate": 1.7078692887624156e-05, "loss": 0.3787, "step": 25716 }, { "epoch": 4.198032733357822, "grad_norm": 2.9901037216186523, "learning_rate": 1.7078468738262602e-05, "loss": 0.4769, "step": 25717 }, { "epoch": 4.1981959920003264, "grad_norm": 1.7765393257141113, "learning_rate": 1.707824458177304e-05, "loss": 0.3729, "step": 25718 }, { "epoch": 4.198359250642831, "grad_norm": 2.494781017303467, "learning_rate": 1.707802041815568e-05, "loss": 0.3951, "step": 25719 }, { "epoch": 4.198522509285335, "grad_norm": 1.5345019102096558, "learning_rate": 1.707779624741076e-05, "loss": 0.307, "step": 25720 }, { "epoch": 4.19868576792784, "grad_norm": 2.2916676998138428, "learning_rate": 1.7077572069538502e-05, "loss": 0.3656, "step": 25721 }, { "epoch": 4.198849026570344, "grad_norm": 2.0441908836364746, "learning_rate": 1.7077347884539128e-05, "loss": 0.3837, "step": 25722 }, { "epoch": 4.199012285212849, "grad_norm": 1.8957141637802124, "learning_rate": 1.7077123692412865e-05, "loss": 0.3814, "step": 25723 }, { "epoch": 4.199175543855353, "grad_norm": 2.292367696762085, "learning_rate": 1.7076899493159946e-05, "loss": 0.4129, "step": 25724 }, { "epoch": 4.1993388024978575, "grad_norm": 2.439974308013916, "learning_rate": 1.7076675286780586e-05, "loss": 0.3846, "step": 25725 }, { "epoch": 4.199502061140362, "grad_norm": 2.2406158447265625, "learning_rate": 1.707645107327502e-05, "loss": 0.4043, "step": 25726 }, { "epoch": 4.199665319782866, "grad_norm": 2.276658773422241, "learning_rate": 1.7076226852643465e-05, "loss": 0.4396, "step": 25727 }, { "epoch": 4.19982857842537, "grad_norm": 2.3228392601013184, "learning_rate": 1.7076002624886156e-05, "loss": 0.3565, "step": 25728 }, { "epoch": 4.199991837067874, "grad_norm": 1.7289923429489136, "learning_rate": 1.707577839000331e-05, "loss": 0.3509, "step": 25729 }, { "epoch": 4.200155095710379, "grad_norm": 2.1967718601226807, "learning_rate": 1.707555414799516e-05, "loss": 0.4592, "step": 25730 }, { "epoch": 4.200318354352883, "grad_norm": 1.6336698532104492, "learning_rate": 1.7075329898861926e-05, "loss": 0.3133, "step": 25731 }, { "epoch": 4.200481612995388, "grad_norm": 2.3337442874908447, "learning_rate": 1.7075105642603838e-05, "loss": 0.3472, "step": 25732 }, { "epoch": 4.200644871637892, "grad_norm": 2.4416825771331787, "learning_rate": 1.7074881379221122e-05, "loss": 0.429, "step": 25733 }, { "epoch": 4.200808130280397, "grad_norm": 2.419090986251831, "learning_rate": 1.7074657108714002e-05, "loss": 0.4035, "step": 25734 }, { "epoch": 4.200971388922901, "grad_norm": 2.4942970275878906, "learning_rate": 1.7074432831082705e-05, "loss": 0.5705, "step": 25735 }, { "epoch": 4.2011346475654054, "grad_norm": 1.4659578800201416, "learning_rate": 1.707420854632745e-05, "loss": 0.2576, "step": 25736 }, { "epoch": 4.20129790620791, "grad_norm": 2.0946896076202393, "learning_rate": 1.7073984254448476e-05, "loss": 0.3659, "step": 25737 }, { "epoch": 4.201461164850414, "grad_norm": 2.4563405513763428, "learning_rate": 1.7073759955445995e-05, "loss": 0.3562, "step": 25738 }, { "epoch": 4.201624423492919, "grad_norm": 2.05206036567688, "learning_rate": 1.707353564932025e-05, "loss": 0.3834, "step": 25739 }, { "epoch": 4.201787682135423, "grad_norm": 1.7416874170303345, "learning_rate": 1.7073311336071447e-05, "loss": 0.3461, "step": 25740 }, { "epoch": 4.201950940777928, "grad_norm": 2.113990306854248, "learning_rate": 1.7073087015699825e-05, "loss": 0.3435, "step": 25741 }, { "epoch": 4.202114199420432, "grad_norm": 1.7665220499038696, "learning_rate": 1.7072862688205606e-05, "loss": 0.3455, "step": 25742 }, { "epoch": 4.2022774580629365, "grad_norm": 1.9303206205368042, "learning_rate": 1.7072638353589015e-05, "loss": 0.3694, "step": 25743 }, { "epoch": 4.202440716705441, "grad_norm": 2.0808422565460205, "learning_rate": 1.707241401185028e-05, "loss": 0.4661, "step": 25744 }, { "epoch": 4.202603975347945, "grad_norm": 2.5574302673339844, "learning_rate": 1.7072189662989624e-05, "loss": 0.3446, "step": 25745 }, { "epoch": 4.20276723399045, "grad_norm": 1.713883876800537, "learning_rate": 1.7071965307007278e-05, "loss": 0.2917, "step": 25746 }, { "epoch": 4.202930492632953, "grad_norm": 1.6907775402069092, "learning_rate": 1.7071740943903464e-05, "loss": 0.3137, "step": 25747 }, { "epoch": 4.203093751275458, "grad_norm": 2.621656656265259, "learning_rate": 1.7071516573678408e-05, "loss": 0.443, "step": 25748 }, { "epoch": 4.203257009917962, "grad_norm": 1.5263688564300537, "learning_rate": 1.7071292196332335e-05, "loss": 0.2658, "step": 25749 }, { "epoch": 4.203420268560467, "grad_norm": 2.1166396141052246, "learning_rate": 1.7071067811865477e-05, "loss": 0.3495, "step": 25750 }, { "epoch": 4.203583527202971, "grad_norm": 1.935791254043579, "learning_rate": 1.7070843420278053e-05, "loss": 0.3615, "step": 25751 }, { "epoch": 4.203746785845476, "grad_norm": 2.1556005477905273, "learning_rate": 1.707061902157029e-05, "loss": 0.4359, "step": 25752 }, { "epoch": 4.20391004448798, "grad_norm": 2.156768321990967, "learning_rate": 1.7070394615742417e-05, "loss": 0.3864, "step": 25753 }, { "epoch": 4.204073303130484, "grad_norm": 2.0377142429351807, "learning_rate": 1.707017020279466e-05, "loss": 0.3728, "step": 25754 }, { "epoch": 4.204236561772989, "grad_norm": 2.423687696456909, "learning_rate": 1.7069945782727243e-05, "loss": 0.4253, "step": 25755 }, { "epoch": 4.204399820415493, "grad_norm": 2.369565963745117, "learning_rate": 1.7069721355540393e-05, "loss": 0.3495, "step": 25756 }, { "epoch": 4.204563079057998, "grad_norm": 2.0736231803894043, "learning_rate": 1.7069496921234333e-05, "loss": 0.3626, "step": 25757 }, { "epoch": 4.204726337700502, "grad_norm": 2.1645514965057373, "learning_rate": 1.7069272479809293e-05, "loss": 0.3913, "step": 25758 }, { "epoch": 4.204889596343007, "grad_norm": 1.8016053438186646, "learning_rate": 1.7069048031265497e-05, "loss": 0.3673, "step": 25759 }, { "epoch": 4.205052854985511, "grad_norm": 2.033883810043335, "learning_rate": 1.7068823575603172e-05, "loss": 0.3779, "step": 25760 }, { "epoch": 4.2052161136280155, "grad_norm": 1.883431315422058, "learning_rate": 1.7068599112822544e-05, "loss": 0.3733, "step": 25761 }, { "epoch": 4.20537937227052, "grad_norm": 2.358595609664917, "learning_rate": 1.7068374642923838e-05, "loss": 0.3987, "step": 25762 }, { "epoch": 4.205542630913024, "grad_norm": 1.9765785932540894, "learning_rate": 1.7068150165907276e-05, "loss": 0.3886, "step": 25763 }, { "epoch": 4.205705889555528, "grad_norm": 2.1090121269226074, "learning_rate": 1.7067925681773097e-05, "loss": 0.3552, "step": 25764 }, { "epoch": 4.205869148198032, "grad_norm": 2.174339532852173, "learning_rate": 1.7067701190521513e-05, "loss": 0.4207, "step": 25765 }, { "epoch": 4.206032406840537, "grad_norm": 2.238028049468994, "learning_rate": 1.706747669215276e-05, "loss": 0.4, "step": 25766 }, { "epoch": 4.206195665483041, "grad_norm": 2.2077512741088867, "learning_rate": 1.7067252186667058e-05, "loss": 0.4141, "step": 25767 }, { "epoch": 4.206358924125546, "grad_norm": 2.3951029777526855, "learning_rate": 1.7067027674064633e-05, "loss": 0.3558, "step": 25768 }, { "epoch": 4.20652218276805, "grad_norm": 1.9439316987991333, "learning_rate": 1.7066803154345716e-05, "loss": 0.3711, "step": 25769 }, { "epoch": 4.206685441410555, "grad_norm": 2.510401964187622, "learning_rate": 1.7066578627510525e-05, "loss": 0.2885, "step": 25770 }, { "epoch": 4.206848700053059, "grad_norm": 1.7123537063598633, "learning_rate": 1.7066354093559296e-05, "loss": 0.3425, "step": 25771 }, { "epoch": 4.207011958695563, "grad_norm": 1.9126030206680298, "learning_rate": 1.706612955249225e-05, "loss": 0.3777, "step": 25772 }, { "epoch": 4.207175217338068, "grad_norm": 2.0455477237701416, "learning_rate": 1.706590500430961e-05, "loss": 0.3817, "step": 25773 }, { "epoch": 4.207338475980572, "grad_norm": 1.8873136043548584, "learning_rate": 1.7065680449011607e-05, "loss": 0.3612, "step": 25774 }, { "epoch": 4.207501734623077, "grad_norm": 2.4079513549804688, "learning_rate": 1.706545588659847e-05, "loss": 0.4561, "step": 25775 }, { "epoch": 4.207664993265581, "grad_norm": 2.1078600883483887, "learning_rate": 1.7065231317070418e-05, "loss": 0.3964, "step": 25776 }, { "epoch": 4.207828251908086, "grad_norm": 1.987045407295227, "learning_rate": 1.7065006740427677e-05, "loss": 0.3672, "step": 25777 }, { "epoch": 4.20799151055059, "grad_norm": 2.0465853214263916, "learning_rate": 1.7064782156670477e-05, "loss": 0.4082, "step": 25778 }, { "epoch": 4.2081547691930945, "grad_norm": 2.033675193786621, "learning_rate": 1.7064557565799047e-05, "loss": 0.3814, "step": 25779 }, { "epoch": 4.208318027835599, "grad_norm": 1.8954352140426636, "learning_rate": 1.7064332967813607e-05, "loss": 0.3412, "step": 25780 }, { "epoch": 4.2084812864781025, "grad_norm": 1.9859014749526978, "learning_rate": 1.7064108362714385e-05, "loss": 0.4025, "step": 25781 }, { "epoch": 4.208644545120607, "grad_norm": 1.8542698621749878, "learning_rate": 1.7063883750501608e-05, "loss": 0.3933, "step": 25782 }, { "epoch": 4.208807803763111, "grad_norm": 2.000515937805176, "learning_rate": 1.7063659131175502e-05, "loss": 0.2929, "step": 25783 }, { "epoch": 4.208971062405616, "grad_norm": 1.867226481437683, "learning_rate": 1.7063434504736294e-05, "loss": 0.3635, "step": 25784 }, { "epoch": 4.20913432104812, "grad_norm": 1.712592363357544, "learning_rate": 1.706320987118421e-05, "loss": 0.296, "step": 25785 }, { "epoch": 4.209297579690625, "grad_norm": 1.7729519605636597, "learning_rate": 1.7062985230519473e-05, "loss": 0.3738, "step": 25786 }, { "epoch": 4.209460838333129, "grad_norm": 2.339115858078003, "learning_rate": 1.7062760582742312e-05, "loss": 0.4799, "step": 25787 }, { "epoch": 4.209624096975634, "grad_norm": 1.875240445137024, "learning_rate": 1.7062535927852956e-05, "loss": 0.3533, "step": 25788 }, { "epoch": 4.209787355618138, "grad_norm": 2.624218225479126, "learning_rate": 1.7062311265851625e-05, "loss": 0.4377, "step": 25789 }, { "epoch": 4.209950614260642, "grad_norm": 1.7559281587600708, "learning_rate": 1.706208659673855e-05, "loss": 0.343, "step": 25790 }, { "epoch": 4.210113872903147, "grad_norm": 2.1506969928741455, "learning_rate": 1.7061861920513957e-05, "loss": 0.4044, "step": 25791 }, { "epoch": 4.210277131545651, "grad_norm": 2.057974100112915, "learning_rate": 1.706163723717807e-05, "loss": 0.3625, "step": 25792 }, { "epoch": 4.210440390188156, "grad_norm": 1.987761378288269, "learning_rate": 1.7061412546731112e-05, "loss": 0.3783, "step": 25793 }, { "epoch": 4.21060364883066, "grad_norm": 2.795103073120117, "learning_rate": 1.7061187849173318e-05, "loss": 0.7962, "step": 25794 }, { "epoch": 4.210766907473165, "grad_norm": 2.419524908065796, "learning_rate": 1.706096314450491e-05, "loss": 0.3666, "step": 25795 }, { "epoch": 4.210930166115669, "grad_norm": 2.1075329780578613, "learning_rate": 1.706073843272611e-05, "loss": 0.4151, "step": 25796 }, { "epoch": 4.2110934247581735, "grad_norm": 2.2206811904907227, "learning_rate": 1.706051371383715e-05, "loss": 0.4001, "step": 25797 }, { "epoch": 4.211256683400678, "grad_norm": 2.6419994831085205, "learning_rate": 1.7060288987838257e-05, "loss": 0.4142, "step": 25798 }, { "epoch": 4.2114199420431815, "grad_norm": 1.9460327625274658, "learning_rate": 1.7060064254729652e-05, "loss": 0.3589, "step": 25799 }, { "epoch": 4.211583200685686, "grad_norm": 1.824395775794983, "learning_rate": 1.7059839514511565e-05, "loss": 0.3258, "step": 25800 }, { "epoch": 4.21174645932819, "grad_norm": 2.0422298908233643, "learning_rate": 1.7059614767184222e-05, "loss": 0.3728, "step": 25801 }, { "epoch": 4.211909717970695, "grad_norm": 2.3193211555480957, "learning_rate": 1.705939001274785e-05, "loss": 0.3935, "step": 25802 }, { "epoch": 4.212072976613199, "grad_norm": 1.973178505897522, "learning_rate": 1.7059165251202672e-05, "loss": 0.364, "step": 25803 }, { "epoch": 4.212236235255704, "grad_norm": 2.01039457321167, "learning_rate": 1.7058940482548917e-05, "loss": 0.3741, "step": 25804 }, { "epoch": 4.212399493898208, "grad_norm": 2.2458503246307373, "learning_rate": 1.7058715706786813e-05, "loss": 0.4341, "step": 25805 }, { "epoch": 4.2125627525407126, "grad_norm": 2.299427032470703, "learning_rate": 1.705849092391658e-05, "loss": 0.3731, "step": 25806 }, { "epoch": 4.212726011183217, "grad_norm": 1.882782220840454, "learning_rate": 1.7058266133938453e-05, "loss": 0.3446, "step": 25807 }, { "epoch": 4.212889269825721, "grad_norm": 1.9144484996795654, "learning_rate": 1.7058041336852647e-05, "loss": 0.4051, "step": 25808 }, { "epoch": 4.213052528468226, "grad_norm": 2.289008855819702, "learning_rate": 1.70578165326594e-05, "loss": 0.4131, "step": 25809 }, { "epoch": 4.21321578711073, "grad_norm": 2.497345209121704, "learning_rate": 1.7057591721358936e-05, "loss": 0.3925, "step": 25810 }, { "epoch": 4.213379045753235, "grad_norm": 1.8086291551589966, "learning_rate": 1.7057366902951478e-05, "loss": 0.3397, "step": 25811 }, { "epoch": 4.213542304395739, "grad_norm": 1.8848567008972168, "learning_rate": 1.705714207743725e-05, "loss": 0.3151, "step": 25812 }, { "epoch": 4.213705563038244, "grad_norm": 2.314988136291504, "learning_rate": 1.7056917244816485e-05, "loss": 0.4335, "step": 25813 }, { "epoch": 4.213868821680748, "grad_norm": 2.02938175201416, "learning_rate": 1.7056692405089404e-05, "loss": 0.3881, "step": 25814 }, { "epoch": 4.2140320803232525, "grad_norm": 1.8312557935714722, "learning_rate": 1.705646755825624e-05, "loss": 0.3834, "step": 25815 }, { "epoch": 4.214195338965757, "grad_norm": 2.1498844623565674, "learning_rate": 1.705624270431721e-05, "loss": 0.4303, "step": 25816 }, { "epoch": 4.2143585976082605, "grad_norm": 2.291478157043457, "learning_rate": 1.7056017843272545e-05, "loss": 0.4802, "step": 25817 }, { "epoch": 4.214521856250765, "grad_norm": 1.612616777420044, "learning_rate": 1.7055792975122477e-05, "loss": 0.3513, "step": 25818 }, { "epoch": 4.214685114893269, "grad_norm": 1.9582970142364502, "learning_rate": 1.705556809986722e-05, "loss": 0.3078, "step": 25819 }, { "epoch": 4.214848373535774, "grad_norm": 1.9850577116012573, "learning_rate": 1.7055343217507016e-05, "loss": 0.3752, "step": 25820 }, { "epoch": 4.215011632178278, "grad_norm": 2.239506721496582, "learning_rate": 1.7055118328042075e-05, "loss": 0.4457, "step": 25821 }, { "epoch": 4.215174890820783, "grad_norm": 1.7539126873016357, "learning_rate": 1.7054893431472636e-05, "loss": 0.3133, "step": 25822 }, { "epoch": 4.215338149463287, "grad_norm": 2.2065885066986084, "learning_rate": 1.7054668527798922e-05, "loss": 0.3482, "step": 25823 }, { "epoch": 4.2155014081057915, "grad_norm": 2.2359979152679443, "learning_rate": 1.705444361702116e-05, "loss": 0.4521, "step": 25824 }, { "epoch": 4.215664666748296, "grad_norm": 1.3995224237442017, "learning_rate": 1.7054218699139574e-05, "loss": 0.2999, "step": 25825 }, { "epoch": 4.2158279253908, "grad_norm": 1.9707311391830444, "learning_rate": 1.705399377415439e-05, "loss": 0.3754, "step": 25826 }, { "epoch": 4.215991184033305, "grad_norm": 2.0825724601745605, "learning_rate": 1.7053768842065833e-05, "loss": 0.3721, "step": 25827 }, { "epoch": 4.216154442675809, "grad_norm": 2.0394365787506104, "learning_rate": 1.705354390287414e-05, "loss": 0.3494, "step": 25828 }, { "epoch": 4.216317701318314, "grad_norm": 1.812547206878662, "learning_rate": 1.7053318956579526e-05, "loss": 0.3532, "step": 25829 }, { "epoch": 4.216480959960818, "grad_norm": 3.083277940750122, "learning_rate": 1.705309400318222e-05, "loss": 0.4717, "step": 25830 }, { "epoch": 4.216644218603323, "grad_norm": 2.226475477218628, "learning_rate": 1.7052869042682454e-05, "loss": 0.4366, "step": 25831 }, { "epoch": 4.216807477245827, "grad_norm": 1.866997241973877, "learning_rate": 1.705264407508045e-05, "loss": 0.3197, "step": 25832 }, { "epoch": 4.2169707358883315, "grad_norm": 2.1481409072875977, "learning_rate": 1.7052419100376435e-05, "loss": 0.37, "step": 25833 }, { "epoch": 4.217133994530835, "grad_norm": 2.139336347579956, "learning_rate": 1.7052194118570633e-05, "loss": 0.3913, "step": 25834 }, { "epoch": 4.2172972531733395, "grad_norm": 2.702925682067871, "learning_rate": 1.7051969129663276e-05, "loss": 0.3984, "step": 25835 }, { "epoch": 4.217460511815844, "grad_norm": 2.128337860107422, "learning_rate": 1.7051744133654588e-05, "loss": 0.3927, "step": 25836 }, { "epoch": 4.217623770458348, "grad_norm": 2.705125570297241, "learning_rate": 1.7051519130544795e-05, "loss": 0.4462, "step": 25837 }, { "epoch": 4.217787029100853, "grad_norm": 1.968066692352295, "learning_rate": 1.7051294120334126e-05, "loss": 0.3078, "step": 25838 }, { "epoch": 4.217950287743357, "grad_norm": 2.087172746658325, "learning_rate": 1.7051069103022803e-05, "loss": 0.3695, "step": 25839 }, { "epoch": 4.218113546385862, "grad_norm": 2.07175350189209, "learning_rate": 1.7050844078611058e-05, "loss": 0.3628, "step": 25840 }, { "epoch": 4.218276805028366, "grad_norm": 2.4337267875671387, "learning_rate": 1.7050619047099112e-05, "loss": 0.4015, "step": 25841 }, { "epoch": 4.2184400636708705, "grad_norm": 1.8985376358032227, "learning_rate": 1.7050394008487194e-05, "loss": 0.3711, "step": 25842 }, { "epoch": 4.218603322313375, "grad_norm": 2.1957695484161377, "learning_rate": 1.7050168962775535e-05, "loss": 0.3757, "step": 25843 }, { "epoch": 4.218766580955879, "grad_norm": 1.969650387763977, "learning_rate": 1.7049943909964356e-05, "loss": 0.3752, "step": 25844 }, { "epoch": 4.218929839598384, "grad_norm": 2.113535165786743, "learning_rate": 1.7049718850053883e-05, "loss": 0.3572, "step": 25845 }, { "epoch": 4.219093098240888, "grad_norm": 1.7617026567459106, "learning_rate": 1.704949378304435e-05, "loss": 0.3685, "step": 25846 }, { "epoch": 4.219256356883393, "grad_norm": 2.4367146492004395, "learning_rate": 1.7049268708935974e-05, "loss": 0.3454, "step": 25847 }, { "epoch": 4.219419615525897, "grad_norm": 2.3006253242492676, "learning_rate": 1.704904362772899e-05, "loss": 0.4205, "step": 25848 }, { "epoch": 4.219582874168402, "grad_norm": 2.3556225299835205, "learning_rate": 1.7048818539423616e-05, "loss": 0.3913, "step": 25849 }, { "epoch": 4.219746132810906, "grad_norm": 2.2359635829925537, "learning_rate": 1.7048593444020084e-05, "loss": 0.3905, "step": 25850 }, { "epoch": 4.2199093914534105, "grad_norm": 1.8218780755996704, "learning_rate": 1.7048368341518623e-05, "loss": 0.4011, "step": 25851 }, { "epoch": 4.220072650095914, "grad_norm": 1.9211618900299072, "learning_rate": 1.704814323191946e-05, "loss": 0.3794, "step": 25852 }, { "epoch": 4.2202359087384185, "grad_norm": 2.0622973442077637, "learning_rate": 1.7047918115222812e-05, "loss": 0.3632, "step": 25853 }, { "epoch": 4.220399167380923, "grad_norm": 2.034705638885498, "learning_rate": 1.7047692991428918e-05, "loss": 0.3573, "step": 25854 }, { "epoch": 4.220562426023427, "grad_norm": 1.966442584991455, "learning_rate": 1.7047467860537995e-05, "loss": 0.3718, "step": 25855 }, { "epoch": 4.220725684665932, "grad_norm": 2.3197784423828125, "learning_rate": 1.7047242722550278e-05, "loss": 0.413, "step": 25856 }, { "epoch": 4.220888943308436, "grad_norm": 1.8968055248260498, "learning_rate": 1.7047017577465987e-05, "loss": 0.3575, "step": 25857 }, { "epoch": 4.221052201950941, "grad_norm": 2.4810190200805664, "learning_rate": 1.704679242528535e-05, "loss": 0.4491, "step": 25858 }, { "epoch": 4.221215460593445, "grad_norm": 1.940148949623108, "learning_rate": 1.7046567266008597e-05, "loss": 0.4184, "step": 25859 }, { "epoch": 4.2213787192359495, "grad_norm": 1.9000332355499268, "learning_rate": 1.7046342099635948e-05, "loss": 0.3727, "step": 25860 }, { "epoch": 4.221541977878454, "grad_norm": 2.1594398021698, "learning_rate": 1.7046116926167637e-05, "loss": 0.4048, "step": 25861 }, { "epoch": 4.221705236520958, "grad_norm": 2.2182846069335938, "learning_rate": 1.704589174560389e-05, "loss": 0.4066, "step": 25862 }, { "epoch": 4.221868495163463, "grad_norm": 1.9405770301818848, "learning_rate": 1.7045666557944928e-05, "loss": 0.3847, "step": 25863 }, { "epoch": 4.222031753805967, "grad_norm": 2.1043238639831543, "learning_rate": 1.7045441363190987e-05, "loss": 0.3488, "step": 25864 }, { "epoch": 4.222195012448472, "grad_norm": 1.9259015321731567, "learning_rate": 1.7045216161342285e-05, "loss": 0.4442, "step": 25865 }, { "epoch": 4.222358271090976, "grad_norm": 2.4043662548065186, "learning_rate": 1.7044990952399055e-05, "loss": 0.4175, "step": 25866 }, { "epoch": 4.222521529733481, "grad_norm": 1.8639891147613525, "learning_rate": 1.7044765736361517e-05, "loss": 0.3774, "step": 25867 }, { "epoch": 4.222684788375985, "grad_norm": 2.094273090362549, "learning_rate": 1.7044540513229906e-05, "loss": 0.3913, "step": 25868 }, { "epoch": 4.2228480470184895, "grad_norm": 1.9828604459762573, "learning_rate": 1.704431528300444e-05, "loss": 0.4233, "step": 25869 }, { "epoch": 4.223011305660993, "grad_norm": 2.0310416221618652, "learning_rate": 1.7044090045685353e-05, "loss": 0.3506, "step": 25870 }, { "epoch": 4.2231745643034975, "grad_norm": 2.16868257522583, "learning_rate": 1.704386480127287e-05, "loss": 0.3892, "step": 25871 }, { "epoch": 4.223337822946002, "grad_norm": 2.3086423873901367, "learning_rate": 1.7043639549767216e-05, "loss": 0.3709, "step": 25872 }, { "epoch": 4.223501081588506, "grad_norm": 1.9113171100616455, "learning_rate": 1.7043414291168618e-05, "loss": 0.3655, "step": 25873 }, { "epoch": 4.223664340231011, "grad_norm": 1.8566911220550537, "learning_rate": 1.7043189025477307e-05, "loss": 0.3426, "step": 25874 }, { "epoch": 4.223827598873515, "grad_norm": 2.5838074684143066, "learning_rate": 1.7042963752693502e-05, "loss": 0.4417, "step": 25875 }, { "epoch": 4.22399085751602, "grad_norm": 2.3329684734344482, "learning_rate": 1.7042738472817436e-05, "loss": 0.3932, "step": 25876 }, { "epoch": 4.224154116158524, "grad_norm": 2.280210256576538, "learning_rate": 1.7042513185849336e-05, "loss": 0.393, "step": 25877 }, { "epoch": 4.2243173748010285, "grad_norm": 1.7964158058166504, "learning_rate": 1.7042287891789425e-05, "loss": 0.3018, "step": 25878 }, { "epoch": 4.224480633443533, "grad_norm": 1.6976768970489502, "learning_rate": 1.7042062590637934e-05, "loss": 0.3182, "step": 25879 }, { "epoch": 4.224643892086037, "grad_norm": 2.439384937286377, "learning_rate": 1.7041837282395084e-05, "loss": 0.3906, "step": 25880 }, { "epoch": 4.224807150728542, "grad_norm": 1.8593168258666992, "learning_rate": 1.704161196706111e-05, "loss": 0.3291, "step": 25881 }, { "epoch": 4.224970409371046, "grad_norm": 1.9994983673095703, "learning_rate": 1.704138664463623e-05, "loss": 0.3428, "step": 25882 }, { "epoch": 4.225133668013551, "grad_norm": 2.3464317321777344, "learning_rate": 1.704116131512068e-05, "loss": 0.3507, "step": 25883 }, { "epoch": 4.225296926656055, "grad_norm": 2.4963009357452393, "learning_rate": 1.704093597851468e-05, "loss": 0.3866, "step": 25884 }, { "epoch": 4.22546018529856, "grad_norm": 2.015688896179199, "learning_rate": 1.704071063481846e-05, "loss": 0.3439, "step": 25885 }, { "epoch": 4.225623443941064, "grad_norm": 2.4549355506896973, "learning_rate": 1.7040485284032248e-05, "loss": 0.3594, "step": 25886 }, { "epoch": 4.225786702583568, "grad_norm": 2.407121181488037, "learning_rate": 1.7040259926156268e-05, "loss": 0.3732, "step": 25887 }, { "epoch": 4.225949961226072, "grad_norm": 2.3624541759490967, "learning_rate": 1.7040034561190748e-05, "loss": 0.3797, "step": 25888 }, { "epoch": 4.2261132198685765, "grad_norm": 2.06173038482666, "learning_rate": 1.703980918913591e-05, "loss": 0.3533, "step": 25889 }, { "epoch": 4.226276478511081, "grad_norm": 2.0820231437683105, "learning_rate": 1.7039583809991992e-05, "loss": 0.3898, "step": 25890 }, { "epoch": 4.226439737153585, "grad_norm": 2.405602216720581, "learning_rate": 1.7039358423759216e-05, "loss": 0.4372, "step": 25891 }, { "epoch": 4.22660299579609, "grad_norm": 2.3531734943389893, "learning_rate": 1.7039133030437805e-05, "loss": 0.3737, "step": 25892 }, { "epoch": 4.226766254438594, "grad_norm": 1.8527214527130127, "learning_rate": 1.703890763002799e-05, "loss": 0.3661, "step": 25893 }, { "epoch": 4.226929513081099, "grad_norm": 2.257209062576294, "learning_rate": 1.7038682222529996e-05, "loss": 0.3591, "step": 25894 }, { "epoch": 4.227092771723603, "grad_norm": 2.457846164703369, "learning_rate": 1.7038456807944053e-05, "loss": 0.4191, "step": 25895 }, { "epoch": 4.2272560303661075, "grad_norm": 2.064378261566162, "learning_rate": 1.7038231386270382e-05, "loss": 0.3512, "step": 25896 }, { "epoch": 4.227419289008612, "grad_norm": 2.433544158935547, "learning_rate": 1.7038005957509217e-05, "loss": 0.3955, "step": 25897 }, { "epoch": 4.227582547651116, "grad_norm": 2.5395150184631348, "learning_rate": 1.7037780521660782e-05, "loss": 0.4861, "step": 25898 }, { "epoch": 4.227745806293621, "grad_norm": 2.0331575870513916, "learning_rate": 1.70375550787253e-05, "loss": 0.3898, "step": 25899 }, { "epoch": 4.227909064936125, "grad_norm": 2.041199207305908, "learning_rate": 1.7037329628703005e-05, "loss": 0.3584, "step": 25900 }, { "epoch": 4.22807232357863, "grad_norm": 2.007941722869873, "learning_rate": 1.703710417159412e-05, "loss": 0.375, "step": 25901 }, { "epoch": 4.228235582221134, "grad_norm": 2.1454012393951416, "learning_rate": 1.7036878707398874e-05, "loss": 0.3927, "step": 25902 }, { "epoch": 4.228398840863639, "grad_norm": 2.018625259399414, "learning_rate": 1.7036653236117492e-05, "loss": 0.3966, "step": 25903 }, { "epoch": 4.228562099506143, "grad_norm": 1.8338327407836914, "learning_rate": 1.7036427757750205e-05, "loss": 0.3209, "step": 25904 }, { "epoch": 4.228725358148647, "grad_norm": 2.266216516494751, "learning_rate": 1.7036202272297237e-05, "loss": 0.4337, "step": 25905 }, { "epoch": 4.228888616791151, "grad_norm": 2.4767138957977295, "learning_rate": 1.703597677975881e-05, "loss": 0.4464, "step": 25906 }, { "epoch": 4.2290518754336555, "grad_norm": 2.0030152797698975, "learning_rate": 1.703575128013516e-05, "loss": 0.3215, "step": 25907 }, { "epoch": 4.22921513407616, "grad_norm": 1.8350974321365356, "learning_rate": 1.7035525773426507e-05, "loss": 0.3801, "step": 25908 }, { "epoch": 4.229378392718664, "grad_norm": 2.0530200004577637, "learning_rate": 1.7035300259633084e-05, "loss": 0.3689, "step": 25909 }, { "epoch": 4.229541651361169, "grad_norm": 2.1513538360595703, "learning_rate": 1.7035074738755114e-05, "loss": 0.3978, "step": 25910 }, { "epoch": 4.229704910003673, "grad_norm": 2.3460066318511963, "learning_rate": 1.7034849210792828e-05, "loss": 0.3876, "step": 25911 }, { "epoch": 4.229868168646178, "grad_norm": 1.8563653230667114, "learning_rate": 1.703462367574645e-05, "loss": 0.3538, "step": 25912 }, { "epoch": 4.230031427288682, "grad_norm": 1.9713027477264404, "learning_rate": 1.7034398133616207e-05, "loss": 0.4302, "step": 25913 }, { "epoch": 4.2301946859311865, "grad_norm": 2.0301363468170166, "learning_rate": 1.703417258440233e-05, "loss": 0.3821, "step": 25914 }, { "epoch": 4.230357944573691, "grad_norm": 2.1657071113586426, "learning_rate": 1.703394702810504e-05, "loss": 0.3826, "step": 25915 }, { "epoch": 4.230521203216195, "grad_norm": 2.073535442352295, "learning_rate": 1.703372146472457e-05, "loss": 0.34, "step": 25916 }, { "epoch": 4.2306844618587, "grad_norm": 2.393050193786621, "learning_rate": 1.703349589426114e-05, "loss": 0.3853, "step": 25917 }, { "epoch": 4.230847720501204, "grad_norm": 2.271869421005249, "learning_rate": 1.7033270316714983e-05, "loss": 0.3755, "step": 25918 }, { "epoch": 4.231010979143709, "grad_norm": 2.0852930545806885, "learning_rate": 1.7033044732086326e-05, "loss": 0.4229, "step": 25919 }, { "epoch": 4.231174237786213, "grad_norm": 2.264219045639038, "learning_rate": 1.7032819140375395e-05, "loss": 0.4913, "step": 25920 }, { "epoch": 4.231337496428718, "grad_norm": 2.797537088394165, "learning_rate": 1.7032593541582415e-05, "loss": 0.3757, "step": 25921 }, { "epoch": 4.231500755071222, "grad_norm": 1.9700933694839478, "learning_rate": 1.7032367935707616e-05, "loss": 0.3315, "step": 25922 }, { "epoch": 4.231664013713726, "grad_norm": 2.3238189220428467, "learning_rate": 1.7032142322751226e-05, "loss": 0.4295, "step": 25923 }, { "epoch": 4.23182727235623, "grad_norm": 1.935665488243103, "learning_rate": 1.703191670271347e-05, "loss": 0.3527, "step": 25924 }, { "epoch": 4.2319905309987345, "grad_norm": 2.0303828716278076, "learning_rate": 1.7031691075594574e-05, "loss": 0.4034, "step": 25925 }, { "epoch": 4.232153789641239, "grad_norm": 1.5434740781784058, "learning_rate": 1.7031465441394766e-05, "loss": 0.3274, "step": 25926 }, { "epoch": 4.232317048283743, "grad_norm": 2.238999128341675, "learning_rate": 1.703123980011428e-05, "loss": 0.4005, "step": 25927 }, { "epoch": 4.232480306926248, "grad_norm": 1.9454866647720337, "learning_rate": 1.703101415175333e-05, "loss": 0.3684, "step": 25928 }, { "epoch": 4.232643565568752, "grad_norm": 2.249476909637451, "learning_rate": 1.7030788496312152e-05, "loss": 0.4244, "step": 25929 }, { "epoch": 4.232806824211257, "grad_norm": 2.2025952339172363, "learning_rate": 1.7030562833790978e-05, "loss": 0.3424, "step": 25930 }, { "epoch": 4.232970082853761, "grad_norm": 2.014859437942505, "learning_rate": 1.7030337164190025e-05, "loss": 0.355, "step": 25931 }, { "epoch": 4.2331333414962655, "grad_norm": 2.133537769317627, "learning_rate": 1.7030111487509523e-05, "loss": 0.3974, "step": 25932 }, { "epoch": 4.23329660013877, "grad_norm": 2.370826244354248, "learning_rate": 1.7029885803749703e-05, "loss": 0.4249, "step": 25933 }, { "epoch": 4.233459858781274, "grad_norm": 1.901308536529541, "learning_rate": 1.7029660112910787e-05, "loss": 0.3559, "step": 25934 }, { "epoch": 4.233623117423779, "grad_norm": 2.0806729793548584, "learning_rate": 1.7029434414993008e-05, "loss": 0.4203, "step": 25935 }, { "epoch": 4.233786376066283, "grad_norm": 2.4530155658721924, "learning_rate": 1.702920870999659e-05, "loss": 0.3506, "step": 25936 }, { "epoch": 4.233949634708788, "grad_norm": 2.255878448486328, "learning_rate": 1.702898299792176e-05, "loss": 0.3335, "step": 25937 }, { "epoch": 4.234112893351292, "grad_norm": 2.3780455589294434, "learning_rate": 1.7028757278768746e-05, "loss": 0.4278, "step": 25938 }, { "epoch": 4.234276151993797, "grad_norm": 1.937438726425171, "learning_rate": 1.7028531552537775e-05, "loss": 0.3723, "step": 25939 }, { "epoch": 4.2344394106363, "grad_norm": 2.1059272289276123, "learning_rate": 1.7028305819229075e-05, "loss": 0.3975, "step": 25940 }, { "epoch": 4.234602669278805, "grad_norm": 2.417571544647217, "learning_rate": 1.7028080078842873e-05, "loss": 0.4751, "step": 25941 }, { "epoch": 4.234765927921309, "grad_norm": 2.387808084487915, "learning_rate": 1.7027854331379395e-05, "loss": 0.4324, "step": 25942 }, { "epoch": 4.2349291865638135, "grad_norm": 1.865177035331726, "learning_rate": 1.702762857683887e-05, "loss": 0.3342, "step": 25943 }, { "epoch": 4.235092445206318, "grad_norm": 1.7936688661575317, "learning_rate": 1.7027402815221528e-05, "loss": 0.358, "step": 25944 }, { "epoch": 4.235255703848822, "grad_norm": 2.0805137157440186, "learning_rate": 1.7027177046527587e-05, "loss": 0.4607, "step": 25945 }, { "epoch": 4.235418962491327, "grad_norm": 2.015592336654663, "learning_rate": 1.7026951270757288e-05, "loss": 0.3767, "step": 25946 }, { "epoch": 4.235582221133831, "grad_norm": 2.147996425628662, "learning_rate": 1.7026725487910845e-05, "loss": 0.4222, "step": 25947 }, { "epoch": 4.235745479776336, "grad_norm": 1.6432651281356812, "learning_rate": 1.7026499697988496e-05, "loss": 0.3241, "step": 25948 }, { "epoch": 4.23590873841884, "grad_norm": 2.3903753757476807, "learning_rate": 1.7026273900990457e-05, "loss": 0.4131, "step": 25949 }, { "epoch": 4.2360719970613445, "grad_norm": 2.200246572494507, "learning_rate": 1.702604809691697e-05, "loss": 0.4407, "step": 25950 }, { "epoch": 4.236235255703849, "grad_norm": 1.9622162580490112, "learning_rate": 1.7025822285768248e-05, "loss": 0.3938, "step": 25951 }, { "epoch": 4.236398514346353, "grad_norm": 1.5688682794570923, "learning_rate": 1.702559646754453e-05, "loss": 0.3213, "step": 25952 }, { "epoch": 4.236561772988858, "grad_norm": 2.1274173259735107, "learning_rate": 1.7025370642246035e-05, "loss": 0.3643, "step": 25953 }, { "epoch": 4.236725031631362, "grad_norm": 2.418145179748535, "learning_rate": 1.7025144809872993e-05, "loss": 0.4007, "step": 25954 }, { "epoch": 4.236888290273867, "grad_norm": 2.6360204219818115, "learning_rate": 1.7024918970425633e-05, "loss": 0.4286, "step": 25955 }, { "epoch": 4.237051548916371, "grad_norm": 1.8641072511672974, "learning_rate": 1.7024693123904183e-05, "loss": 0.3567, "step": 25956 }, { "epoch": 4.237214807558875, "grad_norm": 2.065885066986084, "learning_rate": 1.7024467270308867e-05, "loss": 0.3603, "step": 25957 }, { "epoch": 4.237378066201379, "grad_norm": 2.075251579284668, "learning_rate": 1.7024241409639913e-05, "loss": 0.3483, "step": 25958 }, { "epoch": 4.237541324843884, "grad_norm": 2.094909906387329, "learning_rate": 1.702401554189755e-05, "loss": 0.3474, "step": 25959 }, { "epoch": 4.237704583486388, "grad_norm": 2.185300827026367, "learning_rate": 1.7023789667082012e-05, "loss": 0.3762, "step": 25960 }, { "epoch": 4.2378678421288924, "grad_norm": 2.1513991355895996, "learning_rate": 1.702356378519351e-05, "loss": 0.3717, "step": 25961 }, { "epoch": 4.238031100771397, "grad_norm": 1.6942863464355469, "learning_rate": 1.7023337896232288e-05, "loss": 0.3435, "step": 25962 }, { "epoch": 4.238194359413901, "grad_norm": 2.1619551181793213, "learning_rate": 1.7023112000198567e-05, "loss": 0.4282, "step": 25963 }, { "epoch": 4.238357618056406, "grad_norm": 2.308417797088623, "learning_rate": 1.702288609709257e-05, "loss": 0.4148, "step": 25964 }, { "epoch": 4.23852087669891, "grad_norm": 1.9345924854278564, "learning_rate": 1.702266018691453e-05, "loss": 0.3554, "step": 25965 }, { "epoch": 4.238684135341415, "grad_norm": 2.0610885620117188, "learning_rate": 1.7022434269664673e-05, "loss": 0.3509, "step": 25966 }, { "epoch": 4.238847393983919, "grad_norm": 2.321256399154663, "learning_rate": 1.7022208345343227e-05, "loss": 0.3941, "step": 25967 }, { "epoch": 4.2390106526264235, "grad_norm": 2.154202699661255, "learning_rate": 1.7021982413950417e-05, "loss": 0.4226, "step": 25968 }, { "epoch": 4.239173911268928, "grad_norm": 2.116095781326294, "learning_rate": 1.7021756475486475e-05, "loss": 0.3725, "step": 25969 }, { "epoch": 4.239337169911432, "grad_norm": 2.0137107372283936, "learning_rate": 1.7021530529951627e-05, "loss": 0.3895, "step": 25970 }, { "epoch": 4.239500428553937, "grad_norm": 2.2509422302246094, "learning_rate": 1.7021304577346097e-05, "loss": 0.3588, "step": 25971 }, { "epoch": 4.239663687196441, "grad_norm": 2.230841875076294, "learning_rate": 1.7021078617670115e-05, "loss": 0.3926, "step": 25972 }, { "epoch": 4.239826945838946, "grad_norm": 2.4159929752349854, "learning_rate": 1.7020852650923912e-05, "loss": 0.4293, "step": 25973 }, { "epoch": 4.23999020448145, "grad_norm": 2.566115379333496, "learning_rate": 1.702062667710771e-05, "loss": 0.4368, "step": 25974 }, { "epoch": 4.240153463123955, "grad_norm": 2.3392491340637207, "learning_rate": 1.7020400696221737e-05, "loss": 0.438, "step": 25975 }, { "epoch": 4.240316721766458, "grad_norm": 2.2361700534820557, "learning_rate": 1.7020174708266225e-05, "loss": 0.3916, "step": 25976 }, { "epoch": 4.240479980408963, "grad_norm": 2.255009412765503, "learning_rate": 1.7019948713241397e-05, "loss": 0.441, "step": 25977 }, { "epoch": 4.240643239051467, "grad_norm": 1.9451268911361694, "learning_rate": 1.7019722711147486e-05, "loss": 0.3876, "step": 25978 }, { "epoch": 4.2408064976939714, "grad_norm": 1.7987594604492188, "learning_rate": 1.7019496701984712e-05, "loss": 0.3142, "step": 25979 }, { "epoch": 4.240969756336476, "grad_norm": 1.8407812118530273, "learning_rate": 1.701927068575331e-05, "loss": 0.3662, "step": 25980 }, { "epoch": 4.24113301497898, "grad_norm": 2.0333454608917236, "learning_rate": 1.7019044662453502e-05, "loss": 0.3609, "step": 25981 }, { "epoch": 4.241296273621485, "grad_norm": 2.0548157691955566, "learning_rate": 1.7018818632085516e-05, "loss": 0.3454, "step": 25982 }, { "epoch": 4.241459532263989, "grad_norm": 1.7331854104995728, "learning_rate": 1.701859259464959e-05, "loss": 0.3286, "step": 25983 }, { "epoch": 4.241622790906494, "grad_norm": 2.0741257667541504, "learning_rate": 1.7018366550145934e-05, "loss": 0.3375, "step": 25984 }, { "epoch": 4.241786049548998, "grad_norm": 1.9739906787872314, "learning_rate": 1.701814049857479e-05, "loss": 0.364, "step": 25985 }, { "epoch": 4.2419493081915025, "grad_norm": 2.1533188819885254, "learning_rate": 1.7017914439936377e-05, "loss": 0.3788, "step": 25986 }, { "epoch": 4.242112566834007, "grad_norm": 1.9860517978668213, "learning_rate": 1.701768837423093e-05, "loss": 0.3954, "step": 25987 }, { "epoch": 4.242275825476511, "grad_norm": 1.9754726886749268, "learning_rate": 1.7017462301458664e-05, "loss": 0.3524, "step": 25988 }, { "epoch": 4.242439084119016, "grad_norm": 1.741445779800415, "learning_rate": 1.7017236221619823e-05, "loss": 0.3459, "step": 25989 }, { "epoch": 4.24260234276152, "grad_norm": 2.3262314796447754, "learning_rate": 1.7017010134714628e-05, "loss": 0.4117, "step": 25990 }, { "epoch": 4.242765601404025, "grad_norm": 1.8908541202545166, "learning_rate": 1.70167840407433e-05, "loss": 0.3722, "step": 25991 }, { "epoch": 4.242928860046529, "grad_norm": 1.8444396257400513, "learning_rate": 1.7016557939706075e-05, "loss": 0.3617, "step": 25992 }, { "epoch": 4.243092118689033, "grad_norm": 3.523672103881836, "learning_rate": 1.7016331831603178e-05, "loss": 0.4543, "step": 25993 }, { "epoch": 4.243255377331537, "grad_norm": 2.2291548252105713, "learning_rate": 1.7016105716434837e-05, "loss": 0.3631, "step": 25994 }, { "epoch": 4.243418635974042, "grad_norm": 2.2540202140808105, "learning_rate": 1.701587959420128e-05, "loss": 0.3855, "step": 25995 }, { "epoch": 4.243581894616546, "grad_norm": 2.2600371837615967, "learning_rate": 1.7015653464902736e-05, "loss": 0.401, "step": 25996 }, { "epoch": 4.24374515325905, "grad_norm": 1.8472758531570435, "learning_rate": 1.7015427328539425e-05, "loss": 0.3283, "step": 25997 }, { "epoch": 4.243908411901555, "grad_norm": 2.567239761352539, "learning_rate": 1.701520118511159e-05, "loss": 0.4108, "step": 25998 }, { "epoch": 4.244071670544059, "grad_norm": 1.998896837234497, "learning_rate": 1.701497503461944e-05, "loss": 0.4049, "step": 25999 }, { "epoch": 4.244234929186564, "grad_norm": 1.8724607229232788, "learning_rate": 1.7014748877063212e-05, "loss": 0.3292, "step": 26000 }, { "epoch": 4.244398187829068, "grad_norm": 2.10054612159729, "learning_rate": 1.701452271244314e-05, "loss": 0.3879, "step": 26001 }, { "epoch": 4.244561446471573, "grad_norm": 1.7530452013015747, "learning_rate": 1.701429654075944e-05, "loss": 0.3604, "step": 26002 }, { "epoch": 4.244724705114077, "grad_norm": 2.090813636779785, "learning_rate": 1.701407036201235e-05, "loss": 0.4091, "step": 26003 }, { "epoch": 4.2448879637565815, "grad_norm": 1.9504343271255493, "learning_rate": 1.701384417620209e-05, "loss": 0.3464, "step": 26004 }, { "epoch": 4.245051222399086, "grad_norm": 1.919350504875183, "learning_rate": 1.7013617983328892e-05, "loss": 0.3605, "step": 26005 }, { "epoch": 4.24521448104159, "grad_norm": 2.5859553813934326, "learning_rate": 1.701339178339298e-05, "loss": 0.3941, "step": 26006 }, { "epoch": 4.245377739684095, "grad_norm": 2.1446897983551025, "learning_rate": 1.7013165576394587e-05, "loss": 0.4253, "step": 26007 }, { "epoch": 4.245540998326599, "grad_norm": 1.731054425239563, "learning_rate": 1.7012939362333936e-05, "loss": 0.3313, "step": 26008 }, { "epoch": 4.245704256969104, "grad_norm": 2.1756198406219482, "learning_rate": 1.701271314121126e-05, "loss": 0.3826, "step": 26009 }, { "epoch": 4.245867515611607, "grad_norm": 2.2517125606536865, "learning_rate": 1.7012486913026784e-05, "loss": 0.3689, "step": 26010 }, { "epoch": 4.246030774254112, "grad_norm": 2.191232919692993, "learning_rate": 1.701226067778073e-05, "loss": 0.4291, "step": 26011 }, { "epoch": 4.246194032896616, "grad_norm": 1.8008582592010498, "learning_rate": 1.701203443547334e-05, "loss": 0.3389, "step": 26012 }, { "epoch": 4.246357291539121, "grad_norm": 2.384308099746704, "learning_rate": 1.7011808186104825e-05, "loss": 0.46, "step": 26013 }, { "epoch": 4.246520550181625, "grad_norm": 2.306887149810791, "learning_rate": 1.7011581929675424e-05, "loss": 0.4182, "step": 26014 }, { "epoch": 4.246683808824129, "grad_norm": 1.8488408327102661, "learning_rate": 1.7011355666185363e-05, "loss": 0.3417, "step": 26015 }, { "epoch": 4.246847067466634, "grad_norm": 2.262036085128784, "learning_rate": 1.701112939563487e-05, "loss": 0.4419, "step": 26016 }, { "epoch": 4.247010326109138, "grad_norm": 2.212773084640503, "learning_rate": 1.7010903118024166e-05, "loss": 0.4204, "step": 26017 }, { "epoch": 4.247173584751643, "grad_norm": 2.3906102180480957, "learning_rate": 1.701067683335349e-05, "loss": 0.418, "step": 26018 }, { "epoch": 4.247336843394147, "grad_norm": 2.30861759185791, "learning_rate": 1.701045054162306e-05, "loss": 0.4072, "step": 26019 }, { "epoch": 4.247500102036652, "grad_norm": 1.9109879732131958, "learning_rate": 1.701022424283311e-05, "loss": 0.3405, "step": 26020 }, { "epoch": 4.247663360679156, "grad_norm": 1.8970147371292114, "learning_rate": 1.7009997936983865e-05, "loss": 0.4635, "step": 26021 }, { "epoch": 4.2478266193216605, "grad_norm": 2.036965847015381, "learning_rate": 1.700977162407556e-05, "loss": 0.3819, "step": 26022 }, { "epoch": 4.247989877964165, "grad_norm": 2.838158130645752, "learning_rate": 1.700954530410841e-05, "loss": 0.4376, "step": 26023 }, { "epoch": 4.248153136606669, "grad_norm": 2.2223927974700928, "learning_rate": 1.700931897708265e-05, "loss": 0.4166, "step": 26024 }, { "epoch": 4.248316395249174, "grad_norm": 1.9953869581222534, "learning_rate": 1.700909264299851e-05, "loss": 0.3641, "step": 26025 }, { "epoch": 4.248479653891678, "grad_norm": 1.9950146675109863, "learning_rate": 1.7008866301856216e-05, "loss": 0.4013, "step": 26026 }, { "epoch": 4.248642912534183, "grad_norm": 2.098282814025879, "learning_rate": 1.7008639953655993e-05, "loss": 0.3763, "step": 26027 }, { "epoch": 4.248806171176686, "grad_norm": 1.8900312185287476, "learning_rate": 1.7008413598398074e-05, "loss": 0.3606, "step": 26028 }, { "epoch": 4.248969429819191, "grad_norm": 2.177732467651367, "learning_rate": 1.7008187236082684e-05, "loss": 0.3934, "step": 26029 }, { "epoch": 4.249132688461695, "grad_norm": 3.36160945892334, "learning_rate": 1.700796086671005e-05, "loss": 0.5397, "step": 26030 }, { "epoch": 4.2492959471041996, "grad_norm": 2.091885805130005, "learning_rate": 1.70077344902804e-05, "loss": 0.4091, "step": 26031 }, { "epoch": 4.249459205746704, "grad_norm": 1.9109526872634888, "learning_rate": 1.700750810679397e-05, "loss": 0.3641, "step": 26032 }, { "epoch": 4.249622464389208, "grad_norm": 2.0849533081054688, "learning_rate": 1.7007281716250972e-05, "loss": 0.3818, "step": 26033 }, { "epoch": 4.249785723031713, "grad_norm": 2.102933406829834, "learning_rate": 1.700705531865165e-05, "loss": 0.3791, "step": 26034 }, { "epoch": 4.249948981674217, "grad_norm": 2.0284111499786377, "learning_rate": 1.7006828913996222e-05, "loss": 0.3791, "step": 26035 }, { "epoch": 4.250112240316722, "grad_norm": 2.1687283515930176, "learning_rate": 1.700660250228492e-05, "loss": 0.3558, "step": 26036 }, { "epoch": 4.250275498959226, "grad_norm": 2.384092092514038, "learning_rate": 1.7006376083517972e-05, "loss": 0.4079, "step": 26037 }, { "epoch": 4.250438757601731, "grad_norm": 2.1289727687835693, "learning_rate": 1.7006149657695602e-05, "loss": 0.3791, "step": 26038 }, { "epoch": 4.250602016244235, "grad_norm": 2.425827741622925, "learning_rate": 1.700592322481804e-05, "loss": 0.3826, "step": 26039 }, { "epoch": 4.2507652748867395, "grad_norm": 2.2291624546051025, "learning_rate": 1.7005696784885518e-05, "loss": 0.3779, "step": 26040 }, { "epoch": 4.250928533529244, "grad_norm": 2.071732521057129, "learning_rate": 1.7005470337898264e-05, "loss": 0.3821, "step": 26041 }, { "epoch": 4.251091792171748, "grad_norm": 2.138765811920166, "learning_rate": 1.70052438838565e-05, "loss": 0.3551, "step": 26042 }, { "epoch": 4.251255050814253, "grad_norm": 1.8012391328811646, "learning_rate": 1.700501742276046e-05, "loss": 0.3491, "step": 26043 }, { "epoch": 4.251418309456757, "grad_norm": 2.6342766284942627, "learning_rate": 1.7004790954610366e-05, "loss": 0.4671, "step": 26044 }, { "epoch": 4.251581568099262, "grad_norm": 2.038409471511841, "learning_rate": 1.700456447940645e-05, "loss": 0.3579, "step": 26045 }, { "epoch": 4.251744826741765, "grad_norm": 2.298626184463501, "learning_rate": 1.7004337997148944e-05, "loss": 0.4409, "step": 26046 }, { "epoch": 4.25190808538427, "grad_norm": 2.1733903884887695, "learning_rate": 1.7004111507838067e-05, "loss": 0.436, "step": 26047 }, { "epoch": 4.252071344026774, "grad_norm": 1.6217762231826782, "learning_rate": 1.700388501147405e-05, "loss": 0.3628, "step": 26048 }, { "epoch": 4.2522346026692786, "grad_norm": 2.1706621646881104, "learning_rate": 1.7003658508057124e-05, "loss": 0.4408, "step": 26049 }, { "epoch": 4.252397861311783, "grad_norm": 1.9139199256896973, "learning_rate": 1.7003431997587516e-05, "loss": 0.3975, "step": 26050 }, { "epoch": 4.252561119954287, "grad_norm": 1.9823710918426514, "learning_rate": 1.7003205480065458e-05, "loss": 0.3737, "step": 26051 }, { "epoch": 4.252724378596792, "grad_norm": 2.2204673290252686, "learning_rate": 1.700297895549117e-05, "loss": 0.376, "step": 26052 }, { "epoch": 4.252887637239296, "grad_norm": 2.015692710876465, "learning_rate": 1.7002752423864883e-05, "loss": 0.3976, "step": 26053 }, { "epoch": 4.253050895881801, "grad_norm": 2.0995638370513916, "learning_rate": 1.700252588518683e-05, "loss": 0.4111, "step": 26054 }, { "epoch": 4.253214154524305, "grad_norm": 2.0018930435180664, "learning_rate": 1.7002299339457228e-05, "loss": 0.3819, "step": 26055 }, { "epoch": 4.25337741316681, "grad_norm": 2.299121618270874, "learning_rate": 1.700207278667632e-05, "loss": 0.4183, "step": 26056 }, { "epoch": 4.253540671809314, "grad_norm": 2.1345062255859375, "learning_rate": 1.7001846226844323e-05, "loss": 0.4477, "step": 26057 }, { "epoch": 4.2537039304518185, "grad_norm": 1.9973360300064087, "learning_rate": 1.7001619659961467e-05, "loss": 0.3297, "step": 26058 }, { "epoch": 4.253867189094323, "grad_norm": 2.2850148677825928, "learning_rate": 1.7001393086027987e-05, "loss": 0.433, "step": 26059 }, { "epoch": 4.254030447736827, "grad_norm": 2.0200603008270264, "learning_rate": 1.7001166505044106e-05, "loss": 0.3199, "step": 26060 }, { "epoch": 4.254193706379332, "grad_norm": 2.278477191925049, "learning_rate": 1.7000939917010048e-05, "loss": 0.368, "step": 26061 }, { "epoch": 4.254356965021836, "grad_norm": 2.4920766353607178, "learning_rate": 1.7000713321926046e-05, "loss": 0.3902, "step": 26062 }, { "epoch": 4.25452022366434, "grad_norm": 1.8684345483779907, "learning_rate": 1.700048671979233e-05, "loss": 0.3609, "step": 26063 }, { "epoch": 4.254683482306844, "grad_norm": 1.9212687015533447, "learning_rate": 1.7000260110609124e-05, "loss": 0.4078, "step": 26064 }, { "epoch": 4.254846740949349, "grad_norm": 2.2666051387786865, "learning_rate": 1.7000033494376658e-05, "loss": 0.3493, "step": 26065 }, { "epoch": 4.255009999591853, "grad_norm": 1.8751962184906006, "learning_rate": 1.699980687109516e-05, "loss": 0.3572, "step": 26066 }, { "epoch": 4.2551732582343575, "grad_norm": 1.9467928409576416, "learning_rate": 1.699958024076486e-05, "loss": 0.3341, "step": 26067 }, { "epoch": 4.255336516876862, "grad_norm": 2.1271426677703857, "learning_rate": 1.6999353603385983e-05, "loss": 0.4059, "step": 26068 }, { "epoch": 4.255499775519366, "grad_norm": 1.6164028644561768, "learning_rate": 1.6999126958958755e-05, "loss": 0.3035, "step": 26069 }, { "epoch": 4.255663034161871, "grad_norm": 2.369185209274292, "learning_rate": 1.6998900307483413e-05, "loss": 0.3635, "step": 26070 }, { "epoch": 4.255826292804375, "grad_norm": 1.6358109712600708, "learning_rate": 1.699867364896018e-05, "loss": 0.3128, "step": 26071 }, { "epoch": 4.25598955144688, "grad_norm": 1.7257565259933472, "learning_rate": 1.6998446983389286e-05, "loss": 0.3082, "step": 26072 }, { "epoch": 4.256152810089384, "grad_norm": 2.2143449783325195, "learning_rate": 1.6998220310770954e-05, "loss": 0.3678, "step": 26073 }, { "epoch": 4.256316068731889, "grad_norm": 2.386786699295044, "learning_rate": 1.6997993631105417e-05, "loss": 0.3815, "step": 26074 }, { "epoch": 4.256479327374393, "grad_norm": 2.2889580726623535, "learning_rate": 1.6997766944392902e-05, "loss": 0.41, "step": 26075 }, { "epoch": 4.2566425860168975, "grad_norm": 2.0363070964813232, "learning_rate": 1.699754025063364e-05, "loss": 0.3763, "step": 26076 }, { "epoch": 4.256805844659402, "grad_norm": 2.163041830062866, "learning_rate": 1.6997313549827853e-05, "loss": 0.4195, "step": 26077 }, { "epoch": 4.256969103301906, "grad_norm": 2.3561644554138184, "learning_rate": 1.6997086841975776e-05, "loss": 0.3755, "step": 26078 }, { "epoch": 4.257132361944411, "grad_norm": 2.3792974948883057, "learning_rate": 1.6996860127077633e-05, "loss": 0.3997, "step": 26079 }, { "epoch": 4.257295620586914, "grad_norm": 1.866492509841919, "learning_rate": 1.6996633405133656e-05, "loss": 0.3293, "step": 26080 }, { "epoch": 4.257458879229419, "grad_norm": 2.0467565059661865, "learning_rate": 1.699640667614407e-05, "loss": 0.424, "step": 26081 }, { "epoch": 4.257622137871923, "grad_norm": 1.8918077945709229, "learning_rate": 1.6996179940109103e-05, "loss": 0.3809, "step": 26082 }, { "epoch": 4.257785396514428, "grad_norm": 1.7473281621932983, "learning_rate": 1.6995953197028984e-05, "loss": 0.3322, "step": 26083 }, { "epoch": 4.257948655156932, "grad_norm": 1.8186841011047363, "learning_rate": 1.6995726446903943e-05, "loss": 0.362, "step": 26084 }, { "epoch": 4.2581119137994365, "grad_norm": 2.0771572589874268, "learning_rate": 1.699549968973421e-05, "loss": 0.3724, "step": 26085 }, { "epoch": 4.258275172441941, "grad_norm": 1.8949049711227417, "learning_rate": 1.699527292552001e-05, "loss": 0.3753, "step": 26086 }, { "epoch": 4.258438431084445, "grad_norm": 2.6652159690856934, "learning_rate": 1.6995046154261567e-05, "loss": 0.4574, "step": 26087 }, { "epoch": 4.25860168972695, "grad_norm": 2.26922345161438, "learning_rate": 1.6994819375959118e-05, "loss": 0.4445, "step": 26088 }, { "epoch": 4.258764948369454, "grad_norm": 2.1741435527801514, "learning_rate": 1.6994592590612885e-05, "loss": 0.4435, "step": 26089 }, { "epoch": 4.258928207011959, "grad_norm": 1.9395146369934082, "learning_rate": 1.6994365798223103e-05, "loss": 0.3276, "step": 26090 }, { "epoch": 4.259091465654463, "grad_norm": 2.3024590015411377, "learning_rate": 1.6994138998789997e-05, "loss": 0.4055, "step": 26091 }, { "epoch": 4.259254724296968, "grad_norm": 2.1568541526794434, "learning_rate": 1.6993912192313792e-05, "loss": 0.3661, "step": 26092 }, { "epoch": 4.259417982939472, "grad_norm": 1.892317295074463, "learning_rate": 1.6993685378794718e-05, "loss": 0.3417, "step": 26093 }, { "epoch": 4.2595812415819765, "grad_norm": 2.104749917984009, "learning_rate": 1.699345855823301e-05, "loss": 0.3661, "step": 26094 }, { "epoch": 4.259744500224481, "grad_norm": 2.5791451930999756, "learning_rate": 1.699323173062889e-05, "loss": 0.461, "step": 26095 }, { "epoch": 4.259907758866985, "grad_norm": 1.9669009447097778, "learning_rate": 1.6993004895982584e-05, "loss": 0.3785, "step": 26096 }, { "epoch": 4.26007101750949, "grad_norm": 2.1129932403564453, "learning_rate": 1.6992778054294325e-05, "loss": 0.435, "step": 26097 }, { "epoch": 4.260234276151994, "grad_norm": 1.629711389541626, "learning_rate": 1.699255120556434e-05, "loss": 0.3157, "step": 26098 }, { "epoch": 4.260397534794498, "grad_norm": 1.8174031972885132, "learning_rate": 1.699232434979286e-05, "loss": 0.3502, "step": 26099 }, { "epoch": 4.260560793437002, "grad_norm": 1.668990969657898, "learning_rate": 1.6992097486980107e-05, "loss": 0.3281, "step": 26100 }, { "epoch": 4.260724052079507, "grad_norm": 1.6659525632858276, "learning_rate": 1.699187061712632e-05, "loss": 0.3542, "step": 26101 }, { "epoch": 4.260887310722011, "grad_norm": 2.015955924987793, "learning_rate": 1.6991643740231714e-05, "loss": 0.3861, "step": 26102 }, { "epoch": 4.2610505693645155, "grad_norm": 2.145934581756592, "learning_rate": 1.6991416856296528e-05, "loss": 0.4261, "step": 26103 }, { "epoch": 4.26121382800702, "grad_norm": 1.8515971899032593, "learning_rate": 1.699118996532099e-05, "loss": 0.3447, "step": 26104 }, { "epoch": 4.261377086649524, "grad_norm": 1.8054993152618408, "learning_rate": 1.6990963067305322e-05, "loss": 0.3211, "step": 26105 }, { "epoch": 4.261540345292029, "grad_norm": 1.7143675088882446, "learning_rate": 1.699073616224976e-05, "loss": 0.3515, "step": 26106 }, { "epoch": 4.261703603934533, "grad_norm": 2.1873326301574707, "learning_rate": 1.6990509250154522e-05, "loss": 0.3849, "step": 26107 }, { "epoch": 4.261866862577038, "grad_norm": 2.1769940853118896, "learning_rate": 1.699028233101985e-05, "loss": 0.4209, "step": 26108 }, { "epoch": 4.262030121219542, "grad_norm": 1.960612416267395, "learning_rate": 1.699005540484596e-05, "loss": 0.357, "step": 26109 }, { "epoch": 4.262193379862047, "grad_norm": 1.7296258211135864, "learning_rate": 1.6989828471633086e-05, "loss": 0.3326, "step": 26110 }, { "epoch": 4.262356638504551, "grad_norm": 1.780946969985962, "learning_rate": 1.698960153138146e-05, "loss": 0.3625, "step": 26111 }, { "epoch": 4.2625198971470555, "grad_norm": 1.701419472694397, "learning_rate": 1.6989374584091307e-05, "loss": 0.3354, "step": 26112 }, { "epoch": 4.26268315578956, "grad_norm": 1.7962510585784912, "learning_rate": 1.6989147629762852e-05, "loss": 0.3546, "step": 26113 }, { "epoch": 4.262846414432064, "grad_norm": 1.8865593671798706, "learning_rate": 1.6988920668396332e-05, "loss": 0.3405, "step": 26114 }, { "epoch": 4.263009673074569, "grad_norm": 2.307560682296753, "learning_rate": 1.6988693699991968e-05, "loss": 0.4699, "step": 26115 }, { "epoch": 4.263172931717072, "grad_norm": 2.2463865280151367, "learning_rate": 1.698846672454999e-05, "loss": 0.3847, "step": 26116 }, { "epoch": 4.263336190359577, "grad_norm": 2.299456834793091, "learning_rate": 1.698823974207063e-05, "loss": 0.4672, "step": 26117 }, { "epoch": 4.263499449002081, "grad_norm": 2.447268486022949, "learning_rate": 1.6988012752554117e-05, "loss": 0.7154, "step": 26118 }, { "epoch": 4.263662707644586, "grad_norm": 2.0189778804779053, "learning_rate": 1.6987785756000672e-05, "loss": 0.3513, "step": 26119 }, { "epoch": 4.26382596628709, "grad_norm": 2.3450355529785156, "learning_rate": 1.698755875241053e-05, "loss": 0.4061, "step": 26120 }, { "epoch": 4.2639892249295945, "grad_norm": 1.8592230081558228, "learning_rate": 1.698733174178392e-05, "loss": 0.3559, "step": 26121 }, { "epoch": 4.264152483572099, "grad_norm": 1.9513962268829346, "learning_rate": 1.6987104724121067e-05, "loss": 0.3958, "step": 26122 }, { "epoch": 4.264315742214603, "grad_norm": 2.0562474727630615, "learning_rate": 1.69868776994222e-05, "loss": 0.4402, "step": 26123 }, { "epoch": 4.264479000857108, "grad_norm": 2.1358892917633057, "learning_rate": 1.6986650667687552e-05, "loss": 0.3725, "step": 26124 }, { "epoch": 4.264642259499612, "grad_norm": 2.5209572315216064, "learning_rate": 1.698642362891735e-05, "loss": 0.4101, "step": 26125 }, { "epoch": 4.264805518142117, "grad_norm": 2.075615167617798, "learning_rate": 1.6986196583111815e-05, "loss": 0.3121, "step": 26126 }, { "epoch": 4.264968776784621, "grad_norm": 1.912424087524414, "learning_rate": 1.698596953027119e-05, "loss": 0.3683, "step": 26127 }, { "epoch": 4.265132035427126, "grad_norm": 2.4009430408477783, "learning_rate": 1.698574247039569e-05, "loss": 0.4717, "step": 26128 }, { "epoch": 4.26529529406963, "grad_norm": 2.292938232421875, "learning_rate": 1.698551540348555e-05, "loss": 0.3943, "step": 26129 }, { "epoch": 4.2654585527121345, "grad_norm": 1.9772608280181885, "learning_rate": 1.6985288329540996e-05, "loss": 0.3439, "step": 26130 }, { "epoch": 4.265621811354639, "grad_norm": 1.7023541927337646, "learning_rate": 1.6985061248562263e-05, "loss": 0.3091, "step": 26131 }, { "epoch": 4.265785069997143, "grad_norm": 2.102595090866089, "learning_rate": 1.6984834160549573e-05, "loss": 0.3788, "step": 26132 }, { "epoch": 4.265948328639647, "grad_norm": 2.3572230339050293, "learning_rate": 1.6984607065503154e-05, "loss": 0.4102, "step": 26133 }, { "epoch": 4.266111587282151, "grad_norm": 2.1248114109039307, "learning_rate": 1.6984379963423242e-05, "loss": 0.3729, "step": 26134 }, { "epoch": 4.266274845924656, "grad_norm": 1.9777488708496094, "learning_rate": 1.6984152854310063e-05, "loss": 0.3952, "step": 26135 }, { "epoch": 4.26643810456716, "grad_norm": 2.677271604537964, "learning_rate": 1.6983925738163837e-05, "loss": 0.4291, "step": 26136 }, { "epoch": 4.266601363209665, "grad_norm": 1.8837214708328247, "learning_rate": 1.69836986149848e-05, "loss": 0.4114, "step": 26137 }, { "epoch": 4.266764621852169, "grad_norm": 2.902949333190918, "learning_rate": 1.6983471484773187e-05, "loss": 0.4641, "step": 26138 }, { "epoch": 4.2669278804946735, "grad_norm": 1.8169156312942505, "learning_rate": 1.6983244347529216e-05, "loss": 0.3675, "step": 26139 }, { "epoch": 4.267091139137178, "grad_norm": 1.5191184282302856, "learning_rate": 1.698301720325312e-05, "loss": 0.3134, "step": 26140 }, { "epoch": 4.267254397779682, "grad_norm": 2.078831434249878, "learning_rate": 1.6982790051945128e-05, "loss": 0.3655, "step": 26141 }, { "epoch": 4.267417656422187, "grad_norm": 2.4806478023529053, "learning_rate": 1.698256289360547e-05, "loss": 0.4603, "step": 26142 }, { "epoch": 4.267580915064691, "grad_norm": 2.0915863513946533, "learning_rate": 1.698233572823437e-05, "loss": 0.402, "step": 26143 }, { "epoch": 4.267744173707196, "grad_norm": 2.1791560649871826, "learning_rate": 1.6982108555832063e-05, "loss": 0.4216, "step": 26144 }, { "epoch": 4.2679074323497, "grad_norm": 2.005189895629883, "learning_rate": 1.698188137639877e-05, "loss": 0.4404, "step": 26145 }, { "epoch": 4.268070690992205, "grad_norm": 2.249403238296509, "learning_rate": 1.698165418993473e-05, "loss": 0.3758, "step": 26146 }, { "epoch": 4.268233949634709, "grad_norm": 1.9761130809783936, "learning_rate": 1.698142699644016e-05, "loss": 0.3778, "step": 26147 }, { "epoch": 4.2683972082772135, "grad_norm": 2.216827154159546, "learning_rate": 1.6981199795915298e-05, "loss": 0.4064, "step": 26148 }, { "epoch": 4.268560466919718, "grad_norm": 2.201894521713257, "learning_rate": 1.698097258836037e-05, "loss": 0.461, "step": 26149 }, { "epoch": 4.268723725562222, "grad_norm": 2.4693005084991455, "learning_rate": 1.6980745373775604e-05, "loss": 0.4504, "step": 26150 }, { "epoch": 4.268886984204727, "grad_norm": 1.9995497465133667, "learning_rate": 1.6980518152161233e-05, "loss": 0.3731, "step": 26151 }, { "epoch": 4.26905024284723, "grad_norm": 1.920210838317871, "learning_rate": 1.6980290923517475e-05, "loss": 0.3413, "step": 26152 }, { "epoch": 4.269213501489735, "grad_norm": 2.2935352325439453, "learning_rate": 1.698006368784457e-05, "loss": 0.466, "step": 26153 }, { "epoch": 4.269376760132239, "grad_norm": 2.381349802017212, "learning_rate": 1.697983644514274e-05, "loss": 0.4844, "step": 26154 }, { "epoch": 4.269540018774744, "grad_norm": 2.1286845207214355, "learning_rate": 1.697960919541222e-05, "loss": 0.4126, "step": 26155 }, { "epoch": 4.269703277417248, "grad_norm": 2.2004239559173584, "learning_rate": 1.6979381938653232e-05, "loss": 0.4033, "step": 26156 }, { "epoch": 4.2698665360597525, "grad_norm": 2.2551417350769043, "learning_rate": 1.6979154674866012e-05, "loss": 0.3544, "step": 26157 }, { "epoch": 4.270029794702257, "grad_norm": 1.8995376825332642, "learning_rate": 1.6978927404050784e-05, "loss": 0.3597, "step": 26158 }, { "epoch": 4.270193053344761, "grad_norm": 2.0100958347320557, "learning_rate": 1.6978700126207774e-05, "loss": 0.3897, "step": 26159 }, { "epoch": 4.270356311987266, "grad_norm": 2.3165743350982666, "learning_rate": 1.697847284133722e-05, "loss": 0.3836, "step": 26160 }, { "epoch": 4.27051957062977, "grad_norm": 2.2397050857543945, "learning_rate": 1.6978245549439342e-05, "loss": 0.395, "step": 26161 }, { "epoch": 4.270682829272275, "grad_norm": 2.446195363998413, "learning_rate": 1.6978018250514377e-05, "loss": 0.4233, "step": 26162 }, { "epoch": 4.270846087914779, "grad_norm": 2.326991558074951, "learning_rate": 1.6977790944562545e-05, "loss": 0.4277, "step": 26163 }, { "epoch": 4.271009346557284, "grad_norm": 2.1111137866973877, "learning_rate": 1.697756363158408e-05, "loss": 0.4006, "step": 26164 }, { "epoch": 4.271172605199788, "grad_norm": 1.9256850481033325, "learning_rate": 1.6977336311579212e-05, "loss": 0.4064, "step": 26165 }, { "epoch": 4.2713358638422925, "grad_norm": 2.3122682571411133, "learning_rate": 1.6977108984548166e-05, "loss": 0.3647, "step": 26166 }, { "epoch": 4.271499122484797, "grad_norm": 2.050997495651245, "learning_rate": 1.6976881650491174e-05, "loss": 0.3845, "step": 26167 }, { "epoch": 4.271662381127301, "grad_norm": 1.859452724456787, "learning_rate": 1.6976654309408464e-05, "loss": 0.3347, "step": 26168 }, { "epoch": 4.271825639769805, "grad_norm": 1.7406940460205078, "learning_rate": 1.6976426961300267e-05, "loss": 0.3032, "step": 26169 }, { "epoch": 4.271988898412309, "grad_norm": 1.629355788230896, "learning_rate": 1.697619960616681e-05, "loss": 0.3304, "step": 26170 }, { "epoch": 4.272152157054814, "grad_norm": 2.0379903316497803, "learning_rate": 1.697597224400832e-05, "loss": 0.3088, "step": 26171 }, { "epoch": 4.272315415697318, "grad_norm": 2.09542179107666, "learning_rate": 1.6975744874825028e-05, "loss": 0.4292, "step": 26172 }, { "epoch": 4.272478674339823, "grad_norm": 2.1458582878112793, "learning_rate": 1.6975517498617163e-05, "loss": 0.3737, "step": 26173 }, { "epoch": 4.272641932982327, "grad_norm": 1.443523645401001, "learning_rate": 1.6975290115384955e-05, "loss": 0.283, "step": 26174 }, { "epoch": 4.2728051916248315, "grad_norm": 1.8303916454315186, "learning_rate": 1.697506272512863e-05, "loss": 0.3356, "step": 26175 }, { "epoch": 4.272968450267336, "grad_norm": 2.4958722591400146, "learning_rate": 1.6974835327848417e-05, "loss": 0.4494, "step": 26176 }, { "epoch": 4.27313170890984, "grad_norm": 1.9464530944824219, "learning_rate": 1.697460792354455e-05, "loss": 0.3509, "step": 26177 }, { "epoch": 4.273294967552345, "grad_norm": 2.2294554710388184, "learning_rate": 1.6974380512217252e-05, "loss": 0.4167, "step": 26178 }, { "epoch": 4.273458226194849, "grad_norm": 1.8086986541748047, "learning_rate": 1.6974153093866757e-05, "loss": 0.308, "step": 26179 }, { "epoch": 4.273621484837354, "grad_norm": 2.284785747528076, "learning_rate": 1.697392566849329e-05, "loss": 0.407, "step": 26180 }, { "epoch": 4.273784743479858, "grad_norm": 2.2697882652282715, "learning_rate": 1.6973698236097082e-05, "loss": 0.3488, "step": 26181 }, { "epoch": 4.273948002122363, "grad_norm": 2.3326029777526855, "learning_rate": 1.697347079667836e-05, "loss": 0.3753, "step": 26182 }, { "epoch": 4.274111260764867, "grad_norm": 2.3148813247680664, "learning_rate": 1.697324335023736e-05, "loss": 0.4327, "step": 26183 }, { "epoch": 4.2742745194073715, "grad_norm": 2.256876230239868, "learning_rate": 1.6973015896774302e-05, "loss": 0.4019, "step": 26184 }, { "epoch": 4.274437778049876, "grad_norm": 2.073712110519409, "learning_rate": 1.697278843628942e-05, "loss": 0.4269, "step": 26185 }, { "epoch": 4.2746010366923795, "grad_norm": 1.9821479320526123, "learning_rate": 1.697256096878294e-05, "loss": 0.3752, "step": 26186 }, { "epoch": 4.274764295334884, "grad_norm": 1.9614590406417847, "learning_rate": 1.6972333494255096e-05, "loss": 0.3891, "step": 26187 }, { "epoch": 4.274927553977388, "grad_norm": 1.9316128492355347, "learning_rate": 1.697210601270611e-05, "loss": 0.3242, "step": 26188 }, { "epoch": 4.275090812619893, "grad_norm": 1.687091588973999, "learning_rate": 1.6971878524136218e-05, "loss": 0.3517, "step": 26189 }, { "epoch": 4.275254071262397, "grad_norm": 2.456191062927246, "learning_rate": 1.697165102854565e-05, "loss": 0.5015, "step": 26190 }, { "epoch": 4.275417329904902, "grad_norm": 2.456744909286499, "learning_rate": 1.6971423525934623e-05, "loss": 0.4046, "step": 26191 }, { "epoch": 4.275580588547406, "grad_norm": 1.9235190153121948, "learning_rate": 1.697119601630338e-05, "loss": 0.3811, "step": 26192 }, { "epoch": 4.2757438471899105, "grad_norm": 1.9606587886810303, "learning_rate": 1.697096849965214e-05, "loss": 0.3557, "step": 26193 }, { "epoch": 4.275907105832415, "grad_norm": 1.799331784248352, "learning_rate": 1.6970740975981143e-05, "loss": 0.3796, "step": 26194 }, { "epoch": 4.276070364474919, "grad_norm": 1.8020166158676147, "learning_rate": 1.6970513445290606e-05, "loss": 0.3389, "step": 26195 }, { "epoch": 4.276233623117424, "grad_norm": 2.377990961074829, "learning_rate": 1.6970285907580768e-05, "loss": 0.4236, "step": 26196 }, { "epoch": 4.276396881759928, "grad_norm": 2.227015972137451, "learning_rate": 1.6970058362851852e-05, "loss": 0.3966, "step": 26197 }, { "epoch": 4.276560140402433, "grad_norm": 2.834791660308838, "learning_rate": 1.6969830811104087e-05, "loss": 0.46, "step": 26198 }, { "epoch": 4.276723399044937, "grad_norm": 2.2775485515594482, "learning_rate": 1.6969603252337704e-05, "loss": 0.3753, "step": 26199 }, { "epoch": 4.276886657687442, "grad_norm": 2.8156659603118896, "learning_rate": 1.696937568655294e-05, "loss": 0.4394, "step": 26200 }, { "epoch": 4.277049916329946, "grad_norm": 2.2856557369232178, "learning_rate": 1.6969148113750007e-05, "loss": 0.3565, "step": 26201 }, { "epoch": 4.2772131749724505, "grad_norm": 2.0347633361816406, "learning_rate": 1.6968920533929147e-05, "loss": 0.3691, "step": 26202 }, { "epoch": 4.277376433614955, "grad_norm": 2.1988208293914795, "learning_rate": 1.6968692947090588e-05, "loss": 0.4119, "step": 26203 }, { "epoch": 4.277539692257459, "grad_norm": 2.5027244091033936, "learning_rate": 1.6968465353234555e-05, "loss": 0.4469, "step": 26204 }, { "epoch": 4.277702950899963, "grad_norm": 2.092778444290161, "learning_rate": 1.696823775236128e-05, "loss": 0.3482, "step": 26205 }, { "epoch": 4.277866209542467, "grad_norm": 2.2293293476104736, "learning_rate": 1.696801014447099e-05, "loss": 0.3707, "step": 26206 }, { "epoch": 4.278029468184972, "grad_norm": 1.9762738943099976, "learning_rate": 1.6967782529563916e-05, "loss": 0.4004, "step": 26207 }, { "epoch": 4.278192726827476, "grad_norm": 2.2024073600769043, "learning_rate": 1.696755490764029e-05, "loss": 0.335, "step": 26208 }, { "epoch": 4.278355985469981, "grad_norm": 2.267533779144287, "learning_rate": 1.6967327278700335e-05, "loss": 0.3352, "step": 26209 }, { "epoch": 4.278519244112485, "grad_norm": 1.724137306213379, "learning_rate": 1.6967099642744284e-05, "loss": 0.3339, "step": 26210 }, { "epoch": 4.2786825027549895, "grad_norm": 1.9145216941833496, "learning_rate": 1.6966871999772367e-05, "loss": 0.393, "step": 26211 }, { "epoch": 4.278845761397494, "grad_norm": 1.910502552986145, "learning_rate": 1.696664434978481e-05, "loss": 0.3392, "step": 26212 }, { "epoch": 4.279009020039998, "grad_norm": 2.6600468158721924, "learning_rate": 1.6966416692781842e-05, "loss": 0.3428, "step": 26213 }, { "epoch": 4.279172278682503, "grad_norm": 1.5250818729400635, "learning_rate": 1.6966189028763697e-05, "loss": 0.2995, "step": 26214 }, { "epoch": 4.279335537325007, "grad_norm": 1.9511018991470337, "learning_rate": 1.69659613577306e-05, "loss": 0.3509, "step": 26215 }, { "epoch": 4.279498795967512, "grad_norm": 1.9366222620010376, "learning_rate": 1.6965733679682784e-05, "loss": 0.3538, "step": 26216 }, { "epoch": 4.279662054610016, "grad_norm": 2.251250743865967, "learning_rate": 1.6965505994620475e-05, "loss": 0.3528, "step": 26217 }, { "epoch": 4.279825313252521, "grad_norm": 2.5165698528289795, "learning_rate": 1.6965278302543904e-05, "loss": 0.4168, "step": 26218 }, { "epoch": 4.279988571895025, "grad_norm": 1.8072868585586548, "learning_rate": 1.69650506034533e-05, "loss": 0.3368, "step": 26219 }, { "epoch": 4.2801518305375295, "grad_norm": 2.1576988697052, "learning_rate": 1.696482289734889e-05, "loss": 0.3822, "step": 26220 }, { "epoch": 4.280315089180034, "grad_norm": 1.9210089445114136, "learning_rate": 1.6964595184230906e-05, "loss": 0.3528, "step": 26221 }, { "epoch": 4.280478347822537, "grad_norm": 2.22149395942688, "learning_rate": 1.6964367464099577e-05, "loss": 0.38, "step": 26222 }, { "epoch": 4.280641606465042, "grad_norm": 2.0181326866149902, "learning_rate": 1.696413973695513e-05, "loss": 0.3449, "step": 26223 }, { "epoch": 4.280804865107546, "grad_norm": 1.8784854412078857, "learning_rate": 1.69639120027978e-05, "loss": 0.3708, "step": 26224 }, { "epoch": 4.280968123750051, "grad_norm": 2.0646846294403076, "learning_rate": 1.6963684261627808e-05, "loss": 0.3266, "step": 26225 }, { "epoch": 4.281131382392555, "grad_norm": 1.5801972150802612, "learning_rate": 1.696345651344539e-05, "loss": 0.2979, "step": 26226 }, { "epoch": 4.28129464103506, "grad_norm": 1.8687944412231445, "learning_rate": 1.696322875825077e-05, "loss": 0.3493, "step": 26227 }, { "epoch": 4.281457899677564, "grad_norm": 2.5290377140045166, "learning_rate": 1.6963000996044183e-05, "loss": 0.4093, "step": 26228 }, { "epoch": 4.2816211583200685, "grad_norm": 2.368163824081421, "learning_rate": 1.6962773226825854e-05, "loss": 0.3933, "step": 26229 }, { "epoch": 4.281784416962573, "grad_norm": 1.8242645263671875, "learning_rate": 1.6962545450596017e-05, "loss": 0.3606, "step": 26230 }, { "epoch": 4.281947675605077, "grad_norm": 1.6057759523391724, "learning_rate": 1.69623176673549e-05, "loss": 0.3037, "step": 26231 }, { "epoch": 4.282110934247582, "grad_norm": 2.1257894039154053, "learning_rate": 1.6962089877102727e-05, "loss": 0.3357, "step": 26232 }, { "epoch": 4.282274192890086, "grad_norm": 1.8580927848815918, "learning_rate": 1.6961862079839734e-05, "loss": 0.3757, "step": 26233 }, { "epoch": 4.282437451532591, "grad_norm": 1.7760485410690308, "learning_rate": 1.6961634275566147e-05, "loss": 0.3227, "step": 26234 }, { "epoch": 4.282600710175095, "grad_norm": 1.7134182453155518, "learning_rate": 1.6961406464282197e-05, "loss": 0.3396, "step": 26235 }, { "epoch": 4.2827639688176, "grad_norm": 2.407777786254883, "learning_rate": 1.696117864598811e-05, "loss": 0.372, "step": 26236 }, { "epoch": 4.282927227460104, "grad_norm": 1.989172339439392, "learning_rate": 1.696095082068412e-05, "loss": 0.3573, "step": 26237 }, { "epoch": 4.2830904861026085, "grad_norm": 1.931825041770935, "learning_rate": 1.6960722988370452e-05, "loss": 0.3598, "step": 26238 }, { "epoch": 4.283253744745112, "grad_norm": 2.199122190475464, "learning_rate": 1.696049514904734e-05, "loss": 0.3725, "step": 26239 }, { "epoch": 4.283417003387616, "grad_norm": 2.302412271499634, "learning_rate": 1.6960267302715014e-05, "loss": 0.4017, "step": 26240 }, { "epoch": 4.283580262030121, "grad_norm": 2.074368476867676, "learning_rate": 1.6960039449373696e-05, "loss": 0.3564, "step": 26241 }, { "epoch": 4.283743520672625, "grad_norm": 1.668949842453003, "learning_rate": 1.695981158902362e-05, "loss": 0.3299, "step": 26242 }, { "epoch": 4.28390677931513, "grad_norm": 2.06303334236145, "learning_rate": 1.695958372166502e-05, "loss": 0.3338, "step": 26243 }, { "epoch": 4.284070037957634, "grad_norm": 2.365520477294922, "learning_rate": 1.6959355847298115e-05, "loss": 0.3503, "step": 26244 }, { "epoch": 4.284233296600139, "grad_norm": 1.6387107372283936, "learning_rate": 1.6959127965923144e-05, "loss": 0.3023, "step": 26245 }, { "epoch": 4.284396555242643, "grad_norm": 2.0364458560943604, "learning_rate": 1.6958900077540335e-05, "loss": 0.35, "step": 26246 }, { "epoch": 4.2845598138851475, "grad_norm": 2.9514386653900146, "learning_rate": 1.695867218214991e-05, "loss": 0.3365, "step": 26247 }, { "epoch": 4.284723072527652, "grad_norm": 2.2232110500335693, "learning_rate": 1.6958444279752106e-05, "loss": 0.3997, "step": 26248 }, { "epoch": 4.284886331170156, "grad_norm": 2.085685968399048, "learning_rate": 1.6958216370347155e-05, "loss": 0.3626, "step": 26249 }, { "epoch": 4.285049589812661, "grad_norm": 2.1039483547210693, "learning_rate": 1.6957988453935276e-05, "loss": 0.4126, "step": 26250 }, { "epoch": 4.285212848455165, "grad_norm": 2.444951057434082, "learning_rate": 1.695776053051671e-05, "loss": 0.3912, "step": 26251 }, { "epoch": 4.28537610709767, "grad_norm": 2.348491907119751, "learning_rate": 1.6957532600091678e-05, "loss": 0.4028, "step": 26252 }, { "epoch": 4.285539365740174, "grad_norm": 2.1491401195526123, "learning_rate": 1.6957304662660413e-05, "loss": 0.3788, "step": 26253 }, { "epoch": 4.285702624382679, "grad_norm": 2.6322951316833496, "learning_rate": 1.695707671822314e-05, "loss": 0.4311, "step": 26254 }, { "epoch": 4.285865883025183, "grad_norm": 2.2443957328796387, "learning_rate": 1.6956848766780097e-05, "loss": 0.421, "step": 26255 }, { "epoch": 4.2860291416676874, "grad_norm": 2.187757968902588, "learning_rate": 1.695662080833151e-05, "loss": 0.3669, "step": 26256 }, { "epoch": 4.286192400310192, "grad_norm": 2.5199134349823, "learning_rate": 1.6956392842877604e-05, "loss": 0.4622, "step": 26257 }, { "epoch": 4.286355658952695, "grad_norm": 2.0013184547424316, "learning_rate": 1.6956164870418616e-05, "loss": 0.3378, "step": 26258 }, { "epoch": 4.2865189175952, "grad_norm": 1.8224049806594849, "learning_rate": 1.695593689095477e-05, "loss": 0.331, "step": 26259 }, { "epoch": 4.286682176237704, "grad_norm": 2.0323123931884766, "learning_rate": 1.6955708904486297e-05, "loss": 0.3426, "step": 26260 }, { "epoch": 4.286845434880209, "grad_norm": 2.1545042991638184, "learning_rate": 1.6955480911013428e-05, "loss": 0.3774, "step": 26261 }, { "epoch": 4.287008693522713, "grad_norm": 2.150343418121338, "learning_rate": 1.6955252910536392e-05, "loss": 0.3797, "step": 26262 }, { "epoch": 4.287171952165218, "grad_norm": 2.130786418914795, "learning_rate": 1.6955024903055413e-05, "loss": 0.3827, "step": 26263 }, { "epoch": 4.287335210807722, "grad_norm": 2.2453408241271973, "learning_rate": 1.6954796888570733e-05, "loss": 0.4001, "step": 26264 }, { "epoch": 4.2874984694502265, "grad_norm": 1.7976740598678589, "learning_rate": 1.6954568867082574e-05, "loss": 0.2986, "step": 26265 }, { "epoch": 4.287661728092731, "grad_norm": 1.978738784790039, "learning_rate": 1.695434083859116e-05, "loss": 0.3571, "step": 26266 }, { "epoch": 4.287824986735235, "grad_norm": 2.0921003818511963, "learning_rate": 1.695411280309673e-05, "loss": 0.3588, "step": 26267 }, { "epoch": 4.28798824537774, "grad_norm": 1.9003511667251587, "learning_rate": 1.695388476059951e-05, "loss": 0.3823, "step": 26268 }, { "epoch": 4.288151504020244, "grad_norm": 2.3350021839141846, "learning_rate": 1.695365671109973e-05, "loss": 0.3966, "step": 26269 }, { "epoch": 4.288314762662749, "grad_norm": 2.4017014503479004, "learning_rate": 1.695342865459762e-05, "loss": 0.3817, "step": 26270 }, { "epoch": 4.288478021305253, "grad_norm": 2.404733419418335, "learning_rate": 1.695320059109341e-05, "loss": 0.3811, "step": 26271 }, { "epoch": 4.288641279947758, "grad_norm": 1.9801467657089233, "learning_rate": 1.695297252058733e-05, "loss": 0.3681, "step": 26272 }, { "epoch": 4.288804538590262, "grad_norm": 1.8419934511184692, "learning_rate": 1.6952744443079602e-05, "loss": 0.3628, "step": 26273 }, { "epoch": 4.288967797232766, "grad_norm": 2.135256052017212, "learning_rate": 1.6952516358570468e-05, "loss": 0.4546, "step": 26274 }, { "epoch": 4.28913105587527, "grad_norm": 2.286241292953491, "learning_rate": 1.695228826706015e-05, "loss": 0.5289, "step": 26275 }, { "epoch": 4.289294314517774, "grad_norm": 2.182835817337036, "learning_rate": 1.695206016854888e-05, "loss": 0.4021, "step": 26276 }, { "epoch": 4.289457573160279, "grad_norm": 2.0771286487579346, "learning_rate": 1.6951832063036887e-05, "loss": 0.3971, "step": 26277 }, { "epoch": 4.289620831802783, "grad_norm": 2.369415283203125, "learning_rate": 1.69516039505244e-05, "loss": 0.4533, "step": 26278 }, { "epoch": 4.289784090445288, "grad_norm": 1.7606871128082275, "learning_rate": 1.695137583101165e-05, "loss": 0.333, "step": 26279 }, { "epoch": 4.289947349087792, "grad_norm": 1.7871193885803223, "learning_rate": 1.695114770449887e-05, "loss": 0.3651, "step": 26280 }, { "epoch": 4.290110607730297, "grad_norm": 2.2296841144561768, "learning_rate": 1.695091957098628e-05, "loss": 0.3818, "step": 26281 }, { "epoch": 4.290273866372801, "grad_norm": 1.6859772205352783, "learning_rate": 1.695069143047412e-05, "loss": 0.3448, "step": 26282 }, { "epoch": 4.2904371250153055, "grad_norm": 2.5706422328948975, "learning_rate": 1.6950463282962613e-05, "loss": 0.4884, "step": 26283 }, { "epoch": 4.29060038365781, "grad_norm": 2.0664846897125244, "learning_rate": 1.6950235128451992e-05, "loss": 0.3952, "step": 26284 }, { "epoch": 4.290763642300314, "grad_norm": 2.0542218685150146, "learning_rate": 1.695000696694249e-05, "loss": 0.3828, "step": 26285 }, { "epoch": 4.290926900942819, "grad_norm": 1.8720858097076416, "learning_rate": 1.6949778798434328e-05, "loss": 0.3791, "step": 26286 }, { "epoch": 4.291090159585323, "grad_norm": 2.0669443607330322, "learning_rate": 1.6949550622927743e-05, "loss": 0.4011, "step": 26287 }, { "epoch": 4.291253418227828, "grad_norm": 2.313006639480591, "learning_rate": 1.694932244042296e-05, "loss": 0.398, "step": 26288 }, { "epoch": 4.291416676870332, "grad_norm": 2.080174207687378, "learning_rate": 1.6949094250920216e-05, "loss": 0.3617, "step": 26289 }, { "epoch": 4.291579935512837, "grad_norm": 1.8641618490219116, "learning_rate": 1.6948866054419732e-05, "loss": 0.3168, "step": 26290 }, { "epoch": 4.291743194155341, "grad_norm": 2.1433193683624268, "learning_rate": 1.6948637850921743e-05, "loss": 0.3639, "step": 26291 }, { "epoch": 4.2919064527978446, "grad_norm": 2.0356268882751465, "learning_rate": 1.6948409640426476e-05, "loss": 0.3437, "step": 26292 }, { "epoch": 4.292069711440349, "grad_norm": 2.4091126918792725, "learning_rate": 1.6948181422934165e-05, "loss": 0.4632, "step": 26293 }, { "epoch": 4.292232970082853, "grad_norm": 2.0151584148406982, "learning_rate": 1.6947953198445036e-05, "loss": 0.3841, "step": 26294 }, { "epoch": 4.292396228725358, "grad_norm": 2.0855228900909424, "learning_rate": 1.6947724966959317e-05, "loss": 0.4177, "step": 26295 }, { "epoch": 4.292559487367862, "grad_norm": 2.0445165634155273, "learning_rate": 1.6947496728477245e-05, "loss": 0.371, "step": 26296 }, { "epoch": 4.292722746010367, "grad_norm": 2.4735536575317383, "learning_rate": 1.6947268482999042e-05, "loss": 0.424, "step": 26297 }, { "epoch": 4.292886004652871, "grad_norm": 2.4921512603759766, "learning_rate": 1.6947040230524945e-05, "loss": 0.3838, "step": 26298 }, { "epoch": 4.293049263295376, "grad_norm": 2.298614978790283, "learning_rate": 1.6946811971055178e-05, "loss": 0.401, "step": 26299 }, { "epoch": 4.29321252193788, "grad_norm": 2.2038540840148926, "learning_rate": 1.6946583704589973e-05, "loss": 0.358, "step": 26300 }, { "epoch": 4.2933757805803845, "grad_norm": 2.971250534057617, "learning_rate": 1.6946355431129563e-05, "loss": 0.3862, "step": 26301 }, { "epoch": 4.293539039222889, "grad_norm": 2.453827142715454, "learning_rate": 1.6946127150674172e-05, "loss": 0.4233, "step": 26302 }, { "epoch": 4.293702297865393, "grad_norm": 2.143728017807007, "learning_rate": 1.6945898863224034e-05, "loss": 0.3597, "step": 26303 }, { "epoch": 4.293865556507898, "grad_norm": 2.502312660217285, "learning_rate": 1.694567056877938e-05, "loss": 0.3927, "step": 26304 }, { "epoch": 4.294028815150402, "grad_norm": 2.4267983436584473, "learning_rate": 1.6945442267340434e-05, "loss": 0.4519, "step": 26305 }, { "epoch": 4.294192073792907, "grad_norm": 2.8007164001464844, "learning_rate": 1.694521395890743e-05, "loss": 0.4263, "step": 26306 }, { "epoch": 4.294355332435411, "grad_norm": 2.200904130935669, "learning_rate": 1.6944985643480596e-05, "loss": 0.4134, "step": 26307 }, { "epoch": 4.294518591077916, "grad_norm": 1.992593765258789, "learning_rate": 1.6944757321060164e-05, "loss": 0.3495, "step": 26308 }, { "epoch": 4.294681849720419, "grad_norm": 2.1706202030181885, "learning_rate": 1.6944528991646367e-05, "loss": 0.3576, "step": 26309 }, { "epoch": 4.2948451083629235, "grad_norm": 1.9058719873428345, "learning_rate": 1.694430065523943e-05, "loss": 0.3474, "step": 26310 }, { "epoch": 4.295008367005428, "grad_norm": 1.992310881614685, "learning_rate": 1.694407231183958e-05, "loss": 0.3987, "step": 26311 }, { "epoch": 4.295171625647932, "grad_norm": 1.96824049949646, "learning_rate": 1.6943843961447054e-05, "loss": 0.3529, "step": 26312 }, { "epoch": 4.295334884290437, "grad_norm": 1.765095829963684, "learning_rate": 1.694361560406208e-05, "loss": 0.3647, "step": 26313 }, { "epoch": 4.295498142932941, "grad_norm": 2.0650157928466797, "learning_rate": 1.6943387239684885e-05, "loss": 0.3782, "step": 26314 }, { "epoch": 4.295661401575446, "grad_norm": 2.007732629776001, "learning_rate": 1.69431588683157e-05, "loss": 0.3641, "step": 26315 }, { "epoch": 4.29582466021795, "grad_norm": 1.7549021244049072, "learning_rate": 1.694293048995476e-05, "loss": 0.3485, "step": 26316 }, { "epoch": 4.295987918860455, "grad_norm": 1.7800816297531128, "learning_rate": 1.6942702104602286e-05, "loss": 0.3478, "step": 26317 }, { "epoch": 4.296151177502959, "grad_norm": 2.0886032581329346, "learning_rate": 1.6942473712258515e-05, "loss": 0.3822, "step": 26318 }, { "epoch": 4.2963144361454635, "grad_norm": 2.467475414276123, "learning_rate": 1.6942245312923676e-05, "loss": 0.4725, "step": 26319 }, { "epoch": 4.296477694787968, "grad_norm": 2.2194020748138428, "learning_rate": 1.6942016906597997e-05, "loss": 0.4364, "step": 26320 }, { "epoch": 4.296640953430472, "grad_norm": 2.1087610721588135, "learning_rate": 1.6941788493281707e-05, "loss": 0.3809, "step": 26321 }, { "epoch": 4.296804212072977, "grad_norm": 2.3853774070739746, "learning_rate": 1.694156007297504e-05, "loss": 0.4123, "step": 26322 }, { "epoch": 4.296967470715481, "grad_norm": 2.521249771118164, "learning_rate": 1.694133164567822e-05, "loss": 0.4207, "step": 26323 }, { "epoch": 4.297130729357986, "grad_norm": 1.7045551538467407, "learning_rate": 1.6941103211391486e-05, "loss": 0.377, "step": 26324 }, { "epoch": 4.29729398800049, "grad_norm": 2.140948534011841, "learning_rate": 1.6940874770115062e-05, "loss": 0.3313, "step": 26325 }, { "epoch": 4.2974572466429946, "grad_norm": 2.222127676010132, "learning_rate": 1.6940646321849173e-05, "loss": 0.3405, "step": 26326 }, { "epoch": 4.297620505285499, "grad_norm": 2.403224468231201, "learning_rate": 1.6940417866594063e-05, "loss": 0.4583, "step": 26327 }, { "epoch": 4.2977837639280025, "grad_norm": 1.9739477634429932, "learning_rate": 1.6940189404349952e-05, "loss": 0.3272, "step": 26328 }, { "epoch": 4.297947022570507, "grad_norm": 2.9345710277557373, "learning_rate": 1.693996093511707e-05, "loss": 0.5102, "step": 26329 }, { "epoch": 4.298110281213011, "grad_norm": 1.769602656364441, "learning_rate": 1.693973245889565e-05, "loss": 0.3425, "step": 26330 }, { "epoch": 4.298273539855516, "grad_norm": 2.1428654193878174, "learning_rate": 1.693950397568592e-05, "loss": 0.403, "step": 26331 }, { "epoch": 4.29843679849802, "grad_norm": 1.9493507146835327, "learning_rate": 1.6939275485488115e-05, "loss": 0.3694, "step": 26332 }, { "epoch": 4.298600057140525, "grad_norm": 2.698052406311035, "learning_rate": 1.6939046988302458e-05, "loss": 0.4645, "step": 26333 }, { "epoch": 4.298763315783029, "grad_norm": 2.2494587898254395, "learning_rate": 1.6938818484129186e-05, "loss": 0.417, "step": 26334 }, { "epoch": 4.298926574425534, "grad_norm": 2.543839931488037, "learning_rate": 1.6938589972968522e-05, "loss": 0.4102, "step": 26335 }, { "epoch": 4.299089833068038, "grad_norm": 2.1125404834747314, "learning_rate": 1.69383614548207e-05, "loss": 0.3651, "step": 26336 }, { "epoch": 4.2992530917105425, "grad_norm": 2.3159148693084717, "learning_rate": 1.6938132929685952e-05, "loss": 0.4205, "step": 26337 }, { "epoch": 4.299416350353047, "grad_norm": 2.1208245754241943, "learning_rate": 1.6937904397564506e-05, "loss": 0.4088, "step": 26338 }, { "epoch": 4.299579608995551, "grad_norm": 1.935840129852295, "learning_rate": 1.693767585845659e-05, "loss": 0.316, "step": 26339 }, { "epoch": 4.299742867638056, "grad_norm": 2.2248435020446777, "learning_rate": 1.6937447312362437e-05, "loss": 0.3825, "step": 26340 }, { "epoch": 4.29990612628056, "grad_norm": 1.8448799848556519, "learning_rate": 1.6937218759282276e-05, "loss": 0.3351, "step": 26341 }, { "epoch": 4.300069384923065, "grad_norm": 2.6105716228485107, "learning_rate": 1.693699019921634e-05, "loss": 0.4317, "step": 26342 }, { "epoch": 4.300232643565569, "grad_norm": 2.2294764518737793, "learning_rate": 1.6936761632164853e-05, "loss": 0.4257, "step": 26343 }, { "epoch": 4.3003959022080736, "grad_norm": 2.1027896404266357, "learning_rate": 1.693653305812805e-05, "loss": 0.4021, "step": 26344 }, { "epoch": 4.300559160850577, "grad_norm": 2.0259850025177, "learning_rate": 1.693630447710616e-05, "loss": 0.3656, "step": 26345 }, { "epoch": 4.3007224194930815, "grad_norm": 2.311614751815796, "learning_rate": 1.6936075889099416e-05, "loss": 0.3954, "step": 26346 }, { "epoch": 4.300885678135586, "grad_norm": 2.253031015396118, "learning_rate": 1.6935847294108044e-05, "loss": 0.3934, "step": 26347 }, { "epoch": 4.30104893677809, "grad_norm": 1.8022733926773071, "learning_rate": 1.6935618692132275e-05, "loss": 0.346, "step": 26348 }, { "epoch": 4.301212195420595, "grad_norm": 2.0075595378875732, "learning_rate": 1.693539008317234e-05, "loss": 0.3891, "step": 26349 }, { "epoch": 4.301375454063099, "grad_norm": 1.8867568969726562, "learning_rate": 1.6935161467228466e-05, "loss": 0.3567, "step": 26350 }, { "epoch": 4.301538712705604, "grad_norm": 1.7131993770599365, "learning_rate": 1.693493284430089e-05, "loss": 0.3318, "step": 26351 }, { "epoch": 4.301701971348108, "grad_norm": 2.153536558151245, "learning_rate": 1.693470421438984e-05, "loss": 0.4599, "step": 26352 }, { "epoch": 4.301865229990613, "grad_norm": 2.7103066444396973, "learning_rate": 1.693447557749554e-05, "loss": 0.4547, "step": 26353 }, { "epoch": 4.302028488633117, "grad_norm": 2.097043991088867, "learning_rate": 1.693424693361823e-05, "loss": 0.3612, "step": 26354 }, { "epoch": 4.3021917472756215, "grad_norm": 2.061305046081543, "learning_rate": 1.693401828275813e-05, "loss": 0.3634, "step": 26355 }, { "epoch": 4.302355005918126, "grad_norm": 2.4710426330566406, "learning_rate": 1.693378962491548e-05, "loss": 0.4222, "step": 26356 }, { "epoch": 4.30251826456063, "grad_norm": 1.9475533962249756, "learning_rate": 1.69335609600905e-05, "loss": 0.3078, "step": 26357 }, { "epoch": 4.302681523203135, "grad_norm": 2.355884075164795, "learning_rate": 1.6933332288283432e-05, "loss": 0.429, "step": 26358 }, { "epoch": 4.302844781845639, "grad_norm": 2.113619565963745, "learning_rate": 1.6933103609494497e-05, "loss": 0.4032, "step": 26359 }, { "epoch": 4.303008040488144, "grad_norm": 2.600119113922119, "learning_rate": 1.693287492372393e-05, "loss": 0.3813, "step": 26360 }, { "epoch": 4.303171299130648, "grad_norm": 2.020425319671631, "learning_rate": 1.693264623097196e-05, "loss": 0.3548, "step": 26361 }, { "epoch": 4.303334557773152, "grad_norm": 2.732940912246704, "learning_rate": 1.6932417531238818e-05, "loss": 0.5469, "step": 26362 }, { "epoch": 4.303497816415656, "grad_norm": 1.9183251857757568, "learning_rate": 1.6932188824524728e-05, "loss": 0.387, "step": 26363 }, { "epoch": 4.3036610750581605, "grad_norm": 2.1172239780426025, "learning_rate": 1.6931960110829928e-05, "loss": 0.4049, "step": 26364 }, { "epoch": 4.303824333700665, "grad_norm": 2.336454153060913, "learning_rate": 1.693173139015465e-05, "loss": 0.404, "step": 26365 }, { "epoch": 4.303987592343169, "grad_norm": 2.7344541549682617, "learning_rate": 1.6931502662499116e-05, "loss": 0.5033, "step": 26366 }, { "epoch": 4.304150850985674, "grad_norm": 1.9487148523330688, "learning_rate": 1.6931273927863564e-05, "loss": 0.3875, "step": 26367 }, { "epoch": 4.304314109628178, "grad_norm": 1.9580057859420776, "learning_rate": 1.6931045186248217e-05, "loss": 0.3625, "step": 26368 }, { "epoch": 4.304477368270683, "grad_norm": 1.9546382427215576, "learning_rate": 1.6930816437653312e-05, "loss": 0.4089, "step": 26369 }, { "epoch": 4.304640626913187, "grad_norm": 2.2904837131500244, "learning_rate": 1.693058768207908e-05, "loss": 0.3957, "step": 26370 }, { "epoch": 4.304803885555692, "grad_norm": 1.5525367259979248, "learning_rate": 1.6930358919525742e-05, "loss": 0.3203, "step": 26371 }, { "epoch": 4.304967144198196, "grad_norm": 2.178358316421509, "learning_rate": 1.6930130149993534e-05, "loss": 0.4097, "step": 26372 }, { "epoch": 4.3051304028407005, "grad_norm": 1.9485657215118408, "learning_rate": 1.692990137348269e-05, "loss": 0.3965, "step": 26373 }, { "epoch": 4.305293661483205, "grad_norm": 1.9359678030014038, "learning_rate": 1.692967258999344e-05, "loss": 0.3285, "step": 26374 }, { "epoch": 4.305456920125709, "grad_norm": 2.2357726097106934, "learning_rate": 1.6929443799526005e-05, "loss": 0.4002, "step": 26375 }, { "epoch": 4.305620178768214, "grad_norm": 1.8429368734359741, "learning_rate": 1.6929215002080626e-05, "loss": 0.4075, "step": 26376 }, { "epoch": 4.305783437410718, "grad_norm": 2.0778486728668213, "learning_rate": 1.6928986197657525e-05, "loss": 0.4097, "step": 26377 }, { "epoch": 4.305946696053223, "grad_norm": 2.148904800415039, "learning_rate": 1.6928757386256943e-05, "loss": 0.3839, "step": 26378 }, { "epoch": 4.306109954695727, "grad_norm": 2.048454999923706, "learning_rate": 1.6928528567879103e-05, "loss": 0.3678, "step": 26379 }, { "epoch": 4.3062732133382315, "grad_norm": 1.928510069847107, "learning_rate": 1.6928299742524236e-05, "loss": 0.3692, "step": 26380 }, { "epoch": 4.306436471980735, "grad_norm": 2.460479736328125, "learning_rate": 1.6928070910192568e-05, "loss": 0.387, "step": 26381 }, { "epoch": 4.3065997306232395, "grad_norm": 2.5023140907287598, "learning_rate": 1.692784207088434e-05, "loss": 0.376, "step": 26382 }, { "epoch": 4.306762989265744, "grad_norm": 2.089411735534668, "learning_rate": 1.6927613224599774e-05, "loss": 0.3902, "step": 26383 }, { "epoch": 4.306926247908248, "grad_norm": 2.297954797744751, "learning_rate": 1.6927384371339105e-05, "loss": 0.3895, "step": 26384 }, { "epoch": 4.307089506550753, "grad_norm": 2.07651686668396, "learning_rate": 1.6927155511102563e-05, "loss": 0.3748, "step": 26385 }, { "epoch": 4.307252765193257, "grad_norm": 2.6212401390075684, "learning_rate": 1.6926926643890374e-05, "loss": 0.4964, "step": 26386 }, { "epoch": 4.307416023835762, "grad_norm": 1.7304308414459229, "learning_rate": 1.692669776970277e-05, "loss": 0.3822, "step": 26387 }, { "epoch": 4.307579282478266, "grad_norm": 1.8777657747268677, "learning_rate": 1.6926468888539988e-05, "loss": 0.3595, "step": 26388 }, { "epoch": 4.307742541120771, "grad_norm": 1.970970630645752, "learning_rate": 1.6926240000402253e-05, "loss": 0.3692, "step": 26389 }, { "epoch": 4.307905799763275, "grad_norm": 1.9037531614303589, "learning_rate": 1.6926011105289797e-05, "loss": 0.3705, "step": 26390 }, { "epoch": 4.3080690584057795, "grad_norm": 2.183248996734619, "learning_rate": 1.692578220320285e-05, "loss": 0.3935, "step": 26391 }, { "epoch": 4.308232317048284, "grad_norm": 1.8301825523376465, "learning_rate": 1.6925553294141638e-05, "loss": 0.3459, "step": 26392 }, { "epoch": 4.308395575690788, "grad_norm": 1.8445062637329102, "learning_rate": 1.6925324378106395e-05, "loss": 0.3286, "step": 26393 }, { "epoch": 4.308558834333293, "grad_norm": 2.143235445022583, "learning_rate": 1.6925095455097356e-05, "loss": 0.4118, "step": 26394 }, { "epoch": 4.308722092975797, "grad_norm": 2.1132919788360596, "learning_rate": 1.6924866525114743e-05, "loss": 0.4047, "step": 26395 }, { "epoch": 4.308885351618302, "grad_norm": 2.248229742050171, "learning_rate": 1.6924637588158796e-05, "loss": 0.3842, "step": 26396 }, { "epoch": 4.309048610260806, "grad_norm": 1.723607063293457, "learning_rate": 1.692440864422974e-05, "loss": 0.2922, "step": 26397 }, { "epoch": 4.30921186890331, "grad_norm": 1.6163804531097412, "learning_rate": 1.692417969332781e-05, "loss": 0.3124, "step": 26398 }, { "epoch": 4.309375127545814, "grad_norm": 2.085407257080078, "learning_rate": 1.692395073545323e-05, "loss": 0.3994, "step": 26399 }, { "epoch": 4.3095383861883185, "grad_norm": 1.909989595413208, "learning_rate": 1.692372177060623e-05, "loss": 0.4016, "step": 26400 }, { "epoch": 4.309701644830823, "grad_norm": 2.2670795917510986, "learning_rate": 1.6923492798787045e-05, "loss": 0.4221, "step": 26401 }, { "epoch": 4.309864903473327, "grad_norm": 2.1288506984710693, "learning_rate": 1.6923263819995906e-05, "loss": 0.3329, "step": 26402 }, { "epoch": 4.310028162115832, "grad_norm": 1.9737004041671753, "learning_rate": 1.692303483423304e-05, "loss": 0.382, "step": 26403 }, { "epoch": 4.310191420758336, "grad_norm": 2.0987062454223633, "learning_rate": 1.6922805841498685e-05, "loss": 0.4152, "step": 26404 }, { "epoch": 4.310354679400841, "grad_norm": 2.0236685276031494, "learning_rate": 1.6922576841793063e-05, "loss": 0.3649, "step": 26405 }, { "epoch": 4.310517938043345, "grad_norm": 2.6546645164489746, "learning_rate": 1.6922347835116407e-05, "loss": 0.4727, "step": 26406 }, { "epoch": 4.31068119668585, "grad_norm": 2.33244252204895, "learning_rate": 1.692211882146895e-05, "loss": 0.372, "step": 26407 }, { "epoch": 4.310844455328354, "grad_norm": 2.3354899883270264, "learning_rate": 1.692188980085092e-05, "loss": 0.3686, "step": 26408 }, { "epoch": 4.3110077139708585, "grad_norm": 2.2218525409698486, "learning_rate": 1.6921660773262552e-05, "loss": 0.3753, "step": 26409 }, { "epoch": 4.311170972613363, "grad_norm": 2.403897285461426, "learning_rate": 1.692143173870407e-05, "loss": 0.4185, "step": 26410 }, { "epoch": 4.311334231255867, "grad_norm": 1.8666642904281616, "learning_rate": 1.692120269717571e-05, "loss": 0.3461, "step": 26411 }, { "epoch": 4.311497489898372, "grad_norm": 1.9719961881637573, "learning_rate": 1.6920973648677698e-05, "loss": 0.3233, "step": 26412 }, { "epoch": 4.311660748540876, "grad_norm": 1.6967334747314453, "learning_rate": 1.692074459321027e-05, "loss": 0.2851, "step": 26413 }, { "epoch": 4.311824007183381, "grad_norm": 2.2296640872955322, "learning_rate": 1.6920515530773653e-05, "loss": 0.3711, "step": 26414 }, { "epoch": 4.311987265825884, "grad_norm": 1.9113456010818481, "learning_rate": 1.692028646136808e-05, "loss": 0.3849, "step": 26415 }, { "epoch": 4.312150524468389, "grad_norm": 2.298172950744629, "learning_rate": 1.6920057384993778e-05, "loss": 0.3476, "step": 26416 }, { "epoch": 4.312313783110893, "grad_norm": 1.8597698211669922, "learning_rate": 1.691982830165098e-05, "loss": 0.3307, "step": 26417 }, { "epoch": 4.3124770417533975, "grad_norm": 1.8490797281265259, "learning_rate": 1.691959921133992e-05, "loss": 0.4236, "step": 26418 }, { "epoch": 4.312640300395902, "grad_norm": 2.2746076583862305, "learning_rate": 1.6919370114060824e-05, "loss": 0.4374, "step": 26419 }, { "epoch": 4.312803559038406, "grad_norm": 1.979703664779663, "learning_rate": 1.6919141009813924e-05, "loss": 0.3674, "step": 26420 }, { "epoch": 4.312966817680911, "grad_norm": 2.5500752925872803, "learning_rate": 1.691891189859945e-05, "loss": 0.3932, "step": 26421 }, { "epoch": 4.313130076323415, "grad_norm": 2.3522989749908447, "learning_rate": 1.6918682780417633e-05, "loss": 0.3916, "step": 26422 }, { "epoch": 4.31329333496592, "grad_norm": 3.1607298851013184, "learning_rate": 1.6918453655268705e-05, "loss": 0.4352, "step": 26423 }, { "epoch": 4.313456593608424, "grad_norm": 2.171445608139038, "learning_rate": 1.69182245231529e-05, "loss": 0.3992, "step": 26424 }, { "epoch": 4.313619852250929, "grad_norm": 2.0149288177490234, "learning_rate": 1.691799538407044e-05, "loss": 0.38, "step": 26425 }, { "epoch": 4.313783110893433, "grad_norm": 2.0710582733154297, "learning_rate": 1.691776623802156e-05, "loss": 0.385, "step": 26426 }, { "epoch": 4.3139463695359375, "grad_norm": 1.9325262308120728, "learning_rate": 1.691753708500649e-05, "loss": 0.3283, "step": 26427 }, { "epoch": 4.314109628178442, "grad_norm": 2.3186583518981934, "learning_rate": 1.6917307925025468e-05, "loss": 0.3932, "step": 26428 }, { "epoch": 4.314272886820946, "grad_norm": 2.3920257091522217, "learning_rate": 1.6917078758078717e-05, "loss": 0.6067, "step": 26429 }, { "epoch": 4.314436145463451, "grad_norm": 2.244608163833618, "learning_rate": 1.6916849584166465e-05, "loss": 0.3647, "step": 26430 }, { "epoch": 4.314599404105955, "grad_norm": 2.1646475791931152, "learning_rate": 1.6916620403288952e-05, "loss": 0.3758, "step": 26431 }, { "epoch": 4.31476266274846, "grad_norm": 1.829162359237671, "learning_rate": 1.6916391215446403e-05, "loss": 0.3796, "step": 26432 }, { "epoch": 4.314925921390964, "grad_norm": 1.8781392574310303, "learning_rate": 1.6916162020639047e-05, "loss": 0.3552, "step": 26433 }, { "epoch": 4.315089180033468, "grad_norm": 2.297950506210327, "learning_rate": 1.6915932818867122e-05, "loss": 0.4427, "step": 26434 }, { "epoch": 4.315252438675972, "grad_norm": 2.351438283920288, "learning_rate": 1.6915703610130852e-05, "loss": 0.4147, "step": 26435 }, { "epoch": 4.3154156973184765, "grad_norm": 2.6243488788604736, "learning_rate": 1.691547439443047e-05, "loss": 0.4103, "step": 26436 }, { "epoch": 4.315578955960981, "grad_norm": 2.1141724586486816, "learning_rate": 1.691524517176621e-05, "loss": 0.3779, "step": 26437 }, { "epoch": 4.315742214603485, "grad_norm": 1.983359456062317, "learning_rate": 1.69150159421383e-05, "loss": 0.41, "step": 26438 }, { "epoch": 4.31590547324599, "grad_norm": 2.225860118865967, "learning_rate": 1.6914786705546967e-05, "loss": 0.4168, "step": 26439 }, { "epoch": 4.316068731888494, "grad_norm": 2.0135607719421387, "learning_rate": 1.6914557461992445e-05, "loss": 0.4381, "step": 26440 }, { "epoch": 4.316231990530999, "grad_norm": 1.932120680809021, "learning_rate": 1.691432821147497e-05, "loss": 0.3372, "step": 26441 }, { "epoch": 4.316395249173503, "grad_norm": 1.8824466466903687, "learning_rate": 1.6914098953994766e-05, "loss": 0.3428, "step": 26442 }, { "epoch": 4.316558507816008, "grad_norm": 1.596097469329834, "learning_rate": 1.6913869689552066e-05, "loss": 0.3343, "step": 26443 }, { "epoch": 4.316721766458512, "grad_norm": 2.4904346466064453, "learning_rate": 1.69136404181471e-05, "loss": 0.4418, "step": 26444 }, { "epoch": 4.3168850251010165, "grad_norm": 2.541691780090332, "learning_rate": 1.6913411139780104e-05, "loss": 0.4274, "step": 26445 }, { "epoch": 4.317048283743521, "grad_norm": 2.1377508640289307, "learning_rate": 1.69131818544513e-05, "loss": 0.3848, "step": 26446 }, { "epoch": 4.317211542386025, "grad_norm": 2.269658327102661, "learning_rate": 1.6912952562160928e-05, "loss": 0.3938, "step": 26447 }, { "epoch": 4.31737480102853, "grad_norm": 1.9575731754302979, "learning_rate": 1.6912723262909213e-05, "loss": 0.3939, "step": 26448 }, { "epoch": 4.317538059671034, "grad_norm": 2.3258230686187744, "learning_rate": 1.6912493956696384e-05, "loss": 0.4163, "step": 26449 }, { "epoch": 4.317701318313539, "grad_norm": 2.4298255443573, "learning_rate": 1.691226464352268e-05, "loss": 0.4663, "step": 26450 }, { "epoch": 4.317864576956042, "grad_norm": 2.4252169132232666, "learning_rate": 1.6912035323388325e-05, "loss": 0.7077, "step": 26451 }, { "epoch": 4.318027835598547, "grad_norm": 2.5182104110717773, "learning_rate": 1.6911805996293553e-05, "loss": 0.4103, "step": 26452 }, { "epoch": 4.318191094241051, "grad_norm": 2.034761667251587, "learning_rate": 1.6911576662238595e-05, "loss": 0.3903, "step": 26453 }, { "epoch": 4.3183543528835555, "grad_norm": 2.3639402389526367, "learning_rate": 1.691134732122368e-05, "loss": 0.4185, "step": 26454 }, { "epoch": 4.31851761152606, "grad_norm": 1.9855949878692627, "learning_rate": 1.6911117973249037e-05, "loss": 0.3956, "step": 26455 }, { "epoch": 4.318680870168564, "grad_norm": 2.201838254928589, "learning_rate": 1.6910888618314903e-05, "loss": 0.3696, "step": 26456 }, { "epoch": 4.318844128811069, "grad_norm": 2.7151272296905518, "learning_rate": 1.691065925642151e-05, "loss": 0.4343, "step": 26457 }, { "epoch": 4.319007387453573, "grad_norm": 2.197319507598877, "learning_rate": 1.691042988756908e-05, "loss": 0.4298, "step": 26458 }, { "epoch": 4.319170646096078, "grad_norm": 2.2593233585357666, "learning_rate": 1.691020051175785e-05, "loss": 0.396, "step": 26459 }, { "epoch": 4.319333904738582, "grad_norm": 1.5110071897506714, "learning_rate": 1.690997112898805e-05, "loss": 0.3359, "step": 26460 }, { "epoch": 4.319497163381087, "grad_norm": 2.284754991531372, "learning_rate": 1.690974173925991e-05, "loss": 0.4164, "step": 26461 }, { "epoch": 4.319660422023591, "grad_norm": 2.0381228923797607, "learning_rate": 1.6909512342573664e-05, "loss": 0.3914, "step": 26462 }, { "epoch": 4.3198236806660955, "grad_norm": 1.7045669555664062, "learning_rate": 1.6909282938929542e-05, "loss": 0.3845, "step": 26463 }, { "epoch": 4.3199869393086, "grad_norm": 2.1528351306915283, "learning_rate": 1.690905352832777e-05, "loss": 0.4101, "step": 26464 }, { "epoch": 4.320150197951104, "grad_norm": 1.6069036722183228, "learning_rate": 1.6908824110768584e-05, "loss": 0.2959, "step": 26465 }, { "epoch": 4.320313456593609, "grad_norm": 2.056576728820801, "learning_rate": 1.6908594686252216e-05, "loss": 0.3854, "step": 26466 }, { "epoch": 4.320476715236113, "grad_norm": 2.206972599029541, "learning_rate": 1.6908365254778894e-05, "loss": 0.4096, "step": 26467 }, { "epoch": 4.320639973878617, "grad_norm": 2.438471794128418, "learning_rate": 1.690813581634885e-05, "loss": 0.3946, "step": 26468 }, { "epoch": 4.320803232521121, "grad_norm": 2.2474513053894043, "learning_rate": 1.6907906370962315e-05, "loss": 0.3591, "step": 26469 }, { "epoch": 4.320966491163626, "grad_norm": 2.1151793003082275, "learning_rate": 1.690767691861952e-05, "loss": 0.401, "step": 26470 }, { "epoch": 4.32112974980613, "grad_norm": 1.9228490591049194, "learning_rate": 1.6907447459320697e-05, "loss": 0.3368, "step": 26471 }, { "epoch": 4.3212930084486345, "grad_norm": 2.1723310947418213, "learning_rate": 1.6907217993066072e-05, "loss": 0.4194, "step": 26472 }, { "epoch": 4.321456267091139, "grad_norm": 2.6581077575683594, "learning_rate": 1.6906988519855883e-05, "loss": 0.4239, "step": 26473 }, { "epoch": 4.321619525733643, "grad_norm": 1.8534570932388306, "learning_rate": 1.6906759039690358e-05, "loss": 0.3415, "step": 26474 }, { "epoch": 4.321782784376148, "grad_norm": 2.2318854331970215, "learning_rate": 1.6906529552569733e-05, "loss": 0.3679, "step": 26475 }, { "epoch": 4.321946043018652, "grad_norm": 2.297653913497925, "learning_rate": 1.690630005849423e-05, "loss": 0.3831, "step": 26476 }, { "epoch": 4.322109301661157, "grad_norm": 1.7649204730987549, "learning_rate": 1.6906070557464087e-05, "loss": 0.3707, "step": 26477 }, { "epoch": 4.322272560303661, "grad_norm": 2.389615535736084, "learning_rate": 1.690584104947953e-05, "loss": 0.4423, "step": 26478 }, { "epoch": 4.322435818946166, "grad_norm": 1.9313582181930542, "learning_rate": 1.6905611534540795e-05, "loss": 0.3627, "step": 26479 }, { "epoch": 4.32259907758867, "grad_norm": 2.761113166809082, "learning_rate": 1.690538201264811e-05, "loss": 0.4956, "step": 26480 }, { "epoch": 4.3227623362311745, "grad_norm": 2.0754942893981934, "learning_rate": 1.6905152483801707e-05, "loss": 0.3863, "step": 26481 }, { "epoch": 4.322925594873679, "grad_norm": 2.1217734813690186, "learning_rate": 1.690492294800182e-05, "loss": 0.3959, "step": 26482 }, { "epoch": 4.323088853516183, "grad_norm": 2.384706735610962, "learning_rate": 1.6904693405248672e-05, "loss": 0.3997, "step": 26483 }, { "epoch": 4.323252112158688, "grad_norm": 2.179975986480713, "learning_rate": 1.6904463855542507e-05, "loss": 0.4094, "step": 26484 }, { "epoch": 4.323415370801192, "grad_norm": 2.4437787532806396, "learning_rate": 1.6904234298883544e-05, "loss": 0.4182, "step": 26485 }, { "epoch": 4.323578629443697, "grad_norm": 2.013137102127075, "learning_rate": 1.6904004735272016e-05, "loss": 0.3542, "step": 26486 }, { "epoch": 4.3237418880862, "grad_norm": 1.5592929124832153, "learning_rate": 1.6903775164708163e-05, "loss": 0.2965, "step": 26487 }, { "epoch": 4.323905146728705, "grad_norm": 1.9806451797485352, "learning_rate": 1.690354558719221e-05, "loss": 0.4076, "step": 26488 }, { "epoch": 4.324068405371209, "grad_norm": 2.3693785667419434, "learning_rate": 1.6903316002724385e-05, "loss": 0.3851, "step": 26489 }, { "epoch": 4.3242316640137135, "grad_norm": 1.9629952907562256, "learning_rate": 1.6903086411304924e-05, "loss": 0.3308, "step": 26490 }, { "epoch": 4.324394922656218, "grad_norm": 2.4187841415405273, "learning_rate": 1.6902856812934057e-05, "loss": 0.4235, "step": 26491 }, { "epoch": 4.324558181298722, "grad_norm": 2.049129009246826, "learning_rate": 1.690262720761201e-05, "loss": 0.3659, "step": 26492 }, { "epoch": 4.324721439941227, "grad_norm": 2.275578498840332, "learning_rate": 1.6902397595339026e-05, "loss": 0.3806, "step": 26493 }, { "epoch": 4.324884698583731, "grad_norm": 2.1464521884918213, "learning_rate": 1.6902167976115326e-05, "loss": 0.4227, "step": 26494 }, { "epoch": 4.325047957226236, "grad_norm": 2.4261953830718994, "learning_rate": 1.6901938349941148e-05, "loss": 0.3627, "step": 26495 }, { "epoch": 4.32521121586874, "grad_norm": 1.8738707304000854, "learning_rate": 1.6901708716816715e-05, "loss": 0.3281, "step": 26496 }, { "epoch": 4.325374474511245, "grad_norm": 2.482849359512329, "learning_rate": 1.6901479076742273e-05, "loss": 0.4018, "step": 26497 }, { "epoch": 4.325537733153749, "grad_norm": 1.9826568365097046, "learning_rate": 1.6901249429718033e-05, "loss": 0.3894, "step": 26498 }, { "epoch": 4.3257009917962534, "grad_norm": 2.245633363723755, "learning_rate": 1.690101977574424e-05, "loss": 0.4591, "step": 26499 }, { "epoch": 4.325864250438758, "grad_norm": 2.137432336807251, "learning_rate": 1.6900790114821122e-05, "loss": 0.3615, "step": 26500 }, { "epoch": 4.326027509081262, "grad_norm": 1.9042508602142334, "learning_rate": 1.6900560446948908e-05, "loss": 0.3557, "step": 26501 }, { "epoch": 4.326190767723767, "grad_norm": 2.2818710803985596, "learning_rate": 1.6900330772127835e-05, "loss": 0.4215, "step": 26502 }, { "epoch": 4.326354026366271, "grad_norm": 2.218026638031006, "learning_rate": 1.690010109035813e-05, "loss": 0.3981, "step": 26503 }, { "epoch": 4.326517285008775, "grad_norm": 2.3609044551849365, "learning_rate": 1.6899871401640023e-05, "loss": 0.4691, "step": 26504 }, { "epoch": 4.326680543651279, "grad_norm": 2.4606289863586426, "learning_rate": 1.6899641705973747e-05, "loss": 0.468, "step": 26505 }, { "epoch": 4.326843802293784, "grad_norm": 1.9932124614715576, "learning_rate": 1.689941200335954e-05, "loss": 0.3447, "step": 26506 }, { "epoch": 4.327007060936288, "grad_norm": 2.0100979804992676, "learning_rate": 1.689918229379762e-05, "loss": 0.3851, "step": 26507 }, { "epoch": 4.3271703195787925, "grad_norm": 1.9325584173202515, "learning_rate": 1.6898952577288228e-05, "loss": 0.3928, "step": 26508 }, { "epoch": 4.327333578221297, "grad_norm": 1.870766520500183, "learning_rate": 1.6898722853831594e-05, "loss": 0.3954, "step": 26509 }, { "epoch": 4.327496836863801, "grad_norm": 2.39617919921875, "learning_rate": 1.6898493123427945e-05, "loss": 0.3748, "step": 26510 }, { "epoch": 4.327660095506306, "grad_norm": 2.0301620960235596, "learning_rate": 1.6898263386077515e-05, "loss": 0.3851, "step": 26511 }, { "epoch": 4.32782335414881, "grad_norm": 2.053013563156128, "learning_rate": 1.6898033641780537e-05, "loss": 0.3293, "step": 26512 }, { "epoch": 4.327986612791315, "grad_norm": 2.0635569095611572, "learning_rate": 1.6897803890537245e-05, "loss": 0.4063, "step": 26513 }, { "epoch": 4.328149871433819, "grad_norm": 2.219007968902588, "learning_rate": 1.689757413234786e-05, "loss": 0.4033, "step": 26514 }, { "epoch": 4.328313130076324, "grad_norm": 2.279411554336548, "learning_rate": 1.689734436721262e-05, "loss": 0.3952, "step": 26515 }, { "epoch": 4.328476388718828, "grad_norm": 2.1471712589263916, "learning_rate": 1.6897114595131762e-05, "loss": 0.3973, "step": 26516 }, { "epoch": 4.328639647361332, "grad_norm": 2.080192804336548, "learning_rate": 1.6896884816105505e-05, "loss": 0.3381, "step": 26517 }, { "epoch": 4.328802906003837, "grad_norm": 1.907184362411499, "learning_rate": 1.689665503013409e-05, "loss": 0.3555, "step": 26518 }, { "epoch": 4.328966164646341, "grad_norm": 1.8550710678100586, "learning_rate": 1.689642523721774e-05, "loss": 0.3924, "step": 26519 }, { "epoch": 4.329129423288846, "grad_norm": 2.2040855884552, "learning_rate": 1.68961954373567e-05, "loss": 0.3995, "step": 26520 }, { "epoch": 4.329292681931349, "grad_norm": 1.9340912103652954, "learning_rate": 1.689596563055119e-05, "loss": 0.3779, "step": 26521 }, { "epoch": 4.329455940573854, "grad_norm": 2.0091092586517334, "learning_rate": 1.689573581680144e-05, "loss": 0.3917, "step": 26522 }, { "epoch": 4.329619199216358, "grad_norm": 2.711697816848755, "learning_rate": 1.689550599610769e-05, "loss": 0.4474, "step": 26523 }, { "epoch": 4.329782457858863, "grad_norm": 2.179840564727783, "learning_rate": 1.6895276168470166e-05, "loss": 0.3964, "step": 26524 }, { "epoch": 4.329945716501367, "grad_norm": 2.2195754051208496, "learning_rate": 1.68950463338891e-05, "loss": 0.3406, "step": 26525 }, { "epoch": 4.3301089751438715, "grad_norm": 2.0376389026641846, "learning_rate": 1.6894816492364728e-05, "loss": 0.3749, "step": 26526 }, { "epoch": 4.330272233786376, "grad_norm": 2.4309184551239014, "learning_rate": 1.6894586643897276e-05, "loss": 0.4075, "step": 26527 }, { "epoch": 4.33043549242888, "grad_norm": 2.4669649600982666, "learning_rate": 1.6894356788486977e-05, "loss": 0.4911, "step": 26528 }, { "epoch": 4.330598751071385, "grad_norm": 2.267617702484131, "learning_rate": 1.6894126926134064e-05, "loss": 0.4248, "step": 26529 }, { "epoch": 4.330762009713889, "grad_norm": 1.784188151359558, "learning_rate": 1.6893897056838763e-05, "loss": 0.3539, "step": 26530 }, { "epoch": 4.330925268356394, "grad_norm": 1.9482488632202148, "learning_rate": 1.6893667180601313e-05, "loss": 0.3795, "step": 26531 }, { "epoch": 4.331088526998898, "grad_norm": 2.2605113983154297, "learning_rate": 1.6893437297421936e-05, "loss": 0.3745, "step": 26532 }, { "epoch": 4.331251785641403, "grad_norm": 2.066544532775879, "learning_rate": 1.6893207407300877e-05, "loss": 0.369, "step": 26533 }, { "epoch": 4.331415044283907, "grad_norm": 1.8937321901321411, "learning_rate": 1.6892977510238356e-05, "loss": 0.3672, "step": 26534 }, { "epoch": 4.331578302926411, "grad_norm": 1.821730136871338, "learning_rate": 1.6892747606234606e-05, "loss": 0.3714, "step": 26535 }, { "epoch": 4.331741561568916, "grad_norm": 2.5957236289978027, "learning_rate": 1.6892517695289866e-05, "loss": 0.3954, "step": 26536 }, { "epoch": 4.33190482021142, "grad_norm": 2.7986624240875244, "learning_rate": 1.689228777740436e-05, "loss": 0.3987, "step": 26537 }, { "epoch": 4.332068078853924, "grad_norm": 2.2857043743133545, "learning_rate": 1.6892057852578326e-05, "loss": 0.4335, "step": 26538 }, { "epoch": 4.332231337496428, "grad_norm": 1.9730256795883179, "learning_rate": 1.6891827920811987e-05, "loss": 0.3509, "step": 26539 }, { "epoch": 4.332394596138933, "grad_norm": 2.1110951900482178, "learning_rate": 1.6891597982105583e-05, "loss": 0.348, "step": 26540 }, { "epoch": 4.332557854781437, "grad_norm": 1.8634426593780518, "learning_rate": 1.6891368036459335e-05, "loss": 0.3712, "step": 26541 }, { "epoch": 4.332721113423942, "grad_norm": 1.9761523008346558, "learning_rate": 1.6891138083873486e-05, "loss": 0.3822, "step": 26542 }, { "epoch": 4.332884372066446, "grad_norm": 2.6964845657348633, "learning_rate": 1.689090812434826e-05, "loss": 0.4907, "step": 26543 }, { "epoch": 4.3330476307089505, "grad_norm": 1.8290635347366333, "learning_rate": 1.6890678157883896e-05, "loss": 0.4081, "step": 26544 }, { "epoch": 4.333210889351455, "grad_norm": 1.7913281917572021, "learning_rate": 1.689044818448062e-05, "loss": 0.2892, "step": 26545 }, { "epoch": 4.333374147993959, "grad_norm": 1.7505847215652466, "learning_rate": 1.6890218204138663e-05, "loss": 0.3257, "step": 26546 }, { "epoch": 4.333537406636464, "grad_norm": 2.1885781288146973, "learning_rate": 1.6889988216858256e-05, "loss": 0.39, "step": 26547 }, { "epoch": 4.333700665278968, "grad_norm": 2.3919153213500977, "learning_rate": 1.6889758222639633e-05, "loss": 0.3821, "step": 26548 }, { "epoch": 4.333863923921473, "grad_norm": 2.396275520324707, "learning_rate": 1.688952822148303e-05, "loss": 0.414, "step": 26549 }, { "epoch": 4.334027182563977, "grad_norm": 1.7281789779663086, "learning_rate": 1.688929821338867e-05, "loss": 0.34, "step": 26550 }, { "epoch": 4.334190441206482, "grad_norm": 1.9736052751541138, "learning_rate": 1.688906819835679e-05, "loss": 0.3887, "step": 26551 }, { "epoch": 4.334353699848986, "grad_norm": 2.026122570037842, "learning_rate": 1.688883817638762e-05, "loss": 0.3613, "step": 26552 }, { "epoch": 4.33451695849149, "grad_norm": 2.512037515640259, "learning_rate": 1.688860814748139e-05, "loss": 0.5491, "step": 26553 }, { "epoch": 4.334680217133995, "grad_norm": 2.0443437099456787, "learning_rate": 1.6888378111638335e-05, "loss": 0.371, "step": 26554 }, { "epoch": 4.334843475776499, "grad_norm": 2.2350869178771973, "learning_rate": 1.6888148068858683e-05, "loss": 0.4366, "step": 26555 }, { "epoch": 4.335006734419004, "grad_norm": 2.2646944522857666, "learning_rate": 1.688791801914267e-05, "loss": 0.3999, "step": 26556 }, { "epoch": 4.335169993061507, "grad_norm": 2.2300350666046143, "learning_rate": 1.6887687962490526e-05, "loss": 0.3314, "step": 26557 }, { "epoch": 4.335333251704012, "grad_norm": 2.0274598598480225, "learning_rate": 1.688745789890248e-05, "loss": 0.3274, "step": 26558 }, { "epoch": 4.335496510346516, "grad_norm": 1.9337236881256104, "learning_rate": 1.6887227828378768e-05, "loss": 0.3188, "step": 26559 }, { "epoch": 4.335659768989021, "grad_norm": 1.9768930673599243, "learning_rate": 1.688699775091962e-05, "loss": 0.3445, "step": 26560 }, { "epoch": 4.335823027631525, "grad_norm": 2.2011096477508545, "learning_rate": 1.688676766652526e-05, "loss": 0.4045, "step": 26561 }, { "epoch": 4.3359862862740295, "grad_norm": 2.1820476055145264, "learning_rate": 1.6886537575195934e-05, "loss": 0.4082, "step": 26562 }, { "epoch": 4.336149544916534, "grad_norm": 2.2697503566741943, "learning_rate": 1.6886307476931866e-05, "loss": 0.3821, "step": 26563 }, { "epoch": 4.336312803559038, "grad_norm": 2.0704598426818848, "learning_rate": 1.6886077371733285e-05, "loss": 0.3564, "step": 26564 }, { "epoch": 4.336476062201543, "grad_norm": 2.0050344467163086, "learning_rate": 1.688584725960043e-05, "loss": 0.3306, "step": 26565 }, { "epoch": 4.336639320844047, "grad_norm": 2.400603771209717, "learning_rate": 1.6885617140533526e-05, "loss": 0.4142, "step": 26566 }, { "epoch": 4.336802579486552, "grad_norm": 1.9194540977478027, "learning_rate": 1.688538701453281e-05, "loss": 0.3806, "step": 26567 }, { "epoch": 4.336965838129056, "grad_norm": 2.389031410217285, "learning_rate": 1.6885156881598504e-05, "loss": 0.4082, "step": 26568 }, { "epoch": 4.3371290967715606, "grad_norm": 2.4083311557769775, "learning_rate": 1.688492674173085e-05, "loss": 0.3596, "step": 26569 }, { "epoch": 4.337292355414065, "grad_norm": 2.0091497898101807, "learning_rate": 1.6884696594930077e-05, "loss": 0.3717, "step": 26570 }, { "epoch": 4.337455614056569, "grad_norm": 1.9141618013381958, "learning_rate": 1.688446644119642e-05, "loss": 0.3289, "step": 26571 }, { "epoch": 4.337618872699074, "grad_norm": 2.0761208534240723, "learning_rate": 1.6884236280530104e-05, "loss": 0.3835, "step": 26572 }, { "epoch": 4.337782131341578, "grad_norm": 1.9629136323928833, "learning_rate": 1.6884006112931365e-05, "loss": 0.3269, "step": 26573 }, { "epoch": 4.337945389984082, "grad_norm": 2.0038154125213623, "learning_rate": 1.6883775938400434e-05, "loss": 0.3649, "step": 26574 }, { "epoch": 4.338108648626586, "grad_norm": 1.8437135219573975, "learning_rate": 1.688354575693754e-05, "loss": 0.3307, "step": 26575 }, { "epoch": 4.338271907269091, "grad_norm": 2.339905023574829, "learning_rate": 1.688331556854292e-05, "loss": 0.4715, "step": 26576 }, { "epoch": 4.338435165911595, "grad_norm": 2.1112959384918213, "learning_rate": 1.6883085373216804e-05, "loss": 0.3959, "step": 26577 }, { "epoch": 4.3385984245541, "grad_norm": 2.33535099029541, "learning_rate": 1.688285517095942e-05, "loss": 0.4059, "step": 26578 }, { "epoch": 4.338761683196604, "grad_norm": 2.4909350872039795, "learning_rate": 1.6882624961771003e-05, "loss": 0.3781, "step": 26579 }, { "epoch": 4.3389249418391085, "grad_norm": 2.3053481578826904, "learning_rate": 1.6882394745651784e-05, "loss": 0.4048, "step": 26580 }, { "epoch": 4.339088200481613, "grad_norm": 2.069016456604004, "learning_rate": 1.6882164522602e-05, "loss": 0.3625, "step": 26581 }, { "epoch": 4.339251459124117, "grad_norm": 2.0125951766967773, "learning_rate": 1.688193429262187e-05, "loss": 0.3791, "step": 26582 }, { "epoch": 4.339414717766622, "grad_norm": 1.9079043865203857, "learning_rate": 1.688170405571164e-05, "loss": 0.3651, "step": 26583 }, { "epoch": 4.339577976409126, "grad_norm": 1.9646601676940918, "learning_rate": 1.6881473811871533e-05, "loss": 0.3959, "step": 26584 }, { "epoch": 4.339741235051631, "grad_norm": 2.118868112564087, "learning_rate": 1.6881243561101785e-05, "loss": 0.4029, "step": 26585 }, { "epoch": 4.339904493694135, "grad_norm": 1.850684404373169, "learning_rate": 1.688101330340263e-05, "loss": 0.36, "step": 26586 }, { "epoch": 4.3400677523366396, "grad_norm": 2.2062716484069824, "learning_rate": 1.688078303877429e-05, "loss": 0.3922, "step": 26587 }, { "epoch": 4.340231010979144, "grad_norm": 1.796568751335144, "learning_rate": 1.6880552767217007e-05, "loss": 0.3661, "step": 26588 }, { "epoch": 4.340394269621648, "grad_norm": 1.9037175178527832, "learning_rate": 1.6880322488731006e-05, "loss": 0.3854, "step": 26589 }, { "epoch": 4.340557528264153, "grad_norm": 2.3861162662506104, "learning_rate": 1.6880092203316526e-05, "loss": 0.4372, "step": 26590 }, { "epoch": 4.340720786906656, "grad_norm": 2.1809561252593994, "learning_rate": 1.6879861910973795e-05, "loss": 0.4262, "step": 26591 }, { "epoch": 4.340884045549161, "grad_norm": 2.221853256225586, "learning_rate": 1.6879631611703042e-05, "loss": 0.361, "step": 26592 }, { "epoch": 4.341047304191665, "grad_norm": 2.482351303100586, "learning_rate": 1.6879401305504505e-05, "loss": 0.406, "step": 26593 }, { "epoch": 4.34121056283417, "grad_norm": 2.1139767169952393, "learning_rate": 1.6879170992378408e-05, "loss": 0.374, "step": 26594 }, { "epoch": 4.341373821476674, "grad_norm": 1.9678702354431152, "learning_rate": 1.687894067232499e-05, "loss": 0.3846, "step": 26595 }, { "epoch": 4.341537080119179, "grad_norm": 1.8304470777511597, "learning_rate": 1.6878710345344483e-05, "loss": 0.384, "step": 26596 }, { "epoch": 4.341700338761683, "grad_norm": 1.866415023803711, "learning_rate": 1.6878480011437113e-05, "loss": 0.381, "step": 26597 }, { "epoch": 4.3418635974041875, "grad_norm": 2.055211067199707, "learning_rate": 1.687824967060312e-05, "loss": 0.4105, "step": 26598 }, { "epoch": 4.342026856046692, "grad_norm": 1.6078581809997559, "learning_rate": 1.6878019322842727e-05, "loss": 0.3598, "step": 26599 }, { "epoch": 4.342190114689196, "grad_norm": 2.264277935028076, "learning_rate": 1.6877788968156172e-05, "loss": 0.4116, "step": 26600 }, { "epoch": 4.342353373331701, "grad_norm": 2.4106509685516357, "learning_rate": 1.6877558606543686e-05, "loss": 0.3887, "step": 26601 }, { "epoch": 4.342516631974205, "grad_norm": 2.2686145305633545, "learning_rate": 1.6877328238005496e-05, "loss": 0.3848, "step": 26602 }, { "epoch": 4.34267989061671, "grad_norm": 2.1571810245513916, "learning_rate": 1.6877097862541843e-05, "loss": 0.4048, "step": 26603 }, { "epoch": 4.342843149259214, "grad_norm": 2.322096586227417, "learning_rate": 1.6876867480152953e-05, "loss": 0.4053, "step": 26604 }, { "epoch": 4.3430064079017185, "grad_norm": 2.303872585296631, "learning_rate": 1.687663709083906e-05, "loss": 0.4208, "step": 26605 }, { "epoch": 4.343169666544223, "grad_norm": 2.3261940479278564, "learning_rate": 1.6876406694600396e-05, "loss": 0.3833, "step": 26606 }, { "epoch": 4.343332925186727, "grad_norm": 2.819594144821167, "learning_rate": 1.687617629143719e-05, "loss": 0.4394, "step": 26607 }, { "epoch": 4.343496183829232, "grad_norm": 2.03377366065979, "learning_rate": 1.6875945881349676e-05, "loss": 0.3917, "step": 26608 }, { "epoch": 4.343659442471736, "grad_norm": 2.3117873668670654, "learning_rate": 1.6875715464338088e-05, "loss": 0.5893, "step": 26609 }, { "epoch": 4.34382270111424, "grad_norm": 1.891358494758606, "learning_rate": 1.6875485040402655e-05, "loss": 0.3637, "step": 26610 }, { "epoch": 4.343985959756744, "grad_norm": 2.067145347595215, "learning_rate": 1.687525460954361e-05, "loss": 0.4089, "step": 26611 }, { "epoch": 4.344149218399249, "grad_norm": 2.0628650188446045, "learning_rate": 1.6875024171761188e-05, "loss": 0.3845, "step": 26612 }, { "epoch": 4.344312477041753, "grad_norm": 1.872780680656433, "learning_rate": 1.6874793727055617e-05, "loss": 0.3714, "step": 26613 }, { "epoch": 4.344475735684258, "grad_norm": 2.1901698112487793, "learning_rate": 1.687456327542713e-05, "loss": 0.4033, "step": 26614 }, { "epoch": 4.344638994326762, "grad_norm": 2.2971088886260986, "learning_rate": 1.687433281687596e-05, "loss": 0.4036, "step": 26615 }, { "epoch": 4.3448022529692665, "grad_norm": 1.9579271078109741, "learning_rate": 1.687410235140234e-05, "loss": 0.4077, "step": 26616 }, { "epoch": 4.344965511611771, "grad_norm": 2.436343193054199, "learning_rate": 1.68738718790065e-05, "loss": 0.4636, "step": 26617 }, { "epoch": 4.345128770254275, "grad_norm": 2.2140164375305176, "learning_rate": 1.687364139968867e-05, "loss": 0.3912, "step": 26618 }, { "epoch": 4.34529202889678, "grad_norm": 1.8873167037963867, "learning_rate": 1.687341091344909e-05, "loss": 0.3525, "step": 26619 }, { "epoch": 4.345455287539284, "grad_norm": 2.035781145095825, "learning_rate": 1.6873180420287982e-05, "loss": 0.3746, "step": 26620 }, { "epoch": 4.345618546181789, "grad_norm": 1.8881440162658691, "learning_rate": 1.687294992020559e-05, "loss": 0.3607, "step": 26621 }, { "epoch": 4.345781804824293, "grad_norm": 1.7032470703125, "learning_rate": 1.687271941320213e-05, "loss": 0.3548, "step": 26622 }, { "epoch": 4.3459450634667975, "grad_norm": 1.9067844152450562, "learning_rate": 1.6872488899277847e-05, "loss": 0.3642, "step": 26623 }, { "epoch": 4.346108322109302, "grad_norm": 1.9860343933105469, "learning_rate": 1.6872258378432972e-05, "loss": 0.3195, "step": 26624 }, { "epoch": 4.346271580751806, "grad_norm": 1.5722707509994507, "learning_rate": 1.6872027850667736e-05, "loss": 0.3235, "step": 26625 }, { "epoch": 4.346434839394311, "grad_norm": 2.4840195178985596, "learning_rate": 1.6871797315982366e-05, "loss": 0.4538, "step": 26626 }, { "epoch": 4.346598098036814, "grad_norm": 2.1493618488311768, "learning_rate": 1.68715667743771e-05, "loss": 0.436, "step": 26627 }, { "epoch": 4.346761356679319, "grad_norm": 1.7394646406173706, "learning_rate": 1.6871336225852168e-05, "loss": 0.3231, "step": 26628 }, { "epoch": 4.346924615321823, "grad_norm": 2.1650285720825195, "learning_rate": 1.68711056704078e-05, "loss": 0.4183, "step": 26629 }, { "epoch": 4.347087873964328, "grad_norm": 2.3940865993499756, "learning_rate": 1.6870875108044233e-05, "loss": 0.4065, "step": 26630 }, { "epoch": 4.347251132606832, "grad_norm": 1.6306955814361572, "learning_rate": 1.6870644538761694e-05, "loss": 0.327, "step": 26631 }, { "epoch": 4.347414391249337, "grad_norm": 1.9219520092010498, "learning_rate": 1.6870413962560417e-05, "loss": 0.3595, "step": 26632 }, { "epoch": 4.347577649891841, "grad_norm": 2.403569221496582, "learning_rate": 1.687018337944064e-05, "loss": 0.3672, "step": 26633 }, { "epoch": 4.3477409085343455, "grad_norm": 1.8626081943511963, "learning_rate": 1.6869952789402585e-05, "loss": 0.3453, "step": 26634 }, { "epoch": 4.34790416717685, "grad_norm": 2.2776224613189697, "learning_rate": 1.6869722192446494e-05, "loss": 0.3862, "step": 26635 }, { "epoch": 4.348067425819354, "grad_norm": 1.8616619110107422, "learning_rate": 1.6869491588572594e-05, "loss": 0.3389, "step": 26636 }, { "epoch": 4.348230684461859, "grad_norm": 1.9285551309585571, "learning_rate": 1.6869260977781114e-05, "loss": 0.4084, "step": 26637 }, { "epoch": 4.348393943104363, "grad_norm": 2.281430244445801, "learning_rate": 1.6869030360072292e-05, "loss": 0.3808, "step": 26638 }, { "epoch": 4.348557201746868, "grad_norm": 2.2609331607818604, "learning_rate": 1.686879973544636e-05, "loss": 0.3288, "step": 26639 }, { "epoch": 4.348720460389372, "grad_norm": 2.1616640090942383, "learning_rate": 1.6868569103903544e-05, "loss": 0.3828, "step": 26640 }, { "epoch": 4.3488837190318765, "grad_norm": 1.9231065511703491, "learning_rate": 1.6868338465444086e-05, "loss": 0.3861, "step": 26641 }, { "epoch": 4.349046977674381, "grad_norm": 2.000274896621704, "learning_rate": 1.6868107820068207e-05, "loss": 0.337, "step": 26642 }, { "epoch": 4.349210236316885, "grad_norm": 2.452286958694458, "learning_rate": 1.686787716777615e-05, "loss": 0.428, "step": 26643 }, { "epoch": 4.349373494959389, "grad_norm": 2.550713300704956, "learning_rate": 1.686764650856814e-05, "loss": 0.4268, "step": 26644 }, { "epoch": 4.349536753601893, "grad_norm": 2.404933214187622, "learning_rate": 1.6867415842444415e-05, "loss": 0.3419, "step": 26645 }, { "epoch": 4.349700012244398, "grad_norm": 2.035372257232666, "learning_rate": 1.6867185169405203e-05, "loss": 0.3572, "step": 26646 }, { "epoch": 4.349863270886902, "grad_norm": 1.702368974685669, "learning_rate": 1.6866954489450735e-05, "loss": 0.345, "step": 26647 }, { "epoch": 4.350026529529407, "grad_norm": 2.3186917304992676, "learning_rate": 1.686672380258125e-05, "loss": 0.4425, "step": 26648 }, { "epoch": 4.350189788171911, "grad_norm": 1.7348244190216064, "learning_rate": 1.686649310879697e-05, "loss": 0.3293, "step": 26649 }, { "epoch": 4.350353046814416, "grad_norm": 2.4448235034942627, "learning_rate": 1.6866262408098134e-05, "loss": 0.4205, "step": 26650 }, { "epoch": 4.35051630545692, "grad_norm": 1.995890498161316, "learning_rate": 1.686603170048498e-05, "loss": 0.3788, "step": 26651 }, { "epoch": 4.3506795640994245, "grad_norm": 1.9128705263137817, "learning_rate": 1.686580098595773e-05, "loss": 0.2915, "step": 26652 }, { "epoch": 4.350842822741929, "grad_norm": 2.0959057807922363, "learning_rate": 1.686557026451662e-05, "loss": 0.4019, "step": 26653 }, { "epoch": 4.351006081384433, "grad_norm": 2.457808494567871, "learning_rate": 1.6865339536161884e-05, "loss": 0.3621, "step": 26654 }, { "epoch": 4.351169340026938, "grad_norm": 2.643082857131958, "learning_rate": 1.686510880089375e-05, "loss": 0.4453, "step": 26655 }, { "epoch": 4.351332598669442, "grad_norm": 1.8923282623291016, "learning_rate": 1.6864878058712456e-05, "loss": 0.3586, "step": 26656 }, { "epoch": 4.351495857311947, "grad_norm": 2.2671878337860107, "learning_rate": 1.6864647309618226e-05, "loss": 0.3965, "step": 26657 }, { "epoch": 4.351659115954451, "grad_norm": 2.26385498046875, "learning_rate": 1.6864416553611306e-05, "loss": 0.3727, "step": 26658 }, { "epoch": 4.3518223745969555, "grad_norm": 2.0225181579589844, "learning_rate": 1.6864185790691916e-05, "loss": 0.3679, "step": 26659 }, { "epoch": 4.35198563323946, "grad_norm": 1.9534839391708374, "learning_rate": 1.686395502086029e-05, "loss": 0.3647, "step": 26660 }, { "epoch": 4.352148891881964, "grad_norm": 2.474947452545166, "learning_rate": 1.686372424411667e-05, "loss": 0.3863, "step": 26661 }, { "epoch": 4.352312150524469, "grad_norm": 1.6658295392990112, "learning_rate": 1.686349346046128e-05, "loss": 0.3386, "step": 26662 }, { "epoch": 4.352475409166972, "grad_norm": 2.2936151027679443, "learning_rate": 1.686326266989435e-05, "loss": 0.3576, "step": 26663 }, { "epoch": 4.352638667809477, "grad_norm": 2.21982741355896, "learning_rate": 1.6863031872416115e-05, "loss": 0.4484, "step": 26664 }, { "epoch": 4.352801926451981, "grad_norm": 2.0109121799468994, "learning_rate": 1.6862801068026815e-05, "loss": 0.3466, "step": 26665 }, { "epoch": 4.352965185094486, "grad_norm": 1.954665184020996, "learning_rate": 1.6862570256726672e-05, "loss": 0.3357, "step": 26666 }, { "epoch": 4.35312844373699, "grad_norm": 2.0791544914245605, "learning_rate": 1.6862339438515926e-05, "loss": 0.4119, "step": 26667 }, { "epoch": 4.353291702379495, "grad_norm": 2.3248233795166016, "learning_rate": 1.6862108613394803e-05, "loss": 0.4486, "step": 26668 }, { "epoch": 4.353454961021999, "grad_norm": 2.246234178543091, "learning_rate": 1.6861877781363536e-05, "loss": 0.3895, "step": 26669 }, { "epoch": 4.3536182196645035, "grad_norm": 1.9596351385116577, "learning_rate": 1.6861646942422366e-05, "loss": 0.3657, "step": 26670 }, { "epoch": 4.353781478307008, "grad_norm": 2.091374158859253, "learning_rate": 1.6861416096571515e-05, "loss": 0.41, "step": 26671 }, { "epoch": 4.353944736949512, "grad_norm": 1.81184720993042, "learning_rate": 1.6861185243811223e-05, "loss": 0.3448, "step": 26672 }, { "epoch": 4.354107995592017, "grad_norm": 1.9922679662704468, "learning_rate": 1.6860954384141718e-05, "loss": 0.3639, "step": 26673 }, { "epoch": 4.354271254234521, "grad_norm": 2.3455119132995605, "learning_rate": 1.6860723517563232e-05, "loss": 0.3576, "step": 26674 }, { "epoch": 4.354434512877026, "grad_norm": 2.54292368888855, "learning_rate": 1.6860492644076e-05, "loss": 0.4034, "step": 26675 }, { "epoch": 4.35459777151953, "grad_norm": 2.0478878021240234, "learning_rate": 1.6860261763680255e-05, "loss": 0.3335, "step": 26676 }, { "epoch": 4.3547610301620345, "grad_norm": 2.0074877738952637, "learning_rate": 1.6860030876376225e-05, "loss": 0.3916, "step": 26677 }, { "epoch": 4.354924288804539, "grad_norm": 2.4656779766082764, "learning_rate": 1.685979998216415e-05, "loss": 0.3977, "step": 26678 }, { "epoch": 4.355087547447043, "grad_norm": 2.6102237701416016, "learning_rate": 1.6859569081044257e-05, "loss": 0.4744, "step": 26679 }, { "epoch": 4.355250806089547, "grad_norm": 2.14853835105896, "learning_rate": 1.685933817301678e-05, "loss": 0.4097, "step": 26680 }, { "epoch": 4.355414064732051, "grad_norm": 2.1638944149017334, "learning_rate": 1.685910725808195e-05, "loss": 0.3768, "step": 26681 }, { "epoch": 4.355577323374556, "grad_norm": 1.848750352859497, "learning_rate": 1.685887633624e-05, "loss": 0.3097, "step": 26682 }, { "epoch": 4.35574058201706, "grad_norm": 2.36957049369812, "learning_rate": 1.6858645407491164e-05, "loss": 0.4192, "step": 26683 }, { "epoch": 4.355903840659565, "grad_norm": 2.319990396499634, "learning_rate": 1.6858414471835674e-05, "loss": 0.4216, "step": 26684 }, { "epoch": 4.356067099302069, "grad_norm": 2.1862988471984863, "learning_rate": 1.6858183529273766e-05, "loss": 0.4171, "step": 26685 }, { "epoch": 4.356230357944574, "grad_norm": 1.9785716533660889, "learning_rate": 1.6857952579805666e-05, "loss": 0.4009, "step": 26686 }, { "epoch": 4.356393616587078, "grad_norm": 1.711787462234497, "learning_rate": 1.6857721623431608e-05, "loss": 0.3444, "step": 26687 }, { "epoch": 4.3565568752295825, "grad_norm": 1.881080150604248, "learning_rate": 1.6857490660151826e-05, "loss": 0.364, "step": 26688 }, { "epoch": 4.356720133872087, "grad_norm": 2.2007007598876953, "learning_rate": 1.6857259689966557e-05, "loss": 0.4455, "step": 26689 }, { "epoch": 4.356883392514591, "grad_norm": 2.244093418121338, "learning_rate": 1.6857028712876025e-05, "loss": 0.4545, "step": 26690 }, { "epoch": 4.357046651157096, "grad_norm": 1.9583076238632202, "learning_rate": 1.685679772888047e-05, "loss": 0.4545, "step": 26691 }, { "epoch": 4.3572099097996, "grad_norm": 2.611532211303711, "learning_rate": 1.685656673798012e-05, "loss": 0.4627, "step": 26692 }, { "epoch": 4.357373168442105, "grad_norm": 1.9253010749816895, "learning_rate": 1.685633574017521e-05, "loss": 0.3464, "step": 26693 }, { "epoch": 4.357536427084609, "grad_norm": 1.5949211120605469, "learning_rate": 1.685610473546597e-05, "loss": 0.3094, "step": 26694 }, { "epoch": 4.3576996857271135, "grad_norm": 2.231152296066284, "learning_rate": 1.6855873723852637e-05, "loss": 0.3867, "step": 26695 }, { "epoch": 4.357862944369618, "grad_norm": 1.779632329940796, "learning_rate": 1.6855642705335438e-05, "loss": 0.3533, "step": 26696 }, { "epoch": 4.3580262030121215, "grad_norm": 2.521700620651245, "learning_rate": 1.6855411679914612e-05, "loss": 0.3897, "step": 26697 }, { "epoch": 4.358189461654626, "grad_norm": 2.3192968368530273, "learning_rate": 1.6855180647590383e-05, "loss": 0.4251, "step": 26698 }, { "epoch": 4.35835272029713, "grad_norm": 2.1390793323516846, "learning_rate": 1.685494960836299e-05, "loss": 0.3565, "step": 26699 }, { "epoch": 4.358515978939635, "grad_norm": 1.7702566385269165, "learning_rate": 1.685471856223267e-05, "loss": 0.3666, "step": 26700 }, { "epoch": 4.358679237582139, "grad_norm": 2.0375242233276367, "learning_rate": 1.6854487509199647e-05, "loss": 0.4474, "step": 26701 }, { "epoch": 4.358842496224644, "grad_norm": 2.638962745666504, "learning_rate": 1.6854256449264156e-05, "loss": 0.3887, "step": 26702 }, { "epoch": 4.359005754867148, "grad_norm": 1.7534270286560059, "learning_rate": 1.6854025382426428e-05, "loss": 0.3281, "step": 26703 }, { "epoch": 4.359169013509653, "grad_norm": 2.3635478019714355, "learning_rate": 1.6853794308686705e-05, "loss": 0.4286, "step": 26704 }, { "epoch": 4.359332272152157, "grad_norm": 2.038969039916992, "learning_rate": 1.685356322804521e-05, "loss": 0.3552, "step": 26705 }, { "epoch": 4.3594955307946615, "grad_norm": 2.1696791648864746, "learning_rate": 1.6853332140502177e-05, "loss": 0.3787, "step": 26706 }, { "epoch": 4.359658789437166, "grad_norm": 1.7868194580078125, "learning_rate": 1.685310104605784e-05, "loss": 0.35, "step": 26707 }, { "epoch": 4.35982204807967, "grad_norm": 2.4193592071533203, "learning_rate": 1.6852869944712432e-05, "loss": 0.4225, "step": 26708 }, { "epoch": 4.359985306722175, "grad_norm": 2.0422699451446533, "learning_rate": 1.685263883646619e-05, "loss": 0.3472, "step": 26709 }, { "epoch": 4.360148565364679, "grad_norm": 2.0601894855499268, "learning_rate": 1.685240772131934e-05, "loss": 0.3583, "step": 26710 }, { "epoch": 4.360311824007184, "grad_norm": 2.338682174682617, "learning_rate": 1.685217659927212e-05, "loss": 0.446, "step": 26711 }, { "epoch": 4.360475082649688, "grad_norm": 4.045852184295654, "learning_rate": 1.6851945470324754e-05, "loss": 0.4938, "step": 26712 }, { "epoch": 4.3606383412921925, "grad_norm": 2.544301748275757, "learning_rate": 1.6851714334477483e-05, "loss": 0.3841, "step": 26713 }, { "epoch": 4.360801599934696, "grad_norm": 2.074113368988037, "learning_rate": 1.685148319173054e-05, "loss": 0.3846, "step": 26714 }, { "epoch": 4.360964858577201, "grad_norm": 1.7264878749847412, "learning_rate": 1.6851252042084155e-05, "loss": 0.345, "step": 26715 }, { "epoch": 4.361128117219705, "grad_norm": 2.2743780612945557, "learning_rate": 1.6851020885538557e-05, "loss": 0.4292, "step": 26716 }, { "epoch": 4.361291375862209, "grad_norm": 2.067253351211548, "learning_rate": 1.6850789722093986e-05, "loss": 0.4085, "step": 26717 }, { "epoch": 4.361454634504714, "grad_norm": 2.0205087661743164, "learning_rate": 1.685055855175067e-05, "loss": 0.3583, "step": 26718 }, { "epoch": 4.361617893147218, "grad_norm": 2.1732065677642822, "learning_rate": 1.6850327374508846e-05, "loss": 0.3548, "step": 26719 }, { "epoch": 4.361781151789723, "grad_norm": 1.7940495014190674, "learning_rate": 1.685009619036874e-05, "loss": 0.3634, "step": 26720 }, { "epoch": 4.361944410432227, "grad_norm": 1.9785552024841309, "learning_rate": 1.6849864999330594e-05, "loss": 0.3242, "step": 26721 }, { "epoch": 4.362107669074732, "grad_norm": 1.7673324346542358, "learning_rate": 1.684963380139463e-05, "loss": 0.3316, "step": 26722 }, { "epoch": 4.362270927717236, "grad_norm": 2.051776647567749, "learning_rate": 1.684940259656109e-05, "loss": 0.3679, "step": 26723 }, { "epoch": 4.3624341863597405, "grad_norm": 2.1910431385040283, "learning_rate": 1.6849171384830204e-05, "loss": 0.4125, "step": 26724 }, { "epoch": 4.362597445002245, "grad_norm": 1.5937601327896118, "learning_rate": 1.6848940166202203e-05, "loss": 0.3172, "step": 26725 }, { "epoch": 4.362760703644749, "grad_norm": 2.195390462875366, "learning_rate": 1.6848708940677324e-05, "loss": 0.3605, "step": 26726 }, { "epoch": 4.362923962287254, "grad_norm": 2.4469640254974365, "learning_rate": 1.684847770825579e-05, "loss": 0.4005, "step": 26727 }, { "epoch": 4.363087220929758, "grad_norm": 1.9518760442733765, "learning_rate": 1.6848246468937848e-05, "loss": 0.3693, "step": 26728 }, { "epoch": 4.363250479572263, "grad_norm": 2.5778727531433105, "learning_rate": 1.6848015222723722e-05, "loss": 0.4218, "step": 26729 }, { "epoch": 4.363413738214767, "grad_norm": 1.824320673942566, "learning_rate": 1.6847783969613646e-05, "loss": 0.3604, "step": 26730 }, { "epoch": 4.3635769968572715, "grad_norm": 2.3636960983276367, "learning_rate": 1.6847552709607852e-05, "loss": 0.4085, "step": 26731 }, { "epoch": 4.363740255499776, "grad_norm": 2.221017837524414, "learning_rate": 1.6847321442706572e-05, "loss": 0.3901, "step": 26732 }, { "epoch": 4.3639035141422795, "grad_norm": 1.7952126264572144, "learning_rate": 1.6847090168910045e-05, "loss": 0.371, "step": 26733 }, { "epoch": 4.364066772784784, "grad_norm": 2.0134477615356445, "learning_rate": 1.68468588882185e-05, "loss": 0.3983, "step": 26734 }, { "epoch": 4.364230031427288, "grad_norm": 1.8986217975616455, "learning_rate": 1.6846627600632167e-05, "loss": 0.3111, "step": 26735 }, { "epoch": 4.364393290069793, "grad_norm": 1.98251473903656, "learning_rate": 1.6846396306151284e-05, "loss": 0.4261, "step": 26736 }, { "epoch": 4.364556548712297, "grad_norm": 2.3506920337677, "learning_rate": 1.6846165004776082e-05, "loss": 0.4073, "step": 26737 }, { "epoch": 4.364719807354802, "grad_norm": 2.7269294261932373, "learning_rate": 1.6845933696506794e-05, "loss": 0.4963, "step": 26738 }, { "epoch": 4.364883065997306, "grad_norm": 1.9478493928909302, "learning_rate": 1.684570238134365e-05, "loss": 0.3599, "step": 26739 }, { "epoch": 4.365046324639811, "grad_norm": 2.0463104248046875, "learning_rate": 1.684547105928689e-05, "loss": 0.3811, "step": 26740 }, { "epoch": 4.365209583282315, "grad_norm": 2.3866732120513916, "learning_rate": 1.6845239730336736e-05, "loss": 0.3964, "step": 26741 }, { "epoch": 4.3653728419248194, "grad_norm": 2.090867519378662, "learning_rate": 1.6845008394493433e-05, "loss": 0.3438, "step": 26742 }, { "epoch": 4.365536100567324, "grad_norm": 1.8873568773269653, "learning_rate": 1.6844777051757205e-05, "loss": 0.344, "step": 26743 }, { "epoch": 4.365699359209828, "grad_norm": 2.005019187927246, "learning_rate": 1.684454570212829e-05, "loss": 0.362, "step": 26744 }, { "epoch": 4.365862617852333, "grad_norm": 1.8580492734909058, "learning_rate": 1.684431434560692e-05, "loss": 0.3373, "step": 26745 }, { "epoch": 4.366025876494837, "grad_norm": 2.261962652206421, "learning_rate": 1.6844082982193325e-05, "loss": 0.3887, "step": 26746 }, { "epoch": 4.366189135137342, "grad_norm": 1.9178106784820557, "learning_rate": 1.6843851611887744e-05, "loss": 0.3496, "step": 26747 }, { "epoch": 4.366352393779846, "grad_norm": 2.0407490730285645, "learning_rate": 1.6843620234690404e-05, "loss": 0.3443, "step": 26748 }, { "epoch": 4.3665156524223505, "grad_norm": 2.220860481262207, "learning_rate": 1.684338885060154e-05, "loss": 0.3726, "step": 26749 }, { "epoch": 4.366678911064854, "grad_norm": 2.274904727935791, "learning_rate": 1.6843157459621386e-05, "loss": 0.3869, "step": 26750 }, { "epoch": 4.3668421697073585, "grad_norm": 2.373894214630127, "learning_rate": 1.684292606175017e-05, "loss": 0.4119, "step": 26751 }, { "epoch": 4.367005428349863, "grad_norm": 1.8024123907089233, "learning_rate": 1.6842694656988133e-05, "loss": 0.3294, "step": 26752 }, { "epoch": 4.367168686992367, "grad_norm": 2.0025956630706787, "learning_rate": 1.6842463245335506e-05, "loss": 0.3512, "step": 26753 }, { "epoch": 4.367331945634872, "grad_norm": 1.8898720741271973, "learning_rate": 1.684223182679252e-05, "loss": 0.4078, "step": 26754 }, { "epoch": 4.367495204277376, "grad_norm": 2.578909158706665, "learning_rate": 1.6842000401359405e-05, "loss": 0.4628, "step": 26755 }, { "epoch": 4.367658462919881, "grad_norm": 2.398298501968384, "learning_rate": 1.68417689690364e-05, "loss": 0.47, "step": 26756 }, { "epoch": 4.367821721562385, "grad_norm": 1.8314018249511719, "learning_rate": 1.6841537529823732e-05, "loss": 0.3211, "step": 26757 }, { "epoch": 4.36798498020489, "grad_norm": 2.18064284324646, "learning_rate": 1.6841306083721642e-05, "loss": 0.3686, "step": 26758 }, { "epoch": 4.368148238847394, "grad_norm": 2.2756128311157227, "learning_rate": 1.6841074630730358e-05, "loss": 0.5015, "step": 26759 }, { "epoch": 4.368311497489898, "grad_norm": 2.2199723720550537, "learning_rate": 1.684084317085011e-05, "loss": 0.4145, "step": 26760 }, { "epoch": 4.368474756132403, "grad_norm": 1.8874778747558594, "learning_rate": 1.6840611704081138e-05, "loss": 0.35, "step": 26761 }, { "epoch": 4.368638014774907, "grad_norm": 2.2263307571411133, "learning_rate": 1.684038023042367e-05, "loss": 0.3714, "step": 26762 }, { "epoch": 4.368801273417412, "grad_norm": 1.9040312767028809, "learning_rate": 1.6840148749877942e-05, "loss": 0.349, "step": 26763 }, { "epoch": 4.368964532059916, "grad_norm": 2.04569673538208, "learning_rate": 1.6839917262444188e-05, "loss": 0.3551, "step": 26764 }, { "epoch": 4.369127790702421, "grad_norm": 2.0251924991607666, "learning_rate": 1.6839685768122636e-05, "loss": 0.3625, "step": 26765 }, { "epoch": 4.369291049344925, "grad_norm": 2.4208478927612305, "learning_rate": 1.6839454266913524e-05, "loss": 0.4285, "step": 26766 }, { "epoch": 4.369454307987429, "grad_norm": 2.278820753097534, "learning_rate": 1.683922275881708e-05, "loss": 0.4671, "step": 26767 }, { "epoch": 4.369617566629933, "grad_norm": 1.8846410512924194, "learning_rate": 1.6838991243833546e-05, "loss": 0.3226, "step": 26768 }, { "epoch": 4.3697808252724375, "grad_norm": 2.254272222518921, "learning_rate": 1.6838759721963145e-05, "loss": 0.4266, "step": 26769 }, { "epoch": 4.369944083914942, "grad_norm": 2.053081750869751, "learning_rate": 1.683852819320612e-05, "loss": 0.4117, "step": 26770 }, { "epoch": 4.370107342557446, "grad_norm": 2.7091832160949707, "learning_rate": 1.6838296657562692e-05, "loss": 0.4366, "step": 26771 }, { "epoch": 4.370270601199951, "grad_norm": 2.1415746212005615, "learning_rate": 1.6838065115033107e-05, "loss": 0.3649, "step": 26772 }, { "epoch": 4.370433859842455, "grad_norm": 2.058262825012207, "learning_rate": 1.683783356561759e-05, "loss": 0.3912, "step": 26773 }, { "epoch": 4.37059711848496, "grad_norm": 2.2072641849517822, "learning_rate": 1.6837602009316375e-05, "loss": 0.4087, "step": 26774 }, { "epoch": 4.370760377127464, "grad_norm": 1.9914124011993408, "learning_rate": 1.6837370446129696e-05, "loss": 0.3346, "step": 26775 }, { "epoch": 4.370923635769969, "grad_norm": 2.2312235832214355, "learning_rate": 1.6837138876057794e-05, "loss": 0.3761, "step": 26776 }, { "epoch": 4.371086894412473, "grad_norm": 2.6467859745025635, "learning_rate": 1.6836907299100887e-05, "loss": 0.4085, "step": 26777 }, { "epoch": 4.371250153054977, "grad_norm": 1.8923050165176392, "learning_rate": 1.6836675715259218e-05, "loss": 0.3531, "step": 26778 }, { "epoch": 4.371413411697482, "grad_norm": 2.173475980758667, "learning_rate": 1.683644412453302e-05, "loss": 0.3575, "step": 26779 }, { "epoch": 4.371576670339986, "grad_norm": 2.095766067504883, "learning_rate": 1.6836212526922523e-05, "loss": 0.4097, "step": 26780 }, { "epoch": 4.371739928982491, "grad_norm": 2.2668216228485107, "learning_rate": 1.683598092242796e-05, "loss": 0.4156, "step": 26781 }, { "epoch": 4.371903187624995, "grad_norm": 2.0515263080596924, "learning_rate": 1.683574931104957e-05, "loss": 0.3848, "step": 26782 }, { "epoch": 4.3720664462675, "grad_norm": 2.2194254398345947, "learning_rate": 1.683551769278758e-05, "loss": 0.3717, "step": 26783 }, { "epoch": 4.372229704910004, "grad_norm": 1.766097903251648, "learning_rate": 1.6835286067642228e-05, "loss": 0.3422, "step": 26784 }, { "epoch": 4.3723929635525085, "grad_norm": 1.6745754480361938, "learning_rate": 1.6835054435613742e-05, "loss": 0.296, "step": 26785 }, { "epoch": 4.372556222195012, "grad_norm": 2.832566261291504, "learning_rate": 1.6834822796702358e-05, "loss": 0.4328, "step": 26786 }, { "epoch": 4.3727194808375165, "grad_norm": 2.140615224838257, "learning_rate": 1.6834591150908313e-05, "loss": 0.3547, "step": 26787 }, { "epoch": 4.372882739480021, "grad_norm": 2.0482475757598877, "learning_rate": 1.6834359498231834e-05, "loss": 0.4079, "step": 26788 }, { "epoch": 4.373045998122525, "grad_norm": 2.1857147216796875, "learning_rate": 1.6834127838673155e-05, "loss": 0.4019, "step": 26789 }, { "epoch": 4.37320925676503, "grad_norm": 2.4845657348632812, "learning_rate": 1.6833896172232512e-05, "loss": 0.4245, "step": 26790 }, { "epoch": 4.373372515407534, "grad_norm": 1.8862521648406982, "learning_rate": 1.6833664498910137e-05, "loss": 0.3412, "step": 26791 }, { "epoch": 4.373535774050039, "grad_norm": 2.236302375793457, "learning_rate": 1.683343281870627e-05, "loss": 0.3963, "step": 26792 }, { "epoch": 4.373699032692543, "grad_norm": 2.0173323154449463, "learning_rate": 1.683320113162113e-05, "loss": 0.3534, "step": 26793 }, { "epoch": 4.373862291335048, "grad_norm": 2.2434024810791016, "learning_rate": 1.683296943765496e-05, "loss": 0.355, "step": 26794 }, { "epoch": 4.374025549977552, "grad_norm": 2.3464133739471436, "learning_rate": 1.6832737736807994e-05, "loss": 0.4241, "step": 26795 }, { "epoch": 4.374188808620056, "grad_norm": 2.707665205001831, "learning_rate": 1.683250602908046e-05, "loss": 0.403, "step": 26796 }, { "epoch": 4.374352067262561, "grad_norm": 2.3090806007385254, "learning_rate": 1.6832274314472596e-05, "loss": 0.4005, "step": 26797 }, { "epoch": 4.374515325905065, "grad_norm": 2.2319955825805664, "learning_rate": 1.683204259298463e-05, "loss": 0.361, "step": 26798 }, { "epoch": 4.37467858454757, "grad_norm": 1.83309006690979, "learning_rate": 1.6831810864616803e-05, "loss": 0.3414, "step": 26799 }, { "epoch": 4.374841843190074, "grad_norm": 1.7747128009796143, "learning_rate": 1.6831579129369347e-05, "loss": 0.3698, "step": 26800 }, { "epoch": 4.375005101832579, "grad_norm": 2.57399320602417, "learning_rate": 1.683134738724249e-05, "loss": 0.3828, "step": 26801 }, { "epoch": 4.375168360475083, "grad_norm": 2.4643027782440186, "learning_rate": 1.6831115638236463e-05, "loss": 0.427, "step": 26802 }, { "epoch": 4.375331619117587, "grad_norm": 2.0742616653442383, "learning_rate": 1.683088388235151e-05, "loss": 0.3473, "step": 26803 }, { "epoch": 4.375494877760091, "grad_norm": 2.0882110595703125, "learning_rate": 1.683065211958786e-05, "loss": 0.3807, "step": 26804 }, { "epoch": 4.3756581364025955, "grad_norm": 2.2909557819366455, "learning_rate": 1.683042034994574e-05, "loss": 0.417, "step": 26805 }, { "epoch": 4.3758213950451, "grad_norm": 2.5197925567626953, "learning_rate": 1.683018857342539e-05, "loss": 0.4724, "step": 26806 }, { "epoch": 4.375984653687604, "grad_norm": 2.0914227962493896, "learning_rate": 1.6829956790027043e-05, "loss": 0.3748, "step": 26807 }, { "epoch": 4.376147912330109, "grad_norm": 2.3267459869384766, "learning_rate": 1.6829724999750933e-05, "loss": 0.45, "step": 26808 }, { "epoch": 4.376311170972613, "grad_norm": 1.6266335248947144, "learning_rate": 1.682949320259729e-05, "loss": 0.3061, "step": 26809 }, { "epoch": 4.376474429615118, "grad_norm": 2.3170597553253174, "learning_rate": 1.6829261398566347e-05, "loss": 0.3725, "step": 26810 }, { "epoch": 4.376637688257622, "grad_norm": 2.1250486373901367, "learning_rate": 1.6829029587658344e-05, "loss": 0.3864, "step": 26811 }, { "epoch": 4.3768009469001266, "grad_norm": 2.027008056640625, "learning_rate": 1.6828797769873503e-05, "loss": 0.3654, "step": 26812 }, { "epoch": 4.376964205542631, "grad_norm": 2.20621919631958, "learning_rate": 1.6828565945212073e-05, "loss": 0.3624, "step": 26813 }, { "epoch": 4.377127464185135, "grad_norm": 1.6699285507202148, "learning_rate": 1.6828334113674274e-05, "loss": 0.3107, "step": 26814 }, { "epoch": 4.37729072282764, "grad_norm": 2.0453460216522217, "learning_rate": 1.6828102275260345e-05, "loss": 0.3488, "step": 26815 }, { "epoch": 4.377453981470144, "grad_norm": 2.3227427005767822, "learning_rate": 1.682787042997052e-05, "loss": 0.4332, "step": 26816 }, { "epoch": 4.377617240112649, "grad_norm": 2.594578504562378, "learning_rate": 1.6827638577805028e-05, "loss": 0.3981, "step": 26817 }, { "epoch": 4.377780498755153, "grad_norm": 2.453611373901367, "learning_rate": 1.682740671876411e-05, "loss": 0.4451, "step": 26818 }, { "epoch": 4.377943757397658, "grad_norm": 1.799761414527893, "learning_rate": 1.6827174852847994e-05, "loss": 0.3368, "step": 26819 }, { "epoch": 4.378107016040161, "grad_norm": 2.254147529602051, "learning_rate": 1.682694298005691e-05, "loss": 0.4095, "step": 26820 }, { "epoch": 4.378270274682666, "grad_norm": 2.204714775085449, "learning_rate": 1.6826711100391103e-05, "loss": 0.3973, "step": 26821 }, { "epoch": 4.37843353332517, "grad_norm": 1.784060001373291, "learning_rate": 1.6826479213850796e-05, "loss": 0.3359, "step": 26822 }, { "epoch": 4.3785967919676745, "grad_norm": 1.860614538192749, "learning_rate": 1.682624732043623e-05, "loss": 0.3452, "step": 26823 }, { "epoch": 4.378760050610179, "grad_norm": 2.0375115871429443, "learning_rate": 1.682601542014763e-05, "loss": 0.4283, "step": 26824 }, { "epoch": 4.378923309252683, "grad_norm": 1.8354467153549194, "learning_rate": 1.6825783512985238e-05, "loss": 0.3064, "step": 26825 }, { "epoch": 4.379086567895188, "grad_norm": 2.0486621856689453, "learning_rate": 1.6825551598949282e-05, "loss": 0.3797, "step": 26826 }, { "epoch": 4.379249826537692, "grad_norm": 2.0812950134277344, "learning_rate": 1.6825319678039996e-05, "loss": 0.3832, "step": 26827 }, { "epoch": 4.379413085180197, "grad_norm": 1.8280057907104492, "learning_rate": 1.6825087750257617e-05, "loss": 0.3465, "step": 26828 }, { "epoch": 4.379576343822701, "grad_norm": 2.137444496154785, "learning_rate": 1.6824855815602376e-05, "loss": 0.4454, "step": 26829 }, { "epoch": 4.3797396024652056, "grad_norm": 2.369993209838867, "learning_rate": 1.682462387407451e-05, "loss": 0.3977, "step": 26830 }, { "epoch": 4.37990286110771, "grad_norm": 1.8695344924926758, "learning_rate": 1.6824391925674244e-05, "loss": 0.3734, "step": 26831 }, { "epoch": 4.380066119750214, "grad_norm": 1.8387051820755005, "learning_rate": 1.6824159970401817e-05, "loss": 0.3698, "step": 26832 }, { "epoch": 4.380229378392719, "grad_norm": 2.2338414192199707, "learning_rate": 1.682392800825747e-05, "loss": 0.4072, "step": 26833 }, { "epoch": 4.380392637035223, "grad_norm": 2.0136003494262695, "learning_rate": 1.6823696039241422e-05, "loss": 0.319, "step": 26834 }, { "epoch": 4.380555895677728, "grad_norm": 2.347994565963745, "learning_rate": 1.6823464063353915e-05, "loss": 0.433, "step": 26835 }, { "epoch": 4.380719154320232, "grad_norm": 2.0933549404144287, "learning_rate": 1.6823232080595184e-05, "loss": 0.4392, "step": 26836 }, { "epoch": 4.380882412962737, "grad_norm": 1.9340249300003052, "learning_rate": 1.682300009096546e-05, "loss": 0.3141, "step": 26837 }, { "epoch": 4.381045671605241, "grad_norm": 1.730040192604065, "learning_rate": 1.6822768094464973e-05, "loss": 0.3304, "step": 26838 }, { "epoch": 4.381208930247745, "grad_norm": 1.7984122037887573, "learning_rate": 1.6822536091093967e-05, "loss": 0.3753, "step": 26839 }, { "epoch": 4.381372188890249, "grad_norm": 2.624145269393921, "learning_rate": 1.6822304080852663e-05, "loss": 0.4217, "step": 26840 }, { "epoch": 4.3815354475327535, "grad_norm": 1.858957290649414, "learning_rate": 1.6822072063741302e-05, "loss": 0.4016, "step": 26841 }, { "epoch": 4.381698706175258, "grad_norm": 2.1424829959869385, "learning_rate": 1.682184003976012e-05, "loss": 0.3983, "step": 26842 }, { "epoch": 4.381861964817762, "grad_norm": 2.120455741882324, "learning_rate": 1.6821608008909343e-05, "loss": 0.3881, "step": 26843 }, { "epoch": 4.382025223460267, "grad_norm": 2.24894642829895, "learning_rate": 1.682137597118921e-05, "loss": 0.3457, "step": 26844 }, { "epoch": 4.382188482102771, "grad_norm": 1.9519860744476318, "learning_rate": 1.682114392659995e-05, "loss": 0.3461, "step": 26845 }, { "epoch": 4.382351740745276, "grad_norm": 2.1460039615631104, "learning_rate": 1.6820911875141807e-05, "loss": 0.404, "step": 26846 }, { "epoch": 4.38251499938778, "grad_norm": 2.430476188659668, "learning_rate": 1.6820679816815002e-05, "loss": 0.3805, "step": 26847 }, { "epoch": 4.3826782580302845, "grad_norm": 2.275063991546631, "learning_rate": 1.6820447751619772e-05, "loss": 0.4487, "step": 26848 }, { "epoch": 4.382841516672789, "grad_norm": 2.2047245502471924, "learning_rate": 1.6820215679556356e-05, "loss": 0.4169, "step": 26849 }, { "epoch": 4.383004775315293, "grad_norm": 2.237217664718628, "learning_rate": 1.6819983600624986e-05, "loss": 0.4513, "step": 26850 }, { "epoch": 4.383168033957798, "grad_norm": 2.3487606048583984, "learning_rate": 1.6819751514825894e-05, "loss": 0.3728, "step": 26851 }, { "epoch": 4.383331292600302, "grad_norm": 2.143068790435791, "learning_rate": 1.6819519422159313e-05, "loss": 0.4266, "step": 26852 }, { "epoch": 4.383494551242807, "grad_norm": 2.0498061180114746, "learning_rate": 1.6819287322625476e-05, "loss": 0.3715, "step": 26853 }, { "epoch": 4.383657809885311, "grad_norm": 1.8700339794158936, "learning_rate": 1.681905521622462e-05, "loss": 0.3757, "step": 26854 }, { "epoch": 4.383821068527816, "grad_norm": 2.4466724395751953, "learning_rate": 1.6818823102956982e-05, "loss": 0.4373, "step": 26855 }, { "epoch": 4.383984327170319, "grad_norm": 2.329760789871216, "learning_rate": 1.6818590982822786e-05, "loss": 0.4213, "step": 26856 }, { "epoch": 4.384147585812824, "grad_norm": 2.0103418827056885, "learning_rate": 1.681835885582227e-05, "loss": 0.3951, "step": 26857 }, { "epoch": 4.384310844455328, "grad_norm": 2.1091673374176025, "learning_rate": 1.6818126721955666e-05, "loss": 0.4245, "step": 26858 }, { "epoch": 4.3844741030978325, "grad_norm": 2.1801772117614746, "learning_rate": 1.6817894581223218e-05, "loss": 0.4426, "step": 26859 }, { "epoch": 4.384637361740337, "grad_norm": 1.9760897159576416, "learning_rate": 1.6817662433625147e-05, "loss": 0.3629, "step": 26860 }, { "epoch": 4.384800620382841, "grad_norm": 1.8255325555801392, "learning_rate": 1.681743027916169e-05, "loss": 0.3999, "step": 26861 }, { "epoch": 4.384963879025346, "grad_norm": 2.0543861389160156, "learning_rate": 1.6817198117833088e-05, "loss": 0.3963, "step": 26862 }, { "epoch": 4.38512713766785, "grad_norm": 2.061805248260498, "learning_rate": 1.6816965949639567e-05, "loss": 0.407, "step": 26863 }, { "epoch": 4.385290396310355, "grad_norm": 2.0238914489746094, "learning_rate": 1.6816733774581362e-05, "loss": 0.3763, "step": 26864 }, { "epoch": 4.385453654952859, "grad_norm": 2.199742078781128, "learning_rate": 1.6816501592658708e-05, "loss": 0.3827, "step": 26865 }, { "epoch": 4.3856169135953635, "grad_norm": 2.042869806289673, "learning_rate": 1.681626940387184e-05, "loss": 0.3639, "step": 26866 }, { "epoch": 4.385780172237868, "grad_norm": 2.4809720516204834, "learning_rate": 1.681603720822099e-05, "loss": 0.4437, "step": 26867 }, { "epoch": 4.385943430880372, "grad_norm": 1.9102654457092285, "learning_rate": 1.6815805005706393e-05, "loss": 0.3852, "step": 26868 }, { "epoch": 4.386106689522877, "grad_norm": 2.0637614727020264, "learning_rate": 1.681557279632828e-05, "loss": 0.347, "step": 26869 }, { "epoch": 4.386269948165381, "grad_norm": 2.1744604110717773, "learning_rate": 1.6815340580086886e-05, "loss": 0.3649, "step": 26870 }, { "epoch": 4.386433206807886, "grad_norm": 2.0218286514282227, "learning_rate": 1.681510835698245e-05, "loss": 0.3591, "step": 26871 }, { "epoch": 4.38659646545039, "grad_norm": 1.8885564804077148, "learning_rate": 1.68148761270152e-05, "loss": 0.3955, "step": 26872 }, { "epoch": 4.386759724092894, "grad_norm": 2.1057019233703613, "learning_rate": 1.681464389018537e-05, "loss": 0.3383, "step": 26873 }, { "epoch": 4.386922982735398, "grad_norm": 2.34384822845459, "learning_rate": 1.6814411646493197e-05, "loss": 0.3605, "step": 26874 }, { "epoch": 4.387086241377903, "grad_norm": 2.252300977706909, "learning_rate": 1.6814179395938915e-05, "loss": 0.4185, "step": 26875 }, { "epoch": 4.387249500020407, "grad_norm": 1.929128885269165, "learning_rate": 1.6813947138522752e-05, "loss": 0.3226, "step": 26876 }, { "epoch": 4.3874127586629115, "grad_norm": 2.489478826522827, "learning_rate": 1.6813714874244948e-05, "loss": 0.3873, "step": 26877 }, { "epoch": 4.387576017305416, "grad_norm": 2.1163134574890137, "learning_rate": 1.6813482603105736e-05, "loss": 0.3777, "step": 26878 }, { "epoch": 4.38773927594792, "grad_norm": 2.93496036529541, "learning_rate": 1.6813250325105347e-05, "loss": 0.4493, "step": 26879 }, { "epoch": 4.387902534590425, "grad_norm": 2.115158796310425, "learning_rate": 1.681301804024402e-05, "loss": 0.3827, "step": 26880 }, { "epoch": 4.388065793232929, "grad_norm": 2.7399144172668457, "learning_rate": 1.681278574852198e-05, "loss": 0.4339, "step": 26881 }, { "epoch": 4.388229051875434, "grad_norm": 1.9271215200424194, "learning_rate": 1.681255344993947e-05, "loss": 0.3332, "step": 26882 }, { "epoch": 4.388392310517938, "grad_norm": 2.2708024978637695, "learning_rate": 1.6812321144496722e-05, "loss": 0.3746, "step": 26883 }, { "epoch": 4.3885555691604425, "grad_norm": 1.9163895845413208, "learning_rate": 1.6812088832193965e-05, "loss": 0.3616, "step": 26884 }, { "epoch": 4.388718827802947, "grad_norm": 1.859850287437439, "learning_rate": 1.6811856513031438e-05, "loss": 0.3486, "step": 26885 }, { "epoch": 4.388882086445451, "grad_norm": 1.725441813468933, "learning_rate": 1.6811624187009373e-05, "loss": 0.3161, "step": 26886 }, { "epoch": 4.389045345087956, "grad_norm": 1.666322112083435, "learning_rate": 1.6811391854128005e-05, "loss": 0.3186, "step": 26887 }, { "epoch": 4.38920860373046, "grad_norm": 1.8972324132919312, "learning_rate": 1.6811159514387568e-05, "loss": 0.3244, "step": 26888 }, { "epoch": 4.389371862372965, "grad_norm": 1.7415987253189087, "learning_rate": 1.6810927167788295e-05, "loss": 0.3419, "step": 26889 }, { "epoch": 4.389535121015469, "grad_norm": 2.5149295330047607, "learning_rate": 1.6810694814330416e-05, "loss": 0.4027, "step": 26890 }, { "epoch": 4.389698379657974, "grad_norm": 2.0634500980377197, "learning_rate": 1.6810462454014173e-05, "loss": 0.3594, "step": 26891 }, { "epoch": 4.389861638300477, "grad_norm": 2.2092602252960205, "learning_rate": 1.6810230086839796e-05, "loss": 0.3397, "step": 26892 }, { "epoch": 4.390024896942982, "grad_norm": 2.7586708068847656, "learning_rate": 1.6809997712807517e-05, "loss": 0.4457, "step": 26893 }, { "epoch": 4.390188155585486, "grad_norm": 1.909130334854126, "learning_rate": 1.6809765331917576e-05, "loss": 0.3399, "step": 26894 }, { "epoch": 4.3903514142279905, "grad_norm": 1.94956374168396, "learning_rate": 1.68095329441702e-05, "loss": 0.352, "step": 26895 }, { "epoch": 4.390514672870495, "grad_norm": 2.244675397872925, "learning_rate": 1.6809300549565627e-05, "loss": 0.4213, "step": 26896 }, { "epoch": 4.390677931512999, "grad_norm": 2.378518581390381, "learning_rate": 1.6809068148104087e-05, "loss": 0.4108, "step": 26897 }, { "epoch": 4.390841190155504, "grad_norm": 2.6443960666656494, "learning_rate": 1.6808835739785823e-05, "loss": 0.4627, "step": 26898 }, { "epoch": 4.391004448798008, "grad_norm": 1.8526477813720703, "learning_rate": 1.680860332461106e-05, "loss": 0.3811, "step": 26899 }, { "epoch": 4.391167707440513, "grad_norm": 2.0267412662506104, "learning_rate": 1.6808370902580034e-05, "loss": 0.4047, "step": 26900 }, { "epoch": 4.391330966083017, "grad_norm": 1.888904094696045, "learning_rate": 1.6808138473692984e-05, "loss": 0.3459, "step": 26901 }, { "epoch": 4.3914942247255215, "grad_norm": 2.270123243331909, "learning_rate": 1.6807906037950137e-05, "loss": 0.3584, "step": 26902 }, { "epoch": 4.391657483368026, "grad_norm": 2.2662806510925293, "learning_rate": 1.680767359535173e-05, "loss": 0.3853, "step": 26903 }, { "epoch": 4.39182074201053, "grad_norm": 2.2526373863220215, "learning_rate": 1.6807441145898003e-05, "loss": 0.407, "step": 26904 }, { "epoch": 4.391984000653035, "grad_norm": 2.214369297027588, "learning_rate": 1.680720868958918e-05, "loss": 0.3599, "step": 26905 }, { "epoch": 4.392147259295539, "grad_norm": 2.4003918170928955, "learning_rate": 1.68069762264255e-05, "loss": 0.4224, "step": 26906 }, { "epoch": 4.392310517938044, "grad_norm": 2.0401151180267334, "learning_rate": 1.68067437564072e-05, "loss": 0.3735, "step": 26907 }, { "epoch": 4.392473776580548, "grad_norm": 2.273948907852173, "learning_rate": 1.6806511279534506e-05, "loss": 0.4588, "step": 26908 }, { "epoch": 4.392637035223052, "grad_norm": 2.0063323974609375, "learning_rate": 1.680627879580766e-05, "loss": 0.4099, "step": 26909 }, { "epoch": 4.392800293865556, "grad_norm": 2.1819541454315186, "learning_rate": 1.680604630522689e-05, "loss": 0.3971, "step": 26910 }, { "epoch": 4.392963552508061, "grad_norm": 2.711350202560425, "learning_rate": 1.6805813807792435e-05, "loss": 0.4163, "step": 26911 }, { "epoch": 4.393126811150565, "grad_norm": 2.2024121284484863, "learning_rate": 1.6805581303504528e-05, "loss": 0.3746, "step": 26912 }, { "epoch": 4.3932900697930695, "grad_norm": 1.7461950778961182, "learning_rate": 1.6805348792363402e-05, "loss": 0.3494, "step": 26913 }, { "epoch": 4.393453328435574, "grad_norm": 1.9342328310012817, "learning_rate": 1.6805116274369293e-05, "loss": 0.3467, "step": 26914 }, { "epoch": 4.393616587078078, "grad_norm": 2.414379835128784, "learning_rate": 1.6804883749522432e-05, "loss": 0.372, "step": 26915 }, { "epoch": 4.393779845720583, "grad_norm": 2.11293625831604, "learning_rate": 1.6804651217823055e-05, "loss": 0.431, "step": 26916 }, { "epoch": 4.393943104363087, "grad_norm": 2.3590571880340576, "learning_rate": 1.6804418679271395e-05, "loss": 0.3661, "step": 26917 }, { "epoch": 4.394106363005592, "grad_norm": 1.7471370697021484, "learning_rate": 1.680418613386769e-05, "loss": 0.3283, "step": 26918 }, { "epoch": 4.394269621648096, "grad_norm": 1.9268885850906372, "learning_rate": 1.6803953581612166e-05, "loss": 0.3978, "step": 26919 }, { "epoch": 4.3944328802906005, "grad_norm": 1.603460431098938, "learning_rate": 1.680372102250507e-05, "loss": 0.3762, "step": 26920 }, { "epoch": 4.394596138933105, "grad_norm": 2.3647897243499756, "learning_rate": 1.6803488456546625e-05, "loss": 0.4104, "step": 26921 }, { "epoch": 4.394759397575609, "grad_norm": 2.1474533081054688, "learning_rate": 1.6803255883737066e-05, "loss": 0.4111, "step": 26922 }, { "epoch": 4.394922656218114, "grad_norm": 2.5961966514587402, "learning_rate": 1.6803023304076637e-05, "loss": 0.3583, "step": 26923 }, { "epoch": 4.395085914860618, "grad_norm": 2.3356502056121826, "learning_rate": 1.680279071756556e-05, "loss": 0.4263, "step": 26924 }, { "epoch": 4.395249173503123, "grad_norm": 2.3263332843780518, "learning_rate": 1.6802558124204076e-05, "loss": 0.3704, "step": 26925 }, { "epoch": 4.395412432145626, "grad_norm": 2.0424766540527344, "learning_rate": 1.6802325523992418e-05, "loss": 0.3999, "step": 26926 }, { "epoch": 4.395575690788131, "grad_norm": 1.9405410289764404, "learning_rate": 1.680209291693082e-05, "loss": 0.3585, "step": 26927 }, { "epoch": 4.395738949430635, "grad_norm": 2.294705629348755, "learning_rate": 1.680186030301951e-05, "loss": 0.3829, "step": 26928 }, { "epoch": 4.39590220807314, "grad_norm": 2.112518072128296, "learning_rate": 1.680162768225874e-05, "loss": 0.369, "step": 26929 }, { "epoch": 4.396065466715644, "grad_norm": 2.8739144802093506, "learning_rate": 1.6801395054648724e-05, "loss": 0.4348, "step": 26930 }, { "epoch": 4.3962287253581485, "grad_norm": 1.9648568630218506, "learning_rate": 1.6801162420189708e-05, "loss": 0.3739, "step": 26931 }, { "epoch": 4.396391984000653, "grad_norm": 2.3120269775390625, "learning_rate": 1.6800929778881926e-05, "loss": 0.3948, "step": 26932 }, { "epoch": 4.396555242643157, "grad_norm": 2.2330002784729004, "learning_rate": 1.680069713072561e-05, "loss": 0.395, "step": 26933 }, { "epoch": 4.396718501285662, "grad_norm": 2.013935089111328, "learning_rate": 1.6800464475720985e-05, "loss": 0.3715, "step": 26934 }, { "epoch": 4.396881759928166, "grad_norm": 2.4314558506011963, "learning_rate": 1.68002318138683e-05, "loss": 0.4187, "step": 26935 }, { "epoch": 4.397045018570671, "grad_norm": 1.9683698415756226, "learning_rate": 1.6799999145167785e-05, "loss": 0.3293, "step": 26936 }, { "epoch": 4.397208277213175, "grad_norm": 2.0053720474243164, "learning_rate": 1.6799766469619672e-05, "loss": 0.3469, "step": 26937 }, { "epoch": 4.3973715358556795, "grad_norm": 2.4357993602752686, "learning_rate": 1.6799533787224192e-05, "loss": 0.4848, "step": 26938 }, { "epoch": 4.397534794498184, "grad_norm": 2.3710877895355225, "learning_rate": 1.679930109798159e-05, "loss": 0.3799, "step": 26939 }, { "epoch": 4.397698053140688, "grad_norm": 2.10978627204895, "learning_rate": 1.6799068401892087e-05, "loss": 0.3991, "step": 26940 }, { "epoch": 4.397861311783193, "grad_norm": 2.1245157718658447, "learning_rate": 1.6798835698955926e-05, "loss": 0.4522, "step": 26941 }, { "epoch": 4.398024570425697, "grad_norm": 2.5974810123443604, "learning_rate": 1.6798602989173344e-05, "loss": 0.4256, "step": 26942 }, { "epoch": 4.398187829068201, "grad_norm": 1.8850722312927246, "learning_rate": 1.6798370272544563e-05, "loss": 0.3434, "step": 26943 }, { "epoch": 4.398351087710706, "grad_norm": 2.11283540725708, "learning_rate": 1.6798137549069832e-05, "loss": 0.4427, "step": 26944 }, { "epoch": 4.39851434635321, "grad_norm": 1.9084537029266357, "learning_rate": 1.679790481874937e-05, "loss": 0.3485, "step": 26945 }, { "epoch": 4.398677604995714, "grad_norm": 2.162476062774658, "learning_rate": 1.679767208158343e-05, "loss": 0.4304, "step": 26946 }, { "epoch": 4.398840863638219, "grad_norm": 1.9623472690582275, "learning_rate": 1.6797439337572224e-05, "loss": 0.3552, "step": 26947 }, { "epoch": 4.399004122280723, "grad_norm": 1.701801061630249, "learning_rate": 1.6797206586716008e-05, "loss": 0.3262, "step": 26948 }, { "epoch": 4.3991673809232275, "grad_norm": 1.7959500551223755, "learning_rate": 1.6796973829015003e-05, "loss": 0.3919, "step": 26949 }, { "epoch": 4.399330639565732, "grad_norm": 2.373209238052368, "learning_rate": 1.6796741064469446e-05, "loss": 0.4604, "step": 26950 }, { "epoch": 4.399493898208236, "grad_norm": 1.9129747152328491, "learning_rate": 1.6796508293079576e-05, "loss": 0.404, "step": 26951 }, { "epoch": 4.399657156850741, "grad_norm": 1.8120793104171753, "learning_rate": 1.679627551484562e-05, "loss": 0.3434, "step": 26952 }, { "epoch": 4.399820415493245, "grad_norm": 1.886122465133667, "learning_rate": 1.679604272976782e-05, "loss": 0.4267, "step": 26953 }, { "epoch": 4.39998367413575, "grad_norm": 2.134873390197754, "learning_rate": 1.6795809937846406e-05, "loss": 0.3615, "step": 26954 }, { "epoch": 4.400146932778254, "grad_norm": 2.030320167541504, "learning_rate": 1.679557713908161e-05, "loss": 0.3946, "step": 26955 }, { "epoch": 4.4003101914207585, "grad_norm": 2.0046327114105225, "learning_rate": 1.679534433347367e-05, "loss": 0.3308, "step": 26956 }, { "epoch": 4.400473450063263, "grad_norm": 1.992470383644104, "learning_rate": 1.6795111521022825e-05, "loss": 0.3612, "step": 26957 }, { "epoch": 4.400636708705767, "grad_norm": 2.1229958534240723, "learning_rate": 1.67948787017293e-05, "loss": 0.372, "step": 26958 }, { "epoch": 4.400799967348272, "grad_norm": 2.286447286605835, "learning_rate": 1.6794645875593336e-05, "loss": 0.4408, "step": 26959 }, { "epoch": 4.400963225990776, "grad_norm": 1.8817791938781738, "learning_rate": 1.6794413042615168e-05, "loss": 0.3497, "step": 26960 }, { "epoch": 4.401126484633281, "grad_norm": 2.1748287677764893, "learning_rate": 1.6794180202795023e-05, "loss": 0.3642, "step": 26961 }, { "epoch": 4.401289743275784, "grad_norm": 2.1037774085998535, "learning_rate": 1.6793947356133143e-05, "loss": 0.3656, "step": 26962 }, { "epoch": 4.401453001918289, "grad_norm": 1.9194316864013672, "learning_rate": 1.679371450262976e-05, "loss": 0.3497, "step": 26963 }, { "epoch": 4.401616260560793, "grad_norm": 1.880861759185791, "learning_rate": 1.6793481642285107e-05, "loss": 0.3569, "step": 26964 }, { "epoch": 4.401779519203298, "grad_norm": 1.6508164405822754, "learning_rate": 1.679324877509942e-05, "loss": 0.3963, "step": 26965 }, { "epoch": 4.401942777845802, "grad_norm": 2.0907211303710938, "learning_rate": 1.6793015901072932e-05, "loss": 0.4072, "step": 26966 }, { "epoch": 4.4021060364883065, "grad_norm": 2.1231327056884766, "learning_rate": 1.679278302020588e-05, "loss": 0.3798, "step": 26967 }, { "epoch": 4.402269295130811, "grad_norm": 2.025085210800171, "learning_rate": 1.6792550132498497e-05, "loss": 0.384, "step": 26968 }, { "epoch": 4.402432553773315, "grad_norm": 1.4911047220230103, "learning_rate": 1.6792317237951018e-05, "loss": 0.3238, "step": 26969 }, { "epoch": 4.40259581241582, "grad_norm": 2.094515323638916, "learning_rate": 1.6792084336563677e-05, "loss": 0.3854, "step": 26970 }, { "epoch": 4.402759071058324, "grad_norm": 2.0933990478515625, "learning_rate": 1.679185142833671e-05, "loss": 0.4133, "step": 26971 }, { "epoch": 4.402922329700829, "grad_norm": 1.992457628250122, "learning_rate": 1.6791618513270354e-05, "loss": 0.3728, "step": 26972 }, { "epoch": 4.403085588343333, "grad_norm": 2.3645055294036865, "learning_rate": 1.6791385591364836e-05, "loss": 0.4273, "step": 26973 }, { "epoch": 4.4032488469858375, "grad_norm": 2.1405489444732666, "learning_rate": 1.679115266262039e-05, "loss": 0.391, "step": 26974 }, { "epoch": 4.403412105628342, "grad_norm": 2.149768352508545, "learning_rate": 1.6790919727037263e-05, "loss": 0.4214, "step": 26975 }, { "epoch": 4.403575364270846, "grad_norm": 2.1911401748657227, "learning_rate": 1.679068678461568e-05, "loss": 0.4035, "step": 26976 }, { "epoch": 4.403738622913351, "grad_norm": 2.210597038269043, "learning_rate": 1.6790453835355872e-05, "loss": 0.3639, "step": 26977 }, { "epoch": 4.403901881555855, "grad_norm": 1.904137134552002, "learning_rate": 1.6790220879258084e-05, "loss": 0.3565, "step": 26978 }, { "epoch": 4.404065140198359, "grad_norm": 2.0562448501586914, "learning_rate": 1.678998791632254e-05, "loss": 0.3903, "step": 26979 }, { "epoch": 4.404228398840863, "grad_norm": 1.843213438987732, "learning_rate": 1.6789754946549487e-05, "loss": 0.3518, "step": 26980 }, { "epoch": 4.404391657483368, "grad_norm": 2.4855103492736816, "learning_rate": 1.678952196993915e-05, "loss": 0.4006, "step": 26981 }, { "epoch": 4.404554916125872, "grad_norm": 2.188768148422241, "learning_rate": 1.6789288986491764e-05, "loss": 0.4172, "step": 26982 }, { "epoch": 4.404718174768377, "grad_norm": 2.0664432048797607, "learning_rate": 1.6789055996207567e-05, "loss": 0.4344, "step": 26983 }, { "epoch": 4.404881433410881, "grad_norm": 1.8524386882781982, "learning_rate": 1.678882299908679e-05, "loss": 0.3775, "step": 26984 }, { "epoch": 4.4050446920533854, "grad_norm": 1.880824089050293, "learning_rate": 1.6788589995129672e-05, "loss": 0.3748, "step": 26985 }, { "epoch": 4.40520795069589, "grad_norm": 2.3465652465820312, "learning_rate": 1.6788356984336446e-05, "loss": 0.3588, "step": 26986 }, { "epoch": 4.405371209338394, "grad_norm": 1.8133177757263184, "learning_rate": 1.6788123966707348e-05, "loss": 0.4007, "step": 26987 }, { "epoch": 4.405534467980899, "grad_norm": 1.7170164585113525, "learning_rate": 1.678789094224261e-05, "loss": 0.3433, "step": 26988 }, { "epoch": 4.405697726623403, "grad_norm": 2.438365936279297, "learning_rate": 1.6787657910942465e-05, "loss": 0.4036, "step": 26989 }, { "epoch": 4.405860985265908, "grad_norm": 2.179935932159424, "learning_rate": 1.678742487280715e-05, "loss": 0.3529, "step": 26990 }, { "epoch": 4.406024243908412, "grad_norm": 2.146817207336426, "learning_rate": 1.6787191827836907e-05, "loss": 0.3918, "step": 26991 }, { "epoch": 4.4061875025509165, "grad_norm": 1.890014410018921, "learning_rate": 1.6786958776031958e-05, "loss": 0.417, "step": 26992 }, { "epoch": 4.406350761193421, "grad_norm": 2.0938339233398438, "learning_rate": 1.6786725717392544e-05, "loss": 0.4143, "step": 26993 }, { "epoch": 4.406514019835925, "grad_norm": 2.15242862701416, "learning_rate": 1.67864926519189e-05, "loss": 0.4385, "step": 26994 }, { "epoch": 4.40667727847843, "grad_norm": 2.022233247756958, "learning_rate": 1.678625957961126e-05, "loss": 0.3786, "step": 26995 }, { "epoch": 4.406840537120933, "grad_norm": 2.6003055572509766, "learning_rate": 1.6786026500469857e-05, "loss": 0.476, "step": 26996 }, { "epoch": 4.407003795763438, "grad_norm": 2.0846734046936035, "learning_rate": 1.6785793414494928e-05, "loss": 0.3786, "step": 26997 }, { "epoch": 4.407167054405942, "grad_norm": 2.2180263996124268, "learning_rate": 1.6785560321686706e-05, "loss": 0.4362, "step": 26998 }, { "epoch": 4.407330313048447, "grad_norm": 1.9192217588424683, "learning_rate": 1.6785327222045425e-05, "loss": 0.4114, "step": 26999 }, { "epoch": 4.407493571690951, "grad_norm": 2.077796459197998, "learning_rate": 1.6785094115571323e-05, "loss": 0.3754, "step": 27000 }, { "epoch": 4.407656830333456, "grad_norm": 1.8261507749557495, "learning_rate": 1.6784861002264633e-05, "loss": 0.3821, "step": 27001 }, { "epoch": 4.40782008897596, "grad_norm": 1.980186939239502, "learning_rate": 1.678462788212559e-05, "loss": 0.3581, "step": 27002 }, { "epoch": 4.407983347618464, "grad_norm": 1.8066929578781128, "learning_rate": 1.6784394755154428e-05, "loss": 0.3438, "step": 27003 }, { "epoch": 4.408146606260969, "grad_norm": 1.78931725025177, "learning_rate": 1.6784161621351384e-05, "loss": 0.3239, "step": 27004 }, { "epoch": 4.408309864903473, "grad_norm": 2.2840311527252197, "learning_rate": 1.678392848071669e-05, "loss": 0.3377, "step": 27005 }, { "epoch": 4.408473123545978, "grad_norm": 2.0758609771728516, "learning_rate": 1.678369533325058e-05, "loss": 0.3825, "step": 27006 }, { "epoch": 4.408636382188482, "grad_norm": 2.7095541954040527, "learning_rate": 1.6783462178953293e-05, "loss": 0.4189, "step": 27007 }, { "epoch": 4.408799640830987, "grad_norm": 2.404209613800049, "learning_rate": 1.678322901782506e-05, "loss": 0.4234, "step": 27008 }, { "epoch": 4.408962899473491, "grad_norm": 2.0172677040100098, "learning_rate": 1.6782995849866118e-05, "loss": 0.4217, "step": 27009 }, { "epoch": 4.4091261581159955, "grad_norm": 1.9990311861038208, "learning_rate": 1.67827626750767e-05, "loss": 0.4309, "step": 27010 }, { "epoch": 4.4092894167585, "grad_norm": 2.2325854301452637, "learning_rate": 1.6782529493457045e-05, "loss": 0.3601, "step": 27011 }, { "epoch": 4.409452675401004, "grad_norm": 2.1259822845458984, "learning_rate": 1.678229630500738e-05, "loss": 0.4031, "step": 27012 }, { "epoch": 4.409615934043509, "grad_norm": 2.2708053588867188, "learning_rate": 1.6782063109727947e-05, "loss": 0.4307, "step": 27013 }, { "epoch": 4.409779192686013, "grad_norm": 2.4064133167266846, "learning_rate": 1.678182990761898e-05, "loss": 0.3929, "step": 27014 }, { "epoch": 4.409942451328517, "grad_norm": 2.447438955307007, "learning_rate": 1.6781596698680708e-05, "loss": 0.4208, "step": 27015 }, { "epoch": 4.410105709971021, "grad_norm": 1.9603101015090942, "learning_rate": 1.6781363482913372e-05, "loss": 0.3855, "step": 27016 }, { "epoch": 4.410268968613526, "grad_norm": 2.4143919944763184, "learning_rate": 1.6781130260317206e-05, "loss": 0.394, "step": 27017 }, { "epoch": 4.41043222725603, "grad_norm": 2.25634503364563, "learning_rate": 1.678089703089244e-05, "loss": 0.409, "step": 27018 }, { "epoch": 4.410595485898535, "grad_norm": 2.1496012210845947, "learning_rate": 1.6780663794639313e-05, "loss": 0.423, "step": 27019 }, { "epoch": 4.410758744541039, "grad_norm": 2.0044827461242676, "learning_rate": 1.678043055155806e-05, "loss": 0.3684, "step": 27020 }, { "epoch": 4.410922003183543, "grad_norm": 2.5583832263946533, "learning_rate": 1.678019730164892e-05, "loss": 0.5078, "step": 27021 }, { "epoch": 4.411085261826048, "grad_norm": 2.263383388519287, "learning_rate": 1.677996404491212e-05, "loss": 0.4435, "step": 27022 }, { "epoch": 4.411248520468552, "grad_norm": 1.8332524299621582, "learning_rate": 1.6779730781347897e-05, "loss": 0.4048, "step": 27023 }, { "epoch": 4.411411779111057, "grad_norm": 2.242441415786743, "learning_rate": 1.6779497510956487e-05, "loss": 0.3689, "step": 27024 }, { "epoch": 4.411575037753561, "grad_norm": 2.1985998153686523, "learning_rate": 1.6779264233738124e-05, "loss": 0.3779, "step": 27025 }, { "epoch": 4.411738296396066, "grad_norm": 1.8581401109695435, "learning_rate": 1.6779030949693044e-05, "loss": 0.3596, "step": 27026 }, { "epoch": 4.41190155503857, "grad_norm": 2.265974283218384, "learning_rate": 1.6778797658821486e-05, "loss": 0.3841, "step": 27027 }, { "epoch": 4.4120648136810745, "grad_norm": 2.0150275230407715, "learning_rate": 1.6778564361123675e-05, "loss": 0.4036, "step": 27028 }, { "epoch": 4.412228072323579, "grad_norm": 2.135582208633423, "learning_rate": 1.6778331056599853e-05, "loss": 0.3624, "step": 27029 }, { "epoch": 4.412391330966083, "grad_norm": 2.391209363937378, "learning_rate": 1.6778097745250256e-05, "loss": 0.3865, "step": 27030 }, { "epoch": 4.412554589608588, "grad_norm": 2.3215179443359375, "learning_rate": 1.6777864427075112e-05, "loss": 0.4091, "step": 27031 }, { "epoch": 4.412717848251091, "grad_norm": 2.7946536540985107, "learning_rate": 1.677763110207466e-05, "loss": 0.4075, "step": 27032 }, { "epoch": 4.412881106893596, "grad_norm": 2.0637025833129883, "learning_rate": 1.677739777024914e-05, "loss": 0.354, "step": 27033 }, { "epoch": 4.4130443655361, "grad_norm": 2.041416645050049, "learning_rate": 1.677716443159878e-05, "loss": 0.3464, "step": 27034 }, { "epoch": 4.413207624178605, "grad_norm": 2.199514150619507, "learning_rate": 1.677693108612382e-05, "loss": 0.4177, "step": 27035 }, { "epoch": 4.413370882821109, "grad_norm": 2.343034505844116, "learning_rate": 1.6776697733824487e-05, "loss": 0.4417, "step": 27036 }, { "epoch": 4.413534141463614, "grad_norm": 1.790825366973877, "learning_rate": 1.6776464374701026e-05, "loss": 0.3663, "step": 27037 }, { "epoch": 4.413697400106118, "grad_norm": 1.8923648595809937, "learning_rate": 1.6776231008753664e-05, "loss": 0.3761, "step": 27038 }, { "epoch": 4.413860658748622, "grad_norm": 2.1312978267669678, "learning_rate": 1.677599763598264e-05, "loss": 0.4342, "step": 27039 }, { "epoch": 4.414023917391127, "grad_norm": 2.6036124229431152, "learning_rate": 1.6775764256388187e-05, "loss": 0.4407, "step": 27040 }, { "epoch": 4.414187176033631, "grad_norm": 2.5543212890625, "learning_rate": 1.6775530869970543e-05, "loss": 0.4698, "step": 27041 }, { "epoch": 4.414350434676136, "grad_norm": 2.117241621017456, "learning_rate": 1.677529747672994e-05, "loss": 0.4109, "step": 27042 }, { "epoch": 4.41451369331864, "grad_norm": 2.1691207885742188, "learning_rate": 1.6775064076666615e-05, "loss": 0.3822, "step": 27043 }, { "epoch": 4.414676951961145, "grad_norm": 2.1989424228668213, "learning_rate": 1.6774830669780803e-05, "loss": 0.3436, "step": 27044 }, { "epoch": 4.414840210603649, "grad_norm": 2.1453306674957275, "learning_rate": 1.6774597256072736e-05, "loss": 0.4102, "step": 27045 }, { "epoch": 4.4150034692461535, "grad_norm": 2.2174875736236572, "learning_rate": 1.6774363835542653e-05, "loss": 0.4169, "step": 27046 }, { "epoch": 4.415166727888658, "grad_norm": 1.8173662424087524, "learning_rate": 1.6774130408190786e-05, "loss": 0.3681, "step": 27047 }, { "epoch": 4.415329986531162, "grad_norm": 1.977318286895752, "learning_rate": 1.6773896974017373e-05, "loss": 0.393, "step": 27048 }, { "epoch": 4.415493245173666, "grad_norm": 2.5501694679260254, "learning_rate": 1.677366353302265e-05, "loss": 0.4395, "step": 27049 }, { "epoch": 4.41565650381617, "grad_norm": 2.545840263366699, "learning_rate": 1.677343008520685e-05, "loss": 0.3885, "step": 27050 }, { "epoch": 4.415819762458675, "grad_norm": 2.426100969314575, "learning_rate": 1.6773196630570198e-05, "loss": 0.4472, "step": 27051 }, { "epoch": 4.415983021101179, "grad_norm": 2.214437961578369, "learning_rate": 1.677296316911295e-05, "loss": 0.3618, "step": 27052 }, { "epoch": 4.416146279743684, "grad_norm": 1.9618749618530273, "learning_rate": 1.677272970083532e-05, "loss": 0.3851, "step": 27053 }, { "epoch": 4.416309538386188, "grad_norm": 2.742415428161621, "learning_rate": 1.677249622573756e-05, "loss": 0.4752, "step": 27054 }, { "epoch": 4.4164727970286926, "grad_norm": 2.1870603561401367, "learning_rate": 1.6772262743819897e-05, "loss": 0.4194, "step": 27055 }, { "epoch": 4.416636055671197, "grad_norm": 1.9478422403335571, "learning_rate": 1.6772029255082567e-05, "loss": 0.4358, "step": 27056 }, { "epoch": 4.416799314313701, "grad_norm": 2.2720956802368164, "learning_rate": 1.6771795759525805e-05, "loss": 0.4388, "step": 27057 }, { "epoch": 4.416962572956206, "grad_norm": 1.784304141998291, "learning_rate": 1.6771562257149848e-05, "loss": 0.3461, "step": 27058 }, { "epoch": 4.41712583159871, "grad_norm": 1.7272804975509644, "learning_rate": 1.6771328747954924e-05, "loss": 0.3501, "step": 27059 }, { "epoch": 4.417289090241215, "grad_norm": 2.326737880706787, "learning_rate": 1.677109523194128e-05, "loss": 0.4691, "step": 27060 }, { "epoch": 4.417452348883719, "grad_norm": 2.0852692127227783, "learning_rate": 1.677086170910914e-05, "loss": 0.3713, "step": 27061 }, { "epoch": 4.417615607526224, "grad_norm": 1.8775091171264648, "learning_rate": 1.677062817945875e-05, "loss": 0.4123, "step": 27062 }, { "epoch": 4.417778866168728, "grad_norm": 1.6975901126861572, "learning_rate": 1.6770394642990335e-05, "loss": 0.3652, "step": 27063 }, { "epoch": 4.4179421248112325, "grad_norm": 2.001696825027466, "learning_rate": 1.6770161099704134e-05, "loss": 0.391, "step": 27064 }, { "epoch": 4.418105383453737, "grad_norm": 2.142834186553955, "learning_rate": 1.6769927549600385e-05, "loss": 0.3856, "step": 27065 }, { "epoch": 4.418268642096241, "grad_norm": 1.7624863386154175, "learning_rate": 1.6769693992679318e-05, "loss": 0.3619, "step": 27066 }, { "epoch": 4.418431900738746, "grad_norm": 2.3381454944610596, "learning_rate": 1.6769460428941174e-05, "loss": 0.4677, "step": 27067 }, { "epoch": 4.418595159381249, "grad_norm": 2.6232941150665283, "learning_rate": 1.6769226858386184e-05, "loss": 0.4135, "step": 27068 }, { "epoch": 4.418758418023754, "grad_norm": 1.7234086990356445, "learning_rate": 1.6768993281014584e-05, "loss": 0.353, "step": 27069 }, { "epoch": 4.418921676666258, "grad_norm": 1.775789737701416, "learning_rate": 1.6768759696826608e-05, "loss": 0.3417, "step": 27070 }, { "epoch": 4.419084935308763, "grad_norm": 1.9125105142593384, "learning_rate": 1.6768526105822494e-05, "loss": 0.3286, "step": 27071 }, { "epoch": 4.419248193951267, "grad_norm": 2.076416254043579, "learning_rate": 1.676829250800248e-05, "loss": 0.3087, "step": 27072 }, { "epoch": 4.4194114525937715, "grad_norm": 2.033268451690674, "learning_rate": 1.676805890336679e-05, "loss": 0.4022, "step": 27073 }, { "epoch": 4.419574711236276, "grad_norm": 2.024695634841919, "learning_rate": 1.6767825291915672e-05, "loss": 0.3699, "step": 27074 }, { "epoch": 4.41973796987878, "grad_norm": 2.1068530082702637, "learning_rate": 1.6767591673649356e-05, "loss": 0.4249, "step": 27075 }, { "epoch": 4.419901228521285, "grad_norm": 2.612135410308838, "learning_rate": 1.6767358048568073e-05, "loss": 0.4439, "step": 27076 }, { "epoch": 4.420064487163789, "grad_norm": 2.1792478561401367, "learning_rate": 1.676712441667206e-05, "loss": 0.3767, "step": 27077 }, { "epoch": 4.420227745806294, "grad_norm": 2.5601859092712402, "learning_rate": 1.676689077796156e-05, "loss": 0.4725, "step": 27078 }, { "epoch": 4.420391004448798, "grad_norm": 2.4206337928771973, "learning_rate": 1.6766657132436802e-05, "loss": 0.3796, "step": 27079 }, { "epoch": 4.420554263091303, "grad_norm": 2.6839632987976074, "learning_rate": 1.6766423480098023e-05, "loss": 0.5018, "step": 27080 }, { "epoch": 4.420717521733807, "grad_norm": 1.9118481874465942, "learning_rate": 1.6766189820945456e-05, "loss": 0.3804, "step": 27081 }, { "epoch": 4.4208807803763115, "grad_norm": 2.5224545001983643, "learning_rate": 1.6765956154979336e-05, "loss": 0.4419, "step": 27082 }, { "epoch": 4.421044039018816, "grad_norm": 2.0598373413085938, "learning_rate": 1.6765722482199903e-05, "loss": 0.3766, "step": 27083 }, { "epoch": 4.42120729766132, "grad_norm": 1.8708919286727905, "learning_rate": 1.6765488802607385e-05, "loss": 0.3652, "step": 27084 }, { "epoch": 4.421370556303824, "grad_norm": 1.99128258228302, "learning_rate": 1.6765255116202025e-05, "loss": 0.405, "step": 27085 }, { "epoch": 4.421533814946328, "grad_norm": 2.0124967098236084, "learning_rate": 1.6765021422984053e-05, "loss": 0.3817, "step": 27086 }, { "epoch": 4.421697073588833, "grad_norm": 2.3305907249450684, "learning_rate": 1.6764787722953707e-05, "loss": 0.4701, "step": 27087 }, { "epoch": 4.421860332231337, "grad_norm": 2.105679988861084, "learning_rate": 1.6764554016111223e-05, "loss": 0.3525, "step": 27088 }, { "epoch": 4.422023590873842, "grad_norm": 2.254608392715454, "learning_rate": 1.6764320302456834e-05, "loss": 0.3696, "step": 27089 }, { "epoch": 4.422186849516346, "grad_norm": 2.3395233154296875, "learning_rate": 1.6764086581990774e-05, "loss": 0.3906, "step": 27090 }, { "epoch": 4.4223501081588505, "grad_norm": 2.025677442550659, "learning_rate": 1.6763852854713286e-05, "loss": 0.3569, "step": 27091 }, { "epoch": 4.422513366801355, "grad_norm": 2.668727397918701, "learning_rate": 1.6763619120624595e-05, "loss": 0.4381, "step": 27092 }, { "epoch": 4.422676625443859, "grad_norm": 1.449535846710205, "learning_rate": 1.6763385379724943e-05, "loss": 0.3016, "step": 27093 }, { "epoch": 4.422839884086364, "grad_norm": 1.8188667297363281, "learning_rate": 1.6763151632014564e-05, "loss": 0.3681, "step": 27094 }, { "epoch": 4.423003142728868, "grad_norm": 1.9803855419158936, "learning_rate": 1.676291787749369e-05, "loss": 0.3809, "step": 27095 }, { "epoch": 4.423166401371373, "grad_norm": 2.4027838706970215, "learning_rate": 1.6762684116162563e-05, "loss": 0.4424, "step": 27096 }, { "epoch": 4.423329660013877, "grad_norm": 2.1420907974243164, "learning_rate": 1.6762450348021414e-05, "loss": 0.371, "step": 27097 }, { "epoch": 4.423492918656382, "grad_norm": 2.2710254192352295, "learning_rate": 1.6762216573070482e-05, "loss": 0.3969, "step": 27098 }, { "epoch": 4.423656177298886, "grad_norm": 2.595200777053833, "learning_rate": 1.6761982791309998e-05, "loss": 0.3958, "step": 27099 }, { "epoch": 4.4238194359413905, "grad_norm": 2.592700958251953, "learning_rate": 1.6761749002740195e-05, "loss": 0.4434, "step": 27100 }, { "epoch": 4.423982694583895, "grad_norm": 2.2445807456970215, "learning_rate": 1.6761515207361316e-05, "loss": 0.419, "step": 27101 }, { "epoch": 4.4241459532263985, "grad_norm": 2.3469364643096924, "learning_rate": 1.6761281405173595e-05, "loss": 0.3848, "step": 27102 }, { "epoch": 4.424309211868903, "grad_norm": 1.9066576957702637, "learning_rate": 1.676104759617726e-05, "loss": 0.3718, "step": 27103 }, { "epoch": 4.424472470511407, "grad_norm": 2.1146228313446045, "learning_rate": 1.6760813780372558e-05, "loss": 0.3518, "step": 27104 }, { "epoch": 4.424635729153912, "grad_norm": 2.446845769882202, "learning_rate": 1.6760579957759712e-05, "loss": 0.4473, "step": 27105 }, { "epoch": 4.424798987796416, "grad_norm": 1.8896592855453491, "learning_rate": 1.6760346128338968e-05, "loss": 0.3787, "step": 27106 }, { "epoch": 4.424962246438921, "grad_norm": 1.8396034240722656, "learning_rate": 1.676011229211056e-05, "loss": 0.3483, "step": 27107 }, { "epoch": 4.425125505081425, "grad_norm": 2.4662444591522217, "learning_rate": 1.6759878449074712e-05, "loss": 0.3883, "step": 27108 }, { "epoch": 4.4252887637239295, "grad_norm": 2.0828664302825928, "learning_rate": 1.6759644599231675e-05, "loss": 0.3939, "step": 27109 }, { "epoch": 4.425452022366434, "grad_norm": 1.697618842124939, "learning_rate": 1.6759410742581677e-05, "loss": 0.3292, "step": 27110 }, { "epoch": 4.425615281008938, "grad_norm": 1.9409197568893433, "learning_rate": 1.6759176879124954e-05, "loss": 0.3034, "step": 27111 }, { "epoch": 4.425778539651443, "grad_norm": 2.198390483856201, "learning_rate": 1.6758943008861737e-05, "loss": 0.3606, "step": 27112 }, { "epoch": 4.425941798293947, "grad_norm": 2.2095136642456055, "learning_rate": 1.6758709131792273e-05, "loss": 0.4481, "step": 27113 }, { "epoch": 4.426105056936452, "grad_norm": 2.4758362770080566, "learning_rate": 1.6758475247916786e-05, "loss": 0.3953, "step": 27114 }, { "epoch": 4.426268315578956, "grad_norm": 2.111316204071045, "learning_rate": 1.675824135723552e-05, "loss": 0.3778, "step": 27115 }, { "epoch": 4.426431574221461, "grad_norm": 2.338449716567993, "learning_rate": 1.67580074597487e-05, "loss": 0.4329, "step": 27116 }, { "epoch": 4.426594832863965, "grad_norm": 2.2980244159698486, "learning_rate": 1.6757773555456577e-05, "loss": 0.335, "step": 27117 }, { "epoch": 4.4267580915064695, "grad_norm": 1.9481054544448853, "learning_rate": 1.6757539644359373e-05, "loss": 0.3708, "step": 27118 }, { "epoch": 4.426921350148974, "grad_norm": 2.8977935314178467, "learning_rate": 1.675730572645733e-05, "loss": 0.461, "step": 27119 }, { "epoch": 4.427084608791478, "grad_norm": 2.218703269958496, "learning_rate": 1.675707180175068e-05, "loss": 0.3867, "step": 27120 }, { "epoch": 4.427247867433982, "grad_norm": 1.7569035291671753, "learning_rate": 1.675683787023966e-05, "loss": 0.3433, "step": 27121 }, { "epoch": 4.427411126076486, "grad_norm": 2.1285760402679443, "learning_rate": 1.675660393192451e-05, "loss": 0.4076, "step": 27122 }, { "epoch": 4.427574384718991, "grad_norm": 1.8653956651687622, "learning_rate": 1.675636998680546e-05, "loss": 0.3244, "step": 27123 }, { "epoch": 4.427737643361495, "grad_norm": 2.5965802669525146, "learning_rate": 1.6756136034882743e-05, "loss": 0.4845, "step": 27124 }, { "epoch": 4.427900902004, "grad_norm": 2.490446090698242, "learning_rate": 1.6755902076156606e-05, "loss": 0.4322, "step": 27125 }, { "epoch": 4.428064160646504, "grad_norm": 1.9318112134933472, "learning_rate": 1.6755668110627272e-05, "loss": 0.4068, "step": 27126 }, { "epoch": 4.4282274192890085, "grad_norm": 2.3139803409576416, "learning_rate": 1.6755434138294983e-05, "loss": 0.4602, "step": 27127 }, { "epoch": 4.428390677931513, "grad_norm": 2.9673757553100586, "learning_rate": 1.6755200159159976e-05, "loss": 0.4192, "step": 27128 }, { "epoch": 4.428553936574017, "grad_norm": 2.0268008708953857, "learning_rate": 1.675496617322248e-05, "loss": 0.3607, "step": 27129 }, { "epoch": 4.428717195216522, "grad_norm": 2.4555134773254395, "learning_rate": 1.675473218048274e-05, "loss": 0.3455, "step": 27130 }, { "epoch": 4.428880453859026, "grad_norm": 1.8555810451507568, "learning_rate": 1.6754498180940984e-05, "loss": 0.3298, "step": 27131 }, { "epoch": 4.429043712501531, "grad_norm": 2.375088930130005, "learning_rate": 1.675426417459745e-05, "loss": 0.4547, "step": 27132 }, { "epoch": 4.429206971144035, "grad_norm": 1.8255422115325928, "learning_rate": 1.6754030161452372e-05, "loss": 0.3515, "step": 27133 }, { "epoch": 4.42937022978654, "grad_norm": 2.3744356632232666, "learning_rate": 1.675379614150599e-05, "loss": 0.4294, "step": 27134 }, { "epoch": 4.429533488429044, "grad_norm": 2.0294480323791504, "learning_rate": 1.6753562114758535e-05, "loss": 0.3478, "step": 27135 }, { "epoch": 4.4296967470715485, "grad_norm": 2.1636509895324707, "learning_rate": 1.6753328081210244e-05, "loss": 0.4488, "step": 27136 }, { "epoch": 4.429860005714053, "grad_norm": 1.958489179611206, "learning_rate": 1.6753094040861358e-05, "loss": 0.3415, "step": 27137 }, { "epoch": 4.4300232643565565, "grad_norm": 1.855933666229248, "learning_rate": 1.6752859993712106e-05, "loss": 0.3357, "step": 27138 }, { "epoch": 4.430186522999061, "grad_norm": 1.6089189052581787, "learning_rate": 1.6752625939762724e-05, "loss": 0.3063, "step": 27139 }, { "epoch": 4.430349781641565, "grad_norm": 2.049806833267212, "learning_rate": 1.6752391879013453e-05, "loss": 0.4163, "step": 27140 }, { "epoch": 4.43051304028407, "grad_norm": 1.8706743717193604, "learning_rate": 1.6752157811464524e-05, "loss": 0.3643, "step": 27141 }, { "epoch": 4.430676298926574, "grad_norm": 2.4343953132629395, "learning_rate": 1.675192373711617e-05, "loss": 0.4655, "step": 27142 }, { "epoch": 4.430839557569079, "grad_norm": 2.0842409133911133, "learning_rate": 1.6751689655968634e-05, "loss": 0.4074, "step": 27143 }, { "epoch": 4.431002816211583, "grad_norm": 2.4613823890686035, "learning_rate": 1.6751455568022145e-05, "loss": 0.4193, "step": 27144 }, { "epoch": 4.4311660748540875, "grad_norm": 2.296586751937866, "learning_rate": 1.6751221473276946e-05, "loss": 0.3821, "step": 27145 }, { "epoch": 4.431329333496592, "grad_norm": 2.0307419300079346, "learning_rate": 1.6750987371733267e-05, "loss": 0.3724, "step": 27146 }, { "epoch": 4.431492592139096, "grad_norm": 2.786357879638672, "learning_rate": 1.6750753263391346e-05, "loss": 0.3953, "step": 27147 }, { "epoch": 4.431655850781601, "grad_norm": 2.2352452278137207, "learning_rate": 1.675051914825142e-05, "loss": 0.4079, "step": 27148 }, { "epoch": 4.431819109424105, "grad_norm": 2.009930372238159, "learning_rate": 1.675028502631372e-05, "loss": 0.3779, "step": 27149 }, { "epoch": 4.43198236806661, "grad_norm": 2.908029317855835, "learning_rate": 1.6750050897578484e-05, "loss": 0.5024, "step": 27150 }, { "epoch": 4.432145626709114, "grad_norm": 2.2334938049316406, "learning_rate": 1.6749816762045955e-05, "loss": 0.3899, "step": 27151 }, { "epoch": 4.432308885351619, "grad_norm": 2.261035203933716, "learning_rate": 1.6749582619716356e-05, "loss": 0.3755, "step": 27152 }, { "epoch": 4.432472143994123, "grad_norm": 1.9729055166244507, "learning_rate": 1.674934847058993e-05, "loss": 0.3475, "step": 27153 }, { "epoch": 4.4326354026366275, "grad_norm": 2.33225417137146, "learning_rate": 1.6749114314666913e-05, "loss": 0.467, "step": 27154 }, { "epoch": 4.432798661279131, "grad_norm": 2.583418607711792, "learning_rate": 1.6748880151947536e-05, "loss": 0.3972, "step": 27155 }, { "epoch": 4.4329619199216355, "grad_norm": 2.1923887729644775, "learning_rate": 1.6748645982432042e-05, "loss": 0.3884, "step": 27156 }, { "epoch": 4.43312517856414, "grad_norm": 2.013847589492798, "learning_rate": 1.6748411806120667e-05, "loss": 0.3629, "step": 27157 }, { "epoch": 4.433288437206644, "grad_norm": 2.2331578731536865, "learning_rate": 1.6748177623013638e-05, "loss": 0.4333, "step": 27158 }, { "epoch": 4.433451695849149, "grad_norm": 2.3563830852508545, "learning_rate": 1.67479434331112e-05, "loss": 0.4038, "step": 27159 }, { "epoch": 4.433614954491653, "grad_norm": 2.2327487468719482, "learning_rate": 1.674770923641358e-05, "loss": 0.429, "step": 27160 }, { "epoch": 4.433778213134158, "grad_norm": 1.890526294708252, "learning_rate": 1.674747503292102e-05, "loss": 0.3313, "step": 27161 }, { "epoch": 4.433941471776662, "grad_norm": 1.8807989358901978, "learning_rate": 1.6747240822633757e-05, "loss": 0.3384, "step": 27162 }, { "epoch": 4.4341047304191665, "grad_norm": 2.1291391849517822, "learning_rate": 1.674700660555202e-05, "loss": 0.3671, "step": 27163 }, { "epoch": 4.434267989061671, "grad_norm": 2.417914390563965, "learning_rate": 1.6746772381676052e-05, "loss": 0.4433, "step": 27164 }, { "epoch": 4.434431247704175, "grad_norm": 2.32218337059021, "learning_rate": 1.6746538151006086e-05, "loss": 0.4432, "step": 27165 }, { "epoch": 4.43459450634668, "grad_norm": 2.251493215560913, "learning_rate": 1.6746303913542358e-05, "loss": 0.4156, "step": 27166 }, { "epoch": 4.434757764989184, "grad_norm": 2.1753084659576416, "learning_rate": 1.67460696692851e-05, "loss": 0.4009, "step": 27167 }, { "epoch": 4.434921023631689, "grad_norm": 2.5538136959075928, "learning_rate": 1.6745835418234556e-05, "loss": 0.4479, "step": 27168 }, { "epoch": 4.435084282274193, "grad_norm": 1.8786579370498657, "learning_rate": 1.6745601160390958e-05, "loss": 0.3633, "step": 27169 }, { "epoch": 4.435247540916698, "grad_norm": 1.818957805633545, "learning_rate": 1.674536689575454e-05, "loss": 0.3366, "step": 27170 }, { "epoch": 4.435410799559202, "grad_norm": 2.5622315406799316, "learning_rate": 1.6745132624325537e-05, "loss": 0.3876, "step": 27171 }, { "epoch": 4.435574058201706, "grad_norm": 1.751323938369751, "learning_rate": 1.6744898346104192e-05, "loss": 0.3682, "step": 27172 }, { "epoch": 4.435737316844211, "grad_norm": 2.2392797470092773, "learning_rate": 1.674466406109073e-05, "loss": 0.3894, "step": 27173 }, { "epoch": 4.4359005754867145, "grad_norm": 2.553755521774292, "learning_rate": 1.6744429769285397e-05, "loss": 0.4795, "step": 27174 }, { "epoch": 4.436063834129219, "grad_norm": 1.8006190061569214, "learning_rate": 1.6744195470688424e-05, "loss": 0.3582, "step": 27175 }, { "epoch": 4.436227092771723, "grad_norm": 2.311012029647827, "learning_rate": 1.674396116530005e-05, "loss": 0.4386, "step": 27176 }, { "epoch": 4.436390351414228, "grad_norm": 2.275542736053467, "learning_rate": 1.6743726853120507e-05, "loss": 0.4343, "step": 27177 }, { "epoch": 4.436553610056732, "grad_norm": 2.0192389488220215, "learning_rate": 1.6743492534150033e-05, "loss": 0.4163, "step": 27178 }, { "epoch": 4.436716868699237, "grad_norm": 2.3756184577941895, "learning_rate": 1.6743258208388863e-05, "loss": 0.4768, "step": 27179 }, { "epoch": 4.436880127341741, "grad_norm": 1.7958205938339233, "learning_rate": 1.6743023875837233e-05, "loss": 0.355, "step": 27180 }, { "epoch": 4.4370433859842455, "grad_norm": 2.5604093074798584, "learning_rate": 1.6742789536495383e-05, "loss": 0.454, "step": 27181 }, { "epoch": 4.43720664462675, "grad_norm": 1.585054636001587, "learning_rate": 1.6742555190363545e-05, "loss": 0.3433, "step": 27182 }, { "epoch": 4.437369903269254, "grad_norm": 1.7961562871932983, "learning_rate": 1.6742320837441955e-05, "loss": 0.3483, "step": 27183 }, { "epoch": 4.437533161911759, "grad_norm": 1.8610846996307373, "learning_rate": 1.6742086477730846e-05, "loss": 0.3723, "step": 27184 }, { "epoch": 4.437696420554263, "grad_norm": 1.7918388843536377, "learning_rate": 1.6741852111230464e-05, "loss": 0.3714, "step": 27185 }, { "epoch": 4.437859679196768, "grad_norm": 1.8858505487442017, "learning_rate": 1.674161773794104e-05, "loss": 0.3566, "step": 27186 }, { "epoch": 4.438022937839272, "grad_norm": 1.9789098501205444, "learning_rate": 1.6741383357862803e-05, "loss": 0.3479, "step": 27187 }, { "epoch": 4.438186196481777, "grad_norm": 1.7285434007644653, "learning_rate": 1.6741148970995995e-05, "loss": 0.3258, "step": 27188 }, { "epoch": 4.438349455124281, "grad_norm": 2.008375883102417, "learning_rate": 1.6740914577340854e-05, "loss": 0.3498, "step": 27189 }, { "epoch": 4.4385127137667855, "grad_norm": 2.0624454021453857, "learning_rate": 1.6740680176897616e-05, "loss": 0.341, "step": 27190 }, { "epoch": 4.438675972409289, "grad_norm": 1.8526713848114014, "learning_rate": 1.674044576966651e-05, "loss": 0.362, "step": 27191 }, { "epoch": 4.4388392310517935, "grad_norm": 1.815578579902649, "learning_rate": 1.674021135564778e-05, "loss": 0.3047, "step": 27192 }, { "epoch": 4.439002489694298, "grad_norm": 2.230954885482788, "learning_rate": 1.6739976934841654e-05, "loss": 0.3997, "step": 27193 }, { "epoch": 4.439165748336802, "grad_norm": 2.4923899173736572, "learning_rate": 1.6739742507248377e-05, "loss": 0.4325, "step": 27194 }, { "epoch": 4.439329006979307, "grad_norm": 1.954040288925171, "learning_rate": 1.673950807286818e-05, "loss": 0.3654, "step": 27195 }, { "epoch": 4.439492265621811, "grad_norm": 1.9099547863006592, "learning_rate": 1.6739273631701302e-05, "loss": 0.3394, "step": 27196 }, { "epoch": 4.439655524264316, "grad_norm": 1.8185738325119019, "learning_rate": 1.6739039183747976e-05, "loss": 0.354, "step": 27197 }, { "epoch": 4.43981878290682, "grad_norm": 2.208876609802246, "learning_rate": 1.673880472900844e-05, "loss": 0.4101, "step": 27198 }, { "epoch": 4.4399820415493245, "grad_norm": 2.0198681354522705, "learning_rate": 1.673857026748293e-05, "loss": 0.3695, "step": 27199 }, { "epoch": 4.440145300191829, "grad_norm": 2.23492169380188, "learning_rate": 1.673833579917168e-05, "loss": 0.3508, "step": 27200 }, { "epoch": 4.440308558834333, "grad_norm": 1.9555882215499878, "learning_rate": 1.673810132407493e-05, "loss": 0.332, "step": 27201 }, { "epoch": 4.440471817476838, "grad_norm": 2.375282049179077, "learning_rate": 1.6737866842192908e-05, "loss": 0.3944, "step": 27202 }, { "epoch": 4.440635076119342, "grad_norm": 2.0188558101654053, "learning_rate": 1.673763235352586e-05, "loss": 0.3612, "step": 27203 }, { "epoch": 4.440798334761847, "grad_norm": 2.056922197341919, "learning_rate": 1.673739785807402e-05, "loss": 0.335, "step": 27204 }, { "epoch": 4.440961593404351, "grad_norm": 2.0917859077453613, "learning_rate": 1.673716335583762e-05, "loss": 0.4055, "step": 27205 }, { "epoch": 4.441124852046856, "grad_norm": 2.1378180980682373, "learning_rate": 1.67369288468169e-05, "loss": 0.3241, "step": 27206 }, { "epoch": 4.44128811068936, "grad_norm": 2.0808897018432617, "learning_rate": 1.6736694331012092e-05, "loss": 0.3551, "step": 27207 }, { "epoch": 4.441451369331864, "grad_norm": 2.2758796215057373, "learning_rate": 1.6736459808423437e-05, "loss": 0.4386, "step": 27208 }, { "epoch": 4.441614627974368, "grad_norm": 1.8874528408050537, "learning_rate": 1.6736225279051165e-05, "loss": 0.313, "step": 27209 }, { "epoch": 4.4417778866168725, "grad_norm": 1.8613336086273193, "learning_rate": 1.6735990742895518e-05, "loss": 0.3168, "step": 27210 }, { "epoch": 4.441941145259377, "grad_norm": 2.400252342224121, "learning_rate": 1.673575619995673e-05, "loss": 0.3822, "step": 27211 }, { "epoch": 4.442104403901881, "grad_norm": 2.20597243309021, "learning_rate": 1.6735521650235037e-05, "loss": 0.3923, "step": 27212 }, { "epoch": 4.442267662544386, "grad_norm": 2.573827028274536, "learning_rate": 1.6735287093730677e-05, "loss": 0.4213, "step": 27213 }, { "epoch": 4.44243092118689, "grad_norm": 2.2213125228881836, "learning_rate": 1.6735052530443884e-05, "loss": 0.3509, "step": 27214 }, { "epoch": 4.442594179829395, "grad_norm": 2.204540729522705, "learning_rate": 1.673481796037489e-05, "loss": 0.4071, "step": 27215 }, { "epoch": 4.442757438471899, "grad_norm": 2.530198812484741, "learning_rate": 1.6734583383523943e-05, "loss": 0.4122, "step": 27216 }, { "epoch": 4.4429206971144035, "grad_norm": 2.105226993560791, "learning_rate": 1.673434879989127e-05, "loss": 0.3846, "step": 27217 }, { "epoch": 4.443083955756908, "grad_norm": 2.2305116653442383, "learning_rate": 1.673411420947711e-05, "loss": 0.4321, "step": 27218 }, { "epoch": 4.443247214399412, "grad_norm": 2.095288038253784, "learning_rate": 1.6733879612281698e-05, "loss": 0.3703, "step": 27219 }, { "epoch": 4.443410473041917, "grad_norm": 2.5126028060913086, "learning_rate": 1.673364500830527e-05, "loss": 0.3653, "step": 27220 }, { "epoch": 4.443573731684421, "grad_norm": 2.4533212184906006, "learning_rate": 1.6733410397548065e-05, "loss": 0.3879, "step": 27221 }, { "epoch": 4.443736990326926, "grad_norm": 2.013826370239258, "learning_rate": 1.673317578001032e-05, "loss": 0.3801, "step": 27222 }, { "epoch": 4.44390024896943, "grad_norm": 2.0368881225585938, "learning_rate": 1.673294115569226e-05, "loss": 0.3291, "step": 27223 }, { "epoch": 4.444063507611935, "grad_norm": 2.133216619491577, "learning_rate": 1.6732706524594138e-05, "loss": 0.3968, "step": 27224 }, { "epoch": 4.444226766254438, "grad_norm": 2.0680153369903564, "learning_rate": 1.673247188671618e-05, "loss": 0.3786, "step": 27225 }, { "epoch": 4.444390024896943, "grad_norm": 1.8901818990707397, "learning_rate": 1.6732237242058628e-05, "loss": 0.3329, "step": 27226 }, { "epoch": 4.444553283539447, "grad_norm": 2.509864091873169, "learning_rate": 1.673200259062171e-05, "loss": 0.4443, "step": 27227 }, { "epoch": 4.4447165421819514, "grad_norm": 1.670524001121521, "learning_rate": 1.673176793240567e-05, "loss": 0.3107, "step": 27228 }, { "epoch": 4.444879800824456, "grad_norm": 2.0010182857513428, "learning_rate": 1.673153326741074e-05, "loss": 0.4415, "step": 27229 }, { "epoch": 4.44504305946696, "grad_norm": 2.077181577682495, "learning_rate": 1.6731298595637158e-05, "loss": 0.3308, "step": 27230 }, { "epoch": 4.445206318109465, "grad_norm": 2.3009417057037354, "learning_rate": 1.673106391708516e-05, "loss": 0.3516, "step": 27231 }, { "epoch": 4.445369576751969, "grad_norm": 2.975504159927368, "learning_rate": 1.6730829231754982e-05, "loss": 0.4904, "step": 27232 }, { "epoch": 4.445532835394474, "grad_norm": 2.0207698345184326, "learning_rate": 1.673059453964686e-05, "loss": 0.3799, "step": 27233 }, { "epoch": 4.445696094036978, "grad_norm": 2.1429941654205322, "learning_rate": 1.6730359840761034e-05, "loss": 0.3841, "step": 27234 }, { "epoch": 4.4458593526794825, "grad_norm": 2.6260948181152344, "learning_rate": 1.6730125135097736e-05, "loss": 0.4388, "step": 27235 }, { "epoch": 4.446022611321987, "grad_norm": 2.352342128753662, "learning_rate": 1.6729890422657203e-05, "loss": 0.3933, "step": 27236 }, { "epoch": 4.446185869964491, "grad_norm": 2.4898104667663574, "learning_rate": 1.672965570343967e-05, "loss": 0.4501, "step": 27237 }, { "epoch": 4.446349128606996, "grad_norm": 1.8300631046295166, "learning_rate": 1.672942097744538e-05, "loss": 0.4281, "step": 27238 }, { "epoch": 4.4465123872495, "grad_norm": 2.075378179550171, "learning_rate": 1.672918624467456e-05, "loss": 0.3843, "step": 27239 }, { "epoch": 4.446675645892005, "grad_norm": 1.8230026960372925, "learning_rate": 1.672895150512746e-05, "loss": 0.3646, "step": 27240 }, { "epoch": 4.446838904534509, "grad_norm": 2.2484753131866455, "learning_rate": 1.67287167588043e-05, "loss": 0.3749, "step": 27241 }, { "epoch": 4.447002163177014, "grad_norm": 2.1596245765686035, "learning_rate": 1.6728482005705324e-05, "loss": 0.3869, "step": 27242 }, { "epoch": 4.447165421819518, "grad_norm": 2.302457571029663, "learning_rate": 1.672824724583077e-05, "loss": 0.4375, "step": 27243 }, { "epoch": 4.447328680462022, "grad_norm": 2.3552353382110596, "learning_rate": 1.6728012479180876e-05, "loss": 0.405, "step": 27244 }, { "epoch": 4.447491939104526, "grad_norm": 2.224914312362671, "learning_rate": 1.672777770575587e-05, "loss": 0.4212, "step": 27245 }, { "epoch": 4.44765519774703, "grad_norm": 1.777811050415039, "learning_rate": 1.6727542925556e-05, "loss": 0.299, "step": 27246 }, { "epoch": 4.447818456389535, "grad_norm": 2.910459280014038, "learning_rate": 1.6727308138581492e-05, "loss": 0.4231, "step": 27247 }, { "epoch": 4.447981715032039, "grad_norm": 2.211944580078125, "learning_rate": 1.6727073344832585e-05, "loss": 0.3377, "step": 27248 }, { "epoch": 4.448144973674544, "grad_norm": 1.9851890802383423, "learning_rate": 1.672683854430952e-05, "loss": 0.3539, "step": 27249 }, { "epoch": 4.448308232317048, "grad_norm": 2.235567569732666, "learning_rate": 1.6726603737012527e-05, "loss": 0.393, "step": 27250 }, { "epoch": 4.448471490959553, "grad_norm": 2.4751992225646973, "learning_rate": 1.6726368922941847e-05, "loss": 0.4024, "step": 27251 }, { "epoch": 4.448634749602057, "grad_norm": 2.0061681270599365, "learning_rate": 1.6726134102097718e-05, "loss": 0.3412, "step": 27252 }, { "epoch": 4.4487980082445615, "grad_norm": 1.9586213827133179, "learning_rate": 1.6725899274480372e-05, "loss": 0.356, "step": 27253 }, { "epoch": 4.448961266887066, "grad_norm": 2.7735776901245117, "learning_rate": 1.6725664440090045e-05, "loss": 0.4523, "step": 27254 }, { "epoch": 4.44912452552957, "grad_norm": 2.509310245513916, "learning_rate": 1.6725429598926975e-05, "loss": 0.4042, "step": 27255 }, { "epoch": 4.449287784172075, "grad_norm": 2.465712785720825, "learning_rate": 1.6725194750991406e-05, "loss": 0.3877, "step": 27256 }, { "epoch": 4.449451042814579, "grad_norm": 2.0687880516052246, "learning_rate": 1.672495989628356e-05, "loss": 0.4236, "step": 27257 }, { "epoch": 4.449614301457084, "grad_norm": 2.108001947402954, "learning_rate": 1.6724725034803687e-05, "loss": 0.4008, "step": 27258 }, { "epoch": 4.449777560099588, "grad_norm": 2.0400516986846924, "learning_rate": 1.6724490166552015e-05, "loss": 0.4081, "step": 27259 }, { "epoch": 4.449940818742093, "grad_norm": 2.1954898834228516, "learning_rate": 1.6724255291528783e-05, "loss": 0.3868, "step": 27260 }, { "epoch": 4.450104077384596, "grad_norm": 2.7621757984161377, "learning_rate": 1.672402040973423e-05, "loss": 0.4099, "step": 27261 }, { "epoch": 4.450267336027101, "grad_norm": 2.326054811477661, "learning_rate": 1.6723785521168588e-05, "loss": 0.4532, "step": 27262 }, { "epoch": 4.450430594669605, "grad_norm": 2.30857253074646, "learning_rate": 1.6723550625832096e-05, "loss": 0.462, "step": 27263 }, { "epoch": 4.450593853312109, "grad_norm": 2.585897207260132, "learning_rate": 1.6723315723724988e-05, "loss": 0.4155, "step": 27264 }, { "epoch": 4.450757111954614, "grad_norm": 2.4532389640808105, "learning_rate": 1.672308081484751e-05, "loss": 0.388, "step": 27265 }, { "epoch": 4.450920370597118, "grad_norm": 2.0072762966156006, "learning_rate": 1.6722845899199885e-05, "loss": 0.3938, "step": 27266 }, { "epoch": 4.451083629239623, "grad_norm": 2.0604259967803955, "learning_rate": 1.6722610976782358e-05, "loss": 0.3848, "step": 27267 }, { "epoch": 4.451246887882127, "grad_norm": 2.0279412269592285, "learning_rate": 1.6722376047595163e-05, "loss": 0.4152, "step": 27268 }, { "epoch": 4.451410146524632, "grad_norm": 2.279170274734497, "learning_rate": 1.672214111163854e-05, "loss": 0.402, "step": 27269 }, { "epoch": 4.451573405167136, "grad_norm": 1.9906858205795288, "learning_rate": 1.6721906168912715e-05, "loss": 0.3717, "step": 27270 }, { "epoch": 4.4517366638096405, "grad_norm": 2.4099974632263184, "learning_rate": 1.6721671219417936e-05, "loss": 0.4027, "step": 27271 }, { "epoch": 4.451899922452145, "grad_norm": 2.086744546890259, "learning_rate": 1.6721436263154438e-05, "loss": 0.363, "step": 27272 }, { "epoch": 4.452063181094649, "grad_norm": 2.0686967372894287, "learning_rate": 1.6721201300122457e-05, "loss": 0.4174, "step": 27273 }, { "epoch": 4.452226439737154, "grad_norm": 2.1011886596679688, "learning_rate": 1.6720966330322224e-05, "loss": 0.3907, "step": 27274 }, { "epoch": 4.452389698379658, "grad_norm": 1.7758240699768066, "learning_rate": 1.672073135375398e-05, "loss": 0.3418, "step": 27275 }, { "epoch": 4.452552957022163, "grad_norm": 2.507101535797119, "learning_rate": 1.6720496370417963e-05, "loss": 0.3833, "step": 27276 }, { "epoch": 4.452716215664667, "grad_norm": 1.6648939847946167, "learning_rate": 1.672026138031441e-05, "loss": 0.2957, "step": 27277 }, { "epoch": 4.452879474307171, "grad_norm": 2.231069564819336, "learning_rate": 1.6720026383443552e-05, "loss": 0.437, "step": 27278 }, { "epoch": 4.453042732949675, "grad_norm": 2.2617156505584717, "learning_rate": 1.671979137980563e-05, "loss": 0.3748, "step": 27279 }, { "epoch": 4.45320599159218, "grad_norm": 2.1122660636901855, "learning_rate": 1.671955636940088e-05, "loss": 0.3507, "step": 27280 }, { "epoch": 4.453369250234684, "grad_norm": 2.0956807136535645, "learning_rate": 1.671932135222954e-05, "loss": 0.3841, "step": 27281 }, { "epoch": 4.453532508877188, "grad_norm": 2.162144899368286, "learning_rate": 1.6719086328291846e-05, "loss": 0.3924, "step": 27282 }, { "epoch": 4.453695767519693, "grad_norm": 2.1721880435943604, "learning_rate": 1.671885129758803e-05, "loss": 0.3715, "step": 27283 }, { "epoch": 4.453859026162197, "grad_norm": 2.3844611644744873, "learning_rate": 1.6718616260118336e-05, "loss": 0.4472, "step": 27284 }, { "epoch": 4.454022284804702, "grad_norm": 2.0653388500213623, "learning_rate": 1.6718381215882993e-05, "loss": 0.368, "step": 27285 }, { "epoch": 4.454185543447206, "grad_norm": 2.031834602355957, "learning_rate": 1.6718146164882245e-05, "loss": 0.3841, "step": 27286 }, { "epoch": 4.454348802089711, "grad_norm": 2.361238956451416, "learning_rate": 1.6717911107116327e-05, "loss": 0.4312, "step": 27287 }, { "epoch": 4.454512060732215, "grad_norm": 1.847434401512146, "learning_rate": 1.671767604258547e-05, "loss": 0.3551, "step": 27288 }, { "epoch": 4.4546753193747195, "grad_norm": 1.968302845954895, "learning_rate": 1.671744097128992e-05, "loss": 0.318, "step": 27289 }, { "epoch": 4.454838578017224, "grad_norm": 2.055255651473999, "learning_rate": 1.6717205893229904e-05, "loss": 0.338, "step": 27290 }, { "epoch": 4.455001836659728, "grad_norm": 1.5979818105697632, "learning_rate": 1.6716970808405664e-05, "loss": 0.3264, "step": 27291 }, { "epoch": 4.455165095302233, "grad_norm": 2.4409637451171875, "learning_rate": 1.671673571681744e-05, "loss": 0.4487, "step": 27292 }, { "epoch": 4.455328353944737, "grad_norm": 3.1780664920806885, "learning_rate": 1.6716500618465462e-05, "loss": 0.4284, "step": 27293 }, { "epoch": 4.455491612587242, "grad_norm": 1.8650020360946655, "learning_rate": 1.671626551334997e-05, "loss": 0.3508, "step": 27294 }, { "epoch": 4.455654871229746, "grad_norm": 1.8596522808074951, "learning_rate": 1.67160304014712e-05, "loss": 0.3638, "step": 27295 }, { "epoch": 4.455818129872251, "grad_norm": 2.2187111377716064, "learning_rate": 1.671579528282939e-05, "loss": 0.4124, "step": 27296 }, { "epoch": 4.455981388514754, "grad_norm": 2.060612440109253, "learning_rate": 1.6715560157424773e-05, "loss": 0.431, "step": 27297 }, { "epoch": 4.4561446471572586, "grad_norm": 2.1456775665283203, "learning_rate": 1.6715325025257595e-05, "loss": 0.4265, "step": 27298 }, { "epoch": 4.456307905799763, "grad_norm": 2.256805419921875, "learning_rate": 1.6715089886328085e-05, "loss": 0.3792, "step": 27299 }, { "epoch": 4.456471164442267, "grad_norm": 2.0117557048797607, "learning_rate": 1.6714854740636477e-05, "loss": 0.3694, "step": 27300 }, { "epoch": 4.456634423084772, "grad_norm": 1.8175359964370728, "learning_rate": 1.6714619588183015e-05, "loss": 0.3556, "step": 27301 }, { "epoch": 4.456797681727276, "grad_norm": 2.238424777984619, "learning_rate": 1.671438442896793e-05, "loss": 0.3662, "step": 27302 }, { "epoch": 4.456960940369781, "grad_norm": 2.0078892707824707, "learning_rate": 1.6714149262991462e-05, "loss": 0.3681, "step": 27303 }, { "epoch": 4.457124199012285, "grad_norm": 2.065157890319824, "learning_rate": 1.6713914090253852e-05, "loss": 0.3537, "step": 27304 }, { "epoch": 4.45728745765479, "grad_norm": 1.94640052318573, "learning_rate": 1.671367891075533e-05, "loss": 0.3567, "step": 27305 }, { "epoch": 4.457450716297294, "grad_norm": 2.3809354305267334, "learning_rate": 1.6713443724496136e-05, "loss": 0.4886, "step": 27306 }, { "epoch": 4.4576139749397985, "grad_norm": 2.042106866836548, "learning_rate": 1.6713208531476504e-05, "loss": 0.3846, "step": 27307 }, { "epoch": 4.457777233582303, "grad_norm": 2.1603899002075195, "learning_rate": 1.6712973331696673e-05, "loss": 0.3658, "step": 27308 }, { "epoch": 4.457940492224807, "grad_norm": 1.9039876461029053, "learning_rate": 1.671273812515688e-05, "loss": 0.395, "step": 27309 }, { "epoch": 4.458103750867312, "grad_norm": 2.103522539138794, "learning_rate": 1.6712502911857363e-05, "loss": 0.3668, "step": 27310 }, { "epoch": 4.458267009509816, "grad_norm": 2.2811312675476074, "learning_rate": 1.6712267691798353e-05, "loss": 0.3961, "step": 27311 }, { "epoch": 4.458430268152321, "grad_norm": 2.1843831539154053, "learning_rate": 1.6712032464980094e-05, "loss": 0.3865, "step": 27312 }, { "epoch": 4.458593526794825, "grad_norm": 2.09829044342041, "learning_rate": 1.671179723140282e-05, "loss": 0.4138, "step": 27313 }, { "epoch": 4.458756785437329, "grad_norm": 1.887914776802063, "learning_rate": 1.671156199106677e-05, "loss": 0.3672, "step": 27314 }, { "epoch": 4.458920044079833, "grad_norm": 2.1215457916259766, "learning_rate": 1.6711326743972177e-05, "loss": 0.4287, "step": 27315 }, { "epoch": 4.4590833027223375, "grad_norm": 2.3430874347686768, "learning_rate": 1.671109149011928e-05, "loss": 0.3941, "step": 27316 }, { "epoch": 4.459246561364842, "grad_norm": 2.092641830444336, "learning_rate": 1.6710856229508316e-05, "loss": 0.4644, "step": 27317 }, { "epoch": 4.459409820007346, "grad_norm": 1.856725811958313, "learning_rate": 1.671062096213952e-05, "loss": 0.3426, "step": 27318 }, { "epoch": 4.459573078649851, "grad_norm": 2.5586438179016113, "learning_rate": 1.671038568801313e-05, "loss": 0.4755, "step": 27319 }, { "epoch": 4.459736337292355, "grad_norm": 2.0478410720825195, "learning_rate": 1.6710150407129386e-05, "loss": 0.3674, "step": 27320 }, { "epoch": 4.45989959593486, "grad_norm": 2.153869390487671, "learning_rate": 1.6709915119488524e-05, "loss": 0.4196, "step": 27321 }, { "epoch": 4.460062854577364, "grad_norm": 2.2443554401397705, "learning_rate": 1.6709679825090775e-05, "loss": 0.4359, "step": 27322 }, { "epoch": 4.460226113219869, "grad_norm": 1.9422787427902222, "learning_rate": 1.670944452393638e-05, "loss": 0.3906, "step": 27323 }, { "epoch": 4.460389371862373, "grad_norm": 2.1791586875915527, "learning_rate": 1.6709209216025576e-05, "loss": 0.3735, "step": 27324 }, { "epoch": 4.4605526305048775, "grad_norm": 2.072354555130005, "learning_rate": 1.6708973901358603e-05, "loss": 0.3451, "step": 27325 }, { "epoch": 4.460715889147382, "grad_norm": 2.553136110305786, "learning_rate": 1.6708738579935695e-05, "loss": 0.4638, "step": 27326 }, { "epoch": 4.460879147789886, "grad_norm": 2.1808366775512695, "learning_rate": 1.670850325175709e-05, "loss": 0.4105, "step": 27327 }, { "epoch": 4.461042406432391, "grad_norm": 2.059540271759033, "learning_rate": 1.670826791682302e-05, "loss": 0.4109, "step": 27328 }, { "epoch": 4.461205665074895, "grad_norm": 2.288297414779663, "learning_rate": 1.670803257513373e-05, "loss": 0.3905, "step": 27329 }, { "epoch": 4.4613689237174, "grad_norm": 1.8964983224868774, "learning_rate": 1.6707797226689447e-05, "loss": 0.3743, "step": 27330 }, { "epoch": 4.461532182359903, "grad_norm": 3.0038323402404785, "learning_rate": 1.670756187149042e-05, "loss": 0.4468, "step": 27331 }, { "epoch": 4.461695441002408, "grad_norm": 1.973037838935852, "learning_rate": 1.6707326509536877e-05, "loss": 0.354, "step": 27332 }, { "epoch": 4.461858699644912, "grad_norm": 2.456897258758545, "learning_rate": 1.670709114082906e-05, "loss": 0.4345, "step": 27333 }, { "epoch": 4.4620219582874165, "grad_norm": 2.3801608085632324, "learning_rate": 1.6706855765367202e-05, "loss": 0.4317, "step": 27334 }, { "epoch": 4.462185216929921, "grad_norm": 2.2895560264587402, "learning_rate": 1.6706620383151544e-05, "loss": 0.4339, "step": 27335 }, { "epoch": 4.462348475572425, "grad_norm": 2.023491621017456, "learning_rate": 1.670638499418232e-05, "loss": 0.4062, "step": 27336 }, { "epoch": 4.46251173421493, "grad_norm": 2.204761266708374, "learning_rate": 1.6706149598459765e-05, "loss": 0.3302, "step": 27337 }, { "epoch": 4.462674992857434, "grad_norm": 2.177743911743164, "learning_rate": 1.6705914195984126e-05, "loss": 0.4134, "step": 27338 }, { "epoch": 4.462838251499939, "grad_norm": 2.0608808994293213, "learning_rate": 1.6705678786755628e-05, "loss": 0.3739, "step": 27339 }, { "epoch": 4.463001510142443, "grad_norm": 2.7111217975616455, "learning_rate": 1.6705443370774516e-05, "loss": 0.4353, "step": 27340 }, { "epoch": 4.463164768784948, "grad_norm": 2.4393928050994873, "learning_rate": 1.670520794804102e-05, "loss": 0.3899, "step": 27341 }, { "epoch": 4.463328027427452, "grad_norm": 2.1151509284973145, "learning_rate": 1.6704972518555383e-05, "loss": 0.3735, "step": 27342 }, { "epoch": 4.4634912860699565, "grad_norm": 2.31367564201355, "learning_rate": 1.6704737082317844e-05, "loss": 0.4727, "step": 27343 }, { "epoch": 4.463654544712461, "grad_norm": 1.8925529718399048, "learning_rate": 1.6704501639328636e-05, "loss": 0.3492, "step": 27344 }, { "epoch": 4.463817803354965, "grad_norm": 2.4344418048858643, "learning_rate": 1.6704266189587992e-05, "loss": 0.4725, "step": 27345 }, { "epoch": 4.46398106199747, "grad_norm": 2.0164742469787598, "learning_rate": 1.6704030733096157e-05, "loss": 0.382, "step": 27346 }, { "epoch": 4.464144320639974, "grad_norm": 2.5482048988342285, "learning_rate": 1.6703795269853364e-05, "loss": 0.4148, "step": 27347 }, { "epoch": 4.464307579282479, "grad_norm": 2.2658870220184326, "learning_rate": 1.670355979985985e-05, "loss": 0.3831, "step": 27348 }, { "epoch": 4.464470837924983, "grad_norm": 1.638384461402893, "learning_rate": 1.6703324323115858e-05, "loss": 0.3919, "step": 27349 }, { "epoch": 4.464634096567487, "grad_norm": 1.8322906494140625, "learning_rate": 1.6703088839621616e-05, "loss": 0.3595, "step": 27350 }, { "epoch": 4.464797355209991, "grad_norm": 1.8492833375930786, "learning_rate": 1.6702853349377367e-05, "loss": 0.3226, "step": 27351 }, { "epoch": 4.4649606138524955, "grad_norm": 2.1771881580352783, "learning_rate": 1.6702617852383344e-05, "loss": 0.385, "step": 27352 }, { "epoch": 4.465123872495, "grad_norm": 1.9283032417297363, "learning_rate": 1.6702382348639786e-05, "loss": 0.3613, "step": 27353 }, { "epoch": 4.465287131137504, "grad_norm": 2.4261691570281982, "learning_rate": 1.6702146838146934e-05, "loss": 0.4915, "step": 27354 }, { "epoch": 4.465450389780009, "grad_norm": 1.5703535079956055, "learning_rate": 1.6701911320905022e-05, "loss": 0.3113, "step": 27355 }, { "epoch": 4.465613648422513, "grad_norm": 2.0971574783325195, "learning_rate": 1.6701675796914284e-05, "loss": 0.3801, "step": 27356 }, { "epoch": 4.465776907065018, "grad_norm": 1.927077054977417, "learning_rate": 1.6701440266174966e-05, "loss": 0.3722, "step": 27357 }, { "epoch": 4.465940165707522, "grad_norm": 2.5529801845550537, "learning_rate": 1.6701204728687292e-05, "loss": 0.4453, "step": 27358 }, { "epoch": 4.466103424350027, "grad_norm": 2.441500663757324, "learning_rate": 1.670096918445151e-05, "loss": 0.4181, "step": 27359 }, { "epoch": 4.466266682992531, "grad_norm": 2.1444907188415527, "learning_rate": 1.6700733633467855e-05, "loss": 0.3959, "step": 27360 }, { "epoch": 4.4664299416350355, "grad_norm": 2.2493515014648438, "learning_rate": 1.670049807573656e-05, "loss": 0.4273, "step": 27361 }, { "epoch": 4.46659320027754, "grad_norm": 2.384420871734619, "learning_rate": 1.6700262511257868e-05, "loss": 0.46, "step": 27362 }, { "epoch": 4.466756458920044, "grad_norm": 2.454991102218628, "learning_rate": 1.670002694003201e-05, "loss": 0.3649, "step": 27363 }, { "epoch": 4.466919717562549, "grad_norm": 2.3082783222198486, "learning_rate": 1.6699791362059233e-05, "loss": 0.4022, "step": 27364 }, { "epoch": 4.467082976205053, "grad_norm": 2.869957447052002, "learning_rate": 1.6699555777339763e-05, "loss": 0.4428, "step": 27365 }, { "epoch": 4.467246234847558, "grad_norm": 2.205808162689209, "learning_rate": 1.669932018587384e-05, "loss": 0.4085, "step": 27366 }, { "epoch": 4.467409493490061, "grad_norm": 2.3326821327209473, "learning_rate": 1.6699084587661708e-05, "loss": 0.4519, "step": 27367 }, { "epoch": 4.467572752132566, "grad_norm": 2.0369820594787598, "learning_rate": 1.6698848982703598e-05, "loss": 0.386, "step": 27368 }, { "epoch": 4.46773601077507, "grad_norm": 2.190873861312866, "learning_rate": 1.669861337099975e-05, "loss": 0.3597, "step": 27369 }, { "epoch": 4.4678992694175745, "grad_norm": 2.0169503688812256, "learning_rate": 1.66983777525504e-05, "loss": 0.4437, "step": 27370 }, { "epoch": 4.468062528060079, "grad_norm": 2.245914936065674, "learning_rate": 1.6698142127355785e-05, "loss": 0.4056, "step": 27371 }, { "epoch": 4.468225786702583, "grad_norm": 2.031074285507202, "learning_rate": 1.6697906495416138e-05, "loss": 0.3406, "step": 27372 }, { "epoch": 4.468389045345088, "grad_norm": 2.2086050510406494, "learning_rate": 1.6697670856731707e-05, "loss": 0.4254, "step": 27373 }, { "epoch": 4.468552303987592, "grad_norm": 2.3853211402893066, "learning_rate": 1.6697435211302722e-05, "loss": 0.4446, "step": 27374 }, { "epoch": 4.468715562630097, "grad_norm": 1.8423662185668945, "learning_rate": 1.669719955912942e-05, "loss": 0.3227, "step": 27375 }, { "epoch": 4.468878821272601, "grad_norm": 2.389058828353882, "learning_rate": 1.669696390021204e-05, "loss": 0.3833, "step": 27376 }, { "epoch": 4.469042079915106, "grad_norm": 2.1536872386932373, "learning_rate": 1.669672823455082e-05, "loss": 0.398, "step": 27377 }, { "epoch": 4.46920533855761, "grad_norm": 2.091710329055786, "learning_rate": 1.6696492562145996e-05, "loss": 0.4015, "step": 27378 }, { "epoch": 4.4693685972001145, "grad_norm": 2.431230306625366, "learning_rate": 1.6696256882997808e-05, "loss": 0.4331, "step": 27379 }, { "epoch": 4.469531855842619, "grad_norm": 2.3416924476623535, "learning_rate": 1.669602119710649e-05, "loss": 0.4667, "step": 27380 }, { "epoch": 4.469695114485123, "grad_norm": 1.719169020652771, "learning_rate": 1.6695785504472277e-05, "loss": 0.3288, "step": 27381 }, { "epoch": 4.469858373127628, "grad_norm": 2.1634390354156494, "learning_rate": 1.669554980509541e-05, "loss": 0.396, "step": 27382 }, { "epoch": 4.470021631770132, "grad_norm": 1.4858287572860718, "learning_rate": 1.669531409897613e-05, "loss": 0.297, "step": 27383 }, { "epoch": 4.470184890412636, "grad_norm": 2.04506254196167, "learning_rate": 1.669507838611467e-05, "loss": 0.4017, "step": 27384 }, { "epoch": 4.47034814905514, "grad_norm": 2.473010301589966, "learning_rate": 1.6694842666511263e-05, "loss": 0.4609, "step": 27385 }, { "epoch": 4.470511407697645, "grad_norm": 2.316331624984741, "learning_rate": 1.669460694016616e-05, "loss": 0.5169, "step": 27386 }, { "epoch": 4.470674666340149, "grad_norm": 2.4743082523345947, "learning_rate": 1.6694371207079584e-05, "loss": 0.4613, "step": 27387 }, { "epoch": 4.4708379249826535, "grad_norm": 2.27455472946167, "learning_rate": 1.669413546725178e-05, "loss": 0.376, "step": 27388 }, { "epoch": 4.471001183625158, "grad_norm": 2.417442560195923, "learning_rate": 1.6693899720682977e-05, "loss": 0.4317, "step": 27389 }, { "epoch": 4.471164442267662, "grad_norm": 2.0986883640289307, "learning_rate": 1.6693663967373423e-05, "loss": 0.3968, "step": 27390 }, { "epoch": 4.471327700910167, "grad_norm": 2.1763360500335693, "learning_rate": 1.6693428207323354e-05, "loss": 0.3864, "step": 27391 }, { "epoch": 4.471490959552671, "grad_norm": 2.5876762866973877, "learning_rate": 1.6693192440533002e-05, "loss": 0.4385, "step": 27392 }, { "epoch": 4.471654218195176, "grad_norm": 2.2671070098876953, "learning_rate": 1.6692956667002607e-05, "loss": 0.3853, "step": 27393 }, { "epoch": 4.47181747683768, "grad_norm": 1.8241565227508545, "learning_rate": 1.6692720886732407e-05, "loss": 0.3512, "step": 27394 }, { "epoch": 4.471980735480185, "grad_norm": 2.170433759689331, "learning_rate": 1.6692485099722638e-05, "loss": 0.4282, "step": 27395 }, { "epoch": 4.472143994122689, "grad_norm": 1.9993053674697876, "learning_rate": 1.669224930597354e-05, "loss": 0.33, "step": 27396 }, { "epoch": 4.4723072527651935, "grad_norm": 2.15140962600708, "learning_rate": 1.6692013505485344e-05, "loss": 0.4135, "step": 27397 }, { "epoch": 4.472470511407698, "grad_norm": 1.7803983688354492, "learning_rate": 1.6691777698258297e-05, "loss": 0.3519, "step": 27398 }, { "epoch": 4.472633770050202, "grad_norm": 1.8550810813903809, "learning_rate": 1.6691541884292632e-05, "loss": 0.4128, "step": 27399 }, { "epoch": 4.472797028692707, "grad_norm": 1.981210470199585, "learning_rate": 1.6691306063588583e-05, "loss": 0.3566, "step": 27400 }, { "epoch": 4.47296028733521, "grad_norm": 2.2273662090301514, "learning_rate": 1.669107023614639e-05, "loss": 0.3867, "step": 27401 }, { "epoch": 4.473123545977716, "grad_norm": 1.967625379562378, "learning_rate": 1.6690834401966294e-05, "loss": 0.3688, "step": 27402 }, { "epoch": 4.473286804620219, "grad_norm": 1.983541488647461, "learning_rate": 1.669059856104853e-05, "loss": 0.3776, "step": 27403 }, { "epoch": 4.473450063262724, "grad_norm": 2.174194097518921, "learning_rate": 1.669036271339333e-05, "loss": 0.3816, "step": 27404 }, { "epoch": 4.473613321905228, "grad_norm": 2.7906486988067627, "learning_rate": 1.6690126859000943e-05, "loss": 0.3648, "step": 27405 }, { "epoch": 4.4737765805477325, "grad_norm": 2.233961582183838, "learning_rate": 1.66898909978716e-05, "loss": 0.4548, "step": 27406 }, { "epoch": 4.473939839190237, "grad_norm": 2.703019142150879, "learning_rate": 1.6689655130005535e-05, "loss": 0.4183, "step": 27407 }, { "epoch": 4.474103097832741, "grad_norm": 1.8545842170715332, "learning_rate": 1.668941925540299e-05, "loss": 0.3996, "step": 27408 }, { "epoch": 4.474266356475246, "grad_norm": 2.3087072372436523, "learning_rate": 1.66891833740642e-05, "loss": 0.318, "step": 27409 }, { "epoch": 4.47442961511775, "grad_norm": 2.295720100402832, "learning_rate": 1.6688947485989406e-05, "loss": 0.4519, "step": 27410 }, { "epoch": 4.474592873760255, "grad_norm": 2.1804604530334473, "learning_rate": 1.668871159117884e-05, "loss": 0.3395, "step": 27411 }, { "epoch": 4.474756132402759, "grad_norm": 2.279984951019287, "learning_rate": 1.6688475689632748e-05, "loss": 0.4272, "step": 27412 }, { "epoch": 4.474919391045264, "grad_norm": 2.0729100704193115, "learning_rate": 1.6688239781351364e-05, "loss": 0.3631, "step": 27413 }, { "epoch": 4.475082649687768, "grad_norm": 2.2649574279785156, "learning_rate": 1.668800386633492e-05, "loss": 0.442, "step": 27414 }, { "epoch": 4.4752459083302725, "grad_norm": 2.0847983360290527, "learning_rate": 1.6687767944583658e-05, "loss": 0.3319, "step": 27415 }, { "epoch": 4.475409166972777, "grad_norm": 2.524771213531494, "learning_rate": 1.6687532016097817e-05, "loss": 0.3993, "step": 27416 }, { "epoch": 4.475572425615281, "grad_norm": 2.223511219024658, "learning_rate": 1.6687296080877634e-05, "loss": 0.4068, "step": 27417 }, { "epoch": 4.475735684257786, "grad_norm": 1.6788214445114136, "learning_rate": 1.6687060138923343e-05, "loss": 0.3323, "step": 27418 }, { "epoch": 4.47589894290029, "grad_norm": 2.532702922821045, "learning_rate": 1.6686824190235187e-05, "loss": 0.4173, "step": 27419 }, { "epoch": 4.476062201542794, "grad_norm": 2.204102039337158, "learning_rate": 1.66865882348134e-05, "loss": 0.4449, "step": 27420 }, { "epoch": 4.476225460185298, "grad_norm": 2.2083652019500732, "learning_rate": 1.6686352272658217e-05, "loss": 0.4286, "step": 27421 }, { "epoch": 4.476388718827803, "grad_norm": 1.9589382410049438, "learning_rate": 1.6686116303769884e-05, "loss": 0.3495, "step": 27422 }, { "epoch": 4.476551977470307, "grad_norm": 1.5306313037872314, "learning_rate": 1.668588032814863e-05, "loss": 0.2885, "step": 27423 }, { "epoch": 4.4767152361128115, "grad_norm": 2.353039026260376, "learning_rate": 1.6685644345794698e-05, "loss": 0.4001, "step": 27424 }, { "epoch": 4.476878494755316, "grad_norm": 1.9890544414520264, "learning_rate": 1.6685408356708325e-05, "loss": 0.3816, "step": 27425 }, { "epoch": 4.47704175339782, "grad_norm": 2.0695436000823975, "learning_rate": 1.6685172360889745e-05, "loss": 0.3777, "step": 27426 }, { "epoch": 4.477205012040325, "grad_norm": 2.1259424686431885, "learning_rate": 1.66849363583392e-05, "loss": 0.3377, "step": 27427 }, { "epoch": 4.477368270682829, "grad_norm": 2.4485557079315186, "learning_rate": 1.6684700349056924e-05, "loss": 0.4249, "step": 27428 }, { "epoch": 4.477531529325334, "grad_norm": 2.0431618690490723, "learning_rate": 1.6684464333043157e-05, "loss": 0.3562, "step": 27429 }, { "epoch": 4.477694787967838, "grad_norm": 1.8134936094284058, "learning_rate": 1.668422831029814e-05, "loss": 0.3999, "step": 27430 }, { "epoch": 4.477858046610343, "grad_norm": 1.7160743474960327, "learning_rate": 1.66839922808221e-05, "loss": 0.3329, "step": 27431 }, { "epoch": 4.478021305252847, "grad_norm": 2.04618239402771, "learning_rate": 1.668375624461529e-05, "loss": 0.376, "step": 27432 }, { "epoch": 4.4781845638953515, "grad_norm": 1.942146897315979, "learning_rate": 1.6683520201677933e-05, "loss": 0.3614, "step": 27433 }, { "epoch": 4.478347822537856, "grad_norm": 1.9254460334777832, "learning_rate": 1.6683284152010274e-05, "loss": 0.3479, "step": 27434 }, { "epoch": 4.47851108118036, "grad_norm": 2.300262451171875, "learning_rate": 1.668304809561255e-05, "loss": 0.4071, "step": 27435 }, { "epoch": 4.478674339822865, "grad_norm": 2.329097032546997, "learning_rate": 1.6682812032485e-05, "loss": 0.5045, "step": 27436 }, { "epoch": 4.478837598465368, "grad_norm": 2.2300000190734863, "learning_rate": 1.668257596262786e-05, "loss": 0.4129, "step": 27437 }, { "epoch": 4.479000857107873, "grad_norm": 1.9273914098739624, "learning_rate": 1.6682339886041364e-05, "loss": 0.3576, "step": 27438 }, { "epoch": 4.479164115750377, "grad_norm": 1.8572584390640259, "learning_rate": 1.6682103802725754e-05, "loss": 0.3925, "step": 27439 }, { "epoch": 4.479327374392882, "grad_norm": 1.9615639448165894, "learning_rate": 1.6681867712681274e-05, "loss": 0.3774, "step": 27440 }, { "epoch": 4.479490633035386, "grad_norm": 1.8719472885131836, "learning_rate": 1.6681631615908147e-05, "loss": 0.3411, "step": 27441 }, { "epoch": 4.4796538916778905, "grad_norm": 2.300121307373047, "learning_rate": 1.668139551240662e-05, "loss": 0.4127, "step": 27442 }, { "epoch": 4.479817150320395, "grad_norm": 2.265871047973633, "learning_rate": 1.6681159402176932e-05, "loss": 0.3922, "step": 27443 }, { "epoch": 4.479980408962899, "grad_norm": 2.5157816410064697, "learning_rate": 1.668092328521932e-05, "loss": 0.3987, "step": 27444 }, { "epoch": 4.480143667605404, "grad_norm": 1.7416332960128784, "learning_rate": 1.6680687161534013e-05, "loss": 0.3437, "step": 27445 }, { "epoch": 4.480306926247908, "grad_norm": 1.784895896911621, "learning_rate": 1.6680451031121264e-05, "loss": 0.3394, "step": 27446 }, { "epoch": 4.480470184890413, "grad_norm": 2.8916194438934326, "learning_rate": 1.66802148939813e-05, "loss": 0.416, "step": 27447 }, { "epoch": 4.480633443532917, "grad_norm": 1.882917046546936, "learning_rate": 1.667997875011436e-05, "loss": 0.3665, "step": 27448 }, { "epoch": 4.480796702175422, "grad_norm": 2.3416216373443604, "learning_rate": 1.667974259952068e-05, "loss": 0.444, "step": 27449 }, { "epoch": 4.480959960817926, "grad_norm": 2.211840867996216, "learning_rate": 1.6679506442200508e-05, "loss": 0.463, "step": 27450 }, { "epoch": 4.4811232194604305, "grad_norm": 2.1143178939819336, "learning_rate": 1.6679270278154072e-05, "loss": 0.3543, "step": 27451 }, { "epoch": 4.481286478102935, "grad_norm": 2.1939244270324707, "learning_rate": 1.667903410738161e-05, "loss": 0.4464, "step": 27452 }, { "epoch": 4.481449736745439, "grad_norm": 2.7436447143554688, "learning_rate": 1.6678797929883366e-05, "loss": 0.456, "step": 27453 }, { "epoch": 4.481612995387943, "grad_norm": 2.469717264175415, "learning_rate": 1.667856174565957e-05, "loss": 0.4277, "step": 27454 }, { "epoch": 4.481776254030447, "grad_norm": 2.4356629848480225, "learning_rate": 1.6678325554710467e-05, "loss": 0.3481, "step": 27455 }, { "epoch": 4.481939512672952, "grad_norm": 2.110043525695801, "learning_rate": 1.6678089357036292e-05, "loss": 0.4414, "step": 27456 }, { "epoch": 4.482102771315456, "grad_norm": 2.0514936447143555, "learning_rate": 1.6677853152637285e-05, "loss": 0.4131, "step": 27457 }, { "epoch": 4.482266029957961, "grad_norm": 2.6165452003479004, "learning_rate": 1.6677616941513677e-05, "loss": 0.4662, "step": 27458 }, { "epoch": 4.482429288600465, "grad_norm": 2.0718517303466797, "learning_rate": 1.667738072366571e-05, "loss": 0.3687, "step": 27459 }, { "epoch": 4.4825925472429695, "grad_norm": 2.383577585220337, "learning_rate": 1.6677144499093626e-05, "loss": 0.4031, "step": 27460 }, { "epoch": 4.482755805885474, "grad_norm": 2.661595344543457, "learning_rate": 1.667690826779766e-05, "loss": 0.4206, "step": 27461 }, { "epoch": 4.482919064527978, "grad_norm": 1.7976239919662476, "learning_rate": 1.6676672029778046e-05, "loss": 0.3737, "step": 27462 }, { "epoch": 4.483082323170483, "grad_norm": 2.087214946746826, "learning_rate": 1.6676435785035025e-05, "loss": 0.4306, "step": 27463 }, { "epoch": 4.483245581812987, "grad_norm": 1.9976636171340942, "learning_rate": 1.6676199533568837e-05, "loss": 0.406, "step": 27464 }, { "epoch": 4.483408840455492, "grad_norm": 2.011155843734741, "learning_rate": 1.667596327537972e-05, "loss": 0.3548, "step": 27465 }, { "epoch": 4.483572099097996, "grad_norm": 2.36138653755188, "learning_rate": 1.667572701046791e-05, "loss": 0.4722, "step": 27466 }, { "epoch": 4.483735357740501, "grad_norm": 2.1056556701660156, "learning_rate": 1.667549073883364e-05, "loss": 0.3617, "step": 27467 }, { "epoch": 4.483898616383005, "grad_norm": 1.9284462928771973, "learning_rate": 1.6675254460477153e-05, "loss": 0.3697, "step": 27468 }, { "epoch": 4.4840618750255095, "grad_norm": 1.96943199634552, "learning_rate": 1.6675018175398685e-05, "loss": 0.3693, "step": 27469 }, { "epoch": 4.484225133668014, "grad_norm": 2.305049180984497, "learning_rate": 1.667478188359848e-05, "loss": 0.3814, "step": 27470 }, { "epoch": 4.484388392310518, "grad_norm": 2.0734002590179443, "learning_rate": 1.667454558507677e-05, "loss": 0.3729, "step": 27471 }, { "epoch": 4.484551650953023, "grad_norm": 1.9627456665039062, "learning_rate": 1.6674309279833792e-05, "loss": 0.3808, "step": 27472 }, { "epoch": 4.484714909595526, "grad_norm": 1.9086847305297852, "learning_rate": 1.667407296786979e-05, "loss": 0.3413, "step": 27473 }, { "epoch": 4.484878168238031, "grad_norm": 2.014421224594116, "learning_rate": 1.6673836649184995e-05, "loss": 0.3397, "step": 27474 }, { "epoch": 4.485041426880535, "grad_norm": 2.7863786220550537, "learning_rate": 1.6673600323779654e-05, "loss": 0.5001, "step": 27475 }, { "epoch": 4.48520468552304, "grad_norm": 1.9993913173675537, "learning_rate": 1.667336399165399e-05, "loss": 0.3461, "step": 27476 }, { "epoch": 4.485367944165544, "grad_norm": 1.9304373264312744, "learning_rate": 1.6673127652808257e-05, "loss": 0.3545, "step": 27477 }, { "epoch": 4.4855312028080485, "grad_norm": 2.0351064205169678, "learning_rate": 1.667289130724269e-05, "loss": 0.352, "step": 27478 }, { "epoch": 4.485694461450553, "grad_norm": 1.7156376838684082, "learning_rate": 1.6672654954957513e-05, "loss": 0.332, "step": 27479 }, { "epoch": 4.485857720093057, "grad_norm": 1.8767958879470825, "learning_rate": 1.667241859595298e-05, "loss": 0.3314, "step": 27480 }, { "epoch": 4.486020978735562, "grad_norm": 1.8156981468200684, "learning_rate": 1.6672182230229322e-05, "loss": 0.3314, "step": 27481 }, { "epoch": 4.486184237378066, "grad_norm": 1.9562079906463623, "learning_rate": 1.6671945857786778e-05, "loss": 0.3336, "step": 27482 }, { "epoch": 4.486347496020571, "grad_norm": 2.094634532928467, "learning_rate": 1.6671709478625585e-05, "loss": 0.4277, "step": 27483 }, { "epoch": 4.486510754663075, "grad_norm": 3.011324167251587, "learning_rate": 1.6671473092745985e-05, "loss": 0.4277, "step": 27484 }, { "epoch": 4.48667401330558, "grad_norm": 2.2083122730255127, "learning_rate": 1.667123670014821e-05, "loss": 0.3773, "step": 27485 }, { "epoch": 4.486837271948084, "grad_norm": 2.402695894241333, "learning_rate": 1.6671000300832506e-05, "loss": 0.3413, "step": 27486 }, { "epoch": 4.4870005305905885, "grad_norm": 2.14656138420105, "learning_rate": 1.6670763894799106e-05, "loss": 0.4068, "step": 27487 }, { "epoch": 4.487163789233093, "grad_norm": 2.141345500946045, "learning_rate": 1.6670527482048246e-05, "loss": 0.4113, "step": 27488 }, { "epoch": 4.487327047875597, "grad_norm": 2.0528459548950195, "learning_rate": 1.6670291062580167e-05, "loss": 0.4198, "step": 27489 }, { "epoch": 4.487490306518101, "grad_norm": 1.9202477931976318, "learning_rate": 1.667005463639511e-05, "loss": 0.3672, "step": 27490 }, { "epoch": 4.487653565160605, "grad_norm": 3.0802876949310303, "learning_rate": 1.6669818203493306e-05, "loss": 0.4826, "step": 27491 }, { "epoch": 4.48781682380311, "grad_norm": 2.343560218811035, "learning_rate": 1.6669581763874995e-05, "loss": 0.3925, "step": 27492 }, { "epoch": 4.487980082445614, "grad_norm": 2.3249738216400146, "learning_rate": 1.666934531754042e-05, "loss": 0.456, "step": 27493 }, { "epoch": 4.488143341088119, "grad_norm": 1.5936973094940186, "learning_rate": 1.6669108864489816e-05, "loss": 0.3196, "step": 27494 }, { "epoch": 4.488306599730623, "grad_norm": 1.9824200868606567, "learning_rate": 1.666887240472342e-05, "loss": 0.369, "step": 27495 }, { "epoch": 4.4884698583731275, "grad_norm": 2.1820871829986572, "learning_rate": 1.6668635938241476e-05, "loss": 0.4224, "step": 27496 }, { "epoch": 4.488633117015632, "grad_norm": 2.045806884765625, "learning_rate": 1.666839946504421e-05, "loss": 0.3361, "step": 27497 }, { "epoch": 4.488796375658136, "grad_norm": 2.804994583129883, "learning_rate": 1.6668162985131874e-05, "loss": 0.4612, "step": 27498 }, { "epoch": 4.488959634300641, "grad_norm": 2.0460658073425293, "learning_rate": 1.6667926498504695e-05, "loss": 0.3838, "step": 27499 }, { "epoch": 4.489122892943145, "grad_norm": 2.2825939655303955, "learning_rate": 1.666769000516292e-05, "loss": 0.43, "step": 27500 }, { "epoch": 4.48928615158565, "grad_norm": 1.770001769065857, "learning_rate": 1.6667453505106777e-05, "loss": 0.3582, "step": 27501 }, { "epoch": 4.489449410228154, "grad_norm": 2.270822286605835, "learning_rate": 1.6667216998336514e-05, "loss": 0.4042, "step": 27502 }, { "epoch": 4.489612668870659, "grad_norm": 1.6392443180084229, "learning_rate": 1.6666980484852366e-05, "loss": 0.3091, "step": 27503 }, { "epoch": 4.489775927513163, "grad_norm": 2.1212515830993652, "learning_rate": 1.6666743964654567e-05, "loss": 0.3793, "step": 27504 }, { "epoch": 4.4899391861556674, "grad_norm": 2.222827434539795, "learning_rate": 1.666650743774336e-05, "loss": 0.3749, "step": 27505 }, { "epoch": 4.490102444798172, "grad_norm": 2.3984148502349854, "learning_rate": 1.6666270904118985e-05, "loss": 0.4105, "step": 27506 }, { "epoch": 4.490265703440675, "grad_norm": 1.8861950635910034, "learning_rate": 1.6666034363781673e-05, "loss": 0.3283, "step": 27507 }, { "epoch": 4.49042896208318, "grad_norm": 1.9192277193069458, "learning_rate": 1.666579781673167e-05, "loss": 0.3604, "step": 27508 }, { "epoch": 4.490592220725684, "grad_norm": 2.488839626312256, "learning_rate": 1.6665561262969207e-05, "loss": 0.3678, "step": 27509 }, { "epoch": 4.490755479368189, "grad_norm": 2.2444238662719727, "learning_rate": 1.6665324702494524e-05, "loss": 0.4684, "step": 27510 }, { "epoch": 4.490918738010693, "grad_norm": 2.277462959289551, "learning_rate": 1.6665088135307867e-05, "loss": 0.4396, "step": 27511 }, { "epoch": 4.491081996653198, "grad_norm": 1.7477902173995972, "learning_rate": 1.6664851561409463e-05, "loss": 0.3384, "step": 27512 }, { "epoch": 4.491245255295702, "grad_norm": 2.6204335689544678, "learning_rate": 1.6664614980799556e-05, "loss": 0.4227, "step": 27513 }, { "epoch": 4.4914085139382065, "grad_norm": 2.061091184616089, "learning_rate": 1.6664378393478384e-05, "loss": 0.367, "step": 27514 }, { "epoch": 4.491571772580711, "grad_norm": 2.231308698654175, "learning_rate": 1.6664141799446186e-05, "loss": 0.377, "step": 27515 }, { "epoch": 4.491735031223215, "grad_norm": 2.097532033920288, "learning_rate": 1.6663905198703198e-05, "loss": 0.4012, "step": 27516 }, { "epoch": 4.49189828986572, "grad_norm": 2.3444600105285645, "learning_rate": 1.6663668591249658e-05, "loss": 0.3589, "step": 27517 }, { "epoch": 4.492061548508224, "grad_norm": 1.7375515699386597, "learning_rate": 1.6663431977085805e-05, "loss": 0.3318, "step": 27518 }, { "epoch": 4.492224807150729, "grad_norm": 2.0053532123565674, "learning_rate": 1.6663195356211882e-05, "loss": 0.405, "step": 27519 }, { "epoch": 4.492388065793233, "grad_norm": 2.292268753051758, "learning_rate": 1.666295872862812e-05, "loss": 0.4369, "step": 27520 }, { "epoch": 4.492551324435738, "grad_norm": 2.6406900882720947, "learning_rate": 1.666272209433476e-05, "loss": 0.4272, "step": 27521 }, { "epoch": 4.492714583078242, "grad_norm": 2.160477876663208, "learning_rate": 1.666248545333204e-05, "loss": 0.4307, "step": 27522 }, { "epoch": 4.4928778417207464, "grad_norm": 1.9493136405944824, "learning_rate": 1.66622488056202e-05, "loss": 0.4013, "step": 27523 }, { "epoch": 4.493041100363251, "grad_norm": 2.6345512866973877, "learning_rate": 1.6662012151199478e-05, "loss": 0.4349, "step": 27524 }, { "epoch": 4.493204359005755, "grad_norm": 2.1660852432250977, "learning_rate": 1.6661775490070114e-05, "loss": 0.3603, "step": 27525 }, { "epoch": 4.493367617648259, "grad_norm": 1.8950611352920532, "learning_rate": 1.6661538822232338e-05, "loss": 0.3682, "step": 27526 }, { "epoch": 4.493530876290763, "grad_norm": 2.14313006401062, "learning_rate": 1.66613021476864e-05, "loss": 0.3484, "step": 27527 }, { "epoch": 4.493694134933268, "grad_norm": 2.072406053543091, "learning_rate": 1.6661065466432526e-05, "loss": 0.3403, "step": 27528 }, { "epoch": 4.493857393575772, "grad_norm": 2.378981590270996, "learning_rate": 1.6660828778470968e-05, "loss": 0.3899, "step": 27529 }, { "epoch": 4.494020652218277, "grad_norm": 1.9649871587753296, "learning_rate": 1.6660592083801953e-05, "loss": 0.3639, "step": 27530 }, { "epoch": 4.494183910860781, "grad_norm": 2.08742618560791, "learning_rate": 1.6660355382425724e-05, "loss": 0.3847, "step": 27531 }, { "epoch": 4.4943471695032855, "grad_norm": 1.7501338720321655, "learning_rate": 1.666011867434252e-05, "loss": 0.3545, "step": 27532 }, { "epoch": 4.49451042814579, "grad_norm": 2.380187511444092, "learning_rate": 1.6659881959552574e-05, "loss": 0.4099, "step": 27533 }, { "epoch": 4.494673686788294, "grad_norm": 1.760194182395935, "learning_rate": 1.6659645238056132e-05, "loss": 0.3403, "step": 27534 }, { "epoch": 4.494836945430799, "grad_norm": 2.398155450820923, "learning_rate": 1.665940850985343e-05, "loss": 0.4091, "step": 27535 }, { "epoch": 4.495000204073303, "grad_norm": 2.209174871444702, "learning_rate": 1.6659171774944706e-05, "loss": 0.4311, "step": 27536 }, { "epoch": 4.495163462715808, "grad_norm": 2.1526544094085693, "learning_rate": 1.6658935033330194e-05, "loss": 0.4148, "step": 27537 }, { "epoch": 4.495326721358312, "grad_norm": 1.8247658014297485, "learning_rate": 1.665869828501014e-05, "loss": 0.4001, "step": 27538 }, { "epoch": 4.495489980000817, "grad_norm": 2.6001105308532715, "learning_rate": 1.6658461529984774e-05, "loss": 0.4011, "step": 27539 }, { "epoch": 4.495653238643321, "grad_norm": 1.6599875688552856, "learning_rate": 1.6658224768254345e-05, "loss": 0.3638, "step": 27540 }, { "epoch": 4.495816497285825, "grad_norm": 1.8283724784851074, "learning_rate": 1.665798799981908e-05, "loss": 0.3613, "step": 27541 }, { "epoch": 4.49597975592833, "grad_norm": 2.371570587158203, "learning_rate": 1.6657751224679227e-05, "loss": 0.431, "step": 27542 }, { "epoch": 4.496143014570833, "grad_norm": 2.2703676223754883, "learning_rate": 1.6657514442835014e-05, "loss": 0.4449, "step": 27543 }, { "epoch": 4.496306273213338, "grad_norm": 2.327073812484741, "learning_rate": 1.6657277654286695e-05, "loss": 0.4157, "step": 27544 }, { "epoch": 4.496469531855842, "grad_norm": 2.1468541622161865, "learning_rate": 1.6657040859034492e-05, "loss": 0.3634, "step": 27545 }, { "epoch": 4.496632790498347, "grad_norm": 2.340877056121826, "learning_rate": 1.6656804057078654e-05, "loss": 0.4126, "step": 27546 }, { "epoch": 4.496796049140851, "grad_norm": 2.4671716690063477, "learning_rate": 1.6656567248419414e-05, "loss": 0.401, "step": 27547 }, { "epoch": 4.496959307783356, "grad_norm": 1.9040993452072144, "learning_rate": 1.6656330433057016e-05, "loss": 0.38, "step": 27548 }, { "epoch": 4.49712256642586, "grad_norm": 2.059994697570801, "learning_rate": 1.665609361099169e-05, "loss": 0.4291, "step": 27549 }, { "epoch": 4.4972858250683645, "grad_norm": 1.8697975873947144, "learning_rate": 1.6655856782223682e-05, "loss": 0.3422, "step": 27550 }, { "epoch": 4.497449083710869, "grad_norm": 2.2179627418518066, "learning_rate": 1.665561994675323e-05, "loss": 0.4171, "step": 27551 }, { "epoch": 4.497612342353373, "grad_norm": 2.1138761043548584, "learning_rate": 1.6655383104580566e-05, "loss": 0.3516, "step": 27552 }, { "epoch": 4.497775600995878, "grad_norm": 2.1585278511047363, "learning_rate": 1.6655146255705936e-05, "loss": 0.375, "step": 27553 }, { "epoch": 4.497938859638382, "grad_norm": 2.4272053241729736, "learning_rate": 1.6654909400129575e-05, "loss": 0.4023, "step": 27554 }, { "epoch": 4.498102118280887, "grad_norm": 1.662743091583252, "learning_rate": 1.665467253785172e-05, "loss": 0.3194, "step": 27555 }, { "epoch": 4.498265376923391, "grad_norm": 1.813063383102417, "learning_rate": 1.6654435668872615e-05, "loss": 0.3956, "step": 27556 }, { "epoch": 4.498428635565896, "grad_norm": 2.3050389289855957, "learning_rate": 1.6654198793192496e-05, "loss": 0.4136, "step": 27557 }, { "epoch": 4.4985918942084, "grad_norm": 2.1440322399139404, "learning_rate": 1.6653961910811598e-05, "loss": 0.4249, "step": 27558 }, { "epoch": 4.498755152850904, "grad_norm": 2.1356828212738037, "learning_rate": 1.665372502173016e-05, "loss": 0.3338, "step": 27559 }, { "epoch": 4.498918411493408, "grad_norm": 2.2785403728485107, "learning_rate": 1.6653488125948425e-05, "loss": 0.4136, "step": 27560 }, { "epoch": 4.499081670135912, "grad_norm": 1.9833828210830688, "learning_rate": 1.665325122346663e-05, "loss": 0.3841, "step": 27561 }, { "epoch": 4.499244928778417, "grad_norm": 2.069169282913208, "learning_rate": 1.665301431428501e-05, "loss": 0.4425, "step": 27562 }, { "epoch": 4.499408187420921, "grad_norm": 2.238461971282959, "learning_rate": 1.665277739840381e-05, "loss": 0.4496, "step": 27563 }, { "epoch": 4.499571446063426, "grad_norm": 2.0368096828460693, "learning_rate": 1.665254047582326e-05, "loss": 0.3916, "step": 27564 }, { "epoch": 4.49973470470593, "grad_norm": 2.2313766479492188, "learning_rate": 1.665230354654361e-05, "loss": 0.388, "step": 27565 }, { "epoch": 4.499897963348435, "grad_norm": 2.615286350250244, "learning_rate": 1.665206661056509e-05, "loss": 0.4725, "step": 27566 }, { "epoch": 4.500061221990939, "grad_norm": 2.0306239128112793, "learning_rate": 1.6651829667887937e-05, "loss": 0.3739, "step": 27567 }, { "epoch": 4.5002244806334435, "grad_norm": 2.041780710220337, "learning_rate": 1.6651592718512397e-05, "loss": 0.3681, "step": 27568 }, { "epoch": 4.500387739275948, "grad_norm": 1.9453411102294922, "learning_rate": 1.6651355762438704e-05, "loss": 0.3859, "step": 27569 }, { "epoch": 4.500550997918452, "grad_norm": 1.8622325658798218, "learning_rate": 1.6651118799667095e-05, "loss": 0.3249, "step": 27570 }, { "epoch": 4.500714256560957, "grad_norm": 1.7712324857711792, "learning_rate": 1.6650881830197817e-05, "loss": 0.3813, "step": 27571 }, { "epoch": 4.500877515203461, "grad_norm": 2.1315441131591797, "learning_rate": 1.6650644854031096e-05, "loss": 0.3709, "step": 27572 }, { "epoch": 4.501040773845966, "grad_norm": 2.3594412803649902, "learning_rate": 1.665040787116718e-05, "loss": 0.3636, "step": 27573 }, { "epoch": 4.50120403248847, "grad_norm": 1.8512518405914307, "learning_rate": 1.6650170881606308e-05, "loss": 0.3291, "step": 27574 }, { "epoch": 4.501367291130975, "grad_norm": 1.8184834718704224, "learning_rate": 1.6649933885348715e-05, "loss": 0.359, "step": 27575 }, { "epoch": 4.501530549773479, "grad_norm": 2.316960334777832, "learning_rate": 1.6649696882394635e-05, "loss": 0.4277, "step": 27576 }, { "epoch": 4.5016938084159825, "grad_norm": 2.0363075733184814, "learning_rate": 1.6649459872744316e-05, "loss": 0.3926, "step": 27577 }, { "epoch": 4.501857067058488, "grad_norm": 1.8407316207885742, "learning_rate": 1.6649222856397994e-05, "loss": 0.4194, "step": 27578 }, { "epoch": 4.502020325700991, "grad_norm": 2.071963310241699, "learning_rate": 1.66489858333559e-05, "loss": 0.3801, "step": 27579 }, { "epoch": 4.502183584343496, "grad_norm": 2.2291297912597656, "learning_rate": 1.6648748803618287e-05, "loss": 0.4416, "step": 27580 }, { "epoch": 4.502346842986, "grad_norm": 2.193796396255493, "learning_rate": 1.6648511767185382e-05, "loss": 0.4268, "step": 27581 }, { "epoch": 4.502510101628505, "grad_norm": 2.1430578231811523, "learning_rate": 1.664827472405743e-05, "loss": 0.4186, "step": 27582 }, { "epoch": 4.502673360271009, "grad_norm": 1.7447535991668701, "learning_rate": 1.6648037674234663e-05, "loss": 0.3435, "step": 27583 }, { "epoch": 4.502836618913514, "grad_norm": 2.074101686477661, "learning_rate": 1.6647800617717327e-05, "loss": 0.4067, "step": 27584 }, { "epoch": 4.502999877556018, "grad_norm": 1.678507924079895, "learning_rate": 1.6647563554505655e-05, "loss": 0.3306, "step": 27585 }, { "epoch": 4.5031631361985225, "grad_norm": 2.554957151412964, "learning_rate": 1.664732648459989e-05, "loss": 0.4606, "step": 27586 }, { "epoch": 4.503326394841027, "grad_norm": 1.7823306322097778, "learning_rate": 1.664708940800027e-05, "loss": 0.3521, "step": 27587 }, { "epoch": 4.503489653483531, "grad_norm": 1.94434654712677, "learning_rate": 1.6646852324707028e-05, "loss": 0.3449, "step": 27588 }, { "epoch": 4.503652912126036, "grad_norm": 2.4502620697021484, "learning_rate": 1.664661523472041e-05, "loss": 0.4456, "step": 27589 }, { "epoch": 4.50381617076854, "grad_norm": 1.829287052154541, "learning_rate": 1.6646378138040655e-05, "loss": 0.3317, "step": 27590 }, { "epoch": 4.503979429411045, "grad_norm": 2.170548915863037, "learning_rate": 1.6646141034667997e-05, "loss": 0.3527, "step": 27591 }, { "epoch": 4.504142688053549, "grad_norm": 2.5245888233184814, "learning_rate": 1.6645903924602677e-05, "loss": 0.4077, "step": 27592 }, { "epoch": 4.5043059466960536, "grad_norm": 2.055673837661743, "learning_rate": 1.664566680784493e-05, "loss": 0.3321, "step": 27593 }, { "epoch": 4.504469205338558, "grad_norm": 1.9800519943237305, "learning_rate": 1.6645429684395002e-05, "loss": 0.3691, "step": 27594 }, { "epoch": 4.504632463981062, "grad_norm": 1.9154558181762695, "learning_rate": 1.664519255425313e-05, "loss": 0.3672, "step": 27595 }, { "epoch": 4.504795722623566, "grad_norm": 2.3918299674987793, "learning_rate": 1.6644955417419545e-05, "loss": 0.4213, "step": 27596 }, { "epoch": 4.50495898126607, "grad_norm": 2.555607557296753, "learning_rate": 1.66447182738945e-05, "loss": 0.4003, "step": 27597 }, { "epoch": 4.505122239908575, "grad_norm": 1.786171555519104, "learning_rate": 1.664448112367822e-05, "loss": 0.3573, "step": 27598 }, { "epoch": 4.505285498551079, "grad_norm": 2.2712278366088867, "learning_rate": 1.6644243966770948e-05, "loss": 0.3948, "step": 27599 }, { "epoch": 4.505448757193584, "grad_norm": 2.222395658493042, "learning_rate": 1.6644006803172926e-05, "loss": 0.4383, "step": 27600 }, { "epoch": 4.505612015836088, "grad_norm": 2.440401554107666, "learning_rate": 1.6643769632884387e-05, "loss": 0.4677, "step": 27601 }, { "epoch": 4.505775274478593, "grad_norm": 2.069974660873413, "learning_rate": 1.664353245590558e-05, "loss": 0.4304, "step": 27602 }, { "epoch": 4.505938533121097, "grad_norm": 1.845879316329956, "learning_rate": 1.6643295272236735e-05, "loss": 0.3757, "step": 27603 }, { "epoch": 4.5061017917636015, "grad_norm": 2.409294843673706, "learning_rate": 1.6643058081878093e-05, "loss": 0.3766, "step": 27604 }, { "epoch": 4.506265050406106, "grad_norm": 2.3548452854156494, "learning_rate": 1.6642820884829896e-05, "loss": 0.4214, "step": 27605 }, { "epoch": 4.50642830904861, "grad_norm": 2.3261072635650635, "learning_rate": 1.664258368109238e-05, "loss": 0.4272, "step": 27606 }, { "epoch": 4.506591567691115, "grad_norm": 2.285726547241211, "learning_rate": 1.6642346470665777e-05, "loss": 0.3554, "step": 27607 }, { "epoch": 4.506754826333619, "grad_norm": 1.9709794521331787, "learning_rate": 1.6642109253550337e-05, "loss": 0.3579, "step": 27608 }, { "epoch": 4.506918084976124, "grad_norm": 2.2280938625335693, "learning_rate": 1.6641872029746297e-05, "loss": 0.4603, "step": 27609 }, { "epoch": 4.507081343618628, "grad_norm": 1.9350873231887817, "learning_rate": 1.6641634799253892e-05, "loss": 0.3546, "step": 27610 }, { "epoch": 4.5072446022611325, "grad_norm": 2.1537482738494873, "learning_rate": 1.6641397562073362e-05, "loss": 0.4295, "step": 27611 }, { "epoch": 4.507407860903637, "grad_norm": 1.896573543548584, "learning_rate": 1.664116031820495e-05, "loss": 0.3981, "step": 27612 }, { "epoch": 4.5075711195461405, "grad_norm": 2.1439692974090576, "learning_rate": 1.6640923067648884e-05, "loss": 0.4016, "step": 27613 }, { "epoch": 4.507734378188645, "grad_norm": 1.8565624952316284, "learning_rate": 1.664068581040542e-05, "loss": 0.3806, "step": 27614 }, { "epoch": 4.507897636831149, "grad_norm": 1.9279755353927612, "learning_rate": 1.664044854647478e-05, "loss": 0.4156, "step": 27615 }, { "epoch": 4.508060895473654, "grad_norm": 2.3707189559936523, "learning_rate": 1.664021127585721e-05, "loss": 0.4281, "step": 27616 }, { "epoch": 4.508224154116158, "grad_norm": 1.8794375658035278, "learning_rate": 1.663997399855295e-05, "loss": 0.3994, "step": 27617 }, { "epoch": 4.508387412758663, "grad_norm": 2.1171622276306152, "learning_rate": 1.663973671456224e-05, "loss": 0.3877, "step": 27618 }, { "epoch": 4.508550671401167, "grad_norm": 1.9004143476486206, "learning_rate": 1.6639499423885315e-05, "loss": 0.3446, "step": 27619 }, { "epoch": 4.508713930043672, "grad_norm": 2.111396074295044, "learning_rate": 1.6639262126522417e-05, "loss": 0.3703, "step": 27620 }, { "epoch": 4.508877188686176, "grad_norm": 2.1323184967041016, "learning_rate": 1.6639024822473783e-05, "loss": 0.4212, "step": 27621 }, { "epoch": 4.5090404473286805, "grad_norm": 1.6309301853179932, "learning_rate": 1.6638787511739653e-05, "loss": 0.3182, "step": 27622 }, { "epoch": 4.509203705971185, "grad_norm": 2.478243827819824, "learning_rate": 1.6638550194320268e-05, "loss": 0.4641, "step": 27623 }, { "epoch": 4.509366964613689, "grad_norm": 1.9544802904129028, "learning_rate": 1.6638312870215862e-05, "loss": 0.3575, "step": 27624 }, { "epoch": 4.509530223256194, "grad_norm": 1.700825572013855, "learning_rate": 1.6638075539426675e-05, "loss": 0.3537, "step": 27625 }, { "epoch": 4.509693481898698, "grad_norm": 1.949966549873352, "learning_rate": 1.6637838201952952e-05, "loss": 0.4069, "step": 27626 }, { "epoch": 4.509856740541203, "grad_norm": 1.9732860326766968, "learning_rate": 1.6637600857794926e-05, "loss": 0.3342, "step": 27627 }, { "epoch": 4.510019999183707, "grad_norm": 2.29374098777771, "learning_rate": 1.6637363506952837e-05, "loss": 0.3711, "step": 27628 }, { "epoch": 4.5101832578262115, "grad_norm": 2.099273920059204, "learning_rate": 1.6637126149426923e-05, "loss": 0.3849, "step": 27629 }, { "epoch": 4.510346516468715, "grad_norm": 1.970659852027893, "learning_rate": 1.6636888785217425e-05, "loss": 0.3629, "step": 27630 }, { "epoch": 4.51050977511122, "grad_norm": 2.1328976154327393, "learning_rate": 1.6636651414324586e-05, "loss": 0.3843, "step": 27631 }, { "epoch": 4.510673033753724, "grad_norm": 1.6424516439437866, "learning_rate": 1.6636414036748637e-05, "loss": 0.3221, "step": 27632 }, { "epoch": 4.510836292396228, "grad_norm": 2.495635747909546, "learning_rate": 1.6636176652489824e-05, "loss": 0.4284, "step": 27633 }, { "epoch": 4.510999551038733, "grad_norm": 2.0280721187591553, "learning_rate": 1.6635939261548383e-05, "loss": 0.3603, "step": 27634 }, { "epoch": 4.511162809681237, "grad_norm": 2.700929880142212, "learning_rate": 1.663570186392455e-05, "loss": 0.8029, "step": 27635 }, { "epoch": 4.511326068323742, "grad_norm": 2.8825581073760986, "learning_rate": 1.6635464459618568e-05, "loss": 0.4573, "step": 27636 }, { "epoch": 4.511489326966246, "grad_norm": 1.691787600517273, "learning_rate": 1.6635227048630672e-05, "loss": 0.3427, "step": 27637 }, { "epoch": 4.511652585608751, "grad_norm": 2.24554181098938, "learning_rate": 1.6634989630961106e-05, "loss": 0.4622, "step": 27638 }, { "epoch": 4.511815844251255, "grad_norm": 1.7813552618026733, "learning_rate": 1.663475220661011e-05, "loss": 0.3314, "step": 27639 }, { "epoch": 4.5119791028937595, "grad_norm": 1.6627521514892578, "learning_rate": 1.663451477557792e-05, "loss": 0.3371, "step": 27640 }, { "epoch": 4.512142361536264, "grad_norm": 1.9706486463546753, "learning_rate": 1.6634277337864774e-05, "loss": 0.3415, "step": 27641 }, { "epoch": 4.512305620178768, "grad_norm": 2.4447414875030518, "learning_rate": 1.6634039893470912e-05, "loss": 0.3776, "step": 27642 }, { "epoch": 4.512468878821273, "grad_norm": 2.176309108734131, "learning_rate": 1.6633802442396572e-05, "loss": 0.3766, "step": 27643 }, { "epoch": 4.512632137463777, "grad_norm": 1.852258563041687, "learning_rate": 1.6633564984642e-05, "loss": 0.388, "step": 27644 }, { "epoch": 4.512795396106282, "grad_norm": 2.167100191116333, "learning_rate": 1.6633327520207425e-05, "loss": 0.3452, "step": 27645 }, { "epoch": 4.512958654748786, "grad_norm": 2.412163257598877, "learning_rate": 1.6633090049093092e-05, "loss": 0.4091, "step": 27646 }, { "epoch": 4.51312191339129, "grad_norm": 2.2828781604766846, "learning_rate": 1.663285257129924e-05, "loss": 0.3959, "step": 27647 }, { "epoch": 4.513285172033795, "grad_norm": 2.018023729324341, "learning_rate": 1.6632615086826107e-05, "loss": 0.3883, "step": 27648 }, { "epoch": 4.5134484306762985, "grad_norm": 2.4415266513824463, "learning_rate": 1.6632377595673932e-05, "loss": 0.3906, "step": 27649 }, { "epoch": 4.513611689318803, "grad_norm": 2.030806303024292, "learning_rate": 1.6632140097842953e-05, "loss": 0.3877, "step": 27650 }, { "epoch": 4.513774947961307, "grad_norm": 2.435739517211914, "learning_rate": 1.6631902593333415e-05, "loss": 0.4681, "step": 27651 }, { "epoch": 4.513938206603812, "grad_norm": 2.4605324268341064, "learning_rate": 1.663166508214555e-05, "loss": 0.3979, "step": 27652 }, { "epoch": 4.514101465246316, "grad_norm": 2.0172362327575684, "learning_rate": 1.6631427564279602e-05, "loss": 0.4391, "step": 27653 }, { "epoch": 4.514264723888821, "grad_norm": 2.3298416137695312, "learning_rate": 1.6631190039735803e-05, "loss": 0.3875, "step": 27654 }, { "epoch": 4.514427982531325, "grad_norm": 2.0640182495117188, "learning_rate": 1.6630952508514403e-05, "loss": 0.3917, "step": 27655 }, { "epoch": 4.51459124117383, "grad_norm": 2.052098512649536, "learning_rate": 1.6630714970615632e-05, "loss": 0.3825, "step": 27656 }, { "epoch": 4.514754499816334, "grad_norm": 2.487149953842163, "learning_rate": 1.6630477426039734e-05, "loss": 0.4824, "step": 27657 }, { "epoch": 4.5149177584588385, "grad_norm": 1.8523006439208984, "learning_rate": 1.6630239874786946e-05, "loss": 0.4028, "step": 27658 }, { "epoch": 4.515081017101343, "grad_norm": 2.197641611099243, "learning_rate": 1.663000231685751e-05, "loss": 0.4098, "step": 27659 }, { "epoch": 4.515244275743847, "grad_norm": 2.059866189956665, "learning_rate": 1.6629764752251664e-05, "loss": 0.3681, "step": 27660 }, { "epoch": 4.515407534386352, "grad_norm": 2.0970206260681152, "learning_rate": 1.6629527180969644e-05, "loss": 0.3797, "step": 27661 }, { "epoch": 4.515570793028856, "grad_norm": 1.6816200017929077, "learning_rate": 1.6629289603011693e-05, "loss": 0.3381, "step": 27662 }, { "epoch": 4.515734051671361, "grad_norm": 1.8181730508804321, "learning_rate": 1.6629052018378052e-05, "loss": 0.3767, "step": 27663 }, { "epoch": 4.515897310313865, "grad_norm": 2.5486738681793213, "learning_rate": 1.6628814427068954e-05, "loss": 0.4735, "step": 27664 }, { "epoch": 4.5160605689563695, "grad_norm": 1.5739814043045044, "learning_rate": 1.662857682908464e-05, "loss": 0.3354, "step": 27665 }, { "epoch": 4.516223827598873, "grad_norm": 1.8201695680618286, "learning_rate": 1.6628339224425355e-05, "loss": 0.3391, "step": 27666 }, { "epoch": 4.5163870862413775, "grad_norm": 1.978411078453064, "learning_rate": 1.662810161309133e-05, "loss": 0.362, "step": 27667 }, { "epoch": 4.516550344883882, "grad_norm": 1.81136155128479, "learning_rate": 1.6627863995082813e-05, "loss": 0.3835, "step": 27668 }, { "epoch": 4.516713603526386, "grad_norm": 2.4104557037353516, "learning_rate": 1.6627626370400035e-05, "loss": 0.4151, "step": 27669 }, { "epoch": 4.516876862168891, "grad_norm": 1.8915261030197144, "learning_rate": 1.6627388739043243e-05, "loss": 0.3423, "step": 27670 }, { "epoch": 4.517040120811395, "grad_norm": 1.9022971391677856, "learning_rate": 1.662715110101267e-05, "loss": 0.3226, "step": 27671 }, { "epoch": 4.5172033794539, "grad_norm": 2.0297982692718506, "learning_rate": 1.662691345630856e-05, "loss": 0.327, "step": 27672 }, { "epoch": 4.517366638096404, "grad_norm": 2.0704474449157715, "learning_rate": 1.6626675804931144e-05, "loss": 0.3568, "step": 27673 }, { "epoch": 4.517529896738909, "grad_norm": 1.597583293914795, "learning_rate": 1.662643814688067e-05, "loss": 0.3359, "step": 27674 }, { "epoch": 4.517693155381413, "grad_norm": 2.0353469848632812, "learning_rate": 1.6626200482157378e-05, "loss": 0.3772, "step": 27675 }, { "epoch": 4.5178564140239175, "grad_norm": 1.917299747467041, "learning_rate": 1.6625962810761498e-05, "loss": 0.4284, "step": 27676 }, { "epoch": 4.518019672666422, "grad_norm": 2.037644863128662, "learning_rate": 1.662572513269328e-05, "loss": 0.3621, "step": 27677 }, { "epoch": 4.518182931308926, "grad_norm": 2.3386294841766357, "learning_rate": 1.6625487447952954e-05, "loss": 0.4724, "step": 27678 }, { "epoch": 4.518346189951431, "grad_norm": 2.1252949237823486, "learning_rate": 1.662524975654077e-05, "loss": 0.3384, "step": 27679 }, { "epoch": 4.518509448593935, "grad_norm": 1.9544428586959839, "learning_rate": 1.6625012058456956e-05, "loss": 0.3594, "step": 27680 }, { "epoch": 4.51867270723644, "grad_norm": 2.1898293495178223, "learning_rate": 1.662477435370176e-05, "loss": 0.4219, "step": 27681 }, { "epoch": 4.518835965878944, "grad_norm": 1.783836841583252, "learning_rate": 1.6624536642275416e-05, "loss": 0.35, "step": 27682 }, { "epoch": 4.518999224521448, "grad_norm": 1.9278067350387573, "learning_rate": 1.6624298924178168e-05, "loss": 0.3605, "step": 27683 }, { "epoch": 4.519162483163953, "grad_norm": 1.9286903142929077, "learning_rate": 1.662406119941025e-05, "loss": 0.3476, "step": 27684 }, { "epoch": 4.5193257418064565, "grad_norm": 2.000251293182373, "learning_rate": 1.6623823467971905e-05, "loss": 0.3333, "step": 27685 }, { "epoch": 4.519489000448961, "grad_norm": 1.9690426588058472, "learning_rate": 1.662358572986337e-05, "loss": 0.3845, "step": 27686 }, { "epoch": 4.519652259091465, "grad_norm": 1.972810983657837, "learning_rate": 1.6623347985084887e-05, "loss": 0.3876, "step": 27687 }, { "epoch": 4.51981551773397, "grad_norm": 2.0970001220703125, "learning_rate": 1.6623110233636695e-05, "loss": 0.4076, "step": 27688 }, { "epoch": 4.519978776376474, "grad_norm": 2.121047019958496, "learning_rate": 1.662287247551903e-05, "loss": 0.3933, "step": 27689 }, { "epoch": 4.520142035018979, "grad_norm": 1.608975887298584, "learning_rate": 1.6622634710732138e-05, "loss": 0.3382, "step": 27690 }, { "epoch": 4.520305293661483, "grad_norm": 2.1720569133758545, "learning_rate": 1.6622396939276252e-05, "loss": 0.3703, "step": 27691 }, { "epoch": 4.520468552303988, "grad_norm": 2.0644235610961914, "learning_rate": 1.6622159161151616e-05, "loss": 0.339, "step": 27692 }, { "epoch": 4.520631810946492, "grad_norm": 2.033950090408325, "learning_rate": 1.6621921376358468e-05, "loss": 0.3878, "step": 27693 }, { "epoch": 4.5207950695889965, "grad_norm": 2.177642822265625, "learning_rate": 1.6621683584897047e-05, "loss": 0.4324, "step": 27694 }, { "epoch": 4.520958328231501, "grad_norm": 1.975577473640442, "learning_rate": 1.662144578676759e-05, "loss": 0.3748, "step": 27695 }, { "epoch": 4.521121586874005, "grad_norm": 2.6018106937408447, "learning_rate": 1.662120798197034e-05, "loss": 0.3662, "step": 27696 }, { "epoch": 4.52128484551651, "grad_norm": 2.3380987644195557, "learning_rate": 1.6620970170505534e-05, "loss": 0.3863, "step": 27697 }, { "epoch": 4.521448104159014, "grad_norm": 2.7592058181762695, "learning_rate": 1.6620732352373413e-05, "loss": 0.502, "step": 27698 }, { "epoch": 4.521611362801519, "grad_norm": 2.297239303588867, "learning_rate": 1.662049452757422e-05, "loss": 0.4085, "step": 27699 }, { "epoch": 4.521774621444022, "grad_norm": 2.492110013961792, "learning_rate": 1.6620256696108187e-05, "loss": 0.4021, "step": 27700 }, { "epoch": 4.5219378800865275, "grad_norm": 2.556138753890991, "learning_rate": 1.662001885797556e-05, "loss": 0.4324, "step": 27701 }, { "epoch": 4.522101138729031, "grad_norm": 2.478163480758667, "learning_rate": 1.661978101317657e-05, "loss": 0.5129, "step": 27702 }, { "epoch": 4.5222643973715355, "grad_norm": 2.28871750831604, "learning_rate": 1.6619543161711468e-05, "loss": 0.3597, "step": 27703 }, { "epoch": 4.52242765601404, "grad_norm": 1.5655193328857422, "learning_rate": 1.6619305303580487e-05, "loss": 0.3316, "step": 27704 }, { "epoch": 4.522590914656544, "grad_norm": 2.28930401802063, "learning_rate": 1.6619067438783868e-05, "loss": 0.4118, "step": 27705 }, { "epoch": 4.522754173299049, "grad_norm": 2.5637433528900146, "learning_rate": 1.6618829567321845e-05, "loss": 0.4566, "step": 27706 }, { "epoch": 4.522917431941553, "grad_norm": 1.8061062097549438, "learning_rate": 1.661859168919467e-05, "loss": 0.2857, "step": 27707 }, { "epoch": 4.523080690584058, "grad_norm": 2.2355148792266846, "learning_rate": 1.6618353804402567e-05, "loss": 0.4162, "step": 27708 }, { "epoch": 4.523243949226562, "grad_norm": 2.275728702545166, "learning_rate": 1.6618115912945785e-05, "loss": 0.4264, "step": 27709 }, { "epoch": 4.523407207869067, "grad_norm": 2.2699670791625977, "learning_rate": 1.6617878014824566e-05, "loss": 0.4035, "step": 27710 }, { "epoch": 4.523570466511571, "grad_norm": 2.3459346294403076, "learning_rate": 1.6617640110039142e-05, "loss": 0.3977, "step": 27711 }, { "epoch": 4.5237337251540755, "grad_norm": 2.155134677886963, "learning_rate": 1.6617402198589758e-05, "loss": 0.3935, "step": 27712 }, { "epoch": 4.52389698379658, "grad_norm": 2.095123291015625, "learning_rate": 1.6617164280476653e-05, "loss": 0.3347, "step": 27713 }, { "epoch": 4.524060242439084, "grad_norm": 2.08815336227417, "learning_rate": 1.6616926355700066e-05, "loss": 0.3629, "step": 27714 }, { "epoch": 4.524223501081589, "grad_norm": 2.487131357192993, "learning_rate": 1.6616688424260233e-05, "loss": 0.447, "step": 27715 }, { "epoch": 4.524386759724093, "grad_norm": 2.4359078407287598, "learning_rate": 1.6616450486157395e-05, "loss": 0.3846, "step": 27716 }, { "epoch": 4.524550018366598, "grad_norm": 2.1169865131378174, "learning_rate": 1.6616212541391796e-05, "loss": 0.3611, "step": 27717 }, { "epoch": 4.524713277009102, "grad_norm": 2.452808141708374, "learning_rate": 1.6615974589963673e-05, "loss": 0.4324, "step": 27718 }, { "epoch": 4.524876535651606, "grad_norm": 2.32130765914917, "learning_rate": 1.6615736631873263e-05, "loss": 0.4518, "step": 27719 }, { "epoch": 4.52503979429411, "grad_norm": 2.2295496463775635, "learning_rate": 1.661549866712081e-05, "loss": 0.4048, "step": 27720 }, { "epoch": 4.5252030529366145, "grad_norm": 2.2760589122772217, "learning_rate": 1.661526069570655e-05, "loss": 0.4323, "step": 27721 }, { "epoch": 4.525366311579119, "grad_norm": 2.1881654262542725, "learning_rate": 1.6615022717630727e-05, "loss": 0.3622, "step": 27722 }, { "epoch": 4.525529570221623, "grad_norm": 2.10339093208313, "learning_rate": 1.661478473289358e-05, "loss": 0.3909, "step": 27723 }, { "epoch": 4.525692828864128, "grad_norm": 2.0861315727233887, "learning_rate": 1.661454674149534e-05, "loss": 0.4419, "step": 27724 }, { "epoch": 4.525856087506632, "grad_norm": 2.376481533050537, "learning_rate": 1.6614308743436254e-05, "loss": 0.4239, "step": 27725 }, { "epoch": 4.526019346149137, "grad_norm": 2.4828710556030273, "learning_rate": 1.6614070738716565e-05, "loss": 0.3942, "step": 27726 }, { "epoch": 4.526182604791641, "grad_norm": 2.165858268737793, "learning_rate": 1.6613832727336507e-05, "loss": 0.4124, "step": 27727 }, { "epoch": 4.526345863434146, "grad_norm": 2.3580026626586914, "learning_rate": 1.661359470929632e-05, "loss": 0.4358, "step": 27728 }, { "epoch": 4.52650912207665, "grad_norm": 2.294633388519287, "learning_rate": 1.6613356684596245e-05, "loss": 0.3314, "step": 27729 }, { "epoch": 4.5266723807191545, "grad_norm": 2.3353962898254395, "learning_rate": 1.661311865323652e-05, "loss": 0.3714, "step": 27730 }, { "epoch": 4.526835639361659, "grad_norm": 2.2628347873687744, "learning_rate": 1.6612880615217387e-05, "loss": 0.4144, "step": 27731 }, { "epoch": 4.526998898004163, "grad_norm": 2.490579843521118, "learning_rate": 1.6612642570539088e-05, "loss": 0.4793, "step": 27732 }, { "epoch": 4.527162156646668, "grad_norm": 2.2237913608551025, "learning_rate": 1.6612404519201857e-05, "loss": 0.3584, "step": 27733 }, { "epoch": 4.527325415289172, "grad_norm": 1.695725679397583, "learning_rate": 1.6612166461205937e-05, "loss": 0.3475, "step": 27734 }, { "epoch": 4.527488673931677, "grad_norm": 2.116929292678833, "learning_rate": 1.661192839655157e-05, "loss": 0.3803, "step": 27735 }, { "epoch": 4.52765193257418, "grad_norm": 1.9302644729614258, "learning_rate": 1.661169032523899e-05, "loss": 0.3611, "step": 27736 }, { "epoch": 4.5278151912166855, "grad_norm": 2.3153674602508545, "learning_rate": 1.6611452247268437e-05, "loss": 0.3783, "step": 27737 }, { "epoch": 4.527978449859189, "grad_norm": 2.2030136585235596, "learning_rate": 1.6611214162640158e-05, "loss": 0.3936, "step": 27738 }, { "epoch": 4.5281417085016935, "grad_norm": 1.9689890146255493, "learning_rate": 1.6610976071354386e-05, "loss": 0.3427, "step": 27739 }, { "epoch": 4.528304967144198, "grad_norm": 2.3151025772094727, "learning_rate": 1.6610737973411365e-05, "loss": 0.442, "step": 27740 }, { "epoch": 4.528468225786702, "grad_norm": 2.1876015663146973, "learning_rate": 1.6610499868811327e-05, "loss": 0.4059, "step": 27741 }, { "epoch": 4.528631484429207, "grad_norm": 2.3054850101470947, "learning_rate": 1.6610261757554524e-05, "loss": 0.4351, "step": 27742 }, { "epoch": 4.528794743071711, "grad_norm": 1.8276944160461426, "learning_rate": 1.6610023639641184e-05, "loss": 0.3228, "step": 27743 }, { "epoch": 4.528958001714216, "grad_norm": 2.343953847885132, "learning_rate": 1.6609785515071557e-05, "loss": 0.4043, "step": 27744 }, { "epoch": 4.52912126035672, "grad_norm": 2.0539913177490234, "learning_rate": 1.660954738384587e-05, "loss": 0.3742, "step": 27745 }, { "epoch": 4.529284518999225, "grad_norm": 2.0111210346221924, "learning_rate": 1.6609309245964377e-05, "loss": 0.4001, "step": 27746 }, { "epoch": 4.529447777641729, "grad_norm": 2.205171585083008, "learning_rate": 1.660907110142731e-05, "loss": 0.4948, "step": 27747 }, { "epoch": 4.5296110362842334, "grad_norm": 1.887243628501892, "learning_rate": 1.6608832950234913e-05, "loss": 0.3871, "step": 27748 }, { "epoch": 4.529774294926738, "grad_norm": 2.137136697769165, "learning_rate": 1.660859479238742e-05, "loss": 0.4233, "step": 27749 }, { "epoch": 4.529937553569242, "grad_norm": 2.452052354812622, "learning_rate": 1.660835662788507e-05, "loss": 0.415, "step": 27750 }, { "epoch": 4.530100812211747, "grad_norm": 2.0435283184051514, "learning_rate": 1.6608118456728114e-05, "loss": 0.373, "step": 27751 }, { "epoch": 4.530264070854251, "grad_norm": 1.9417136907577515, "learning_rate": 1.6607880278916778e-05, "loss": 0.3919, "step": 27752 }, { "epoch": 4.530427329496755, "grad_norm": 2.0835652351379395, "learning_rate": 1.6607642094451314e-05, "loss": 0.3759, "step": 27753 }, { "epoch": 4.53059058813926, "grad_norm": 2.003056764602661, "learning_rate": 1.6607403903331952e-05, "loss": 0.4184, "step": 27754 }, { "epoch": 4.530753846781764, "grad_norm": 2.0742921829223633, "learning_rate": 1.6607165705558936e-05, "loss": 0.4122, "step": 27755 }, { "epoch": 4.530917105424268, "grad_norm": 1.7252044677734375, "learning_rate": 1.6606927501132507e-05, "loss": 0.3891, "step": 27756 }, { "epoch": 4.5310803640667725, "grad_norm": 2.1128342151641846, "learning_rate": 1.6606689290052905e-05, "loss": 0.4179, "step": 27757 }, { "epoch": 4.531243622709277, "grad_norm": 2.0929298400878906, "learning_rate": 1.6606451072320368e-05, "loss": 0.3844, "step": 27758 }, { "epoch": 4.531406881351781, "grad_norm": 1.7261488437652588, "learning_rate": 1.6606212847935135e-05, "loss": 0.3726, "step": 27759 }, { "epoch": 4.531570139994286, "grad_norm": 2.4146957397460938, "learning_rate": 1.660597461689745e-05, "loss": 0.4421, "step": 27760 }, { "epoch": 4.53173339863679, "grad_norm": 2.411123037338257, "learning_rate": 1.6605736379207548e-05, "loss": 0.3764, "step": 27761 }, { "epoch": 4.531896657279295, "grad_norm": 2.0946578979492188, "learning_rate": 1.6605498134865673e-05, "loss": 0.4209, "step": 27762 }, { "epoch": 4.532059915921799, "grad_norm": 2.029266834259033, "learning_rate": 1.6605259883872063e-05, "loss": 0.3759, "step": 27763 }, { "epoch": 4.532223174564304, "grad_norm": 2.7115445137023926, "learning_rate": 1.6605021626226957e-05, "loss": 0.3772, "step": 27764 }, { "epoch": 4.532386433206808, "grad_norm": 1.8683687448501587, "learning_rate": 1.6604783361930596e-05, "loss": 0.3302, "step": 27765 }, { "epoch": 4.5325496918493124, "grad_norm": 2.3347995281219482, "learning_rate": 1.660454509098322e-05, "loss": 0.3771, "step": 27766 }, { "epoch": 4.532712950491817, "grad_norm": 1.8539329767227173, "learning_rate": 1.660430681338507e-05, "loss": 0.3765, "step": 27767 }, { "epoch": 4.532876209134321, "grad_norm": 2.2165162563323975, "learning_rate": 1.660406852913638e-05, "loss": 0.419, "step": 27768 }, { "epoch": 4.533039467776826, "grad_norm": 1.9821313619613647, "learning_rate": 1.66038302382374e-05, "loss": 0.38, "step": 27769 }, { "epoch": 4.53320272641933, "grad_norm": 1.8627010583877563, "learning_rate": 1.6603591940688364e-05, "loss": 0.3481, "step": 27770 }, { "epoch": 4.533365985061835, "grad_norm": 2.293452262878418, "learning_rate": 1.660335363648951e-05, "loss": 0.4256, "step": 27771 }, { "epoch": 4.533529243704338, "grad_norm": 1.7464877367019653, "learning_rate": 1.6603115325641086e-05, "loss": 0.3261, "step": 27772 }, { "epoch": 4.533692502346843, "grad_norm": 1.7662365436553955, "learning_rate": 1.6602877008143324e-05, "loss": 0.3198, "step": 27773 }, { "epoch": 4.533855760989347, "grad_norm": 2.8532683849334717, "learning_rate": 1.6602638683996462e-05, "loss": 0.4328, "step": 27774 }, { "epoch": 4.5340190196318515, "grad_norm": 1.9486253261566162, "learning_rate": 1.660240035320075e-05, "loss": 0.3259, "step": 27775 }, { "epoch": 4.534182278274356, "grad_norm": 1.9720664024353027, "learning_rate": 1.6602162015756423e-05, "loss": 0.3455, "step": 27776 }, { "epoch": 4.53434553691686, "grad_norm": 2.1358115673065186, "learning_rate": 1.6601923671663714e-05, "loss": 0.3429, "step": 27777 }, { "epoch": 4.534508795559365, "grad_norm": 1.781829595565796, "learning_rate": 1.6601685320922877e-05, "loss": 0.347, "step": 27778 }, { "epoch": 4.534672054201869, "grad_norm": 2.2099645137786865, "learning_rate": 1.660144696353414e-05, "loss": 0.3357, "step": 27779 }, { "epoch": 4.534835312844374, "grad_norm": 2.9868581295013428, "learning_rate": 1.660120859949775e-05, "loss": 0.4908, "step": 27780 }, { "epoch": 4.534998571486878, "grad_norm": 2.0051004886627197, "learning_rate": 1.6600970228813944e-05, "loss": 0.4049, "step": 27781 }, { "epoch": 4.535161830129383, "grad_norm": 2.04144549369812, "learning_rate": 1.6600731851482964e-05, "loss": 0.3743, "step": 27782 }, { "epoch": 4.535325088771887, "grad_norm": 1.750442624092102, "learning_rate": 1.660049346750505e-05, "loss": 0.3602, "step": 27783 }, { "epoch": 4.535488347414391, "grad_norm": 2.1792516708374023, "learning_rate": 1.6600255076880435e-05, "loss": 0.377, "step": 27784 }, { "epoch": 4.535651606056896, "grad_norm": 2.084867238998413, "learning_rate": 1.660001667960937e-05, "loss": 0.3506, "step": 27785 }, { "epoch": 4.5358148646994, "grad_norm": 2.2473886013031006, "learning_rate": 1.659977827569209e-05, "loss": 0.4026, "step": 27786 }, { "epoch": 4.535978123341905, "grad_norm": 2.291151762008667, "learning_rate": 1.659953986512883e-05, "loss": 0.3965, "step": 27787 }, { "epoch": 4.536141381984409, "grad_norm": 2.247774362564087, "learning_rate": 1.659930144791984e-05, "loss": 0.4104, "step": 27788 }, { "epoch": 4.536304640626913, "grad_norm": 2.3072493076324463, "learning_rate": 1.6599063024065355e-05, "loss": 0.3996, "step": 27789 }, { "epoch": 4.536467899269418, "grad_norm": 2.2176477909088135, "learning_rate": 1.659882459356561e-05, "loss": 0.4426, "step": 27790 }, { "epoch": 4.536631157911922, "grad_norm": 2.296626567840576, "learning_rate": 1.659858615642086e-05, "loss": 0.4216, "step": 27791 }, { "epoch": 4.536794416554426, "grad_norm": 1.966599702835083, "learning_rate": 1.6598347712631328e-05, "loss": 0.3756, "step": 27792 }, { "epoch": 4.5369576751969305, "grad_norm": 2.318849563598633, "learning_rate": 1.659810926219726e-05, "loss": 0.4293, "step": 27793 }, { "epoch": 4.537120933839435, "grad_norm": 2.289440870285034, "learning_rate": 1.6597870805118905e-05, "loss": 0.4548, "step": 27794 }, { "epoch": 4.537284192481939, "grad_norm": 1.9688698053359985, "learning_rate": 1.659763234139649e-05, "loss": 0.3531, "step": 27795 }, { "epoch": 4.537447451124444, "grad_norm": 2.4456517696380615, "learning_rate": 1.6597393871030264e-05, "loss": 0.4845, "step": 27796 }, { "epoch": 4.537610709766948, "grad_norm": 2.2433054447174072, "learning_rate": 1.659715539402046e-05, "loss": 0.4054, "step": 27797 }, { "epoch": 4.537773968409453, "grad_norm": 1.746627688407898, "learning_rate": 1.6596916910367326e-05, "loss": 0.3884, "step": 27798 }, { "epoch": 4.537937227051957, "grad_norm": 1.772033929824829, "learning_rate": 1.6596678420071096e-05, "loss": 0.3622, "step": 27799 }, { "epoch": 4.538100485694462, "grad_norm": 2.260486125946045, "learning_rate": 1.6596439923132016e-05, "loss": 0.4202, "step": 27800 }, { "epoch": 4.538263744336966, "grad_norm": 1.5951513051986694, "learning_rate": 1.659620141955032e-05, "loss": 0.3599, "step": 27801 }, { "epoch": 4.53842700297947, "grad_norm": 2.427743911743164, "learning_rate": 1.6595962909326255e-05, "loss": 0.4138, "step": 27802 }, { "epoch": 4.538590261621975, "grad_norm": 2.092937707901001, "learning_rate": 1.6595724392460055e-05, "loss": 0.3703, "step": 27803 }, { "epoch": 4.538753520264479, "grad_norm": 1.6118160486221313, "learning_rate": 1.659548586895196e-05, "loss": 0.316, "step": 27804 }, { "epoch": 4.538916778906984, "grad_norm": 1.8231730461120605, "learning_rate": 1.6595247338802213e-05, "loss": 0.3537, "step": 27805 }, { "epoch": 4.539080037549487, "grad_norm": 1.896866798400879, "learning_rate": 1.6595008802011054e-05, "loss": 0.3465, "step": 27806 }, { "epoch": 4.539243296191993, "grad_norm": 2.2947967052459717, "learning_rate": 1.6594770258578722e-05, "loss": 0.4186, "step": 27807 }, { "epoch": 4.539406554834496, "grad_norm": 1.7848305702209473, "learning_rate": 1.6594531708505462e-05, "loss": 0.3705, "step": 27808 }, { "epoch": 4.539569813477001, "grad_norm": 2.1704742908477783, "learning_rate": 1.6594293151791508e-05, "loss": 0.3662, "step": 27809 }, { "epoch": 4.539733072119505, "grad_norm": 2.1236228942871094, "learning_rate": 1.6594054588437104e-05, "loss": 0.3639, "step": 27810 }, { "epoch": 4.5398963307620095, "grad_norm": 2.2603282928466797, "learning_rate": 1.6593816018442486e-05, "loss": 0.4557, "step": 27811 }, { "epoch": 4.540059589404514, "grad_norm": 1.7024120092391968, "learning_rate": 1.65935774418079e-05, "loss": 0.3074, "step": 27812 }, { "epoch": 4.540222848047018, "grad_norm": 2.0515894889831543, "learning_rate": 1.659333885853358e-05, "loss": 0.3643, "step": 27813 }, { "epoch": 4.540386106689523, "grad_norm": 1.8542602062225342, "learning_rate": 1.6593100268619774e-05, "loss": 0.3658, "step": 27814 }, { "epoch": 4.540549365332027, "grad_norm": 2.3675239086151123, "learning_rate": 1.659286167206671e-05, "loss": 0.3754, "step": 27815 }, { "epoch": 4.540712623974532, "grad_norm": 2.3227732181549072, "learning_rate": 1.659262306887464e-05, "loss": 0.3962, "step": 27816 }, { "epoch": 4.540875882617036, "grad_norm": 2.49662446975708, "learning_rate": 1.6592384459043804e-05, "loss": 0.3623, "step": 27817 }, { "epoch": 4.541039141259541, "grad_norm": 2.5227630138397217, "learning_rate": 1.6592145842574433e-05, "loss": 0.3677, "step": 27818 }, { "epoch": 4.541202399902045, "grad_norm": 2.255753755569458, "learning_rate": 1.6591907219466777e-05, "loss": 0.3813, "step": 27819 }, { "epoch": 4.541365658544549, "grad_norm": 2.3800809383392334, "learning_rate": 1.659166858972107e-05, "loss": 0.3538, "step": 27820 }, { "epoch": 4.541528917187054, "grad_norm": 2.3418455123901367, "learning_rate": 1.6591429953337555e-05, "loss": 0.4232, "step": 27821 }, { "epoch": 4.541692175829558, "grad_norm": 2.0944442749023438, "learning_rate": 1.6591191310316473e-05, "loss": 0.404, "step": 27822 }, { "epoch": 4.541855434472063, "grad_norm": 2.275846481323242, "learning_rate": 1.6590952660658065e-05, "loss": 0.4244, "step": 27823 }, { "epoch": 4.542018693114567, "grad_norm": 2.171657085418701, "learning_rate": 1.6590714004362564e-05, "loss": 0.3791, "step": 27824 }, { "epoch": 4.542181951757071, "grad_norm": 1.8724976778030396, "learning_rate": 1.6590475341430217e-05, "loss": 0.3687, "step": 27825 }, { "epoch": 4.542345210399575, "grad_norm": 2.251732110977173, "learning_rate": 1.6590236671861266e-05, "loss": 0.3916, "step": 27826 }, { "epoch": 4.54250846904208, "grad_norm": 2.6533327102661133, "learning_rate": 1.6589997995655947e-05, "loss": 0.4066, "step": 27827 }, { "epoch": 4.542671727684584, "grad_norm": 1.958689570426941, "learning_rate": 1.65897593128145e-05, "loss": 0.322, "step": 27828 }, { "epoch": 4.5428349863270885, "grad_norm": 1.8601915836334229, "learning_rate": 1.6589520623337173e-05, "loss": 0.401, "step": 27829 }, { "epoch": 4.542998244969593, "grad_norm": 2.1203441619873047, "learning_rate": 1.6589281927224194e-05, "loss": 0.4176, "step": 27830 }, { "epoch": 4.543161503612097, "grad_norm": 2.0968410968780518, "learning_rate": 1.6589043224475814e-05, "loss": 0.4274, "step": 27831 }, { "epoch": 4.543324762254602, "grad_norm": 2.388063430786133, "learning_rate": 1.6588804515092265e-05, "loss": 0.4314, "step": 27832 }, { "epoch": 4.543488020897106, "grad_norm": 2.282593011856079, "learning_rate": 1.6588565799073796e-05, "loss": 0.417, "step": 27833 }, { "epoch": 4.543651279539611, "grad_norm": 1.9218554496765137, "learning_rate": 1.658832707642064e-05, "loss": 0.3752, "step": 27834 }, { "epoch": 4.543814538182115, "grad_norm": 2.4018187522888184, "learning_rate": 1.658808834713304e-05, "loss": 0.3533, "step": 27835 }, { "epoch": 4.5439777968246196, "grad_norm": 2.5563557147979736, "learning_rate": 1.658784961121124e-05, "loss": 0.4347, "step": 27836 }, { "epoch": 4.544141055467124, "grad_norm": 2.778864622116089, "learning_rate": 1.6587610868655473e-05, "loss": 0.4482, "step": 27837 }, { "epoch": 4.544304314109628, "grad_norm": 2.405195951461792, "learning_rate": 1.6587372119465984e-05, "loss": 0.4494, "step": 27838 }, { "epoch": 4.544467572752133, "grad_norm": 2.14109206199646, "learning_rate": 1.6587133363643014e-05, "loss": 0.4036, "step": 27839 }, { "epoch": 4.544630831394637, "grad_norm": 2.471097230911255, "learning_rate": 1.6586894601186804e-05, "loss": 0.428, "step": 27840 }, { "epoch": 4.544794090037142, "grad_norm": 1.8860543966293335, "learning_rate": 1.6586655832097597e-05, "loss": 0.369, "step": 27841 }, { "epoch": 4.544957348679645, "grad_norm": 1.911699891090393, "learning_rate": 1.6586417056375624e-05, "loss": 0.3405, "step": 27842 }, { "epoch": 4.54512060732215, "grad_norm": 2.3188459873199463, "learning_rate": 1.658617827402113e-05, "loss": 0.4096, "step": 27843 }, { "epoch": 4.545283865964654, "grad_norm": 2.040703773498535, "learning_rate": 1.6585939485034356e-05, "loss": 0.3738, "step": 27844 }, { "epoch": 4.545447124607159, "grad_norm": 2.0439016819000244, "learning_rate": 1.6585700689415545e-05, "loss": 0.3626, "step": 27845 }, { "epoch": 4.545610383249663, "grad_norm": 2.5141189098358154, "learning_rate": 1.6585461887164937e-05, "loss": 0.4619, "step": 27846 }, { "epoch": 4.5457736418921675, "grad_norm": 2.0373141765594482, "learning_rate": 1.6585223078282763e-05, "loss": 0.3751, "step": 27847 }, { "epoch": 4.545936900534672, "grad_norm": 1.7953718900680542, "learning_rate": 1.658498426276928e-05, "loss": 0.3404, "step": 27848 }, { "epoch": 4.546100159177176, "grad_norm": 1.9068052768707275, "learning_rate": 1.6584745440624713e-05, "loss": 0.3638, "step": 27849 }, { "epoch": 4.546263417819681, "grad_norm": 2.002049446105957, "learning_rate": 1.6584506611849313e-05, "loss": 0.3481, "step": 27850 }, { "epoch": 4.546426676462185, "grad_norm": 2.056820869445801, "learning_rate": 1.6584267776443318e-05, "loss": 0.3456, "step": 27851 }, { "epoch": 4.54658993510469, "grad_norm": 1.9592958688735962, "learning_rate": 1.6584028934406963e-05, "loss": 0.3797, "step": 27852 }, { "epoch": 4.546753193747194, "grad_norm": 2.2725822925567627, "learning_rate": 1.658379008574049e-05, "loss": 0.4358, "step": 27853 }, { "epoch": 4.5469164523896985, "grad_norm": 2.5852115154266357, "learning_rate": 1.658355123044415e-05, "loss": 0.5485, "step": 27854 }, { "epoch": 4.547079711032203, "grad_norm": 2.1446194648742676, "learning_rate": 1.658331236851817e-05, "loss": 0.3612, "step": 27855 }, { "epoch": 4.547242969674707, "grad_norm": 2.6848785877227783, "learning_rate": 1.65830734999628e-05, "loss": 0.4517, "step": 27856 }, { "epoch": 4.547406228317212, "grad_norm": 2.2480807304382324, "learning_rate": 1.658283462477827e-05, "loss": 0.4091, "step": 27857 }, { "epoch": 4.547569486959716, "grad_norm": 2.1419320106506348, "learning_rate": 1.6582595742964835e-05, "loss": 0.3636, "step": 27858 }, { "epoch": 4.54773274560222, "grad_norm": 2.2296905517578125, "learning_rate": 1.6582356854522726e-05, "loss": 0.3969, "step": 27859 }, { "epoch": 4.547896004244725, "grad_norm": 2.066934823989868, "learning_rate": 1.6582117959452185e-05, "loss": 0.354, "step": 27860 }, { "epoch": 4.548059262887229, "grad_norm": 1.6964399814605713, "learning_rate": 1.658187905775345e-05, "loss": 0.3641, "step": 27861 }, { "epoch": 4.548222521529733, "grad_norm": 2.2931606769561768, "learning_rate": 1.6581640149426766e-05, "loss": 0.4122, "step": 27862 }, { "epoch": 4.548385780172238, "grad_norm": 2.1387085914611816, "learning_rate": 1.6581401234472374e-05, "loss": 0.4707, "step": 27863 }, { "epoch": 4.548549038814742, "grad_norm": 1.9460333585739136, "learning_rate": 1.6581162312890514e-05, "loss": 0.3544, "step": 27864 }, { "epoch": 4.5487122974572465, "grad_norm": 2.175860643386841, "learning_rate": 1.658092338468142e-05, "loss": 0.3999, "step": 27865 }, { "epoch": 4.548875556099751, "grad_norm": 1.631883144378662, "learning_rate": 1.6580684449845342e-05, "loss": 0.3187, "step": 27866 }, { "epoch": 4.549038814742255, "grad_norm": 2.0706324577331543, "learning_rate": 1.6580445508382515e-05, "loss": 0.3628, "step": 27867 }, { "epoch": 4.54920207338476, "grad_norm": 2.136688232421875, "learning_rate": 1.6580206560293183e-05, "loss": 0.3499, "step": 27868 }, { "epoch": 4.549365332027264, "grad_norm": 2.181698799133301, "learning_rate": 1.657996760557758e-05, "loss": 0.4017, "step": 27869 }, { "epoch": 4.549528590669769, "grad_norm": 1.9968968629837036, "learning_rate": 1.6579728644235956e-05, "loss": 0.3959, "step": 27870 }, { "epoch": 4.549691849312273, "grad_norm": 2.1090381145477295, "learning_rate": 1.6579489676268546e-05, "loss": 0.4408, "step": 27871 }, { "epoch": 4.5498551079547775, "grad_norm": 2.376680612564087, "learning_rate": 1.657925070167559e-05, "loss": 0.4025, "step": 27872 }, { "epoch": 4.550018366597282, "grad_norm": 2.2981014251708984, "learning_rate": 1.6579011720457333e-05, "loss": 0.3624, "step": 27873 }, { "epoch": 4.550181625239786, "grad_norm": 1.965557336807251, "learning_rate": 1.6578772732614014e-05, "loss": 0.3709, "step": 27874 }, { "epoch": 4.550344883882291, "grad_norm": 1.8993791341781616, "learning_rate": 1.6578533738145867e-05, "loss": 0.3828, "step": 27875 }, { "epoch": 4.550508142524794, "grad_norm": 2.003138303756714, "learning_rate": 1.6578294737053143e-05, "loss": 0.3868, "step": 27876 }, { "epoch": 4.5506714011673, "grad_norm": 2.2069475650787354, "learning_rate": 1.6578055729336073e-05, "loss": 0.4626, "step": 27877 }, { "epoch": 4.550834659809803, "grad_norm": 1.9299442768096924, "learning_rate": 1.6577816714994906e-05, "loss": 0.3495, "step": 27878 }, { "epoch": 4.550997918452308, "grad_norm": 2.071709156036377, "learning_rate": 1.657757769402988e-05, "loss": 0.3241, "step": 27879 }, { "epoch": 4.551161177094812, "grad_norm": 2.1487746238708496, "learning_rate": 1.6577338666441232e-05, "loss": 0.3667, "step": 27880 }, { "epoch": 4.551324435737317, "grad_norm": 1.9072105884552002, "learning_rate": 1.657709963222921e-05, "loss": 0.3444, "step": 27881 }, { "epoch": 4.551487694379821, "grad_norm": 2.5212771892547607, "learning_rate": 1.6576860591394048e-05, "loss": 0.4317, "step": 27882 }, { "epoch": 4.5516509530223255, "grad_norm": 1.7414175271987915, "learning_rate": 1.657662154393599e-05, "loss": 0.3227, "step": 27883 }, { "epoch": 4.55181421166483, "grad_norm": 2.047722101211548, "learning_rate": 1.6576382489855274e-05, "loss": 0.3725, "step": 27884 }, { "epoch": 4.551977470307334, "grad_norm": 2.2410871982574463, "learning_rate": 1.6576143429152143e-05, "loss": 0.3747, "step": 27885 }, { "epoch": 4.552140728949839, "grad_norm": 2.7601492404937744, "learning_rate": 1.6575904361826836e-05, "loss": 0.4579, "step": 27886 }, { "epoch": 4.552303987592343, "grad_norm": 2.2067301273345947, "learning_rate": 1.6575665287879603e-05, "loss": 0.4188, "step": 27887 }, { "epoch": 4.552467246234848, "grad_norm": 2.2743756771087646, "learning_rate": 1.657542620731067e-05, "loss": 0.3701, "step": 27888 }, { "epoch": 4.552630504877352, "grad_norm": 2.028921127319336, "learning_rate": 1.6575187120120285e-05, "loss": 0.3162, "step": 27889 }, { "epoch": 4.5527937635198565, "grad_norm": 2.1631357669830322, "learning_rate": 1.6574948026308688e-05, "loss": 0.3916, "step": 27890 }, { "epoch": 4.552957022162361, "grad_norm": 2.3844618797302246, "learning_rate": 1.657470892587612e-05, "loss": 0.4003, "step": 27891 }, { "epoch": 4.553120280804865, "grad_norm": 2.2188661098480225, "learning_rate": 1.6574469818822824e-05, "loss": 0.3641, "step": 27892 }, { "epoch": 4.55328353944737, "grad_norm": 1.9388200044631958, "learning_rate": 1.657423070514904e-05, "loss": 0.341, "step": 27893 }, { "epoch": 4.553446798089874, "grad_norm": 2.6508610248565674, "learning_rate": 1.6573991584855003e-05, "loss": 0.4049, "step": 27894 }, { "epoch": 4.553610056732378, "grad_norm": 2.1770179271698, "learning_rate": 1.657375245794096e-05, "loss": 0.4097, "step": 27895 }, { "epoch": 4.553773315374882, "grad_norm": 2.350349187850952, "learning_rate": 1.657351332440715e-05, "loss": 0.4, "step": 27896 }, { "epoch": 4.553936574017387, "grad_norm": 2.621873617172241, "learning_rate": 1.657327418425381e-05, "loss": 0.4086, "step": 27897 }, { "epoch": 4.554099832659891, "grad_norm": 2.016008138656616, "learning_rate": 1.6573035037481192e-05, "loss": 0.3532, "step": 27898 }, { "epoch": 4.554263091302396, "grad_norm": 1.927635669708252, "learning_rate": 1.6572795884089525e-05, "loss": 0.3367, "step": 27899 }, { "epoch": 4.5544263499449, "grad_norm": 2.670851945877075, "learning_rate": 1.6572556724079055e-05, "loss": 0.4096, "step": 27900 }, { "epoch": 4.5545896085874045, "grad_norm": 2.4973506927490234, "learning_rate": 1.6572317557450025e-05, "loss": 0.4951, "step": 27901 }, { "epoch": 4.554752867229909, "grad_norm": 2.7601099014282227, "learning_rate": 1.657207838420267e-05, "loss": 0.4661, "step": 27902 }, { "epoch": 4.554916125872413, "grad_norm": 1.9121687412261963, "learning_rate": 1.6571839204337235e-05, "loss": 0.3758, "step": 27903 }, { "epoch": 4.555079384514918, "grad_norm": 2.29129958152771, "learning_rate": 1.6571600017853956e-05, "loss": 0.3952, "step": 27904 }, { "epoch": 4.555242643157422, "grad_norm": 1.4688475131988525, "learning_rate": 1.657136082475308e-05, "loss": 0.3563, "step": 27905 }, { "epoch": 4.555405901799927, "grad_norm": 2.09163498878479, "learning_rate": 1.6571121625034847e-05, "loss": 0.4171, "step": 27906 }, { "epoch": 4.555569160442431, "grad_norm": 2.317706346511841, "learning_rate": 1.6570882418699494e-05, "loss": 0.455, "step": 27907 }, { "epoch": 4.5557324190849355, "grad_norm": 1.8258205652236938, "learning_rate": 1.6570643205747265e-05, "loss": 0.3632, "step": 27908 }, { "epoch": 4.55589567772744, "grad_norm": 1.9752298593521118, "learning_rate": 1.6570403986178402e-05, "loss": 0.3413, "step": 27909 }, { "epoch": 4.556058936369944, "grad_norm": 2.45068359375, "learning_rate": 1.657016475999314e-05, "loss": 0.4451, "step": 27910 }, { "epoch": 4.556222195012449, "grad_norm": 2.1319234371185303, "learning_rate": 1.6569925527191728e-05, "loss": 0.4125, "step": 27911 }, { "epoch": 4.556385453654952, "grad_norm": 1.8699744939804077, "learning_rate": 1.65696862877744e-05, "loss": 0.3691, "step": 27912 }, { "epoch": 4.556548712297458, "grad_norm": 1.994835615158081, "learning_rate": 1.6569447041741404e-05, "loss": 0.3191, "step": 27913 }, { "epoch": 4.556711970939961, "grad_norm": 1.8991038799285889, "learning_rate": 1.656920778909297e-05, "loss": 0.3666, "step": 27914 }, { "epoch": 4.556875229582466, "grad_norm": 1.9650951623916626, "learning_rate": 1.6568968529829347e-05, "loss": 0.4616, "step": 27915 }, { "epoch": 4.55703848822497, "grad_norm": 2.0361058712005615, "learning_rate": 1.6568729263950776e-05, "loss": 0.382, "step": 27916 }, { "epoch": 4.557201746867475, "grad_norm": 2.1663875579833984, "learning_rate": 1.6568489991457498e-05, "loss": 0.4097, "step": 27917 }, { "epoch": 4.557365005509979, "grad_norm": 1.9932734966278076, "learning_rate": 1.656825071234975e-05, "loss": 0.3836, "step": 27918 }, { "epoch": 4.5575282641524835, "grad_norm": 2.099144458770752, "learning_rate": 1.6568011426627776e-05, "loss": 0.3878, "step": 27919 }, { "epoch": 4.557691522794988, "grad_norm": 1.9140015840530396, "learning_rate": 1.6567772134291816e-05, "loss": 0.3954, "step": 27920 }, { "epoch": 4.557854781437492, "grad_norm": 1.8543823957443237, "learning_rate": 1.656753283534211e-05, "loss": 0.4086, "step": 27921 }, { "epoch": 4.558018040079997, "grad_norm": 1.7211300134658813, "learning_rate": 1.6567293529778902e-05, "loss": 0.3617, "step": 27922 }, { "epoch": 4.558181298722501, "grad_norm": 2.154447555541992, "learning_rate": 1.656705421760243e-05, "loss": 0.3574, "step": 27923 }, { "epoch": 4.558344557365006, "grad_norm": 2.038052797317505, "learning_rate": 1.656681489881294e-05, "loss": 0.3738, "step": 27924 }, { "epoch": 4.55850781600751, "grad_norm": 1.8371310234069824, "learning_rate": 1.6566575573410663e-05, "loss": 0.3753, "step": 27925 }, { "epoch": 4.5586710746500145, "grad_norm": 2.0383455753326416, "learning_rate": 1.6566336241395848e-05, "loss": 0.3588, "step": 27926 }, { "epoch": 4.558834333292519, "grad_norm": 1.9843344688415527, "learning_rate": 1.6566096902768735e-05, "loss": 0.375, "step": 27927 }, { "epoch": 4.558997591935023, "grad_norm": 2.062068462371826, "learning_rate": 1.6565857557529567e-05, "loss": 0.4218, "step": 27928 }, { "epoch": 4.559160850577527, "grad_norm": 2.1531944274902344, "learning_rate": 1.656561820567858e-05, "loss": 0.3632, "step": 27929 }, { "epoch": 4.559324109220032, "grad_norm": 1.7514774799346924, "learning_rate": 1.6565378847216017e-05, "loss": 0.3588, "step": 27930 }, { "epoch": 4.559487367862536, "grad_norm": 1.9027034044265747, "learning_rate": 1.656513948214212e-05, "loss": 0.383, "step": 27931 }, { "epoch": 4.55965062650504, "grad_norm": 2.050285816192627, "learning_rate": 1.6564900110457126e-05, "loss": 0.4198, "step": 27932 }, { "epoch": 4.559813885147545, "grad_norm": 2.298567295074463, "learning_rate": 1.656466073216128e-05, "loss": 0.3933, "step": 27933 }, { "epoch": 4.559977143790049, "grad_norm": 2.1768321990966797, "learning_rate": 1.6564421347254825e-05, "loss": 0.3826, "step": 27934 }, { "epoch": 4.560140402432554, "grad_norm": 2.1396920680999756, "learning_rate": 1.6564181955738e-05, "loss": 0.449, "step": 27935 }, { "epoch": 4.560303661075058, "grad_norm": 2.1149299144744873, "learning_rate": 1.6563942557611043e-05, "loss": 0.3603, "step": 27936 }, { "epoch": 4.5604669197175625, "grad_norm": 1.9605087041854858, "learning_rate": 1.6563703152874197e-05, "loss": 0.4274, "step": 27937 }, { "epoch": 4.560630178360067, "grad_norm": 1.9801472425460815, "learning_rate": 1.6563463741527706e-05, "loss": 0.3601, "step": 27938 }, { "epoch": 4.560793437002571, "grad_norm": 2.2501375675201416, "learning_rate": 1.6563224323571807e-05, "loss": 0.3901, "step": 27939 }, { "epoch": 4.560956695645076, "grad_norm": 1.47868013381958, "learning_rate": 1.6562984899006743e-05, "loss": 0.3228, "step": 27940 }, { "epoch": 4.56111995428758, "grad_norm": 1.8181309700012207, "learning_rate": 1.656274546783276e-05, "loss": 0.3776, "step": 27941 }, { "epoch": 4.561283212930085, "grad_norm": 1.784165620803833, "learning_rate": 1.656250603005009e-05, "loss": 0.3697, "step": 27942 }, { "epoch": 4.561446471572589, "grad_norm": 2.1846539974212646, "learning_rate": 1.6562266585658976e-05, "loss": 0.3766, "step": 27943 }, { "epoch": 4.5616097302150935, "grad_norm": 2.6061699390411377, "learning_rate": 1.656202713465966e-05, "loss": 0.4105, "step": 27944 }, { "epoch": 4.561772988857598, "grad_norm": 2.050187826156616, "learning_rate": 1.656178767705239e-05, "loss": 0.3327, "step": 27945 }, { "epoch": 4.561936247500102, "grad_norm": 2.235957622528076, "learning_rate": 1.65615482128374e-05, "loss": 0.3719, "step": 27946 }, { "epoch": 4.562099506142607, "grad_norm": 2.150855779647827, "learning_rate": 1.656130874201493e-05, "loss": 0.3457, "step": 27947 }, { "epoch": 4.56226276478511, "grad_norm": 2.2670700550079346, "learning_rate": 1.6561069264585227e-05, "loss": 0.3853, "step": 27948 }, { "epoch": 4.562426023427615, "grad_norm": 2.54296875, "learning_rate": 1.656082978054853e-05, "loss": 0.3705, "step": 27949 }, { "epoch": 4.562589282070119, "grad_norm": 2.053100824356079, "learning_rate": 1.6560590289905074e-05, "loss": 0.3593, "step": 27950 }, { "epoch": 4.562752540712624, "grad_norm": 2.4444401264190674, "learning_rate": 1.6560350792655107e-05, "loss": 0.4007, "step": 27951 }, { "epoch": 4.562915799355128, "grad_norm": 2.588071823120117, "learning_rate": 1.6560111288798873e-05, "loss": 0.4579, "step": 27952 }, { "epoch": 4.563079057997633, "grad_norm": 2.6180636882781982, "learning_rate": 1.6559871778336604e-05, "loss": 0.4599, "step": 27953 }, { "epoch": 4.563242316640137, "grad_norm": 2.4008078575134277, "learning_rate": 1.6559632261268547e-05, "loss": 0.4454, "step": 27954 }, { "epoch": 4.5634055752826415, "grad_norm": 1.993004322052002, "learning_rate": 1.6559392737594943e-05, "loss": 0.3903, "step": 27955 }, { "epoch": 4.563568833925146, "grad_norm": 3.3257534503936768, "learning_rate": 1.655915320731603e-05, "loss": 0.4422, "step": 27956 }, { "epoch": 4.56373209256765, "grad_norm": 2.3423969745635986, "learning_rate": 1.6558913670432054e-05, "loss": 0.4042, "step": 27957 }, { "epoch": 4.563895351210155, "grad_norm": 2.210416793823242, "learning_rate": 1.6558674126943252e-05, "loss": 0.3606, "step": 27958 }, { "epoch": 4.564058609852659, "grad_norm": 2.3228840827941895, "learning_rate": 1.6558434576849868e-05, "loss": 0.4595, "step": 27959 }, { "epoch": 4.564221868495164, "grad_norm": 2.182576894760132, "learning_rate": 1.655819502015214e-05, "loss": 0.3986, "step": 27960 }, { "epoch": 4.564385127137668, "grad_norm": 2.2612650394439697, "learning_rate": 1.6557955456850313e-05, "loss": 0.4058, "step": 27961 }, { "epoch": 4.5645483857801725, "grad_norm": 2.161680221557617, "learning_rate": 1.6557715886944628e-05, "loss": 0.4114, "step": 27962 }, { "epoch": 4.564711644422677, "grad_norm": 2.343350410461426, "learning_rate": 1.655747631043532e-05, "loss": 0.3944, "step": 27963 }, { "epoch": 4.564874903065181, "grad_norm": 2.2323834896087646, "learning_rate": 1.6557236727322645e-05, "loss": 0.3913, "step": 27964 }, { "epoch": 4.565038161707685, "grad_norm": 1.8165854215621948, "learning_rate": 1.6556997137606824e-05, "loss": 0.351, "step": 27965 }, { "epoch": 4.56520142035019, "grad_norm": 2.3596701622009277, "learning_rate": 1.6556757541288114e-05, "loss": 0.4465, "step": 27966 }, { "epoch": 4.565364678992694, "grad_norm": 2.199199914932251, "learning_rate": 1.655651793836675e-05, "loss": 0.4182, "step": 27967 }, { "epoch": 4.565527937635198, "grad_norm": 2.4818973541259766, "learning_rate": 1.6556278328842973e-05, "loss": 0.3567, "step": 27968 }, { "epoch": 4.565691196277703, "grad_norm": 1.8499629497528076, "learning_rate": 1.6556038712717028e-05, "loss": 0.3593, "step": 27969 }, { "epoch": 4.565854454920207, "grad_norm": 1.5627137422561646, "learning_rate": 1.6555799089989152e-05, "loss": 0.3338, "step": 27970 }, { "epoch": 4.566017713562712, "grad_norm": 2.254361152648926, "learning_rate": 1.6555559460659586e-05, "loss": 0.3634, "step": 27971 }, { "epoch": 4.566180972205216, "grad_norm": 1.441887617111206, "learning_rate": 1.6555319824728577e-05, "loss": 0.328, "step": 27972 }, { "epoch": 4.5663442308477205, "grad_norm": 1.718362808227539, "learning_rate": 1.655508018219636e-05, "loss": 0.3465, "step": 27973 }, { "epoch": 4.566507489490225, "grad_norm": 1.3863985538482666, "learning_rate": 1.6554840533063176e-05, "loss": 0.3078, "step": 27974 }, { "epoch": 4.566670748132729, "grad_norm": 2.4694461822509766, "learning_rate": 1.6554600877329275e-05, "loss": 0.4467, "step": 27975 }, { "epoch": 4.566834006775234, "grad_norm": 2.3269054889678955, "learning_rate": 1.655436121499489e-05, "loss": 0.427, "step": 27976 }, { "epoch": 4.566997265417738, "grad_norm": 1.9619567394256592, "learning_rate": 1.6554121546060266e-05, "loss": 0.3844, "step": 27977 }, { "epoch": 4.567160524060243, "grad_norm": 2.090597629547119, "learning_rate": 1.6553881870525642e-05, "loss": 0.4138, "step": 27978 }, { "epoch": 4.567323782702747, "grad_norm": 2.0362296104431152, "learning_rate": 1.6553642188391266e-05, "loss": 0.4027, "step": 27979 }, { "epoch": 4.5674870413452515, "grad_norm": 2.1198205947875977, "learning_rate": 1.655340249965737e-05, "loss": 0.36, "step": 27980 }, { "epoch": 4.567650299987756, "grad_norm": 2.1678850650787354, "learning_rate": 1.6553162804324196e-05, "loss": 0.4038, "step": 27981 }, { "epoch": 4.5678135586302595, "grad_norm": 2.1709694862365723, "learning_rate": 1.6552923102391997e-05, "loss": 0.3884, "step": 27982 }, { "epoch": 4.567976817272765, "grad_norm": 1.9869612455368042, "learning_rate": 1.6552683393860998e-05, "loss": 0.3639, "step": 27983 }, { "epoch": 4.568140075915268, "grad_norm": 2.0871741771698, "learning_rate": 1.655244367873145e-05, "loss": 0.3656, "step": 27984 }, { "epoch": 4.568303334557773, "grad_norm": 2.8034520149230957, "learning_rate": 1.6552203957003596e-05, "loss": 0.4296, "step": 27985 }, { "epoch": 4.568466593200277, "grad_norm": 2.277904987335205, "learning_rate": 1.6551964228677674e-05, "loss": 0.3829, "step": 27986 }, { "epoch": 4.568629851842782, "grad_norm": 2.8942675590515137, "learning_rate": 1.6551724493753925e-05, "loss": 0.4107, "step": 27987 }, { "epoch": 4.568793110485286, "grad_norm": 2.2648532390594482, "learning_rate": 1.655148475223259e-05, "loss": 0.396, "step": 27988 }, { "epoch": 4.568956369127791, "grad_norm": 1.9700324535369873, "learning_rate": 1.6551245004113915e-05, "loss": 0.333, "step": 27989 }, { "epoch": 4.569119627770295, "grad_norm": 2.6195340156555176, "learning_rate": 1.6551005249398136e-05, "loss": 0.5034, "step": 27990 }, { "epoch": 4.5692828864127994, "grad_norm": 2.176527738571167, "learning_rate": 1.6550765488085495e-05, "loss": 0.4365, "step": 27991 }, { "epoch": 4.569446145055304, "grad_norm": 1.8855953216552734, "learning_rate": 1.6550525720176237e-05, "loss": 0.3505, "step": 27992 }, { "epoch": 4.569609403697808, "grad_norm": 2.0863232612609863, "learning_rate": 1.65502859456706e-05, "loss": 0.3589, "step": 27993 }, { "epoch": 4.569772662340313, "grad_norm": 1.9840450286865234, "learning_rate": 1.6550046164568827e-05, "loss": 0.3632, "step": 27994 }, { "epoch": 4.569935920982817, "grad_norm": 1.7094264030456543, "learning_rate": 1.6549806376871157e-05, "loss": 0.3567, "step": 27995 }, { "epoch": 4.570099179625322, "grad_norm": 2.0353243350982666, "learning_rate": 1.6549566582577838e-05, "loss": 0.3694, "step": 27996 }, { "epoch": 4.570262438267826, "grad_norm": 2.2012860774993896, "learning_rate": 1.6549326781689104e-05, "loss": 0.3221, "step": 27997 }, { "epoch": 4.5704256969103305, "grad_norm": 2.561845302581787, "learning_rate": 1.6549086974205202e-05, "loss": 0.436, "step": 27998 }, { "epoch": 4.570588955552835, "grad_norm": 1.9008245468139648, "learning_rate": 1.6548847160126373e-05, "loss": 0.328, "step": 27999 }, { "epoch": 4.570752214195339, "grad_norm": 2.2111265659332275, "learning_rate": 1.6548607339452853e-05, "loss": 0.4395, "step": 28000 }, { "epoch": 4.570915472837843, "grad_norm": 2.158871650695801, "learning_rate": 1.6548367512184888e-05, "loss": 0.3407, "step": 28001 }, { "epoch": 4.571078731480347, "grad_norm": 2.7677178382873535, "learning_rate": 1.654812767832272e-05, "loss": 0.392, "step": 28002 }, { "epoch": 4.571241990122852, "grad_norm": 2.276561975479126, "learning_rate": 1.6547887837866586e-05, "loss": 0.3891, "step": 28003 }, { "epoch": 4.571405248765356, "grad_norm": 1.9523475170135498, "learning_rate": 1.654764799081673e-05, "loss": 0.4127, "step": 28004 }, { "epoch": 4.571568507407861, "grad_norm": 2.3171322345733643, "learning_rate": 1.6547408137173396e-05, "loss": 0.3952, "step": 28005 }, { "epoch": 4.571731766050365, "grad_norm": 2.0273869037628174, "learning_rate": 1.6547168276936828e-05, "loss": 0.4073, "step": 28006 }, { "epoch": 4.57189502469287, "grad_norm": 2.5825986862182617, "learning_rate": 1.654692841010726e-05, "loss": 0.4856, "step": 28007 }, { "epoch": 4.572058283335374, "grad_norm": 2.4138457775115967, "learning_rate": 1.6546688536684933e-05, "loss": 0.4354, "step": 28008 }, { "epoch": 4.5722215419778784, "grad_norm": 1.991492509841919, "learning_rate": 1.65464486566701e-05, "loss": 0.366, "step": 28009 }, { "epoch": 4.572384800620383, "grad_norm": 2.1677820682525635, "learning_rate": 1.654620877006299e-05, "loss": 0.3865, "step": 28010 }, { "epoch": 4.572548059262887, "grad_norm": 2.5229973793029785, "learning_rate": 1.654596887686385e-05, "loss": 0.3862, "step": 28011 }, { "epoch": 4.572711317905392, "grad_norm": 2.1380515098571777, "learning_rate": 1.654572897707292e-05, "loss": 0.3891, "step": 28012 }, { "epoch": 4.572874576547896, "grad_norm": 2.3871779441833496, "learning_rate": 1.6545489070690445e-05, "loss": 0.4413, "step": 28013 }, { "epoch": 4.573037835190401, "grad_norm": 2.3296916484832764, "learning_rate": 1.6545249157716667e-05, "loss": 0.3933, "step": 28014 }, { "epoch": 4.573201093832905, "grad_norm": 2.091430902481079, "learning_rate": 1.654500923815182e-05, "loss": 0.3947, "step": 28015 }, { "epoch": 4.5733643524754095, "grad_norm": 2.1772377490997314, "learning_rate": 1.654476931199615e-05, "loss": 0.4106, "step": 28016 }, { "epoch": 4.573527611117914, "grad_norm": 2.125251054763794, "learning_rate": 1.6544529379249898e-05, "loss": 0.398, "step": 28017 }, { "epoch": 4.5736908697604175, "grad_norm": 2.041872024536133, "learning_rate": 1.6544289439913314e-05, "loss": 0.3908, "step": 28018 }, { "epoch": 4.573854128402923, "grad_norm": 1.7581559419631958, "learning_rate": 1.6544049493986623e-05, "loss": 0.379, "step": 28019 }, { "epoch": 4.574017387045426, "grad_norm": 2.138108730316162, "learning_rate": 1.6543809541470084e-05, "loss": 0.4092, "step": 28020 }, { "epoch": 4.574180645687931, "grad_norm": 1.7800116539001465, "learning_rate": 1.6543569582363924e-05, "loss": 0.3936, "step": 28021 }, { "epoch": 4.574343904330435, "grad_norm": 2.087184190750122, "learning_rate": 1.6543329616668396e-05, "loss": 0.4021, "step": 28022 }, { "epoch": 4.57450716297294, "grad_norm": 2.4500017166137695, "learning_rate": 1.6543089644383737e-05, "loss": 0.4624, "step": 28023 }, { "epoch": 4.574670421615444, "grad_norm": 2.027479648590088, "learning_rate": 1.6542849665510186e-05, "loss": 0.3726, "step": 28024 }, { "epoch": 4.574833680257949, "grad_norm": 2.223886251449585, "learning_rate": 1.6542609680047985e-05, "loss": 0.466, "step": 28025 }, { "epoch": 4.574996938900453, "grad_norm": 2.203251600265503, "learning_rate": 1.6542369687997383e-05, "loss": 0.4031, "step": 28026 }, { "epoch": 4.575160197542957, "grad_norm": 2.226052761077881, "learning_rate": 1.6542129689358613e-05, "loss": 0.4201, "step": 28027 }, { "epoch": 4.575323456185462, "grad_norm": 2.129157304763794, "learning_rate": 1.654188968413192e-05, "loss": 0.3876, "step": 28028 }, { "epoch": 4.575486714827966, "grad_norm": 2.1479783058166504, "learning_rate": 1.6541649672317547e-05, "loss": 0.4481, "step": 28029 }, { "epoch": 4.575649973470471, "grad_norm": 2.6088573932647705, "learning_rate": 1.6541409653915736e-05, "loss": 0.445, "step": 28030 }, { "epoch": 4.575813232112975, "grad_norm": 2.2865734100341797, "learning_rate": 1.6541169628926726e-05, "loss": 0.3779, "step": 28031 }, { "epoch": 4.57597649075548, "grad_norm": 1.801133155822754, "learning_rate": 1.6540929597350757e-05, "loss": 0.3579, "step": 28032 }, { "epoch": 4.576139749397984, "grad_norm": 2.666867256164551, "learning_rate": 1.6540689559188078e-05, "loss": 0.4262, "step": 28033 }, { "epoch": 4.5763030080404885, "grad_norm": 2.4564621448516846, "learning_rate": 1.654044951443892e-05, "loss": 0.454, "step": 28034 }, { "epoch": 4.576466266682992, "grad_norm": 2.1818315982818604, "learning_rate": 1.6540209463103537e-05, "loss": 0.3967, "step": 28035 }, { "epoch": 4.576629525325497, "grad_norm": 1.9284210205078125, "learning_rate": 1.6539969405182164e-05, "loss": 0.3792, "step": 28036 }, { "epoch": 4.576792783968001, "grad_norm": 2.3401334285736084, "learning_rate": 1.6539729340675045e-05, "loss": 0.3997, "step": 28037 }, { "epoch": 4.576956042610505, "grad_norm": 1.921585202217102, "learning_rate": 1.6539489269582414e-05, "loss": 0.3318, "step": 28038 }, { "epoch": 4.57711930125301, "grad_norm": 2.091141939163208, "learning_rate": 1.6539249191904526e-05, "loss": 0.3959, "step": 28039 }, { "epoch": 4.577282559895514, "grad_norm": 2.1645326614379883, "learning_rate": 1.653900910764161e-05, "loss": 0.3976, "step": 28040 }, { "epoch": 4.577445818538019, "grad_norm": 2.3033740520477295, "learning_rate": 1.653876901679392e-05, "loss": 0.3944, "step": 28041 }, { "epoch": 4.577609077180523, "grad_norm": 2.5075490474700928, "learning_rate": 1.6538528919361688e-05, "loss": 0.4355, "step": 28042 }, { "epoch": 4.577772335823028, "grad_norm": 2.865471124649048, "learning_rate": 1.6538288815345158e-05, "loss": 0.4663, "step": 28043 }, { "epoch": 4.577935594465532, "grad_norm": 2.0446925163269043, "learning_rate": 1.6538048704744573e-05, "loss": 0.3681, "step": 28044 }, { "epoch": 4.578098853108036, "grad_norm": 2.629035472869873, "learning_rate": 1.6537808587560177e-05, "loss": 0.4152, "step": 28045 }, { "epoch": 4.578262111750541, "grad_norm": 2.399444103240967, "learning_rate": 1.6537568463792207e-05, "loss": 0.3602, "step": 28046 }, { "epoch": 4.578425370393045, "grad_norm": 2.4875292778015137, "learning_rate": 1.6537328333440908e-05, "loss": 0.4423, "step": 28047 }, { "epoch": 4.57858862903555, "grad_norm": 2.3518483638763428, "learning_rate": 1.6537088196506523e-05, "loss": 0.3577, "step": 28048 }, { "epoch": 4.578751887678054, "grad_norm": 2.0244712829589844, "learning_rate": 1.6536848052989292e-05, "loss": 0.3782, "step": 28049 }, { "epoch": 4.578915146320559, "grad_norm": 1.8246476650238037, "learning_rate": 1.6536607902889453e-05, "loss": 0.3241, "step": 28050 }, { "epoch": 4.579078404963063, "grad_norm": 2.739492893218994, "learning_rate": 1.6536367746207254e-05, "loss": 0.394, "step": 28051 }, { "epoch": 4.5792416636055675, "grad_norm": 1.93517005443573, "learning_rate": 1.6536127582942934e-05, "loss": 0.418, "step": 28052 }, { "epoch": 4.579404922248072, "grad_norm": 3.3572065830230713, "learning_rate": 1.6535887413096735e-05, "loss": 0.4825, "step": 28053 }, { "epoch": 4.5795681808905755, "grad_norm": 2.3785295486450195, "learning_rate": 1.65356472366689e-05, "loss": 0.4217, "step": 28054 }, { "epoch": 4.57973143953308, "grad_norm": 2.3964643478393555, "learning_rate": 1.6535407053659666e-05, "loss": 0.3906, "step": 28055 }, { "epoch": 4.579894698175584, "grad_norm": 2.1500802040100098, "learning_rate": 1.6535166864069285e-05, "loss": 0.3753, "step": 28056 }, { "epoch": 4.580057956818089, "grad_norm": 2.523371696472168, "learning_rate": 1.653492666789799e-05, "loss": 0.4519, "step": 28057 }, { "epoch": 4.580221215460593, "grad_norm": 1.9179270267486572, "learning_rate": 1.6534686465146027e-05, "loss": 0.3752, "step": 28058 }, { "epoch": 4.580384474103098, "grad_norm": 2.0985918045043945, "learning_rate": 1.6534446255813635e-05, "loss": 0.3754, "step": 28059 }, { "epoch": 4.580547732745602, "grad_norm": 1.7979294061660767, "learning_rate": 1.6534206039901057e-05, "loss": 0.3352, "step": 28060 }, { "epoch": 4.580710991388107, "grad_norm": 2.4711527824401855, "learning_rate": 1.6533965817408535e-05, "loss": 0.3829, "step": 28061 }, { "epoch": 4.580874250030611, "grad_norm": 2.242365837097168, "learning_rate": 1.653372558833631e-05, "loss": 0.4141, "step": 28062 }, { "epoch": 4.581037508673115, "grad_norm": 2.5608999729156494, "learning_rate": 1.653348535268463e-05, "loss": 0.4696, "step": 28063 }, { "epoch": 4.58120076731562, "grad_norm": 1.98444402217865, "learning_rate": 1.653324511045373e-05, "loss": 0.37, "step": 28064 }, { "epoch": 4.581364025958124, "grad_norm": 2.2446417808532715, "learning_rate": 1.6533004861643852e-05, "loss": 0.4003, "step": 28065 }, { "epoch": 4.581527284600629, "grad_norm": 2.1009504795074463, "learning_rate": 1.653276460625524e-05, "loss": 0.415, "step": 28066 }, { "epoch": 4.581690543243133, "grad_norm": 2.3340022563934326, "learning_rate": 1.6532524344288136e-05, "loss": 0.3996, "step": 28067 }, { "epoch": 4.581853801885638, "grad_norm": 2.2591960430145264, "learning_rate": 1.653228407574278e-05, "loss": 0.3945, "step": 28068 }, { "epoch": 4.582017060528142, "grad_norm": 1.8441107273101807, "learning_rate": 1.653204380061942e-05, "loss": 0.3136, "step": 28069 }, { "epoch": 4.5821803191706465, "grad_norm": 1.9669145345687866, "learning_rate": 1.653180351891829e-05, "loss": 0.3717, "step": 28070 }, { "epoch": 4.58234357781315, "grad_norm": 2.128051519393921, "learning_rate": 1.653156323063964e-05, "loss": 0.3984, "step": 28071 }, { "epoch": 4.5825068364556545, "grad_norm": 2.0322141647338867, "learning_rate": 1.65313229357837e-05, "loss": 0.386, "step": 28072 }, { "epoch": 4.582670095098159, "grad_norm": 2.15116548538208, "learning_rate": 1.6531082634350725e-05, "loss": 0.4232, "step": 28073 }, { "epoch": 4.582833353740663, "grad_norm": 1.9135851860046387, "learning_rate": 1.653084232634095e-05, "loss": 0.3153, "step": 28074 }, { "epoch": 4.582996612383168, "grad_norm": 2.2621679306030273, "learning_rate": 1.653060201175462e-05, "loss": 0.4322, "step": 28075 }, { "epoch": 4.583159871025672, "grad_norm": 2.483182430267334, "learning_rate": 1.6530361690591973e-05, "loss": 0.4383, "step": 28076 }, { "epoch": 4.583323129668177, "grad_norm": 1.9485512971878052, "learning_rate": 1.653012136285326e-05, "loss": 0.3912, "step": 28077 }, { "epoch": 4.583486388310681, "grad_norm": 2.2036242485046387, "learning_rate": 1.652988102853871e-05, "loss": 0.4229, "step": 28078 }, { "epoch": 4.5836496469531856, "grad_norm": 2.116879940032959, "learning_rate": 1.652964068764857e-05, "loss": 0.4046, "step": 28079 }, { "epoch": 4.58381290559569, "grad_norm": 2.14093279838562, "learning_rate": 1.6529400340183087e-05, "loss": 0.4138, "step": 28080 }, { "epoch": 4.583976164238194, "grad_norm": 2.2361624240875244, "learning_rate": 1.6529159986142502e-05, "loss": 0.4133, "step": 28081 }, { "epoch": 4.584139422880699, "grad_norm": 1.7450307607650757, "learning_rate": 1.652891962552705e-05, "loss": 0.3253, "step": 28082 }, { "epoch": 4.584302681523203, "grad_norm": 2.35933518409729, "learning_rate": 1.652867925833698e-05, "loss": 0.4432, "step": 28083 }, { "epoch": 4.584465940165708, "grad_norm": 2.6037449836730957, "learning_rate": 1.652843888457253e-05, "loss": 0.4698, "step": 28084 }, { "epoch": 4.584629198808212, "grad_norm": 2.0746631622314453, "learning_rate": 1.6528198504233946e-05, "loss": 0.3675, "step": 28085 }, { "epoch": 4.584792457450717, "grad_norm": 1.9518827199935913, "learning_rate": 1.6527958117321466e-05, "loss": 0.3725, "step": 28086 }, { "epoch": 4.584955716093221, "grad_norm": 1.7946914434432983, "learning_rate": 1.6527717723835336e-05, "loss": 0.3213, "step": 28087 }, { "epoch": 4.585118974735725, "grad_norm": 2.023324966430664, "learning_rate": 1.6527477323775797e-05, "loss": 0.3713, "step": 28088 }, { "epoch": 4.58528223337823, "grad_norm": 2.2455203533172607, "learning_rate": 1.6527236917143086e-05, "loss": 0.4501, "step": 28089 }, { "epoch": 4.5854454920207335, "grad_norm": 1.8267496824264526, "learning_rate": 1.652699650393745e-05, "loss": 0.3616, "step": 28090 }, { "epoch": 4.585608750663238, "grad_norm": 1.732655644416809, "learning_rate": 1.6526756084159132e-05, "loss": 0.3455, "step": 28091 }, { "epoch": 4.585772009305742, "grad_norm": 1.8854193687438965, "learning_rate": 1.652651565780837e-05, "loss": 0.3695, "step": 28092 }, { "epoch": 4.585935267948247, "grad_norm": 1.7215840816497803, "learning_rate": 1.652627522488541e-05, "loss": 0.313, "step": 28093 }, { "epoch": 4.586098526590751, "grad_norm": 1.8872283697128296, "learning_rate": 1.6526034785390494e-05, "loss": 0.4032, "step": 28094 }, { "epoch": 4.586261785233256, "grad_norm": 2.1798722743988037, "learning_rate": 1.652579433932386e-05, "loss": 0.4561, "step": 28095 }, { "epoch": 4.58642504387576, "grad_norm": 2.003232717514038, "learning_rate": 1.6525553886685756e-05, "loss": 0.3695, "step": 28096 }, { "epoch": 4.5865883025182645, "grad_norm": 2.348987579345703, "learning_rate": 1.652531342747642e-05, "loss": 0.4394, "step": 28097 }, { "epoch": 4.586751561160769, "grad_norm": 3.0420804023742676, "learning_rate": 1.6525072961696094e-05, "loss": 0.4649, "step": 28098 }, { "epoch": 4.586914819803273, "grad_norm": 2.3614823818206787, "learning_rate": 1.652483248934502e-05, "loss": 0.3977, "step": 28099 }, { "epoch": 4.587078078445778, "grad_norm": 1.7547959089279175, "learning_rate": 1.6524592010423444e-05, "loss": 0.3588, "step": 28100 }, { "epoch": 4.587241337088282, "grad_norm": 1.9332836866378784, "learning_rate": 1.6524351524931606e-05, "loss": 0.3546, "step": 28101 }, { "epoch": 4.587404595730787, "grad_norm": 2.312941551208496, "learning_rate": 1.6524111032869744e-05, "loss": 0.3968, "step": 28102 }, { "epoch": 4.587567854373291, "grad_norm": 2.105814218521118, "learning_rate": 1.6523870534238107e-05, "loss": 0.3655, "step": 28103 }, { "epoch": 4.587731113015796, "grad_norm": 2.066572904586792, "learning_rate": 1.652363002903693e-05, "loss": 0.398, "step": 28104 }, { "epoch": 4.587894371658299, "grad_norm": 2.652432918548584, "learning_rate": 1.6523389517266465e-05, "loss": 0.4665, "step": 28105 }, { "epoch": 4.5880576303008045, "grad_norm": 2.1887693405151367, "learning_rate": 1.6523148998926947e-05, "loss": 0.3919, "step": 28106 }, { "epoch": 4.588220888943308, "grad_norm": 2.2445056438446045, "learning_rate": 1.6522908474018617e-05, "loss": 0.4403, "step": 28107 }, { "epoch": 4.5883841475858125, "grad_norm": 2.0750653743743896, "learning_rate": 1.6522667942541723e-05, "loss": 0.3609, "step": 28108 }, { "epoch": 4.588547406228317, "grad_norm": 2.025310516357422, "learning_rate": 1.6522427404496505e-05, "loss": 0.3795, "step": 28109 }, { "epoch": 4.588710664870821, "grad_norm": 2.151731014251709, "learning_rate": 1.65221868598832e-05, "loss": 0.3798, "step": 28110 }, { "epoch": 4.588873923513326, "grad_norm": 2.3988349437713623, "learning_rate": 1.6521946308702056e-05, "loss": 0.4127, "step": 28111 }, { "epoch": 4.58903718215583, "grad_norm": 2.45286226272583, "learning_rate": 1.6521705750953314e-05, "loss": 0.4176, "step": 28112 }, { "epoch": 4.589200440798335, "grad_norm": 2.011324644088745, "learning_rate": 1.6521465186637217e-05, "loss": 0.3567, "step": 28113 }, { "epoch": 4.589363699440839, "grad_norm": 2.5261282920837402, "learning_rate": 1.6521224615754005e-05, "loss": 0.3935, "step": 28114 }, { "epoch": 4.5895269580833435, "grad_norm": 2.4627978801727295, "learning_rate": 1.6520984038303924e-05, "loss": 0.4022, "step": 28115 }, { "epoch": 4.589690216725848, "grad_norm": 2.2182974815368652, "learning_rate": 1.6520743454287212e-05, "loss": 0.3936, "step": 28116 }, { "epoch": 4.589853475368352, "grad_norm": 2.335801124572754, "learning_rate": 1.6520502863704112e-05, "loss": 0.3726, "step": 28117 }, { "epoch": 4.590016734010857, "grad_norm": 2.16323184967041, "learning_rate": 1.652026226655487e-05, "loss": 0.3726, "step": 28118 }, { "epoch": 4.590179992653361, "grad_norm": 2.3648598194122314, "learning_rate": 1.6520021662839726e-05, "loss": 0.4345, "step": 28119 }, { "epoch": 4.590343251295866, "grad_norm": 2.6912193298339844, "learning_rate": 1.6519781052558918e-05, "loss": 0.4456, "step": 28120 }, { "epoch": 4.59050650993837, "grad_norm": 2.1589324474334717, "learning_rate": 1.6519540435712694e-05, "loss": 0.4021, "step": 28121 }, { "epoch": 4.590669768580875, "grad_norm": 2.2274885177612305, "learning_rate": 1.6519299812301297e-05, "loss": 0.4206, "step": 28122 }, { "epoch": 4.590833027223379, "grad_norm": 2.2566473484039307, "learning_rate": 1.651905918232496e-05, "loss": 0.3725, "step": 28123 }, { "epoch": 4.590996285865883, "grad_norm": 1.7932517528533936, "learning_rate": 1.651881854578394e-05, "loss": 0.3285, "step": 28124 }, { "epoch": 4.591159544508387, "grad_norm": 1.9365171194076538, "learning_rate": 1.6518577902678467e-05, "loss": 0.3548, "step": 28125 }, { "epoch": 4.5913228031508915, "grad_norm": 1.9590222835540771, "learning_rate": 1.651833725300879e-05, "loss": 0.3913, "step": 28126 }, { "epoch": 4.591486061793396, "grad_norm": 1.9711804389953613, "learning_rate": 1.6518096596775145e-05, "loss": 0.409, "step": 28127 }, { "epoch": 4.5916493204359, "grad_norm": 2.279412031173706, "learning_rate": 1.6517855933977786e-05, "loss": 0.4071, "step": 28128 }, { "epoch": 4.591812579078405, "grad_norm": 1.7028776407241821, "learning_rate": 1.6517615264616942e-05, "loss": 0.3245, "step": 28129 }, { "epoch": 4.591975837720909, "grad_norm": 2.2966439723968506, "learning_rate": 1.651737458869286e-05, "loss": 0.4299, "step": 28130 }, { "epoch": 4.592139096363414, "grad_norm": 2.431837797164917, "learning_rate": 1.6517133906205786e-05, "loss": 0.3585, "step": 28131 }, { "epoch": 4.592302355005918, "grad_norm": 2.012094497680664, "learning_rate": 1.6516893217155963e-05, "loss": 0.3806, "step": 28132 }, { "epoch": 4.5924656136484225, "grad_norm": 1.7618205547332764, "learning_rate": 1.6516652521543626e-05, "loss": 0.3443, "step": 28133 }, { "epoch": 4.592628872290927, "grad_norm": 2.0156633853912354, "learning_rate": 1.6516411819369024e-05, "loss": 0.3508, "step": 28134 }, { "epoch": 4.592792130933431, "grad_norm": 1.7319979667663574, "learning_rate": 1.6516171110632396e-05, "loss": 0.3451, "step": 28135 }, { "epoch": 4.592955389575936, "grad_norm": 2.715893507003784, "learning_rate": 1.6515930395333984e-05, "loss": 0.4362, "step": 28136 }, { "epoch": 4.59311864821844, "grad_norm": 2.187288522720337, "learning_rate": 1.6515689673474035e-05, "loss": 0.4609, "step": 28137 }, { "epoch": 4.593281906860945, "grad_norm": 2.1929140090942383, "learning_rate": 1.6515448945052785e-05, "loss": 0.424, "step": 28138 }, { "epoch": 4.593445165503449, "grad_norm": 1.9400728940963745, "learning_rate": 1.6515208210070482e-05, "loss": 0.3989, "step": 28139 }, { "epoch": 4.593608424145954, "grad_norm": 2.1737587451934814, "learning_rate": 1.6514967468527364e-05, "loss": 0.359, "step": 28140 }, { "epoch": 4.593771682788457, "grad_norm": 2.369720697402954, "learning_rate": 1.6514726720423677e-05, "loss": 0.3957, "step": 28141 }, { "epoch": 4.5939349414309625, "grad_norm": 2.1598806381225586, "learning_rate": 1.651448596575966e-05, "loss": 0.4266, "step": 28142 }, { "epoch": 4.594098200073466, "grad_norm": 1.826338529586792, "learning_rate": 1.6514245204535558e-05, "loss": 0.3537, "step": 28143 }, { "epoch": 4.5942614587159705, "grad_norm": 1.8707315921783447, "learning_rate": 1.6514004436751613e-05, "loss": 0.373, "step": 28144 }, { "epoch": 4.594424717358475, "grad_norm": 2.879725694656372, "learning_rate": 1.651376366240807e-05, "loss": 0.4203, "step": 28145 }, { "epoch": 4.594587976000979, "grad_norm": 2.308832883834839, "learning_rate": 1.6513522881505166e-05, "loss": 0.4277, "step": 28146 }, { "epoch": 4.594751234643484, "grad_norm": 2.291619062423706, "learning_rate": 1.6513282094043148e-05, "loss": 0.4285, "step": 28147 }, { "epoch": 4.594914493285988, "grad_norm": 1.9881560802459717, "learning_rate": 1.6513041300022253e-05, "loss": 0.3828, "step": 28148 }, { "epoch": 4.595077751928493, "grad_norm": 2.44018816947937, "learning_rate": 1.651280049944273e-05, "loss": 0.398, "step": 28149 }, { "epoch": 4.595241010570997, "grad_norm": 2.4534153938293457, "learning_rate": 1.651255969230482e-05, "loss": 0.4187, "step": 28150 }, { "epoch": 4.5954042692135015, "grad_norm": 1.8712142705917358, "learning_rate": 1.651231887860876e-05, "loss": 0.315, "step": 28151 }, { "epoch": 4.595567527856006, "grad_norm": 1.8630542755126953, "learning_rate": 1.65120780583548e-05, "loss": 0.3322, "step": 28152 }, { "epoch": 4.59573078649851, "grad_norm": 2.743000030517578, "learning_rate": 1.6511837231543176e-05, "loss": 0.3961, "step": 28153 }, { "epoch": 4.595894045141015, "grad_norm": 2.3337767124176025, "learning_rate": 1.6511596398174138e-05, "loss": 0.3855, "step": 28154 }, { "epoch": 4.596057303783519, "grad_norm": 1.8731763362884521, "learning_rate": 1.6511355558247922e-05, "loss": 0.3955, "step": 28155 }, { "epoch": 4.596220562426024, "grad_norm": 2.6912057399749756, "learning_rate": 1.6511114711764774e-05, "loss": 0.428, "step": 28156 }, { "epoch": 4.596383821068528, "grad_norm": 2.640594482421875, "learning_rate": 1.651087385872493e-05, "loss": 0.3562, "step": 28157 }, { "epoch": 4.596547079711032, "grad_norm": 2.232069969177246, "learning_rate": 1.6510632999128645e-05, "loss": 0.4011, "step": 28158 }, { "epoch": 4.596710338353537, "grad_norm": 1.9601305723190308, "learning_rate": 1.6510392132976148e-05, "loss": 0.3958, "step": 28159 }, { "epoch": 4.596873596996041, "grad_norm": 2.221118450164795, "learning_rate": 1.6510151260267694e-05, "loss": 0.4013, "step": 28160 }, { "epoch": 4.597036855638545, "grad_norm": 2.6319735050201416, "learning_rate": 1.6509910381003514e-05, "loss": 0.4228, "step": 28161 }, { "epoch": 4.5972001142810495, "grad_norm": 2.0387790203094482, "learning_rate": 1.6509669495183857e-05, "loss": 0.3919, "step": 28162 }, { "epoch": 4.597363372923554, "grad_norm": 1.985166311264038, "learning_rate": 1.6509428602808966e-05, "loss": 0.3963, "step": 28163 }, { "epoch": 4.597526631566058, "grad_norm": 2.3429694175720215, "learning_rate": 1.6509187703879082e-05, "loss": 0.4127, "step": 28164 }, { "epoch": 4.597689890208563, "grad_norm": 2.1671130657196045, "learning_rate": 1.6508946798394448e-05, "loss": 0.4191, "step": 28165 }, { "epoch": 4.597853148851067, "grad_norm": 2.4220893383026123, "learning_rate": 1.6508705886355304e-05, "loss": 0.4098, "step": 28166 }, { "epoch": 4.598016407493572, "grad_norm": 2.1509408950805664, "learning_rate": 1.6508464967761896e-05, "loss": 0.4159, "step": 28167 }, { "epoch": 4.598179666136076, "grad_norm": 1.878302812576294, "learning_rate": 1.650822404261447e-05, "loss": 0.3197, "step": 28168 }, { "epoch": 4.5983429247785805, "grad_norm": 1.9426180124282837, "learning_rate": 1.6507983110913257e-05, "loss": 0.3591, "step": 28169 }, { "epoch": 4.598506183421085, "grad_norm": 2.1194043159484863, "learning_rate": 1.650774217265851e-05, "loss": 0.4457, "step": 28170 }, { "epoch": 4.598669442063589, "grad_norm": 2.265202522277832, "learning_rate": 1.6507501227850467e-05, "loss": 0.4085, "step": 28171 }, { "epoch": 4.598832700706094, "grad_norm": 2.1846988201141357, "learning_rate": 1.6507260276489376e-05, "loss": 0.3997, "step": 28172 }, { "epoch": 4.598995959348598, "grad_norm": 2.173419237136841, "learning_rate": 1.6507019318575473e-05, "loss": 0.3713, "step": 28173 }, { "epoch": 4.599159217991103, "grad_norm": 2.5336086750030518, "learning_rate": 1.6506778354109004e-05, "loss": 0.4012, "step": 28174 }, { "epoch": 4.599322476633607, "grad_norm": 2.0598580837249756, "learning_rate": 1.650653738309021e-05, "loss": 0.3536, "step": 28175 }, { "epoch": 4.599485735276112, "grad_norm": 2.1324591636657715, "learning_rate": 1.6506296405519335e-05, "loss": 0.3606, "step": 28176 }, { "epoch": 4.599648993918615, "grad_norm": 2.0004706382751465, "learning_rate": 1.650605542139662e-05, "loss": 0.3361, "step": 28177 }, { "epoch": 4.59981225256112, "grad_norm": 2.7856719493865967, "learning_rate": 1.650581443072231e-05, "loss": 0.4138, "step": 28178 }, { "epoch": 4.599975511203624, "grad_norm": 2.126861810684204, "learning_rate": 1.6505573433496646e-05, "loss": 0.3886, "step": 28179 }, { "epoch": 4.6001387698461285, "grad_norm": 2.1707570552825928, "learning_rate": 1.6505332429719872e-05, "loss": 0.4303, "step": 28180 }, { "epoch": 4.600302028488633, "grad_norm": 2.405939817428589, "learning_rate": 1.650509141939223e-05, "loss": 0.3915, "step": 28181 }, { "epoch": 4.600465287131137, "grad_norm": 2.0692384243011475, "learning_rate": 1.650485040251396e-05, "loss": 0.3848, "step": 28182 }, { "epoch": 4.600628545773642, "grad_norm": 2.477304697036743, "learning_rate": 1.6504609379085312e-05, "loss": 0.4115, "step": 28183 }, { "epoch": 4.600791804416146, "grad_norm": 2.543781280517578, "learning_rate": 1.650436834910652e-05, "loss": 0.4602, "step": 28184 }, { "epoch": 4.600955063058651, "grad_norm": 1.8748886585235596, "learning_rate": 1.6504127312577835e-05, "loss": 0.338, "step": 28185 }, { "epoch": 4.601118321701155, "grad_norm": 2.417668342590332, "learning_rate": 1.6503886269499488e-05, "loss": 0.3869, "step": 28186 }, { "epoch": 4.6012815803436595, "grad_norm": 2.0808050632476807, "learning_rate": 1.6503645219871737e-05, "loss": 0.3423, "step": 28187 }, { "epoch": 4.601444838986164, "grad_norm": 2.05368709564209, "learning_rate": 1.6503404163694815e-05, "loss": 0.3878, "step": 28188 }, { "epoch": 4.601608097628668, "grad_norm": 2.398324728012085, "learning_rate": 1.6503163100968967e-05, "loss": 0.4056, "step": 28189 }, { "epoch": 4.601771356271173, "grad_norm": 2.13751482963562, "learning_rate": 1.6502922031694436e-05, "loss": 0.3841, "step": 28190 }, { "epoch": 4.601934614913677, "grad_norm": 2.5517446994781494, "learning_rate": 1.650268095587146e-05, "loss": 0.3835, "step": 28191 }, { "epoch": 4.602097873556182, "grad_norm": 1.8408390283584595, "learning_rate": 1.650243987350029e-05, "loss": 0.3677, "step": 28192 }, { "epoch": 4.602261132198686, "grad_norm": 1.9144923686981201, "learning_rate": 1.6502198784581165e-05, "loss": 0.3548, "step": 28193 }, { "epoch": 4.60242439084119, "grad_norm": 2.8692917823791504, "learning_rate": 1.6501957689114325e-05, "loss": 0.4155, "step": 28194 }, { "epoch": 4.602587649483695, "grad_norm": 1.9140864610671997, "learning_rate": 1.6501716587100015e-05, "loss": 0.3737, "step": 28195 }, { "epoch": 4.602750908126199, "grad_norm": 2.1186797618865967, "learning_rate": 1.650147547853848e-05, "loss": 0.3837, "step": 28196 }, { "epoch": 4.602914166768703, "grad_norm": 2.171421527862549, "learning_rate": 1.6501234363429962e-05, "loss": 0.3566, "step": 28197 }, { "epoch": 4.6030774254112075, "grad_norm": 2.073472738265991, "learning_rate": 1.65009932417747e-05, "loss": 0.4123, "step": 28198 }, { "epoch": 4.603240684053712, "grad_norm": 2.0646278858184814, "learning_rate": 1.650075211357294e-05, "loss": 0.3583, "step": 28199 }, { "epoch": 4.603403942696216, "grad_norm": 1.8597900867462158, "learning_rate": 1.6500510978824928e-05, "loss": 0.3857, "step": 28200 }, { "epoch": 4.603567201338721, "grad_norm": 2.248011827468872, "learning_rate": 1.6500269837530898e-05, "loss": 0.3993, "step": 28201 }, { "epoch": 4.603730459981225, "grad_norm": 2.2153117656707764, "learning_rate": 1.65000286896911e-05, "loss": 0.3606, "step": 28202 }, { "epoch": 4.60389371862373, "grad_norm": 2.040288209915161, "learning_rate": 1.6499787535305777e-05, "loss": 0.4606, "step": 28203 }, { "epoch": 4.604056977266234, "grad_norm": 1.7143455743789673, "learning_rate": 1.6499546374375165e-05, "loss": 0.3357, "step": 28204 }, { "epoch": 4.6042202359087385, "grad_norm": 2.177159070968628, "learning_rate": 1.6499305206899517e-05, "loss": 0.4114, "step": 28205 }, { "epoch": 4.604383494551243, "grad_norm": 2.388688325881958, "learning_rate": 1.6499064032879065e-05, "loss": 0.4456, "step": 28206 }, { "epoch": 4.604546753193747, "grad_norm": 2.3952786922454834, "learning_rate": 1.649882285231406e-05, "loss": 0.3857, "step": 28207 }, { "epoch": 4.604710011836252, "grad_norm": 2.1882846355438232, "learning_rate": 1.6498581665204745e-05, "loss": 0.3913, "step": 28208 }, { "epoch": 4.604873270478756, "grad_norm": 2.1253323554992676, "learning_rate": 1.6498340471551357e-05, "loss": 0.352, "step": 28209 }, { "epoch": 4.605036529121261, "grad_norm": 1.9286545515060425, "learning_rate": 1.649809927135414e-05, "loss": 0.3888, "step": 28210 }, { "epoch": 4.605199787763764, "grad_norm": 1.8201285600662231, "learning_rate": 1.6497858064613342e-05, "loss": 0.3182, "step": 28211 }, { "epoch": 4.60536304640627, "grad_norm": 2.4040963649749756, "learning_rate": 1.64976168513292e-05, "loss": 0.4412, "step": 28212 }, { "epoch": 4.605526305048773, "grad_norm": 2.6084961891174316, "learning_rate": 1.6497375631501962e-05, "loss": 0.4986, "step": 28213 }, { "epoch": 4.605689563691278, "grad_norm": 2.2673516273498535, "learning_rate": 1.649713440513187e-05, "loss": 0.4051, "step": 28214 }, { "epoch": 4.605852822333782, "grad_norm": 1.9763375520706177, "learning_rate": 1.649689317221916e-05, "loss": 0.3523, "step": 28215 }, { "epoch": 4.6060160809762865, "grad_norm": 2.3056881427764893, "learning_rate": 1.649665193276408e-05, "loss": 0.3936, "step": 28216 }, { "epoch": 4.606179339618791, "grad_norm": 2.4304308891296387, "learning_rate": 1.649641068676688e-05, "loss": 0.4322, "step": 28217 }, { "epoch": 4.606342598261295, "grad_norm": 2.3044564723968506, "learning_rate": 1.649616943422779e-05, "loss": 0.4435, "step": 28218 }, { "epoch": 4.6065058569038, "grad_norm": 3.0482609272003174, "learning_rate": 1.6495928175147066e-05, "loss": 0.5325, "step": 28219 }, { "epoch": 4.606669115546304, "grad_norm": 2.143779754638672, "learning_rate": 1.6495686909524934e-05, "loss": 0.367, "step": 28220 }, { "epoch": 4.606832374188809, "grad_norm": 2.4868528842926025, "learning_rate": 1.6495445637361655e-05, "loss": 0.4104, "step": 28221 }, { "epoch": 4.606995632831313, "grad_norm": 2.149268388748169, "learning_rate": 1.6495204358657462e-05, "loss": 0.3765, "step": 28222 }, { "epoch": 4.6071588914738175, "grad_norm": 1.9685064554214478, "learning_rate": 1.64949630734126e-05, "loss": 0.3834, "step": 28223 }, { "epoch": 4.607322150116322, "grad_norm": 1.7313213348388672, "learning_rate": 1.649472178162731e-05, "loss": 0.3558, "step": 28224 }, { "epoch": 4.607485408758826, "grad_norm": 2.3395721912384033, "learning_rate": 1.6494480483301836e-05, "loss": 0.4067, "step": 28225 }, { "epoch": 4.607648667401331, "grad_norm": 2.2822484970092773, "learning_rate": 1.6494239178436427e-05, "loss": 0.4562, "step": 28226 }, { "epoch": 4.607811926043835, "grad_norm": 2.069465398788452, "learning_rate": 1.6493997867031317e-05, "loss": 0.4166, "step": 28227 }, { "epoch": 4.60797518468634, "grad_norm": 2.152359962463379, "learning_rate": 1.6493756549086754e-05, "loss": 0.4493, "step": 28228 }, { "epoch": 4.608138443328844, "grad_norm": 1.6917017698287964, "learning_rate": 1.649351522460298e-05, "loss": 0.3467, "step": 28229 }, { "epoch": 4.608301701971348, "grad_norm": 2.161867618560791, "learning_rate": 1.6493273893580236e-05, "loss": 0.3814, "step": 28230 }, { "epoch": 4.608464960613852, "grad_norm": 1.8272895812988281, "learning_rate": 1.649303255601877e-05, "loss": 0.3364, "step": 28231 }, { "epoch": 4.608628219256357, "grad_norm": 2.3336517810821533, "learning_rate": 1.649279121191882e-05, "loss": 0.4695, "step": 28232 }, { "epoch": 4.608791477898861, "grad_norm": 1.9338512420654297, "learning_rate": 1.6492549861280633e-05, "loss": 0.3036, "step": 28233 }, { "epoch": 4.6089547365413654, "grad_norm": 2.3112664222717285, "learning_rate": 1.6492308504104445e-05, "loss": 0.4319, "step": 28234 }, { "epoch": 4.60911799518387, "grad_norm": 1.8299888372421265, "learning_rate": 1.649206714039051e-05, "loss": 0.3566, "step": 28235 }, { "epoch": 4.609281253826374, "grad_norm": 2.19466495513916, "learning_rate": 1.649182577013906e-05, "loss": 0.3975, "step": 28236 }, { "epoch": 4.609444512468879, "grad_norm": 1.9415706396102905, "learning_rate": 1.649158439335035e-05, "loss": 0.3615, "step": 28237 }, { "epoch": 4.609607771111383, "grad_norm": 2.393232583999634, "learning_rate": 1.649134301002461e-05, "loss": 0.428, "step": 28238 }, { "epoch": 4.609771029753888, "grad_norm": 1.8286761045455933, "learning_rate": 1.6491101620162092e-05, "loss": 0.3673, "step": 28239 }, { "epoch": 4.609934288396392, "grad_norm": 2.3410496711730957, "learning_rate": 1.6490860223763037e-05, "loss": 0.3979, "step": 28240 }, { "epoch": 4.6100975470388965, "grad_norm": 2.197293281555176, "learning_rate": 1.6490618820827683e-05, "loss": 0.3868, "step": 28241 }, { "epoch": 4.610260805681401, "grad_norm": 2.050194501876831, "learning_rate": 1.6490377411356283e-05, "loss": 0.3886, "step": 28242 }, { "epoch": 4.610424064323905, "grad_norm": 2.485919713973999, "learning_rate": 1.6490135995349073e-05, "loss": 0.4164, "step": 28243 }, { "epoch": 4.61058732296641, "grad_norm": 2.085385799407959, "learning_rate": 1.6489894572806295e-05, "loss": 0.3849, "step": 28244 }, { "epoch": 4.610750581608914, "grad_norm": 2.0120129585266113, "learning_rate": 1.6489653143728197e-05, "loss": 0.3683, "step": 28245 }, { "epoch": 4.610913840251419, "grad_norm": 1.7238541841506958, "learning_rate": 1.6489411708115023e-05, "loss": 0.3909, "step": 28246 }, { "epoch": 4.611077098893922, "grad_norm": 2.055860996246338, "learning_rate": 1.648917026596701e-05, "loss": 0.3987, "step": 28247 }, { "epoch": 4.611240357536428, "grad_norm": 2.156541347503662, "learning_rate": 1.6488928817284402e-05, "loss": 0.4356, "step": 28248 }, { "epoch": 4.611403616178931, "grad_norm": 2.263867139816284, "learning_rate": 1.6488687362067447e-05, "loss": 0.3711, "step": 28249 }, { "epoch": 4.611566874821436, "grad_norm": 1.9885965585708618, "learning_rate": 1.6488445900316388e-05, "loss": 0.3252, "step": 28250 }, { "epoch": 4.61173013346394, "grad_norm": 1.8564808368682861, "learning_rate": 1.6488204432031462e-05, "loss": 0.3281, "step": 28251 }, { "epoch": 4.6118933921064444, "grad_norm": 2.1623568534851074, "learning_rate": 1.6487962957212918e-05, "loss": 0.3478, "step": 28252 }, { "epoch": 4.612056650748949, "grad_norm": 2.195966958999634, "learning_rate": 1.6487721475860994e-05, "loss": 0.39, "step": 28253 }, { "epoch": 4.612219909391453, "grad_norm": 2.1995997428894043, "learning_rate": 1.648747998797594e-05, "loss": 0.3845, "step": 28254 }, { "epoch": 4.612383168033958, "grad_norm": 2.5050275325775146, "learning_rate": 1.648723849355799e-05, "loss": 0.388, "step": 28255 }, { "epoch": 4.612546426676462, "grad_norm": 2.4816153049468994, "learning_rate": 1.64869969926074e-05, "loss": 0.3317, "step": 28256 }, { "epoch": 4.612709685318967, "grad_norm": 2.5293610095977783, "learning_rate": 1.64867554851244e-05, "loss": 0.3946, "step": 28257 }, { "epoch": 4.612872943961471, "grad_norm": 1.7343202829360962, "learning_rate": 1.6486513971109245e-05, "loss": 0.3225, "step": 28258 }, { "epoch": 4.6130362026039755, "grad_norm": 1.9045441150665283, "learning_rate": 1.6486272450562166e-05, "loss": 0.3693, "step": 28259 }, { "epoch": 4.61319946124648, "grad_norm": 1.8743982315063477, "learning_rate": 1.6486030923483413e-05, "loss": 0.3423, "step": 28260 }, { "epoch": 4.613362719888984, "grad_norm": 2.111767053604126, "learning_rate": 1.648578938987323e-05, "loss": 0.4264, "step": 28261 }, { "epoch": 4.613525978531489, "grad_norm": 2.3815066814422607, "learning_rate": 1.648554784973186e-05, "loss": 0.3707, "step": 28262 }, { "epoch": 4.613689237173993, "grad_norm": 2.6340293884277344, "learning_rate": 1.648530630305954e-05, "loss": 0.4579, "step": 28263 }, { "epoch": 4.613852495816497, "grad_norm": 2.1483943462371826, "learning_rate": 1.6485064749856524e-05, "loss": 0.3924, "step": 28264 }, { "epoch": 4.614015754459002, "grad_norm": 2.0437424182891846, "learning_rate": 1.6484823190123048e-05, "loss": 0.3382, "step": 28265 }, { "epoch": 4.614179013101506, "grad_norm": 2.117027759552002, "learning_rate": 1.648458162385936e-05, "loss": 0.384, "step": 28266 }, { "epoch": 4.61434227174401, "grad_norm": 2.287642478942871, "learning_rate": 1.6484340051065695e-05, "loss": 0.4546, "step": 28267 }, { "epoch": 4.614505530386515, "grad_norm": 1.9999103546142578, "learning_rate": 1.64840984717423e-05, "loss": 0.3423, "step": 28268 }, { "epoch": 4.614668789029019, "grad_norm": 2.4719958305358887, "learning_rate": 1.648385688588942e-05, "loss": 0.423, "step": 28269 }, { "epoch": 4.614832047671523, "grad_norm": 2.185295581817627, "learning_rate": 1.6483615293507304e-05, "loss": 0.3632, "step": 28270 }, { "epoch": 4.614995306314028, "grad_norm": 2.268411874771118, "learning_rate": 1.6483373694596183e-05, "loss": 0.3873, "step": 28271 }, { "epoch": 4.615158564956532, "grad_norm": 1.919912576675415, "learning_rate": 1.6483132089156312e-05, "loss": 0.3576, "step": 28272 }, { "epoch": 4.615321823599037, "grad_norm": 1.6667494773864746, "learning_rate": 1.6482890477187924e-05, "loss": 0.329, "step": 28273 }, { "epoch": 4.615485082241541, "grad_norm": 1.986512541770935, "learning_rate": 1.6482648858691267e-05, "loss": 0.3128, "step": 28274 }, { "epoch": 4.615648340884046, "grad_norm": 2.2262678146362305, "learning_rate": 1.6482407233666587e-05, "loss": 0.3715, "step": 28275 }, { "epoch": 4.61581159952655, "grad_norm": 2.3534457683563232, "learning_rate": 1.6482165602114123e-05, "loss": 0.3944, "step": 28276 }, { "epoch": 4.6159748581690545, "grad_norm": 2.1438679695129395, "learning_rate": 1.6481923964034118e-05, "loss": 0.3721, "step": 28277 }, { "epoch": 4.616138116811559, "grad_norm": 2.2895374298095703, "learning_rate": 1.6481682319426823e-05, "loss": 0.3385, "step": 28278 }, { "epoch": 4.616301375454063, "grad_norm": 2.512346029281616, "learning_rate": 1.648144066829247e-05, "loss": 0.4632, "step": 28279 }, { "epoch": 4.616464634096568, "grad_norm": 2.415294647216797, "learning_rate": 1.6481199010631312e-05, "loss": 0.4683, "step": 28280 }, { "epoch": 4.616627892739072, "grad_norm": 2.7803666591644287, "learning_rate": 1.6480957346443583e-05, "loss": 0.4373, "step": 28281 }, { "epoch": 4.616791151381577, "grad_norm": 2.4318454265594482, "learning_rate": 1.6480715675729537e-05, "loss": 0.3547, "step": 28282 }, { "epoch": 4.61695441002408, "grad_norm": 2.17063570022583, "learning_rate": 1.648047399848941e-05, "loss": 0.36, "step": 28283 }, { "epoch": 4.617117668666585, "grad_norm": 2.2261290550231934, "learning_rate": 1.6480232314723446e-05, "loss": 0.4251, "step": 28284 }, { "epoch": 4.617280927309089, "grad_norm": 2.3157949447631836, "learning_rate": 1.6479990624431893e-05, "loss": 0.3593, "step": 28285 }, { "epoch": 4.617444185951594, "grad_norm": 1.7753994464874268, "learning_rate": 1.6479748927614992e-05, "loss": 0.3162, "step": 28286 }, { "epoch": 4.617607444594098, "grad_norm": 1.945940375328064, "learning_rate": 1.647950722427298e-05, "loss": 0.3807, "step": 28287 }, { "epoch": 4.617770703236602, "grad_norm": 1.9289371967315674, "learning_rate": 1.647926551440611e-05, "loss": 0.4157, "step": 28288 }, { "epoch": 4.617933961879107, "grad_norm": 1.8430057764053345, "learning_rate": 1.647902379801462e-05, "loss": 0.344, "step": 28289 }, { "epoch": 4.618097220521611, "grad_norm": 2.182478666305542, "learning_rate": 1.6478782075098757e-05, "loss": 0.4568, "step": 28290 }, { "epoch": 4.618260479164116, "grad_norm": 1.9257934093475342, "learning_rate": 1.6478540345658758e-05, "loss": 0.3301, "step": 28291 }, { "epoch": 4.61842373780662, "grad_norm": 2.13112735748291, "learning_rate": 1.647829860969487e-05, "loss": 0.4361, "step": 28292 }, { "epoch": 4.618586996449125, "grad_norm": 2.6634442806243896, "learning_rate": 1.647805686720734e-05, "loss": 0.409, "step": 28293 }, { "epoch": 4.618750255091629, "grad_norm": 1.9201207160949707, "learning_rate": 1.6477815118196412e-05, "loss": 0.3891, "step": 28294 }, { "epoch": 4.6189135137341335, "grad_norm": 1.8837660551071167, "learning_rate": 1.647757336266232e-05, "loss": 0.3163, "step": 28295 }, { "epoch": 4.619076772376638, "grad_norm": 2.8111884593963623, "learning_rate": 1.6477331600605317e-05, "loss": 0.422, "step": 28296 }, { "epoch": 4.619240031019142, "grad_norm": 2.234222650527954, "learning_rate": 1.6477089832025643e-05, "loss": 0.385, "step": 28297 }, { "epoch": 4.619403289661647, "grad_norm": 1.9758930206298828, "learning_rate": 1.6476848056923537e-05, "loss": 0.3874, "step": 28298 }, { "epoch": 4.619566548304151, "grad_norm": 2.1116080284118652, "learning_rate": 1.647660627529925e-05, "loss": 0.3598, "step": 28299 }, { "epoch": 4.619729806946655, "grad_norm": 2.6844899654388428, "learning_rate": 1.6476364487153024e-05, "loss": 0.4272, "step": 28300 }, { "epoch": 4.619893065589159, "grad_norm": 1.7891182899475098, "learning_rate": 1.6476122692485094e-05, "loss": 0.3509, "step": 28301 }, { "epoch": 4.620056324231664, "grad_norm": 2.417994260787964, "learning_rate": 1.6475880891295716e-05, "loss": 0.4087, "step": 28302 }, { "epoch": 4.620219582874168, "grad_norm": 2.0807669162750244, "learning_rate": 1.6475639083585125e-05, "loss": 0.4328, "step": 28303 }, { "epoch": 4.620382841516673, "grad_norm": 1.4544671773910522, "learning_rate": 1.647539726935357e-05, "loss": 0.3058, "step": 28304 }, { "epoch": 4.620546100159177, "grad_norm": 1.9440186023712158, "learning_rate": 1.647515544860129e-05, "loss": 0.4021, "step": 28305 }, { "epoch": 4.620709358801681, "grad_norm": 2.3392162322998047, "learning_rate": 1.647491362132853e-05, "loss": 0.3784, "step": 28306 }, { "epoch": 4.620872617444186, "grad_norm": 2.064026355743408, "learning_rate": 1.647467178753553e-05, "loss": 0.4005, "step": 28307 }, { "epoch": 4.62103587608669, "grad_norm": 2.2032036781311035, "learning_rate": 1.6474429947222544e-05, "loss": 0.4275, "step": 28308 }, { "epoch": 4.621199134729195, "grad_norm": 2.034759759902954, "learning_rate": 1.6474188100389806e-05, "loss": 0.3862, "step": 28309 }, { "epoch": 4.621362393371699, "grad_norm": 2.001859426498413, "learning_rate": 1.6473946247037563e-05, "loss": 0.358, "step": 28310 }, { "epoch": 4.621525652014204, "grad_norm": 2.038797616958618, "learning_rate": 1.6473704387166053e-05, "loss": 0.3265, "step": 28311 }, { "epoch": 4.621688910656708, "grad_norm": 2.265618085861206, "learning_rate": 1.647346252077553e-05, "loss": 0.4287, "step": 28312 }, { "epoch": 4.6218521692992125, "grad_norm": 2.2500956058502197, "learning_rate": 1.647322064786623e-05, "loss": 0.399, "step": 28313 }, { "epoch": 4.622015427941717, "grad_norm": 2.4279308319091797, "learning_rate": 1.64729787684384e-05, "loss": 0.4238, "step": 28314 }, { "epoch": 4.622178686584221, "grad_norm": 1.8623180389404297, "learning_rate": 1.647273688249228e-05, "loss": 0.3698, "step": 28315 }, { "epoch": 4.622341945226726, "grad_norm": 2.6692070960998535, "learning_rate": 1.6472494990028116e-05, "loss": 0.3715, "step": 28316 }, { "epoch": 4.622505203869229, "grad_norm": 2.3668973445892334, "learning_rate": 1.647225309104615e-05, "loss": 0.4168, "step": 28317 }, { "epoch": 4.622668462511735, "grad_norm": 1.9528049230575562, "learning_rate": 1.647201118554663e-05, "loss": 0.3802, "step": 28318 }, { "epoch": 4.622831721154238, "grad_norm": 2.203188419342041, "learning_rate": 1.6471769273529797e-05, "loss": 0.432, "step": 28319 }, { "epoch": 4.622994979796743, "grad_norm": 2.6327097415924072, "learning_rate": 1.647152735499589e-05, "loss": 0.4387, "step": 28320 }, { "epoch": 4.623158238439247, "grad_norm": 1.841214895248413, "learning_rate": 1.647128542994516e-05, "loss": 0.3466, "step": 28321 }, { "epoch": 4.6233214970817516, "grad_norm": 2.3399763107299805, "learning_rate": 1.6471043498377845e-05, "loss": 0.439, "step": 28322 }, { "epoch": 4.623484755724256, "grad_norm": 1.9139292240142822, "learning_rate": 1.6470801560294193e-05, "loss": 0.3524, "step": 28323 }, { "epoch": 4.62364801436676, "grad_norm": 1.9060715436935425, "learning_rate": 1.6470559615694445e-05, "loss": 0.3543, "step": 28324 }, { "epoch": 4.623811273009265, "grad_norm": 1.838168740272522, "learning_rate": 1.6470317664578846e-05, "loss": 0.3606, "step": 28325 }, { "epoch": 4.623974531651769, "grad_norm": 2.174490451812744, "learning_rate": 1.6470075706947635e-05, "loss": 0.4091, "step": 28326 }, { "epoch": 4.624137790294274, "grad_norm": 2.195568323135376, "learning_rate": 1.6469833742801064e-05, "loss": 0.3883, "step": 28327 }, { "epoch": 4.624301048936778, "grad_norm": 2.3761184215545654, "learning_rate": 1.646959177213937e-05, "loss": 0.4356, "step": 28328 }, { "epoch": 4.624464307579283, "grad_norm": 2.6155295372009277, "learning_rate": 1.64693497949628e-05, "loss": 0.3811, "step": 28329 }, { "epoch": 4.624627566221787, "grad_norm": 2.331696033477783, "learning_rate": 1.6469107811271593e-05, "loss": 0.4051, "step": 28330 }, { "epoch": 4.6247908248642915, "grad_norm": 2.2387030124664307, "learning_rate": 1.6468865821066e-05, "loss": 0.3832, "step": 28331 }, { "epoch": 4.624954083506796, "grad_norm": 2.347442865371704, "learning_rate": 1.646862382434626e-05, "loss": 0.3916, "step": 28332 }, { "epoch": 4.6251173421493, "grad_norm": 2.1791985034942627, "learning_rate": 1.6468381821112615e-05, "loss": 0.3599, "step": 28333 }, { "epoch": 4.625280600791804, "grad_norm": 2.6918702125549316, "learning_rate": 1.6468139811365314e-05, "loss": 0.5101, "step": 28334 }, { "epoch": 4.625443859434309, "grad_norm": 2.4340529441833496, "learning_rate": 1.64678977951046e-05, "loss": 0.4725, "step": 28335 }, { "epoch": 4.625607118076813, "grad_norm": 1.987754225730896, "learning_rate": 1.646765577233071e-05, "loss": 0.371, "step": 28336 }, { "epoch": 4.625770376719317, "grad_norm": 2.2608184814453125, "learning_rate": 1.6467413743043895e-05, "loss": 0.4049, "step": 28337 }, { "epoch": 4.625933635361822, "grad_norm": 2.5888748168945312, "learning_rate": 1.6467171707244394e-05, "loss": 0.446, "step": 28338 }, { "epoch": 4.626096894004326, "grad_norm": 2.269423484802246, "learning_rate": 1.6466929664932454e-05, "loss": 0.4142, "step": 28339 }, { "epoch": 4.6262601526468305, "grad_norm": 2.2589006423950195, "learning_rate": 1.6466687616108317e-05, "loss": 0.4281, "step": 28340 }, { "epoch": 4.626423411289335, "grad_norm": 2.040625810623169, "learning_rate": 1.646644556077223e-05, "loss": 0.4066, "step": 28341 }, { "epoch": 4.626586669931839, "grad_norm": 2.2756905555725098, "learning_rate": 1.646620349892443e-05, "loss": 0.4307, "step": 28342 }, { "epoch": 4.626749928574344, "grad_norm": 2.478428602218628, "learning_rate": 1.6465961430565168e-05, "loss": 0.4509, "step": 28343 }, { "epoch": 4.626913187216848, "grad_norm": 1.7600677013397217, "learning_rate": 1.646571935569468e-05, "loss": 0.3817, "step": 28344 }, { "epoch": 4.627076445859353, "grad_norm": 2.441340684890747, "learning_rate": 1.646547727431322e-05, "loss": 0.4226, "step": 28345 }, { "epoch": 4.627239704501857, "grad_norm": 2.5079383850097656, "learning_rate": 1.6465235186421024e-05, "loss": 0.4872, "step": 28346 }, { "epoch": 4.627402963144362, "grad_norm": 2.2188074588775635, "learning_rate": 1.6464993092018336e-05, "loss": 0.4518, "step": 28347 }, { "epoch": 4.627566221786866, "grad_norm": 2.237560987472534, "learning_rate": 1.6464750991105403e-05, "loss": 0.3805, "step": 28348 }, { "epoch": 4.6277294804293705, "grad_norm": 2.30985426902771, "learning_rate": 1.6464508883682467e-05, "loss": 0.4234, "step": 28349 }, { "epoch": 4.627892739071875, "grad_norm": 2.459796667098999, "learning_rate": 1.6464266769749774e-05, "loss": 0.4294, "step": 28350 }, { "epoch": 4.628055997714379, "grad_norm": 1.8045185804367065, "learning_rate": 1.6464024649307565e-05, "loss": 0.3905, "step": 28351 }, { "epoch": 4.628219256356884, "grad_norm": 2.1526777744293213, "learning_rate": 1.6463782522356085e-05, "loss": 0.4053, "step": 28352 }, { "epoch": 4.628382514999387, "grad_norm": 2.230240821838379, "learning_rate": 1.6463540388895576e-05, "loss": 0.423, "step": 28353 }, { "epoch": 4.628545773641892, "grad_norm": 2.219756841659546, "learning_rate": 1.6463298248926286e-05, "loss": 0.3869, "step": 28354 }, { "epoch": 4.628709032284396, "grad_norm": 2.3262526988983154, "learning_rate": 1.6463056102448457e-05, "loss": 0.3992, "step": 28355 }, { "epoch": 4.628872290926901, "grad_norm": 2.186394691467285, "learning_rate": 1.6462813949462327e-05, "loss": 0.375, "step": 28356 }, { "epoch": 4.629035549569405, "grad_norm": 2.0854501724243164, "learning_rate": 1.6462571789968153e-05, "loss": 0.3391, "step": 28357 }, { "epoch": 4.6291988082119095, "grad_norm": 1.9075863361358643, "learning_rate": 1.6462329623966166e-05, "loss": 0.3576, "step": 28358 }, { "epoch": 4.629362066854414, "grad_norm": 2.005561590194702, "learning_rate": 1.6462087451456615e-05, "loss": 0.3969, "step": 28359 }, { "epoch": 4.629525325496918, "grad_norm": 2.2611937522888184, "learning_rate": 1.6461845272439743e-05, "loss": 0.4114, "step": 28360 }, { "epoch": 4.629688584139423, "grad_norm": 2.2433090209960938, "learning_rate": 1.6461603086915796e-05, "loss": 0.4194, "step": 28361 }, { "epoch": 4.629851842781927, "grad_norm": 1.9639900922775269, "learning_rate": 1.646136089488502e-05, "loss": 0.3775, "step": 28362 }, { "epoch": 4.630015101424432, "grad_norm": 2.2527618408203125, "learning_rate": 1.6461118696347646e-05, "loss": 0.3879, "step": 28363 }, { "epoch": 4.630178360066936, "grad_norm": 2.2461464405059814, "learning_rate": 1.6460876491303936e-05, "loss": 0.3272, "step": 28364 }, { "epoch": 4.630341618709441, "grad_norm": 1.9418734312057495, "learning_rate": 1.646063427975412e-05, "loss": 0.3307, "step": 28365 }, { "epoch": 4.630504877351945, "grad_norm": 2.0623972415924072, "learning_rate": 1.646039206169845e-05, "loss": 0.4642, "step": 28366 }, { "epoch": 4.6306681359944495, "grad_norm": 2.2459890842437744, "learning_rate": 1.6460149837137166e-05, "loss": 0.4583, "step": 28367 }, { "epoch": 4.630831394636954, "grad_norm": 2.062110662460327, "learning_rate": 1.6459907606070513e-05, "loss": 0.3797, "step": 28368 }, { "epoch": 4.630994653279458, "grad_norm": 2.216921091079712, "learning_rate": 1.645966536849873e-05, "loss": 0.4507, "step": 28369 }, { "epoch": 4.631157911921962, "grad_norm": 1.8758227825164795, "learning_rate": 1.6459423124422072e-05, "loss": 0.3457, "step": 28370 }, { "epoch": 4.631321170564467, "grad_norm": 2.8030574321746826, "learning_rate": 1.6459180873840773e-05, "loss": 0.4067, "step": 28371 }, { "epoch": 4.631484429206971, "grad_norm": 1.5687038898468018, "learning_rate": 1.6458938616755084e-05, "loss": 0.3091, "step": 28372 }, { "epoch": 4.631647687849475, "grad_norm": 1.9199423789978027, "learning_rate": 1.6458696353165242e-05, "loss": 0.3554, "step": 28373 }, { "epoch": 4.63181094649198, "grad_norm": 1.907181978225708, "learning_rate": 1.6458454083071495e-05, "loss": 0.3563, "step": 28374 }, { "epoch": 4.631974205134484, "grad_norm": 1.971081018447876, "learning_rate": 1.645821180647409e-05, "loss": 0.3246, "step": 28375 }, { "epoch": 4.6321374637769885, "grad_norm": 2.2314140796661377, "learning_rate": 1.645796952337326e-05, "loss": 0.4338, "step": 28376 }, { "epoch": 4.632300722419493, "grad_norm": 2.278825044631958, "learning_rate": 1.6457727233769266e-05, "loss": 0.422, "step": 28377 }, { "epoch": 4.632463981061997, "grad_norm": 2.110097885131836, "learning_rate": 1.6457484937662334e-05, "loss": 0.3464, "step": 28378 }, { "epoch": 4.632627239704502, "grad_norm": 2.0738818645477295, "learning_rate": 1.6457242635052724e-05, "loss": 0.3878, "step": 28379 }, { "epoch": 4.632790498347006, "grad_norm": 2.230440378189087, "learning_rate": 1.6457000325940665e-05, "loss": 0.4465, "step": 28380 }, { "epoch": 4.632953756989511, "grad_norm": 1.988961935043335, "learning_rate": 1.6456758010326416e-05, "loss": 0.4291, "step": 28381 }, { "epoch": 4.633117015632015, "grad_norm": 1.8936303853988647, "learning_rate": 1.6456515688210206e-05, "loss": 0.3687, "step": 28382 }, { "epoch": 4.63328027427452, "grad_norm": 1.8152098655700684, "learning_rate": 1.6456273359592293e-05, "loss": 0.371, "step": 28383 }, { "epoch": 4.633443532917024, "grad_norm": 2.21490478515625, "learning_rate": 1.6456031024472907e-05, "loss": 0.3941, "step": 28384 }, { "epoch": 4.6336067915595285, "grad_norm": 2.0028159618377686, "learning_rate": 1.6455788682852303e-05, "loss": 0.3962, "step": 28385 }, { "epoch": 4.633770050202033, "grad_norm": 2.0491700172424316, "learning_rate": 1.6455546334730725e-05, "loss": 0.3711, "step": 28386 }, { "epoch": 4.6339333088445365, "grad_norm": 2.172457456588745, "learning_rate": 1.6455303980108413e-05, "loss": 0.3578, "step": 28387 }, { "epoch": 4.634096567487042, "grad_norm": 2.2332489490509033, "learning_rate": 1.6455061618985607e-05, "loss": 0.3563, "step": 28388 }, { "epoch": 4.634259826129545, "grad_norm": 1.7876619100570679, "learning_rate": 1.6454819251362556e-05, "loss": 0.3694, "step": 28389 }, { "epoch": 4.63442308477205, "grad_norm": 1.6257585287094116, "learning_rate": 1.645457687723951e-05, "loss": 0.3568, "step": 28390 }, { "epoch": 4.634586343414554, "grad_norm": 1.7396302223205566, "learning_rate": 1.64543344966167e-05, "loss": 0.3112, "step": 28391 }, { "epoch": 4.634749602057059, "grad_norm": 1.976952314376831, "learning_rate": 1.645409210949438e-05, "loss": 0.3618, "step": 28392 }, { "epoch": 4.634912860699563, "grad_norm": 2.0176174640655518, "learning_rate": 1.6453849715872788e-05, "loss": 0.4203, "step": 28393 }, { "epoch": 4.6350761193420675, "grad_norm": 2.1965041160583496, "learning_rate": 1.6453607315752175e-05, "loss": 0.4092, "step": 28394 }, { "epoch": 4.635239377984572, "grad_norm": 1.9347131252288818, "learning_rate": 1.645336490913278e-05, "loss": 0.3575, "step": 28395 }, { "epoch": 4.635402636627076, "grad_norm": 1.692861557006836, "learning_rate": 1.6453122496014847e-05, "loss": 0.366, "step": 28396 }, { "epoch": 4.635565895269581, "grad_norm": 1.8257791996002197, "learning_rate": 1.645288007639862e-05, "loss": 0.3212, "step": 28397 }, { "epoch": 4.635729153912085, "grad_norm": 2.036144495010376, "learning_rate": 1.6452637650284348e-05, "loss": 0.3898, "step": 28398 }, { "epoch": 4.63589241255459, "grad_norm": 2.2774887084960938, "learning_rate": 1.645239521767227e-05, "loss": 0.435, "step": 28399 }, { "epoch": 4.636055671197094, "grad_norm": 2.120788812637329, "learning_rate": 1.6452152778562633e-05, "loss": 0.3758, "step": 28400 }, { "epoch": 4.636218929839599, "grad_norm": 2.2780749797821045, "learning_rate": 1.645191033295568e-05, "loss": 0.4028, "step": 28401 }, { "epoch": 4.636382188482103, "grad_norm": 1.8734148740768433, "learning_rate": 1.6451667880851653e-05, "loss": 0.3174, "step": 28402 }, { "epoch": 4.6365454471246075, "grad_norm": 2.287899971008301, "learning_rate": 1.6451425422250798e-05, "loss": 0.4146, "step": 28403 }, { "epoch": 4.636708705767112, "grad_norm": 2.397660732269287, "learning_rate": 1.645118295715336e-05, "loss": 0.3932, "step": 28404 }, { "epoch": 4.636871964409616, "grad_norm": 2.513733148574829, "learning_rate": 1.6450940485559582e-05, "loss": 0.4441, "step": 28405 }, { "epoch": 4.63703522305212, "grad_norm": 1.876358151435852, "learning_rate": 1.645069800746971e-05, "loss": 0.3442, "step": 28406 }, { "epoch": 4.637198481694624, "grad_norm": 3.8612351417541504, "learning_rate": 1.6450455522883987e-05, "loss": 0.4071, "step": 28407 }, { "epoch": 4.637361740337129, "grad_norm": 2.3126587867736816, "learning_rate": 1.6450213031802654e-05, "loss": 0.4706, "step": 28408 }, { "epoch": 4.637524998979633, "grad_norm": 2.414717674255371, "learning_rate": 1.644997053422596e-05, "loss": 0.3788, "step": 28409 }, { "epoch": 4.637688257622138, "grad_norm": 2.608731746673584, "learning_rate": 1.644972803015415e-05, "loss": 0.3926, "step": 28410 }, { "epoch": 4.637851516264642, "grad_norm": 2.0900487899780273, "learning_rate": 1.6449485519587463e-05, "loss": 0.4082, "step": 28411 }, { "epoch": 4.6380147749071465, "grad_norm": 1.9403244256973267, "learning_rate": 1.6449243002526146e-05, "loss": 0.3457, "step": 28412 }, { "epoch": 4.638178033549651, "grad_norm": 1.9316858053207397, "learning_rate": 1.644900047897044e-05, "loss": 0.4336, "step": 28413 }, { "epoch": 4.638341292192155, "grad_norm": 1.6299676895141602, "learning_rate": 1.64487579489206e-05, "loss": 0.3333, "step": 28414 }, { "epoch": 4.63850455083466, "grad_norm": 2.1951329708099365, "learning_rate": 1.6448515412376857e-05, "loss": 0.3668, "step": 28415 }, { "epoch": 4.638667809477164, "grad_norm": 2.2775676250457764, "learning_rate": 1.6448272869339458e-05, "loss": 0.3856, "step": 28416 }, { "epoch": 4.638831068119669, "grad_norm": 2.3622095584869385, "learning_rate": 1.6448030319808658e-05, "loss": 0.3632, "step": 28417 }, { "epoch": 4.638994326762173, "grad_norm": 1.8683955669403076, "learning_rate": 1.6447787763784685e-05, "loss": 0.3756, "step": 28418 }, { "epoch": 4.639157585404678, "grad_norm": 2.2553915977478027, "learning_rate": 1.6447545201267792e-05, "loss": 0.4444, "step": 28419 }, { "epoch": 4.639320844047182, "grad_norm": 1.9047541618347168, "learning_rate": 1.644730263225823e-05, "loss": 0.3949, "step": 28420 }, { "epoch": 4.6394841026896865, "grad_norm": 2.3450818061828613, "learning_rate": 1.644706005675623e-05, "loss": 0.4516, "step": 28421 }, { "epoch": 4.639647361332191, "grad_norm": 2.4894914627075195, "learning_rate": 1.6446817474762045e-05, "loss": 0.4382, "step": 28422 }, { "epoch": 4.6398106199746945, "grad_norm": 1.9110339879989624, "learning_rate": 1.6446574886275914e-05, "loss": 0.3528, "step": 28423 }, { "epoch": 4.6399738786172, "grad_norm": 1.8404682874679565, "learning_rate": 1.6446332291298086e-05, "loss": 0.3419, "step": 28424 }, { "epoch": 4.640137137259703, "grad_norm": 2.0950686931610107, "learning_rate": 1.6446089689828804e-05, "loss": 0.3403, "step": 28425 }, { "epoch": 4.640300395902208, "grad_norm": 2.00569224357605, "learning_rate": 1.644584708186831e-05, "loss": 0.388, "step": 28426 }, { "epoch": 4.640463654544712, "grad_norm": 2.511606216430664, "learning_rate": 1.6445604467416848e-05, "loss": 0.4773, "step": 28427 }, { "epoch": 4.640626913187217, "grad_norm": 2.1157419681549072, "learning_rate": 1.6445361846474665e-05, "loss": 0.362, "step": 28428 }, { "epoch": 4.640790171829721, "grad_norm": 2.1037938594818115, "learning_rate": 1.6445119219042003e-05, "loss": 0.3437, "step": 28429 }, { "epoch": 4.6409534304722255, "grad_norm": 2.123595714569092, "learning_rate": 1.644487658511911e-05, "loss": 0.3734, "step": 28430 }, { "epoch": 4.64111668911473, "grad_norm": 2.2182164192199707, "learning_rate": 1.6444633944706226e-05, "loss": 0.3851, "step": 28431 }, { "epoch": 4.641279947757234, "grad_norm": 2.2265102863311768, "learning_rate": 1.64443912978036e-05, "loss": 0.3996, "step": 28432 }, { "epoch": 4.641443206399739, "grad_norm": 1.9814560413360596, "learning_rate": 1.6444148644411474e-05, "loss": 0.386, "step": 28433 }, { "epoch": 4.641606465042243, "grad_norm": 2.299288511276245, "learning_rate": 1.6443905984530092e-05, "loss": 0.4126, "step": 28434 }, { "epoch": 4.641769723684748, "grad_norm": 2.248087167739868, "learning_rate": 1.6443663318159695e-05, "loss": 0.4093, "step": 28435 }, { "epoch": 4.641932982327252, "grad_norm": 2.459293842315674, "learning_rate": 1.6443420645300532e-05, "loss": 0.402, "step": 28436 }, { "epoch": 4.642096240969757, "grad_norm": 2.668389320373535, "learning_rate": 1.6443177965952845e-05, "loss": 0.4465, "step": 28437 }, { "epoch": 4.642259499612261, "grad_norm": 2.4394969940185547, "learning_rate": 1.6442935280116883e-05, "loss": 0.3937, "step": 28438 }, { "epoch": 4.6424227582547655, "grad_norm": 2.1834168434143066, "learning_rate": 1.6442692587792882e-05, "loss": 0.4467, "step": 28439 }, { "epoch": 4.642586016897269, "grad_norm": 2.1453158855438232, "learning_rate": 1.6442449888981096e-05, "loss": 0.3347, "step": 28440 }, { "epoch": 4.642749275539774, "grad_norm": 2.4099509716033936, "learning_rate": 1.6442207183681763e-05, "loss": 0.4649, "step": 28441 }, { "epoch": 4.642912534182278, "grad_norm": 1.9114062786102295, "learning_rate": 1.644196447189513e-05, "loss": 0.3349, "step": 28442 }, { "epoch": 4.643075792824782, "grad_norm": 2.0394821166992188, "learning_rate": 1.644172175362144e-05, "loss": 0.3713, "step": 28443 }, { "epoch": 4.643239051467287, "grad_norm": 2.5772080421447754, "learning_rate": 1.6441479028860938e-05, "loss": 0.4518, "step": 28444 }, { "epoch": 4.643402310109791, "grad_norm": 1.9695035219192505, "learning_rate": 1.644123629761387e-05, "loss": 0.3349, "step": 28445 }, { "epoch": 4.643565568752296, "grad_norm": 1.944463849067688, "learning_rate": 1.6440993559880475e-05, "loss": 0.3815, "step": 28446 }, { "epoch": 4.6437288273948, "grad_norm": 1.834728479385376, "learning_rate": 1.6440750815661002e-05, "loss": 0.3451, "step": 28447 }, { "epoch": 4.6438920860373045, "grad_norm": 2.280139923095703, "learning_rate": 1.6440508064955695e-05, "loss": 0.4207, "step": 28448 }, { "epoch": 4.644055344679809, "grad_norm": 2.0018961429595947, "learning_rate": 1.6440265307764797e-05, "loss": 0.4215, "step": 28449 }, { "epoch": 4.644218603322313, "grad_norm": 1.7762665748596191, "learning_rate": 1.6440022544088553e-05, "loss": 0.3582, "step": 28450 }, { "epoch": 4.644381861964818, "grad_norm": 2.379319190979004, "learning_rate": 1.643977977392721e-05, "loss": 0.4696, "step": 28451 }, { "epoch": 4.644545120607322, "grad_norm": 2.138568162918091, "learning_rate": 1.6439536997281013e-05, "loss": 0.389, "step": 28452 }, { "epoch": 4.644708379249827, "grad_norm": 1.7893224954605103, "learning_rate": 1.6439294214150197e-05, "loss": 0.3503, "step": 28453 }, { "epoch": 4.644871637892331, "grad_norm": 2.2374765872955322, "learning_rate": 1.6439051424535023e-05, "loss": 0.3561, "step": 28454 }, { "epoch": 4.645034896534836, "grad_norm": 2.047347068786621, "learning_rate": 1.643880862843572e-05, "loss": 0.3364, "step": 28455 }, { "epoch": 4.64519815517734, "grad_norm": 2.2879133224487305, "learning_rate": 1.643856582585254e-05, "loss": 0.3798, "step": 28456 }, { "epoch": 4.6453614138198445, "grad_norm": 2.0849006175994873, "learning_rate": 1.6438323016785724e-05, "loss": 0.3868, "step": 28457 }, { "epoch": 4.645524672462349, "grad_norm": 1.9480361938476562, "learning_rate": 1.643808020123552e-05, "loss": 0.3655, "step": 28458 }, { "epoch": 4.6456879311048525, "grad_norm": 2.201508045196533, "learning_rate": 1.643783737920217e-05, "loss": 0.4215, "step": 28459 }, { "epoch": 4.645851189747357, "grad_norm": 2.2193703651428223, "learning_rate": 1.643759455068592e-05, "loss": 0.4186, "step": 28460 }, { "epoch": 4.646014448389861, "grad_norm": 2.016423463821411, "learning_rate": 1.6437351715687016e-05, "loss": 0.4085, "step": 28461 }, { "epoch": 4.646177707032366, "grad_norm": 2.095252513885498, "learning_rate": 1.6437108874205697e-05, "loss": 0.4143, "step": 28462 }, { "epoch": 4.64634096567487, "grad_norm": 2.392005205154419, "learning_rate": 1.643686602624221e-05, "loss": 0.4791, "step": 28463 }, { "epoch": 4.646504224317375, "grad_norm": 1.9145203828811646, "learning_rate": 1.6436623171796805e-05, "loss": 0.3651, "step": 28464 }, { "epoch": 4.646667482959879, "grad_norm": 1.9324891567230225, "learning_rate": 1.643638031086972e-05, "loss": 0.353, "step": 28465 }, { "epoch": 4.6468307416023835, "grad_norm": 2.1116838455200195, "learning_rate": 1.64361374434612e-05, "loss": 0.372, "step": 28466 }, { "epoch": 4.646994000244888, "grad_norm": 2.07033109664917, "learning_rate": 1.6435894569571496e-05, "loss": 0.3966, "step": 28467 }, { "epoch": 4.647157258887392, "grad_norm": 2.382164239883423, "learning_rate": 1.6435651689200845e-05, "loss": 0.4068, "step": 28468 }, { "epoch": 4.647320517529897, "grad_norm": 2.2753586769104004, "learning_rate": 1.6435408802349497e-05, "loss": 0.3921, "step": 28469 }, { "epoch": 4.647483776172401, "grad_norm": 2.377209186553955, "learning_rate": 1.643516590901769e-05, "loss": 0.4574, "step": 28470 }, { "epoch": 4.647647034814906, "grad_norm": 1.6872838735580444, "learning_rate": 1.6434923009205673e-05, "loss": 0.3221, "step": 28471 }, { "epoch": 4.64781029345741, "grad_norm": 2.3868937492370605, "learning_rate": 1.6434680102913692e-05, "loss": 0.3925, "step": 28472 }, { "epoch": 4.647973552099915, "grad_norm": 2.2158305644989014, "learning_rate": 1.643443719014199e-05, "loss": 0.4356, "step": 28473 }, { "epoch": 4.648136810742419, "grad_norm": 1.928929328918457, "learning_rate": 1.643419427089081e-05, "loss": 0.3507, "step": 28474 }, { "epoch": 4.6483000693849235, "grad_norm": 2.037613868713379, "learning_rate": 1.64339513451604e-05, "loss": 0.3758, "step": 28475 }, { "epoch": 4.648463328027427, "grad_norm": 2.309755325317383, "learning_rate": 1.6433708412951e-05, "loss": 0.394, "step": 28476 }, { "epoch": 4.648626586669932, "grad_norm": 2.408405065536499, "learning_rate": 1.643346547426286e-05, "loss": 0.4619, "step": 28477 }, { "epoch": 4.648789845312436, "grad_norm": 1.7272595167160034, "learning_rate": 1.643322252909622e-05, "loss": 0.3571, "step": 28478 }, { "epoch": 4.64895310395494, "grad_norm": 2.0841166973114014, "learning_rate": 1.6432979577451328e-05, "loss": 0.4135, "step": 28479 }, { "epoch": 4.649116362597445, "grad_norm": 2.064304828643799, "learning_rate": 1.6432736619328425e-05, "loss": 0.3662, "step": 28480 }, { "epoch": 4.649279621239949, "grad_norm": 2.154238224029541, "learning_rate": 1.643249365472776e-05, "loss": 0.3785, "step": 28481 }, { "epoch": 4.649442879882454, "grad_norm": 2.590959072113037, "learning_rate": 1.6432250683649577e-05, "loss": 0.4711, "step": 28482 }, { "epoch": 4.649606138524958, "grad_norm": 2.0862748622894287, "learning_rate": 1.643200770609412e-05, "loss": 0.3755, "step": 28483 }, { "epoch": 4.6497693971674625, "grad_norm": 2.1520586013793945, "learning_rate": 1.643176472206163e-05, "loss": 0.3736, "step": 28484 }, { "epoch": 4.649932655809967, "grad_norm": 1.9777958393096924, "learning_rate": 1.6431521731552356e-05, "loss": 0.3982, "step": 28485 }, { "epoch": 4.650095914452471, "grad_norm": 2.294084072113037, "learning_rate": 1.643127873456654e-05, "loss": 0.4072, "step": 28486 }, { "epoch": 4.650259173094976, "grad_norm": 1.5974922180175781, "learning_rate": 1.6431035731104428e-05, "loss": 0.2921, "step": 28487 }, { "epoch": 4.65042243173748, "grad_norm": 1.9394092559814453, "learning_rate": 1.6430792721166264e-05, "loss": 0.3778, "step": 28488 }, { "epoch": 4.650585690379985, "grad_norm": 2.124501943588257, "learning_rate": 1.6430549704752295e-05, "loss": 0.416, "step": 28489 }, { "epoch": 4.650748949022489, "grad_norm": 1.9807313680648804, "learning_rate": 1.6430306681862763e-05, "loss": 0.3779, "step": 28490 }, { "epoch": 4.650912207664994, "grad_norm": 1.9933831691741943, "learning_rate": 1.6430063652497915e-05, "loss": 0.3859, "step": 28491 }, { "epoch": 4.651075466307498, "grad_norm": 1.659305453300476, "learning_rate": 1.6429820616657998e-05, "loss": 0.3064, "step": 28492 }, { "epoch": 4.651238724950002, "grad_norm": 1.985182762145996, "learning_rate": 1.6429577574343246e-05, "loss": 0.3604, "step": 28493 }, { "epoch": 4.651401983592507, "grad_norm": 2.0423595905303955, "learning_rate": 1.6429334525553917e-05, "loss": 0.5073, "step": 28494 }, { "epoch": 4.6515652422350104, "grad_norm": 2.07499361038208, "learning_rate": 1.6429091470290247e-05, "loss": 0.3762, "step": 28495 }, { "epoch": 4.651728500877515, "grad_norm": 1.8519810438156128, "learning_rate": 1.6428848408552484e-05, "loss": 0.3373, "step": 28496 }, { "epoch": 4.651891759520019, "grad_norm": 2.12734317779541, "learning_rate": 1.642860534034087e-05, "loss": 0.3626, "step": 28497 }, { "epoch": 4.652055018162524, "grad_norm": 2.5545494556427, "learning_rate": 1.642836226565566e-05, "loss": 0.3637, "step": 28498 }, { "epoch": 4.652218276805028, "grad_norm": 2.384190320968628, "learning_rate": 1.6428119184497086e-05, "loss": 0.4137, "step": 28499 }, { "epoch": 4.652381535447533, "grad_norm": 1.9204061031341553, "learning_rate": 1.6427876096865394e-05, "loss": 0.3501, "step": 28500 }, { "epoch": 4.652544794090037, "grad_norm": 2.244488000869751, "learning_rate": 1.6427633002760838e-05, "loss": 0.3777, "step": 28501 }, { "epoch": 4.6527080527325415, "grad_norm": 2.2822535037994385, "learning_rate": 1.6427389902183653e-05, "loss": 0.4221, "step": 28502 }, { "epoch": 4.652871311375046, "grad_norm": 2.0819361209869385, "learning_rate": 1.6427146795134093e-05, "loss": 0.3737, "step": 28503 }, { "epoch": 4.65303457001755, "grad_norm": 1.943116545677185, "learning_rate": 1.6426903681612395e-05, "loss": 0.3856, "step": 28504 }, { "epoch": 4.653197828660055, "grad_norm": 2.270340919494629, "learning_rate": 1.6426660561618806e-05, "loss": 0.3956, "step": 28505 }, { "epoch": 4.653361087302559, "grad_norm": 2.331511974334717, "learning_rate": 1.6426417435153574e-05, "loss": 0.4255, "step": 28506 }, { "epoch": 4.653524345945064, "grad_norm": 2.5644969940185547, "learning_rate": 1.6426174302216935e-05, "loss": 0.4512, "step": 28507 }, { "epoch": 4.653687604587568, "grad_norm": 2.4204354286193848, "learning_rate": 1.6425931162809144e-05, "loss": 0.4048, "step": 28508 }, { "epoch": 4.653850863230073, "grad_norm": 1.951276421546936, "learning_rate": 1.6425688016930442e-05, "loss": 0.3765, "step": 28509 }, { "epoch": 4.654014121872577, "grad_norm": 1.8129829168319702, "learning_rate": 1.642544486458108e-05, "loss": 0.4069, "step": 28510 }, { "epoch": 4.6541773805150815, "grad_norm": 2.2848634719848633, "learning_rate": 1.6425201705761288e-05, "loss": 0.419, "step": 28511 }, { "epoch": 4.654340639157585, "grad_norm": 1.931840181350708, "learning_rate": 1.642495854047132e-05, "loss": 0.3036, "step": 28512 }, { "epoch": 4.654503897800089, "grad_norm": 2.5335347652435303, "learning_rate": 1.642471536871143e-05, "loss": 0.4026, "step": 28513 }, { "epoch": 4.654667156442594, "grad_norm": 2.3281166553497314, "learning_rate": 1.6424472190481845e-05, "loss": 0.4454, "step": 28514 }, { "epoch": 4.654830415085098, "grad_norm": 2.6595897674560547, "learning_rate": 1.642422900578282e-05, "loss": 0.4268, "step": 28515 }, { "epoch": 4.654993673727603, "grad_norm": 2.7172389030456543, "learning_rate": 1.6423985814614597e-05, "loss": 0.4189, "step": 28516 }, { "epoch": 4.655156932370107, "grad_norm": 1.8699967861175537, "learning_rate": 1.6423742616977425e-05, "loss": 0.3501, "step": 28517 }, { "epoch": 4.655320191012612, "grad_norm": 2.5237793922424316, "learning_rate": 1.6423499412871543e-05, "loss": 0.4057, "step": 28518 }, { "epoch": 4.655483449655116, "grad_norm": 1.973233938217163, "learning_rate": 1.64232562022972e-05, "loss": 0.3778, "step": 28519 }, { "epoch": 4.6556467082976205, "grad_norm": 2.3260657787323, "learning_rate": 1.6423012985254638e-05, "loss": 0.3967, "step": 28520 }, { "epoch": 4.655809966940125, "grad_norm": 2.3184659481048584, "learning_rate": 1.6422769761744105e-05, "loss": 0.4031, "step": 28521 }, { "epoch": 4.655973225582629, "grad_norm": 2.7194151878356934, "learning_rate": 1.6422526531765846e-05, "loss": 0.3886, "step": 28522 }, { "epoch": 4.656136484225134, "grad_norm": 1.6618014574050903, "learning_rate": 1.6422283295320104e-05, "loss": 0.3458, "step": 28523 }, { "epoch": 4.656299742867638, "grad_norm": 2.089754104614258, "learning_rate": 1.6422040052407124e-05, "loss": 0.3679, "step": 28524 }, { "epoch": 4.656463001510143, "grad_norm": 2.220832347869873, "learning_rate": 1.6421796803027148e-05, "loss": 0.3968, "step": 28525 }, { "epoch": 4.656626260152647, "grad_norm": 2.522970199584961, "learning_rate": 1.6421553547180428e-05, "loss": 0.3825, "step": 28526 }, { "epoch": 4.656789518795152, "grad_norm": 1.672874927520752, "learning_rate": 1.6421310284867204e-05, "loss": 0.3686, "step": 28527 }, { "epoch": 4.656952777437656, "grad_norm": 2.127774238586426, "learning_rate": 1.642106701608772e-05, "loss": 0.4086, "step": 28528 }, { "epoch": 4.65711603608016, "grad_norm": 2.76480770111084, "learning_rate": 1.642082374084223e-05, "loss": 0.4947, "step": 28529 }, { "epoch": 4.657279294722664, "grad_norm": 2.1241660118103027, "learning_rate": 1.6420580459130965e-05, "loss": 0.3794, "step": 28530 }, { "epoch": 4.657442553365168, "grad_norm": 2.2475297451019287, "learning_rate": 1.6420337170954184e-05, "loss": 0.425, "step": 28531 }, { "epoch": 4.657605812007673, "grad_norm": 2.2354917526245117, "learning_rate": 1.642009387631212e-05, "loss": 0.3749, "step": 28532 }, { "epoch": 4.657769070650177, "grad_norm": 1.8077131509780884, "learning_rate": 1.6419850575205026e-05, "loss": 0.3632, "step": 28533 }, { "epoch": 4.657932329292682, "grad_norm": 2.1770408153533936, "learning_rate": 1.641960726763314e-05, "loss": 0.4128, "step": 28534 }, { "epoch": 4.658095587935186, "grad_norm": 2.2410032749176025, "learning_rate": 1.6419363953596715e-05, "loss": 0.428, "step": 28535 }, { "epoch": 4.658258846577691, "grad_norm": 2.105649709701538, "learning_rate": 1.6419120633095994e-05, "loss": 0.4159, "step": 28536 }, { "epoch": 4.658422105220195, "grad_norm": 2.3484230041503906, "learning_rate": 1.641887730613122e-05, "loss": 0.4399, "step": 28537 }, { "epoch": 4.6585853638626995, "grad_norm": 2.221081495285034, "learning_rate": 1.6418633972702634e-05, "loss": 0.4259, "step": 28538 }, { "epoch": 4.658748622505204, "grad_norm": 1.9496878385543823, "learning_rate": 1.6418390632810486e-05, "loss": 0.3752, "step": 28539 }, { "epoch": 4.658911881147708, "grad_norm": 1.9218829870224, "learning_rate": 1.641814728645502e-05, "loss": 0.3907, "step": 28540 }, { "epoch": 4.659075139790213, "grad_norm": 2.455893039703369, "learning_rate": 1.6417903933636485e-05, "loss": 0.4292, "step": 28541 }, { "epoch": 4.659238398432717, "grad_norm": 1.6968809366226196, "learning_rate": 1.6417660574355116e-05, "loss": 0.3194, "step": 28542 }, { "epoch": 4.659401657075222, "grad_norm": 2.0744833946228027, "learning_rate": 1.6417417208611174e-05, "loss": 0.3858, "step": 28543 }, { "epoch": 4.659564915717726, "grad_norm": 2.2316465377807617, "learning_rate": 1.6417173836404888e-05, "loss": 0.3995, "step": 28544 }, { "epoch": 4.659728174360231, "grad_norm": 2.089707851409912, "learning_rate": 1.6416930457736507e-05, "loss": 0.3745, "step": 28545 }, { "epoch": 4.659891433002734, "grad_norm": 1.9291939735412598, "learning_rate": 1.6416687072606285e-05, "loss": 0.353, "step": 28546 }, { "epoch": 4.6600546916452394, "grad_norm": 3.195741891860962, "learning_rate": 1.6416443681014458e-05, "loss": 0.4564, "step": 28547 }, { "epoch": 4.660217950287743, "grad_norm": 2.5958518981933594, "learning_rate": 1.6416200282961274e-05, "loss": 0.4041, "step": 28548 }, { "epoch": 4.660381208930247, "grad_norm": 2.2977449893951416, "learning_rate": 1.641595687844698e-05, "loss": 0.4674, "step": 28549 }, { "epoch": 4.660544467572752, "grad_norm": 2.176964044570923, "learning_rate": 1.6415713467471817e-05, "loss": 0.4217, "step": 28550 }, { "epoch": 4.660707726215256, "grad_norm": 2.426849603652954, "learning_rate": 1.6415470050036032e-05, "loss": 0.4144, "step": 28551 }, { "epoch": 4.660870984857761, "grad_norm": 2.2998061180114746, "learning_rate": 1.641522662613987e-05, "loss": 0.4523, "step": 28552 }, { "epoch": 4.661034243500265, "grad_norm": 2.1715807914733887, "learning_rate": 1.641498319578358e-05, "loss": 0.3598, "step": 28553 }, { "epoch": 4.66119750214277, "grad_norm": 2.105309009552002, "learning_rate": 1.64147397589674e-05, "loss": 0.3919, "step": 28554 }, { "epoch": 4.661360760785274, "grad_norm": 2.309572458267212, "learning_rate": 1.641449631569158e-05, "loss": 0.4194, "step": 28555 }, { "epoch": 4.6615240194277785, "grad_norm": 2.199190616607666, "learning_rate": 1.6414252865956366e-05, "loss": 0.4193, "step": 28556 }, { "epoch": 4.661687278070283, "grad_norm": 1.846699833869934, "learning_rate": 1.6414009409761997e-05, "loss": 0.3909, "step": 28557 }, { "epoch": 4.661850536712787, "grad_norm": 1.853484034538269, "learning_rate": 1.6413765947108724e-05, "loss": 0.3799, "step": 28558 }, { "epoch": 4.662013795355292, "grad_norm": 2.0875625610351562, "learning_rate": 1.641352247799679e-05, "loss": 0.371, "step": 28559 }, { "epoch": 4.662177053997796, "grad_norm": 2.418536424636841, "learning_rate": 1.641327900242644e-05, "loss": 0.3917, "step": 28560 }, { "epoch": 4.662340312640301, "grad_norm": 1.8005512952804565, "learning_rate": 1.6413035520397923e-05, "loss": 0.3689, "step": 28561 }, { "epoch": 4.662503571282805, "grad_norm": 2.068455457687378, "learning_rate": 1.6412792031911476e-05, "loss": 0.3879, "step": 28562 }, { "epoch": 4.662666829925309, "grad_norm": 2.0990490913391113, "learning_rate": 1.6412548536967356e-05, "loss": 0.3383, "step": 28563 }, { "epoch": 4.662830088567814, "grad_norm": 1.8716436624526978, "learning_rate": 1.6412305035565798e-05, "loss": 0.3879, "step": 28564 }, { "epoch": 4.6629933472103176, "grad_norm": 2.1570255756378174, "learning_rate": 1.6412061527707047e-05, "loss": 0.3539, "step": 28565 }, { "epoch": 4.663156605852822, "grad_norm": 2.6918811798095703, "learning_rate": 1.6411818013391357e-05, "loss": 0.4784, "step": 28566 }, { "epoch": 4.663319864495326, "grad_norm": 2.1085894107818604, "learning_rate": 1.6411574492618966e-05, "loss": 0.391, "step": 28567 }, { "epoch": 4.663483123137831, "grad_norm": 2.1680002212524414, "learning_rate": 1.641133096539012e-05, "loss": 0.4394, "step": 28568 }, { "epoch": 4.663646381780335, "grad_norm": 2.131624698638916, "learning_rate": 1.6411087431705064e-05, "loss": 0.3982, "step": 28569 }, { "epoch": 4.66380964042284, "grad_norm": 2.169774055480957, "learning_rate": 1.6410843891564047e-05, "loss": 0.3611, "step": 28570 }, { "epoch": 4.663972899065344, "grad_norm": 1.6980819702148438, "learning_rate": 1.6410600344967312e-05, "loss": 0.3083, "step": 28571 }, { "epoch": 4.664136157707849, "grad_norm": 2.414783239364624, "learning_rate": 1.64103567919151e-05, "loss": 0.4993, "step": 28572 }, { "epoch": 4.664299416350353, "grad_norm": 2.334036111831665, "learning_rate": 1.641011323240767e-05, "loss": 0.4575, "step": 28573 }, { "epoch": 4.6644626749928575, "grad_norm": 1.8811001777648926, "learning_rate": 1.640986966644525e-05, "loss": 0.3924, "step": 28574 }, { "epoch": 4.664625933635362, "grad_norm": 1.9071667194366455, "learning_rate": 1.6409626094028098e-05, "loss": 0.3207, "step": 28575 }, { "epoch": 4.664789192277866, "grad_norm": 2.429517984390259, "learning_rate": 1.640938251515645e-05, "loss": 0.4407, "step": 28576 }, { "epoch": 4.664952450920371, "grad_norm": 2.250641107559204, "learning_rate": 1.6409138929830556e-05, "loss": 0.372, "step": 28577 }, { "epoch": 4.665115709562875, "grad_norm": 1.9664571285247803, "learning_rate": 1.640889533805066e-05, "loss": 0.391, "step": 28578 }, { "epoch": 4.66527896820538, "grad_norm": 1.8402682542800903, "learning_rate": 1.640865173981701e-05, "loss": 0.3864, "step": 28579 }, { "epoch": 4.665442226847884, "grad_norm": 2.031315803527832, "learning_rate": 1.640840813512985e-05, "loss": 0.378, "step": 28580 }, { "epoch": 4.665605485490389, "grad_norm": 1.988773226737976, "learning_rate": 1.640816452398942e-05, "loss": 0.3943, "step": 28581 }, { "epoch": 4.665768744132892, "grad_norm": 1.7580480575561523, "learning_rate": 1.6407920906395975e-05, "loss": 0.3514, "step": 28582 }, { "epoch": 4.6659320027753965, "grad_norm": 2.1017093658447266, "learning_rate": 1.6407677282349755e-05, "loss": 0.3788, "step": 28583 }, { "epoch": 4.666095261417901, "grad_norm": 2.026638984680176, "learning_rate": 1.6407433651851005e-05, "loss": 0.3858, "step": 28584 }, { "epoch": 4.666258520060405, "grad_norm": 2.206228017807007, "learning_rate": 1.640719001489997e-05, "loss": 0.4193, "step": 28585 }, { "epoch": 4.66642177870291, "grad_norm": 2.2784321308135986, "learning_rate": 1.6406946371496897e-05, "loss": 0.3994, "step": 28586 }, { "epoch": 4.666585037345414, "grad_norm": 2.4952967166900635, "learning_rate": 1.640670272164203e-05, "loss": 0.4332, "step": 28587 }, { "epoch": 4.666748295987919, "grad_norm": 2.3136117458343506, "learning_rate": 1.6406459065335616e-05, "loss": 0.3936, "step": 28588 }, { "epoch": 4.666911554630423, "grad_norm": 2.4129364490509033, "learning_rate": 1.64062154025779e-05, "loss": 0.3974, "step": 28589 }, { "epoch": 4.667074813272928, "grad_norm": 2.092874765396118, "learning_rate": 1.6405971733369128e-05, "loss": 0.4048, "step": 28590 }, { "epoch": 4.667238071915432, "grad_norm": 2.336116313934326, "learning_rate": 1.640572805770954e-05, "loss": 0.432, "step": 28591 }, { "epoch": 4.6674013305579365, "grad_norm": 2.270686626434326, "learning_rate": 1.640548437559939e-05, "loss": 0.3727, "step": 28592 }, { "epoch": 4.667564589200441, "grad_norm": 2.4650301933288574, "learning_rate": 1.6405240687038916e-05, "loss": 0.4157, "step": 28593 }, { "epoch": 4.667727847842945, "grad_norm": 2.820289373397827, "learning_rate": 1.6404996992028364e-05, "loss": 0.4427, "step": 28594 }, { "epoch": 4.66789110648545, "grad_norm": 2.229430913925171, "learning_rate": 1.640475329056799e-05, "loss": 0.3284, "step": 28595 }, { "epoch": 4.668054365127954, "grad_norm": 1.9549357891082764, "learning_rate": 1.6404509582658023e-05, "loss": 0.3699, "step": 28596 }, { "epoch": 4.668217623770459, "grad_norm": 1.848164439201355, "learning_rate": 1.6404265868298718e-05, "loss": 0.2908, "step": 28597 }, { "epoch": 4.668380882412963, "grad_norm": 1.854788899421692, "learning_rate": 1.640402214749032e-05, "loss": 0.3595, "step": 28598 }, { "epoch": 4.668544141055467, "grad_norm": 2.4602341651916504, "learning_rate": 1.6403778420233073e-05, "loss": 0.3959, "step": 28599 }, { "epoch": 4.668707399697972, "grad_norm": 1.934475064277649, "learning_rate": 1.6403534686527223e-05, "loss": 0.3919, "step": 28600 }, { "epoch": 4.6688706583404755, "grad_norm": 2.7812695503234863, "learning_rate": 1.6403290946373017e-05, "loss": 0.42, "step": 28601 }, { "epoch": 4.66903391698298, "grad_norm": 2.0687315464019775, "learning_rate": 1.6403047199770702e-05, "loss": 0.3728, "step": 28602 }, { "epoch": 4.669197175625484, "grad_norm": 2.543565273284912, "learning_rate": 1.640280344672051e-05, "loss": 0.3449, "step": 28603 }, { "epoch": 4.669360434267989, "grad_norm": 2.1109631061553955, "learning_rate": 1.6402559687222705e-05, "loss": 0.4138, "step": 28604 }, { "epoch": 4.669523692910493, "grad_norm": 2.330456495285034, "learning_rate": 1.640231592127752e-05, "loss": 0.4348, "step": 28605 }, { "epoch": 4.669686951552998, "grad_norm": 2.170708656311035, "learning_rate": 1.6402072148885207e-05, "loss": 0.3927, "step": 28606 }, { "epoch": 4.669850210195502, "grad_norm": 2.3798739910125732, "learning_rate": 1.6401828370046007e-05, "loss": 0.4115, "step": 28607 }, { "epoch": 4.670013468838007, "grad_norm": 2.118769407272339, "learning_rate": 1.6401584584760168e-05, "loss": 0.3531, "step": 28608 }, { "epoch": 4.670176727480511, "grad_norm": 1.7823143005371094, "learning_rate": 1.6401340793027937e-05, "loss": 0.3308, "step": 28609 }, { "epoch": 4.6703399861230155, "grad_norm": 1.989909291267395, "learning_rate": 1.6401096994849558e-05, "loss": 0.3868, "step": 28610 }, { "epoch": 4.67050324476552, "grad_norm": 2.387284755706787, "learning_rate": 1.6400853190225272e-05, "loss": 0.4983, "step": 28611 }, { "epoch": 4.670666503408024, "grad_norm": 2.0483148097991943, "learning_rate": 1.640060937915533e-05, "loss": 0.3589, "step": 28612 }, { "epoch": 4.670829762050529, "grad_norm": 2.451366424560547, "learning_rate": 1.640036556163998e-05, "loss": 0.4072, "step": 28613 }, { "epoch": 4.670993020693033, "grad_norm": 1.8999677896499634, "learning_rate": 1.640012173767946e-05, "loss": 0.3408, "step": 28614 }, { "epoch": 4.671156279335538, "grad_norm": 2.345567226409912, "learning_rate": 1.6399877907274018e-05, "loss": 0.4078, "step": 28615 }, { "epoch": 4.671319537978041, "grad_norm": 2.0133790969848633, "learning_rate": 1.6399634070423903e-05, "loss": 0.3359, "step": 28616 }, { "epoch": 4.6714827966205466, "grad_norm": 2.4365780353546143, "learning_rate": 1.6399390227129357e-05, "loss": 0.3865, "step": 28617 }, { "epoch": 4.67164605526305, "grad_norm": 1.8564722537994385, "learning_rate": 1.6399146377390626e-05, "loss": 0.4291, "step": 28618 }, { "epoch": 4.6718093139055545, "grad_norm": 2.0761406421661377, "learning_rate": 1.6398902521207957e-05, "loss": 0.4032, "step": 28619 }, { "epoch": 4.671972572548059, "grad_norm": 2.0418972969055176, "learning_rate": 1.6398658658581596e-05, "loss": 0.3376, "step": 28620 }, { "epoch": 4.672135831190563, "grad_norm": 1.8787649869918823, "learning_rate": 1.6398414789511784e-05, "loss": 0.3885, "step": 28621 }, { "epoch": 4.672299089833068, "grad_norm": 1.7795219421386719, "learning_rate": 1.6398170913998775e-05, "loss": 0.3525, "step": 28622 }, { "epoch": 4.672462348475572, "grad_norm": 2.2758336067199707, "learning_rate": 1.6397927032042807e-05, "loss": 0.4199, "step": 28623 }, { "epoch": 4.672625607118077, "grad_norm": 2.059455394744873, "learning_rate": 1.639768314364413e-05, "loss": 0.3833, "step": 28624 }, { "epoch": 4.672788865760581, "grad_norm": 1.9692060947418213, "learning_rate": 1.6397439248802986e-05, "loss": 0.4228, "step": 28625 }, { "epoch": 4.672952124403086, "grad_norm": 2.8504693508148193, "learning_rate": 1.639719534751962e-05, "loss": 0.5645, "step": 28626 }, { "epoch": 4.67311538304559, "grad_norm": 2.0793633460998535, "learning_rate": 1.6396951439794284e-05, "loss": 0.3529, "step": 28627 }, { "epoch": 4.6732786416880945, "grad_norm": 1.7869229316711426, "learning_rate": 1.6396707525627218e-05, "loss": 0.3541, "step": 28628 }, { "epoch": 4.673441900330599, "grad_norm": 2.6377756595611572, "learning_rate": 1.639646360501867e-05, "loss": 0.4222, "step": 28629 }, { "epoch": 4.673605158973103, "grad_norm": 2.361788511276245, "learning_rate": 1.639621967796888e-05, "loss": 0.4219, "step": 28630 }, { "epoch": 4.673768417615608, "grad_norm": 2.8132565021514893, "learning_rate": 1.6395975744478106e-05, "loss": 0.4493, "step": 28631 }, { "epoch": 4.673931676258112, "grad_norm": 2.0287868976593018, "learning_rate": 1.6395731804546582e-05, "loss": 0.3442, "step": 28632 }, { "epoch": 4.674094934900617, "grad_norm": 2.2658936977386475, "learning_rate": 1.6395487858174558e-05, "loss": 0.4306, "step": 28633 }, { "epoch": 4.674258193543121, "grad_norm": 2.1790308952331543, "learning_rate": 1.639524390536228e-05, "loss": 0.4145, "step": 28634 }, { "epoch": 4.674421452185625, "grad_norm": 2.51371169090271, "learning_rate": 1.6394999946109993e-05, "loss": 0.3867, "step": 28635 }, { "epoch": 4.674584710828129, "grad_norm": 2.145270347595215, "learning_rate": 1.6394755980417944e-05, "loss": 0.4042, "step": 28636 }, { "epoch": 4.6747479694706335, "grad_norm": 1.950244665145874, "learning_rate": 1.6394512008286377e-05, "loss": 0.3808, "step": 28637 }, { "epoch": 4.674911228113138, "grad_norm": 2.3539583683013916, "learning_rate": 1.6394268029715538e-05, "loss": 0.3835, "step": 28638 }, { "epoch": 4.675074486755642, "grad_norm": 2.1203887462615967, "learning_rate": 1.639402404470567e-05, "loss": 0.419, "step": 28639 }, { "epoch": 4.675237745398147, "grad_norm": 1.8706096410751343, "learning_rate": 1.639378005325702e-05, "loss": 0.3559, "step": 28640 }, { "epoch": 4.675401004040651, "grad_norm": 2.2982993125915527, "learning_rate": 1.6393536055369842e-05, "loss": 0.3951, "step": 28641 }, { "epoch": 4.675564262683156, "grad_norm": 2.445674180984497, "learning_rate": 1.6393292051044373e-05, "loss": 0.4701, "step": 28642 }, { "epoch": 4.67572752132566, "grad_norm": 2.2200138568878174, "learning_rate": 1.6393048040280857e-05, "loss": 0.3621, "step": 28643 }, { "epoch": 4.675890779968165, "grad_norm": 2.1708006858825684, "learning_rate": 1.6392804023079548e-05, "loss": 0.4016, "step": 28644 }, { "epoch": 4.676054038610669, "grad_norm": 2.6431541442871094, "learning_rate": 1.6392559999440684e-05, "loss": 0.4286, "step": 28645 }, { "epoch": 4.6762172972531735, "grad_norm": 2.0770153999328613, "learning_rate": 1.6392315969364512e-05, "loss": 0.3793, "step": 28646 }, { "epoch": 4.676380555895678, "grad_norm": 2.8407294750213623, "learning_rate": 1.6392071932851282e-05, "loss": 0.4246, "step": 28647 }, { "epoch": 4.676543814538182, "grad_norm": 2.0894932746887207, "learning_rate": 1.639182788990124e-05, "loss": 0.3824, "step": 28648 }, { "epoch": 4.676707073180687, "grad_norm": 2.428302526473999, "learning_rate": 1.6391583840514623e-05, "loss": 0.4399, "step": 28649 }, { "epoch": 4.676870331823191, "grad_norm": 1.6439815759658813, "learning_rate": 1.6391339784691685e-05, "loss": 0.3314, "step": 28650 }, { "epoch": 4.677033590465696, "grad_norm": 1.7985156774520874, "learning_rate": 1.6391095722432672e-05, "loss": 0.3598, "step": 28651 }, { "epoch": 4.677196849108199, "grad_norm": 2.470219850540161, "learning_rate": 1.6390851653737828e-05, "loss": 0.3964, "step": 28652 }, { "epoch": 4.6773601077507045, "grad_norm": 2.253272771835327, "learning_rate": 1.6390607578607394e-05, "loss": 0.385, "step": 28653 }, { "epoch": 4.677523366393208, "grad_norm": 2.425076723098755, "learning_rate": 1.639036349704162e-05, "loss": 0.4157, "step": 28654 }, { "epoch": 4.6776866250357125, "grad_norm": 2.1170055866241455, "learning_rate": 1.6390119409040758e-05, "loss": 0.3969, "step": 28655 }, { "epoch": 4.677849883678217, "grad_norm": 2.26257061958313, "learning_rate": 1.638987531460504e-05, "loss": 0.4378, "step": 28656 }, { "epoch": 4.678013142320721, "grad_norm": 2.0426108837127686, "learning_rate": 1.638963121373472e-05, "loss": 0.3438, "step": 28657 }, { "epoch": 4.678176400963226, "grad_norm": 2.0535337924957275, "learning_rate": 1.6389387106430046e-05, "loss": 0.4389, "step": 28658 }, { "epoch": 4.67833965960573, "grad_norm": 2.0949716567993164, "learning_rate": 1.6389142992691264e-05, "loss": 0.4016, "step": 28659 }, { "epoch": 4.678502918248235, "grad_norm": 2.433583974838257, "learning_rate": 1.638889887251861e-05, "loss": 0.4307, "step": 28660 }, { "epoch": 4.678666176890739, "grad_norm": 2.1473824977874756, "learning_rate": 1.638865474591234e-05, "loss": 0.3635, "step": 28661 }, { "epoch": 4.678829435533244, "grad_norm": 2.0556588172912598, "learning_rate": 1.6388410612872696e-05, "loss": 0.3816, "step": 28662 }, { "epoch": 4.678992694175748, "grad_norm": 2.1100685596466064, "learning_rate": 1.638816647339992e-05, "loss": 0.4052, "step": 28663 }, { "epoch": 4.6791559528182525, "grad_norm": 2.2983460426330566, "learning_rate": 1.638792232749427e-05, "loss": 0.4101, "step": 28664 }, { "epoch": 4.679319211460757, "grad_norm": 1.912756323814392, "learning_rate": 1.638767817515598e-05, "loss": 0.366, "step": 28665 }, { "epoch": 4.679482470103261, "grad_norm": 1.8561142683029175, "learning_rate": 1.6387434016385298e-05, "loss": 0.3821, "step": 28666 }, { "epoch": 4.679645728745766, "grad_norm": 1.4853293895721436, "learning_rate": 1.6387189851182475e-05, "loss": 0.3374, "step": 28667 }, { "epoch": 4.67980898738827, "grad_norm": 1.716313123703003, "learning_rate": 1.638694567954775e-05, "loss": 0.3268, "step": 28668 }, { "epoch": 4.679972246030774, "grad_norm": 2.467520236968994, "learning_rate": 1.6386701501481373e-05, "loss": 0.4487, "step": 28669 }, { "epoch": 4.680135504673279, "grad_norm": 2.05816650390625, "learning_rate": 1.638645731698359e-05, "loss": 0.4223, "step": 28670 }, { "epoch": 4.680298763315783, "grad_norm": 2.1716392040252686, "learning_rate": 1.638621312605465e-05, "loss": 0.3511, "step": 28671 }, { "epoch": 4.680462021958287, "grad_norm": 2.370850086212158, "learning_rate": 1.6385968928694792e-05, "loss": 0.4458, "step": 28672 }, { "epoch": 4.6806252806007915, "grad_norm": 1.9072530269622803, "learning_rate": 1.6385724724904264e-05, "loss": 0.3345, "step": 28673 }, { "epoch": 4.680788539243296, "grad_norm": 2.253849506378174, "learning_rate": 1.638548051468331e-05, "loss": 0.4227, "step": 28674 }, { "epoch": 4.6809517978858, "grad_norm": 2.714916229248047, "learning_rate": 1.6385236298032183e-05, "loss": 0.4404, "step": 28675 }, { "epoch": 4.681115056528305, "grad_norm": 2.362095832824707, "learning_rate": 1.6384992074951124e-05, "loss": 0.3758, "step": 28676 }, { "epoch": 4.681278315170809, "grad_norm": 2.3243749141693115, "learning_rate": 1.638474784544038e-05, "loss": 0.4557, "step": 28677 }, { "epoch": 4.681441573813314, "grad_norm": 2.0827605724334717, "learning_rate": 1.6384503609500196e-05, "loss": 0.3619, "step": 28678 }, { "epoch": 4.681604832455818, "grad_norm": 2.0947840213775635, "learning_rate": 1.6384259367130818e-05, "loss": 0.3879, "step": 28679 }, { "epoch": 4.681768091098323, "grad_norm": 2.2868170738220215, "learning_rate": 1.6384015118332494e-05, "loss": 0.4222, "step": 28680 }, { "epoch": 4.681931349740827, "grad_norm": 2.3740975856781006, "learning_rate": 1.638377086310547e-05, "loss": 0.366, "step": 28681 }, { "epoch": 4.6820946083833315, "grad_norm": 2.3621013164520264, "learning_rate": 1.638352660144999e-05, "loss": 0.3945, "step": 28682 }, { "epoch": 4.682257867025836, "grad_norm": 1.9299166202545166, "learning_rate": 1.6383282333366297e-05, "loss": 0.3889, "step": 28683 }, { "epoch": 4.68242112566834, "grad_norm": 1.6637961864471436, "learning_rate": 1.638303805885464e-05, "loss": 0.3376, "step": 28684 }, { "epoch": 4.682584384310845, "grad_norm": 2.532325506210327, "learning_rate": 1.6382793777915268e-05, "loss": 0.4542, "step": 28685 }, { "epoch": 4.682747642953349, "grad_norm": 2.281404733657837, "learning_rate": 1.6382549490548423e-05, "loss": 0.4034, "step": 28686 }, { "epoch": 4.682910901595854, "grad_norm": 2.191602945327759, "learning_rate": 1.6382305196754357e-05, "loss": 0.3356, "step": 28687 }, { "epoch": 4.683074160238357, "grad_norm": 2.8338935375213623, "learning_rate": 1.6382060896533306e-05, "loss": 0.6449, "step": 28688 }, { "epoch": 4.683237418880862, "grad_norm": 2.174104690551758, "learning_rate": 1.638181658988552e-05, "loss": 0.3673, "step": 28689 }, { "epoch": 4.683400677523366, "grad_norm": 2.142505168914795, "learning_rate": 1.6381572276811254e-05, "loss": 0.3434, "step": 28690 }, { "epoch": 4.6835639361658705, "grad_norm": 2.27860689163208, "learning_rate": 1.638132795731074e-05, "loss": 0.3727, "step": 28691 }, { "epoch": 4.683727194808375, "grad_norm": 2.177827835083008, "learning_rate": 1.6381083631384232e-05, "loss": 0.3514, "step": 28692 }, { "epoch": 4.683890453450879, "grad_norm": 2.4961295127868652, "learning_rate": 1.6380839299031976e-05, "loss": 0.3817, "step": 28693 }, { "epoch": 4.684053712093384, "grad_norm": 2.238605499267578, "learning_rate": 1.6380594960254216e-05, "loss": 0.4158, "step": 28694 }, { "epoch": 4.684216970735888, "grad_norm": 2.0703086853027344, "learning_rate": 1.6380350615051196e-05, "loss": 0.3503, "step": 28695 }, { "epoch": 4.684380229378393, "grad_norm": 2.4751641750335693, "learning_rate": 1.6380106263423166e-05, "loss": 0.4196, "step": 28696 }, { "epoch": 4.684543488020897, "grad_norm": 2.0219035148620605, "learning_rate": 1.6379861905370372e-05, "loss": 0.342, "step": 28697 }, { "epoch": 4.684706746663402, "grad_norm": 2.202890157699585, "learning_rate": 1.6379617540893056e-05, "loss": 0.3706, "step": 28698 }, { "epoch": 4.684870005305906, "grad_norm": 2.2314586639404297, "learning_rate": 1.637937316999147e-05, "loss": 0.3893, "step": 28699 }, { "epoch": 4.6850332639484105, "grad_norm": 1.8561487197875977, "learning_rate": 1.6379128792665853e-05, "loss": 0.37, "step": 28700 }, { "epoch": 4.685196522590915, "grad_norm": 1.9045727252960205, "learning_rate": 1.637888440891646e-05, "loss": 0.3549, "step": 28701 }, { "epoch": 4.685359781233419, "grad_norm": 2.3313024044036865, "learning_rate": 1.6378640018743527e-05, "loss": 0.4279, "step": 28702 }, { "epoch": 4.685523039875924, "grad_norm": 1.8252894878387451, "learning_rate": 1.637839562214731e-05, "loss": 0.3722, "step": 28703 }, { "epoch": 4.685686298518428, "grad_norm": 2.4154937267303467, "learning_rate": 1.6378151219128048e-05, "loss": 0.4327, "step": 28704 }, { "epoch": 4.685849557160932, "grad_norm": 1.7476636171340942, "learning_rate": 1.6377906809685988e-05, "loss": 0.3452, "step": 28705 }, { "epoch": 4.686012815803437, "grad_norm": 2.2009923458099365, "learning_rate": 1.6377662393821382e-05, "loss": 0.3747, "step": 28706 }, { "epoch": 4.686176074445941, "grad_norm": 2.688472032546997, "learning_rate": 1.6377417971534468e-05, "loss": 0.4425, "step": 28707 }, { "epoch": 4.686339333088445, "grad_norm": 2.072172164916992, "learning_rate": 1.6377173542825495e-05, "loss": 0.3587, "step": 28708 }, { "epoch": 4.6865025917309495, "grad_norm": 2.2391514778137207, "learning_rate": 1.637692910769471e-05, "loss": 0.3757, "step": 28709 }, { "epoch": 4.686665850373454, "grad_norm": 2.0893561840057373, "learning_rate": 1.637668466614236e-05, "loss": 0.3913, "step": 28710 }, { "epoch": 4.686829109015958, "grad_norm": 2.4086227416992188, "learning_rate": 1.6376440218168695e-05, "loss": 0.3972, "step": 28711 }, { "epoch": 4.686992367658463, "grad_norm": 1.9687719345092773, "learning_rate": 1.6376195763773953e-05, "loss": 0.3531, "step": 28712 }, { "epoch": 4.687155626300967, "grad_norm": 2.412849187850952, "learning_rate": 1.637595130295838e-05, "loss": 0.4578, "step": 28713 }, { "epoch": 4.687318884943472, "grad_norm": 1.872291088104248, "learning_rate": 1.6375706835722228e-05, "loss": 0.3522, "step": 28714 }, { "epoch": 4.687482143585976, "grad_norm": 2.4627797603607178, "learning_rate": 1.6375462362065743e-05, "loss": 0.4409, "step": 28715 }, { "epoch": 4.687645402228481, "grad_norm": 2.2120749950408936, "learning_rate": 1.6375217881989165e-05, "loss": 0.4381, "step": 28716 }, { "epoch": 4.687808660870985, "grad_norm": 2.0488991737365723, "learning_rate": 1.637497339549275e-05, "loss": 0.3666, "step": 28717 }, { "epoch": 4.6879719195134895, "grad_norm": 1.4543137550354004, "learning_rate": 1.6374728902576734e-05, "loss": 0.3189, "step": 28718 }, { "epoch": 4.688135178155994, "grad_norm": 2.16852068901062, "learning_rate": 1.6374484403241368e-05, "loss": 0.4234, "step": 28719 }, { "epoch": 4.688298436798498, "grad_norm": 1.9653970003128052, "learning_rate": 1.63742398974869e-05, "loss": 0.3746, "step": 28720 }, { "epoch": 4.688461695441003, "grad_norm": 2.4084248542785645, "learning_rate": 1.6373995385313572e-05, "loss": 0.4322, "step": 28721 }, { "epoch": 4.688624954083506, "grad_norm": 2.23640775680542, "learning_rate": 1.6373750866721634e-05, "loss": 0.3679, "step": 28722 }, { "epoch": 4.688788212726012, "grad_norm": 2.6961379051208496, "learning_rate": 1.6373506341711327e-05, "loss": 0.4965, "step": 28723 }, { "epoch": 4.688951471368515, "grad_norm": 2.426563024520874, "learning_rate": 1.6373261810282906e-05, "loss": 0.4286, "step": 28724 }, { "epoch": 4.68911473001102, "grad_norm": 2.320528745651245, "learning_rate": 1.6373017272436608e-05, "loss": 0.4054, "step": 28725 }, { "epoch": 4.689277988653524, "grad_norm": 2.590161085128784, "learning_rate": 1.6372772728172687e-05, "loss": 0.4192, "step": 28726 }, { "epoch": 4.6894412472960285, "grad_norm": 2.34480357170105, "learning_rate": 1.6372528177491383e-05, "loss": 0.4127, "step": 28727 }, { "epoch": 4.689604505938533, "grad_norm": 1.9902199506759644, "learning_rate": 1.6372283620392947e-05, "loss": 0.363, "step": 28728 }, { "epoch": 4.689767764581037, "grad_norm": 2.2534189224243164, "learning_rate": 1.6372039056877617e-05, "loss": 0.3537, "step": 28729 }, { "epoch": 4.689931023223542, "grad_norm": 2.155782699584961, "learning_rate": 1.637179448694565e-05, "loss": 0.3544, "step": 28730 }, { "epoch": 4.690094281866046, "grad_norm": 2.1967594623565674, "learning_rate": 1.637154991059729e-05, "loss": 0.4167, "step": 28731 }, { "epoch": 4.690257540508551, "grad_norm": 2.167940378189087, "learning_rate": 1.637130532783278e-05, "loss": 0.3539, "step": 28732 }, { "epoch": 4.690420799151055, "grad_norm": 1.9833238124847412, "learning_rate": 1.6371060738652365e-05, "loss": 0.3313, "step": 28733 }, { "epoch": 4.69058405779356, "grad_norm": 2.12284255027771, "learning_rate": 1.6370816143056295e-05, "loss": 0.3986, "step": 28734 }, { "epoch": 4.690747316436064, "grad_norm": 2.337754964828491, "learning_rate": 1.6370571541044812e-05, "loss": 0.4292, "step": 28735 }, { "epoch": 4.6909105750785685, "grad_norm": 1.869801640510559, "learning_rate": 1.6370326932618165e-05, "loss": 0.3316, "step": 28736 }, { "epoch": 4.691073833721073, "grad_norm": 2.0246102809906006, "learning_rate": 1.6370082317776602e-05, "loss": 0.3718, "step": 28737 }, { "epoch": 4.691237092363577, "grad_norm": 1.8297407627105713, "learning_rate": 1.636983769652037e-05, "loss": 0.378, "step": 28738 }, { "epoch": 4.691400351006082, "grad_norm": 2.0659542083740234, "learning_rate": 1.636959306884971e-05, "loss": 0.3436, "step": 28739 }, { "epoch": 4.691563609648586, "grad_norm": 2.445672035217285, "learning_rate": 1.6369348434764875e-05, "loss": 0.4455, "step": 28740 }, { "epoch": 4.69172686829109, "grad_norm": 2.089942216873169, "learning_rate": 1.6369103794266105e-05, "loss": 0.3797, "step": 28741 }, { "epoch": 4.691890126933594, "grad_norm": 1.9753397703170776, "learning_rate": 1.636885914735365e-05, "loss": 0.3441, "step": 28742 }, { "epoch": 4.692053385576099, "grad_norm": 2.375218629837036, "learning_rate": 1.6368614494027758e-05, "loss": 0.4145, "step": 28743 }, { "epoch": 4.692216644218603, "grad_norm": 2.4856600761413574, "learning_rate": 1.6368369834288665e-05, "loss": 0.3849, "step": 28744 }, { "epoch": 4.6923799028611075, "grad_norm": 1.9416959285736084, "learning_rate": 1.6368125168136635e-05, "loss": 0.4024, "step": 28745 }, { "epoch": 4.692543161503612, "grad_norm": 2.477893590927124, "learning_rate": 1.6367880495571898e-05, "loss": 0.5481, "step": 28746 }, { "epoch": 4.692706420146116, "grad_norm": 2.426403284072876, "learning_rate": 1.636763581659471e-05, "loss": 0.4461, "step": 28747 }, { "epoch": 4.692869678788621, "grad_norm": 2.1741714477539062, "learning_rate": 1.6367391131205313e-05, "loss": 0.4004, "step": 28748 }, { "epoch": 4.693032937431125, "grad_norm": 1.9411091804504395, "learning_rate": 1.6367146439403956e-05, "loss": 0.3619, "step": 28749 }, { "epoch": 4.69319619607363, "grad_norm": 2.0791616439819336, "learning_rate": 1.6366901741190885e-05, "loss": 0.3887, "step": 28750 }, { "epoch": 4.693359454716134, "grad_norm": 2.4362869262695312, "learning_rate": 1.6366657036566344e-05, "loss": 0.4393, "step": 28751 }, { "epoch": 4.693522713358639, "grad_norm": 2.1673240661621094, "learning_rate": 1.6366412325530583e-05, "loss": 0.4187, "step": 28752 }, { "epoch": 4.693685972001143, "grad_norm": 2.6440699100494385, "learning_rate": 1.6366167608083844e-05, "loss": 0.4166, "step": 28753 }, { "epoch": 4.6938492306436475, "grad_norm": 1.991589069366455, "learning_rate": 1.6365922884226376e-05, "loss": 0.3455, "step": 28754 }, { "epoch": 4.694012489286152, "grad_norm": 2.0961480140686035, "learning_rate": 1.6365678153958424e-05, "loss": 0.4402, "step": 28755 }, { "epoch": 4.694175747928656, "grad_norm": 2.086705446243286, "learning_rate": 1.636543341728024e-05, "loss": 0.3554, "step": 28756 }, { "epoch": 4.694339006571161, "grad_norm": 2.0524637699127197, "learning_rate": 1.6365188674192064e-05, "loss": 0.3535, "step": 28757 }, { "epoch": 4.694502265213664, "grad_norm": 2.116163730621338, "learning_rate": 1.6364943924694143e-05, "loss": 0.3839, "step": 28758 }, { "epoch": 4.694665523856169, "grad_norm": 2.2212960720062256, "learning_rate": 1.6364699168786727e-05, "loss": 0.3448, "step": 28759 }, { "epoch": 4.694828782498673, "grad_norm": 1.8458013534545898, "learning_rate": 1.6364454406470064e-05, "loss": 0.3554, "step": 28760 }, { "epoch": 4.694992041141178, "grad_norm": 2.2649126052856445, "learning_rate": 1.6364209637744394e-05, "loss": 0.3698, "step": 28761 }, { "epoch": 4.695155299783682, "grad_norm": 2.502732992172241, "learning_rate": 1.6363964862609964e-05, "loss": 0.4118, "step": 28762 }, { "epoch": 4.6953185584261865, "grad_norm": 2.065251350402832, "learning_rate": 1.6363720081067028e-05, "loss": 0.3755, "step": 28763 }, { "epoch": 4.695481817068691, "grad_norm": 1.7816413640975952, "learning_rate": 1.6363475293115824e-05, "loss": 0.36, "step": 28764 }, { "epoch": 4.695645075711195, "grad_norm": 2.026608467102051, "learning_rate": 1.6363230498756603e-05, "loss": 0.3974, "step": 28765 }, { "epoch": 4.6958083343537, "grad_norm": 2.226980686187744, "learning_rate": 1.636298569798961e-05, "loss": 0.402, "step": 28766 }, { "epoch": 4.695971592996204, "grad_norm": 2.2491633892059326, "learning_rate": 1.6362740890815092e-05, "loss": 0.4073, "step": 28767 }, { "epoch": 4.696134851638709, "grad_norm": 2.152982473373413, "learning_rate": 1.6362496077233298e-05, "loss": 0.344, "step": 28768 }, { "epoch": 4.696298110281213, "grad_norm": 2.5277810096740723, "learning_rate": 1.636225125724447e-05, "loss": 0.3804, "step": 28769 }, { "epoch": 4.696461368923718, "grad_norm": 1.711207628250122, "learning_rate": 1.6362006430848855e-05, "loss": 0.3589, "step": 28770 }, { "epoch": 4.696624627566222, "grad_norm": 1.8731322288513184, "learning_rate": 1.6361761598046704e-05, "loss": 0.3435, "step": 28771 }, { "epoch": 4.6967878862087264, "grad_norm": 2.2018604278564453, "learning_rate": 1.636151675883826e-05, "loss": 0.3858, "step": 28772 }, { "epoch": 4.696951144851231, "grad_norm": 1.9499828815460205, "learning_rate": 1.6361271913223774e-05, "loss": 0.3147, "step": 28773 }, { "epoch": 4.697114403493735, "grad_norm": 2.1870384216308594, "learning_rate": 1.6361027061203482e-05, "loss": 0.3638, "step": 28774 }, { "epoch": 4.697277662136239, "grad_norm": 1.6428476572036743, "learning_rate": 1.636078220277764e-05, "loss": 0.3626, "step": 28775 }, { "epoch": 4.697440920778744, "grad_norm": 1.575366735458374, "learning_rate": 1.6360537337946493e-05, "loss": 0.3372, "step": 28776 }, { "epoch": 4.697604179421248, "grad_norm": 2.4586358070373535, "learning_rate": 1.6360292466710287e-05, "loss": 0.4151, "step": 28777 }, { "epoch": 4.697767438063752, "grad_norm": 2.1518502235412598, "learning_rate": 1.6360047589069264e-05, "loss": 0.4376, "step": 28778 }, { "epoch": 4.697930696706257, "grad_norm": 2.2105355262756348, "learning_rate": 1.6359802705023683e-05, "loss": 0.3753, "step": 28779 }, { "epoch": 4.698093955348761, "grad_norm": 2.518576145172119, "learning_rate": 1.6359557814573778e-05, "loss": 0.3417, "step": 28780 }, { "epoch": 4.6982572139912655, "grad_norm": 2.1601696014404297, "learning_rate": 1.63593129177198e-05, "loss": 0.3917, "step": 28781 }, { "epoch": 4.69842047263377, "grad_norm": 2.156040906906128, "learning_rate": 1.6359068014461993e-05, "loss": 0.3677, "step": 28782 }, { "epoch": 4.698583731276274, "grad_norm": 2.0679712295532227, "learning_rate": 1.635882310480061e-05, "loss": 0.3469, "step": 28783 }, { "epoch": 4.698746989918779, "grad_norm": 1.9790290594100952, "learning_rate": 1.635857818873589e-05, "loss": 0.3369, "step": 28784 }, { "epoch": 4.698910248561283, "grad_norm": 1.7115939855575562, "learning_rate": 1.6358333266268086e-05, "loss": 0.337, "step": 28785 }, { "epoch": 4.699073507203788, "grad_norm": 1.7849042415618896, "learning_rate": 1.6358088337397444e-05, "loss": 0.3528, "step": 28786 }, { "epoch": 4.699236765846292, "grad_norm": 2.224853277206421, "learning_rate": 1.63578434021242e-05, "loss": 0.3579, "step": 28787 }, { "epoch": 4.699400024488797, "grad_norm": 2.3457536697387695, "learning_rate": 1.635759846044862e-05, "loss": 0.4194, "step": 28788 }, { "epoch": 4.699563283131301, "grad_norm": 2.408813238143921, "learning_rate": 1.6357353512370936e-05, "loss": 0.42, "step": 28789 }, { "epoch": 4.699726541773805, "grad_norm": 2.553351640701294, "learning_rate": 1.63571085578914e-05, "loss": 0.4147, "step": 28790 }, { "epoch": 4.69988980041631, "grad_norm": 2.1753852367401123, "learning_rate": 1.6356863597010256e-05, "loss": 0.3651, "step": 28791 }, { "epoch": 4.700053059058813, "grad_norm": 1.649337649345398, "learning_rate": 1.635661862972775e-05, "loss": 0.3157, "step": 28792 }, { "epoch": 4.700216317701319, "grad_norm": 2.653049945831299, "learning_rate": 1.6356373656044132e-05, "loss": 0.4041, "step": 28793 }, { "epoch": 4.700379576343822, "grad_norm": 2.119920492172241, "learning_rate": 1.6356128675959652e-05, "loss": 0.3592, "step": 28794 }, { "epoch": 4.700542834986327, "grad_norm": 2.0345726013183594, "learning_rate": 1.6355883689474547e-05, "loss": 0.4256, "step": 28795 }, { "epoch": 4.700706093628831, "grad_norm": 2.5425829887390137, "learning_rate": 1.6355638696589068e-05, "loss": 0.4296, "step": 28796 }, { "epoch": 4.700869352271336, "grad_norm": 2.1957175731658936, "learning_rate": 1.6355393697303465e-05, "loss": 0.3589, "step": 28797 }, { "epoch": 4.70103261091384, "grad_norm": 2.3262076377868652, "learning_rate": 1.6355148691617983e-05, "loss": 0.3628, "step": 28798 }, { "epoch": 4.7011958695563445, "grad_norm": 2.2186379432678223, "learning_rate": 1.6354903679532866e-05, "loss": 0.4246, "step": 28799 }, { "epoch": 4.701359128198849, "grad_norm": 2.4422507286071777, "learning_rate": 1.6354658661048364e-05, "loss": 0.4045, "step": 28800 }, { "epoch": 4.701522386841353, "grad_norm": 2.450808525085449, "learning_rate": 1.635441363616472e-05, "loss": 0.447, "step": 28801 }, { "epoch": 4.701685645483858, "grad_norm": 1.7857967615127563, "learning_rate": 1.6354168604882185e-05, "loss": 0.3495, "step": 28802 }, { "epoch": 4.701848904126362, "grad_norm": 2.175527572631836, "learning_rate": 1.6353923567201006e-05, "loss": 0.4254, "step": 28803 }, { "epoch": 4.702012162768867, "grad_norm": 2.321944236755371, "learning_rate": 1.6353678523121425e-05, "loss": 0.4236, "step": 28804 }, { "epoch": 4.702175421411371, "grad_norm": 2.3606019020080566, "learning_rate": 1.6353433472643692e-05, "loss": 0.3627, "step": 28805 }, { "epoch": 4.702338680053876, "grad_norm": 1.7718287706375122, "learning_rate": 1.6353188415768052e-05, "loss": 0.364, "step": 28806 }, { "epoch": 4.70250193869638, "grad_norm": 1.929621934890747, "learning_rate": 1.6352943352494755e-05, "loss": 0.3769, "step": 28807 }, { "epoch": 4.702665197338884, "grad_norm": 2.370926856994629, "learning_rate": 1.6352698282824045e-05, "loss": 0.4027, "step": 28808 }, { "epoch": 4.702828455981389, "grad_norm": 2.048548698425293, "learning_rate": 1.6352453206756166e-05, "loss": 0.3762, "step": 28809 }, { "epoch": 4.702991714623893, "grad_norm": 1.9978739023208618, "learning_rate": 1.6352208124291376e-05, "loss": 0.4124, "step": 28810 }, { "epoch": 4.703154973266397, "grad_norm": 1.9607925415039062, "learning_rate": 1.635196303542991e-05, "loss": 0.3309, "step": 28811 }, { "epoch": 4.703318231908901, "grad_norm": 2.314472198486328, "learning_rate": 1.6351717940172017e-05, "loss": 0.4461, "step": 28812 }, { "epoch": 4.703481490551406, "grad_norm": 2.2065587043762207, "learning_rate": 1.6351472838517946e-05, "loss": 0.3744, "step": 28813 }, { "epoch": 4.70364474919391, "grad_norm": 2.1001906394958496, "learning_rate": 1.6351227730467947e-05, "loss": 0.3672, "step": 28814 }, { "epoch": 4.703808007836415, "grad_norm": 1.9950248003005981, "learning_rate": 1.635098261602226e-05, "loss": 0.3852, "step": 28815 }, { "epoch": 4.703971266478919, "grad_norm": 2.418825387954712, "learning_rate": 1.6350737495181136e-05, "loss": 0.3774, "step": 28816 }, { "epoch": 4.7041345251214235, "grad_norm": 2.2322914600372314, "learning_rate": 1.635049236794482e-05, "loss": 0.4651, "step": 28817 }, { "epoch": 4.704297783763928, "grad_norm": 2.284743070602417, "learning_rate": 1.6350247234313562e-05, "loss": 0.404, "step": 28818 }, { "epoch": 4.704461042406432, "grad_norm": 2.092949628829956, "learning_rate": 1.6350002094287608e-05, "loss": 0.4001, "step": 28819 }, { "epoch": 4.704624301048937, "grad_norm": 1.945680856704712, "learning_rate": 1.6349756947867202e-05, "loss": 0.324, "step": 28820 }, { "epoch": 4.704787559691441, "grad_norm": 1.9923347234725952, "learning_rate": 1.6349511795052592e-05, "loss": 0.3846, "step": 28821 }, { "epoch": 4.704950818333946, "grad_norm": 2.0996034145355225, "learning_rate": 1.6349266635844025e-05, "loss": 0.4043, "step": 28822 }, { "epoch": 4.70511407697645, "grad_norm": 2.3004236221313477, "learning_rate": 1.634902147024175e-05, "loss": 0.3864, "step": 28823 }, { "epoch": 4.705277335618955, "grad_norm": 1.6792840957641602, "learning_rate": 1.634877629824601e-05, "loss": 0.2996, "step": 28824 }, { "epoch": 4.705440594261459, "grad_norm": 2.224532127380371, "learning_rate": 1.6348531119857055e-05, "loss": 0.4704, "step": 28825 }, { "epoch": 4.705603852903963, "grad_norm": 2.0507636070251465, "learning_rate": 1.6348285935075127e-05, "loss": 0.4079, "step": 28826 }, { "epoch": 4.705767111546468, "grad_norm": 2.199957847595215, "learning_rate": 1.634804074390048e-05, "loss": 0.422, "step": 28827 }, { "epoch": 4.705930370188971, "grad_norm": 2.1998839378356934, "learning_rate": 1.6347795546333357e-05, "loss": 0.3635, "step": 28828 }, { "epoch": 4.706093628831477, "grad_norm": 2.0859291553497314, "learning_rate": 1.6347550342374013e-05, "loss": 0.4402, "step": 28829 }, { "epoch": 4.70625688747398, "grad_norm": 2.034541130065918, "learning_rate": 1.6347305132022677e-05, "loss": 0.4067, "step": 28830 }, { "epoch": 4.706420146116485, "grad_norm": 1.702813982963562, "learning_rate": 1.634705991527961e-05, "loss": 0.3109, "step": 28831 }, { "epoch": 4.706583404758989, "grad_norm": 2.2295453548431396, "learning_rate": 1.6346814692145057e-05, "loss": 0.3612, "step": 28832 }, { "epoch": 4.706746663401494, "grad_norm": 2.064302444458008, "learning_rate": 1.634656946261926e-05, "loss": 0.3445, "step": 28833 }, { "epoch": 4.706909922043998, "grad_norm": 1.843930721282959, "learning_rate": 1.634632422670247e-05, "loss": 0.3458, "step": 28834 }, { "epoch": 4.7070731806865025, "grad_norm": 2.2421913146972656, "learning_rate": 1.6346078984394934e-05, "loss": 0.3625, "step": 28835 }, { "epoch": 4.707236439329007, "grad_norm": 2.5159928798675537, "learning_rate": 1.6345833735696903e-05, "loss": 0.3976, "step": 28836 }, { "epoch": 4.707399697971511, "grad_norm": 2.814910650253296, "learning_rate": 1.6345588480608614e-05, "loss": 0.4413, "step": 28837 }, { "epoch": 4.707562956614016, "grad_norm": 1.9702836275100708, "learning_rate": 1.634534321913032e-05, "loss": 0.3468, "step": 28838 }, { "epoch": 4.70772621525652, "grad_norm": 2.2895843982696533, "learning_rate": 1.6345097951262268e-05, "loss": 0.3997, "step": 28839 }, { "epoch": 4.707889473899025, "grad_norm": 1.966810703277588, "learning_rate": 1.6344852677004702e-05, "loss": 0.4166, "step": 28840 }, { "epoch": 4.708052732541529, "grad_norm": 2.3871686458587646, "learning_rate": 1.634460739635787e-05, "loss": 0.4084, "step": 28841 }, { "epoch": 4.7082159911840336, "grad_norm": 2.029841423034668, "learning_rate": 1.6344362109322024e-05, "loss": 0.3635, "step": 28842 }, { "epoch": 4.708379249826538, "grad_norm": 1.7114124298095703, "learning_rate": 1.6344116815897404e-05, "loss": 0.3494, "step": 28843 }, { "epoch": 4.708542508469042, "grad_norm": 2.0102880001068115, "learning_rate": 1.634387151608426e-05, "loss": 0.392, "step": 28844 }, { "epoch": 4.708705767111546, "grad_norm": 2.062542200088501, "learning_rate": 1.634362620988284e-05, "loss": 0.3186, "step": 28845 }, { "epoch": 4.708869025754051, "grad_norm": 2.4024035930633545, "learning_rate": 1.634338089729339e-05, "loss": 0.3773, "step": 28846 }, { "epoch": 4.709032284396555, "grad_norm": 2.2588727474212646, "learning_rate": 1.6343135578316158e-05, "loss": 0.4245, "step": 28847 }, { "epoch": 4.709195543039059, "grad_norm": 2.8799548149108887, "learning_rate": 1.634289025295139e-05, "loss": 0.4055, "step": 28848 }, { "epoch": 4.709358801681564, "grad_norm": 1.6568242311477661, "learning_rate": 1.6342644921199334e-05, "loss": 0.3255, "step": 28849 }, { "epoch": 4.709522060324068, "grad_norm": 2.491594076156616, "learning_rate": 1.6342399583060234e-05, "loss": 0.3558, "step": 28850 }, { "epoch": 4.709685318966573, "grad_norm": 2.008244276046753, "learning_rate": 1.634215423853434e-05, "loss": 0.3442, "step": 28851 }, { "epoch": 4.709848577609077, "grad_norm": 2.070014715194702, "learning_rate": 1.6341908887621894e-05, "loss": 0.3236, "step": 28852 }, { "epoch": 4.7100118362515815, "grad_norm": 2.775336980819702, "learning_rate": 1.6341663530323156e-05, "loss": 0.4777, "step": 28853 }, { "epoch": 4.710175094894086, "grad_norm": 1.9072734117507935, "learning_rate": 1.634141816663836e-05, "loss": 0.3433, "step": 28854 }, { "epoch": 4.71033835353659, "grad_norm": 2.3130667209625244, "learning_rate": 1.6341172796567756e-05, "loss": 0.3874, "step": 28855 }, { "epoch": 4.710501612179095, "grad_norm": 1.9858150482177734, "learning_rate": 1.6340927420111596e-05, "loss": 0.3551, "step": 28856 }, { "epoch": 4.710664870821599, "grad_norm": 1.8853096961975098, "learning_rate": 1.6340682037270122e-05, "loss": 0.3907, "step": 28857 }, { "epoch": 4.710828129464104, "grad_norm": 2.485074281692505, "learning_rate": 1.6340436648043585e-05, "loss": 0.4346, "step": 28858 }, { "epoch": 4.710991388106608, "grad_norm": 1.465010404586792, "learning_rate": 1.634019125243223e-05, "loss": 0.2965, "step": 28859 }, { "epoch": 4.7111546467491126, "grad_norm": 2.176485300064087, "learning_rate": 1.63399458504363e-05, "loss": 0.3597, "step": 28860 }, { "epoch": 4.711317905391617, "grad_norm": 2.130749464035034, "learning_rate": 1.633970044205605e-05, "loss": 0.3731, "step": 28861 }, { "epoch": 4.711481164034121, "grad_norm": 1.8442633152008057, "learning_rate": 1.633945502729172e-05, "loss": 0.3665, "step": 28862 }, { "epoch": 4.711644422676626, "grad_norm": 2.4290592670440674, "learning_rate": 1.6339209606143566e-05, "loss": 0.3622, "step": 28863 }, { "epoch": 4.711807681319129, "grad_norm": 2.292769432067871, "learning_rate": 1.6338964178611824e-05, "loss": 0.4088, "step": 28864 }, { "epoch": 4.711970939961634, "grad_norm": 1.9867676496505737, "learning_rate": 1.6338718744696747e-05, "loss": 0.3452, "step": 28865 }, { "epoch": 4.712134198604138, "grad_norm": 1.675252914428711, "learning_rate": 1.6338473304398585e-05, "loss": 0.3156, "step": 28866 }, { "epoch": 4.712297457246643, "grad_norm": 2.0565500259399414, "learning_rate": 1.633822785771758e-05, "loss": 0.3574, "step": 28867 }, { "epoch": 4.712460715889147, "grad_norm": 1.8779277801513672, "learning_rate": 1.633798240465398e-05, "loss": 0.3003, "step": 28868 }, { "epoch": 4.712623974531652, "grad_norm": 1.876863956451416, "learning_rate": 1.633773694520804e-05, "loss": 0.3468, "step": 28869 }, { "epoch": 4.712787233174156, "grad_norm": 2.593679428100586, "learning_rate": 1.6337491479379993e-05, "loss": 0.4052, "step": 28870 }, { "epoch": 4.7129504918166605, "grad_norm": 2.178377628326416, "learning_rate": 1.63372460071701e-05, "loss": 0.376, "step": 28871 }, { "epoch": 4.713113750459165, "grad_norm": 2.30517840385437, "learning_rate": 1.6337000528578598e-05, "loss": 0.4386, "step": 28872 }, { "epoch": 4.713277009101669, "grad_norm": 2.5666353702545166, "learning_rate": 1.6336755043605737e-05, "loss": 0.4624, "step": 28873 }, { "epoch": 4.713440267744174, "grad_norm": 2.149705410003662, "learning_rate": 1.6336509552251766e-05, "loss": 0.3933, "step": 28874 }, { "epoch": 4.713603526386678, "grad_norm": 2.1989386081695557, "learning_rate": 1.6336264054516934e-05, "loss": 0.4319, "step": 28875 }, { "epoch": 4.713766785029183, "grad_norm": 2.551323175430298, "learning_rate": 1.6336018550401484e-05, "loss": 0.4749, "step": 28876 }, { "epoch": 4.713930043671687, "grad_norm": 2.0233378410339355, "learning_rate": 1.6335773039905663e-05, "loss": 0.4194, "step": 28877 }, { "epoch": 4.7140933023141915, "grad_norm": 2.3862507343292236, "learning_rate": 1.6335527523029723e-05, "loss": 0.4323, "step": 28878 }, { "epoch": 4.714256560956696, "grad_norm": 2.4072799682617188, "learning_rate": 1.633528199977391e-05, "loss": 0.4033, "step": 28879 }, { "epoch": 4.7144198195992, "grad_norm": 2.2845072746276855, "learning_rate": 1.633503647013847e-05, "loss": 0.4191, "step": 28880 }, { "epoch": 4.714583078241704, "grad_norm": 2.8180456161499023, "learning_rate": 1.633479093412365e-05, "loss": 0.4329, "step": 28881 }, { "epoch": 4.714746336884209, "grad_norm": 2.0390055179595947, "learning_rate": 1.6334545391729693e-05, "loss": 0.3553, "step": 28882 }, { "epoch": 4.714909595526713, "grad_norm": 2.3070127964019775, "learning_rate": 1.633429984295685e-05, "loss": 0.4052, "step": 28883 }, { "epoch": 4.715072854169217, "grad_norm": 4.196361541748047, "learning_rate": 1.633405428780537e-05, "loss": 0.4051, "step": 28884 }, { "epoch": 4.715236112811722, "grad_norm": 1.8477771282196045, "learning_rate": 1.6333808726275503e-05, "loss": 0.3727, "step": 28885 }, { "epoch": 4.715399371454226, "grad_norm": 2.779478073120117, "learning_rate": 1.6333563158367488e-05, "loss": 0.4244, "step": 28886 }, { "epoch": 4.715562630096731, "grad_norm": 2.419393301010132, "learning_rate": 1.633331758408158e-05, "loss": 0.4576, "step": 28887 }, { "epoch": 4.715725888739235, "grad_norm": 2.164781093597412, "learning_rate": 1.633307200341802e-05, "loss": 0.3971, "step": 28888 }, { "epoch": 4.7158891473817395, "grad_norm": 2.1050612926483154, "learning_rate": 1.633282641637706e-05, "loss": 0.3597, "step": 28889 }, { "epoch": 4.716052406024244, "grad_norm": 2.41041898727417, "learning_rate": 1.6332580822958945e-05, "loss": 0.434, "step": 28890 }, { "epoch": 4.716215664666748, "grad_norm": 2.3162038326263428, "learning_rate": 1.6332335223163924e-05, "loss": 0.3795, "step": 28891 }, { "epoch": 4.716378923309253, "grad_norm": 1.7780423164367676, "learning_rate": 1.6332089616992242e-05, "loss": 0.3572, "step": 28892 }, { "epoch": 4.716542181951757, "grad_norm": 2.3132879734039307, "learning_rate": 1.6331844004444147e-05, "loss": 0.4896, "step": 28893 }, { "epoch": 4.716705440594262, "grad_norm": 2.2780802249908447, "learning_rate": 1.6331598385519886e-05, "loss": 0.3529, "step": 28894 }, { "epoch": 4.716868699236766, "grad_norm": 2.32486891746521, "learning_rate": 1.6331352760219707e-05, "loss": 0.4045, "step": 28895 }, { "epoch": 4.7170319578792705, "grad_norm": 2.1290032863616943, "learning_rate": 1.6331107128543856e-05, "loss": 0.3837, "step": 28896 }, { "epoch": 4.717195216521775, "grad_norm": 2.097224473953247, "learning_rate": 1.6330861490492588e-05, "loss": 0.3676, "step": 28897 }, { "epoch": 4.7173584751642785, "grad_norm": 2.559791326522827, "learning_rate": 1.6330615846066137e-05, "loss": 0.4072, "step": 28898 }, { "epoch": 4.717521733806784, "grad_norm": 1.847235918045044, "learning_rate": 1.6330370195264764e-05, "loss": 0.356, "step": 28899 }, { "epoch": 4.717684992449287, "grad_norm": 2.405409336090088, "learning_rate": 1.6330124538088705e-05, "loss": 0.4361, "step": 28900 }, { "epoch": 4.717848251091792, "grad_norm": 2.4472620487213135, "learning_rate": 1.6329878874538215e-05, "loss": 0.4383, "step": 28901 }, { "epoch": 4.718011509734296, "grad_norm": 2.494577646255493, "learning_rate": 1.6329633204613538e-05, "loss": 0.4708, "step": 28902 }, { "epoch": 4.718174768376801, "grad_norm": 2.3633460998535156, "learning_rate": 1.632938752831492e-05, "loss": 0.4304, "step": 28903 }, { "epoch": 4.718338027019305, "grad_norm": 2.401850700378418, "learning_rate": 1.6329141845642612e-05, "loss": 0.415, "step": 28904 }, { "epoch": 4.71850128566181, "grad_norm": 2.433140754699707, "learning_rate": 1.632889615659686e-05, "loss": 0.4317, "step": 28905 }, { "epoch": 4.718664544304314, "grad_norm": 2.2263681888580322, "learning_rate": 1.632865046117791e-05, "loss": 0.4247, "step": 28906 }, { "epoch": 4.7188278029468185, "grad_norm": 1.7103177309036255, "learning_rate": 1.6328404759386015e-05, "loss": 0.2894, "step": 28907 }, { "epoch": 4.718991061589323, "grad_norm": 2.5727157592773438, "learning_rate": 1.6328159051221416e-05, "loss": 0.4178, "step": 28908 }, { "epoch": 4.719154320231827, "grad_norm": 2.4395503997802734, "learning_rate": 1.632791333668436e-05, "loss": 0.4598, "step": 28909 }, { "epoch": 4.719317578874332, "grad_norm": 2.0315420627593994, "learning_rate": 1.6327667615775096e-05, "loss": 0.3513, "step": 28910 }, { "epoch": 4.719480837516836, "grad_norm": 2.271927833557129, "learning_rate": 1.6327421888493878e-05, "loss": 0.4119, "step": 28911 }, { "epoch": 4.719644096159341, "grad_norm": 2.49702525138855, "learning_rate": 1.6327176154840946e-05, "loss": 0.4039, "step": 28912 }, { "epoch": 4.719807354801845, "grad_norm": 2.0746936798095703, "learning_rate": 1.6326930414816544e-05, "loss": 0.3959, "step": 28913 }, { "epoch": 4.7199706134443495, "grad_norm": 2.1324377059936523, "learning_rate": 1.632668466842093e-05, "loss": 0.3681, "step": 28914 }, { "epoch": 4.720133872086854, "grad_norm": 2.3172967433929443, "learning_rate": 1.6326438915654346e-05, "loss": 0.3967, "step": 28915 }, { "epoch": 4.720297130729358, "grad_norm": 2.0222008228302, "learning_rate": 1.6326193156517035e-05, "loss": 0.3373, "step": 28916 }, { "epoch": 4.720460389371862, "grad_norm": 2.22053861618042, "learning_rate": 1.6325947391009254e-05, "loss": 0.4243, "step": 28917 }, { "epoch": 4.720623648014366, "grad_norm": 1.769449234008789, "learning_rate": 1.6325701619131246e-05, "loss": 0.3489, "step": 28918 }, { "epoch": 4.720786906656871, "grad_norm": 2.4117164611816406, "learning_rate": 1.6325455840883256e-05, "loss": 0.4895, "step": 28919 }, { "epoch": 4.720950165299375, "grad_norm": 2.242414712905884, "learning_rate": 1.6325210056265537e-05, "loss": 0.446, "step": 28920 }, { "epoch": 4.72111342394188, "grad_norm": 1.9789659976959229, "learning_rate": 1.632496426527833e-05, "loss": 0.356, "step": 28921 }, { "epoch": 4.721276682584384, "grad_norm": 2.279695749282837, "learning_rate": 1.6324718467921883e-05, "loss": 0.4018, "step": 28922 }, { "epoch": 4.721439941226889, "grad_norm": 2.35011887550354, "learning_rate": 1.632447266419645e-05, "loss": 0.4497, "step": 28923 }, { "epoch": 4.721603199869393, "grad_norm": 2.5045485496520996, "learning_rate": 1.6324226854102274e-05, "loss": 0.4129, "step": 28924 }, { "epoch": 4.7217664585118975, "grad_norm": 2.2554876804351807, "learning_rate": 1.6323981037639606e-05, "loss": 0.4574, "step": 28925 }, { "epoch": 4.721929717154402, "grad_norm": 1.9324036836624146, "learning_rate": 1.6323735214808684e-05, "loss": 0.3636, "step": 28926 }, { "epoch": 4.722092975796906, "grad_norm": 2.116974115371704, "learning_rate": 1.6323489385609768e-05, "loss": 0.4011, "step": 28927 }, { "epoch": 4.722256234439411, "grad_norm": 1.8230884075164795, "learning_rate": 1.63232435500431e-05, "loss": 0.3521, "step": 28928 }, { "epoch": 4.722419493081915, "grad_norm": 2.0664803981781006, "learning_rate": 1.6322997708108923e-05, "loss": 0.3805, "step": 28929 }, { "epoch": 4.72258275172442, "grad_norm": 2.188107490539551, "learning_rate": 1.6322751859807493e-05, "loss": 0.3621, "step": 28930 }, { "epoch": 4.722746010366924, "grad_norm": 2.298250436782837, "learning_rate": 1.6322506005139052e-05, "loss": 0.4158, "step": 28931 }, { "epoch": 4.7229092690094285, "grad_norm": 2.118020534515381, "learning_rate": 1.6322260144103847e-05, "loss": 0.4208, "step": 28932 }, { "epoch": 4.723072527651933, "grad_norm": 2.3798434734344482, "learning_rate": 1.6322014276702132e-05, "loss": 0.4055, "step": 28933 }, { "epoch": 4.7232357862944365, "grad_norm": 2.3353564739227295, "learning_rate": 1.6321768402934148e-05, "loss": 0.437, "step": 28934 }, { "epoch": 4.723399044936942, "grad_norm": 2.3337137699127197, "learning_rate": 1.6321522522800143e-05, "loss": 0.4404, "step": 28935 }, { "epoch": 4.723562303579445, "grad_norm": 2.475741386413574, "learning_rate": 1.632127663630037e-05, "loss": 0.4499, "step": 28936 }, { "epoch": 4.72372556222195, "grad_norm": 2.055236577987671, "learning_rate": 1.6321030743435072e-05, "loss": 0.4203, "step": 28937 }, { "epoch": 4.723888820864454, "grad_norm": 2.3758904933929443, "learning_rate": 1.6320784844204502e-05, "loss": 0.415, "step": 28938 }, { "epoch": 4.724052079506959, "grad_norm": 2.6791200637817383, "learning_rate": 1.6320538938608896e-05, "loss": 0.4648, "step": 28939 }, { "epoch": 4.724215338149463, "grad_norm": 2.2307322025299072, "learning_rate": 1.632029302664851e-05, "loss": 0.3922, "step": 28940 }, { "epoch": 4.724378596791968, "grad_norm": 1.8258792161941528, "learning_rate": 1.6320047108323593e-05, "loss": 0.3414, "step": 28941 }, { "epoch": 4.724541855434472, "grad_norm": 2.319890260696411, "learning_rate": 1.631980118363439e-05, "loss": 0.4052, "step": 28942 }, { "epoch": 4.7247051140769765, "grad_norm": 1.987808346748352, "learning_rate": 1.6319555252581148e-05, "loss": 0.3124, "step": 28943 }, { "epoch": 4.724868372719481, "grad_norm": 2.3142309188842773, "learning_rate": 1.6319309315164117e-05, "loss": 0.411, "step": 28944 }, { "epoch": 4.725031631361985, "grad_norm": 2.107236623764038, "learning_rate": 1.631906337138354e-05, "loss": 0.3644, "step": 28945 }, { "epoch": 4.72519489000449, "grad_norm": 2.500663995742798, "learning_rate": 1.631881742123967e-05, "loss": 0.4656, "step": 28946 }, { "epoch": 4.725358148646994, "grad_norm": 1.8282747268676758, "learning_rate": 1.6318571464732752e-05, "loss": 0.3433, "step": 28947 }, { "epoch": 4.725521407289499, "grad_norm": 2.257594347000122, "learning_rate": 1.6318325501863038e-05, "loss": 0.3747, "step": 28948 }, { "epoch": 4.725684665932003, "grad_norm": 2.0086190700531006, "learning_rate": 1.6318079532630768e-05, "loss": 0.4214, "step": 28949 }, { "epoch": 4.7258479245745075, "grad_norm": 2.2653093338012695, "learning_rate": 1.6317833557036193e-05, "loss": 0.417, "step": 28950 }, { "epoch": 4.726011183217011, "grad_norm": 1.9755221605300903, "learning_rate": 1.6317587575079564e-05, "loss": 0.4019, "step": 28951 }, { "epoch": 4.726174441859516, "grad_norm": 2.0542986392974854, "learning_rate": 1.631734158676112e-05, "loss": 0.3881, "step": 28952 }, { "epoch": 4.72633770050202, "grad_norm": 2.1277058124542236, "learning_rate": 1.6317095592081122e-05, "loss": 0.4259, "step": 28953 }, { "epoch": 4.726500959144524, "grad_norm": 2.1469810009002686, "learning_rate": 1.6316849591039807e-05, "loss": 0.3801, "step": 28954 }, { "epoch": 4.726664217787029, "grad_norm": 1.9922521114349365, "learning_rate": 1.6316603583637428e-05, "loss": 0.3209, "step": 28955 }, { "epoch": 4.726827476429533, "grad_norm": 2.182448148727417, "learning_rate": 1.6316357569874227e-05, "loss": 0.4321, "step": 28956 }, { "epoch": 4.726990735072038, "grad_norm": 1.8810834884643555, "learning_rate": 1.6316111549750463e-05, "loss": 0.3307, "step": 28957 }, { "epoch": 4.727153993714542, "grad_norm": 2.7789433002471924, "learning_rate": 1.631586552326637e-05, "loss": 0.4517, "step": 28958 }, { "epoch": 4.727317252357047, "grad_norm": 2.024087905883789, "learning_rate": 1.6315619490422206e-05, "loss": 0.3772, "step": 28959 }, { "epoch": 4.727480510999551, "grad_norm": 2.5653162002563477, "learning_rate": 1.631537345121821e-05, "loss": 0.3897, "step": 28960 }, { "epoch": 4.7276437696420555, "grad_norm": 2.128188371658325, "learning_rate": 1.631512740565464e-05, "loss": 0.4104, "step": 28961 }, { "epoch": 4.72780702828456, "grad_norm": 2.4148221015930176, "learning_rate": 1.6314881353731733e-05, "loss": 0.3725, "step": 28962 }, { "epoch": 4.727970286927064, "grad_norm": 2.303241014480591, "learning_rate": 1.6314635295449744e-05, "loss": 0.3908, "step": 28963 }, { "epoch": 4.728133545569569, "grad_norm": 2.1113626956939697, "learning_rate": 1.631438923080892e-05, "loss": 0.393, "step": 28964 }, { "epoch": 4.728296804212073, "grad_norm": 2.3150429725646973, "learning_rate": 1.6314143159809507e-05, "loss": 0.4133, "step": 28965 }, { "epoch": 4.728460062854578, "grad_norm": 2.2583537101745605, "learning_rate": 1.6313897082451754e-05, "loss": 0.3724, "step": 28966 }, { "epoch": 4.728623321497082, "grad_norm": 2.168663501739502, "learning_rate": 1.6313650998735907e-05, "loss": 0.3877, "step": 28967 }, { "epoch": 4.7287865801395865, "grad_norm": 1.920728087425232, "learning_rate": 1.6313404908662217e-05, "loss": 0.3499, "step": 28968 }, { "epoch": 4.728949838782091, "grad_norm": 1.8921514749526978, "learning_rate": 1.6313158812230932e-05, "loss": 0.3033, "step": 28969 }, { "epoch": 4.7291130974245945, "grad_norm": 2.0917930603027344, "learning_rate": 1.6312912709442295e-05, "loss": 0.3902, "step": 28970 }, { "epoch": 4.729276356067099, "grad_norm": 2.1157326698303223, "learning_rate": 1.6312666600296557e-05, "loss": 0.3419, "step": 28971 }, { "epoch": 4.729439614709603, "grad_norm": 1.7371997833251953, "learning_rate": 1.6312420484793962e-05, "loss": 0.3576, "step": 28972 }, { "epoch": 4.729602873352108, "grad_norm": 2.617159843444824, "learning_rate": 1.6312174362934765e-05, "loss": 0.443, "step": 28973 }, { "epoch": 4.729766131994612, "grad_norm": 2.789614200592041, "learning_rate": 1.6311928234719208e-05, "loss": 0.4563, "step": 28974 }, { "epoch": 4.729929390637117, "grad_norm": 1.7452902793884277, "learning_rate": 1.6311682100147544e-05, "loss": 0.353, "step": 28975 }, { "epoch": 4.730092649279621, "grad_norm": 2.4207119941711426, "learning_rate": 1.6311435959220015e-05, "loss": 0.4131, "step": 28976 }, { "epoch": 4.730255907922126, "grad_norm": 2.1373777389526367, "learning_rate": 1.6311189811936873e-05, "loss": 0.3494, "step": 28977 }, { "epoch": 4.73041916656463, "grad_norm": 2.676110029220581, "learning_rate": 1.6310943658298365e-05, "loss": 0.4611, "step": 28978 }, { "epoch": 4.7305824252071345, "grad_norm": 2.4797985553741455, "learning_rate": 1.631069749830474e-05, "loss": 0.3642, "step": 28979 }, { "epoch": 4.730745683849639, "grad_norm": 2.349691152572632, "learning_rate": 1.631045133195624e-05, "loss": 0.3898, "step": 28980 }, { "epoch": 4.730908942492143, "grad_norm": 2.30824875831604, "learning_rate": 1.631020515925312e-05, "loss": 0.4403, "step": 28981 }, { "epoch": 4.731072201134648, "grad_norm": 2.182612180709839, "learning_rate": 1.6309958980195624e-05, "loss": 0.4069, "step": 28982 }, { "epoch": 4.731235459777152, "grad_norm": 2.3154966831207275, "learning_rate": 1.6309712794783998e-05, "loss": 0.4013, "step": 28983 }, { "epoch": 4.731398718419657, "grad_norm": 2.3481855392456055, "learning_rate": 1.6309466603018497e-05, "loss": 0.3886, "step": 28984 }, { "epoch": 4.731561977062161, "grad_norm": 2.0607662200927734, "learning_rate": 1.6309220404899365e-05, "loss": 0.3423, "step": 28985 }, { "epoch": 4.7317252357046655, "grad_norm": 2.452751874923706, "learning_rate": 1.630897420042685e-05, "loss": 0.4335, "step": 28986 }, { "epoch": 4.731888494347169, "grad_norm": 2.3459856510162354, "learning_rate": 1.6308727989601195e-05, "loss": 0.4656, "step": 28987 }, { "epoch": 4.7320517529896735, "grad_norm": 2.0309884548187256, "learning_rate": 1.6308481772422656e-05, "loss": 0.4412, "step": 28988 }, { "epoch": 4.732215011632178, "grad_norm": 1.7747069597244263, "learning_rate": 1.630823554889148e-05, "loss": 0.3373, "step": 28989 }, { "epoch": 4.732378270274682, "grad_norm": 2.2170183658599854, "learning_rate": 1.630798931900791e-05, "loss": 0.4449, "step": 28990 }, { "epoch": 4.732541528917187, "grad_norm": 1.7059766054153442, "learning_rate": 1.6307743082772196e-05, "loss": 0.3324, "step": 28991 }, { "epoch": 4.732704787559691, "grad_norm": 2.2399208545684814, "learning_rate": 1.6307496840184586e-05, "loss": 0.4147, "step": 28992 }, { "epoch": 4.732868046202196, "grad_norm": 1.9381155967712402, "learning_rate": 1.630725059124533e-05, "loss": 0.3705, "step": 28993 }, { "epoch": 4.7330313048447, "grad_norm": 1.836714267730713, "learning_rate": 1.6307004335954672e-05, "loss": 0.3628, "step": 28994 }, { "epoch": 4.733194563487205, "grad_norm": 2.377168893814087, "learning_rate": 1.6306758074312866e-05, "loss": 0.361, "step": 28995 }, { "epoch": 4.733357822129709, "grad_norm": 2.268794536590576, "learning_rate": 1.630651180632015e-05, "loss": 0.3806, "step": 28996 }, { "epoch": 4.7335210807722135, "grad_norm": 2.1271069049835205, "learning_rate": 1.6306265531976784e-05, "loss": 0.4533, "step": 28997 }, { "epoch": 4.733684339414718, "grad_norm": 2.154374122619629, "learning_rate": 1.6306019251283008e-05, "loss": 0.4259, "step": 28998 }, { "epoch": 4.733847598057222, "grad_norm": 1.9285452365875244, "learning_rate": 1.6305772964239074e-05, "loss": 0.4043, "step": 28999 }, { "epoch": 4.734010856699727, "grad_norm": 2.3636927604675293, "learning_rate": 1.6305526670845225e-05, "loss": 0.4274, "step": 29000 }, { "epoch": 4.734174115342231, "grad_norm": 1.9608302116394043, "learning_rate": 1.6305280371101716e-05, "loss": 0.4275, "step": 29001 }, { "epoch": 4.734337373984736, "grad_norm": 2.0038199424743652, "learning_rate": 1.630503406500879e-05, "loss": 0.4058, "step": 29002 }, { "epoch": 4.73450063262724, "grad_norm": 1.9145965576171875, "learning_rate": 1.6304787752566694e-05, "loss": 0.4354, "step": 29003 }, { "epoch": 4.734663891269744, "grad_norm": 2.0805795192718506, "learning_rate": 1.630454143377568e-05, "loss": 0.3802, "step": 29004 }, { "epoch": 4.734827149912249, "grad_norm": 2.3331828117370605, "learning_rate": 1.6304295108635996e-05, "loss": 0.4025, "step": 29005 }, { "epoch": 4.7349904085547525, "grad_norm": 1.796836495399475, "learning_rate": 1.630404877714789e-05, "loss": 0.3432, "step": 29006 }, { "epoch": 4.735153667197257, "grad_norm": 1.9413319826126099, "learning_rate": 1.6303802439311605e-05, "loss": 0.3896, "step": 29007 }, { "epoch": 4.735316925839761, "grad_norm": 2.482456922531128, "learning_rate": 1.6303556095127394e-05, "loss": 0.4833, "step": 29008 }, { "epoch": 4.735480184482266, "grad_norm": 2.376008987426758, "learning_rate": 1.6303309744595505e-05, "loss": 0.4084, "step": 29009 }, { "epoch": 4.73564344312477, "grad_norm": 2.6153435707092285, "learning_rate": 1.630306338771618e-05, "loss": 0.4412, "step": 29010 }, { "epoch": 4.735806701767275, "grad_norm": 2.331754207611084, "learning_rate": 1.6302817024489676e-05, "loss": 0.3586, "step": 29011 }, { "epoch": 4.735969960409779, "grad_norm": 1.8882367610931396, "learning_rate": 1.6302570654916236e-05, "loss": 0.397, "step": 29012 }, { "epoch": 4.736133219052284, "grad_norm": 2.3492820262908936, "learning_rate": 1.630232427899611e-05, "loss": 0.385, "step": 29013 }, { "epoch": 4.736296477694788, "grad_norm": 1.9702463150024414, "learning_rate": 1.6302077896729544e-05, "loss": 0.3777, "step": 29014 }, { "epoch": 4.7364597363372924, "grad_norm": 1.897957682609558, "learning_rate": 1.6301831508116785e-05, "loss": 0.3628, "step": 29015 }, { "epoch": 4.736622994979797, "grad_norm": 2.0577127933502197, "learning_rate": 1.6301585113158088e-05, "loss": 0.3695, "step": 29016 }, { "epoch": 4.736786253622301, "grad_norm": 2.255631446838379, "learning_rate": 1.6301338711853695e-05, "loss": 0.3893, "step": 29017 }, { "epoch": 4.736949512264806, "grad_norm": 1.816909909248352, "learning_rate": 1.630109230420385e-05, "loss": 0.3702, "step": 29018 }, { "epoch": 4.73711277090731, "grad_norm": 1.9071452617645264, "learning_rate": 1.6300845890208813e-05, "loss": 0.3562, "step": 29019 }, { "epoch": 4.737276029549815, "grad_norm": 2.073746681213379, "learning_rate": 1.6300599469868825e-05, "loss": 0.4012, "step": 29020 }, { "epoch": 4.737439288192318, "grad_norm": 2.569017171859741, "learning_rate": 1.6300353043184134e-05, "loss": 0.4437, "step": 29021 }, { "epoch": 4.7376025468348235, "grad_norm": 2.0741679668426514, "learning_rate": 1.630010661015499e-05, "loss": 0.3842, "step": 29022 }, { "epoch": 4.737765805477327, "grad_norm": 1.6076455116271973, "learning_rate": 1.629986017078164e-05, "loss": 0.2957, "step": 29023 }, { "epoch": 4.7379290641198315, "grad_norm": 2.207767963409424, "learning_rate": 1.629961372506433e-05, "loss": 0.4474, "step": 29024 }, { "epoch": 4.738092322762336, "grad_norm": 2.483555793762207, "learning_rate": 1.6299367273003315e-05, "loss": 0.451, "step": 29025 }, { "epoch": 4.73825558140484, "grad_norm": 1.9031524658203125, "learning_rate": 1.6299120814598835e-05, "loss": 0.3901, "step": 29026 }, { "epoch": 4.738418840047345, "grad_norm": 2.039179801940918, "learning_rate": 1.6298874349851143e-05, "loss": 0.4101, "step": 29027 }, { "epoch": 4.738582098689849, "grad_norm": 3.3504788875579834, "learning_rate": 1.6298627878760488e-05, "loss": 0.807, "step": 29028 }, { "epoch": 4.738745357332354, "grad_norm": 2.339311361312866, "learning_rate": 1.629838140132712e-05, "loss": 0.3924, "step": 29029 }, { "epoch": 4.738908615974858, "grad_norm": 2.627021074295044, "learning_rate": 1.6298134917551275e-05, "loss": 0.4358, "step": 29030 }, { "epoch": 4.739071874617363, "grad_norm": 2.548412561416626, "learning_rate": 1.6297888427433214e-05, "loss": 0.4104, "step": 29031 }, { "epoch": 4.739235133259867, "grad_norm": 2.31614089012146, "learning_rate": 1.629764193097318e-05, "loss": 0.3561, "step": 29032 }, { "epoch": 4.739398391902371, "grad_norm": 2.329496383666992, "learning_rate": 1.629739542817142e-05, "loss": 0.3744, "step": 29033 }, { "epoch": 4.739561650544876, "grad_norm": 2.4741103649139404, "learning_rate": 1.6297148919028188e-05, "loss": 0.3871, "step": 29034 }, { "epoch": 4.73972490918738, "grad_norm": 2.1010489463806152, "learning_rate": 1.629690240354373e-05, "loss": 0.386, "step": 29035 }, { "epoch": 4.739888167829885, "grad_norm": 2.0644001960754395, "learning_rate": 1.6296655881718292e-05, "loss": 0.3467, "step": 29036 }, { "epoch": 4.740051426472389, "grad_norm": 1.743869662284851, "learning_rate": 1.629640935355212e-05, "loss": 0.3222, "step": 29037 }, { "epoch": 4.740214685114894, "grad_norm": 2.188262701034546, "learning_rate": 1.629616281904547e-05, "loss": 0.425, "step": 29038 }, { "epoch": 4.740377943757398, "grad_norm": 2.2891440391540527, "learning_rate": 1.6295916278198584e-05, "loss": 0.3972, "step": 29039 }, { "epoch": 4.740541202399902, "grad_norm": 1.8455610275268555, "learning_rate": 1.629566973101171e-05, "loss": 0.3634, "step": 29040 }, { "epoch": 4.740704461042406, "grad_norm": 2.1672427654266357, "learning_rate": 1.62954231774851e-05, "loss": 0.4216, "step": 29041 }, { "epoch": 4.7408677196849105, "grad_norm": 2.0353407859802246, "learning_rate": 1.6295176617619e-05, "loss": 0.3982, "step": 29042 }, { "epoch": 4.741030978327415, "grad_norm": 2.3216230869293213, "learning_rate": 1.6294930051413657e-05, "loss": 0.3711, "step": 29043 }, { "epoch": 4.741194236969919, "grad_norm": 2.0541508197784424, "learning_rate": 1.6294683478869323e-05, "loss": 0.3845, "step": 29044 }, { "epoch": 4.741357495612424, "grad_norm": 2.4265871047973633, "learning_rate": 1.6294436899986245e-05, "loss": 0.4411, "step": 29045 }, { "epoch": 4.741520754254928, "grad_norm": 1.6765785217285156, "learning_rate": 1.6294190314764673e-05, "loss": 0.342, "step": 29046 }, { "epoch": 4.741684012897433, "grad_norm": 2.335444450378418, "learning_rate": 1.629394372320485e-05, "loss": 0.4279, "step": 29047 }, { "epoch": 4.741847271539937, "grad_norm": 1.9476690292358398, "learning_rate": 1.6293697125307027e-05, "loss": 0.4317, "step": 29048 }, { "epoch": 4.742010530182442, "grad_norm": 1.9490169286727905, "learning_rate": 1.6293450521071453e-05, "loss": 0.3604, "step": 29049 }, { "epoch": 4.742173788824946, "grad_norm": 1.8016812801361084, "learning_rate": 1.6293203910498375e-05, "loss": 0.3483, "step": 29050 }, { "epoch": 4.74233704746745, "grad_norm": 1.9711357355117798, "learning_rate": 1.6292957293588044e-05, "loss": 0.4452, "step": 29051 }, { "epoch": 4.742500306109955, "grad_norm": 2.3870387077331543, "learning_rate": 1.6292710670340707e-05, "loss": 0.4119, "step": 29052 }, { "epoch": 4.742663564752459, "grad_norm": 2.0688135623931885, "learning_rate": 1.629246404075661e-05, "loss": 0.3994, "step": 29053 }, { "epoch": 4.742826823394964, "grad_norm": 1.947570562362671, "learning_rate": 1.6292217404836005e-05, "loss": 0.3882, "step": 29054 }, { "epoch": 4.742990082037468, "grad_norm": 2.539876937866211, "learning_rate": 1.6291970762579138e-05, "loss": 0.4109, "step": 29055 }, { "epoch": 4.743153340679973, "grad_norm": 1.9044240713119507, "learning_rate": 1.629172411398626e-05, "loss": 0.4049, "step": 29056 }, { "epoch": 4.743316599322476, "grad_norm": 1.6881479024887085, "learning_rate": 1.6291477459057615e-05, "loss": 0.3797, "step": 29057 }, { "epoch": 4.7434798579649815, "grad_norm": 2.239588975906372, "learning_rate": 1.6291230797793455e-05, "loss": 0.385, "step": 29058 }, { "epoch": 4.743643116607485, "grad_norm": 2.181884527206421, "learning_rate": 1.6290984130194026e-05, "loss": 0.444, "step": 29059 }, { "epoch": 4.7438063752499895, "grad_norm": 2.409385919570923, "learning_rate": 1.629073745625958e-05, "loss": 0.4157, "step": 29060 }, { "epoch": 4.743969633892494, "grad_norm": 2.076246738433838, "learning_rate": 1.629049077599036e-05, "loss": 0.3661, "step": 29061 }, { "epoch": 4.744132892534998, "grad_norm": 1.7909451723098755, "learning_rate": 1.629024408938662e-05, "loss": 0.3357, "step": 29062 }, { "epoch": 4.744296151177503, "grad_norm": 2.062026262283325, "learning_rate": 1.6289997396448604e-05, "loss": 0.4335, "step": 29063 }, { "epoch": 4.744459409820007, "grad_norm": 2.295426845550537, "learning_rate": 1.6289750697176564e-05, "loss": 0.4084, "step": 29064 }, { "epoch": 4.744622668462512, "grad_norm": 2.526864528656006, "learning_rate": 1.6289503991570746e-05, "loss": 0.471, "step": 29065 }, { "epoch": 4.744785927105016, "grad_norm": 2.167349100112915, "learning_rate": 1.6289257279631403e-05, "loss": 0.4297, "step": 29066 }, { "epoch": 4.744949185747521, "grad_norm": 2.056577205657959, "learning_rate": 1.6289010561358773e-05, "loss": 0.3988, "step": 29067 }, { "epoch": 4.745112444390025, "grad_norm": 2.1349596977233887, "learning_rate": 1.6288763836753116e-05, "loss": 0.4182, "step": 29068 }, { "epoch": 4.745275703032529, "grad_norm": 2.3383116722106934, "learning_rate": 1.6288517105814674e-05, "loss": 0.4039, "step": 29069 }, { "epoch": 4.745438961675034, "grad_norm": 2.1244475841522217, "learning_rate": 1.6288270368543696e-05, "loss": 0.4069, "step": 29070 }, { "epoch": 4.745602220317538, "grad_norm": 2.2584166526794434, "learning_rate": 1.628802362494043e-05, "loss": 0.3921, "step": 29071 }, { "epoch": 4.745765478960043, "grad_norm": 2.1347012519836426, "learning_rate": 1.628777687500513e-05, "loss": 0.3873, "step": 29072 }, { "epoch": 4.745928737602547, "grad_norm": 1.9052132368087769, "learning_rate": 1.6287530118738036e-05, "loss": 0.359, "step": 29073 }, { "epoch": 4.746091996245051, "grad_norm": 2.1280648708343506, "learning_rate": 1.6287283356139403e-05, "loss": 0.3913, "step": 29074 }, { "epoch": 4.746255254887556, "grad_norm": 1.9599523544311523, "learning_rate": 1.6287036587209477e-05, "loss": 0.3545, "step": 29075 }, { "epoch": 4.74641851353006, "grad_norm": 2.3698859214782715, "learning_rate": 1.628678981194851e-05, "loss": 0.4155, "step": 29076 }, { "epoch": 4.746581772172564, "grad_norm": 2.535325288772583, "learning_rate": 1.6286543030356744e-05, "loss": 0.4919, "step": 29077 }, { "epoch": 4.7467450308150685, "grad_norm": 1.9071394205093384, "learning_rate": 1.628629624243443e-05, "loss": 0.3824, "step": 29078 }, { "epoch": 4.746908289457573, "grad_norm": 2.097212791442871, "learning_rate": 1.628604944818182e-05, "loss": 0.3634, "step": 29079 }, { "epoch": 4.747071548100077, "grad_norm": 2.0060081481933594, "learning_rate": 1.6285802647599156e-05, "loss": 0.3797, "step": 29080 }, { "epoch": 4.747234806742582, "grad_norm": 2.1106977462768555, "learning_rate": 1.628555584068669e-05, "loss": 0.4374, "step": 29081 }, { "epoch": 4.747398065385086, "grad_norm": 2.7923316955566406, "learning_rate": 1.6285309027444677e-05, "loss": 0.4466, "step": 29082 }, { "epoch": 4.747561324027591, "grad_norm": 2.0371828079223633, "learning_rate": 1.6285062207873354e-05, "loss": 0.3505, "step": 29083 }, { "epoch": 4.747724582670095, "grad_norm": 1.7792556285858154, "learning_rate": 1.628481538197298e-05, "loss": 0.3685, "step": 29084 }, { "epoch": 4.7478878413125996, "grad_norm": 1.965441107749939, "learning_rate": 1.6284568549743792e-05, "loss": 0.3693, "step": 29085 }, { "epoch": 4.748051099955104, "grad_norm": 1.944170355796814, "learning_rate": 1.6284321711186054e-05, "loss": 0.3612, "step": 29086 }, { "epoch": 4.748214358597608, "grad_norm": 2.2409071922302246, "learning_rate": 1.6284074866299997e-05, "loss": 0.3882, "step": 29087 }, { "epoch": 4.748377617240113, "grad_norm": 2.3422303199768066, "learning_rate": 1.6283828015085882e-05, "loss": 0.4517, "step": 29088 }, { "epoch": 4.748540875882617, "grad_norm": 2.134246587753296, "learning_rate": 1.6283581157543956e-05, "loss": 0.3942, "step": 29089 }, { "epoch": 4.748704134525122, "grad_norm": 2.4813232421875, "learning_rate": 1.628333429367446e-05, "loss": 0.4085, "step": 29090 }, { "epoch": 4.748867393167626, "grad_norm": 2.1826913356781006, "learning_rate": 1.6283087423477654e-05, "loss": 0.4362, "step": 29091 }, { "epoch": 4.749030651810131, "grad_norm": 2.071955680847168, "learning_rate": 1.6282840546953777e-05, "loss": 0.4001, "step": 29092 }, { "epoch": 4.749193910452634, "grad_norm": 2.000598430633545, "learning_rate": 1.6282593664103084e-05, "loss": 0.3338, "step": 29093 }, { "epoch": 4.749357169095139, "grad_norm": 2.314410924911499, "learning_rate": 1.6282346774925816e-05, "loss": 0.3325, "step": 29094 }, { "epoch": 4.749520427737643, "grad_norm": 2.058638334274292, "learning_rate": 1.628209987942223e-05, "loss": 0.39, "step": 29095 }, { "epoch": 4.7496836863801475, "grad_norm": 2.556915044784546, "learning_rate": 1.6281852977592572e-05, "loss": 0.4571, "step": 29096 }, { "epoch": 4.749846945022652, "grad_norm": 1.9520338773727417, "learning_rate": 1.6281606069437088e-05, "loss": 0.3356, "step": 29097 }, { "epoch": 4.750010203665156, "grad_norm": 1.9238252639770508, "learning_rate": 1.628135915495603e-05, "loss": 0.3841, "step": 29098 }, { "epoch": 4.750173462307661, "grad_norm": 1.8028899431228638, "learning_rate": 1.628111223414964e-05, "loss": 0.3382, "step": 29099 }, { "epoch": 4.750336720950165, "grad_norm": 2.333883285522461, "learning_rate": 1.6280865307018177e-05, "loss": 0.3619, "step": 29100 }, { "epoch": 4.75049997959267, "grad_norm": 1.601523756980896, "learning_rate": 1.6280618373561883e-05, "loss": 0.3297, "step": 29101 }, { "epoch": 4.750663238235174, "grad_norm": 2.3664684295654297, "learning_rate": 1.6280371433781007e-05, "loss": 0.4159, "step": 29102 }, { "epoch": 4.7508264968776786, "grad_norm": 2.0798864364624023, "learning_rate": 1.6280124487675797e-05, "loss": 0.3888, "step": 29103 }, { "epoch": 4.750989755520183, "grad_norm": 1.7678252458572388, "learning_rate": 1.6279877535246506e-05, "loss": 0.369, "step": 29104 }, { "epoch": 4.751153014162687, "grad_norm": 2.9257826805114746, "learning_rate": 1.6279630576493383e-05, "loss": 0.5145, "step": 29105 }, { "epoch": 4.751316272805192, "grad_norm": 1.6996465921401978, "learning_rate": 1.6279383611416668e-05, "loss": 0.3947, "step": 29106 }, { "epoch": 4.751479531447696, "grad_norm": 2.0207877159118652, "learning_rate": 1.6279136640016618e-05, "loss": 0.3827, "step": 29107 }, { "epoch": 4.751642790090201, "grad_norm": 1.9642454385757446, "learning_rate": 1.6278889662293477e-05, "loss": 0.4084, "step": 29108 }, { "epoch": 4.751806048732705, "grad_norm": 2.402585506439209, "learning_rate": 1.62786426782475e-05, "loss": 0.4358, "step": 29109 }, { "epoch": 4.751969307375209, "grad_norm": 3.1457629203796387, "learning_rate": 1.627839568787893e-05, "loss": 0.4589, "step": 29110 }, { "epoch": 4.752132566017714, "grad_norm": 1.9505828619003296, "learning_rate": 1.6278148691188013e-05, "loss": 0.326, "step": 29111 }, { "epoch": 4.752295824660218, "grad_norm": 2.2660694122314453, "learning_rate": 1.6277901688175003e-05, "loss": 0.361, "step": 29112 }, { "epoch": 4.752459083302722, "grad_norm": 2.5940332412719727, "learning_rate": 1.6277654678840152e-05, "loss": 0.5367, "step": 29113 }, { "epoch": 4.7526223419452265, "grad_norm": 2.2021913528442383, "learning_rate": 1.62774076631837e-05, "loss": 0.3754, "step": 29114 }, { "epoch": 4.752785600587731, "grad_norm": 1.7807420492172241, "learning_rate": 1.6277160641205904e-05, "loss": 0.3844, "step": 29115 }, { "epoch": 4.752948859230235, "grad_norm": 2.466226816177368, "learning_rate": 1.6276913612907005e-05, "loss": 0.3936, "step": 29116 }, { "epoch": 4.75311211787274, "grad_norm": 2.450838327407837, "learning_rate": 1.627666657828726e-05, "loss": 0.4262, "step": 29117 }, { "epoch": 4.753275376515244, "grad_norm": 1.9531768560409546, "learning_rate": 1.627641953734691e-05, "loss": 0.4088, "step": 29118 }, { "epoch": 4.753438635157749, "grad_norm": 2.2101798057556152, "learning_rate": 1.6276172490086208e-05, "loss": 0.4029, "step": 29119 }, { "epoch": 4.753601893800253, "grad_norm": 2.063354253768921, "learning_rate": 1.62759254365054e-05, "loss": 0.3877, "step": 29120 }, { "epoch": 4.7537651524427575, "grad_norm": 2.0119237899780273, "learning_rate": 1.627567837660474e-05, "loss": 0.3598, "step": 29121 }, { "epoch": 4.753928411085262, "grad_norm": 2.107760429382324, "learning_rate": 1.6275431310384475e-05, "loss": 0.4395, "step": 29122 }, { "epoch": 4.754091669727766, "grad_norm": 2.2640936374664307, "learning_rate": 1.627518423784485e-05, "loss": 0.4329, "step": 29123 }, { "epoch": 4.754254928370271, "grad_norm": 2.038255214691162, "learning_rate": 1.6274937158986116e-05, "loss": 0.4163, "step": 29124 }, { "epoch": 4.754418187012775, "grad_norm": 2.0317952632904053, "learning_rate": 1.627469007380852e-05, "loss": 0.4162, "step": 29125 }, { "epoch": 4.75458144565528, "grad_norm": 2.2347428798675537, "learning_rate": 1.6274442982312314e-05, "loss": 0.4119, "step": 29126 }, { "epoch": 4.754744704297783, "grad_norm": 2.6116344928741455, "learning_rate": 1.627419588449775e-05, "loss": 0.4015, "step": 29127 }, { "epoch": 4.754907962940289, "grad_norm": 2.462778329849243, "learning_rate": 1.627394878036507e-05, "loss": 0.4032, "step": 29128 }, { "epoch": 4.755071221582792, "grad_norm": 1.8817737102508545, "learning_rate": 1.6273701669914522e-05, "loss": 0.3314, "step": 29129 }, { "epoch": 4.755234480225297, "grad_norm": 1.9868627786636353, "learning_rate": 1.627345455314636e-05, "loss": 0.3692, "step": 29130 }, { "epoch": 4.755397738867801, "grad_norm": 2.535764217376709, "learning_rate": 1.627320743006083e-05, "loss": 0.4023, "step": 29131 }, { "epoch": 4.7555609975103055, "grad_norm": 1.7454278469085693, "learning_rate": 1.6272960300658182e-05, "loss": 0.3669, "step": 29132 }, { "epoch": 4.75572425615281, "grad_norm": 2.301835298538208, "learning_rate": 1.6272713164938667e-05, "loss": 0.3907, "step": 29133 }, { "epoch": 4.755887514795314, "grad_norm": 1.7707918882369995, "learning_rate": 1.6272466022902528e-05, "loss": 0.3529, "step": 29134 }, { "epoch": 4.756050773437819, "grad_norm": 1.9085630178451538, "learning_rate": 1.627221887455002e-05, "loss": 0.3788, "step": 29135 }, { "epoch": 4.756214032080323, "grad_norm": 2.739487409591675, "learning_rate": 1.627197171988139e-05, "loss": 0.5014, "step": 29136 }, { "epoch": 4.756377290722828, "grad_norm": 2.0000765323638916, "learning_rate": 1.6271724558896883e-05, "loss": 0.3569, "step": 29137 }, { "epoch": 4.756540549365332, "grad_norm": 1.8717198371887207, "learning_rate": 1.6271477391596754e-05, "loss": 0.337, "step": 29138 }, { "epoch": 4.7567038080078365, "grad_norm": 1.6589727401733398, "learning_rate": 1.627123021798125e-05, "loss": 0.3462, "step": 29139 }, { "epoch": 4.756867066650341, "grad_norm": 1.953298568725586, "learning_rate": 1.6270983038050615e-05, "loss": 0.3635, "step": 29140 }, { "epoch": 4.757030325292845, "grad_norm": 2.4116249084472656, "learning_rate": 1.6270735851805103e-05, "loss": 0.4044, "step": 29141 }, { "epoch": 4.75719358393535, "grad_norm": 2.194221019744873, "learning_rate": 1.627048865924496e-05, "loss": 0.3946, "step": 29142 }, { "epoch": 4.757356842577854, "grad_norm": 2.0406527519226074, "learning_rate": 1.627024146037044e-05, "loss": 0.3829, "step": 29143 }, { "epoch": 4.757520101220359, "grad_norm": 2.900029182434082, "learning_rate": 1.6269994255181786e-05, "loss": 0.4455, "step": 29144 }, { "epoch": 4.757683359862863, "grad_norm": 2.115246057510376, "learning_rate": 1.6269747043679253e-05, "loss": 0.407, "step": 29145 }, { "epoch": 4.757846618505367, "grad_norm": 1.9932289123535156, "learning_rate": 1.626949982586308e-05, "loss": 0.397, "step": 29146 }, { "epoch": 4.758009877147871, "grad_norm": 1.6464855670928955, "learning_rate": 1.6269252601733526e-05, "loss": 0.331, "step": 29147 }, { "epoch": 4.758173135790376, "grad_norm": 2.2118239402770996, "learning_rate": 1.626900537129084e-05, "loss": 0.4328, "step": 29148 }, { "epoch": 4.75833639443288, "grad_norm": 2.2283968925476074, "learning_rate": 1.626875813453526e-05, "loss": 0.385, "step": 29149 }, { "epoch": 4.7584996530753845, "grad_norm": 2.0943760871887207, "learning_rate": 1.6268510891467048e-05, "loss": 0.3637, "step": 29150 }, { "epoch": 4.758662911717889, "grad_norm": 2.1498775482177734, "learning_rate": 1.6268263642086445e-05, "loss": 0.3752, "step": 29151 }, { "epoch": 4.758826170360393, "grad_norm": 1.973783016204834, "learning_rate": 1.6268016386393704e-05, "loss": 0.3803, "step": 29152 }, { "epoch": 4.758989429002898, "grad_norm": 2.1936187744140625, "learning_rate": 1.6267769124389067e-05, "loss": 0.3867, "step": 29153 }, { "epoch": 4.759152687645402, "grad_norm": 1.554202675819397, "learning_rate": 1.626752185607279e-05, "loss": 0.3301, "step": 29154 }, { "epoch": 4.759315946287907, "grad_norm": 1.7655547857284546, "learning_rate": 1.6267274581445124e-05, "loss": 0.3311, "step": 29155 }, { "epoch": 4.759479204930411, "grad_norm": 2.477307081222534, "learning_rate": 1.6267027300506316e-05, "loss": 0.408, "step": 29156 }, { "epoch": 4.7596424635729155, "grad_norm": 2.052598714828491, "learning_rate": 1.6266780013256608e-05, "loss": 0.3258, "step": 29157 }, { "epoch": 4.75980572221542, "grad_norm": 2.230792999267578, "learning_rate": 1.6266532719696253e-05, "loss": 0.4574, "step": 29158 }, { "epoch": 4.759968980857924, "grad_norm": 1.9316139221191406, "learning_rate": 1.6266285419825503e-05, "loss": 0.3594, "step": 29159 }, { "epoch": 4.760132239500429, "grad_norm": 1.6380488872528076, "learning_rate": 1.6266038113644605e-05, "loss": 0.3527, "step": 29160 }, { "epoch": 4.760295498142933, "grad_norm": 1.5673450231552124, "learning_rate": 1.6265790801153813e-05, "loss": 0.3177, "step": 29161 }, { "epoch": 4.760458756785438, "grad_norm": 1.942160725593567, "learning_rate": 1.6265543482353364e-05, "loss": 0.3753, "step": 29162 }, { "epoch": 4.760622015427941, "grad_norm": 2.3415091037750244, "learning_rate": 1.626529615724352e-05, "loss": 0.4154, "step": 29163 }, { "epoch": 4.760785274070446, "grad_norm": 2.2358052730560303, "learning_rate": 1.6265048825824522e-05, "loss": 0.3516, "step": 29164 }, { "epoch": 4.76094853271295, "grad_norm": 2.119647264480591, "learning_rate": 1.626480148809662e-05, "loss": 0.4167, "step": 29165 }, { "epoch": 4.761111791355455, "grad_norm": 2.0773777961730957, "learning_rate": 1.6264554144060068e-05, "loss": 0.3618, "step": 29166 }, { "epoch": 4.761275049997959, "grad_norm": 2.344794511795044, "learning_rate": 1.626430679371511e-05, "loss": 0.4268, "step": 29167 }, { "epoch": 4.7614383086404635, "grad_norm": 1.5726290941238403, "learning_rate": 1.6264059437061998e-05, "loss": 0.3515, "step": 29168 }, { "epoch": 4.761601567282968, "grad_norm": 2.5033514499664307, "learning_rate": 1.6263812074100974e-05, "loss": 0.4812, "step": 29169 }, { "epoch": 4.761764825925472, "grad_norm": 2.7651116847991943, "learning_rate": 1.6263564704832297e-05, "loss": 0.4518, "step": 29170 }, { "epoch": 4.761928084567977, "grad_norm": 2.3040401935577393, "learning_rate": 1.6263317329256215e-05, "loss": 0.3998, "step": 29171 }, { "epoch": 4.762091343210481, "grad_norm": 3.0131521224975586, "learning_rate": 1.626306994737297e-05, "loss": 0.4276, "step": 29172 }, { "epoch": 4.762254601852986, "grad_norm": 2.391199827194214, "learning_rate": 1.6262822559182817e-05, "loss": 0.4197, "step": 29173 }, { "epoch": 4.76241786049549, "grad_norm": 1.7429519891738892, "learning_rate": 1.6262575164686004e-05, "loss": 0.3274, "step": 29174 }, { "epoch": 4.7625811191379945, "grad_norm": 2.1017494201660156, "learning_rate": 1.6262327763882778e-05, "loss": 0.3706, "step": 29175 }, { "epoch": 4.762744377780499, "grad_norm": 2.2710695266723633, "learning_rate": 1.626208035677339e-05, "loss": 0.4067, "step": 29176 }, { "epoch": 4.762907636423003, "grad_norm": 2.111294984817505, "learning_rate": 1.6261832943358088e-05, "loss": 0.3594, "step": 29177 }, { "epoch": 4.763070895065508, "grad_norm": 2.102881908416748, "learning_rate": 1.6261585523637126e-05, "loss": 0.3618, "step": 29178 }, { "epoch": 4.763234153708012, "grad_norm": 1.9454699754714966, "learning_rate": 1.6261338097610742e-05, "loss": 0.3845, "step": 29179 }, { "epoch": 4.763397412350516, "grad_norm": 2.04512095451355, "learning_rate": 1.6261090665279198e-05, "loss": 0.38, "step": 29180 }, { "epoch": 4.763560670993021, "grad_norm": 2.3748371601104736, "learning_rate": 1.6260843226642733e-05, "loss": 0.4232, "step": 29181 }, { "epoch": 4.763723929635525, "grad_norm": 2.405658006668091, "learning_rate": 1.6260595781701605e-05, "loss": 0.3775, "step": 29182 }, { "epoch": 4.763887188278029, "grad_norm": 1.6372722387313843, "learning_rate": 1.6260348330456056e-05, "loss": 0.346, "step": 29183 }, { "epoch": 4.764050446920534, "grad_norm": 2.690056324005127, "learning_rate": 1.6260100872906338e-05, "loss": 0.4459, "step": 29184 }, { "epoch": 4.764213705563038, "grad_norm": 2.3161213397979736, "learning_rate": 1.62598534090527e-05, "loss": 0.3653, "step": 29185 }, { "epoch": 4.7643769642055425, "grad_norm": 2.76116681098938, "learning_rate": 1.6259605938895395e-05, "loss": 0.4541, "step": 29186 }, { "epoch": 4.764540222848047, "grad_norm": 1.9221315383911133, "learning_rate": 1.6259358462434663e-05, "loss": 0.3361, "step": 29187 }, { "epoch": 4.764703481490551, "grad_norm": 1.9474681615829468, "learning_rate": 1.625911097967076e-05, "loss": 0.3682, "step": 29188 }, { "epoch": 4.764866740133056, "grad_norm": 2.168513536453247, "learning_rate": 1.6258863490603936e-05, "loss": 0.4146, "step": 29189 }, { "epoch": 4.76502999877556, "grad_norm": 2.0857906341552734, "learning_rate": 1.6258615995234434e-05, "loss": 0.3944, "step": 29190 }, { "epoch": 4.765193257418065, "grad_norm": 2.3221843242645264, "learning_rate": 1.6258368493562513e-05, "loss": 0.3884, "step": 29191 }, { "epoch": 4.765356516060569, "grad_norm": 2.204928398132324, "learning_rate": 1.6258120985588412e-05, "loss": 0.421, "step": 29192 }, { "epoch": 4.7655197747030735, "grad_norm": 1.826128602027893, "learning_rate": 1.625787347131239e-05, "loss": 0.362, "step": 29193 }, { "epoch": 4.765683033345578, "grad_norm": 2.3546226024627686, "learning_rate": 1.6257625950734683e-05, "loss": 0.4181, "step": 29194 }, { "epoch": 4.765846291988082, "grad_norm": 2.234217643737793, "learning_rate": 1.6257378423855556e-05, "loss": 0.414, "step": 29195 }, { "epoch": 4.766009550630587, "grad_norm": 2.3428382873535156, "learning_rate": 1.625713089067525e-05, "loss": 0.4127, "step": 29196 }, { "epoch": 4.766172809273091, "grad_norm": 2.1172091960906982, "learning_rate": 1.625688335119401e-05, "loss": 0.4202, "step": 29197 }, { "epoch": 4.766336067915596, "grad_norm": 1.7520114183425903, "learning_rate": 1.6256635805412095e-05, "loss": 0.365, "step": 29198 }, { "epoch": 4.766499326558099, "grad_norm": 1.562857985496521, "learning_rate": 1.6256388253329746e-05, "loss": 0.3199, "step": 29199 }, { "epoch": 4.766662585200604, "grad_norm": 2.1720385551452637, "learning_rate": 1.6256140694947217e-05, "loss": 0.4314, "step": 29200 }, { "epoch": 4.766825843843108, "grad_norm": 1.9613652229309082, "learning_rate": 1.6255893130264755e-05, "loss": 0.4002, "step": 29201 }, { "epoch": 4.766989102485613, "grad_norm": 1.9993069171905518, "learning_rate": 1.625564555928261e-05, "loss": 0.3873, "step": 29202 }, { "epoch": 4.767152361128117, "grad_norm": 2.1157472133636475, "learning_rate": 1.6255397982001032e-05, "loss": 0.3904, "step": 29203 }, { "epoch": 4.7673156197706215, "grad_norm": 1.7908005714416504, "learning_rate": 1.6255150398420273e-05, "loss": 0.3244, "step": 29204 }, { "epoch": 4.767478878413126, "grad_norm": 1.7897926568984985, "learning_rate": 1.6254902808540576e-05, "loss": 0.3502, "step": 29205 }, { "epoch": 4.76764213705563, "grad_norm": 2.510895252227783, "learning_rate": 1.6254655212362194e-05, "loss": 0.4393, "step": 29206 }, { "epoch": 4.767805395698135, "grad_norm": 2.1672818660736084, "learning_rate": 1.6254407609885376e-05, "loss": 0.4351, "step": 29207 }, { "epoch": 4.767968654340639, "grad_norm": 2.0989303588867188, "learning_rate": 1.625416000111037e-05, "loss": 0.3953, "step": 29208 }, { "epoch": 4.768131912983144, "grad_norm": 2.069704055786133, "learning_rate": 1.625391238603743e-05, "loss": 0.3853, "step": 29209 }, { "epoch": 4.768295171625648, "grad_norm": 2.010640859603882, "learning_rate": 1.6253664764666797e-05, "loss": 0.3794, "step": 29210 }, { "epoch": 4.7684584302681525, "grad_norm": 1.8060407638549805, "learning_rate": 1.625341713699873e-05, "loss": 0.378, "step": 29211 }, { "epoch": 4.768621688910657, "grad_norm": 2.4497339725494385, "learning_rate": 1.625316950303347e-05, "loss": 0.3952, "step": 29212 }, { "epoch": 4.768784947553161, "grad_norm": 2.178651809692383, "learning_rate": 1.6252921862771272e-05, "loss": 0.439, "step": 29213 }, { "epoch": 4.768948206195666, "grad_norm": 1.9232177734375, "learning_rate": 1.6252674216212383e-05, "loss": 0.3661, "step": 29214 }, { "epoch": 4.76911146483817, "grad_norm": 2.7989115715026855, "learning_rate": 1.6252426563357054e-05, "loss": 0.4852, "step": 29215 }, { "epoch": 4.769274723480674, "grad_norm": 2.568166971206665, "learning_rate": 1.625217890420553e-05, "loss": 0.5173, "step": 29216 }, { "epoch": 4.769437982123178, "grad_norm": 1.8174612522125244, "learning_rate": 1.6251931238758067e-05, "loss": 0.4057, "step": 29217 }, { "epoch": 4.769601240765683, "grad_norm": 2.1774723529815674, "learning_rate": 1.625168356701491e-05, "loss": 0.414, "step": 29218 }, { "epoch": 4.769764499408187, "grad_norm": 2.2936060428619385, "learning_rate": 1.625143588897631e-05, "loss": 0.4125, "step": 29219 }, { "epoch": 4.769927758050692, "grad_norm": 1.9957566261291504, "learning_rate": 1.625118820464251e-05, "loss": 0.3842, "step": 29220 }, { "epoch": 4.770091016693196, "grad_norm": 1.7919516563415527, "learning_rate": 1.625094051401377e-05, "loss": 0.3468, "step": 29221 }, { "epoch": 4.7702542753357005, "grad_norm": 2.1988182067871094, "learning_rate": 1.6250692817090334e-05, "loss": 0.425, "step": 29222 }, { "epoch": 4.770417533978205, "grad_norm": 2.276315450668335, "learning_rate": 1.625044511387245e-05, "loss": 0.4259, "step": 29223 }, { "epoch": 4.770580792620709, "grad_norm": 2.2792270183563232, "learning_rate": 1.6250197404360374e-05, "loss": 0.3823, "step": 29224 }, { "epoch": 4.770744051263214, "grad_norm": 2.372007131576538, "learning_rate": 1.6249949688554346e-05, "loss": 0.3884, "step": 29225 }, { "epoch": 4.770907309905718, "grad_norm": 2.366821765899658, "learning_rate": 1.6249701966454626e-05, "loss": 0.3984, "step": 29226 }, { "epoch": 4.771070568548223, "grad_norm": 2.3951809406280518, "learning_rate": 1.6249454238061454e-05, "loss": 0.4238, "step": 29227 }, { "epoch": 4.771233827190727, "grad_norm": 2.5615506172180176, "learning_rate": 1.6249206503375082e-05, "loss": 0.4732, "step": 29228 }, { "epoch": 4.7713970858332315, "grad_norm": 2.155587911605835, "learning_rate": 1.624895876239576e-05, "loss": 0.3858, "step": 29229 }, { "epoch": 4.771560344475736, "grad_norm": 1.8438501358032227, "learning_rate": 1.6248711015123742e-05, "loss": 0.3927, "step": 29230 }, { "epoch": 4.77172360311824, "grad_norm": 1.5831618309020996, "learning_rate": 1.6248463261559272e-05, "loss": 0.3216, "step": 29231 }, { "epoch": 4.771886861760745, "grad_norm": 1.8134689331054688, "learning_rate": 1.62482155017026e-05, "loss": 0.3516, "step": 29232 }, { "epoch": 4.772050120403248, "grad_norm": 2.27117919921875, "learning_rate": 1.6247967735553976e-05, "loss": 0.4125, "step": 29233 }, { "epoch": 4.772213379045754, "grad_norm": 1.8750383853912354, "learning_rate": 1.6247719963113655e-05, "loss": 0.316, "step": 29234 }, { "epoch": 4.772376637688257, "grad_norm": 1.6941030025482178, "learning_rate": 1.6247472184381878e-05, "loss": 0.3468, "step": 29235 }, { "epoch": 4.772539896330762, "grad_norm": 2.1255123615264893, "learning_rate": 1.62472243993589e-05, "loss": 0.3748, "step": 29236 }, { "epoch": 4.772703154973266, "grad_norm": 1.9055957794189453, "learning_rate": 1.624697660804497e-05, "loss": 0.3747, "step": 29237 }, { "epoch": 4.772866413615771, "grad_norm": 2.2558345794677734, "learning_rate": 1.6246728810440333e-05, "loss": 0.3802, "step": 29238 }, { "epoch": 4.773029672258275, "grad_norm": 2.155083656311035, "learning_rate": 1.624648100654524e-05, "loss": 0.4353, "step": 29239 }, { "epoch": 4.7731929309007795, "grad_norm": 2.3544955253601074, "learning_rate": 1.6246233196359944e-05, "loss": 0.3683, "step": 29240 }, { "epoch": 4.773356189543284, "grad_norm": 1.9392833709716797, "learning_rate": 1.6245985379884694e-05, "loss": 0.3876, "step": 29241 }, { "epoch": 4.773519448185788, "grad_norm": 2.13063645362854, "learning_rate": 1.624573755711974e-05, "loss": 0.3716, "step": 29242 }, { "epoch": 4.773682706828293, "grad_norm": 1.8641188144683838, "learning_rate": 1.6245489728065325e-05, "loss": 0.3255, "step": 29243 }, { "epoch": 4.773845965470797, "grad_norm": 2.2824409008026123, "learning_rate": 1.6245241892721705e-05, "loss": 0.3462, "step": 29244 }, { "epoch": 4.774009224113302, "grad_norm": 2.423097848892212, "learning_rate": 1.6244994051089132e-05, "loss": 0.4399, "step": 29245 }, { "epoch": 4.774172482755806, "grad_norm": 2.166473150253296, "learning_rate": 1.624474620316785e-05, "loss": 0.3203, "step": 29246 }, { "epoch": 4.7743357413983105, "grad_norm": 2.311741352081299, "learning_rate": 1.6244498348958107e-05, "loss": 0.4147, "step": 29247 }, { "epoch": 4.774499000040815, "grad_norm": 1.927215337753296, "learning_rate": 1.624425048846016e-05, "loss": 0.3918, "step": 29248 }, { "epoch": 4.774662258683319, "grad_norm": 2.4150161743164062, "learning_rate": 1.6244002621674253e-05, "loss": 0.4031, "step": 29249 }, { "epoch": 4.774825517325823, "grad_norm": 2.5193333625793457, "learning_rate": 1.6243754748600637e-05, "loss": 0.4172, "step": 29250 }, { "epoch": 4.774988775968328, "grad_norm": 1.9053839445114136, "learning_rate": 1.6243506869239557e-05, "loss": 0.3448, "step": 29251 }, { "epoch": 4.775152034610832, "grad_norm": 2.1728994846343994, "learning_rate": 1.6243258983591273e-05, "loss": 0.4097, "step": 29252 }, { "epoch": 4.775315293253336, "grad_norm": 1.8536990880966187, "learning_rate": 1.624301109165603e-05, "loss": 0.3449, "step": 29253 }, { "epoch": 4.775478551895841, "grad_norm": 2.9236936569213867, "learning_rate": 1.6242763193434073e-05, "loss": 0.5475, "step": 29254 }, { "epoch": 4.775641810538345, "grad_norm": 1.8965551853179932, "learning_rate": 1.6242515288925656e-05, "loss": 0.3402, "step": 29255 }, { "epoch": 4.77580506918085, "grad_norm": 1.81196928024292, "learning_rate": 1.6242267378131027e-05, "loss": 0.3524, "step": 29256 }, { "epoch": 4.775968327823354, "grad_norm": 2.4098730087280273, "learning_rate": 1.6242019461050435e-05, "loss": 0.4086, "step": 29257 }, { "epoch": 4.7761315864658584, "grad_norm": 1.7718075513839722, "learning_rate": 1.6241771537684136e-05, "loss": 0.3653, "step": 29258 }, { "epoch": 4.776294845108363, "grad_norm": 2.2336721420288086, "learning_rate": 1.6241523608032372e-05, "loss": 0.4366, "step": 29259 }, { "epoch": 4.776458103750867, "grad_norm": 2.320174217224121, "learning_rate": 1.6241275672095397e-05, "loss": 0.4338, "step": 29260 }, { "epoch": 4.776621362393372, "grad_norm": 1.7617911100387573, "learning_rate": 1.6241027729873454e-05, "loss": 0.3421, "step": 29261 }, { "epoch": 4.776784621035876, "grad_norm": 1.883914828300476, "learning_rate": 1.62407797813668e-05, "loss": 0.3498, "step": 29262 }, { "epoch": 4.776947879678381, "grad_norm": 2.3472373485565186, "learning_rate": 1.624053182657568e-05, "loss": 0.4398, "step": 29263 }, { "epoch": 4.777111138320885, "grad_norm": 2.3786282539367676, "learning_rate": 1.6240283865500357e-05, "loss": 0.4658, "step": 29264 }, { "epoch": 4.7772743969633895, "grad_norm": 1.8234795331954956, "learning_rate": 1.624003589814106e-05, "loss": 0.3936, "step": 29265 }, { "epoch": 4.777437655605894, "grad_norm": 2.7178468704223633, "learning_rate": 1.6239787924498052e-05, "loss": 0.4622, "step": 29266 }, { "epoch": 4.777600914248398, "grad_norm": 2.0807504653930664, "learning_rate": 1.623953994457158e-05, "loss": 0.3905, "step": 29267 }, { "epoch": 4.777764172890903, "grad_norm": 1.9001752138137817, "learning_rate": 1.6239291958361887e-05, "loss": 0.3951, "step": 29268 }, { "epoch": 4.777927431533406, "grad_norm": 2.3071811199188232, "learning_rate": 1.6239043965869233e-05, "loss": 0.43, "step": 29269 }, { "epoch": 4.778090690175911, "grad_norm": 2.361572027206421, "learning_rate": 1.6238795967093865e-05, "loss": 0.368, "step": 29270 }, { "epoch": 4.778253948818415, "grad_norm": 2.5906600952148438, "learning_rate": 1.6238547962036026e-05, "loss": 0.4317, "step": 29271 }, { "epoch": 4.77841720746092, "grad_norm": 2.2387866973876953, "learning_rate": 1.6238299950695974e-05, "loss": 0.4361, "step": 29272 }, { "epoch": 4.778580466103424, "grad_norm": 2.2606546878814697, "learning_rate": 1.6238051933073957e-05, "loss": 0.4607, "step": 29273 }, { "epoch": 4.778743724745929, "grad_norm": 1.640037178993225, "learning_rate": 1.623780390917022e-05, "loss": 0.3556, "step": 29274 }, { "epoch": 4.778906983388433, "grad_norm": 2.0955092906951904, "learning_rate": 1.623755587898502e-05, "loss": 0.3945, "step": 29275 }, { "epoch": 4.779070242030937, "grad_norm": 2.24495267868042, "learning_rate": 1.62373078425186e-05, "loss": 0.4379, "step": 29276 }, { "epoch": 4.779233500673442, "grad_norm": 2.197700023651123, "learning_rate": 1.6237059799771215e-05, "loss": 0.3564, "step": 29277 }, { "epoch": 4.779396759315946, "grad_norm": 2.406961679458618, "learning_rate": 1.6236811750743113e-05, "loss": 0.4413, "step": 29278 }, { "epoch": 4.779560017958451, "grad_norm": 2.1166348457336426, "learning_rate": 1.623656369543454e-05, "loss": 0.3424, "step": 29279 }, { "epoch": 4.779723276600955, "grad_norm": 2.284453868865967, "learning_rate": 1.623631563384575e-05, "loss": 0.3971, "step": 29280 }, { "epoch": 4.77988653524346, "grad_norm": 2.2757978439331055, "learning_rate": 1.6236067565976992e-05, "loss": 0.3803, "step": 29281 }, { "epoch": 4.780049793885964, "grad_norm": 2.086174964904785, "learning_rate": 1.6235819491828515e-05, "loss": 0.4009, "step": 29282 }, { "epoch": 4.7802130525284685, "grad_norm": 1.8499850034713745, "learning_rate": 1.623557141140057e-05, "loss": 0.3592, "step": 29283 }, { "epoch": 4.780376311170973, "grad_norm": 2.7131850719451904, "learning_rate": 1.6235323324693407e-05, "loss": 0.469, "step": 29284 }, { "epoch": 4.780539569813477, "grad_norm": 2.154492139816284, "learning_rate": 1.6235075231707274e-05, "loss": 0.3413, "step": 29285 }, { "epoch": 4.780702828455981, "grad_norm": 1.8668359518051147, "learning_rate": 1.6234827132442424e-05, "loss": 0.3896, "step": 29286 }, { "epoch": 4.780866087098486, "grad_norm": 1.9588643312454224, "learning_rate": 1.62345790268991e-05, "loss": 0.3787, "step": 29287 }, { "epoch": 4.78102934574099, "grad_norm": 2.1860556602478027, "learning_rate": 1.623433091507756e-05, "loss": 0.4197, "step": 29288 }, { "epoch": 4.781192604383494, "grad_norm": 2.644662857055664, "learning_rate": 1.623408279697805e-05, "loss": 0.4517, "step": 29289 }, { "epoch": 4.781355863025999, "grad_norm": 1.7616928815841675, "learning_rate": 1.623383467260082e-05, "loss": 0.3804, "step": 29290 }, { "epoch": 4.781519121668503, "grad_norm": 2.2194066047668457, "learning_rate": 1.623358654194612e-05, "loss": 0.365, "step": 29291 }, { "epoch": 4.781682380311008, "grad_norm": 2.3156964778900146, "learning_rate": 1.6233338405014204e-05, "loss": 0.3865, "step": 29292 }, { "epoch": 4.781845638953512, "grad_norm": 2.3104915618896484, "learning_rate": 1.6233090261805312e-05, "loss": 0.3975, "step": 29293 }, { "epoch": 4.782008897596016, "grad_norm": 2.237436056137085, "learning_rate": 1.6232842112319703e-05, "loss": 0.4331, "step": 29294 }, { "epoch": 4.782172156238521, "grad_norm": 2.2448415756225586, "learning_rate": 1.6232593956557623e-05, "loss": 0.4719, "step": 29295 }, { "epoch": 4.782335414881025, "grad_norm": 2.064584970474243, "learning_rate": 1.623234579451932e-05, "loss": 0.4101, "step": 29296 }, { "epoch": 4.78249867352353, "grad_norm": 2.056696891784668, "learning_rate": 1.6232097626205053e-05, "loss": 0.372, "step": 29297 }, { "epoch": 4.782661932166034, "grad_norm": 1.8394849300384521, "learning_rate": 1.6231849451615057e-05, "loss": 0.3374, "step": 29298 }, { "epoch": 4.782825190808539, "grad_norm": 1.8545477390289307, "learning_rate": 1.6231601270749594e-05, "loss": 0.3835, "step": 29299 }, { "epoch": 4.782988449451043, "grad_norm": 2.172640562057495, "learning_rate": 1.623135308360891e-05, "loss": 0.3743, "step": 29300 }, { "epoch": 4.7831517080935475, "grad_norm": 1.886810541152954, "learning_rate": 1.6231104890193256e-05, "loss": 0.3476, "step": 29301 }, { "epoch": 4.783314966736052, "grad_norm": 2.0966827869415283, "learning_rate": 1.6230856690502882e-05, "loss": 0.3889, "step": 29302 }, { "epoch": 4.7834782253785555, "grad_norm": 2.142137050628662, "learning_rate": 1.6230608484538034e-05, "loss": 0.3773, "step": 29303 }, { "epoch": 4.783641484021061, "grad_norm": 2.2398018836975098, "learning_rate": 1.623036027229897e-05, "loss": 0.4067, "step": 29304 }, { "epoch": 4.783804742663564, "grad_norm": 2.3524014949798584, "learning_rate": 1.623011205378593e-05, "loss": 0.3702, "step": 29305 }, { "epoch": 4.783968001306069, "grad_norm": 2.116767406463623, "learning_rate": 1.6229863828999173e-05, "loss": 0.3299, "step": 29306 }, { "epoch": 4.784131259948573, "grad_norm": 2.015355110168457, "learning_rate": 1.622961559793894e-05, "loss": 0.3682, "step": 29307 }, { "epoch": 4.784294518591078, "grad_norm": 1.879536747932434, "learning_rate": 1.6229367360605487e-05, "loss": 0.337, "step": 29308 }, { "epoch": 4.784457777233582, "grad_norm": 3.2688636779785156, "learning_rate": 1.6229119116999064e-05, "loss": 0.4248, "step": 29309 }, { "epoch": 4.784621035876087, "grad_norm": 1.9341387748718262, "learning_rate": 1.6228870867119918e-05, "loss": 0.401, "step": 29310 }, { "epoch": 4.784784294518591, "grad_norm": 2.2591028213500977, "learning_rate": 1.62286226109683e-05, "loss": 0.3958, "step": 29311 }, { "epoch": 4.784947553161095, "grad_norm": 2.268479347229004, "learning_rate": 1.6228374348544464e-05, "loss": 0.4043, "step": 29312 }, { "epoch": 4.7851108118036, "grad_norm": 2.331868886947632, "learning_rate": 1.6228126079848657e-05, "loss": 0.4598, "step": 29313 }, { "epoch": 4.785274070446104, "grad_norm": 2.1646182537078857, "learning_rate": 1.6227877804881126e-05, "loss": 0.4187, "step": 29314 }, { "epoch": 4.785437329088609, "grad_norm": 2.2366693019866943, "learning_rate": 1.6227629523642127e-05, "loss": 0.3969, "step": 29315 }, { "epoch": 4.785600587731113, "grad_norm": 2.3650758266448975, "learning_rate": 1.6227381236131902e-05, "loss": 0.4308, "step": 29316 }, { "epoch": 4.785763846373618, "grad_norm": 1.9933995008468628, "learning_rate": 1.622713294235071e-05, "loss": 0.3381, "step": 29317 }, { "epoch": 4.785927105016122, "grad_norm": 2.0875802040100098, "learning_rate": 1.6226884642298794e-05, "loss": 0.3583, "step": 29318 }, { "epoch": 4.7860903636586265, "grad_norm": 2.4359588623046875, "learning_rate": 1.6226636335976408e-05, "loss": 0.4564, "step": 29319 }, { "epoch": 4.786253622301131, "grad_norm": 2.181762933731079, "learning_rate": 1.62263880233838e-05, "loss": 0.4074, "step": 29320 }, { "epoch": 4.786416880943635, "grad_norm": 2.303081512451172, "learning_rate": 1.6226139704521223e-05, "loss": 0.4086, "step": 29321 }, { "epoch": 4.786580139586139, "grad_norm": 2.039564847946167, "learning_rate": 1.6225891379388922e-05, "loss": 0.3409, "step": 29322 }, { "epoch": 4.786743398228643, "grad_norm": 1.9209190607070923, "learning_rate": 1.622564304798715e-05, "loss": 0.3597, "step": 29323 }, { "epoch": 4.786906656871148, "grad_norm": 1.9909663200378418, "learning_rate": 1.622539471031616e-05, "loss": 0.3268, "step": 29324 }, { "epoch": 4.787069915513652, "grad_norm": 2.0717272758483887, "learning_rate": 1.6225146366376198e-05, "loss": 0.3875, "step": 29325 }, { "epoch": 4.787233174156157, "grad_norm": 2.1123154163360596, "learning_rate": 1.6224898016167513e-05, "loss": 0.3533, "step": 29326 }, { "epoch": 4.787396432798661, "grad_norm": 2.276141405105591, "learning_rate": 1.622464965969036e-05, "loss": 0.4214, "step": 29327 }, { "epoch": 4.7875596914411656, "grad_norm": 1.9534261226654053, "learning_rate": 1.6224401296944983e-05, "loss": 0.352, "step": 29328 }, { "epoch": 4.78772295008367, "grad_norm": 1.749380111694336, "learning_rate": 1.6224152927931643e-05, "loss": 0.3596, "step": 29329 }, { "epoch": 4.787886208726174, "grad_norm": 2.3348474502563477, "learning_rate": 1.6223904552650577e-05, "loss": 0.4227, "step": 29330 }, { "epoch": 4.788049467368679, "grad_norm": 2.325896978378296, "learning_rate": 1.6223656171102043e-05, "loss": 0.4031, "step": 29331 }, { "epoch": 4.788212726011183, "grad_norm": 2.04879093170166, "learning_rate": 1.6223407783286288e-05, "loss": 0.3993, "step": 29332 }, { "epoch": 4.788375984653688, "grad_norm": 2.556939125061035, "learning_rate": 1.622315938920356e-05, "loss": 0.4466, "step": 29333 }, { "epoch": 4.788539243296192, "grad_norm": 2.4136428833007812, "learning_rate": 1.6222910988854117e-05, "loss": 0.3985, "step": 29334 }, { "epoch": 4.788702501938697, "grad_norm": 2.1615536212921143, "learning_rate": 1.62226625822382e-05, "loss": 0.3752, "step": 29335 }, { "epoch": 4.788865760581201, "grad_norm": 2.081942558288574, "learning_rate": 1.6222414169356066e-05, "loss": 0.3818, "step": 29336 }, { "epoch": 4.7890290192237055, "grad_norm": 2.270490884780884, "learning_rate": 1.6222165750207963e-05, "loss": 0.3943, "step": 29337 }, { "epoch": 4.78919227786621, "grad_norm": 2.336453676223755, "learning_rate": 1.6221917324794138e-05, "loss": 0.3989, "step": 29338 }, { "epoch": 4.7893555365087135, "grad_norm": 2.137045383453369, "learning_rate": 1.6221668893114846e-05, "loss": 0.3817, "step": 29339 }, { "epoch": 4.789518795151219, "grad_norm": 2.579303026199341, "learning_rate": 1.6221420455170337e-05, "loss": 0.3725, "step": 29340 }, { "epoch": 4.789682053793722, "grad_norm": 2.124178409576416, "learning_rate": 1.6221172010960854e-05, "loss": 0.4223, "step": 29341 }, { "epoch": 4.789845312436227, "grad_norm": 2.414630651473999, "learning_rate": 1.6220923560486657e-05, "loss": 0.3918, "step": 29342 }, { "epoch": 4.790008571078731, "grad_norm": 1.9138071537017822, "learning_rate": 1.622067510374799e-05, "loss": 0.3647, "step": 29343 }, { "epoch": 4.790171829721236, "grad_norm": 2.2059812545776367, "learning_rate": 1.6220426640745106e-05, "loss": 0.4011, "step": 29344 }, { "epoch": 4.79033508836374, "grad_norm": 2.200092077255249, "learning_rate": 1.622017817147825e-05, "loss": 0.3741, "step": 29345 }, { "epoch": 4.7904983470062446, "grad_norm": 2.2802743911743164, "learning_rate": 1.621992969594768e-05, "loss": 0.438, "step": 29346 }, { "epoch": 4.790661605648749, "grad_norm": 2.190467596054077, "learning_rate": 1.621968121415364e-05, "loss": 0.4057, "step": 29347 }, { "epoch": 4.790824864291253, "grad_norm": 2.6661176681518555, "learning_rate": 1.621943272609639e-05, "loss": 0.4206, "step": 29348 }, { "epoch": 4.790988122933758, "grad_norm": 2.2351391315460205, "learning_rate": 1.6219184231776165e-05, "loss": 0.4206, "step": 29349 }, { "epoch": 4.791151381576262, "grad_norm": 2.1587507724761963, "learning_rate": 1.6218935731193223e-05, "loss": 0.3878, "step": 29350 }, { "epoch": 4.791314640218767, "grad_norm": 2.7233264446258545, "learning_rate": 1.621868722434782e-05, "loss": 0.4942, "step": 29351 }, { "epoch": 4.791477898861271, "grad_norm": 1.881340503692627, "learning_rate": 1.6218438711240194e-05, "loss": 0.3584, "step": 29352 }, { "epoch": 4.791641157503776, "grad_norm": 2.910447120666504, "learning_rate": 1.6218190191870607e-05, "loss": 0.4344, "step": 29353 }, { "epoch": 4.79180441614628, "grad_norm": 1.8526877164840698, "learning_rate": 1.6217941666239302e-05, "loss": 0.3384, "step": 29354 }, { "epoch": 4.7919676747887845, "grad_norm": 2.0637218952178955, "learning_rate": 1.621769313434653e-05, "loss": 0.3679, "step": 29355 }, { "epoch": 4.792130933431288, "grad_norm": 2.259409189224243, "learning_rate": 1.6217444596192548e-05, "loss": 0.4538, "step": 29356 }, { "epoch": 4.792294192073793, "grad_norm": 2.274369478225708, "learning_rate": 1.6217196051777596e-05, "loss": 0.3901, "step": 29357 }, { "epoch": 4.792457450716297, "grad_norm": 1.6931321620941162, "learning_rate": 1.621694750110193e-05, "loss": 0.3029, "step": 29358 }, { "epoch": 4.792620709358801, "grad_norm": 1.88877272605896, "learning_rate": 1.6216698944165798e-05, "loss": 0.3934, "step": 29359 }, { "epoch": 4.792783968001306, "grad_norm": 2.3064029216766357, "learning_rate": 1.6216450380969456e-05, "loss": 0.4171, "step": 29360 }, { "epoch": 4.79294722664381, "grad_norm": 2.361980676651001, "learning_rate": 1.6216201811513148e-05, "loss": 0.4747, "step": 29361 }, { "epoch": 4.793110485286315, "grad_norm": 1.748619794845581, "learning_rate": 1.6215953235797127e-05, "loss": 0.3679, "step": 29362 }, { "epoch": 4.793273743928819, "grad_norm": 2.282163143157959, "learning_rate": 1.621570465382164e-05, "loss": 0.52, "step": 29363 }, { "epoch": 4.7934370025713235, "grad_norm": 2.323066473007202, "learning_rate": 1.6215456065586944e-05, "loss": 0.4047, "step": 29364 }, { "epoch": 4.793600261213828, "grad_norm": 2.4686853885650635, "learning_rate": 1.621520747109328e-05, "loss": 0.4303, "step": 29365 }, { "epoch": 4.793763519856332, "grad_norm": 1.8896592855453491, "learning_rate": 1.621495887034091e-05, "loss": 0.3481, "step": 29366 }, { "epoch": 4.793926778498837, "grad_norm": 2.638700008392334, "learning_rate": 1.6214710263330075e-05, "loss": 0.3708, "step": 29367 }, { "epoch": 4.794090037141341, "grad_norm": 2.189417600631714, "learning_rate": 1.6214461650061028e-05, "loss": 0.358, "step": 29368 }, { "epoch": 4.794253295783846, "grad_norm": 2.1881461143493652, "learning_rate": 1.621421303053402e-05, "loss": 0.3725, "step": 29369 }, { "epoch": 4.79441655442635, "grad_norm": 1.8031586408615112, "learning_rate": 1.6213964404749302e-05, "loss": 0.3631, "step": 29370 }, { "epoch": 4.794579813068855, "grad_norm": 2.227703809738159, "learning_rate": 1.6213715772707122e-05, "loss": 0.3882, "step": 29371 }, { "epoch": 4.794743071711359, "grad_norm": 1.813591480255127, "learning_rate": 1.6213467134407733e-05, "loss": 0.3331, "step": 29372 }, { "epoch": 4.7949063303538635, "grad_norm": 2.3365554809570312, "learning_rate": 1.6213218489851386e-05, "loss": 0.4185, "step": 29373 }, { "epoch": 4.795069588996368, "grad_norm": 1.694563388824463, "learning_rate": 1.621296983903833e-05, "loss": 0.3517, "step": 29374 }, { "epoch": 4.7952328476388715, "grad_norm": 2.7828493118286133, "learning_rate": 1.6212721181968812e-05, "loss": 0.4253, "step": 29375 }, { "epoch": 4.795396106281376, "grad_norm": 2.378504753112793, "learning_rate": 1.6212472518643086e-05, "loss": 0.4545, "step": 29376 }, { "epoch": 4.79555936492388, "grad_norm": 2.0397121906280518, "learning_rate": 1.62122238490614e-05, "loss": 0.3477, "step": 29377 }, { "epoch": 4.795722623566385, "grad_norm": 2.0219790935516357, "learning_rate": 1.621197517322401e-05, "loss": 0.3894, "step": 29378 }, { "epoch": 4.795885882208889, "grad_norm": 2.5048553943634033, "learning_rate": 1.621172649113116e-05, "loss": 0.4021, "step": 29379 }, { "epoch": 4.796049140851394, "grad_norm": 1.7836071252822876, "learning_rate": 1.6211477802783105e-05, "loss": 0.3401, "step": 29380 }, { "epoch": 4.796212399493898, "grad_norm": 1.974456548690796, "learning_rate": 1.6211229108180094e-05, "loss": 0.3742, "step": 29381 }, { "epoch": 4.7963756581364025, "grad_norm": 2.2701165676116943, "learning_rate": 1.6210980407322372e-05, "loss": 0.4292, "step": 29382 }, { "epoch": 4.796538916778907, "grad_norm": 1.9895929098129272, "learning_rate": 1.62107317002102e-05, "loss": 0.354, "step": 29383 }, { "epoch": 4.796702175421411, "grad_norm": 2.258708953857422, "learning_rate": 1.6210482986843823e-05, "loss": 0.4172, "step": 29384 }, { "epoch": 4.796865434063916, "grad_norm": 2.0447192192077637, "learning_rate": 1.6210234267223488e-05, "loss": 0.365, "step": 29385 }, { "epoch": 4.79702869270642, "grad_norm": 2.3593356609344482, "learning_rate": 1.6209985541349447e-05, "loss": 0.4411, "step": 29386 }, { "epoch": 4.797191951348925, "grad_norm": 2.952195644378662, "learning_rate": 1.6209736809221956e-05, "loss": 0.4083, "step": 29387 }, { "epoch": 4.797355209991429, "grad_norm": 1.9688234329223633, "learning_rate": 1.620948807084126e-05, "loss": 0.3595, "step": 29388 }, { "epoch": 4.797518468633934, "grad_norm": 2.1596627235412598, "learning_rate": 1.620923932620761e-05, "loss": 0.3699, "step": 29389 }, { "epoch": 4.797681727276438, "grad_norm": 2.3438451290130615, "learning_rate": 1.6208990575321264e-05, "loss": 0.3655, "step": 29390 }, { "epoch": 4.7978449859189425, "grad_norm": 2.6587681770324707, "learning_rate": 1.620874181818246e-05, "loss": 0.4504, "step": 29391 }, { "epoch": 4.798008244561446, "grad_norm": 2.2375311851501465, "learning_rate": 1.6208493054791458e-05, "loss": 0.3483, "step": 29392 }, { "epoch": 4.7981715032039505, "grad_norm": 2.085947036743164, "learning_rate": 1.62082442851485e-05, "loss": 0.4157, "step": 29393 }, { "epoch": 4.798334761846455, "grad_norm": 2.423569679260254, "learning_rate": 1.6207995509253848e-05, "loss": 0.4457, "step": 29394 }, { "epoch": 4.798498020488959, "grad_norm": 2.9386098384857178, "learning_rate": 1.6207746727107742e-05, "loss": 0.4162, "step": 29395 }, { "epoch": 4.798661279131464, "grad_norm": 1.9147555828094482, "learning_rate": 1.6207497938710436e-05, "loss": 0.3331, "step": 29396 }, { "epoch": 4.798824537773968, "grad_norm": 1.8690696954727173, "learning_rate": 1.6207249144062185e-05, "loss": 0.3423, "step": 29397 }, { "epoch": 4.798987796416473, "grad_norm": 2.014775276184082, "learning_rate": 1.6207000343163234e-05, "loss": 0.3759, "step": 29398 }, { "epoch": 4.799151055058977, "grad_norm": 2.219435691833496, "learning_rate": 1.6206751536013835e-05, "loss": 0.3936, "step": 29399 }, { "epoch": 4.7993143137014815, "grad_norm": 2.126784563064575, "learning_rate": 1.620650272261424e-05, "loss": 0.3477, "step": 29400 }, { "epoch": 4.799477572343986, "grad_norm": 2.4475841522216797, "learning_rate": 1.6206253902964693e-05, "loss": 0.382, "step": 29401 }, { "epoch": 4.79964083098649, "grad_norm": 2.350910186767578, "learning_rate": 1.6206005077065457e-05, "loss": 0.3699, "step": 29402 }, { "epoch": 4.799804089628995, "grad_norm": 2.166706085205078, "learning_rate": 1.6205756244916774e-05, "loss": 0.3826, "step": 29403 }, { "epoch": 4.799967348271499, "grad_norm": 2.054663896560669, "learning_rate": 1.6205507406518896e-05, "loss": 0.3755, "step": 29404 }, { "epoch": 4.800130606914004, "grad_norm": 2.045173168182373, "learning_rate": 1.6205258561872073e-05, "loss": 0.3798, "step": 29405 }, { "epoch": 4.800293865556508, "grad_norm": 2.3944807052612305, "learning_rate": 1.6205009710976555e-05, "loss": 0.3738, "step": 29406 }, { "epoch": 4.800457124199013, "grad_norm": 2.3086352348327637, "learning_rate": 1.6204760853832594e-05, "loss": 0.4229, "step": 29407 }, { "epoch": 4.800620382841517, "grad_norm": 2.3199303150177, "learning_rate": 1.620451199044044e-05, "loss": 0.402, "step": 29408 }, { "epoch": 4.800783641484021, "grad_norm": 2.1609063148498535, "learning_rate": 1.6204263120800348e-05, "loss": 0.4177, "step": 29409 }, { "epoch": 4.800946900126526, "grad_norm": 2.3323428630828857, "learning_rate": 1.6204014244912564e-05, "loss": 0.4128, "step": 29410 }, { "epoch": 4.8011101587690295, "grad_norm": 2.217705726623535, "learning_rate": 1.6203765362777333e-05, "loss": 0.3951, "step": 29411 }, { "epoch": 4.801273417411534, "grad_norm": 2.132915735244751, "learning_rate": 1.620351647439492e-05, "loss": 0.3728, "step": 29412 }, { "epoch": 4.801436676054038, "grad_norm": 2.0486860275268555, "learning_rate": 1.6203267579765563e-05, "loss": 0.4053, "step": 29413 }, { "epoch": 4.801599934696543, "grad_norm": 2.3914666175842285, "learning_rate": 1.6203018678889517e-05, "loss": 0.4563, "step": 29414 }, { "epoch": 4.801763193339047, "grad_norm": 2.055826425552368, "learning_rate": 1.620276977176703e-05, "loss": 0.3705, "step": 29415 }, { "epoch": 4.801926451981552, "grad_norm": 2.193035364151001, "learning_rate": 1.620252085839836e-05, "loss": 0.4992, "step": 29416 }, { "epoch": 4.802089710624056, "grad_norm": 2.29182767868042, "learning_rate": 1.6202271938783756e-05, "loss": 0.3668, "step": 29417 }, { "epoch": 4.8022529692665605, "grad_norm": 2.2152018547058105, "learning_rate": 1.620202301292346e-05, "loss": 0.3977, "step": 29418 }, { "epoch": 4.802416227909065, "grad_norm": 2.1220662593841553, "learning_rate": 1.6201774080817735e-05, "loss": 0.3621, "step": 29419 }, { "epoch": 4.802579486551569, "grad_norm": 2.2371535301208496, "learning_rate": 1.6201525142466817e-05, "loss": 0.3998, "step": 29420 }, { "epoch": 4.802742745194074, "grad_norm": 1.8472599983215332, "learning_rate": 1.620127619787097e-05, "loss": 0.3679, "step": 29421 }, { "epoch": 4.802906003836578, "grad_norm": 2.1011059284210205, "learning_rate": 1.620102724703044e-05, "loss": 0.3353, "step": 29422 }, { "epoch": 4.803069262479083, "grad_norm": 2.5787811279296875, "learning_rate": 1.6200778289945475e-05, "loss": 0.3982, "step": 29423 }, { "epoch": 4.803232521121587, "grad_norm": 2.078641176223755, "learning_rate": 1.620052932661633e-05, "loss": 0.3412, "step": 29424 }, { "epoch": 4.803395779764092, "grad_norm": 2.3284125328063965, "learning_rate": 1.620028035704325e-05, "loss": 0.4564, "step": 29425 }, { "epoch": 4.803559038406595, "grad_norm": 2.0789971351623535, "learning_rate": 1.6200031381226493e-05, "loss": 0.4123, "step": 29426 }, { "epoch": 4.8037222970491005, "grad_norm": 1.7188782691955566, "learning_rate": 1.61997823991663e-05, "loss": 0.3416, "step": 29427 }, { "epoch": 4.803885555691604, "grad_norm": 2.5824878215789795, "learning_rate": 1.6199533410862933e-05, "loss": 0.4362, "step": 29428 }, { "epoch": 4.8040488143341085, "grad_norm": 2.3078064918518066, "learning_rate": 1.6199284416316638e-05, "loss": 0.3727, "step": 29429 }, { "epoch": 4.804212072976613, "grad_norm": 2.086163282394409, "learning_rate": 1.6199035415527663e-05, "loss": 0.3459, "step": 29430 }, { "epoch": 4.804375331619117, "grad_norm": 2.165872573852539, "learning_rate": 1.6198786408496262e-05, "loss": 0.3988, "step": 29431 }, { "epoch": 4.804538590261622, "grad_norm": 1.9844787120819092, "learning_rate": 1.6198537395222685e-05, "loss": 0.3996, "step": 29432 }, { "epoch": 4.804701848904126, "grad_norm": 2.3731026649475098, "learning_rate": 1.6198288375707184e-05, "loss": 0.483, "step": 29433 }, { "epoch": 4.804865107546631, "grad_norm": 2.16975474357605, "learning_rate": 1.6198039349950005e-05, "loss": 0.3704, "step": 29434 }, { "epoch": 4.805028366189135, "grad_norm": 2.1397321224212646, "learning_rate": 1.6197790317951403e-05, "loss": 0.3729, "step": 29435 }, { "epoch": 4.8051916248316395, "grad_norm": 1.8838309049606323, "learning_rate": 1.619754127971163e-05, "loss": 0.3373, "step": 29436 }, { "epoch": 4.805354883474144, "grad_norm": 2.291731119155884, "learning_rate": 1.619729223523093e-05, "loss": 0.3909, "step": 29437 }, { "epoch": 4.805518142116648, "grad_norm": 2.3499817848205566, "learning_rate": 1.619704318450956e-05, "loss": 0.3859, "step": 29438 }, { "epoch": 4.805681400759153, "grad_norm": 2.5717172622680664, "learning_rate": 1.6196794127547774e-05, "loss": 0.4802, "step": 29439 }, { "epoch": 4.805844659401657, "grad_norm": 2.479429006576538, "learning_rate": 1.6196545064345813e-05, "loss": 0.4169, "step": 29440 }, { "epoch": 4.806007918044162, "grad_norm": 2.29858660697937, "learning_rate": 1.6196295994903933e-05, "loss": 0.4279, "step": 29441 }, { "epoch": 4.806171176686666, "grad_norm": 2.050959825515747, "learning_rate": 1.6196046919222385e-05, "loss": 0.377, "step": 29442 }, { "epoch": 4.806334435329171, "grad_norm": 2.165271043777466, "learning_rate": 1.619579783730142e-05, "loss": 0.3972, "step": 29443 }, { "epoch": 4.806497693971675, "grad_norm": 2.1754066944122314, "learning_rate": 1.6195548749141288e-05, "loss": 0.3535, "step": 29444 }, { "epoch": 4.806660952614179, "grad_norm": 2.702214241027832, "learning_rate": 1.6195299654742243e-05, "loss": 0.3815, "step": 29445 }, { "epoch": 4.806824211256683, "grad_norm": 1.6420069932937622, "learning_rate": 1.619505055410453e-05, "loss": 0.2967, "step": 29446 }, { "epoch": 4.8069874698991875, "grad_norm": 2.316082000732422, "learning_rate": 1.61948014472284e-05, "loss": 0.4015, "step": 29447 }, { "epoch": 4.807150728541692, "grad_norm": 2.601559638977051, "learning_rate": 1.6194552334114112e-05, "loss": 0.4308, "step": 29448 }, { "epoch": 4.807313987184196, "grad_norm": 2.1904618740081787, "learning_rate": 1.6194303214761907e-05, "loss": 0.3733, "step": 29449 }, { "epoch": 4.807477245826701, "grad_norm": 1.6940605640411377, "learning_rate": 1.6194054089172043e-05, "loss": 0.3413, "step": 29450 }, { "epoch": 4.807640504469205, "grad_norm": 2.5281448364257812, "learning_rate": 1.6193804957344767e-05, "loss": 0.4118, "step": 29451 }, { "epoch": 4.80780376311171, "grad_norm": 2.3180344104766846, "learning_rate": 1.619355581928033e-05, "loss": 0.4994, "step": 29452 }, { "epoch": 4.807967021754214, "grad_norm": 2.2667832374572754, "learning_rate": 1.6193306674978986e-05, "loss": 0.4307, "step": 29453 }, { "epoch": 4.8081302803967185, "grad_norm": 2.1622586250305176, "learning_rate": 1.6193057524440986e-05, "loss": 0.3717, "step": 29454 }, { "epoch": 4.808293539039223, "grad_norm": 2.0562095642089844, "learning_rate": 1.6192808367666574e-05, "loss": 0.4309, "step": 29455 }, { "epoch": 4.808456797681727, "grad_norm": 2.825251340866089, "learning_rate": 1.6192559204656007e-05, "loss": 0.4573, "step": 29456 }, { "epoch": 4.808620056324232, "grad_norm": 1.9404792785644531, "learning_rate": 1.6192310035409536e-05, "loss": 0.3664, "step": 29457 }, { "epoch": 4.808783314966736, "grad_norm": 1.761397123336792, "learning_rate": 1.619206085992741e-05, "loss": 0.3368, "step": 29458 }, { "epoch": 4.808946573609241, "grad_norm": 1.9822111129760742, "learning_rate": 1.6191811678209875e-05, "loss": 0.3484, "step": 29459 }, { "epoch": 4.809109832251745, "grad_norm": 2.2533175945281982, "learning_rate": 1.6191562490257197e-05, "loss": 0.4127, "step": 29460 }, { "epoch": 4.80927309089425, "grad_norm": 1.9176284074783325, "learning_rate": 1.6191313296069607e-05, "loss": 0.374, "step": 29461 }, { "epoch": 4.809436349536753, "grad_norm": 2.2408082485198975, "learning_rate": 1.6191064095647377e-05, "loss": 0.4085, "step": 29462 }, { "epoch": 4.8095996081792585, "grad_norm": 1.898246169090271, "learning_rate": 1.619081488899074e-05, "loss": 0.3753, "step": 29463 }, { "epoch": 4.809762866821762, "grad_norm": 1.9125252962112427, "learning_rate": 1.6190565676099955e-05, "loss": 0.4204, "step": 29464 }, { "epoch": 4.8099261254642665, "grad_norm": 2.0873706340789795, "learning_rate": 1.619031645697527e-05, "loss": 0.4387, "step": 29465 }, { "epoch": 4.810089384106771, "grad_norm": 2.2638556957244873, "learning_rate": 1.6190067231616944e-05, "loss": 0.3989, "step": 29466 }, { "epoch": 4.810252642749275, "grad_norm": 2.2191872596740723, "learning_rate": 1.618981800002522e-05, "loss": 0.4147, "step": 29467 }, { "epoch": 4.81041590139178, "grad_norm": 1.881063461303711, "learning_rate": 1.618956876220035e-05, "loss": 0.357, "step": 29468 }, { "epoch": 4.810579160034284, "grad_norm": 1.9004144668579102, "learning_rate": 1.618931951814258e-05, "loss": 0.386, "step": 29469 }, { "epoch": 4.810742418676789, "grad_norm": 2.376842737197876, "learning_rate": 1.6189070267852172e-05, "loss": 0.4848, "step": 29470 }, { "epoch": 4.810905677319293, "grad_norm": 2.0651328563690186, "learning_rate": 1.6188821011329375e-05, "loss": 0.3547, "step": 29471 }, { "epoch": 4.8110689359617975, "grad_norm": 2.274092435836792, "learning_rate": 1.6188571748574434e-05, "loss": 0.4056, "step": 29472 }, { "epoch": 4.811232194604302, "grad_norm": 1.9778772592544556, "learning_rate": 1.6188322479587604e-05, "loss": 0.3913, "step": 29473 }, { "epoch": 4.811395453246806, "grad_norm": 2.0859036445617676, "learning_rate": 1.6188073204369133e-05, "loss": 0.3403, "step": 29474 }, { "epoch": 4.811558711889311, "grad_norm": 2.0972864627838135, "learning_rate": 1.6187823922919273e-05, "loss": 0.356, "step": 29475 }, { "epoch": 4.811721970531815, "grad_norm": 2.1980388164520264, "learning_rate": 1.6187574635238277e-05, "loss": 0.4056, "step": 29476 }, { "epoch": 4.81188522917432, "grad_norm": 2.1321659088134766, "learning_rate": 1.6187325341326395e-05, "loss": 0.3689, "step": 29477 }, { "epoch": 4.812048487816824, "grad_norm": 1.9290307760238647, "learning_rate": 1.618707604118388e-05, "loss": 0.3746, "step": 29478 }, { "epoch": 4.812211746459328, "grad_norm": 2.0910987854003906, "learning_rate": 1.618682673481098e-05, "loss": 0.4167, "step": 29479 }, { "epoch": 4.812375005101833, "grad_norm": 2.3503382205963135, "learning_rate": 1.6186577422207946e-05, "loss": 0.4404, "step": 29480 }, { "epoch": 4.812538263744337, "grad_norm": 1.9344751834869385, "learning_rate": 1.618632810337503e-05, "loss": 0.3968, "step": 29481 }, { "epoch": 4.812701522386841, "grad_norm": 2.223607063293457, "learning_rate": 1.6186078778312487e-05, "loss": 0.3712, "step": 29482 }, { "epoch": 4.8128647810293455, "grad_norm": 2.2321178913116455, "learning_rate": 1.6185829447020563e-05, "loss": 0.3937, "step": 29483 }, { "epoch": 4.81302803967185, "grad_norm": 2.2770721912384033, "learning_rate": 1.6185580109499505e-05, "loss": 0.394, "step": 29484 }, { "epoch": 4.813191298314354, "grad_norm": 2.3884544372558594, "learning_rate": 1.6185330765749573e-05, "loss": 0.4294, "step": 29485 }, { "epoch": 4.813354556956859, "grad_norm": 2.404209613800049, "learning_rate": 1.6185081415771018e-05, "loss": 0.3529, "step": 29486 }, { "epoch": 4.813517815599363, "grad_norm": 2.3988215923309326, "learning_rate": 1.6184832059564084e-05, "loss": 0.4399, "step": 29487 }, { "epoch": 4.813681074241868, "grad_norm": 2.095766067504883, "learning_rate": 1.618458269712903e-05, "loss": 0.3748, "step": 29488 }, { "epoch": 4.813844332884372, "grad_norm": 1.9664926528930664, "learning_rate": 1.6184333328466096e-05, "loss": 0.3226, "step": 29489 }, { "epoch": 4.8140075915268765, "grad_norm": 1.8077815771102905, "learning_rate": 1.6184083953575543e-05, "loss": 0.3739, "step": 29490 }, { "epoch": 4.814170850169381, "grad_norm": 2.1475799083709717, "learning_rate": 1.618383457245762e-05, "loss": 0.4291, "step": 29491 }, { "epoch": 4.814334108811885, "grad_norm": 2.035935878753662, "learning_rate": 1.6183585185112577e-05, "loss": 0.4016, "step": 29492 }, { "epoch": 4.81449736745439, "grad_norm": 2.2213075160980225, "learning_rate": 1.6183335791540665e-05, "loss": 0.4144, "step": 29493 }, { "epoch": 4.814660626096894, "grad_norm": 2.1613266468048096, "learning_rate": 1.6183086391742136e-05, "loss": 0.3916, "step": 29494 }, { "epoch": 4.814823884739399, "grad_norm": 2.1101431846618652, "learning_rate": 1.6182836985717237e-05, "loss": 0.4509, "step": 29495 }, { "epoch": 4.814987143381903, "grad_norm": 2.22935152053833, "learning_rate": 1.618258757346623e-05, "loss": 0.4179, "step": 29496 }, { "epoch": 4.815150402024408, "grad_norm": 1.867679476737976, "learning_rate": 1.6182338154989353e-05, "loss": 0.3761, "step": 29497 }, { "epoch": 4.815313660666911, "grad_norm": 1.8376669883728027, "learning_rate": 1.6182088730286863e-05, "loss": 0.3738, "step": 29498 }, { "epoch": 4.815476919309416, "grad_norm": 2.168506383895874, "learning_rate": 1.6181839299359016e-05, "loss": 0.4055, "step": 29499 }, { "epoch": 4.81564017795192, "grad_norm": 1.5562704801559448, "learning_rate": 1.6181589862206053e-05, "loss": 0.3262, "step": 29500 }, { "epoch": 4.8158034365944244, "grad_norm": 2.505141496658325, "learning_rate": 1.6181340418828234e-05, "loss": 0.4521, "step": 29501 }, { "epoch": 4.815966695236929, "grad_norm": 2.510735511779785, "learning_rate": 1.6181090969225808e-05, "loss": 0.4063, "step": 29502 }, { "epoch": 4.816129953879433, "grad_norm": 2.703948974609375, "learning_rate": 1.618084151339902e-05, "loss": 0.4059, "step": 29503 }, { "epoch": 4.816293212521938, "grad_norm": 2.0152900218963623, "learning_rate": 1.618059205134813e-05, "loss": 0.3977, "step": 29504 }, { "epoch": 4.816456471164442, "grad_norm": 2.1327755451202393, "learning_rate": 1.6180342583073384e-05, "loss": 0.3898, "step": 29505 }, { "epoch": 4.816619729806947, "grad_norm": 2.3154478073120117, "learning_rate": 1.6180093108575037e-05, "loss": 0.3505, "step": 29506 }, { "epoch": 4.816782988449451, "grad_norm": 2.353598117828369, "learning_rate": 1.6179843627853338e-05, "loss": 0.4875, "step": 29507 }, { "epoch": 4.8169462470919555, "grad_norm": 2.631826639175415, "learning_rate": 1.6179594140908536e-05, "loss": 0.4278, "step": 29508 }, { "epoch": 4.81710950573446, "grad_norm": 2.1313769817352295, "learning_rate": 1.6179344647740884e-05, "loss": 0.4139, "step": 29509 }, { "epoch": 4.817272764376964, "grad_norm": 2.058952808380127, "learning_rate": 1.6179095148350633e-05, "loss": 0.3553, "step": 29510 }, { "epoch": 4.817436023019469, "grad_norm": 1.9884816408157349, "learning_rate": 1.6178845642738038e-05, "loss": 0.3917, "step": 29511 }, { "epoch": 4.817599281661973, "grad_norm": 2.226104259490967, "learning_rate": 1.6178596130903345e-05, "loss": 0.3541, "step": 29512 }, { "epoch": 4.817762540304478, "grad_norm": 1.8426439762115479, "learning_rate": 1.6178346612846806e-05, "loss": 0.3566, "step": 29513 }, { "epoch": 4.817925798946982, "grad_norm": 1.8976167440414429, "learning_rate": 1.6178097088568677e-05, "loss": 0.3606, "step": 29514 }, { "epoch": 4.818089057589486, "grad_norm": 2.212912082672119, "learning_rate": 1.6177847558069204e-05, "loss": 0.3843, "step": 29515 }, { "epoch": 4.818252316231991, "grad_norm": 2.3127052783966064, "learning_rate": 1.617759802134864e-05, "loss": 0.3741, "step": 29516 }, { "epoch": 4.818415574874495, "grad_norm": 2.0923027992248535, "learning_rate": 1.6177348478407234e-05, "loss": 0.3801, "step": 29517 }, { "epoch": 4.818578833516999, "grad_norm": 2.175851583480835, "learning_rate": 1.6177098929245246e-05, "loss": 0.4092, "step": 29518 }, { "epoch": 4.818742092159503, "grad_norm": 2.121300458908081, "learning_rate": 1.6176849373862916e-05, "loss": 0.3892, "step": 29519 }, { "epoch": 4.818905350802008, "grad_norm": 2.3700506687164307, "learning_rate": 1.6176599812260504e-05, "loss": 0.361, "step": 29520 }, { "epoch": 4.819068609444512, "grad_norm": 2.282355308532715, "learning_rate": 1.6176350244438255e-05, "loss": 0.3794, "step": 29521 }, { "epoch": 4.819231868087017, "grad_norm": 1.936100721359253, "learning_rate": 1.6176100670396423e-05, "loss": 0.327, "step": 29522 }, { "epoch": 4.819395126729521, "grad_norm": 2.0787413120269775, "learning_rate": 1.617585109013526e-05, "loss": 0.3496, "step": 29523 }, { "epoch": 4.819558385372026, "grad_norm": 2.0799810886383057, "learning_rate": 1.6175601503655017e-05, "loss": 0.3952, "step": 29524 }, { "epoch": 4.81972164401453, "grad_norm": 2.0045268535614014, "learning_rate": 1.6175351910955942e-05, "loss": 0.3662, "step": 29525 }, { "epoch": 4.8198849026570345, "grad_norm": 2.115752696990967, "learning_rate": 1.6175102312038294e-05, "loss": 0.3448, "step": 29526 }, { "epoch": 4.820048161299539, "grad_norm": 2.133289098739624, "learning_rate": 1.6174852706902315e-05, "loss": 0.4151, "step": 29527 }, { "epoch": 4.820211419942043, "grad_norm": 2.4385082721710205, "learning_rate": 1.617460309554826e-05, "loss": 0.382, "step": 29528 }, { "epoch": 4.820374678584548, "grad_norm": 2.3514363765716553, "learning_rate": 1.6174353477976386e-05, "loss": 0.3769, "step": 29529 }, { "epoch": 4.820537937227052, "grad_norm": 2.484832525253296, "learning_rate": 1.617410385418694e-05, "loss": 0.4329, "step": 29530 }, { "epoch": 4.820701195869557, "grad_norm": 1.8094072341918945, "learning_rate": 1.6173854224180173e-05, "loss": 0.3187, "step": 29531 }, { "epoch": 4.82086445451206, "grad_norm": 1.9094773530960083, "learning_rate": 1.6173604587956334e-05, "loss": 0.3422, "step": 29532 }, { "epoch": 4.821027713154566, "grad_norm": 2.2759716510772705, "learning_rate": 1.6173354945515677e-05, "loss": 0.391, "step": 29533 }, { "epoch": 4.821190971797069, "grad_norm": 2.001962661743164, "learning_rate": 1.617310529685845e-05, "loss": 0.4078, "step": 29534 }, { "epoch": 4.821354230439574, "grad_norm": 2.516170024871826, "learning_rate": 1.617285564198491e-05, "loss": 0.382, "step": 29535 }, { "epoch": 4.821517489082078, "grad_norm": 2.5597243309020996, "learning_rate": 1.617260598089531e-05, "loss": 0.4065, "step": 29536 }, { "epoch": 4.821680747724582, "grad_norm": 2.8753583431243896, "learning_rate": 1.617235631358989e-05, "loss": 0.3625, "step": 29537 }, { "epoch": 4.821844006367087, "grad_norm": 2.001659870147705, "learning_rate": 1.6172106640068917e-05, "loss": 0.3922, "step": 29538 }, { "epoch": 4.822007265009591, "grad_norm": 2.3769278526306152, "learning_rate": 1.617185696033263e-05, "loss": 0.3707, "step": 29539 }, { "epoch": 4.822170523652096, "grad_norm": 2.1230924129486084, "learning_rate": 1.6171607274381285e-05, "loss": 0.4163, "step": 29540 }, { "epoch": 4.8223337822946, "grad_norm": 1.7908881902694702, "learning_rate": 1.6171357582215135e-05, "loss": 0.3074, "step": 29541 }, { "epoch": 4.822497040937105, "grad_norm": 1.979197382926941, "learning_rate": 1.617110788383443e-05, "loss": 0.4199, "step": 29542 }, { "epoch": 4.822660299579609, "grad_norm": 2.2099428176879883, "learning_rate": 1.6170858179239415e-05, "loss": 0.4667, "step": 29543 }, { "epoch": 4.8228235582221135, "grad_norm": 1.930867075920105, "learning_rate": 1.6170608468430354e-05, "loss": 0.3484, "step": 29544 }, { "epoch": 4.822986816864618, "grad_norm": 1.8779906034469604, "learning_rate": 1.617035875140749e-05, "loss": 0.349, "step": 29545 }, { "epoch": 4.823150075507122, "grad_norm": 2.742403984069824, "learning_rate": 1.6170109028171073e-05, "loss": 0.5052, "step": 29546 }, { "epoch": 4.823313334149627, "grad_norm": 2.271591901779175, "learning_rate": 1.6169859298721358e-05, "loss": 0.4096, "step": 29547 }, { "epoch": 4.823476592792131, "grad_norm": 2.0589795112609863, "learning_rate": 1.6169609563058602e-05, "loss": 0.419, "step": 29548 }, { "epoch": 4.823639851434636, "grad_norm": 2.3016881942749023, "learning_rate": 1.616935982118305e-05, "loss": 0.4682, "step": 29549 }, { "epoch": 4.82380311007714, "grad_norm": 2.008146286010742, "learning_rate": 1.616911007309495e-05, "loss": 0.3948, "step": 29550 }, { "epoch": 4.823966368719644, "grad_norm": 2.0633513927459717, "learning_rate": 1.6168860318794562e-05, "loss": 0.3675, "step": 29551 }, { "epoch": 4.824129627362148, "grad_norm": 2.3486037254333496, "learning_rate": 1.616861055828213e-05, "loss": 0.4198, "step": 29552 }, { "epoch": 4.824292886004653, "grad_norm": 2.252781629562378, "learning_rate": 1.616836079155791e-05, "loss": 0.3373, "step": 29553 }, { "epoch": 4.824456144647157, "grad_norm": 1.7088344097137451, "learning_rate": 1.616811101862215e-05, "loss": 0.3706, "step": 29554 }, { "epoch": 4.824619403289661, "grad_norm": 2.193537473678589, "learning_rate": 1.6167861239475108e-05, "loss": 0.3619, "step": 29555 }, { "epoch": 4.824782661932166, "grad_norm": 2.227496862411499, "learning_rate": 1.6167611454117027e-05, "loss": 0.3774, "step": 29556 }, { "epoch": 4.82494592057467, "grad_norm": 1.9371516704559326, "learning_rate": 1.6167361662548165e-05, "loss": 0.3553, "step": 29557 }, { "epoch": 4.825109179217175, "grad_norm": 2.45890736579895, "learning_rate": 1.6167111864768773e-05, "loss": 0.4268, "step": 29558 }, { "epoch": 4.825272437859679, "grad_norm": 2.0971851348876953, "learning_rate": 1.6166862060779102e-05, "loss": 0.3258, "step": 29559 }, { "epoch": 4.825435696502184, "grad_norm": 1.9155765771865845, "learning_rate": 1.6166612250579398e-05, "loss": 0.3598, "step": 29560 }, { "epoch": 4.825598955144688, "grad_norm": 2.2758383750915527, "learning_rate": 1.6166362434169916e-05, "loss": 0.441, "step": 29561 }, { "epoch": 4.8257622137871925, "grad_norm": 2.435356855392456, "learning_rate": 1.6166112611550914e-05, "loss": 0.4047, "step": 29562 }, { "epoch": 4.825925472429697, "grad_norm": 2.1328840255737305, "learning_rate": 1.6165862782722638e-05, "loss": 0.4026, "step": 29563 }, { "epoch": 4.826088731072201, "grad_norm": 1.9201046228408813, "learning_rate": 1.6165612947685336e-05, "loss": 0.4451, "step": 29564 }, { "epoch": 4.826251989714706, "grad_norm": 1.8095144033432007, "learning_rate": 1.6165363106439266e-05, "loss": 0.4187, "step": 29565 }, { "epoch": 4.82641524835721, "grad_norm": 1.9144326448440552, "learning_rate": 1.616511325898468e-05, "loss": 0.3417, "step": 29566 }, { "epoch": 4.826578506999715, "grad_norm": 1.8795998096466064, "learning_rate": 1.616486340532182e-05, "loss": 0.3494, "step": 29567 }, { "epoch": 4.826741765642218, "grad_norm": 2.287914752960205, "learning_rate": 1.6164613545450946e-05, "loss": 0.4597, "step": 29568 }, { "epoch": 4.826905024284724, "grad_norm": 2.4030532836914062, "learning_rate": 1.616436367937231e-05, "loss": 0.4107, "step": 29569 }, { "epoch": 4.827068282927227, "grad_norm": 1.6600435972213745, "learning_rate": 1.616411380708616e-05, "loss": 0.3251, "step": 29570 }, { "epoch": 4.8272315415697316, "grad_norm": 1.9635001420974731, "learning_rate": 1.616386392859275e-05, "loss": 0.3643, "step": 29571 }, { "epoch": 4.827394800212236, "grad_norm": 2.067387580871582, "learning_rate": 1.616361404389233e-05, "loss": 0.337, "step": 29572 }, { "epoch": 4.82755805885474, "grad_norm": 2.0572926998138428, "learning_rate": 1.616336415298515e-05, "loss": 0.3864, "step": 29573 }, { "epoch": 4.827721317497245, "grad_norm": 2.0970773696899414, "learning_rate": 1.6163114255871468e-05, "loss": 0.3905, "step": 29574 }, { "epoch": 4.827884576139749, "grad_norm": 2.1508259773254395, "learning_rate": 1.616286435255153e-05, "loss": 0.3943, "step": 29575 }, { "epoch": 4.828047834782254, "grad_norm": 2.336723566055298, "learning_rate": 1.616261444302559e-05, "loss": 0.4961, "step": 29576 }, { "epoch": 4.828211093424758, "grad_norm": 2.127760410308838, "learning_rate": 1.6162364527293898e-05, "loss": 0.3502, "step": 29577 }, { "epoch": 4.828374352067263, "grad_norm": 2.21097993850708, "learning_rate": 1.6162114605356704e-05, "loss": 0.3594, "step": 29578 }, { "epoch": 4.828537610709767, "grad_norm": 2.106619358062744, "learning_rate": 1.6161864677214266e-05, "loss": 0.3649, "step": 29579 }, { "epoch": 4.8287008693522715, "grad_norm": 2.7774605751037598, "learning_rate": 1.616161474286683e-05, "loss": 0.3893, "step": 29580 }, { "epoch": 4.828864127994776, "grad_norm": 2.254577398300171, "learning_rate": 1.616136480231465e-05, "loss": 0.391, "step": 29581 }, { "epoch": 4.82902738663728, "grad_norm": 1.9831786155700684, "learning_rate": 1.616111485555798e-05, "loss": 0.3195, "step": 29582 }, { "epoch": 4.829190645279785, "grad_norm": 1.7714951038360596, "learning_rate": 1.6160864902597066e-05, "loss": 0.3135, "step": 29583 }, { "epoch": 4.829353903922289, "grad_norm": 2.080721855163574, "learning_rate": 1.6160614943432165e-05, "loss": 0.3984, "step": 29584 }, { "epoch": 4.829517162564793, "grad_norm": 2.3760805130004883, "learning_rate": 1.6160364978063523e-05, "loss": 0.3987, "step": 29585 }, { "epoch": 4.829680421207298, "grad_norm": 2.555417776107788, "learning_rate": 1.61601150064914e-05, "loss": 0.3804, "step": 29586 }, { "epoch": 4.829843679849802, "grad_norm": 1.8119046688079834, "learning_rate": 1.6159865028716035e-05, "loss": 0.352, "step": 29587 }, { "epoch": 4.830006938492306, "grad_norm": 2.113137722015381, "learning_rate": 1.6159615044737693e-05, "loss": 0.341, "step": 29588 }, { "epoch": 4.8301701971348106, "grad_norm": 2.410012722015381, "learning_rate": 1.615936505455662e-05, "loss": 0.3995, "step": 29589 }, { "epoch": 4.830333455777315, "grad_norm": 2.4505269527435303, "learning_rate": 1.6159115058173064e-05, "loss": 0.4323, "step": 29590 }, { "epoch": 4.830496714419819, "grad_norm": 2.27162766456604, "learning_rate": 1.6158865055587286e-05, "loss": 0.4274, "step": 29591 }, { "epoch": 4.830659973062324, "grad_norm": 2.9065680503845215, "learning_rate": 1.615861504679953e-05, "loss": 0.474, "step": 29592 }, { "epoch": 4.830823231704828, "grad_norm": 2.155649185180664, "learning_rate": 1.6158365031810053e-05, "loss": 0.3811, "step": 29593 }, { "epoch": 4.830986490347333, "grad_norm": 2.0946712493896484, "learning_rate": 1.61581150106191e-05, "loss": 0.3752, "step": 29594 }, { "epoch": 4.831149748989837, "grad_norm": 2.9865427017211914, "learning_rate": 1.615786498322693e-05, "loss": 0.488, "step": 29595 }, { "epoch": 4.831313007632342, "grad_norm": 2.211738348007202, "learning_rate": 1.615761494963379e-05, "loss": 0.4152, "step": 29596 }, { "epoch": 4.831476266274846, "grad_norm": 2.758894205093384, "learning_rate": 1.6157364909839936e-05, "loss": 0.4565, "step": 29597 }, { "epoch": 4.8316395249173505, "grad_norm": 2.1521732807159424, "learning_rate": 1.6157114863845613e-05, "loss": 0.3832, "step": 29598 }, { "epoch": 4.831802783559855, "grad_norm": 1.8249781131744385, "learning_rate": 1.615686481165108e-05, "loss": 0.336, "step": 29599 }, { "epoch": 4.831966042202359, "grad_norm": 2.0298409461975098, "learning_rate": 1.6156614753256583e-05, "loss": 0.3611, "step": 29600 }, { "epoch": 4.832129300844864, "grad_norm": 2.276719331741333, "learning_rate": 1.615636468866238e-05, "loss": 0.4517, "step": 29601 }, { "epoch": 4.832292559487368, "grad_norm": 2.0722029209136963, "learning_rate": 1.615611461786872e-05, "loss": 0.3835, "step": 29602 }, { "epoch": 4.832455818129873, "grad_norm": 2.360292673110962, "learning_rate": 1.615586454087585e-05, "loss": 0.3732, "step": 29603 }, { "epoch": 4.832619076772376, "grad_norm": 1.8165349960327148, "learning_rate": 1.6155614457684028e-05, "loss": 0.349, "step": 29604 }, { "epoch": 4.832782335414881, "grad_norm": 1.8915833234786987, "learning_rate": 1.6155364368293503e-05, "loss": 0.3831, "step": 29605 }, { "epoch": 4.832945594057385, "grad_norm": 2.1097488403320312, "learning_rate": 1.615511427270453e-05, "loss": 0.3708, "step": 29606 }, { "epoch": 4.8331088526998895, "grad_norm": 2.0687551498413086, "learning_rate": 1.6154864170917356e-05, "loss": 0.3819, "step": 29607 }, { "epoch": 4.833272111342394, "grad_norm": 2.162271022796631, "learning_rate": 1.6154614062932235e-05, "loss": 0.3678, "step": 29608 }, { "epoch": 4.833435369984898, "grad_norm": 2.5953867435455322, "learning_rate": 1.615436394874942e-05, "loss": 0.4614, "step": 29609 }, { "epoch": 4.833598628627403, "grad_norm": 1.8501774072647095, "learning_rate": 1.6154113828369163e-05, "loss": 0.3807, "step": 29610 }, { "epoch": 4.833761887269907, "grad_norm": 1.8737941980361938, "learning_rate": 1.6153863701791717e-05, "loss": 0.3608, "step": 29611 }, { "epoch": 4.833925145912412, "grad_norm": 1.7942475080490112, "learning_rate": 1.615361356901733e-05, "loss": 0.3451, "step": 29612 }, { "epoch": 4.834088404554916, "grad_norm": 2.5517055988311768, "learning_rate": 1.6153363430046253e-05, "loss": 0.5079, "step": 29613 }, { "epoch": 4.834251663197421, "grad_norm": 2.3291051387786865, "learning_rate": 1.6153113284878744e-05, "loss": 0.3905, "step": 29614 }, { "epoch": 4.834414921839925, "grad_norm": 2.1080288887023926, "learning_rate": 1.615286313351505e-05, "loss": 0.4131, "step": 29615 }, { "epoch": 4.8345781804824295, "grad_norm": 2.096682071685791, "learning_rate": 1.6152612975955426e-05, "loss": 0.4083, "step": 29616 }, { "epoch": 4.834741439124934, "grad_norm": 2.2014365196228027, "learning_rate": 1.615236281220012e-05, "loss": 0.3588, "step": 29617 }, { "epoch": 4.834904697767438, "grad_norm": 2.2141029834747314, "learning_rate": 1.615211264224939e-05, "loss": 0.41, "step": 29618 }, { "epoch": 4.835067956409943, "grad_norm": 2.447949171066284, "learning_rate": 1.6151862466103484e-05, "loss": 0.4124, "step": 29619 }, { "epoch": 4.835231215052447, "grad_norm": 2.2173876762390137, "learning_rate": 1.6151612283762653e-05, "loss": 0.4151, "step": 29620 }, { "epoch": 4.835394473694951, "grad_norm": 2.0109426975250244, "learning_rate": 1.6151362095227146e-05, "loss": 0.4151, "step": 29621 }, { "epoch": 4.835557732337455, "grad_norm": 2.1331369876861572, "learning_rate": 1.6151111900497225e-05, "loss": 0.3745, "step": 29622 }, { "epoch": 4.83572099097996, "grad_norm": 2.0063278675079346, "learning_rate": 1.6150861699573132e-05, "loss": 0.4088, "step": 29623 }, { "epoch": 4.835884249622464, "grad_norm": 2.4431979656219482, "learning_rate": 1.6150611492455124e-05, "loss": 0.4083, "step": 29624 }, { "epoch": 4.8360475082649685, "grad_norm": 1.9821107387542725, "learning_rate": 1.6150361279143452e-05, "loss": 0.3846, "step": 29625 }, { "epoch": 4.836210766907473, "grad_norm": 2.432396173477173, "learning_rate": 1.6150111059638367e-05, "loss": 0.458, "step": 29626 }, { "epoch": 4.836374025549977, "grad_norm": 1.7698508501052856, "learning_rate": 1.6149860833940122e-05, "loss": 0.4181, "step": 29627 }, { "epoch": 4.836537284192482, "grad_norm": 1.8794951438903809, "learning_rate": 1.6149610602048968e-05, "loss": 0.3897, "step": 29628 }, { "epoch": 4.836700542834986, "grad_norm": 1.785304069519043, "learning_rate": 1.614936036396516e-05, "loss": 0.3828, "step": 29629 }, { "epoch": 4.836863801477491, "grad_norm": 2.1798276901245117, "learning_rate": 1.614911011968895e-05, "loss": 0.3784, "step": 29630 }, { "epoch": 4.837027060119995, "grad_norm": 2.2895736694335938, "learning_rate": 1.6148859869220584e-05, "loss": 0.4021, "step": 29631 }, { "epoch": 4.8371903187625, "grad_norm": 2.260291576385498, "learning_rate": 1.6148609612560317e-05, "loss": 0.3916, "step": 29632 }, { "epoch": 4.837353577405004, "grad_norm": 2.0083250999450684, "learning_rate": 1.61483593497084e-05, "loss": 0.3815, "step": 29633 }, { "epoch": 4.8375168360475085, "grad_norm": 1.9321696758270264, "learning_rate": 1.6148109080665092e-05, "loss": 0.3843, "step": 29634 }, { "epoch": 4.837680094690013, "grad_norm": 1.8485345840454102, "learning_rate": 1.614785880543064e-05, "loss": 0.4107, "step": 29635 }, { "epoch": 4.837843353332517, "grad_norm": 2.726905345916748, "learning_rate": 1.614760852400529e-05, "loss": 0.4034, "step": 29636 }, { "epoch": 4.838006611975022, "grad_norm": 1.9070370197296143, "learning_rate": 1.6147358236389307e-05, "loss": 0.316, "step": 29637 }, { "epoch": 4.838169870617525, "grad_norm": 1.596165657043457, "learning_rate": 1.614710794258293e-05, "loss": 0.3318, "step": 29638 }, { "epoch": 4.838333129260031, "grad_norm": 1.6458959579467773, "learning_rate": 1.614685764258642e-05, "loss": 0.2948, "step": 29639 }, { "epoch": 4.838496387902534, "grad_norm": 2.468999147415161, "learning_rate": 1.6146607336400024e-05, "loss": 0.4476, "step": 29640 }, { "epoch": 4.838659646545039, "grad_norm": 2.5919766426086426, "learning_rate": 1.6146357024023997e-05, "loss": 0.4268, "step": 29641 }, { "epoch": 4.838822905187543, "grad_norm": 2.3633840084075928, "learning_rate": 1.6146106705458594e-05, "loss": 0.4177, "step": 29642 }, { "epoch": 4.8389861638300475, "grad_norm": 2.2489609718322754, "learning_rate": 1.6145856380704058e-05, "loss": 0.4125, "step": 29643 }, { "epoch": 4.839149422472552, "grad_norm": 2.187547445297241, "learning_rate": 1.6145606049760644e-05, "loss": 0.4064, "step": 29644 }, { "epoch": 4.839312681115056, "grad_norm": 2.2288806438446045, "learning_rate": 1.6145355712628612e-05, "loss": 0.3514, "step": 29645 }, { "epoch": 4.839475939757561, "grad_norm": 2.6330838203430176, "learning_rate": 1.6145105369308204e-05, "loss": 0.4071, "step": 29646 }, { "epoch": 4.839639198400065, "grad_norm": 1.862252950668335, "learning_rate": 1.6144855019799682e-05, "loss": 0.4031, "step": 29647 }, { "epoch": 4.83980245704257, "grad_norm": 2.3221495151519775, "learning_rate": 1.614460466410329e-05, "loss": 0.3843, "step": 29648 }, { "epoch": 4.839965715685074, "grad_norm": 2.217836856842041, "learning_rate": 1.614435430221928e-05, "loss": 0.3891, "step": 29649 }, { "epoch": 4.840128974327579, "grad_norm": 1.6781951189041138, "learning_rate": 1.614410393414791e-05, "loss": 0.3432, "step": 29650 }, { "epoch": 4.840292232970083, "grad_norm": 1.8430113792419434, "learning_rate": 1.614385355988943e-05, "loss": 0.3743, "step": 29651 }, { "epoch": 4.8404554916125875, "grad_norm": 2.2297356128692627, "learning_rate": 1.6143603179444086e-05, "loss": 0.3757, "step": 29652 }, { "epoch": 4.840618750255092, "grad_norm": 2.316739559173584, "learning_rate": 1.614335279281214e-05, "loss": 0.3976, "step": 29653 }, { "epoch": 4.840782008897596, "grad_norm": 2.365786552429199, "learning_rate": 1.6143102399993837e-05, "loss": 0.4208, "step": 29654 }, { "epoch": 4.8409452675401, "grad_norm": 2.3700013160705566, "learning_rate": 1.6142852000989432e-05, "loss": 0.3843, "step": 29655 }, { "epoch": 4.841108526182605, "grad_norm": 1.806862711906433, "learning_rate": 1.614260159579918e-05, "loss": 0.3598, "step": 29656 }, { "epoch": 4.841271784825109, "grad_norm": 2.084110736846924, "learning_rate": 1.6142351184423327e-05, "loss": 0.3739, "step": 29657 }, { "epoch": 4.841435043467613, "grad_norm": 1.8994499444961548, "learning_rate": 1.6142100766862125e-05, "loss": 0.3809, "step": 29658 }, { "epoch": 4.841598302110118, "grad_norm": 2.510357618331909, "learning_rate": 1.6141850343115834e-05, "loss": 0.3433, "step": 29659 }, { "epoch": 4.841761560752622, "grad_norm": 1.8000469207763672, "learning_rate": 1.61415999131847e-05, "loss": 0.3649, "step": 29660 }, { "epoch": 4.8419248193951265, "grad_norm": 2.41142201423645, "learning_rate": 1.6141349477068974e-05, "loss": 0.3893, "step": 29661 }, { "epoch": 4.842088078037631, "grad_norm": 2.562307596206665, "learning_rate": 1.614109903476891e-05, "loss": 0.406, "step": 29662 }, { "epoch": 4.842251336680135, "grad_norm": 2.087484359741211, "learning_rate": 1.6140848586284765e-05, "loss": 0.3818, "step": 29663 }, { "epoch": 4.84241459532264, "grad_norm": 1.751797080039978, "learning_rate": 1.6140598131616788e-05, "loss": 0.3567, "step": 29664 }, { "epoch": 4.842577853965144, "grad_norm": 1.98617684841156, "learning_rate": 1.6140347670765225e-05, "loss": 0.3632, "step": 29665 }, { "epoch": 4.842741112607649, "grad_norm": 2.7805750370025635, "learning_rate": 1.614009720373034e-05, "loss": 0.4344, "step": 29666 }, { "epoch": 4.842904371250153, "grad_norm": 2.165884256362915, "learning_rate": 1.6139846730512374e-05, "loss": 0.4065, "step": 29667 }, { "epoch": 4.843067629892658, "grad_norm": 2.1210503578186035, "learning_rate": 1.6139596251111584e-05, "loss": 0.3797, "step": 29668 }, { "epoch": 4.843230888535162, "grad_norm": 1.7355785369873047, "learning_rate": 1.613934576552822e-05, "loss": 0.3896, "step": 29669 }, { "epoch": 4.8433941471776665, "grad_norm": 2.185739517211914, "learning_rate": 1.6139095273762545e-05, "loss": 0.385, "step": 29670 }, { "epoch": 4.843557405820171, "grad_norm": 2.365987777709961, "learning_rate": 1.6138844775814796e-05, "loss": 0.4233, "step": 29671 }, { "epoch": 4.843720664462675, "grad_norm": 2.087028980255127, "learning_rate": 1.6138594271685234e-05, "loss": 0.3947, "step": 29672 }, { "epoch": 4.84388392310518, "grad_norm": 2.035200357437134, "learning_rate": 1.613834376137411e-05, "loss": 0.3353, "step": 29673 }, { "epoch": 4.844047181747683, "grad_norm": 2.5363402366638184, "learning_rate": 1.613809324488167e-05, "loss": 0.4674, "step": 29674 }, { "epoch": 4.844210440390188, "grad_norm": 2.0638043880462646, "learning_rate": 1.613784272220818e-05, "loss": 0.4106, "step": 29675 }, { "epoch": 4.844373699032692, "grad_norm": 2.128239154815674, "learning_rate": 1.6137592193353875e-05, "loss": 0.3835, "step": 29676 }, { "epoch": 4.844536957675197, "grad_norm": 1.9076335430145264, "learning_rate": 1.6137341658319022e-05, "loss": 0.3671, "step": 29677 }, { "epoch": 4.844700216317701, "grad_norm": 1.5765454769134521, "learning_rate": 1.613709111710387e-05, "loss": 0.3388, "step": 29678 }, { "epoch": 4.8448634749602055, "grad_norm": 2.0243732929229736, "learning_rate": 1.6136840569708665e-05, "loss": 0.3586, "step": 29679 }, { "epoch": 4.84502673360271, "grad_norm": 2.2081754207611084, "learning_rate": 1.6136590016133663e-05, "loss": 0.3807, "step": 29680 }, { "epoch": 4.845189992245214, "grad_norm": 2.0793566703796387, "learning_rate": 1.613633945637912e-05, "loss": 0.383, "step": 29681 }, { "epoch": 4.845353250887719, "grad_norm": 1.9422142505645752, "learning_rate": 1.613608889044528e-05, "loss": 0.3712, "step": 29682 }, { "epoch": 4.845516509530223, "grad_norm": 2.3125834465026855, "learning_rate": 1.61358383183324e-05, "loss": 0.4287, "step": 29683 }, { "epoch": 4.845679768172728, "grad_norm": 1.8637027740478516, "learning_rate": 1.6135587740040736e-05, "loss": 0.3997, "step": 29684 }, { "epoch": 4.845843026815232, "grad_norm": 2.139719009399414, "learning_rate": 1.6135337155570536e-05, "loss": 0.3935, "step": 29685 }, { "epoch": 4.846006285457737, "grad_norm": 2.1566882133483887, "learning_rate": 1.613508656492205e-05, "loss": 0.4439, "step": 29686 }, { "epoch": 4.846169544100241, "grad_norm": 2.1259586811065674, "learning_rate": 1.6134835968095534e-05, "loss": 0.3591, "step": 29687 }, { "epoch": 4.8463328027427455, "grad_norm": 1.8451303243637085, "learning_rate": 1.6134585365091243e-05, "loss": 0.336, "step": 29688 }, { "epoch": 4.84649606138525, "grad_norm": 2.2864789962768555, "learning_rate": 1.6134334755909426e-05, "loss": 0.4558, "step": 29689 }, { "epoch": 4.846659320027754, "grad_norm": 2.0996527671813965, "learning_rate": 1.6134084140550333e-05, "loss": 0.3431, "step": 29690 }, { "epoch": 4.846822578670258, "grad_norm": 2.1249520778656006, "learning_rate": 1.6133833519014222e-05, "loss": 0.3788, "step": 29691 }, { "epoch": 4.846985837312763, "grad_norm": 1.9516009092330933, "learning_rate": 1.613358289130134e-05, "loss": 0.3867, "step": 29692 }, { "epoch": 4.847149095955267, "grad_norm": 1.930011510848999, "learning_rate": 1.613333225741194e-05, "loss": 0.3954, "step": 29693 }, { "epoch": 4.847312354597771, "grad_norm": 1.9281237125396729, "learning_rate": 1.613308161734628e-05, "loss": 0.3512, "step": 29694 }, { "epoch": 4.847475613240276, "grad_norm": 1.915107250213623, "learning_rate": 1.6132830971104607e-05, "loss": 0.4153, "step": 29695 }, { "epoch": 4.84763887188278, "grad_norm": 2.3424577713012695, "learning_rate": 1.6132580318687175e-05, "loss": 0.4331, "step": 29696 }, { "epoch": 4.8478021305252845, "grad_norm": 2.337373733520508, "learning_rate": 1.6132329660094233e-05, "loss": 0.4122, "step": 29697 }, { "epoch": 4.847965389167789, "grad_norm": 1.8653178215026855, "learning_rate": 1.6132078995326042e-05, "loss": 0.3382, "step": 29698 }, { "epoch": 4.848128647810293, "grad_norm": 2.401200294494629, "learning_rate": 1.6131828324382848e-05, "loss": 0.4756, "step": 29699 }, { "epoch": 4.848291906452798, "grad_norm": 2.425419569015503, "learning_rate": 1.6131577647264903e-05, "loss": 0.4426, "step": 29700 }, { "epoch": 4.848455165095302, "grad_norm": 2.0986273288726807, "learning_rate": 1.613132696397246e-05, "loss": 0.4009, "step": 29701 }, { "epoch": 4.848618423737807, "grad_norm": 2.3271799087524414, "learning_rate": 1.6131076274505773e-05, "loss": 0.4404, "step": 29702 }, { "epoch": 4.848781682380311, "grad_norm": 1.9792135953903198, "learning_rate": 1.6130825578865094e-05, "loss": 0.4113, "step": 29703 }, { "epoch": 4.848944941022816, "grad_norm": 2.4625134468078613, "learning_rate": 1.6130574877050677e-05, "loss": 0.432, "step": 29704 }, { "epoch": 4.84910819966532, "grad_norm": 1.9964812994003296, "learning_rate": 1.6130324169062768e-05, "loss": 0.3774, "step": 29705 }, { "epoch": 4.8492714583078245, "grad_norm": 2.0238919258117676, "learning_rate": 1.613007345490163e-05, "loss": 0.3287, "step": 29706 }, { "epoch": 4.849434716950329, "grad_norm": 2.2317402362823486, "learning_rate": 1.6129822734567503e-05, "loss": 0.416, "step": 29707 }, { "epoch": 4.8495979755928325, "grad_norm": 2.335665464401245, "learning_rate": 1.612957200806065e-05, "loss": 0.3971, "step": 29708 }, { "epoch": 4.849761234235338, "grad_norm": 2.131220817565918, "learning_rate": 1.6129321275381323e-05, "loss": 0.362, "step": 29709 }, { "epoch": 4.849924492877841, "grad_norm": 2.1450133323669434, "learning_rate": 1.6129070536529767e-05, "loss": 0.3892, "step": 29710 }, { "epoch": 4.850087751520346, "grad_norm": 2.227627992630005, "learning_rate": 1.612881979150624e-05, "loss": 0.3898, "step": 29711 }, { "epoch": 4.85025101016285, "grad_norm": 1.9164354801177979, "learning_rate": 1.6128569040310994e-05, "loss": 0.3457, "step": 29712 }, { "epoch": 4.850414268805355, "grad_norm": 2.3070363998413086, "learning_rate": 1.6128318282944277e-05, "loss": 0.4642, "step": 29713 }, { "epoch": 4.850577527447859, "grad_norm": 2.7051799297332764, "learning_rate": 1.6128067519406348e-05, "loss": 0.4328, "step": 29714 }, { "epoch": 4.8507407860903635, "grad_norm": 2.2527127265930176, "learning_rate": 1.6127816749697456e-05, "loss": 0.387, "step": 29715 }, { "epoch": 4.850904044732868, "grad_norm": 2.0663623809814453, "learning_rate": 1.612756597381785e-05, "loss": 0.3771, "step": 29716 }, { "epoch": 4.851067303375372, "grad_norm": 2.1421456336975098, "learning_rate": 1.6127315191767793e-05, "loss": 0.3589, "step": 29717 }, { "epoch": 4.851230562017877, "grad_norm": 2.366401195526123, "learning_rate": 1.612706440354753e-05, "loss": 0.4771, "step": 29718 }, { "epoch": 4.851393820660381, "grad_norm": 2.4760854244232178, "learning_rate": 1.6126813609157315e-05, "loss": 0.4417, "step": 29719 }, { "epoch": 4.851557079302886, "grad_norm": 2.3639309406280518, "learning_rate": 1.61265628085974e-05, "loss": 0.4113, "step": 29720 }, { "epoch": 4.85172033794539, "grad_norm": 2.7000961303710938, "learning_rate": 1.6126312001868034e-05, "loss": 0.4889, "step": 29721 }, { "epoch": 4.851883596587895, "grad_norm": 2.063366174697876, "learning_rate": 1.6126061188969474e-05, "loss": 0.4452, "step": 29722 }, { "epoch": 4.852046855230399, "grad_norm": 2.0008509159088135, "learning_rate": 1.6125810369901976e-05, "loss": 0.3723, "step": 29723 }, { "epoch": 4.8522101138729035, "grad_norm": 2.137291193008423, "learning_rate": 1.6125559544665788e-05, "loss": 0.3936, "step": 29724 }, { "epoch": 4.852373372515408, "grad_norm": 2.3691117763519287, "learning_rate": 1.612530871326116e-05, "loss": 0.3858, "step": 29725 }, { "epoch": 4.852536631157912, "grad_norm": 1.6752160787582397, "learning_rate": 1.612505787568835e-05, "loss": 0.3356, "step": 29726 }, { "epoch": 4.852699889800416, "grad_norm": 2.0534846782684326, "learning_rate": 1.612480703194761e-05, "loss": 0.3344, "step": 29727 }, { "epoch": 4.85286314844292, "grad_norm": 2.265357494354248, "learning_rate": 1.6124556182039186e-05, "loss": 0.425, "step": 29728 }, { "epoch": 4.853026407085425, "grad_norm": 2.3270602226257324, "learning_rate": 1.612430532596334e-05, "loss": 0.3923, "step": 29729 }, { "epoch": 4.853189665727929, "grad_norm": 2.340442657470703, "learning_rate": 1.612405446372032e-05, "loss": 0.3925, "step": 29730 }, { "epoch": 4.853352924370434, "grad_norm": 2.2840652465820312, "learning_rate": 1.612380359531038e-05, "loss": 0.4677, "step": 29731 }, { "epoch": 4.853516183012938, "grad_norm": 2.2912192344665527, "learning_rate": 1.6123552720733767e-05, "loss": 0.388, "step": 29732 }, { "epoch": 4.8536794416554425, "grad_norm": 2.305111885070801, "learning_rate": 1.612330183999074e-05, "loss": 0.4573, "step": 29733 }, { "epoch": 4.853842700297947, "grad_norm": 1.9828908443450928, "learning_rate": 1.6123050953081548e-05, "loss": 0.3859, "step": 29734 }, { "epoch": 4.854005958940451, "grad_norm": 2.4202730655670166, "learning_rate": 1.6122800060006447e-05, "loss": 0.4115, "step": 29735 }, { "epoch": 4.854169217582956, "grad_norm": 3.1967759132385254, "learning_rate": 1.612254916076569e-05, "loss": 0.4055, "step": 29736 }, { "epoch": 4.85433247622546, "grad_norm": 1.9700695276260376, "learning_rate": 1.6122298255359523e-05, "loss": 0.3376, "step": 29737 }, { "epoch": 4.854495734867965, "grad_norm": 2.1704583168029785, "learning_rate": 1.6122047343788208e-05, "loss": 0.3408, "step": 29738 }, { "epoch": 4.854658993510469, "grad_norm": 2.1103856563568115, "learning_rate": 1.6121796426051993e-05, "loss": 0.3428, "step": 29739 }, { "epoch": 4.854822252152974, "grad_norm": 2.020256757736206, "learning_rate": 1.6121545502151127e-05, "loss": 0.391, "step": 29740 }, { "epoch": 4.854985510795478, "grad_norm": 2.1741933822631836, "learning_rate": 1.6121294572085867e-05, "loss": 0.3923, "step": 29741 }, { "epoch": 4.8551487694379825, "grad_norm": 2.232123851776123, "learning_rate": 1.6121043635856468e-05, "loss": 0.4109, "step": 29742 }, { "epoch": 4.855312028080487, "grad_norm": 2.3967816829681396, "learning_rate": 1.6120792693463175e-05, "loss": 0.4618, "step": 29743 }, { "epoch": 4.8554752867229904, "grad_norm": 2.2758123874664307, "learning_rate": 1.6120541744906247e-05, "loss": 0.3911, "step": 29744 }, { "epoch": 4.855638545365496, "grad_norm": 1.9833784103393555, "learning_rate": 1.612029079018594e-05, "loss": 0.3474, "step": 29745 }, { "epoch": 4.855801804007999, "grad_norm": 2.172852039337158, "learning_rate": 1.6120039829302493e-05, "loss": 0.4015, "step": 29746 }, { "epoch": 4.855965062650504, "grad_norm": 2.3112690448760986, "learning_rate": 1.611978886225617e-05, "loss": 0.4263, "step": 29747 }, { "epoch": 4.856128321293008, "grad_norm": 3.1804025173187256, "learning_rate": 1.6119537889047226e-05, "loss": 0.4232, "step": 29748 }, { "epoch": 4.856291579935513, "grad_norm": 2.098452568054199, "learning_rate": 1.6119286909675908e-05, "loss": 0.3853, "step": 29749 }, { "epoch": 4.856454838578017, "grad_norm": 2.626497983932495, "learning_rate": 1.6119035924142468e-05, "loss": 0.4656, "step": 29750 }, { "epoch": 4.8566180972205215, "grad_norm": 2.349609613418579, "learning_rate": 1.611878493244716e-05, "loss": 0.3952, "step": 29751 }, { "epoch": 4.856781355863026, "grad_norm": 2.51129412651062, "learning_rate": 1.6118533934590237e-05, "loss": 0.4499, "step": 29752 }, { "epoch": 4.85694461450553, "grad_norm": 2.128103017807007, "learning_rate": 1.6118282930571953e-05, "loss": 0.352, "step": 29753 }, { "epoch": 4.857107873148035, "grad_norm": 2.4476025104522705, "learning_rate": 1.611803192039256e-05, "loss": 0.401, "step": 29754 }, { "epoch": 4.857271131790539, "grad_norm": 2.216203212738037, "learning_rate": 1.611778090405231e-05, "loss": 0.4396, "step": 29755 }, { "epoch": 4.857434390433044, "grad_norm": 2.1168792247772217, "learning_rate": 1.6117529881551456e-05, "loss": 0.3568, "step": 29756 }, { "epoch": 4.857597649075548, "grad_norm": 1.9745471477508545, "learning_rate": 1.611727885289025e-05, "loss": 0.3641, "step": 29757 }, { "epoch": 4.857760907718053, "grad_norm": 2.106295585632324, "learning_rate": 1.6117027818068946e-05, "loss": 0.4009, "step": 29758 }, { "epoch": 4.857924166360557, "grad_norm": 1.9052400588989258, "learning_rate": 1.61167767770878e-05, "loss": 0.3585, "step": 29759 }, { "epoch": 4.8580874250030615, "grad_norm": 2.1497232913970947, "learning_rate": 1.611652572994706e-05, "loss": 0.3833, "step": 29760 }, { "epoch": 4.858250683645565, "grad_norm": 2.109450340270996, "learning_rate": 1.6116274676646977e-05, "loss": 0.3967, "step": 29761 }, { "epoch": 4.85841394228807, "grad_norm": 2.7866060733795166, "learning_rate": 1.6116023617187812e-05, "loss": 0.4067, "step": 29762 }, { "epoch": 4.858577200930574, "grad_norm": 2.3612420558929443, "learning_rate": 1.611577255156981e-05, "loss": 0.4367, "step": 29763 }, { "epoch": 4.858740459573078, "grad_norm": 2.26198148727417, "learning_rate": 1.6115521479793223e-05, "loss": 0.4632, "step": 29764 }, { "epoch": 4.858903718215583, "grad_norm": 1.9831935167312622, "learning_rate": 1.6115270401858315e-05, "loss": 0.3679, "step": 29765 }, { "epoch": 4.859066976858087, "grad_norm": 2.048370122909546, "learning_rate": 1.6115019317765324e-05, "loss": 0.3354, "step": 29766 }, { "epoch": 4.859230235500592, "grad_norm": 2.558209180831909, "learning_rate": 1.6114768227514516e-05, "loss": 0.4146, "step": 29767 }, { "epoch": 4.859393494143096, "grad_norm": 1.8829373121261597, "learning_rate": 1.6114517131106134e-05, "loss": 0.3471, "step": 29768 }, { "epoch": 4.8595567527856005, "grad_norm": 2.198925018310547, "learning_rate": 1.611426602854044e-05, "loss": 0.4673, "step": 29769 }, { "epoch": 4.859720011428105, "grad_norm": 1.9705218076705933, "learning_rate": 1.611401491981768e-05, "loss": 0.3787, "step": 29770 }, { "epoch": 4.859883270070609, "grad_norm": 1.7877899408340454, "learning_rate": 1.6113763804938104e-05, "loss": 0.3493, "step": 29771 }, { "epoch": 4.860046528713114, "grad_norm": 2.3435966968536377, "learning_rate": 1.6113512683901974e-05, "loss": 0.4429, "step": 29772 }, { "epoch": 4.860209787355618, "grad_norm": 2.065561532974243, "learning_rate": 1.6113261556709535e-05, "loss": 0.3429, "step": 29773 }, { "epoch": 4.860373045998123, "grad_norm": 2.2782955169677734, "learning_rate": 1.6113010423361047e-05, "loss": 0.4717, "step": 29774 }, { "epoch": 4.860536304640627, "grad_norm": 2.414543628692627, "learning_rate": 1.6112759283856757e-05, "loss": 0.4316, "step": 29775 }, { "epoch": 4.860699563283132, "grad_norm": 2.193963050842285, "learning_rate": 1.611250813819692e-05, "loss": 0.4005, "step": 29776 }, { "epoch": 4.860862821925636, "grad_norm": 2.2983553409576416, "learning_rate": 1.6112256986381788e-05, "loss": 0.4568, "step": 29777 }, { "epoch": 4.8610260805681405, "grad_norm": 2.1141085624694824, "learning_rate": 1.6112005828411617e-05, "loss": 0.3406, "step": 29778 }, { "epoch": 4.861189339210645, "grad_norm": 1.9194059371948242, "learning_rate": 1.6111754664286654e-05, "loss": 0.3437, "step": 29779 }, { "epoch": 4.861352597853148, "grad_norm": 2.211517572402954, "learning_rate": 1.611150349400716e-05, "loss": 0.3758, "step": 29780 }, { "epoch": 4.861515856495653, "grad_norm": 1.851090431213379, "learning_rate": 1.6111252317573382e-05, "loss": 0.3782, "step": 29781 }, { "epoch": 4.861679115138157, "grad_norm": 1.640182375907898, "learning_rate": 1.6111001134985574e-05, "loss": 0.3533, "step": 29782 }, { "epoch": 4.861842373780662, "grad_norm": 2.318915843963623, "learning_rate": 1.6110749946243987e-05, "loss": 0.3846, "step": 29783 }, { "epoch": 4.862005632423166, "grad_norm": 2.120682954788208, "learning_rate": 1.6110498751348882e-05, "loss": 0.3847, "step": 29784 }, { "epoch": 4.862168891065671, "grad_norm": 1.9380921125411987, "learning_rate": 1.6110247550300503e-05, "loss": 0.3692, "step": 29785 }, { "epoch": 4.862332149708175, "grad_norm": 1.7417045831680298, "learning_rate": 1.6109996343099107e-05, "loss": 0.3245, "step": 29786 }, { "epoch": 4.8624954083506795, "grad_norm": 2.2295446395874023, "learning_rate": 1.610974512974495e-05, "loss": 0.3787, "step": 29787 }, { "epoch": 4.862658666993184, "grad_norm": 1.7242523431777954, "learning_rate": 1.6109493910238274e-05, "loss": 0.3011, "step": 29788 }, { "epoch": 4.862821925635688, "grad_norm": 1.9228249788284302, "learning_rate": 1.6109242684579346e-05, "loss": 0.3562, "step": 29789 }, { "epoch": 4.862985184278193, "grad_norm": 1.6685758829116821, "learning_rate": 1.6108991452768407e-05, "loss": 0.3263, "step": 29790 }, { "epoch": 4.863148442920697, "grad_norm": 2.713674306869507, "learning_rate": 1.610874021480572e-05, "loss": 0.4115, "step": 29791 }, { "epoch": 4.863311701563202, "grad_norm": 1.9530041217803955, "learning_rate": 1.610848897069153e-05, "loss": 0.4412, "step": 29792 }, { "epoch": 4.863474960205706, "grad_norm": 2.08917498588562, "learning_rate": 1.6108237720426093e-05, "loss": 0.3937, "step": 29793 }, { "epoch": 4.863638218848211, "grad_norm": 1.7932888269424438, "learning_rate": 1.610798646400966e-05, "loss": 0.3674, "step": 29794 }, { "epoch": 4.863801477490715, "grad_norm": 2.096470355987549, "learning_rate": 1.610773520144249e-05, "loss": 0.3525, "step": 29795 }, { "epoch": 4.8639647361332194, "grad_norm": 2.6978416442871094, "learning_rate": 1.6107483932724832e-05, "loss": 0.3878, "step": 29796 }, { "epoch": 4.864127994775723, "grad_norm": 1.8690956830978394, "learning_rate": 1.610723265785694e-05, "loss": 0.3415, "step": 29797 }, { "epoch": 4.864291253418228, "grad_norm": 1.980162501335144, "learning_rate": 1.6106981376839064e-05, "loss": 0.3665, "step": 29798 }, { "epoch": 4.864454512060732, "grad_norm": 1.8761507272720337, "learning_rate": 1.6106730089671463e-05, "loss": 0.3853, "step": 29799 }, { "epoch": 4.864617770703236, "grad_norm": 2.1955549716949463, "learning_rate": 1.6106478796354382e-05, "loss": 0.3381, "step": 29800 }, { "epoch": 4.864781029345741, "grad_norm": 2.141862630844116, "learning_rate": 1.6106227496888083e-05, "loss": 0.3602, "step": 29801 }, { "epoch": 4.864944287988245, "grad_norm": 1.5472923517227173, "learning_rate": 1.610597619127281e-05, "loss": 0.2835, "step": 29802 }, { "epoch": 4.86510754663075, "grad_norm": 2.07393217086792, "learning_rate": 1.6105724879508824e-05, "loss": 0.3809, "step": 29803 }, { "epoch": 4.865270805273254, "grad_norm": 1.8943818807601929, "learning_rate": 1.6105473561596373e-05, "loss": 0.3323, "step": 29804 }, { "epoch": 4.8654340639157585, "grad_norm": 2.0615386962890625, "learning_rate": 1.610522223753571e-05, "loss": 0.308, "step": 29805 }, { "epoch": 4.865597322558263, "grad_norm": 1.9144175052642822, "learning_rate": 1.6104970907327094e-05, "loss": 0.3519, "step": 29806 }, { "epoch": 4.865760581200767, "grad_norm": 2.6791985034942627, "learning_rate": 1.610471957097077e-05, "loss": 0.4368, "step": 29807 }, { "epoch": 4.865923839843272, "grad_norm": 2.2765676975250244, "learning_rate": 1.6104468228467e-05, "loss": 0.344, "step": 29808 }, { "epoch": 4.866087098485776, "grad_norm": 2.504667043685913, "learning_rate": 1.6104216879816027e-05, "loss": 0.4164, "step": 29809 }, { "epoch": 4.866250357128281, "grad_norm": 1.8355690240859985, "learning_rate": 1.6103965525018113e-05, "loss": 0.3991, "step": 29810 }, { "epoch": 4.866413615770785, "grad_norm": 2.1215100288391113, "learning_rate": 1.6103714164073502e-05, "loss": 0.406, "step": 29811 }, { "epoch": 4.86657687441329, "grad_norm": 2.6108438968658447, "learning_rate": 1.610346279698246e-05, "loss": 0.4642, "step": 29812 }, { "epoch": 4.866740133055794, "grad_norm": 2.249621868133545, "learning_rate": 1.6103211423745225e-05, "loss": 0.4432, "step": 29813 }, { "epoch": 4.8669033916982976, "grad_norm": 2.0806212425231934, "learning_rate": 1.6102960044362062e-05, "loss": 0.3633, "step": 29814 }, { "epoch": 4.867066650340803, "grad_norm": 3.0988082885742188, "learning_rate": 1.610270865883322e-05, "loss": 0.5188, "step": 29815 }, { "epoch": 4.867229908983306, "grad_norm": 2.0143797397613525, "learning_rate": 1.610245726715895e-05, "loss": 0.3702, "step": 29816 }, { "epoch": 4.867393167625811, "grad_norm": 1.8793225288391113, "learning_rate": 1.610220586933951e-05, "loss": 0.3786, "step": 29817 }, { "epoch": 4.867556426268315, "grad_norm": 2.289119005203247, "learning_rate": 1.6101954465375148e-05, "loss": 0.3809, "step": 29818 }, { "epoch": 4.86771968491082, "grad_norm": 2.4182424545288086, "learning_rate": 1.6101703055266122e-05, "loss": 0.4143, "step": 29819 }, { "epoch": 4.867882943553324, "grad_norm": 2.2722864151000977, "learning_rate": 1.610145163901268e-05, "loss": 0.379, "step": 29820 }, { "epoch": 4.868046202195829, "grad_norm": 1.9852917194366455, "learning_rate": 1.6101200216615077e-05, "loss": 0.3601, "step": 29821 }, { "epoch": 4.868209460838333, "grad_norm": 2.145930767059326, "learning_rate": 1.6100948788073567e-05, "loss": 0.3803, "step": 29822 }, { "epoch": 4.8683727194808375, "grad_norm": 2.3913955688476562, "learning_rate": 1.6100697353388405e-05, "loss": 0.4126, "step": 29823 }, { "epoch": 4.868535978123342, "grad_norm": 1.9733250141143799, "learning_rate": 1.6100445912559842e-05, "loss": 0.4079, "step": 29824 }, { "epoch": 4.868699236765846, "grad_norm": 1.9055007696151733, "learning_rate": 1.610019446558813e-05, "loss": 0.3558, "step": 29825 }, { "epoch": 4.868862495408351, "grad_norm": 2.3794188499450684, "learning_rate": 1.6099943012473528e-05, "loss": 0.4169, "step": 29826 }, { "epoch": 4.869025754050855, "grad_norm": 1.956074833869934, "learning_rate": 1.6099691553216283e-05, "loss": 0.3445, "step": 29827 }, { "epoch": 4.86918901269336, "grad_norm": 2.222757577896118, "learning_rate": 1.609944008781665e-05, "loss": 0.3974, "step": 29828 }, { "epoch": 4.869352271335864, "grad_norm": 2.5350303649902344, "learning_rate": 1.609918861627488e-05, "loss": 0.3659, "step": 29829 }, { "epoch": 4.869515529978369, "grad_norm": 1.853729009628296, "learning_rate": 1.609893713859123e-05, "loss": 0.3632, "step": 29830 }, { "epoch": 4.869678788620873, "grad_norm": 2.2064619064331055, "learning_rate": 1.6098685654765956e-05, "loss": 0.3683, "step": 29831 }, { "epoch": 4.869842047263377, "grad_norm": 2.1738998889923096, "learning_rate": 1.6098434164799304e-05, "loss": 0.4221, "step": 29832 }, { "epoch": 4.870005305905881, "grad_norm": 1.9636247158050537, "learning_rate": 1.6098182668691528e-05, "loss": 0.3091, "step": 29833 }, { "epoch": 4.870168564548385, "grad_norm": 2.9474008083343506, "learning_rate": 1.6097931166442888e-05, "loss": 0.5225, "step": 29834 }, { "epoch": 4.87033182319089, "grad_norm": 2.394331932067871, "learning_rate": 1.6097679658053628e-05, "loss": 0.4209, "step": 29835 }, { "epoch": 4.870495081833394, "grad_norm": 2.1137335300445557, "learning_rate": 1.609742814352401e-05, "loss": 0.3861, "step": 29836 }, { "epoch": 4.870658340475899, "grad_norm": 2.577829599380493, "learning_rate": 1.6097176622854283e-05, "loss": 0.4092, "step": 29837 }, { "epoch": 4.870821599118403, "grad_norm": 2.561253786087036, "learning_rate": 1.6096925096044703e-05, "loss": 0.5068, "step": 29838 }, { "epoch": 4.870984857760908, "grad_norm": 1.465737223625183, "learning_rate": 1.6096673563095517e-05, "loss": 0.2857, "step": 29839 }, { "epoch": 4.871148116403412, "grad_norm": 2.5251922607421875, "learning_rate": 1.609642202400698e-05, "loss": 0.4092, "step": 29840 }, { "epoch": 4.8713113750459165, "grad_norm": 2.2676796913146973, "learning_rate": 1.6096170478779354e-05, "loss": 0.4703, "step": 29841 }, { "epoch": 4.871474633688421, "grad_norm": 2.0771307945251465, "learning_rate": 1.6095918927412883e-05, "loss": 0.3958, "step": 29842 }, { "epoch": 4.871637892330925, "grad_norm": 2.0318520069122314, "learning_rate": 1.6095667369907825e-05, "loss": 0.4086, "step": 29843 }, { "epoch": 4.87180115097343, "grad_norm": 2.5107080936431885, "learning_rate": 1.6095415806264428e-05, "loss": 0.4464, "step": 29844 }, { "epoch": 4.871964409615934, "grad_norm": 1.8695622682571411, "learning_rate": 1.6095164236482952e-05, "loss": 0.3633, "step": 29845 }, { "epoch": 4.872127668258439, "grad_norm": 2.1962406635284424, "learning_rate": 1.6094912660563645e-05, "loss": 0.4385, "step": 29846 }, { "epoch": 4.872290926900943, "grad_norm": 2.6043174266815186, "learning_rate": 1.6094661078506764e-05, "loss": 0.4607, "step": 29847 }, { "epoch": 4.872454185543448, "grad_norm": 1.7469922304153442, "learning_rate": 1.609440949031256e-05, "loss": 0.3652, "step": 29848 }, { "epoch": 4.872617444185952, "grad_norm": 2.421335458755493, "learning_rate": 1.6094157895981292e-05, "loss": 0.4319, "step": 29849 }, { "epoch": 4.8727807028284555, "grad_norm": 2.8898422718048096, "learning_rate": 1.6093906295513202e-05, "loss": 0.4135, "step": 29850 }, { "epoch": 4.87294396147096, "grad_norm": 1.9133682250976562, "learning_rate": 1.6093654688908554e-05, "loss": 0.3619, "step": 29851 }, { "epoch": 4.873107220113464, "grad_norm": 2.4778661727905273, "learning_rate": 1.6093403076167594e-05, "loss": 0.4361, "step": 29852 }, { "epoch": 4.873270478755969, "grad_norm": 2.211578607559204, "learning_rate": 1.609315145729058e-05, "loss": 0.3674, "step": 29853 }, { "epoch": 4.873433737398473, "grad_norm": 2.149550676345825, "learning_rate": 1.6092899832277766e-05, "loss": 0.3731, "step": 29854 }, { "epoch": 4.873596996040978, "grad_norm": 1.9095160961151123, "learning_rate": 1.60926482011294e-05, "loss": 0.3641, "step": 29855 }, { "epoch": 4.873760254683482, "grad_norm": 2.0311717987060547, "learning_rate": 1.609239656384574e-05, "loss": 0.3807, "step": 29856 }, { "epoch": 4.873923513325987, "grad_norm": 2.3511886596679688, "learning_rate": 1.6092144920427043e-05, "loss": 0.424, "step": 29857 }, { "epoch": 4.874086771968491, "grad_norm": 2.0786514282226562, "learning_rate": 1.6091893270873552e-05, "loss": 0.403, "step": 29858 }, { "epoch": 4.8742500306109955, "grad_norm": 2.202997922897339, "learning_rate": 1.609164161518553e-05, "loss": 0.4056, "step": 29859 }, { "epoch": 4.8744132892535, "grad_norm": 1.7753846645355225, "learning_rate": 1.6091389953363226e-05, "loss": 0.3828, "step": 29860 }, { "epoch": 4.874576547896004, "grad_norm": 2.199415922164917, "learning_rate": 1.609113828540689e-05, "loss": 0.3568, "step": 29861 }, { "epoch": 4.874739806538509, "grad_norm": 2.075329542160034, "learning_rate": 1.6090886611316784e-05, "loss": 0.4172, "step": 29862 }, { "epoch": 4.874903065181013, "grad_norm": 2.221712827682495, "learning_rate": 1.6090634931093152e-05, "loss": 0.417, "step": 29863 }, { "epoch": 4.875066323823518, "grad_norm": 2.2894928455352783, "learning_rate": 1.6090383244736256e-05, "loss": 0.3904, "step": 29864 }, { "epoch": 4.875229582466022, "grad_norm": 1.990820288658142, "learning_rate": 1.6090131552246347e-05, "loss": 0.3754, "step": 29865 }, { "epoch": 4.8753928411085266, "grad_norm": 1.8530783653259277, "learning_rate": 1.6089879853623672e-05, "loss": 0.3179, "step": 29866 }, { "epoch": 4.87555609975103, "grad_norm": 2.1998636722564697, "learning_rate": 1.6089628148868493e-05, "loss": 0.412, "step": 29867 }, { "epoch": 4.875719358393535, "grad_norm": 2.036998748779297, "learning_rate": 1.6089376437981062e-05, "loss": 0.3231, "step": 29868 }, { "epoch": 4.875882617036039, "grad_norm": 2.342862606048584, "learning_rate": 1.6089124720961628e-05, "loss": 0.4203, "step": 29869 }, { "epoch": 4.876045875678543, "grad_norm": 1.9677231311798096, "learning_rate": 1.6088872997810444e-05, "loss": 0.4457, "step": 29870 }, { "epoch": 4.876209134321048, "grad_norm": 2.176732301712036, "learning_rate": 1.6088621268527772e-05, "loss": 0.3661, "step": 29871 }, { "epoch": 4.876372392963552, "grad_norm": 2.246945381164551, "learning_rate": 1.6088369533113855e-05, "loss": 0.4474, "step": 29872 }, { "epoch": 4.876535651606057, "grad_norm": 2.314472198486328, "learning_rate": 1.6088117791568955e-05, "loss": 0.4582, "step": 29873 }, { "epoch": 4.876698910248561, "grad_norm": 1.8192377090454102, "learning_rate": 1.608786604389332e-05, "loss": 0.4219, "step": 29874 }, { "epoch": 4.876862168891066, "grad_norm": 1.981610655784607, "learning_rate": 1.608761429008721e-05, "loss": 0.3624, "step": 29875 }, { "epoch": 4.87702542753357, "grad_norm": 2.277121067047119, "learning_rate": 1.6087362530150868e-05, "loss": 0.4522, "step": 29876 }, { "epoch": 4.8771886861760745, "grad_norm": 2.087947368621826, "learning_rate": 1.6087110764084557e-05, "loss": 0.3847, "step": 29877 }, { "epoch": 4.877351944818579, "grad_norm": 2.455976724624634, "learning_rate": 1.608685899188853e-05, "loss": 0.3953, "step": 29878 }, { "epoch": 4.877515203461083, "grad_norm": 2.125432252883911, "learning_rate": 1.608660721356303e-05, "loss": 0.3916, "step": 29879 }, { "epoch": 4.877678462103588, "grad_norm": 2.0084352493286133, "learning_rate": 1.608635542910832e-05, "loss": 0.3738, "step": 29880 }, { "epoch": 4.877841720746092, "grad_norm": 2.068071126937866, "learning_rate": 1.6086103638524654e-05, "loss": 0.4093, "step": 29881 }, { "epoch": 4.878004979388597, "grad_norm": 1.686867117881775, "learning_rate": 1.6085851841812284e-05, "loss": 0.34, "step": 29882 }, { "epoch": 4.878168238031101, "grad_norm": 2.167732000350952, "learning_rate": 1.608560003897146e-05, "loss": 0.3867, "step": 29883 }, { "epoch": 4.878331496673605, "grad_norm": 2.126746416091919, "learning_rate": 1.6085348230002443e-05, "loss": 0.4341, "step": 29884 }, { "epoch": 4.87849475531611, "grad_norm": 2.0388131141662598, "learning_rate": 1.6085096414905478e-05, "loss": 0.3817, "step": 29885 }, { "epoch": 4.8786580139586135, "grad_norm": 1.897936463356018, "learning_rate": 1.608484459368082e-05, "loss": 0.3755, "step": 29886 }, { "epoch": 4.878821272601118, "grad_norm": 2.724658489227295, "learning_rate": 1.6084592766328732e-05, "loss": 0.4538, "step": 29887 }, { "epoch": 4.878984531243622, "grad_norm": 2.123779773712158, "learning_rate": 1.608434093284946e-05, "loss": 0.3997, "step": 29888 }, { "epoch": 4.879147789886127, "grad_norm": 2.251877546310425, "learning_rate": 1.6084089093243254e-05, "loss": 0.3891, "step": 29889 }, { "epoch": 4.879311048528631, "grad_norm": 1.9963642358779907, "learning_rate": 1.6083837247510374e-05, "loss": 0.3781, "step": 29890 }, { "epoch": 4.879474307171136, "grad_norm": 2.4093236923217773, "learning_rate": 1.608358539565107e-05, "loss": 0.4347, "step": 29891 }, { "epoch": 4.87963756581364, "grad_norm": 2.169454336166382, "learning_rate": 1.6083333537665597e-05, "loss": 0.4537, "step": 29892 }, { "epoch": 4.879800824456145, "grad_norm": 2.1063222885131836, "learning_rate": 1.608308167355421e-05, "loss": 0.3967, "step": 29893 }, { "epoch": 4.879964083098649, "grad_norm": 2.5792620182037354, "learning_rate": 1.6082829803317164e-05, "loss": 0.4585, "step": 29894 }, { "epoch": 4.8801273417411535, "grad_norm": 2.5607430934906006, "learning_rate": 1.6082577926954705e-05, "loss": 0.4327, "step": 29895 }, { "epoch": 4.880290600383658, "grad_norm": 1.8615704774856567, "learning_rate": 1.6082326044467093e-05, "loss": 0.3861, "step": 29896 }, { "epoch": 4.880453859026162, "grad_norm": 1.9011093378067017, "learning_rate": 1.6082074155854583e-05, "loss": 0.3539, "step": 29897 }, { "epoch": 4.880617117668667, "grad_norm": 2.33052134513855, "learning_rate": 1.6081822261117424e-05, "loss": 0.4057, "step": 29898 }, { "epoch": 4.880780376311171, "grad_norm": 2.558196544647217, "learning_rate": 1.6081570360255872e-05, "loss": 0.4295, "step": 29899 }, { "epoch": 4.880943634953676, "grad_norm": 2.0984227657318115, "learning_rate": 1.608131845327018e-05, "loss": 0.3982, "step": 29900 }, { "epoch": 4.88110689359618, "grad_norm": 1.9222508668899536, "learning_rate": 1.6081066540160603e-05, "loss": 0.3795, "step": 29901 }, { "epoch": 4.8812701522386845, "grad_norm": 2.148883104324341, "learning_rate": 1.608081462092739e-05, "loss": 0.4155, "step": 29902 }, { "epoch": 4.881433410881188, "grad_norm": 2.1283299922943115, "learning_rate": 1.6080562695570804e-05, "loss": 0.4156, "step": 29903 }, { "epoch": 4.8815966695236925, "grad_norm": 1.965155005455017, "learning_rate": 1.6080310764091088e-05, "loss": 0.4081, "step": 29904 }, { "epoch": 4.881759928166197, "grad_norm": 2.009726047515869, "learning_rate": 1.6080058826488502e-05, "loss": 0.3859, "step": 29905 }, { "epoch": 4.881923186808701, "grad_norm": 2.030691146850586, "learning_rate": 1.60798068827633e-05, "loss": 0.3742, "step": 29906 }, { "epoch": 4.882086445451206, "grad_norm": 2.004786729812622, "learning_rate": 1.6079554932915734e-05, "loss": 0.3824, "step": 29907 }, { "epoch": 4.88224970409371, "grad_norm": 1.7718899250030518, "learning_rate": 1.6079302976946055e-05, "loss": 0.3384, "step": 29908 }, { "epoch": 4.882412962736215, "grad_norm": 2.439276933670044, "learning_rate": 1.6079051014854524e-05, "loss": 0.3923, "step": 29909 }, { "epoch": 4.882576221378719, "grad_norm": 2.7606141567230225, "learning_rate": 1.6078799046641385e-05, "loss": 0.4479, "step": 29910 }, { "epoch": 4.882739480021224, "grad_norm": 2.1456522941589355, "learning_rate": 1.60785470723069e-05, "loss": 0.4017, "step": 29911 }, { "epoch": 4.882902738663728, "grad_norm": 2.001464366912842, "learning_rate": 1.607829509185132e-05, "loss": 0.374, "step": 29912 }, { "epoch": 4.8830659973062325, "grad_norm": 2.018944263458252, "learning_rate": 1.6078043105274898e-05, "loss": 0.3601, "step": 29913 }, { "epoch": 4.883229255948737, "grad_norm": 2.182189702987671, "learning_rate": 1.6077791112577888e-05, "loss": 0.3609, "step": 29914 }, { "epoch": 4.883392514591241, "grad_norm": 2.0527737140655518, "learning_rate": 1.607753911376054e-05, "loss": 0.3579, "step": 29915 }, { "epoch": 4.883555773233746, "grad_norm": 1.8629428148269653, "learning_rate": 1.6077287108823117e-05, "loss": 0.3514, "step": 29916 }, { "epoch": 4.88371903187625, "grad_norm": 2.6604795455932617, "learning_rate": 1.607703509776587e-05, "loss": 0.4191, "step": 29917 }, { "epoch": 4.883882290518755, "grad_norm": 2.0768580436706543, "learning_rate": 1.607678308058904e-05, "loss": 0.3717, "step": 29918 }, { "epoch": 4.884045549161259, "grad_norm": 2.109179973602295, "learning_rate": 1.60765310572929e-05, "loss": 0.3721, "step": 29919 }, { "epoch": 4.884208807803763, "grad_norm": 2.1924431324005127, "learning_rate": 1.6076279027877694e-05, "loss": 0.428, "step": 29920 }, { "epoch": 4.884372066446268, "grad_norm": 1.7270872592926025, "learning_rate": 1.6076026992343673e-05, "loss": 0.3404, "step": 29921 }, { "epoch": 4.8845353250887715, "grad_norm": 2.3489060401916504, "learning_rate": 1.60757749506911e-05, "loss": 0.4037, "step": 29922 }, { "epoch": 4.884698583731276, "grad_norm": 2.141481399536133, "learning_rate": 1.6075522902920217e-05, "loss": 0.3569, "step": 29923 }, { "epoch": 4.88486184237378, "grad_norm": 2.1660659313201904, "learning_rate": 1.6075270849031287e-05, "loss": 0.3685, "step": 29924 }, { "epoch": 4.885025101016285, "grad_norm": 1.5462491512298584, "learning_rate": 1.607501878902456e-05, "loss": 0.3319, "step": 29925 }, { "epoch": 4.885188359658789, "grad_norm": 2.366997241973877, "learning_rate": 1.607476672290029e-05, "loss": 0.38, "step": 29926 }, { "epoch": 4.885351618301294, "grad_norm": 2.1583290100097656, "learning_rate": 1.607451465065873e-05, "loss": 0.4131, "step": 29927 }, { "epoch": 4.885514876943798, "grad_norm": 2.7413907051086426, "learning_rate": 1.607426257230014e-05, "loss": 0.4668, "step": 29928 }, { "epoch": 4.885678135586303, "grad_norm": 2.6368801593780518, "learning_rate": 1.6074010487824766e-05, "loss": 0.4019, "step": 29929 }, { "epoch": 4.885841394228807, "grad_norm": 2.498802661895752, "learning_rate": 1.607375839723287e-05, "loss": 0.449, "step": 29930 }, { "epoch": 4.8860046528713115, "grad_norm": 2.46171236038208, "learning_rate": 1.60735063005247e-05, "loss": 0.4276, "step": 29931 }, { "epoch": 4.886167911513816, "grad_norm": 2.1888515949249268, "learning_rate": 1.6073254197700504e-05, "loss": 0.4127, "step": 29932 }, { "epoch": 4.88633117015632, "grad_norm": 1.9356238842010498, "learning_rate": 1.6073002088760544e-05, "loss": 0.3687, "step": 29933 }, { "epoch": 4.886494428798825, "grad_norm": 2.5737032890319824, "learning_rate": 1.6072749973705075e-05, "loss": 0.4103, "step": 29934 }, { "epoch": 4.886657687441329, "grad_norm": 2.1482036113739014, "learning_rate": 1.607249785253435e-05, "loss": 0.3507, "step": 29935 }, { "epoch": 4.886820946083834, "grad_norm": 3.1795082092285156, "learning_rate": 1.6072245725248622e-05, "loss": 0.4356, "step": 29936 }, { "epoch": 4.886984204726337, "grad_norm": 1.6885417699813843, "learning_rate": 1.607199359184814e-05, "loss": 0.3326, "step": 29937 }, { "epoch": 4.8871474633688425, "grad_norm": 2.213245391845703, "learning_rate": 1.6071741452333164e-05, "loss": 0.4668, "step": 29938 }, { "epoch": 4.887310722011346, "grad_norm": 2.048056125640869, "learning_rate": 1.6071489306703946e-05, "loss": 0.3574, "step": 29939 }, { "epoch": 4.8874739806538505, "grad_norm": 1.7172714471817017, "learning_rate": 1.6071237154960738e-05, "loss": 0.3525, "step": 29940 }, { "epoch": 4.887637239296355, "grad_norm": 2.1752264499664307, "learning_rate": 1.60709849971038e-05, "loss": 0.4569, "step": 29941 }, { "epoch": 4.887800497938859, "grad_norm": 1.9620712995529175, "learning_rate": 1.607073283313338e-05, "loss": 0.3745, "step": 29942 }, { "epoch": 4.887963756581364, "grad_norm": 2.690446138381958, "learning_rate": 1.607048066304973e-05, "loss": 0.3818, "step": 29943 }, { "epoch": 4.888127015223868, "grad_norm": 2.3276031017303467, "learning_rate": 1.6070228486853112e-05, "loss": 0.3752, "step": 29944 }, { "epoch": 4.888290273866373, "grad_norm": 2.255401134490967, "learning_rate": 1.6069976304543776e-05, "loss": 0.4253, "step": 29945 }, { "epoch": 4.888453532508877, "grad_norm": 2.205767869949341, "learning_rate": 1.606972411612197e-05, "loss": 0.4178, "step": 29946 }, { "epoch": 4.888616791151382, "grad_norm": 1.9539991617202759, "learning_rate": 1.606947192158796e-05, "loss": 0.3837, "step": 29947 }, { "epoch": 4.888780049793886, "grad_norm": 1.9596596956253052, "learning_rate": 1.606921972094199e-05, "loss": 0.3953, "step": 29948 }, { "epoch": 4.8889433084363905, "grad_norm": 1.9938085079193115, "learning_rate": 1.6068967514184318e-05, "loss": 0.406, "step": 29949 }, { "epoch": 4.889106567078895, "grad_norm": 2.010657548904419, "learning_rate": 1.6068715301315195e-05, "loss": 0.3618, "step": 29950 }, { "epoch": 4.889269825721399, "grad_norm": 2.290571689605713, "learning_rate": 1.606846308233488e-05, "loss": 0.4274, "step": 29951 }, { "epoch": 4.889433084363904, "grad_norm": 2.095555543899536, "learning_rate": 1.6068210857243625e-05, "loss": 0.3952, "step": 29952 }, { "epoch": 4.889596343006408, "grad_norm": 2.028911828994751, "learning_rate": 1.606795862604168e-05, "loss": 0.3756, "step": 29953 }, { "epoch": 4.889759601648913, "grad_norm": 2.098731279373169, "learning_rate": 1.6067706388729303e-05, "loss": 0.4078, "step": 29954 }, { "epoch": 4.889922860291417, "grad_norm": 2.30895733833313, "learning_rate": 1.606745414530675e-05, "loss": 0.4029, "step": 29955 }, { "epoch": 4.890086118933921, "grad_norm": 1.8159031867980957, "learning_rate": 1.6067201895774273e-05, "loss": 0.3489, "step": 29956 }, { "epoch": 4.890249377576425, "grad_norm": 1.5833277702331543, "learning_rate": 1.606694964013212e-05, "loss": 0.3296, "step": 29957 }, { "epoch": 4.8904126362189295, "grad_norm": 2.040459156036377, "learning_rate": 1.6066697378380553e-05, "loss": 0.3836, "step": 29958 }, { "epoch": 4.890575894861434, "grad_norm": 2.378572463989258, "learning_rate": 1.6066445110519825e-05, "loss": 0.3957, "step": 29959 }, { "epoch": 4.890739153503938, "grad_norm": 2.0870349407196045, "learning_rate": 1.6066192836550186e-05, "loss": 0.3771, "step": 29960 }, { "epoch": 4.890902412146443, "grad_norm": 2.0317881107330322, "learning_rate": 1.6065940556471896e-05, "loss": 0.3686, "step": 29961 }, { "epoch": 4.891065670788947, "grad_norm": 2.541264772415161, "learning_rate": 1.6065688270285203e-05, "loss": 0.4145, "step": 29962 }, { "epoch": 4.891228929431452, "grad_norm": 3.2992141246795654, "learning_rate": 1.606543597799036e-05, "loss": 0.5405, "step": 29963 }, { "epoch": 4.891392188073956, "grad_norm": 2.6635615825653076, "learning_rate": 1.606518367958763e-05, "loss": 0.4019, "step": 29964 }, { "epoch": 4.891555446716461, "grad_norm": 2.6280510425567627, "learning_rate": 1.606493137507726e-05, "loss": 0.4039, "step": 29965 }, { "epoch": 4.891718705358965, "grad_norm": 2.170971632003784, "learning_rate": 1.6064679064459507e-05, "loss": 0.3296, "step": 29966 }, { "epoch": 4.8918819640014695, "grad_norm": 1.8692013025283813, "learning_rate": 1.606442674773462e-05, "loss": 0.3464, "step": 29967 }, { "epoch": 4.892045222643974, "grad_norm": 2.312448024749756, "learning_rate": 1.606417442490286e-05, "loss": 0.4064, "step": 29968 }, { "epoch": 4.892208481286478, "grad_norm": 2.0202927589416504, "learning_rate": 1.6063922095964476e-05, "loss": 0.3678, "step": 29969 }, { "epoch": 4.892371739928983, "grad_norm": 1.69284188747406, "learning_rate": 1.6063669760919727e-05, "loss": 0.3504, "step": 29970 }, { "epoch": 4.892534998571487, "grad_norm": 2.2211363315582275, "learning_rate": 1.606341741976886e-05, "loss": 0.4344, "step": 29971 }, { "epoch": 4.892698257213992, "grad_norm": 2.028167724609375, "learning_rate": 1.6063165072512136e-05, "loss": 0.3535, "step": 29972 }, { "epoch": 4.892861515856495, "grad_norm": 2.4088850021362305, "learning_rate": 1.6062912719149805e-05, "loss": 0.377, "step": 29973 }, { "epoch": 4.8930247744990005, "grad_norm": 2.2846133708953857, "learning_rate": 1.6062660359682124e-05, "loss": 0.4099, "step": 29974 }, { "epoch": 4.893188033141504, "grad_norm": 1.9184902906417847, "learning_rate": 1.6062407994109342e-05, "loss": 0.3555, "step": 29975 }, { "epoch": 4.8933512917840085, "grad_norm": 2.3074119091033936, "learning_rate": 1.6062155622431723e-05, "loss": 0.4314, "step": 29976 }, { "epoch": 4.893514550426513, "grad_norm": 2.2233188152313232, "learning_rate": 1.6061903244649513e-05, "loss": 0.3758, "step": 29977 }, { "epoch": 4.893677809069017, "grad_norm": 2.4625227451324463, "learning_rate": 1.6061650860762964e-05, "loss": 0.441, "step": 29978 }, { "epoch": 4.893841067711522, "grad_norm": 2.5396323204040527, "learning_rate": 1.606139847077234e-05, "loss": 0.3945, "step": 29979 }, { "epoch": 4.894004326354026, "grad_norm": 1.9172415733337402, "learning_rate": 1.6061146074677884e-05, "loss": 0.3332, "step": 29980 }, { "epoch": 4.894167584996531, "grad_norm": 2.1704444885253906, "learning_rate": 1.606089367247986e-05, "loss": 0.3588, "step": 29981 }, { "epoch": 4.894330843639035, "grad_norm": 2.199157238006592, "learning_rate": 1.6060641264178513e-05, "loss": 0.3804, "step": 29982 }, { "epoch": 4.89449410228154, "grad_norm": 1.9954699277877808, "learning_rate": 1.6060388849774105e-05, "loss": 0.4338, "step": 29983 }, { "epoch": 4.894657360924044, "grad_norm": 2.014531373977661, "learning_rate": 1.6060136429266886e-05, "loss": 0.3716, "step": 29984 }, { "epoch": 4.8948206195665485, "grad_norm": 2.0828139781951904, "learning_rate": 1.605988400265711e-05, "loss": 0.3812, "step": 29985 }, { "epoch": 4.894983878209053, "grad_norm": 2.1168673038482666, "learning_rate": 1.6059631569945037e-05, "loss": 0.3747, "step": 29986 }, { "epoch": 4.895147136851557, "grad_norm": 2.277602195739746, "learning_rate": 1.6059379131130915e-05, "loss": 0.422, "step": 29987 }, { "epoch": 4.895310395494062, "grad_norm": 1.9776839017868042, "learning_rate": 1.6059126686214995e-05, "loss": 0.3661, "step": 29988 }, { "epoch": 4.895473654136566, "grad_norm": 2.1129791736602783, "learning_rate": 1.605887423519754e-05, "loss": 0.3654, "step": 29989 }, { "epoch": 4.89563691277907, "grad_norm": 2.0583367347717285, "learning_rate": 1.60586217780788e-05, "loss": 0.4148, "step": 29990 }, { "epoch": 4.895800171421575, "grad_norm": 2.361762523651123, "learning_rate": 1.6058369314859027e-05, "loss": 0.3963, "step": 29991 }, { "epoch": 4.895963430064079, "grad_norm": 2.4615254402160645, "learning_rate": 1.605811684553848e-05, "loss": 0.4165, "step": 29992 }, { "epoch": 4.896126688706583, "grad_norm": 1.7148267030715942, "learning_rate": 1.605786437011741e-05, "loss": 0.3454, "step": 29993 }, { "epoch": 4.8962899473490875, "grad_norm": 2.3824052810668945, "learning_rate": 1.6057611888596075e-05, "loss": 0.4514, "step": 29994 }, { "epoch": 4.896453205991592, "grad_norm": 1.923061728477478, "learning_rate": 1.6057359400974726e-05, "loss": 0.3691, "step": 29995 }, { "epoch": 4.896616464634096, "grad_norm": 2.4922144412994385, "learning_rate": 1.6057106907253617e-05, "loss": 0.4587, "step": 29996 }, { "epoch": 4.896779723276601, "grad_norm": 2.444342613220215, "learning_rate": 1.6056854407433e-05, "loss": 0.3812, "step": 29997 }, { "epoch": 4.896942981919105, "grad_norm": 2.3408610820770264, "learning_rate": 1.6056601901513135e-05, "loss": 0.3999, "step": 29998 }, { "epoch": 4.89710624056161, "grad_norm": 2.1326746940612793, "learning_rate": 1.6056349389494272e-05, "loss": 0.3837, "step": 29999 }, { "epoch": 4.897269499204114, "grad_norm": 2.28853440284729, "learning_rate": 1.6056096871376667e-05, "loss": 0.4529, "step": 30000 } ], "logging_steps": 1, "max_steps": 100000, "num_input_tokens_seen": 0, "num_train_epochs": 17, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.476277164458967e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }