diff --git "a/checkpoint-21773/trainer_state.json" "b/checkpoint-21773/trainer_state.json" deleted file mode 100644--- "a/checkpoint-21773/trainer_state.json" +++ /dev/null @@ -1,6145 +0,0 @@ -{ - "best_metric": 0.0981861874461174, - "best_model_checkpoint": "autotrain-ai-image-detect-20241226-0202/checkpoint-21773", - "epoch": 1.0, - "eval_steps": 500, - "global_step": 21773, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0011482110871262573, - "grad_norm": 0.009014680050313473, - "learning_rate": 3.9999869880391874e-05, - "loss": 0.0647, - "step": 25 - }, - { - "epoch": 0.0022964221742525145, - "grad_norm": 0.1195719987154007, - "learning_rate": 3.999947952326062e-05, - "loss": 0.0634, - "step": 50 - }, - { - "epoch": 0.003444633261378772, - "grad_norm": 0.033902380615472794, - "learning_rate": 3.9998828933685526e-05, - "loss": 0.0411, - "step": 75 - }, - { - "epoch": 0.004592844348505029, - "grad_norm": 0.003960250411182642, - "learning_rate": 3.999791812013205e-05, - "loss": 0.0076, - "step": 100 - }, - { - "epoch": 0.005741055435631286, - "grad_norm": 0.06124406307935715, - "learning_rate": 3.9996747094451666e-05, - "loss": 0.0671, - "step": 125 - }, - { - "epoch": 0.006889266522757544, - "grad_norm": 0.4315168559551239, - "learning_rate": 3.999531587188172e-05, - "loss": 0.0831, - "step": 150 - }, - { - "epoch": 0.008037477609883801, - "grad_norm": 0.20916788280010223, - "learning_rate": 3.99936244710452e-05, - "loss": 0.1156, - "step": 175 - }, - { - "epoch": 0.009185688697010058, - "grad_norm": 0.03289841115474701, - "learning_rate": 3.999167291395058e-05, - "loss": 0.1006, - "step": 200 - }, - { - "epoch": 0.010333899784136315, - "grad_norm": 0.803317129611969, - "learning_rate": 3.998946122599141e-05, - "loss": 0.0838, - "step": 225 - }, - { - "epoch": 0.011482110871262573, - "grad_norm": 25.849609375, - "learning_rate": 3.998698943594612e-05, - "loss": 0.0933, - "step": 250 - }, - { - "epoch": 0.01263032195838883, - "grad_norm": 12.879656791687012, - "learning_rate": 3.9984257575977524e-05, - "loss": 0.1025, - "step": 275 - }, - { - "epoch": 0.013778533045515087, - "grad_norm": 0.27299392223358154, - "learning_rate": 3.998126568163247e-05, - "loss": 0.1247, - "step": 300 - }, - { - "epoch": 0.014926744132641345, - "grad_norm": 0.012173276394605637, - "learning_rate": 3.99780137918414e-05, - "loss": 0.0649, - "step": 325 - }, - { - "epoch": 0.016074955219767602, - "grad_norm": 0.026948055252432823, - "learning_rate": 3.9974501948917754e-05, - "loss": 0.1292, - "step": 350 - }, - { - "epoch": 0.01722316630689386, - "grad_norm": 0.39485013484954834, - "learning_rate": 3.9970730198557495e-05, - "loss": 0.0592, - "step": 375 - }, - { - "epoch": 0.018371377394020116, - "grad_norm": 20.581707000732422, - "learning_rate": 3.996669858983851e-05, - "loss": 0.0457, - "step": 400 - }, - { - "epoch": 0.019519588481146374, - "grad_norm": 0.007092855870723724, - "learning_rate": 3.99624071752199e-05, - "loss": 0.1605, - "step": 425 - }, - { - "epoch": 0.02066779956827263, - "grad_norm": 9.681135177612305, - "learning_rate": 3.9957856010541405e-05, - "loss": 0.12, - "step": 450 - }, - { - "epoch": 0.021816010655398888, - "grad_norm": 0.29390987753868103, - "learning_rate": 3.9953045155022606e-05, - "loss": 0.051, - "step": 475 - }, - { - "epoch": 0.022964221742525145, - "grad_norm": 0.12392851710319519, - "learning_rate": 3.9947974671262166e-05, - "loss": 0.154, - "step": 500 - }, - { - "epoch": 
0.024112432829651403, - "grad_norm": 19.981584548950195, - "learning_rate": 3.9942644625237004e-05, - "loss": 0.1149, - "step": 525 - }, - { - "epoch": 0.02526064391677766, - "grad_norm": 0.2311665415763855, - "learning_rate": 3.993705508630148e-05, - "loss": 0.09, - "step": 550 - }, - { - "epoch": 0.026408855003903917, - "grad_norm": 0.10829292237758636, - "learning_rate": 3.993120612718646e-05, - "loss": 0.109, - "step": 575 - }, - { - "epoch": 0.027557066091030175, - "grad_norm": 97.97807312011719, - "learning_rate": 3.992509782399837e-05, - "loss": 0.0767, - "step": 600 - }, - { - "epoch": 0.028705277178156432, - "grad_norm": 0.16646654903888702, - "learning_rate": 3.991873025621821e-05, - "loss": 0.1011, - "step": 625 - }, - { - "epoch": 0.02985348826528269, - "grad_norm": 0.0468452163040638, - "learning_rate": 3.991210350670052e-05, - "loss": 0.0476, - "step": 650 - }, - { - "epoch": 0.031001699352408946, - "grad_norm": 0.061356183141469955, - "learning_rate": 3.9905217661672294e-05, - "loss": 0.1146, - "step": 675 - }, - { - "epoch": 0.032149910439535204, - "grad_norm": 15.284680366516113, - "learning_rate": 3.989807281073191e-05, - "loss": 0.0839, - "step": 700 - }, - { - "epoch": 0.033298121526661464, - "grad_norm": 0.3123476803302765, - "learning_rate": 3.989066904684786e-05, - "loss": 0.154, - "step": 725 - }, - { - "epoch": 0.03444633261378772, - "grad_norm": 85.98719787597656, - "learning_rate": 3.988300646635763e-05, - "loss": 0.1015, - "step": 750 - }, - { - "epoch": 0.03559454370091398, - "grad_norm": 5.17075252532959, - "learning_rate": 3.987508516896643e-05, - "loss": 0.1069, - "step": 775 - }, - { - "epoch": 0.03674275478804023, - "grad_norm": 0.2559378445148468, - "learning_rate": 3.9866905257745875e-05, - "loss": 0.1191, - "step": 800 - }, - { - "epoch": 0.037890965875166494, - "grad_norm": 0.17535167932510376, - "learning_rate": 3.985846683913263e-05, - "loss": 0.104, - "step": 825 - }, - { - "epoch": 0.03903917696229275, - "grad_norm": 0.6827572584152222, - "learning_rate": 3.9849770022927085e-05, - "loss": 0.0745, - "step": 850 - }, - { - "epoch": 0.04018738804941901, - "grad_norm": 38.4011344909668, - "learning_rate": 3.9840814922291857e-05, - "loss": 0.0954, - "step": 875 - }, - { - "epoch": 0.04133559913654526, - "grad_norm": 25.91033172607422, - "learning_rate": 3.983160165375038e-05, - "loss": 0.1061, - "step": 900 - }, - { - "epoch": 0.04248381022367152, - "grad_norm": 17.619667053222656, - "learning_rate": 3.982213033718533e-05, - "loss": 0.0914, - "step": 925 - }, - { - "epoch": 0.043632021310797776, - "grad_norm": 0.0705079659819603, - "learning_rate": 3.981240109583711e-05, - "loss": 0.118, - "step": 950 - }, - { - "epoch": 0.04478023239792404, - "grad_norm": 0.1458115577697754, - "learning_rate": 3.9802414056302235e-05, - "loss": 0.0843, - "step": 975 - }, - { - "epoch": 0.04592844348505029, - "grad_norm": 0.156606525182724, - "learning_rate": 3.9792169348531666e-05, - "loss": 0.1119, - "step": 1000 - }, - { - "epoch": 0.04707665457217655, - "grad_norm": 1.229665994644165, - "learning_rate": 3.978166710582914e-05, - "loss": 0.1472, - "step": 1025 - }, - { - "epoch": 0.048224865659302805, - "grad_norm": 0.1480337530374527, - "learning_rate": 3.977090746484942e-05, - "loss": 0.1022, - "step": 1050 - }, - { - "epoch": 0.049373076746429066, - "grad_norm": 17.64451789855957, - "learning_rate": 3.975989056559655e-05, - "loss": 0.0852, - "step": 1075 - }, - { - "epoch": 0.05052128783355532, - "grad_norm": 72.8317642211914, - "learning_rate": 
3.974861655142198e-05, - "loss": 0.0509, - "step": 1100 - }, - { - "epoch": 0.05166949892068158, - "grad_norm": 0.027190979570150375, - "learning_rate": 3.9737085569022736e-05, - "loss": 0.0603, - "step": 1125 - }, - { - "epoch": 0.052817710007807835, - "grad_norm": 0.06858149915933609, - "learning_rate": 3.9725297768439514e-05, - "loss": 0.0894, - "step": 1150 - }, - { - "epoch": 0.053965921094934095, - "grad_norm": 1.3908005952835083, - "learning_rate": 3.971325330305472e-05, - "loss": 0.1071, - "step": 1175 - }, - { - "epoch": 0.05511413218206035, - "grad_norm": 23.995895385742188, - "learning_rate": 3.9700952329590454e-05, - "loss": 0.1354, - "step": 1200 - }, - { - "epoch": 0.05626234326918661, - "grad_norm": 42.93232727050781, - "learning_rate": 3.968839500810651e-05, - "loss": 0.0654, - "step": 1225 - }, - { - "epoch": 0.057410554356312864, - "grad_norm": 0.07380808889865875, - "learning_rate": 3.9675581501998255e-05, - "loss": 0.1138, - "step": 1250 - }, - { - "epoch": 0.058558765443439124, - "grad_norm": 0.1862788200378418, - "learning_rate": 3.966251197799454e-05, - "loss": 0.0774, - "step": 1275 - }, - { - "epoch": 0.05970697653056538, - "grad_norm": 0.6021133661270142, - "learning_rate": 3.964918660615549e-05, - "loss": 0.0541, - "step": 1300 - }, - { - "epoch": 0.06085518761769164, - "grad_norm": 0.012276153080165386, - "learning_rate": 3.9635605559870324e-05, - "loss": 0.079, - "step": 1325 - }, - { - "epoch": 0.06200339870481789, - "grad_norm": 0.12071600556373596, - "learning_rate": 3.962176901585508e-05, - "loss": 0.0939, - "step": 1350 - }, - { - "epoch": 0.06315160979194415, - "grad_norm": 193.4734344482422, - "learning_rate": 3.960767715415033e-05, - "loss": 0.1279, - "step": 1375 - }, - { - "epoch": 0.06429982087907041, - "grad_norm": 20.70905876159668, - "learning_rate": 3.9593330158118826e-05, - "loss": 0.1717, - "step": 1400 - }, - { - "epoch": 0.06544803196619667, - "grad_norm": 0.20234434306621552, - "learning_rate": 3.957872821444312e-05, - "loss": 0.0846, - "step": 1425 - }, - { - "epoch": 0.06659624305332293, - "grad_norm": 0.10155820101499557, - "learning_rate": 3.956387151312312e-05, - "loss": 0.0694, - "step": 1450 - }, - { - "epoch": 0.06774445414044918, - "grad_norm": 0.027118023484945297, - "learning_rate": 3.9548760247473666e-05, - "loss": 0.1274, - "step": 1475 - }, - { - "epoch": 0.06889266522757544, - "grad_norm": 0.05700485408306122, - "learning_rate": 3.9533394614121926e-05, - "loss": 0.0701, - "step": 1500 - }, - { - "epoch": 0.0700408763147017, - "grad_norm": 64.16590881347656, - "learning_rate": 3.951777481300494e-05, - "loss": 0.1467, - "step": 1525 - }, - { - "epoch": 0.07118908740182796, - "grad_norm": 10.680575370788574, - "learning_rate": 3.950190104736694e-05, - "loss": 0.142, - "step": 1550 - }, - { - "epoch": 0.0723372984889542, - "grad_norm": 14.797567367553711, - "learning_rate": 3.948577352375674e-05, - "loss": 0.143, - "step": 1575 - }, - { - "epoch": 0.07348550957608047, - "grad_norm": 1.4608783721923828, - "learning_rate": 3.946939245202505e-05, - "loss": 0.1086, - "step": 1600 - }, - { - "epoch": 0.07463372066320673, - "grad_norm": 7.491002082824707, - "learning_rate": 3.9452758045321726e-05, - "loss": 0.1096, - "step": 1625 - }, - { - "epoch": 0.07578193175033299, - "grad_norm": 0.8704826235771179, - "learning_rate": 3.9435870520093027e-05, - "loss": 0.0745, - "step": 1650 - }, - { - "epoch": 0.07693014283745923, - "grad_norm": 18.54655647277832, - "learning_rate": 3.941873009607876e-05, - "loss": 0.1094, - "step": 1675 - }, - { - 
"epoch": 0.0780783539245855, - "grad_norm": 0.26919639110565186, - "learning_rate": 3.940133699630945e-05, - "loss": 0.1378, - "step": 1700 - }, - { - "epoch": 0.07922656501171176, - "grad_norm": 0.07600715756416321, - "learning_rate": 3.9383691447103443e-05, - "loss": 0.0277, - "step": 1725 - }, - { - "epoch": 0.08037477609883802, - "grad_norm": 0.03989580273628235, - "learning_rate": 3.936579367806392e-05, - "loss": 0.1106, - "step": 1750 - }, - { - "epoch": 0.08152298718596426, - "grad_norm": 0.060093723237514496, - "learning_rate": 3.934764392207595e-05, - "loss": 0.0976, - "step": 1775 - }, - { - "epoch": 0.08267119827309052, - "grad_norm": 16.212749481201172, - "learning_rate": 3.9329242415303464e-05, - "loss": 0.128, - "step": 1800 - }, - { - "epoch": 0.08381940936021678, - "grad_norm": 10.21111011505127, - "learning_rate": 3.931058939718613e-05, - "loss": 0.0758, - "step": 1825 - }, - { - "epoch": 0.08496762044734305, - "grad_norm": 0.7069344520568848, - "learning_rate": 3.9291685110436285e-05, - "loss": 0.1639, - "step": 1850 - }, - { - "epoch": 0.08611583153446929, - "grad_norm": 1.7670834064483643, - "learning_rate": 3.9272529801035785e-05, - "loss": 0.1184, - "step": 1875 - }, - { - "epoch": 0.08726404262159555, - "grad_norm": 0.5139174461364746, - "learning_rate": 3.925312371823275e-05, - "loss": 0.1004, - "step": 1900 - }, - { - "epoch": 0.08841225370872181, - "grad_norm": 0.5114403963088989, - "learning_rate": 3.9233467114538376e-05, - "loss": 0.0819, - "step": 1925 - }, - { - "epoch": 0.08956046479584807, - "grad_norm": 0.01510544028133154, - "learning_rate": 3.9213560245723625e-05, - "loss": 0.1955, - "step": 1950 - }, - { - "epoch": 0.09070867588297432, - "grad_norm": 0.10435652732849121, - "learning_rate": 3.919340337081589e-05, - "loss": 0.1491, - "step": 1975 - }, - { - "epoch": 0.09185688697010058, - "grad_norm": 22.64883804321289, - "learning_rate": 3.917299675209563e-05, - "loss": 0.142, - "step": 2000 - }, - { - "epoch": 0.09300509805722684, - "grad_norm": 9.807494163513184, - "learning_rate": 3.9152340655092975e-05, - "loss": 0.0557, - "step": 2025 - }, - { - "epoch": 0.0941533091443531, - "grad_norm": 0.6470879316329956, - "learning_rate": 3.9131435348584245e-05, - "loss": 0.0908, - "step": 2050 - }, - { - "epoch": 0.09530152023147935, - "grad_norm": 28.449705123901367, - "learning_rate": 3.9110281104588476e-05, - "loss": 0.074, - "step": 2075 - }, - { - "epoch": 0.09644973131860561, - "grad_norm": 0.19204096496105194, - "learning_rate": 3.908887819836386e-05, - "loss": 0.0709, - "step": 2100 - }, - { - "epoch": 0.09759794240573187, - "grad_norm": 15.106844902038574, - "learning_rate": 3.9067226908404166e-05, - "loss": 0.1325, - "step": 2125 - }, - { - "epoch": 0.09874615349285813, - "grad_norm": 0.47706907987594604, - "learning_rate": 3.904532751643514e-05, - "loss": 0.1235, - "step": 2150 - }, - { - "epoch": 0.09989436457998438, - "grad_norm": 11.67285442352295, - "learning_rate": 3.902318030741081e-05, - "loss": 0.0946, - "step": 2175 - }, - { - "epoch": 0.10104257566711064, - "grad_norm": 16.763837814331055, - "learning_rate": 3.9000785569509785e-05, - "loss": 0.1192, - "step": 2200 - }, - { - "epoch": 0.1021907867542369, - "grad_norm": 0.05309774726629257, - "learning_rate": 3.897814359413153e-05, - "loss": 0.0778, - "step": 2225 - }, - { - "epoch": 0.10333899784136316, - "grad_norm": 0.08014708012342453, - "learning_rate": 3.895525467589253e-05, - "loss": 0.0665, - "step": 2250 - }, - { - "epoch": 0.10448720892848941, - "grad_norm": 0.309901624917984, - 
"learning_rate": 3.89321191126225e-05, - "loss": 0.1596, - "step": 2275 - }, - { - "epoch": 0.10563542001561567, - "grad_norm": 11.939780235290527, - "learning_rate": 3.890873720536048e-05, - "loss": 0.1026, - "step": 2300 - }, - { - "epoch": 0.10678363110274193, - "grad_norm": 4.285236835479736, - "learning_rate": 3.888510925835093e-05, - "loss": 0.1026, - "step": 2325 - }, - { - "epoch": 0.10793184218986819, - "grad_norm": 0.06218241900205612, - "learning_rate": 3.886123557903976e-05, - "loss": 0.079, - "step": 2350 - }, - { - "epoch": 0.10908005327699444, - "grad_norm": 19.595443725585938, - "learning_rate": 3.883711647807037e-05, - "loss": 0.0781, - "step": 2375 - }, - { - "epoch": 0.1102282643641207, - "grad_norm": 58.581581115722656, - "learning_rate": 3.8812752269279544e-05, - "loss": 0.1329, - "step": 2400 - }, - { - "epoch": 0.11137647545124696, - "grad_norm": 21.686433792114258, - "learning_rate": 3.878814326969341e-05, - "loss": 0.1016, - "step": 2425 - }, - { - "epoch": 0.11252468653837322, - "grad_norm": 1.1680649518966675, - "learning_rate": 3.876328979952332e-05, - "loss": 0.1263, - "step": 2450 - }, - { - "epoch": 0.11367289762549947, - "grad_norm": 0.2271161824464798, - "learning_rate": 3.8738192182161645e-05, - "loss": 0.0411, - "step": 2475 - }, - { - "epoch": 0.11482110871262573, - "grad_norm": 18.177799224853516, - "learning_rate": 3.871285074417759e-05, - "loss": 0.1228, - "step": 2500 - }, - { - "epoch": 0.11596931979975199, - "grad_norm": 0.03746473789215088, - "learning_rate": 3.868726581531297e-05, - "loss": 0.0828, - "step": 2525 - }, - { - "epoch": 0.11711753088687825, - "grad_norm": 0.31662654876708984, - "learning_rate": 3.866143772847786e-05, - "loss": 0.0739, - "step": 2550 - }, - { - "epoch": 0.1182657419740045, - "grad_norm": 0.0405048243701458, - "learning_rate": 3.8635366819746336e-05, - "loss": 0.0636, - "step": 2575 - }, - { - "epoch": 0.11941395306113076, - "grad_norm": 0.07984010130167007, - "learning_rate": 3.860905342835201e-05, - "loss": 0.0898, - "step": 2600 - }, - { - "epoch": 0.12056216414825702, - "grad_norm": 0.061471085995435715, - "learning_rate": 3.8582497896683725e-05, - "loss": 0.1553, - "step": 2625 - }, - { - "epoch": 0.12171037523538328, - "grad_norm": 11.33873462677002, - "learning_rate": 3.855570057028101e-05, - "loss": 0.0815, - "step": 2650 - }, - { - "epoch": 0.12285858632250952, - "grad_norm": 0.12285740673542023, - "learning_rate": 3.8528661797829626e-05, - "loss": 0.0393, - "step": 2675 - }, - { - "epoch": 0.12400679740963579, - "grad_norm": 0.009187364019453526, - "learning_rate": 3.8501381931157026e-05, - "loss": 0.1046, - "step": 2700 - }, - { - "epoch": 0.12515500849676203, - "grad_norm": 10.243695259094238, - "learning_rate": 3.847386132522776e-05, - "loss": 0.1128, - "step": 2725 - }, - { - "epoch": 0.1263032195838883, - "grad_norm": 0.03348785266280174, - "learning_rate": 3.8446100338138864e-05, - "loss": 0.0571, - "step": 2750 - }, - { - "epoch": 0.12745143067101455, - "grad_norm": 0.054208606481552124, - "learning_rate": 3.841809933111523e-05, - "loss": 0.1265, - "step": 2775 - }, - { - "epoch": 0.12859964175814081, - "grad_norm": 0.04985649138689041, - "learning_rate": 3.838985866850486e-05, - "loss": 0.0614, - "step": 2800 - }, - { - "epoch": 0.12974785284526708, - "grad_norm": 0.009153477847576141, - "learning_rate": 3.836137871777414e-05, - "loss": 0.0833, - "step": 2825 - }, - { - "epoch": 0.13089606393239334, - "grad_norm": 0.103277787566185, - "learning_rate": 3.833265984950309e-05, - "loss": 0.1913, - "step": 
2850 - }, - { - "epoch": 0.1320442750195196, - "grad_norm": 32.65317916870117, - "learning_rate": 3.830370243738049e-05, - "loss": 0.1379, - "step": 2875 - }, - { - "epoch": 0.13319248610664586, - "grad_norm": 0.17990216612815857, - "learning_rate": 3.827450685819905e-05, - "loss": 0.1472, - "step": 2900 - }, - { - "epoch": 0.1343406971937721, - "grad_norm": 0.43453171849250793, - "learning_rate": 3.8245073491850494e-05, - "loss": 0.0601, - "step": 2925 - }, - { - "epoch": 0.13548890828089835, - "grad_norm": 30.274080276489258, - "learning_rate": 3.821540272132065e-05, - "loss": 0.1168, - "step": 2950 - }, - { - "epoch": 0.1366371193680246, - "grad_norm": 0.02154465764760971, - "learning_rate": 3.8185494932684417e-05, - "loss": 0.0858, - "step": 2975 - }, - { - "epoch": 0.13778533045515087, - "grad_norm": 0.8407604694366455, - "learning_rate": 3.815535051510076e-05, - "loss": 0.1562, - "step": 3000 - }, - { - "epoch": 0.13893354154227713, - "grad_norm": 0.06254715472459793, - "learning_rate": 3.8124969860807655e-05, - "loss": 0.0887, - "step": 3025 - }, - { - "epoch": 0.1400817526294034, - "grad_norm": 0.47905296087265015, - "learning_rate": 3.8094353365117005e-05, - "loss": 0.1225, - "step": 3050 - }, - { - "epoch": 0.14122996371652966, - "grad_norm": 0.09489297866821289, - "learning_rate": 3.806350142640943e-05, - "loss": 0.0628, - "step": 3075 - }, - { - "epoch": 0.14237817480365592, - "grad_norm": 9.106457710266113, - "learning_rate": 3.803241444612917e-05, - "loss": 0.1389, - "step": 3100 - }, - { - "epoch": 0.14352638589078215, - "grad_norm": 0.08327560126781464, - "learning_rate": 3.8001092828778766e-05, - "loss": 0.0686, - "step": 3125 - }, - { - "epoch": 0.1446745969779084, - "grad_norm": 0.2027389407157898, - "learning_rate": 3.7969536981913906e-05, - "loss": 0.1172, - "step": 3150 - }, - { - "epoch": 0.14582280806503467, - "grad_norm": 0.024477185681462288, - "learning_rate": 3.7937747316138015e-05, - "loss": 0.0676, - "step": 3175 - }, - { - "epoch": 0.14697101915216093, - "grad_norm": 223.8543701171875, - "learning_rate": 3.790572424509698e-05, - "loss": 0.1085, - "step": 3200 - }, - { - "epoch": 0.1481192302392872, - "grad_norm": 0.2227756232023239, - "learning_rate": 3.787346818547375e-05, - "loss": 0.0865, - "step": 3225 - }, - { - "epoch": 0.14926744132641345, - "grad_norm": 0.060894425958395004, - "learning_rate": 3.784097955698291e-05, - "loss": 0.1782, - "step": 3250 - }, - { - "epoch": 0.1504156524135397, - "grad_norm": 0.5547670125961304, - "learning_rate": 3.780825878236521e-05, - "loss": 0.0733, - "step": 3275 - }, - { - "epoch": 0.15156386350066597, - "grad_norm": 6.312192916870117, - "learning_rate": 3.77753062873821e-05, - "loss": 0.1209, - "step": 3300 - }, - { - "epoch": 0.1527120745877922, - "grad_norm": 12.162946701049805, - "learning_rate": 3.774212250081014e-05, - "loss": 0.1259, - "step": 3325 - }, - { - "epoch": 0.15386028567491847, - "grad_norm": 0.04875678941607475, - "learning_rate": 3.770870785443548e-05, - "loss": 0.0683, - "step": 3350 - }, - { - "epoch": 0.15500849676204473, - "grad_norm": 40.281455993652344, - "learning_rate": 3.767506278304818e-05, - "loss": 0.1197, - "step": 3375 - }, - { - "epoch": 0.156156707849171, - "grad_norm": 0.008391838520765305, - "learning_rate": 3.7641187724436576e-05, - "loss": 0.0589, - "step": 3400 - }, - { - "epoch": 0.15730491893629725, - "grad_norm": 0.00909591093659401, - "learning_rate": 3.760708311938163e-05, - "loss": 0.0528, - "step": 3425 - }, - { - "epoch": 0.1584531300234235, - "grad_norm": 
0.004507018718868494, - "learning_rate": 3.75727494116511e-05, - "loss": 0.0541, - "step": 3450 - }, - { - "epoch": 0.15960134111054977, - "grad_norm": 15.795205116271973, - "learning_rate": 3.753818704799386e-05, - "loss": 0.1272, - "step": 3475 - }, - { - "epoch": 0.16074955219767603, - "grad_norm": 0.024282341822981834, - "learning_rate": 3.750339647813403e-05, - "loss": 0.0625, - "step": 3500 - }, - { - "epoch": 0.16189776328480227, - "grad_norm": 0.060861699283123016, - "learning_rate": 3.7468378154765146e-05, - "loss": 0.0647, - "step": 3525 - }, - { - "epoch": 0.16304597437192853, - "grad_norm": 0.021951772272586823, - "learning_rate": 3.743313253354425e-05, - "loss": 0.1148, - "step": 3550 - }, - { - "epoch": 0.1641941854590548, - "grad_norm": 1.0026686191558838, - "learning_rate": 3.7397660073085994e-05, - "loss": 0.1176, - "step": 3575 - }, - { - "epoch": 0.16534239654618105, - "grad_norm": 0.014557062648236752, - "learning_rate": 3.736196123495663e-05, - "loss": 0.0518, - "step": 3600 - }, - { - "epoch": 0.1664906076333073, - "grad_norm": 0.09844854474067688, - "learning_rate": 3.732603648366805e-05, - "loss": 0.1042, - "step": 3625 - }, - { - "epoch": 0.16763881872043357, - "grad_norm": 426.1970520019531, - "learning_rate": 3.728988628667171e-05, - "loss": 0.1162, - "step": 3650 - }, - { - "epoch": 0.16878702980755983, - "grad_norm": 43.39368438720703, - "learning_rate": 3.725351111435256e-05, - "loss": 0.1072, - "step": 3675 - }, - { - "epoch": 0.1699352408946861, - "grad_norm": 0.1358594000339508, - "learning_rate": 3.7216911440022906e-05, - "loss": 0.1128, - "step": 3700 - }, - { - "epoch": 0.17108345198181232, - "grad_norm": 0.05883224308490753, - "learning_rate": 3.7180087739916284e-05, - "loss": 0.0222, - "step": 3725 - }, - { - "epoch": 0.17223166306893858, - "grad_norm": 3.3152406215667725, - "learning_rate": 3.7143040493181236e-05, - "loss": 0.1888, - "step": 3750 - }, - { - "epoch": 0.17337987415606484, - "grad_norm": 0.13006258010864258, - "learning_rate": 3.710577018187508e-05, - "loss": 0.1396, - "step": 3775 - }, - { - "epoch": 0.1745280852431911, - "grad_norm": 0.487556129693985, - "learning_rate": 3.706827729095765e-05, - "loss": 0.0768, - "step": 3800 - }, - { - "epoch": 0.17567629633031737, - "grad_norm": 0.18899738788604736, - "learning_rate": 3.703056230828497e-05, - "loss": 0.0797, - "step": 3825 - }, - { - "epoch": 0.17682450741744363, - "grad_norm": 0.32145893573760986, - "learning_rate": 3.699262572460293e-05, - "loss": 0.132, - "step": 3850 - }, - { - "epoch": 0.1779727185045699, - "grad_norm": 0.08297406136989594, - "learning_rate": 3.695446803354086e-05, - "loss": 0.0802, - "step": 3875 - }, - { - "epoch": 0.17912092959169615, - "grad_norm": 0.5447966456413269, - "learning_rate": 3.691608973160513e-05, - "loss": 0.0227, - "step": 3900 - }, - { - "epoch": 0.18026914067882238, - "grad_norm": 18.947834014892578, - "learning_rate": 3.687749131817272e-05, - "loss": 0.1073, - "step": 3925 - }, - { - "epoch": 0.18141735176594864, - "grad_norm": 0.3541482388973236, - "learning_rate": 3.683867329548466e-05, - "loss": 0.1116, - "step": 3950 - }, - { - "epoch": 0.1825655628530749, - "grad_norm": 0.40254440903663635, - "learning_rate": 3.679963616863955e-05, - "loss": 0.0876, - "step": 3975 - }, - { - "epoch": 0.18371377394020116, - "grad_norm": 1.2953015565872192, - "learning_rate": 3.676038044558694e-05, - "loss": 0.0962, - "step": 4000 - }, - { - "epoch": 0.18486198502732742, - "grad_norm": 0.8362603783607483, - "learning_rate": 3.672090663712078e-05, - 
"loss": 0.0926, - "step": 4025 - }, - { - "epoch": 0.18601019611445369, - "grad_norm": 10.473535537719727, - "learning_rate": 3.66812152568727e-05, - "loss": 0.0758, - "step": 4050 - }, - { - "epoch": 0.18715840720157995, - "grad_norm": 0.20654985308647156, - "learning_rate": 3.66413068213054e-05, - "loss": 0.1323, - "step": 4075 - }, - { - "epoch": 0.1883066182887062, - "grad_norm": 0.19674348831176758, - "learning_rate": 3.6601181849705864e-05, - "loss": 0.1177, - "step": 4100 - }, - { - "epoch": 0.18945482937583244, - "grad_norm": 46.79829025268555, - "learning_rate": 3.656084086417867e-05, - "loss": 0.1193, - "step": 4125 - }, - { - "epoch": 0.1906030404629587, - "grad_norm": 0.13020487129688263, - "learning_rate": 3.652028438963912e-05, - "loss": 0.0877, - "step": 4150 - }, - { - "epoch": 0.19175125155008496, - "grad_norm": 0.07615011185407639, - "learning_rate": 3.647951295380648e-05, - "loss": 0.0912, - "step": 4175 - }, - { - "epoch": 0.19289946263721122, - "grad_norm": 0.33390146493911743, - "learning_rate": 3.643852708719708e-05, - "loss": 0.0703, - "step": 4200 - }, - { - "epoch": 0.19404767372433748, - "grad_norm": 0.8612595200538635, - "learning_rate": 3.6397327323117406e-05, - "loss": 0.1447, - "step": 4225 - }, - { - "epoch": 0.19519588481146374, - "grad_norm": 0.028214924037456512, - "learning_rate": 3.635591419765717e-05, - "loss": 0.0586, - "step": 4250 - }, - { - "epoch": 0.19634409589859, - "grad_norm": 0.050730813294649124, - "learning_rate": 3.631428824968235e-05, - "loss": 0.0726, - "step": 4275 - }, - { - "epoch": 0.19749230698571626, - "grad_norm": 12.573225021362305, - "learning_rate": 3.627245002082814e-05, - "loss": 0.116, - "step": 4300 - }, - { - "epoch": 0.1986405180728425, - "grad_norm": 0.6728110313415527, - "learning_rate": 3.623040005549193e-05, - "loss": 0.1846, - "step": 4325 - }, - { - "epoch": 0.19978872915996876, - "grad_norm": 34.582183837890625, - "learning_rate": 3.6188138900826225e-05, - "loss": 0.1518, - "step": 4350 - }, - { - "epoch": 0.20093694024709502, - "grad_norm": 22.49669647216797, - "learning_rate": 3.6145667106731516e-05, - "loss": 0.0471, - "step": 4375 - }, - { - "epoch": 0.20208515133422128, - "grad_norm": 0.12985405325889587, - "learning_rate": 3.610298522584913e-05, - "loss": 0.1173, - "step": 4400 - }, - { - "epoch": 0.20323336242134754, - "grad_norm": 0.30833348631858826, - "learning_rate": 3.606009381355401e-05, - "loss": 0.1189, - "step": 4425 - }, - { - "epoch": 0.2043815735084738, - "grad_norm": 0.16521891951560974, - "learning_rate": 3.601699342794755e-05, - "loss": 0.0812, - "step": 4450 - }, - { - "epoch": 0.20552978459560006, - "grad_norm": 0.01110359001904726, - "learning_rate": 3.597368462985027e-05, - "loss": 0.0567, - "step": 4475 - }, - { - "epoch": 0.20667799568272632, - "grad_norm": 0.005069703795015812, - "learning_rate": 3.5930167982794555e-05, - "loss": 0.1032, - "step": 4500 - }, - { - "epoch": 0.20782620676985256, - "grad_norm": 0.046138398349285126, - "learning_rate": 3.588644405301731e-05, - "loss": 0.0537, - "step": 4525 - }, - { - "epoch": 0.20897441785697882, - "grad_norm": 0.9147820472717285, - "learning_rate": 3.5842513409452606e-05, - "loss": 0.0653, - "step": 4550 - }, - { - "epoch": 0.21012262894410508, - "grad_norm": 7.11830997467041, - "learning_rate": 3.579837662372424e-05, - "loss": 0.1436, - "step": 4575 - }, - { - "epoch": 0.21127084003123134, - "grad_norm": 0.07249364256858826, - "learning_rate": 3.575403427013834e-05, - "loss": 0.1258, - "step": 4600 - }, - { - "epoch": 0.2124190511183576, 
- "grad_norm": 9.790180206298828, - "learning_rate": 3.5709486925675887e-05, - "loss": 0.082, - "step": 4625 - }, - { - "epoch": 0.21356726220548386, - "grad_norm": 0.03522699326276779, - "learning_rate": 3.566473516998517e-05, - "loss": 0.1248, - "step": 4650 - }, - { - "epoch": 0.21471547329261012, - "grad_norm": 0.570968508720398, - "learning_rate": 3.5619779585374275e-05, - "loss": 0.0791, - "step": 4675 - }, - { - "epoch": 0.21586368437973638, - "grad_norm": 0.13998058438301086, - "learning_rate": 3.557462075680352e-05, - "loss": 0.0686, - "step": 4700 - }, - { - "epoch": 0.21701189546686261, - "grad_norm": 14.644072532653809, - "learning_rate": 3.552925927187781e-05, - "loss": 0.1849, - "step": 4725 - }, - { - "epoch": 0.21816010655398888, - "grad_norm": 0.02759512886404991, - "learning_rate": 3.548369572083901e-05, - "loss": 0.0851, - "step": 4750 - }, - { - "epoch": 0.21930831764111514, - "grad_norm": 0.30943742394447327, - "learning_rate": 3.543793069655825e-05, - "loss": 0.0682, - "step": 4775 - }, - { - "epoch": 0.2204565287282414, - "grad_norm": 0.032128553837537766, - "learning_rate": 3.5391964794528245e-05, - "loss": 0.0817, - "step": 4800 - }, - { - "epoch": 0.22160473981536766, - "grad_norm": 0.35530638694763184, - "learning_rate": 3.534579861285551e-05, - "loss": 0.1607, - "step": 4825 - }, - { - "epoch": 0.22275295090249392, - "grad_norm": 0.5857437252998352, - "learning_rate": 3.529943275225258e-05, - "loss": 0.0937, - "step": 4850 - }, - { - "epoch": 0.22390116198962018, - "grad_norm": 0.026839064434170723, - "learning_rate": 3.525286781603023e-05, - "loss": 0.1423, - "step": 4875 - }, - { - "epoch": 0.22504937307674644, - "grad_norm": 0.08431849628686905, - "learning_rate": 3.5206104410089584e-05, - "loss": 0.0854, - "step": 4900 - }, - { - "epoch": 0.22619758416387267, - "grad_norm": 0.15257743000984192, - "learning_rate": 3.5159143142914236e-05, - "loss": 0.0698, - "step": 4925 - }, - { - "epoch": 0.22734579525099893, - "grad_norm": 26.322877883911133, - "learning_rate": 3.511198462556236e-05, - "loss": 0.1412, - "step": 4950 - }, - { - "epoch": 0.2284940063381252, - "grad_norm": 0.6526215076446533, - "learning_rate": 3.506462947165874e-05, - "loss": 0.1362, - "step": 4975 - }, - { - "epoch": 0.22964221742525145, - "grad_norm": 0.3663279712200165, - "learning_rate": 3.5017078297386776e-05, - "loss": 0.1357, - "step": 5000 - }, - { - "epoch": 0.23079042851237772, - "grad_norm": 1.1023207902908325, - "learning_rate": 3.4969331721480495e-05, - "loss": 0.0967, - "step": 5025 - }, - { - "epoch": 0.23193863959950398, - "grad_norm": 64.01104736328125, - "learning_rate": 3.492139036521646e-05, - "loss": 0.0745, - "step": 5050 - }, - { - "epoch": 0.23308685068663024, - "grad_norm": 12.65407943725586, - "learning_rate": 3.487325485240573e-05, - "loss": 0.0891, - "step": 5075 - }, - { - "epoch": 0.2342350617737565, - "grad_norm": 0.8625269532203674, - "learning_rate": 3.48249258093857e-05, - "loss": 0.1101, - "step": 5100 - }, - { - "epoch": 0.23538327286088276, - "grad_norm": 0.026748131960630417, - "learning_rate": 3.477640386501199e-05, - "loss": 0.0314, - "step": 5125 - }, - { - "epoch": 0.236531483948009, - "grad_norm": 0.11494199931621552, - "learning_rate": 3.472768965065024e-05, - "loss": 0.1041, - "step": 5150 - }, - { - "epoch": 0.23767969503513525, - "grad_norm": 0.3611622154712677, - "learning_rate": 3.46787838001679e-05, - "loss": 0.1058, - "step": 5175 - }, - { - "epoch": 0.2388279061222615, - "grad_norm": 0.06698184460401535, - "learning_rate": 
3.4629686949925976e-05, - "loss": 0.0987, - "step": 5200 - }, - { - "epoch": 0.23997611720938777, - "grad_norm": 0.0825871005654335, - "learning_rate": 3.458039973877076e-05, - "loss": 0.0437, - "step": 5225 - }, - { - "epoch": 0.24112432829651403, - "grad_norm": 8.870837211608887, - "learning_rate": 3.453092280802551e-05, - "loss": 0.0329, - "step": 5250 - }, - { - "epoch": 0.2422725393836403, - "grad_norm": 0.07859117537736893, - "learning_rate": 3.448125680148212e-05, - "loss": 0.1058, - "step": 5275 - }, - { - "epoch": 0.24342075047076656, - "grad_norm": 0.0069983587600290775, - "learning_rate": 3.44314023653927e-05, - "loss": 0.091, - "step": 5300 - }, - { - "epoch": 0.24456896155789282, - "grad_norm": 78.82186126708984, - "learning_rate": 3.438136014846124e-05, - "loss": 0.0884, - "step": 5325 - }, - { - "epoch": 0.24571717264501905, - "grad_norm": 0.035996224731206894, - "learning_rate": 3.43311308018351e-05, - "loss": 0.1089, - "step": 5350 - }, - { - "epoch": 0.2468653837321453, - "grad_norm": 0.06088200584053993, - "learning_rate": 3.428071497909657e-05, - "loss": 0.0349, - "step": 5375 - }, - { - "epoch": 0.24801359481927157, - "grad_norm": 0.19518066942691803, - "learning_rate": 3.423011333625435e-05, - "loss": 0.1557, - "step": 5400 - }, - { - "epoch": 0.24916180590639783, - "grad_norm": 0.14072376489639282, - "learning_rate": 3.417932653173505e-05, - "loss": 0.1763, - "step": 5425 - }, - { - "epoch": 0.25031001699352406, - "grad_norm": 0.13947640359401703, - "learning_rate": 3.412835522637456e-05, - "loss": 0.044, - "step": 5450 - }, - { - "epoch": 0.2514582280806503, - "grad_norm": 0.019208233803510666, - "learning_rate": 3.407720008340952e-05, - "loss": 0.0769, - "step": 5475 - }, - { - "epoch": 0.2526064391677766, - "grad_norm": 1.0778671503067017, - "learning_rate": 3.402586176846866e-05, - "loss": 0.0479, - "step": 5500 - }, - { - "epoch": 0.25375465025490285, - "grad_norm": 53.735904693603516, - "learning_rate": 3.397434094956409e-05, - "loss": 0.1, - "step": 5525 - }, - { - "epoch": 0.2549028613420291, - "grad_norm": 0.013616573065519333, - "learning_rate": 3.3922638297082715e-05, - "loss": 0.0678, - "step": 5550 - }, - { - "epoch": 0.25605107242915537, - "grad_norm": 0.19283778965473175, - "learning_rate": 3.387075448377741e-05, - "loss": 0.0992, - "step": 5575 - }, - { - "epoch": 0.25719928351628163, - "grad_norm": 0.1097509115934372, - "learning_rate": 3.381869018475832e-05, - "loss": 0.0624, - "step": 5600 - }, - { - "epoch": 0.2583474946034079, - "grad_norm": 3.6526105403900146, - "learning_rate": 3.376644607748406e-05, - "loss": 0.1127, - "step": 5625 - }, - { - "epoch": 0.25949570569053415, - "grad_norm": 11.323371887207031, - "learning_rate": 3.371402284175292e-05, - "loss": 0.125, - "step": 5650 - }, - { - "epoch": 0.2606439167776604, - "grad_norm": 0.08170542865991592, - "learning_rate": 3.366142115969398e-05, - "loss": 0.0569, - "step": 5675 - }, - { - "epoch": 0.2617921278647867, - "grad_norm": 0.30748364329338074, - "learning_rate": 3.3608641715758264e-05, - "loss": 0.0745, - "step": 5700 - }, - { - "epoch": 0.26294033895191293, - "grad_norm": 12.165470123291016, - "learning_rate": 3.3555685196709836e-05, - "loss": 0.1015, - "step": 5725 - }, - { - "epoch": 0.2640885500390392, - "grad_norm": 13.167491912841797, - "learning_rate": 3.350255229161684e-05, - "loss": 0.0937, - "step": 5750 - }, - { - "epoch": 0.26523676112616545, - "grad_norm": 0.2379087507724762, - "learning_rate": 3.3449243691842555e-05, - "loss": 0.1279, - "step": 5775 - }, - { - "epoch": 
0.2663849722132917, - "grad_norm": 0.28682318329811096, - "learning_rate": 3.33957600910364e-05, - "loss": 0.0699, - "step": 5800 - }, - { - "epoch": 0.267533183300418, - "grad_norm": 10.061598777770996, - "learning_rate": 3.334210218512488e-05, - "loss": 0.0877, - "step": 5825 - }, - { - "epoch": 0.2686813943875442, - "grad_norm": 0.11539531499147415, - "learning_rate": 3.3288270672302575e-05, - "loss": 0.1336, - "step": 5850 - }, - { - "epoch": 0.26982960547467044, - "grad_norm": 0.02353590354323387, - "learning_rate": 3.3234266253023014e-05, - "loss": 0.1055, - "step": 5875 - }, - { - "epoch": 0.2709778165617967, - "grad_norm": 159.0912322998047, - "learning_rate": 3.3180089629989585e-05, - "loss": 0.1191, - "step": 5900 - }, - { - "epoch": 0.27212602764892296, - "grad_norm": 0.15841203927993774, - "learning_rate": 3.312574150814639e-05, - "loss": 0.1149, - "step": 5925 - }, - { - "epoch": 0.2732742387360492, - "grad_norm": 0.10681680589914322, - "learning_rate": 3.3071222594669045e-05, - "loss": 0.1223, - "step": 5950 - }, - { - "epoch": 0.2744224498231755, - "grad_norm": 0.1897619068622589, - "learning_rate": 3.301653359895554e-05, - "loss": 0.0769, - "step": 5975 - }, - { - "epoch": 0.27557066091030175, - "grad_norm": 0.33233270049095154, - "learning_rate": 3.296167523261692e-05, - "loss": 0.0216, - "step": 6000 - }, - { - "epoch": 0.276718871997428, - "grad_norm": 0.11045894026756287, - "learning_rate": 3.2906648209468116e-05, - "loss": 0.0968, - "step": 6025 - }, - { - "epoch": 0.27786708308455427, - "grad_norm": 0.17217367887496948, - "learning_rate": 3.2851453245518585e-05, - "loss": 0.0703, - "step": 6050 - }, - { - "epoch": 0.27901529417168053, - "grad_norm": 0.09274031966924667, - "learning_rate": 3.279609105896304e-05, - "loss": 0.0921, - "step": 6075 - }, - { - "epoch": 0.2801635052588068, - "grad_norm": 0.11065138876438141, - "learning_rate": 3.274056237017209e-05, - "loss": 0.0489, - "step": 6100 - }, - { - "epoch": 0.28131171634593305, - "grad_norm": 0.32041868567466736, - "learning_rate": 3.268486790168285e-05, - "loss": 0.1396, - "step": 6125 - }, - { - "epoch": 0.2824599274330593, - "grad_norm": 190.3155975341797, - "learning_rate": 3.262900837818955e-05, - "loss": 0.0857, - "step": 6150 - }, - { - "epoch": 0.28360813852018557, - "grad_norm": 0.05174362286925316, - "learning_rate": 3.257298452653414e-05, - "loss": 0.099, - "step": 6175 - }, - { - "epoch": 0.28475634960731183, - "grad_norm": 0.04005354642868042, - "learning_rate": 3.251679707569677e-05, - "loss": 0.0774, - "step": 6200 - }, - { - "epoch": 0.2859045606944381, - "grad_norm": 2.6924703121185303, - "learning_rate": 3.246044675678636e-05, - "loss": 0.1248, - "step": 6225 - }, - { - "epoch": 0.2870527717815643, - "grad_norm": 1.217990756034851, - "learning_rate": 3.240393430303105e-05, - "loss": 0.1095, - "step": 6250 - }, - { - "epoch": 0.28820098286869056, - "grad_norm": 10.461615562438965, - "learning_rate": 3.234726044976865e-05, - "loss": 0.0696, - "step": 6275 - }, - { - "epoch": 0.2893491939558168, - "grad_norm": 0.1737290918827057, - "learning_rate": 3.2290425934437146e-05, - "loss": 0.0865, - "step": 6300 - }, - { - "epoch": 0.2904974050429431, - "grad_norm": 0.6627403497695923, - "learning_rate": 3.2233431496565015e-05, - "loss": 0.0659, - "step": 6325 - }, - { - "epoch": 0.29164561613006934, - "grad_norm": 20.512529373168945, - "learning_rate": 3.2176277877761645e-05, - "loss": 0.1103, - "step": 6350 - }, - { - "epoch": 0.2927938272171956, - "grad_norm": 1.0080277919769287, - "learning_rate": 
3.211896582170769e-05, - "loss": 0.1816, - "step": 6375 - }, - { - "epoch": 0.29394203830432186, - "grad_norm": 11.729516983032227, - "learning_rate": 3.2061496074145375e-05, - "loss": 0.0864, - "step": 6400 - }, - { - "epoch": 0.2950902493914481, - "grad_norm": 141.841064453125, - "learning_rate": 3.20038693828688e-05, - "loss": 0.1042, - "step": 6425 - }, - { - "epoch": 0.2962384604785744, - "grad_norm": 0.052077438682317734, - "learning_rate": 3.194608649771421e-05, - "loss": 0.0248, - "step": 6450 - }, - { - "epoch": 0.29738667156570064, - "grad_norm": 0.021261312067508698, - "learning_rate": 3.188814817055026e-05, - "loss": 0.0778, - "step": 6475 - }, - { - "epoch": 0.2985348826528269, - "grad_norm": 0.026634275913238525, - "learning_rate": 3.183005515526818e-05, - "loss": 0.0372, - "step": 6500 - }, - { - "epoch": 0.29968309373995317, - "grad_norm": 0.02382843941450119, - "learning_rate": 3.177180820777201e-05, - "loss": 0.0579, - "step": 6525 - }, - { - "epoch": 0.3008313048270794, - "grad_norm": 0.18128037452697754, - "learning_rate": 3.171340808596875e-05, - "loss": 0.1122, - "step": 6550 - }, - { - "epoch": 0.3019795159142057, - "grad_norm": 0.0806470662355423, - "learning_rate": 3.165485554975849e-05, - "loss": 0.1145, - "step": 6575 - }, - { - "epoch": 0.30312772700133195, - "grad_norm": 0.20076870918273926, - "learning_rate": 3.1596151361024545e-05, - "loss": 0.087, - "step": 6600 - }, - { - "epoch": 0.3042759380884582, - "grad_norm": 1.3655481338500977, - "learning_rate": 3.153729628362351e-05, - "loss": 0.1082, - "step": 6625 - }, - { - "epoch": 0.3054241491755844, - "grad_norm": 0.5538046956062317, - "learning_rate": 3.147829108337536e-05, - "loss": 0.0857, - "step": 6650 - }, - { - "epoch": 0.3065723602627107, - "grad_norm": 13.633685111999512, - "learning_rate": 3.141913652805343e-05, - "loss": 0.0875, - "step": 6675 - }, - { - "epoch": 0.30772057134983694, - "grad_norm": 0.4091302156448364, - "learning_rate": 3.135983338737449e-05, - "loss": 0.0719, - "step": 6700 - }, - { - "epoch": 0.3088687824369632, - "grad_norm": 0.15042781829833984, - "learning_rate": 3.130038243298867e-05, - "loss": 0.0981, - "step": 6725 - }, - { - "epoch": 0.31001699352408946, - "grad_norm": 0.1054098829627037, - "learning_rate": 3.124078443846947e-05, - "loss": 0.0626, - "step": 6750 - }, - { - "epoch": 0.3111652046112157, - "grad_norm": 0.033151715993881226, - "learning_rate": 3.118104017930365e-05, - "loss": 0.059, - "step": 6775 - }, - { - "epoch": 0.312313415698342, - "grad_norm": 0.18374769389629364, - "learning_rate": 3.1121150432881174e-05, - "loss": 0.0972, - "step": 6800 - }, - { - "epoch": 0.31346162678546824, - "grad_norm": 0.016897082328796387, - "learning_rate": 3.106111597848508e-05, - "loss": 0.0453, - "step": 6825 - }, - { - "epoch": 0.3146098378725945, - "grad_norm": 0.054736267775297165, - "learning_rate": 3.100093759728133e-05, - "loss": 0.0442, - "step": 6850 - }, - { - "epoch": 0.31575804895972076, - "grad_norm": 15.296791076660156, - "learning_rate": 3.0940616072308665e-05, - "loss": 0.13, - "step": 6875 - }, - { - "epoch": 0.316906260046847, - "grad_norm": 0.01988917961716652, - "learning_rate": 3.088015218846841e-05, - "loss": 0.036, - "step": 6900 - }, - { - "epoch": 0.3180544711339733, - "grad_norm": 0.025909580290317535, - "learning_rate": 3.081954673251423e-05, - "loss": 0.1197, - "step": 6925 - }, - { - "epoch": 0.31920268222109954, - "grad_norm": 0.13559691607952118, - "learning_rate": 3.075880049304196e-05, - "loss": 0.0492, - "step": 6950 - }, - { - "epoch": 
0.3203508933082258, - "grad_norm": 0.3738120198249817, - "learning_rate": 3.069791426047929e-05, - "loss": 0.1177, - "step": 6975 - }, - { - "epoch": 0.32149910439535206, - "grad_norm": 0.032735977321863174, - "learning_rate": 3.063688882707549e-05, - "loss": 0.1113, - "step": 7000 - }, - { - "epoch": 0.3226473154824783, - "grad_norm": 16.048425674438477, - "learning_rate": 3.0575724986891096e-05, - "loss": 0.0528, - "step": 7025 - }, - { - "epoch": 0.32379552656960453, - "grad_norm": 11.062355041503906, - "learning_rate": 3.0514423535787618e-05, - "loss": 0.1168, - "step": 7050 - }, - { - "epoch": 0.3249437376567308, - "grad_norm": 0.09487222135066986, - "learning_rate": 3.0452985271417116e-05, - "loss": 0.0712, - "step": 7075 - }, - { - "epoch": 0.32609194874385705, - "grad_norm": 0.012298893183469772, - "learning_rate": 3.0391410993211897e-05, - "loss": 0.071, - "step": 7100 - }, - { - "epoch": 0.3272401598309833, - "grad_norm": 0.11313242465257645, - "learning_rate": 3.0329701502374046e-05, - "loss": 0.0509, - "step": 7125 - }, - { - "epoch": 0.3283883709181096, - "grad_norm": 0.025708986446261406, - "learning_rate": 3.0267857601865042e-05, - "loss": 0.0877, - "step": 7150 - }, - { - "epoch": 0.32953658200523583, - "grad_norm": 0.06797124445438385, - "learning_rate": 3.0205880096395294e-05, - "loss": 0.0589, - "step": 7175 - }, - { - "epoch": 0.3306847930923621, - "grad_norm": 0.6260665059089661, - "learning_rate": 3.0143769792413667e-05, - "loss": 0.1141, - "step": 7200 - }, - { - "epoch": 0.33183300417948836, - "grad_norm": 0.044946711510419846, - "learning_rate": 3.008152749809702e-05, - "loss": 0.0956, - "step": 7225 - }, - { - "epoch": 0.3329812152666146, - "grad_norm": 0.12308789044618607, - "learning_rate": 3.0019154023339633e-05, - "loss": 0.0621, - "step": 7250 - }, - { - "epoch": 0.3341294263537409, - "grad_norm": 0.25819188356399536, - "learning_rate": 2.9956650179742723e-05, - "loss": 0.0631, - "step": 7275 - }, - { - "epoch": 0.33527763744086714, - "grad_norm": 9.435999870300293, - "learning_rate": 2.9894016780603845e-05, - "loss": 0.1329, - "step": 7300 - }, - { - "epoch": 0.3364258485279934, - "grad_norm": 0.24013052880764008, - "learning_rate": 2.9831254640906346e-05, - "loss": 0.121, - "step": 7325 - }, - { - "epoch": 0.33757405961511966, - "grad_norm": 20.42167854309082, - "learning_rate": 2.9768364577308718e-05, - "loss": 0.0869, - "step": 7350 - }, - { - "epoch": 0.3387222707022459, - "grad_norm": 0.2154628038406372, - "learning_rate": 2.970534740813401e-05, - "loss": 0.1218, - "step": 7375 - }, - { - "epoch": 0.3398704817893722, - "grad_norm": 0.3771252930164337, - "learning_rate": 2.9642203953359154e-05, - "loss": 0.1596, - "step": 7400 - }, - { - "epoch": 0.34101869287649844, - "grad_norm": 0.2281711995601654, - "learning_rate": 2.957893503460431e-05, - "loss": 0.1132, - "step": 7425 - }, - { - "epoch": 0.34216690396362465, - "grad_norm": 0.0789218544960022, - "learning_rate": 2.9515541475122177e-05, - "loss": 0.0721, - "step": 7450 - }, - { - "epoch": 0.3433151150507509, - "grad_norm": 0.15669232606887817, - "learning_rate": 2.945202409978725e-05, - "loss": 0.1045, - "step": 7475 - }, - { - "epoch": 0.34446332613787717, - "grad_norm": 0.27901673316955566, - "learning_rate": 2.938838373508514e-05, - "loss": 0.081, - "step": 7500 - }, - { - "epoch": 0.34561153722500343, - "grad_norm": 0.14721404016017914, - "learning_rate": 2.9324621209101777e-05, - "loss": 0.0968, - "step": 7525 - }, - { - "epoch": 0.3467597483121297, - "grad_norm": 0.5337648987770081, - 
"learning_rate": 2.9260737351512653e-05, - "loss": 0.1014, - "step": 7550 - }, - { - "epoch": 0.34790795939925595, - "grad_norm": 0.0496150366961956, - "learning_rate": 2.9196732993572014e-05, - "loss": 0.0825, - "step": 7575 - }, - { - "epoch": 0.3490561704863822, - "grad_norm": 0.1201993003487587, - "learning_rate": 2.913260896810206e-05, - "loss": 0.0834, - "step": 7600 - }, - { - "epoch": 0.35020438157350847, - "grad_norm": 0.041360825300216675, - "learning_rate": 2.9068366109482096e-05, - "loss": 0.1034, - "step": 7625 - }, - { - "epoch": 0.35135259266063473, - "grad_norm": 0.031238993629813194, - "learning_rate": 2.9004005253637683e-05, - "loss": 0.0454, - "step": 7650 - }, - { - "epoch": 0.352500803747761, - "grad_norm": 0.20502029359340668, - "learning_rate": 2.8939527238029757e-05, - "loss": 0.0591, - "step": 7675 - }, - { - "epoch": 0.35364901483488725, - "grad_norm": 0.028439056128263474, - "learning_rate": 2.8874932901643724e-05, - "loss": 0.1009, - "step": 7700 - }, - { - "epoch": 0.3547972259220135, - "grad_norm": 0.04592956230044365, - "learning_rate": 2.881022308497856e-05, - "loss": 0.0688, - "step": 7725 - }, - { - "epoch": 0.3559454370091398, - "grad_norm": 0.26893487572669983, - "learning_rate": 2.874539863003587e-05, - "loss": 0.1086, - "step": 7750 - }, - { - "epoch": 0.35709364809626604, - "grad_norm": 0.029499026015400887, - "learning_rate": 2.868046038030891e-05, - "loss": 0.0941, - "step": 7775 - }, - { - "epoch": 0.3582418591833923, - "grad_norm": 0.42461779713630676, - "learning_rate": 2.8615409180771652e-05, - "loss": 0.0788, - "step": 7800 - }, - { - "epoch": 0.35939007027051856, - "grad_norm": 0.015663934871554375, - "learning_rate": 2.8550245877867745e-05, - "loss": 0.0633, - "step": 7825 - }, - { - "epoch": 0.36053828135764476, - "grad_norm": 25.207908630371094, - "learning_rate": 2.8484971319499547e-05, - "loss": 0.0354, - "step": 7850 - }, - { - "epoch": 0.361686492444771, - "grad_norm": 0.037977807223796844, - "learning_rate": 2.8419586355017034e-05, - "loss": 0.0868, - "step": 7875 - }, - { - "epoch": 0.3628347035318973, - "grad_norm": 0.06867849826812744, - "learning_rate": 2.8354091835206818e-05, - "loss": 0.0357, - "step": 7900 - }, - { - "epoch": 0.36398291461902355, - "grad_norm": 0.119336798787117, - "learning_rate": 2.828848861228102e-05, - "loss": 0.0761, - "step": 7925 - }, - { - "epoch": 0.3651311257061498, - "grad_norm": 20.873859405517578, - "learning_rate": 2.8222777539866197e-05, - "loss": 0.1192, - "step": 7950 - }, - { - "epoch": 0.36627933679327607, - "grad_norm": 8.662124633789062, - "learning_rate": 2.8156959472992264e-05, - "loss": 0.054, - "step": 7975 - }, - { - "epoch": 0.3674275478804023, - "grad_norm": 0.2797779440879822, - "learning_rate": 2.809103526808131e-05, - "loss": 0.0914, - "step": 8000 - }, - { - "epoch": 0.3685757589675286, - "grad_norm": 0.22510646283626556, - "learning_rate": 2.8025005782936525e-05, - "loss": 0.1265, - "step": 8025 - }, - { - "epoch": 0.36972397005465485, - "grad_norm": 0.7231925129890442, - "learning_rate": 2.7958871876730964e-05, - "loss": 0.1283, - "step": 8050 - }, - { - "epoch": 0.3708721811417811, - "grad_norm": 0.06375906616449356, - "learning_rate": 2.7892634409996433e-05, - "loss": 0.0671, - "step": 8075 - }, - { - "epoch": 0.37202039222890737, - "grad_norm": 0.02470921352505684, - "learning_rate": 2.7826294244612255e-05, - "loss": 0.0949, - "step": 8100 - }, - { - "epoch": 0.37316860331603363, - "grad_norm": 8.095888137817383, - "learning_rate": 2.775985224379406e-05, - "loss": 0.1367, - 
"step": 8125 - }, - { - "epoch": 0.3743168144031599, - "grad_norm": 10.58785343170166, - "learning_rate": 2.7693309272082554e-05, - "loss": 0.1, - "step": 8150 - }, - { - "epoch": 0.37546502549028615, - "grad_norm": 0.03961142152547836, - "learning_rate": 2.762666619533228e-05, - "loss": 0.0529, - "step": 8175 - }, - { - "epoch": 0.3766132365774124, - "grad_norm": 0.20349906384944916, - "learning_rate": 2.7559923880700345e-05, - "loss": 0.1317, - "step": 8200 - }, - { - "epoch": 0.3777614476645387, - "grad_norm": 0.05352284759283066, - "learning_rate": 2.7493083196635127e-05, - "loss": 0.173, - "step": 8225 - }, - { - "epoch": 0.3789096587516649, - "grad_norm": 47.993160247802734, - "learning_rate": 2.7426145012865e-05, - "loss": 0.0912, - "step": 8250 - }, - { - "epoch": 0.38005786983879114, - "grad_norm": 0.7617914080619812, - "learning_rate": 2.7359110200386966e-05, - "loss": 0.057, - "step": 8275 - }, - { - "epoch": 0.3812060809259174, - "grad_norm": 16.62209701538086, - "learning_rate": 2.7291979631455393e-05, - "loss": 0.1096, - "step": 8300 - }, - { - "epoch": 0.38235429201304366, - "grad_norm": 0.02364448457956314, - "learning_rate": 2.722475417957061e-05, - "loss": 0.1241, - "step": 8325 - }, - { - "epoch": 0.3835025031001699, - "grad_norm": 78.79348754882812, - "learning_rate": 2.7157434719467558e-05, - "loss": 0.1111, - "step": 8350 - }, - { - "epoch": 0.3846507141872962, - "grad_norm": 29.081613540649414, - "learning_rate": 2.7090022127104426e-05, - "loss": 0.0971, - "step": 8375 - }, - { - "epoch": 0.38579892527442244, - "grad_norm": 1.15839684009552, - "learning_rate": 2.7022517279651208e-05, - "loss": 0.0933, - "step": 8400 - }, - { - "epoch": 0.3869471363615487, - "grad_norm": 0.3460037410259247, - "learning_rate": 2.695492105547835e-05, - "loss": 0.1333, - "step": 8425 - }, - { - "epoch": 0.38809534744867497, - "grad_norm": 0.03244093060493469, - "learning_rate": 2.6887234334145257e-05, - "loss": 0.07, - "step": 8450 - }, - { - "epoch": 0.3892435585358012, - "grad_norm": 0.11166483163833618, - "learning_rate": 2.6819457996388907e-05, - "loss": 0.1051, - "step": 8475 - }, - { - "epoch": 0.3903917696229275, - "grad_norm": 0.8538084626197815, - "learning_rate": 2.6751592924112347e-05, - "loss": 0.1099, - "step": 8500 - }, - { - "epoch": 0.39153998071005375, - "grad_norm": 1.3156309127807617, - "learning_rate": 2.6683640000373232e-05, - "loss": 0.1429, - "step": 8525 - }, - { - "epoch": 0.39268819179718, - "grad_norm": 0.0183534175157547, - "learning_rate": 2.661560010937235e-05, - "loss": 0.1151, - "step": 8550 - }, - { - "epoch": 0.39383640288430627, - "grad_norm": 10.313261032104492, - "learning_rate": 2.6547474136442088e-05, - "loss": 0.108, - "step": 8575 - }, - { - "epoch": 0.39498461397143253, - "grad_norm": 0.022917896509170532, - "learning_rate": 2.647926296803495e-05, - "loss": 0.0776, - "step": 8600 - }, - { - "epoch": 0.3961328250585588, - "grad_norm": 0.6006048917770386, - "learning_rate": 2.6410967491711975e-05, - "loss": 0.04, - "step": 8625 - }, - { - "epoch": 0.397281036145685, - "grad_norm": 0.8469564914703369, - "learning_rate": 2.6342588596131225e-05, - "loss": 0.0361, - "step": 8650 - }, - { - "epoch": 0.39842924723281126, - "grad_norm": 0.8163219094276428, - "learning_rate": 2.6274127171036217e-05, - "loss": 0.0943, - "step": 8675 - }, - { - "epoch": 0.3995774583199375, - "grad_norm": 10.35794734954834, - "learning_rate": 2.6205584107244324e-05, - "loss": 0.0826, - "step": 8700 - }, - { - "epoch": 0.4007256694070638, - "grad_norm": 137.8352813720703, - 
"learning_rate": 2.613696029663521e-05, - "loss": 0.1485, - "step": 8725 - }, - { - "epoch": 0.40187388049419004, - "grad_norm": 0.1350816935300827, - "learning_rate": 2.6068256632139203e-05, - "loss": 0.0875, - "step": 8750 - }, - { - "epoch": 0.4030220915813163, - "grad_norm": 11.143074035644531, - "learning_rate": 2.5999474007725702e-05, - "loss": 0.1005, - "step": 8775 - }, - { - "epoch": 0.40417030266844256, - "grad_norm": 0.28397971391677856, - "learning_rate": 2.5930613318391517e-05, - "loss": 0.0564, - "step": 8800 - }, - { - "epoch": 0.4053185137555688, - "grad_norm": 0.023485900834202766, - "learning_rate": 2.5861675460149244e-05, - "loss": 0.0957, - "step": 8825 - }, - { - "epoch": 0.4064667248426951, - "grad_norm": 0.2290368527173996, - "learning_rate": 2.579266133001558e-05, - "loss": 0.0567, - "step": 8850 - }, - { - "epoch": 0.40761493592982134, - "grad_norm": 0.20042946934700012, - "learning_rate": 2.5723571825999692e-05, - "loss": 0.0697, - "step": 8875 - }, - { - "epoch": 0.4087631470169476, - "grad_norm": 24.241487503051758, - "learning_rate": 2.56544078470915e-05, - "loss": 0.098, - "step": 8900 - }, - { - "epoch": 0.40991135810407386, - "grad_norm": 31.435606002807617, - "learning_rate": 2.558517029324998e-05, - "loss": 0.0716, - "step": 8925 - }, - { - "epoch": 0.4110595691912001, - "grad_norm": 0.030203837901353836, - "learning_rate": 2.5515860065391477e-05, - "loss": 0.0695, - "step": 8950 - }, - { - "epoch": 0.4122077802783264, - "grad_norm": 0.09278186410665512, - "learning_rate": 2.5446478065377948e-05, - "loss": 0.1542, - "step": 8975 - }, - { - "epoch": 0.41335599136545265, - "grad_norm": 0.015812937170267105, - "learning_rate": 2.5377025196005277e-05, - "loss": 0.1059, - "step": 9000 - }, - { - "epoch": 0.4145042024525789, - "grad_norm": 120.72908020019531, - "learning_rate": 2.530750236099146e-05, - "loss": 0.0975, - "step": 9025 - }, - { - "epoch": 0.4156524135397051, - "grad_norm": 12.126846313476562, - "learning_rate": 2.5237910464964915e-05, - "loss": 0.0327, - "step": 9050 - }, - { - "epoch": 0.4168006246268314, - "grad_norm": 0.022481214255094528, - "learning_rate": 2.516825041345266e-05, - "loss": 0.0526, - "step": 9075 - }, - { - "epoch": 0.41794883571395763, - "grad_norm": 22.173906326293945, - "learning_rate": 2.5098523112868553e-05, - "loss": 0.0843, - "step": 9100 - }, - { - "epoch": 0.4190970468010839, - "grad_norm": 14.182297706604004, - "learning_rate": 2.5028729470501495e-05, - "loss": 0.0936, - "step": 9125 - }, - { - "epoch": 0.42024525788821016, - "grad_norm": 20.062166213989258, - "learning_rate": 2.4958870394503637e-05, - "loss": 0.0503, - "step": 9150 - }, - { - "epoch": 0.4213934689753364, - "grad_norm": 1.9899086952209473, - "learning_rate": 2.488894679387853e-05, - "loss": 0.1136, - "step": 9175 - }, - { - "epoch": 0.4225416800624627, - "grad_norm": 0.30002573132514954, - "learning_rate": 2.4818959578469325e-05, - "loss": 0.0558, - "step": 9200 - }, - { - "epoch": 0.42368989114958894, - "grad_norm": 0.0924188494682312, - "learning_rate": 2.474890965894693e-05, - "loss": 0.0939, - "step": 9225 - }, - { - "epoch": 0.4248381022367152, - "grad_norm": 0.11107456684112549, - "learning_rate": 2.467879794679815e-05, - "loss": 0.1044, - "step": 9250 - }, - { - "epoch": 0.42598631332384146, - "grad_norm": 0.28399938344955444, - "learning_rate": 2.4608625354313836e-05, - "loss": 0.0749, - "step": 9275 - }, - { - "epoch": 0.4271345244109677, - "grad_norm": 0.035467736423015594, - "learning_rate": 2.4538392794577014e-05, - "loss": 0.0737, - 
"step": 9300 - }, - { - "epoch": 0.428282735498094, - "grad_norm": 112.69757080078125, - "learning_rate": 2.4468101181450995e-05, - "loss": 0.1343, - "step": 9325 - }, - { - "epoch": 0.42943094658522024, - "grad_norm": 224.90357971191406, - "learning_rate": 2.43977514295675e-05, - "loss": 0.0376, - "step": 9350 - }, - { - "epoch": 0.4305791576723465, - "grad_norm": 0.23775708675384521, - "learning_rate": 2.4327344454314738e-05, - "loss": 0.0685, - "step": 9375 - }, - { - "epoch": 0.43172736875947276, - "grad_norm": 0.705800473690033, - "learning_rate": 2.4256881171825512e-05, - "loss": 0.0655, - "step": 9400 - }, - { - "epoch": 0.432875579846599, - "grad_norm": 0.2821938991546631, - "learning_rate": 2.4186362498965295e-05, - "loss": 0.1061, - "step": 9425 - }, - { - "epoch": 0.43402379093372523, - "grad_norm": 0.0656893253326416, - "learning_rate": 2.4115789353320302e-05, - "loss": 0.0865, - "step": 9450 - }, - { - "epoch": 0.4351720020208515, - "grad_norm": 2.238302707672119, - "learning_rate": 2.4045162653185528e-05, - "loss": 0.127, - "step": 9475 - }, - { - "epoch": 0.43632021310797775, - "grad_norm": 0.44611087441444397, - "learning_rate": 2.3974483317552824e-05, - "loss": 0.037, - "step": 9500 - }, - { - "epoch": 0.437468424195104, - "grad_norm": 0.26402464509010315, - "learning_rate": 2.3903752266098946e-05, - "loss": 0.0218, - "step": 9525 - }, - { - "epoch": 0.43861663528223027, - "grad_norm": 0.018698792904615402, - "learning_rate": 2.3832970419173558e-05, - "loss": 0.0601, - "step": 9550 - }, - { - "epoch": 0.43976484636935653, - "grad_norm": 44.78874206542969, - "learning_rate": 2.376213869778728e-05, - "loss": 0.1243, - "step": 9575 - }, - { - "epoch": 0.4409130574564828, - "grad_norm": 0.03857385739684105, - "learning_rate": 2.3691258023599706e-05, - "loss": 0.058, - "step": 9600 - }, - { - "epoch": 0.44206126854360905, - "grad_norm": 0.03389425948262215, - "learning_rate": 2.3620329318907363e-05, - "loss": 0.0755, - "step": 9625 - }, - { - "epoch": 0.4432094796307353, - "grad_norm": 15.255989074707031, - "learning_rate": 2.3549353506631805e-05, - "loss": 0.1009, - "step": 9650 - }, - { - "epoch": 0.4443576907178616, - "grad_norm": 0.014160329475998878, - "learning_rate": 2.3478331510307508e-05, - "loss": 0.1342, - "step": 9675 - }, - { - "epoch": 0.44550590180498784, - "grad_norm": 14.596662521362305, - "learning_rate": 2.3407264254069908e-05, - "loss": 0.0558, - "step": 9700 - }, - { - "epoch": 0.4466541128921141, - "grad_norm": 1.6787420511245728, - "learning_rate": 2.333615266264335e-05, - "loss": 0.0342, - "step": 9725 - }, - { - "epoch": 0.44780232397924036, - "grad_norm": 0.008284349925816059, - "learning_rate": 2.3264997661329085e-05, - "loss": 0.1212, - "step": 9750 - }, - { - "epoch": 0.4489505350663666, - "grad_norm": 0.06463667005300522, - "learning_rate": 2.3193800175993197e-05, - "loss": 0.119, - "step": 9775 - }, - { - "epoch": 0.4500987461534929, - "grad_norm": 0.08876292407512665, - "learning_rate": 2.3122561133054572e-05, - "loss": 0.0863, - "step": 9800 - }, - { - "epoch": 0.45124695724061914, - "grad_norm": 21.290327072143555, - "learning_rate": 2.3051281459472855e-05, - "loss": 0.0651, - "step": 9825 - }, - { - "epoch": 0.45239516832774535, - "grad_norm": 19.711048126220703, - "learning_rate": 2.2979962082736362e-05, - "loss": 0.1419, - "step": 9850 - }, - { - "epoch": 0.4535433794148716, - "grad_norm": 11.61158561706543, - "learning_rate": 2.290860393085002e-05, - "loss": 0.0398, - "step": 9875 - }, - { - "epoch": 0.45469159050199787, - "grad_norm": 
0.06531768292188644, - "learning_rate": 2.2837207932323308e-05, - "loss": 0.1047, - "step": 9900 - }, - { - "epoch": 0.4558398015891241, - "grad_norm": 0.004605613183230162, - "learning_rate": 2.2765775016158173e-05, - "loss": 0.0674, - "step": 9925 - }, - { - "epoch": 0.4569880126762504, - "grad_norm": 0.27768808603286743, - "learning_rate": 2.2694306111836905e-05, - "loss": 0.1106, - "step": 9950 - }, - { - "epoch": 0.45813622376337665, - "grad_norm": 0.04515165835618973, - "learning_rate": 2.262280214931009e-05, - "loss": 0.1219, - "step": 9975 - }, - { - "epoch": 0.4592844348505029, - "grad_norm": 7.108341217041016, - "learning_rate": 2.2551264058984498e-05, - "loss": 0.1299, - "step": 10000 - }, - { - "epoch": 0.46043264593762917, - "grad_norm": 0.2922165095806122, - "learning_rate": 2.247969277171094e-05, - "loss": 0.0849, - "step": 10025 - }, - { - "epoch": 0.46158085702475543, - "grad_norm": 0.11757965385913849, - "learning_rate": 2.2408089218772215e-05, - "loss": 0.1056, - "step": 10050 - }, - { - "epoch": 0.4627290681118817, - "grad_norm": 13.596343994140625, - "learning_rate": 2.2336454331870937e-05, - "loss": 0.1195, - "step": 10075 - }, - { - "epoch": 0.46387727919900795, - "grad_norm": 0.041957736015319824, - "learning_rate": 2.2264789043117457e-05, - "loss": 0.0368, - "step": 10100 - }, - { - "epoch": 0.4650254902861342, - "grad_norm": 0.09741153568029404, - "learning_rate": 2.2193094285017692e-05, - "loss": 0.0597, - "step": 10125 - }, - { - "epoch": 0.4661737013732605, - "grad_norm": 0.41714927554130554, - "learning_rate": 2.2121370990461042e-05, - "loss": 0.0787, - "step": 10150 - }, - { - "epoch": 0.46732191246038673, - "grad_norm": 0.02291625551879406, - "learning_rate": 2.2049620092708194e-05, - "loss": 0.0905, - "step": 10175 - }, - { - "epoch": 0.468470123547513, - "grad_norm": 2.7025392055511475, - "learning_rate": 2.1977842525379012e-05, - "loss": 0.0952, - "step": 10200 - }, - { - "epoch": 0.46961833463463926, - "grad_norm": 0.047728363424539566, - "learning_rate": 2.1906039222440406e-05, - "loss": 0.0662, - "step": 10225 - }, - { - "epoch": 0.4707665457217655, - "grad_norm": 2.8927001953125, - "learning_rate": 2.1834211118194122e-05, - "loss": 0.091, - "step": 10250 - }, - { - "epoch": 0.4719147568088917, - "grad_norm": 0.3148484528064728, - "learning_rate": 2.1762359147264655e-05, - "loss": 0.0741, - "step": 10275 - }, - { - "epoch": 0.473062967896018, - "grad_norm": 20.764738082885742, - "learning_rate": 2.1690484244587023e-05, - "loss": 0.0896, - "step": 10300 - }, - { - "epoch": 0.47421117898314424, - "grad_norm": 37.63784408569336, - "learning_rate": 2.1618587345394643e-05, - "loss": 0.079, - "step": 10325 - }, - { - "epoch": 0.4753593900702705, - "grad_norm": 0.14948450028896332, - "learning_rate": 2.1546669385207152e-05, - "loss": 0.0781, - "step": 10350 - }, - { - "epoch": 0.47650760115739677, - "grad_norm": 0.020155781880021095, - "learning_rate": 2.1474731299818236e-05, - "loss": 0.0772, - "step": 10375 - }, - { - "epoch": 0.477655812244523, - "grad_norm": 25.559816360473633, - "learning_rate": 2.1402774025283435e-05, - "loss": 0.1737, - "step": 10400 - }, - { - "epoch": 0.4788040233316493, - "grad_norm": 10.59823989868164, - "learning_rate": 2.1330798497907986e-05, - "loss": 0.0766, - "step": 10425 - }, - { - "epoch": 0.47995223441877555, - "grad_norm": 0.12846903502941132, - "learning_rate": 2.125880565423464e-05, - "loss": 0.0181, - "step": 10450 - }, - { - "epoch": 0.4811004455059018, - "grad_norm": 0.02507212944328785, - "learning_rate": 
2.118679643103144e-05, - "loss": 0.0862, - "step": 10475 - }, - { - "epoch": 0.48224865659302807, - "grad_norm": 0.03787585720419884, - "learning_rate": 2.1114771765279594e-05, - "loss": 0.0594, - "step": 10500 - }, - { - "epoch": 0.48339686768015433, - "grad_norm": 0.031070342287421227, - "learning_rate": 2.1042732594161227e-05, - "loss": 0.0266, - "step": 10525 - }, - { - "epoch": 0.4845450787672806, - "grad_norm": 0.5428506731987, - "learning_rate": 2.09706798550472e-05, - "loss": 0.1439, - "step": 10550 - }, - { - "epoch": 0.48569328985440685, - "grad_norm": 0.06007479876279831, - "learning_rate": 2.089861448548494e-05, - "loss": 0.0368, - "step": 10575 - }, - { - "epoch": 0.4868415009415331, - "grad_norm": 0.4172542095184326, - "learning_rate": 2.0826537423186204e-05, - "loss": 0.0624, - "step": 10600 - }, - { - "epoch": 0.4879897120286594, - "grad_norm": 0.019977344200015068, - "learning_rate": 2.0754449606014916e-05, - "loss": 0.0457, - "step": 10625 - }, - { - "epoch": 0.48913792311578563, - "grad_norm": 8.558683395385742, - "learning_rate": 2.0682351971974915e-05, - "loss": 0.0674, - "step": 10650 - }, - { - "epoch": 0.49028613420291184, - "grad_norm": 49.44523239135742, - "learning_rate": 2.06102454591978e-05, - "loss": 0.1491, - "step": 10675 - }, - { - "epoch": 0.4914343452900381, - "grad_norm": 6.603743076324463, - "learning_rate": 2.0538131005930678e-05, - "loss": 0.078, - "step": 10700 - }, - { - "epoch": 0.49258255637716436, - "grad_norm": 0.7394055724143982, - "learning_rate": 2.0466009550523997e-05, - "loss": 0.0763, - "step": 10725 - }, - { - "epoch": 0.4937307674642906, - "grad_norm": 0.5767204761505127, - "learning_rate": 2.0393882031419307e-05, - "loss": 0.1439, - "step": 10750 - }, - { - "epoch": 0.4948789785514169, - "grad_norm": 0.05295969545841217, - "learning_rate": 2.0321749387137055e-05, - "loss": 0.139, - "step": 10775 - }, - { - "epoch": 0.49602718963854314, - "grad_norm": 0.20006586611270905, - "learning_rate": 2.024961255626439e-05, - "loss": 0.0691, - "step": 10800 - }, - { - "epoch": 0.4971754007256694, - "grad_norm": 24.380508422851562, - "learning_rate": 2.017747247744292e-05, - "loss": 0.069, - "step": 10825 - }, - { - "epoch": 0.49832361181279566, - "grad_norm": 0.635622501373291, - "learning_rate": 2.0105330089356535e-05, - "loss": 0.0681, - "step": 10850 - }, - { - "epoch": 0.4994718228999219, - "grad_norm": 0.33230355381965637, - "learning_rate": 2.0033186330719147e-05, - "loss": 0.0754, - "step": 10875 - }, - { - "epoch": 0.5006200339870481, - "grad_norm": 0.046828582882881165, - "learning_rate": 1.9961042140262533e-05, - "loss": 0.0581, - "step": 10900 - }, - { - "epoch": 0.5017682450741744, - "grad_norm": 0.0318719819188118, - "learning_rate": 1.9888898456724058e-05, - "loss": 0.0746, - "step": 10925 - }, - { - "epoch": 0.5029164561613007, - "grad_norm": 0.019355185329914093, - "learning_rate": 1.9816756218834515e-05, - "loss": 0.0664, - "step": 10950 - }, - { - "epoch": 0.5040646672484269, - "grad_norm": 427.9779052734375, - "learning_rate": 1.974461636530587e-05, - "loss": 0.0777, - "step": 10975 - }, - { - "epoch": 0.5052128783355532, - "grad_norm": 0.22881121933460236, - "learning_rate": 1.9672479834819065e-05, - "loss": 0.1357, - "step": 11000 - }, - { - "epoch": 0.5063610894226794, - "grad_norm": 15.586023330688477, - "learning_rate": 1.960034756601182e-05, - "loss": 0.1463, - "step": 11025 - }, - { - "epoch": 0.5075093005098057, - "grad_norm": 0.1670362949371338, - "learning_rate": 1.9528220497466382e-05, - "loss": 0.2057, - "step": 11050 
- }, - { - "epoch": 0.508657511596932, - "grad_norm": 0.052390482276678085, - "learning_rate": 1.945609956769735e-05, - "loss": 0.0423, - "step": 11075 - }, - { - "epoch": 0.5098057226840582, - "grad_norm": 3.1492886543273926, - "learning_rate": 1.938398571513942e-05, - "loss": 0.0344, - "step": 11100 - }, - { - "epoch": 0.5109539337711845, - "grad_norm": 0.30143025517463684, - "learning_rate": 1.9311879878135228e-05, - "loss": 0.0601, - "step": 11125 - }, - { - "epoch": 0.5121021448583107, - "grad_norm": 0.17168006300926208, - "learning_rate": 1.92397829949231e-05, - "loss": 0.0636, - "step": 11150 - }, - { - "epoch": 0.513250355945437, - "grad_norm": 0.601152777671814, - "learning_rate": 1.9167696003624846e-05, - "loss": 0.0369, - "step": 11175 - }, - { - "epoch": 0.5143985670325633, - "grad_norm": 0.04143074154853821, - "learning_rate": 1.909561984223358e-05, - "loss": 0.1208, - "step": 11200 - }, - { - "epoch": 0.5155467781196895, - "grad_norm": 0.0267263762652874, - "learning_rate": 1.9023555448601482e-05, - "loss": 0.0431, - "step": 11225 - }, - { - "epoch": 0.5166949892068158, - "grad_norm": 0.03883841261267662, - "learning_rate": 1.8951503760427628e-05, - "loss": 0.0247, - "step": 11250 - }, - { - "epoch": 0.517843200293942, - "grad_norm": 17.886707305908203, - "learning_rate": 1.8879465715245756e-05, - "loss": 0.129, - "step": 11275 - }, - { - "epoch": 0.5189914113810683, - "grad_norm": 0.12370496988296509, - "learning_rate": 1.8807442250412078e-05, - "loss": 0.0849, - "step": 11300 - }, - { - "epoch": 0.5201396224681946, - "grad_norm": 0.08131173998117447, - "learning_rate": 1.873543430309311e-05, - "loss": 0.112, - "step": 11325 - }, - { - "epoch": 0.5212878335553208, - "grad_norm": 0.0747842863202095, - "learning_rate": 1.8663442810253435e-05, - "loss": 0.0306, - "step": 11350 - }, - { - "epoch": 0.5224360446424471, - "grad_norm": 15.537251472473145, - "learning_rate": 1.8591468708643538e-05, - "loss": 0.0562, - "step": 11375 - }, - { - "epoch": 0.5235842557295733, - "grad_norm": 0.011435016058385372, - "learning_rate": 1.85195129347876e-05, - "loss": 0.0637, - "step": 11400 - }, - { - "epoch": 0.5247324668166996, - "grad_norm": 0.21264196932315826, - "learning_rate": 1.8447576424971348e-05, - "loss": 0.0993, - "step": 11425 - }, - { - "epoch": 0.5258806779038259, - "grad_norm": 15.959806442260742, - "learning_rate": 1.8375660115229815e-05, - "loss": 0.0553, - "step": 11450 - }, - { - "epoch": 0.5270288889909521, - "grad_norm": 16.946958541870117, - "learning_rate": 1.8303764941335206e-05, - "loss": 0.0264, - "step": 11475 - }, - { - "epoch": 0.5281771000780784, - "grad_norm": 0.8537108302116394, - "learning_rate": 1.8231891838784713e-05, - "loss": 0.097, - "step": 11500 - }, - { - "epoch": 0.5293253111652046, - "grad_norm": 0.011382007971405983, - "learning_rate": 1.816004174278832e-05, - "loss": 0.0714, - "step": 11525 - }, - { - "epoch": 0.5304735222523309, - "grad_norm": 9.222038269042969, - "learning_rate": 1.8088215588256672e-05, - "loss": 0.0796, - "step": 11550 - }, - { - "epoch": 0.5316217333394572, - "grad_norm": 0.031983211636543274, - "learning_rate": 1.8016414309788867e-05, - "loss": 0.0703, - "step": 11575 - }, - { - "epoch": 0.5327699444265834, - "grad_norm": 0.11978611350059509, - "learning_rate": 1.7944638841660334e-05, - "loss": 0.0489, - "step": 11600 - }, - { - "epoch": 0.5339181555137097, - "grad_norm": 0.2896723449230194, - "learning_rate": 1.7872890117810654e-05, - "loss": 0.1119, - "step": 11625 - }, - { - "epoch": 0.535066366600836, - "grad_norm": 
213.61729431152344, - "learning_rate": 1.7801169071831396e-05, - "loss": 0.1102, - "step": 11650 - }, - { - "epoch": 0.5362145776879621, - "grad_norm": 15.954391479492188, - "learning_rate": 1.772947663695402e-05, - "loss": 0.0812, - "step": 11675 - }, - { - "epoch": 0.5373627887750884, - "grad_norm": 0.16466745734214783, - "learning_rate": 1.7657813746037663e-05, - "loss": 0.0633, - "step": 11700 - }, - { - "epoch": 0.5385109998622146, - "grad_norm": 0.5646315217018127, - "learning_rate": 1.7586181331557057e-05, - "loss": 0.111, - "step": 11725 - }, - { - "epoch": 0.5396592109493409, - "grad_norm": 0.21161770820617676, - "learning_rate": 1.751458032559037e-05, - "loss": 0.0424, - "step": 11750 - }, - { - "epoch": 0.5408074220364671, - "grad_norm": 0.1260633021593094, - "learning_rate": 1.744301165980709e-05, - "loss": 0.0511, - "step": 11775 - }, - { - "epoch": 0.5419556331235934, - "grad_norm": 0.11227844655513763, - "learning_rate": 1.737147626545589e-05, - "loss": 0.1005, - "step": 11800 - }, - { - "epoch": 0.5431038442107197, - "grad_norm": 0.010617699474096298, - "learning_rate": 1.7299975073352523e-05, - "loss": 0.0544, - "step": 11825 - }, - { - "epoch": 0.5442520552978459, - "grad_norm": 0.055700164288282394, - "learning_rate": 1.722850901386769e-05, - "loss": 0.0757, - "step": 11850 - }, - { - "epoch": 0.5454002663849722, - "grad_norm": 0.08620750904083252, - "learning_rate": 1.715707901691496e-05, - "loss": 0.0407, - "step": 11875 - }, - { - "epoch": 0.5465484774720984, - "grad_norm": 27.830577850341797, - "learning_rate": 1.708568601193866e-05, - "loss": 0.0594, - "step": 11900 - }, - { - "epoch": 0.5476966885592247, - "grad_norm": 0.10323012620210648, - "learning_rate": 1.7014330927901764e-05, - "loss": 0.1063, - "step": 11925 - }, - { - "epoch": 0.548844899646351, - "grad_norm": 0.05482901260256767, - "learning_rate": 1.6943014693273837e-05, - "loss": 0.0711, - "step": 11950 - }, - { - "epoch": 0.5499931107334772, - "grad_norm": 34.74000930786133, - "learning_rate": 1.6871738236018918e-05, - "loss": 0.044, - "step": 11975 - }, - { - "epoch": 0.5511413218206035, - "grad_norm": 0.025118809193372726, - "learning_rate": 1.680050248358349e-05, - "loss": 0.0378, - "step": 12000 - }, - { - "epoch": 0.5522895329077298, - "grad_norm": 4.1571149826049805, - "learning_rate": 1.672930836288436e-05, - "loss": 0.0656, - "step": 12025 - }, - { - "epoch": 0.553437743994856, - "grad_norm": 0.053131286054849625, - "learning_rate": 1.6658156800296646e-05, - "loss": 0.1149, - "step": 12050 - }, - { - "epoch": 0.5545859550819823, - "grad_norm": 0.09221284091472626, - "learning_rate": 1.6587048721641693e-05, - "loss": 0.1209, - "step": 12075 - }, - { - "epoch": 0.5557341661691085, - "grad_norm": 0.021747712045907974, - "learning_rate": 1.651598505217502e-05, - "loss": 0.0404, - "step": 12100 - }, - { - "epoch": 0.5568823772562348, - "grad_norm": 0.19503289461135864, - "learning_rate": 1.644496671657432e-05, - "loss": 0.1202, - "step": 12125 - }, - { - "epoch": 0.5580305883433611, - "grad_norm": 11.685429573059082, - "learning_rate": 1.6373994638927394e-05, - "loss": 0.0749, - "step": 12150 - }, - { - "epoch": 0.5591787994304873, - "grad_norm": 0.0062902239151299, - "learning_rate": 1.630306974272013e-05, - "loss": 0.0774, - "step": 12175 - }, - { - "epoch": 0.5603270105176136, - "grad_norm": 125.52642059326172, - "learning_rate": 1.6232192950824504e-05, - "loss": 0.0919, - "step": 12200 - }, - { - "epoch": 0.5614752216047398, - "grad_norm": 1.1520588397979736, - "learning_rate": 
1.6161365185486546e-05, - "loss": 0.0294, - "step": 12225 - }, - { - "epoch": 0.5626234326918661, - "grad_norm": 0.2502564787864685, - "learning_rate": 1.609058736831437e-05, - "loss": 0.0986, - "step": 12250 - }, - { - "epoch": 0.5637716437789924, - "grad_norm": 0.015697428956627846, - "learning_rate": 1.6019860420266157e-05, - "loss": 0.0206, - "step": 12275 - }, - { - "epoch": 0.5649198548661186, - "grad_norm": 0.07955513894557953, - "learning_rate": 1.5949185261638186e-05, - "loss": 0.0877, - "step": 12300 - }, - { - "epoch": 0.5660680659532449, - "grad_norm": 0.09653720259666443, - "learning_rate": 1.5878562812052845e-05, - "loss": 0.0854, - "step": 12325 - }, - { - "epoch": 0.5672162770403711, - "grad_norm": 64.97950744628906, - "learning_rate": 1.5807993990446687e-05, - "loss": 0.0222, - "step": 12350 - }, - { - "epoch": 0.5683644881274974, - "grad_norm": 0.025900257751345634, - "learning_rate": 1.5737479715058454e-05, - "loss": 0.0203, - "step": 12375 - }, - { - "epoch": 0.5695126992146237, - "grad_norm": 0.007998030632734299, - "learning_rate": 1.5667020903417124e-05, - "loss": 0.064, - "step": 12400 - }, - { - "epoch": 0.5706609103017499, - "grad_norm": 0.07901493459939957, - "learning_rate": 1.5596618472330008e-05, - "loss": 0.0952, - "step": 12425 - }, - { - "epoch": 0.5718091213888762, - "grad_norm": 0.338176429271698, - "learning_rate": 1.5526273337870767e-05, - "loss": 0.0647, - "step": 12450 - }, - { - "epoch": 0.5729573324760023, - "grad_norm": 0.10939247161149979, - "learning_rate": 1.5455986415367547e-05, - "loss": 0.0651, - "step": 12475 - }, - { - "epoch": 0.5741055435631286, - "grad_norm": 0.003566980129107833, - "learning_rate": 1.538575861939102e-05, - "loss": 0.0155, - "step": 12500 - }, - { - "epoch": 0.5752537546502549, - "grad_norm": 0.6434283256530762, - "learning_rate": 1.531559086374251e-05, - "loss": 0.0527, - "step": 12525 - }, - { - "epoch": 0.5764019657373811, - "grad_norm": 0.05047542229294777, - "learning_rate": 1.5245484061442113e-05, - "loss": 0.0715, - "step": 12550 - }, - { - "epoch": 0.5775501768245074, - "grad_norm": 0.1261250227689743, - "learning_rate": 1.5175439124716793e-05, - "loss": 0.134, - "step": 12575 - }, - { - "epoch": 0.5786983879116336, - "grad_norm": 9.76130199432373, - "learning_rate": 1.5105456964988517e-05, - "loss": 0.0846, - "step": 12600 - }, - { - "epoch": 0.5798465989987599, - "grad_norm": 0.0062255095690488815, - "learning_rate": 1.5035538492862411e-05, - "loss": 0.1306, - "step": 12625 - }, - { - "epoch": 0.5809948100858862, - "grad_norm": 0.03955512493848801, - "learning_rate": 1.4965684618114891e-05, - "loss": 0.0427, - "step": 12650 - }, - { - "epoch": 0.5821430211730124, - "grad_norm": 0.12320832908153534, - "learning_rate": 1.4895896249681831e-05, - "loss": 0.0926, - "step": 12675 - }, - { - "epoch": 0.5832912322601387, - "grad_norm": 5.945989608764648, - "learning_rate": 1.4826174295646763e-05, - "loss": 0.0327, - "step": 12700 - }, - { - "epoch": 0.5844394433472649, - "grad_norm": 0.010435862466692924, - "learning_rate": 1.4756519663229e-05, - "loss": 0.0856, - "step": 12725 - }, - { - "epoch": 0.5855876544343912, - "grad_norm": 0.029769467189908028, - "learning_rate": 1.468693325877191e-05, - "loss": 0.0804, - "step": 12750 - }, - { - "epoch": 0.5867358655215175, - "grad_norm": 11.208464622497559, - "learning_rate": 1.4617415987731045e-05, - "loss": 0.0947, - "step": 12775 - }, - { - "epoch": 0.5878840766086437, - "grad_norm": 101.77286529541016, - "learning_rate": 1.454796875466242e-05, - "loss": 0.0808, - "step": 
12800 - }, - { - "epoch": 0.58903228769577, - "grad_norm": 0.1369004100561142, - "learning_rate": 1.447859246321071e-05, - "loss": 0.0784, - "step": 12825 - }, - { - "epoch": 0.5901804987828962, - "grad_norm": 0.04609095677733421, - "learning_rate": 1.4409288016097493e-05, - "loss": 0.0839, - "step": 12850 - }, - { - "epoch": 0.5913287098700225, - "grad_norm": 0.10736856609582901, - "learning_rate": 1.434005631510953e-05, - "loss": 0.0442, - "step": 12875 - }, - { - "epoch": 0.5924769209571488, - "grad_norm": 0.07319389283657074, - "learning_rate": 1.427089826108699e-05, - "loss": 0.0816, - "step": 12900 - }, - { - "epoch": 0.593625132044275, - "grad_norm": 13.238898277282715, - "learning_rate": 1.4201814753911771e-05, - "loss": 0.0961, - "step": 12925 - }, - { - "epoch": 0.5947733431314013, - "grad_norm": 0.26180291175842285, - "learning_rate": 1.4132806692495761e-05, - "loss": 0.0415, - "step": 12950 - }, - { - "epoch": 0.5959215542185275, - "grad_norm": 0.839544951915741, - "learning_rate": 1.4063874974769141e-05, - "loss": 0.0837, - "step": 12975 - }, - { - "epoch": 0.5970697653056538, - "grad_norm": 1.000243902206421, - "learning_rate": 1.3995020497668735e-05, - "loss": 0.053, - "step": 13000 - }, - { - "epoch": 0.5982179763927801, - "grad_norm": 0.024470528587698936, - "learning_rate": 1.3926244157126285e-05, - "loss": 0.1149, - "step": 13025 - }, - { - "epoch": 0.5993661874799063, - "grad_norm": 274.2747497558594, - "learning_rate": 1.385754684805685e-05, - "loss": 0.1034, - "step": 13050 - }, - { - "epoch": 0.6005143985670326, - "grad_norm": 0.02918631210923195, - "learning_rate": 1.3788929464347121e-05, - "loss": 0.0702, - "step": 13075 - }, - { - "epoch": 0.6016626096541589, - "grad_norm": 0.040033821016550064, - "learning_rate": 1.3720392898843808e-05, - "loss": 0.0546, - "step": 13100 - }, - { - "epoch": 0.6028108207412851, - "grad_norm": 9.150288581848145, - "learning_rate": 1.3651938043342013e-05, - "loss": 0.1065, - "step": 13125 - }, - { - "epoch": 0.6039590318284114, - "grad_norm": 0.039467647671699524, - "learning_rate": 1.358356578857363e-05, - "loss": 0.0938, - "step": 13150 - }, - { - "epoch": 0.6051072429155376, - "grad_norm": 0.035682786256074905, - "learning_rate": 1.3515277024195765e-05, - "loss": 0.0543, - "step": 13175 - }, - { - "epoch": 0.6062554540026639, - "grad_norm": 0.22070759534835815, - "learning_rate": 1.3447072638779137e-05, - "loss": 0.0657, - "step": 13200 - }, - { - "epoch": 0.6074036650897902, - "grad_norm": 0.009781109169125557, - "learning_rate": 1.3378953519796545e-05, - "loss": 0.0457, - "step": 13225 - }, - { - "epoch": 0.6085518761769164, - "grad_norm": 21.625680923461914, - "learning_rate": 1.3310920553611286e-05, - "loss": 0.0978, - "step": 13250 - }, - { - "epoch": 0.6097000872640426, - "grad_norm": 1.1382207870483398, - "learning_rate": 1.324297462546567e-05, - "loss": 0.0682, - "step": 13275 - }, - { - "epoch": 0.6108482983511688, - "grad_norm": 24.955183029174805, - "learning_rate": 1.3175116619469424e-05, - "loss": 0.0923, - "step": 13300 - }, - { - "epoch": 0.6119965094382951, - "grad_norm": 0.0540093258023262, - "learning_rate": 1.3107347418588276e-05, - "loss": 0.0339, - "step": 13325 - }, - { - "epoch": 0.6131447205254213, - "grad_norm": 28.082834243774414, - "learning_rate": 1.3039667904632412e-05, - "loss": 0.0556, - "step": 13350 - }, - { - "epoch": 0.6142929316125476, - "grad_norm": 0.023283861577510834, - "learning_rate": 1.2972078958245016e-05, - "loss": 0.0866, - "step": 13375 - }, - { - "epoch": 0.6154411426996739, - 
"grad_norm": 22.21122932434082, - "learning_rate": 1.2904581458890809e-05, - "loss": 0.1098, - "step": 13400 - }, - { - "epoch": 0.6165893537868001, - "grad_norm": 0.04448782652616501, - "learning_rate": 1.2837176284844604e-05, - "loss": 0.0954, - "step": 13425 - }, - { - "epoch": 0.6177375648739264, - "grad_norm": 0.056919727474451065, - "learning_rate": 1.276986431317989e-05, - "loss": 0.1402, - "step": 13450 - }, - { - "epoch": 0.6188857759610527, - "grad_norm": 0.10666215419769287, - "learning_rate": 1.27026464197574e-05, - "loss": 0.087, - "step": 13475 - }, - { - "epoch": 0.6200339870481789, - "grad_norm": 0.20836232602596283, - "learning_rate": 1.2635523479213732e-05, - "loss": 0.056, - "step": 13500 - }, - { - "epoch": 0.6211821981353052, - "grad_norm": 12.055030822753906, - "learning_rate": 1.2568496364949953e-05, - "loss": 0.1186, - "step": 13525 - }, - { - "epoch": 0.6223304092224314, - "grad_norm": 0.016045846045017242, - "learning_rate": 1.2501565949120258e-05, - "loss": 0.1067, - "step": 13550 - }, - { - "epoch": 0.6234786203095577, - "grad_norm": 0.24520662426948547, - "learning_rate": 1.2434733102620586e-05, - "loss": 0.0778, - "step": 13575 - }, - { - "epoch": 0.624626831396684, - "grad_norm": 0.03329060226678848, - "learning_rate": 1.2367998695077317e-05, - "loss": 0.0169, - "step": 13600 - }, - { - "epoch": 0.6257750424838102, - "grad_norm": 0.04618681222200394, - "learning_rate": 1.2301363594835954e-05, - "loss": 0.0935, - "step": 13625 - }, - { - "epoch": 0.6269232535709365, - "grad_norm": 0.06207937374711037, - "learning_rate": 1.2234828668949796e-05, - "loss": 0.0924, - "step": 13650 - }, - { - "epoch": 0.6280714646580627, - "grad_norm": 0.05461873859167099, - "learning_rate": 1.2168394783168707e-05, - "loss": 0.0716, - "step": 13675 - }, - { - "epoch": 0.629219675745189, - "grad_norm": 0.06397520750761032, - "learning_rate": 1.2102062801927792e-05, - "loss": 0.0773, - "step": 13700 - }, - { - "epoch": 0.6303678868323153, - "grad_norm": 35.83101272583008, - "learning_rate": 1.2035833588336205e-05, - "loss": 0.059, - "step": 13725 - }, - { - "epoch": 0.6315160979194415, - "grad_norm": 0.06737085431814194, - "learning_rate": 1.1969708004165869e-05, - "loss": 0.0339, - "step": 13750 - }, - { - "epoch": 0.6326643090065678, - "grad_norm": 0.18903277814388275, - "learning_rate": 1.190368690984029e-05, - "loss": 0.049, - "step": 13775 - }, - { - "epoch": 0.633812520093694, - "grad_norm": 30.07693862915039, - "learning_rate": 1.1837771164423372e-05, - "loss": 0.0811, - "step": 13800 - }, - { - "epoch": 0.6349607311808203, - "grad_norm": 0.031337298452854156, - "learning_rate": 1.1771961625608203e-05, - "loss": 0.0716, - "step": 13825 - }, - { - "epoch": 0.6361089422679466, - "grad_norm": 0.1882849633693695, - "learning_rate": 1.1706259149705927e-05, - "loss": 0.0561, - "step": 13850 - }, - { - "epoch": 0.6372571533550728, - "grad_norm": 0.00982393603771925, - "learning_rate": 1.1640664591634585e-05, - "loss": 0.0454, - "step": 13875 - }, - { - "epoch": 0.6384053644421991, - "grad_norm": 0.00783027894794941, - "learning_rate": 1.1575178804907993e-05, - "loss": 0.1095, - "step": 13900 - }, - { - "epoch": 0.6395535755293253, - "grad_norm": 0.06899901479482651, - "learning_rate": 1.1509802641624642e-05, - "loss": 0.0606, - "step": 13925 - }, - { - "epoch": 0.6407017866164516, - "grad_norm": 0.05791301280260086, - "learning_rate": 1.1444536952456611e-05, - "loss": 0.0876, - "step": 13950 - }, - { - "epoch": 0.6418499977035779, - "grad_norm": 0.0551036112010479, - "learning_rate": 
1.1379382586638487e-05, - "loss": 0.0195, - "step": 13975 - }, - { - "epoch": 0.6429982087907041, - "grad_norm": 0.3207205832004547, - "learning_rate": 1.1314340391956326e-05, - "loss": 0.0235, - "step": 14000 - }, - { - "epoch": 0.6441464198778304, - "grad_norm": 0.5788611173629761, - "learning_rate": 1.1249411214736616e-05, - "loss": 0.0834, - "step": 14025 - }, - { - "epoch": 0.6452946309649567, - "grad_norm": 0.015164912678301334, - "learning_rate": 1.118459589983526e-05, - "loss": 0.0862, - "step": 14050 - }, - { - "epoch": 0.6464428420520828, - "grad_norm": 0.19156979024410248, - "learning_rate": 1.1119895290626616e-05, - "loss": 0.1002, - "step": 14075 - }, - { - "epoch": 0.6475910531392091, - "grad_norm": 0.8341970443725586, - "learning_rate": 1.1055310228992453e-05, - "loss": 0.0336, - "step": 14100 - }, - { - "epoch": 0.6487392642263353, - "grad_norm": 0.015737101435661316, - "learning_rate": 1.0990841555311062e-05, - "loss": 0.0473, - "step": 14125 - }, - { - "epoch": 0.6498874753134616, - "grad_norm": 0.09364493191242218, - "learning_rate": 1.0926490108446317e-05, - "loss": 0.0548, - "step": 14150 - }, - { - "epoch": 0.6510356864005878, - "grad_norm": 0.07271132618188858, - "learning_rate": 1.0862256725736713e-05, - "loss": 0.0133, - "step": 14175 - }, - { - "epoch": 0.6521838974877141, - "grad_norm": 0.035903144627809525, - "learning_rate": 1.0798142242984507e-05, - "loss": 0.0337, - "step": 14200 - }, - { - "epoch": 0.6533321085748404, - "grad_norm": 19.09912109375, - "learning_rate": 1.0734147494444835e-05, - "loss": 0.0449, - "step": 14225 - }, - { - "epoch": 0.6544803196619666, - "grad_norm": 0.03382350504398346, - "learning_rate": 1.0670273312814854e-05, - "loss": 0.0619, - "step": 14250 - }, - { - "epoch": 0.6556285307490929, - "grad_norm": 0.2958403527736664, - "learning_rate": 1.0606520529222928e-05, - "loss": 0.0788, - "step": 14275 - }, - { - "epoch": 0.6567767418362191, - "grad_norm": 0.008743496611714363, - "learning_rate": 1.0542889973217765e-05, - "loss": 0.0784, - "step": 14300 - }, - { - "epoch": 0.6579249529233454, - "grad_norm": 12.422863960266113, - "learning_rate": 1.0479382472757673e-05, - "loss": 0.0671, - "step": 14325 - }, - { - "epoch": 0.6590731640104717, - "grad_norm": 0.12558233737945557, - "learning_rate": 1.0415998854199753e-05, - "loss": 0.108, - "step": 14350 - }, - { - "epoch": 0.6602213750975979, - "grad_norm": 0.021612035110592842, - "learning_rate": 1.0352739942289165e-05, - "loss": 0.0598, - "step": 14375 - }, - { - "epoch": 0.6613695861847242, - "grad_norm": 0.2242107391357422, - "learning_rate": 1.0289606560148402e-05, - "loss": 0.0619, - "step": 14400 - }, - { - "epoch": 0.6625177972718505, - "grad_norm": 11.117570877075195, - "learning_rate": 1.0226599529266554e-05, - "loss": 0.0695, - "step": 14425 - }, - { - "epoch": 0.6636660083589767, - "grad_norm": 0.20262782275676727, - "learning_rate": 1.0163719669488632e-05, - "loss": 0.0254, - "step": 14450 - }, - { - "epoch": 0.664814219446103, - "grad_norm": 0.039894625544548035, - "learning_rate": 1.0100967799004915e-05, - "loss": 0.0877, - "step": 14475 - }, - { - "epoch": 0.6659624305332292, - "grad_norm": 0.005091145634651184, - "learning_rate": 1.0038344734340271e-05, - "loss": 0.0592, - "step": 14500 - }, - { - "epoch": 0.6671106416203555, - "grad_norm": 0.004997065290808678, - "learning_rate": 9.975851290343577e-06, - "loss": 0.0867, - "step": 14525 - }, - { - "epoch": 0.6682588527074818, - "grad_norm": 1.4442912340164185, - "learning_rate": 9.913488280177072e-06, - "loss": 0.0725, - 
"step": 14550 - }, - { - "epoch": 0.669407063794608, - "grad_norm": 94.27518463134766, - "learning_rate": 9.851256515305803e-06, - "loss": 0.0307, - "step": 14575 - }, - { - "epoch": 0.6705552748817343, - "grad_norm": 0.0411621555685997, - "learning_rate": 9.789156805487044e-06, - "loss": 0.0963, - "step": 14600 - }, - { - "epoch": 0.6717034859688605, - "grad_norm": 10.251852035522461, - "learning_rate": 9.727189958759799e-06, - "loss": 0.1885, - "step": 14625 - }, - { - "epoch": 0.6728516970559868, - "grad_norm": 23.81732940673828, - "learning_rate": 9.665356781434249e-06, - "loss": 0.0569, - "step": 14650 - }, - { - "epoch": 0.6739999081431131, - "grad_norm": 0.010315894149243832, - "learning_rate": 9.603658078081268e-06, - "loss": 0.1003, - "step": 14675 - }, - { - "epoch": 0.6751481192302393, - "grad_norm": 26.99117088317871, - "learning_rate": 9.54209465152197e-06, - "loss": 0.0429, - "step": 14700 - }, - { - "epoch": 0.6762963303173656, - "grad_norm": 0.15148140490055084, - "learning_rate": 9.480667302817238e-06, - "loss": 0.097, - "step": 14725 - }, - { - "epoch": 0.6774445414044918, - "grad_norm": 15.545239448547363, - "learning_rate": 9.419376831257342e-06, - "loss": 0.1203, - "step": 14750 - }, - { - "epoch": 0.6785927524916181, - "grad_norm": 0.01652432791888714, - "learning_rate": 9.358224034351493e-06, - "loss": 0.0323, - "step": 14775 - }, - { - "epoch": 0.6797409635787444, - "grad_norm": 0.1552172303199768, - "learning_rate": 9.297209707817483e-06, - "loss": 0.052, - "step": 14800 - }, - { - "epoch": 0.6808891746658706, - "grad_norm": 0.020266342908143997, - "learning_rate": 9.23633464557134e-06, - "loss": 0.0756, - "step": 14825 - }, - { - "epoch": 0.6820373857529969, - "grad_norm": 0.062302496284246445, - "learning_rate": 9.175599639716976e-06, - "loss": 0.011, - "step": 14850 - }, - { - "epoch": 0.683185596840123, - "grad_norm": 0.14944753050804138, - "learning_rate": 9.115005480535938e-06, - "loss": 0.0373, - "step": 14875 - }, - { - "epoch": 0.6843338079272493, - "grad_norm": 0.363838255405426, - "learning_rate": 9.054552956477022e-06, - "loss": 0.074, - "step": 14900 - }, - { - "epoch": 0.6854820190143756, - "grad_norm": 0.018838651478290558, - "learning_rate": 8.994242854146114e-06, - "loss": 0.0707, - "step": 14925 - }, - { - "epoch": 0.6866302301015018, - "grad_norm": 0.015431873500347137, - "learning_rate": 8.93407595829589e-06, - "loss": 0.0222, - "step": 14950 - }, - { - "epoch": 0.6877784411886281, - "grad_norm": 10.010194778442383, - "learning_rate": 8.874053051815658e-06, - "loss": 0.0714, - "step": 14975 - }, - { - "epoch": 0.6889266522757543, - "grad_norm": 0.50653076171875, - "learning_rate": 8.81417491572112e-06, - "loss": 0.0292, - "step": 15000 - }, - { - "epoch": 0.6900748633628806, - "grad_norm": 0.018615659326314926, - "learning_rate": 8.754442329144232e-06, - "loss": 0.0173, - "step": 15025 - }, - { - "epoch": 0.6912230744500069, - "grad_norm": 0.0025054675061255693, - "learning_rate": 8.694856069323065e-06, - "loss": 0.0108, - "step": 15050 - }, - { - "epoch": 0.6923712855371331, - "grad_norm": 0.05472448095679283, - "learning_rate": 8.635416911591712e-06, - "loss": 0.0198, - "step": 15075 - }, - { - "epoch": 0.6935194966242594, - "grad_norm": 0.0030740767251700163, - "learning_rate": 8.576125629370156e-06, - "loss": 0.0536, - "step": 15100 - }, - { - "epoch": 0.6946677077113856, - "grad_norm": 0.011597322300076485, - "learning_rate": 8.516982994154238e-06, - "loss": 0.0311, - "step": 15125 - }, - { - "epoch": 0.6958159187985119, - "grad_norm": 
9.239104270935059, - "learning_rate": 8.457989775505607e-06, - "loss": 0.16, - "step": 15150 - }, - { - "epoch": 0.6969641298856382, - "grad_norm": 0.03688933327794075, - "learning_rate": 8.399146741041709e-06, - "loss": 0.036, - "step": 15175 - }, - { - "epoch": 0.6981123409727644, - "grad_norm": 0.010223845951259136, - "learning_rate": 8.340454656425814e-06, - "loss": 0.001, - "step": 15200 - }, - { - "epoch": 0.6992605520598907, - "grad_norm": 17.244403839111328, - "learning_rate": 8.28191428535702e-06, - "loss": 0.1266, - "step": 15225 - }, - { - "epoch": 0.7004087631470169, - "grad_norm": 0.015315833501517773, - "learning_rate": 8.223526389560345e-06, - "loss": 0.0009, - "step": 15250 - }, - { - "epoch": 0.7015569742341432, - "grad_norm": 0.007865412160754204, - "learning_rate": 8.165291728776799e-06, - "loss": 0.0632, - "step": 15275 - }, - { - "epoch": 0.7027051853212695, - "grad_norm": 0.1337803304195404, - "learning_rate": 8.107211060753497e-06, - "loss": 0.0481, - "step": 15300 - }, - { - "epoch": 0.7038533964083957, - "grad_norm": 0.21445252001285553, - "learning_rate": 8.049285141233831e-06, - "loss": 0.0528, - "step": 15325 - }, - { - "epoch": 0.705001607495522, - "grad_norm": 0.2052716165781021, - "learning_rate": 7.991514723947589e-06, - "loss": 0.1004, - "step": 15350 - }, - { - "epoch": 0.7061498185826482, - "grad_norm": 6.991987705230713, - "learning_rate": 7.933900560601176e-06, - "loss": 0.0474, - "step": 15375 - }, - { - "epoch": 0.7072980296697745, - "grad_norm": 0.10487792640924454, - "learning_rate": 7.876443400867828e-06, - "loss": 0.111, - "step": 15400 - }, - { - "epoch": 0.7084462407569008, - "grad_norm": 2.286684989929199, - "learning_rate": 7.819143992377848e-06, - "loss": 0.0326, - "step": 15425 - }, - { - "epoch": 0.709594451844027, - "grad_norm": 0.013362064026296139, - "learning_rate": 7.76200308070891e-06, - "loss": 0.0688, - "step": 15450 - }, - { - "epoch": 0.7107426629311533, - "grad_norm": 0.2844740152359009, - "learning_rate": 7.70502140937631e-06, - "loss": 0.045, - "step": 15475 - }, - { - "epoch": 0.7118908740182796, - "grad_norm": 0.06062127277255058, - "learning_rate": 7.648199719823321e-06, - "loss": 0.0972, - "step": 15500 - }, - { - "epoch": 0.7130390851054058, - "grad_norm": 34.67185974121094, - "learning_rate": 7.591538751411536e-06, - "loss": 0.0903, - "step": 15525 - }, - { - "epoch": 0.7141872961925321, - "grad_norm": 0.020777028053998947, - "learning_rate": 7.535039241411266e-06, - "loss": 0.0811, - "step": 15550 - }, - { - "epoch": 0.7153355072796583, - "grad_norm": 0.023871062323451042, - "learning_rate": 7.478701924991918e-06, - "loss": 0.0868, - "step": 15575 - }, - { - "epoch": 0.7164837183667846, - "grad_norm": 0.0139063261449337, - "learning_rate": 7.422527535212443e-06, - "loss": 0.0627, - "step": 15600 - }, - { - "epoch": 0.7176319294539109, - "grad_norm": 0.12506511807441711, - "learning_rate": 7.366516803011798e-06, - "loss": 0.1005, - "step": 15625 - }, - { - "epoch": 0.7187801405410371, - "grad_norm": 0.0652109682559967, - "learning_rate": 7.310670457199434e-06, - "loss": 0.0836, - "step": 15650 - }, - { - "epoch": 0.7199283516281634, - "grad_norm": 0.21352873742580414, - "learning_rate": 7.254989224445823e-06, - "loss": 0.0903, - "step": 15675 - }, - { - "epoch": 0.7210765627152895, - "grad_norm": 0.2726813554763794, - "learning_rate": 7.199473829272985e-06, - "loss": 0.0385, - "step": 15700 - }, - { - "epoch": 0.7222247738024158, - "grad_norm": 0.3017083406448364, - "learning_rate": 7.144124994045054e-06, - "loss": 
0.0429, - "step": 15725 - }, - { - "epoch": 0.723372984889542, - "grad_norm": 0.9701851010322571, - "learning_rate": 7.088943438958904e-06, - "loss": 0.0713, - "step": 15750 - }, - { - "epoch": 0.7245211959766683, - "grad_norm": 0.0777645856142044, - "learning_rate": 7.03392988203478e-06, - "loss": 0.0527, - "step": 15775 - }, - { - "epoch": 0.7256694070637946, - "grad_norm": 0.09907884895801544, - "learning_rate": 6.979085039106923e-06, - "loss": 0.0649, - "step": 15800 - }, - { - "epoch": 0.7268176181509208, - "grad_norm": 0.02636418305337429, - "learning_rate": 6.924409623814281e-06, - "loss": 0.0302, - "step": 15825 - }, - { - "epoch": 0.7279658292380471, - "grad_norm": 0.020427890121936798, - "learning_rate": 6.8699043475912145e-06, - "loss": 0.0163, - "step": 15850 - }, - { - "epoch": 0.7291140403251734, - "grad_norm": 15.661312103271484, - "learning_rate": 6.815569919658234e-06, - "loss": 0.0487, - "step": 15875 - }, - { - "epoch": 0.7302622514122996, - "grad_norm": 0.07850444316864014, - "learning_rate": 6.7614070470128e-06, - "loss": 0.0355, - "step": 15900 - }, - { - "epoch": 0.7314104624994259, - "grad_norm": 0.011141127906739712, - "learning_rate": 6.707416434420084e-06, - "loss": 0.0007, - "step": 15925 - }, - { - "epoch": 0.7325586735865521, - "grad_norm": 0.08933714032173157, - "learning_rate": 6.65359878440382e-06, - "loss": 0.1007, - "step": 15950 - }, - { - "epoch": 0.7337068846736784, - "grad_norm": 0.011219929903745651, - "learning_rate": 6.599954797237154e-06, - "loss": 0.0691, - "step": 15975 - }, - { - "epoch": 0.7348550957608047, - "grad_norm": 0.1323518455028534, - "learning_rate": 6.546485170933561e-06, - "loss": 0.0721, - "step": 16000 - }, - { - "epoch": 0.7360033068479309, - "grad_norm": 0.16300442814826965, - "learning_rate": 6.493190601237711e-06, - "loss": 0.1, - "step": 16025 - }, - { - "epoch": 0.7371515179350572, - "grad_norm": 0.08380606770515442, - "learning_rate": 6.440071781616462e-06, - "loss": 0.0276, - "step": 16050 - }, - { - "epoch": 0.7382997290221834, - "grad_norm": 13.4380521774292, - "learning_rate": 6.38712940324981e-06, - "loss": 0.0711, - "step": 16075 - }, - { - "epoch": 0.7394479401093097, - "grad_norm": 0.01914297230541706, - "learning_rate": 6.334364155021901e-06, - "loss": 0.0429, - "step": 16100 - }, - { - "epoch": 0.740596151196436, - "grad_norm": 0.007009518798440695, - "learning_rate": 6.281776723512094e-06, - "loss": 0.1372, - "step": 16125 - }, - { - "epoch": 0.7417443622835622, - "grad_norm": 0.09678731113672256, - "learning_rate": 6.229367792985976e-06, - "loss": 0.1245, - "step": 16150 - }, - { - "epoch": 0.7428925733706885, - "grad_norm": 0.007255534175783396, - "learning_rate": 6.177138045386499e-06, - "loss": 0.0326, - "step": 16175 - }, - { - "epoch": 0.7440407844578147, - "grad_norm": 0.1381562054157257, - "learning_rate": 6.125088160325092e-06, - "loss": 0.0234, - "step": 16200 - }, - { - "epoch": 0.745188995544941, - "grad_norm": 25.754289627075195, - "learning_rate": 6.0732188150728125e-06, - "loss": 0.0781, - "step": 16225 - }, - { - "epoch": 0.7463372066320673, - "grad_norm": 0.053420692682266235, - "learning_rate": 6.021530684551564e-06, - "loss": 0.0976, - "step": 16250 - }, - { - "epoch": 0.7474854177191935, - "grad_norm": 0.01614423282444477, - "learning_rate": 5.970024441325266e-06, - "loss": 0.06, - "step": 16275 - }, - { - "epoch": 0.7486336288063198, - "grad_norm": 0.2804288864135742, - "learning_rate": 5.918700755591138e-06, - "loss": 0.0173, - "step": 16300 - }, - { - "epoch": 0.749781839893446, - 
"grad_norm": 45.07802200317383, - "learning_rate": 5.867560295170967e-06, - "loss": 0.0699, - "step": 16325 - }, - { - "epoch": 0.7509300509805723, - "grad_norm": 2.6006088256835938, - "learning_rate": 5.816603725502412e-06, - "loss": 0.0218, - "step": 16350 - }, - { - "epoch": 0.7520782620676986, - "grad_norm": 0.07584603875875473, - "learning_rate": 5.7658317096303785e-06, - "loss": 0.0445, - "step": 16375 - }, - { - "epoch": 0.7532264731548248, - "grad_norm": 0.1880454570055008, - "learning_rate": 5.715244908198336e-06, - "loss": 0.0648, - "step": 16400 - }, - { - "epoch": 0.7543746842419511, - "grad_norm": 16.69906234741211, - "learning_rate": 5.664843979439765e-06, - "loss": 0.1212, - "step": 16425 - }, - { - "epoch": 0.7555228953290773, - "grad_norm": 0.2480192333459854, - "learning_rate": 5.614629579169568e-06, - "loss": 0.0396, - "step": 16450 - }, - { - "epoch": 0.7566711064162036, - "grad_norm": 234.59837341308594, - "learning_rate": 5.564602360775566e-06, - "loss": 0.1436, - "step": 16475 - }, - { - "epoch": 0.7578193175033298, - "grad_norm": 0.18311983346939087, - "learning_rate": 5.514762975209964e-06, - "loss": 0.0444, - "step": 16500 - }, - { - "epoch": 0.758967528590456, - "grad_norm": 0.024939807131886482, - "learning_rate": 5.465112070980885e-06, - "loss": 0.0401, - "step": 16525 - }, - { - "epoch": 0.7601157396775823, - "grad_norm": 0.010250646620988846, - "learning_rate": 5.415650294143944e-06, - "loss": 0.0773, - "step": 16550 - }, - { - "epoch": 0.7612639507647085, - "grad_norm": 0.34293168783187866, - "learning_rate": 5.366378288293856e-06, - "loss": 0.0606, - "step": 16575 - }, - { - "epoch": 0.7624121618518348, - "grad_norm": 1.0682902336120605, - "learning_rate": 5.317296694556029e-06, - "loss": 0.1323, - "step": 16600 - }, - { - "epoch": 0.7635603729389611, - "grad_norm": 0.12277599424123764, - "learning_rate": 5.268406151578234e-06, - "loss": 0.001, - "step": 16625 - }, - { - "epoch": 0.7647085840260873, - "grad_norm": 0.6210665702819824, - "learning_rate": 5.219707295522298e-06, - "loss": 0.046, - "step": 16650 - }, - { - "epoch": 0.7658567951132136, - "grad_norm": 19.144742965698242, - "learning_rate": 5.171200760055825e-06, - "loss": 0.0858, - "step": 16675 - }, - { - "epoch": 0.7670050062003398, - "grad_norm": 0.1801862269639969, - "learning_rate": 5.122887176343965e-06, - "loss": 0.0109, - "step": 16700 - }, - { - "epoch": 0.7681532172874661, - "grad_norm": 39.14018630981445, - "learning_rate": 5.074767173041169e-06, - "loss": 0.1078, - "step": 16725 - }, - { - "epoch": 0.7693014283745924, - "grad_norm": 0.11534450948238373, - "learning_rate": 5.0268413762830336e-06, - "loss": 0.0664, - "step": 16750 - }, - { - "epoch": 0.7704496394617186, - "grad_norm": 0.6854662299156189, - "learning_rate": 4.979110409678152e-06, - "loss": 0.0854, - "step": 16775 - }, - { - "epoch": 0.7715978505488449, - "grad_norm": 0.11855833977460861, - "learning_rate": 4.931574894299979e-06, - "loss": 0.0618, - "step": 16800 - }, - { - "epoch": 0.7727460616359711, - "grad_norm": 0.013820292428135872, - "learning_rate": 4.884235448678796e-06, - "loss": 0.0381, - "step": 16825 - }, - { - "epoch": 0.7738942727230974, - "grad_norm": 0.004285596311092377, - "learning_rate": 4.837092688793605e-06, - "loss": 0.0526, - "step": 16850 - }, - { - "epoch": 0.7750424838102237, - "grad_norm": 13.307071685791016, - "learning_rate": 4.7901472280641525e-06, - "loss": 0.0462, - "step": 16875 - }, - { - "epoch": 0.7761906948973499, - "grad_norm": 0.019855046644806862, - "learning_rate": 
4.743399677342926e-06, - "loss": 0.0933, - "step": 16900 - }, - { - "epoch": 0.7773389059844762, - "grad_norm": 0.12389481067657471, - "learning_rate": 4.696850644907234e-06, - "loss": 0.0464, - "step": 16925 - }, - { - "epoch": 0.7784871170716025, - "grad_norm": 0.09542281925678253, - "learning_rate": 4.65050073645126e-06, - "loss": 0.0691, - "step": 16950 - }, - { - "epoch": 0.7796353281587287, - "grad_norm": 0.2983253598213196, - "learning_rate": 4.6043505550781945e-06, - "loss": 0.0359, - "step": 16975 - }, - { - "epoch": 0.780783539245855, - "grad_norm": 14.008248329162598, - "learning_rate": 4.558400701292389e-06, - "loss": 0.0641, - "step": 17000 - }, - { - "epoch": 0.7819317503329812, - "grad_norm": 0.024128809571266174, - "learning_rate": 4.512651772991534e-06, - "loss": 0.0224, - "step": 17025 - }, - { - "epoch": 0.7830799614201075, - "grad_norm": 0.02002117410302162, - "learning_rate": 4.467104365458905e-06, - "loss": 0.0216, - "step": 17050 - }, - { - "epoch": 0.7842281725072338, - "grad_norm": 0.004471524618566036, - "learning_rate": 4.421759071355578e-06, - "loss": 0.0698, - "step": 17075 - }, - { - "epoch": 0.78537638359436, - "grad_norm": 14.2806978225708, - "learning_rate": 4.376616480712741e-06, - "loss": 0.0849, - "step": 17100 - }, - { - "epoch": 0.7865245946814863, - "grad_norm": 0.05184144154191017, - "learning_rate": 4.331677180924017e-06, - "loss": 0.1411, - "step": 17125 - }, - { - "epoch": 0.7876728057686125, - "grad_norm": 0.0295930914580822, - "learning_rate": 4.286941756737806e-06, - "loss": 0.0095, - "step": 17150 - }, - { - "epoch": 0.7888210168557388, - "grad_norm": 32.65333938598633, - "learning_rate": 4.242410790249705e-06, - "loss": 0.1065, - "step": 17175 - }, - { - "epoch": 0.7899692279428651, - "grad_norm": 0.07840365171432495, - "learning_rate": 4.198084860894902e-06, - "loss": 0.0555, - "step": 17200 - }, - { - "epoch": 0.7911174390299913, - "grad_norm": 0.10743328183889389, - "learning_rate": 4.153964545440652e-06, - "loss": 0.0224, - "step": 17225 - }, - { - "epoch": 0.7922656501171176, - "grad_norm": 0.038462039083242416, - "learning_rate": 4.11005041797877e-06, - "loss": 0.0474, - "step": 17250 - }, - { - "epoch": 0.7934138612042438, - "grad_norm": 0.08056436479091644, - "learning_rate": 4.066343049918156e-06, - "loss": 0.049, - "step": 17275 - }, - { - "epoch": 0.79456207229137, - "grad_norm": 0.2512977421283722, - "learning_rate": 4.022843009977388e-06, - "loss": 0.0407, - "step": 17300 - }, - { - "epoch": 0.7957102833784963, - "grad_norm": 47.47632598876953, - "learning_rate": 3.979550864177262e-06, - "loss": 0.0447, - "step": 17325 - }, - { - "epoch": 0.7968584944656225, - "grad_norm": 0.1907694935798645, - "learning_rate": 3.936467175833487e-06, - "loss": 0.1266, - "step": 17350 - }, - { - "epoch": 0.7980067055527488, - "grad_norm": 118.2800064086914, - "learning_rate": 3.893592505549335e-06, - "loss": 0.0792, - "step": 17375 - }, - { - "epoch": 0.799154916639875, - "grad_norm": 0.35802847146987915, - "learning_rate": 3.85092741120833e-06, - "loss": 0.0686, - "step": 17400 - }, - { - "epoch": 0.8003031277270013, - "grad_norm": 99.87713623046875, - "learning_rate": 3.808472447967009e-06, - "loss": 0.0879, - "step": 17425 - }, - { - "epoch": 0.8014513388141276, - "grad_norm": 0.14348523318767548, - "learning_rate": 3.76622816824769e-06, - "loss": 0.0545, - "step": 17450 - }, - { - "epoch": 0.8025995499012538, - "grad_norm": 0.20051589608192444, - "learning_rate": 3.7241951217312777e-06, - "loss": 0.0261, - "step": 17475 - }, - { - "epoch": 
0.8037477609883801, - "grad_norm": 11.250834465026855, - "learning_rate": 3.6823738553501408e-06, - "loss": 0.0782, - "step": 17500 - }, - { - "epoch": 0.8048959720755063, - "grad_norm": 0.04791349917650223, - "learning_rate": 3.64076491328095e-06, - "loss": 0.0687, - "step": 17525 - }, - { - "epoch": 0.8060441831626326, - "grad_norm": 126.63636016845703, - "learning_rate": 3.599368836937631e-06, - "loss": 0.033, - "step": 17550 - }, - { - "epoch": 0.8071923942497589, - "grad_norm": 0.020389311015605927, - "learning_rate": 3.558186164964306e-06, - "loss": 0.0724, - "step": 17575 - }, - { - "epoch": 0.8083406053368851, - "grad_norm": 0.026229048147797585, - "learning_rate": 3.517217433228284e-06, - "loss": 0.0687, - "step": 17600 - }, - { - "epoch": 0.8094888164240114, - "grad_norm": 0.02965112403035164, - "learning_rate": 3.476463174813105e-06, - "loss": 0.0642, - "step": 17625 - }, - { - "epoch": 0.8106370275111376, - "grad_norm": 0.09377150237560272, - "learning_rate": 3.4359239200115814e-06, - "loss": 0.0336, - "step": 17650 - }, - { - "epoch": 0.8117852385982639, - "grad_norm": 0.8769047260284424, - "learning_rate": 3.3956001963189045e-06, - "loss": 0.0494, - "step": 17675 - }, - { - "epoch": 0.8129334496853902, - "grad_norm": 0.2824127972126007, - "learning_rate": 3.3554925284257877e-06, - "loss": 0.0969, - "step": 17700 - }, - { - "epoch": 0.8140816607725164, - "grad_norm": 7.552093505859375, - "learning_rate": 3.3156014382116308e-06, - "loss": 0.052, - "step": 17725 - }, - { - "epoch": 0.8152298718596427, - "grad_norm": 0.0463847815990448, - "learning_rate": 3.2759274447377452e-06, - "loss": 0.0794, - "step": 17750 - }, - { - "epoch": 0.816378082946769, - "grad_norm": 0.02667197957634926, - "learning_rate": 3.2364710642405717e-06, - "loss": 0.0484, - "step": 17775 - }, - { - "epoch": 0.8175262940338952, - "grad_norm": 0.21280555427074432, - "learning_rate": 3.1972328101249927e-06, - "loss": 0.08, - "step": 17800 - }, - { - "epoch": 0.8186745051210215, - "grad_norm": 0.01187364012002945, - "learning_rate": 3.1582131929576263e-06, - "loss": 0.0336, - "step": 17825 - }, - { - "epoch": 0.8198227162081477, - "grad_norm": 0.022009817883372307, - "learning_rate": 3.119412720460204e-06, - "loss": 0.0624, - "step": 17850 - }, - { - "epoch": 0.820970927295274, - "grad_norm": 72.19200134277344, - "learning_rate": 3.080831897502958e-06, - "loss": 0.1004, - "step": 17875 - }, - { - "epoch": 0.8221191383824002, - "grad_norm": 0.12891149520874023, - "learning_rate": 3.0424712260980425e-06, - "loss": 0.0342, - "step": 17900 - }, - { - "epoch": 0.8232673494695265, - "grad_norm": 0.015281450003385544, - "learning_rate": 3.0043312053930095e-06, - "loss": 0.0711, - "step": 17925 - }, - { - "epoch": 0.8244155605566528, - "grad_norm": 0.06948760896921158, - "learning_rate": 2.96641233166431e-06, - "loss": 0.0971, - "step": 17950 - }, - { - "epoch": 0.825563771643779, - "grad_norm": 0.022813860327005386, - "learning_rate": 2.9287150983108526e-06, - "loss": 0.1079, - "step": 17975 - }, - { - "epoch": 0.8267119827309053, - "grad_norm": 0.027087997645139694, - "learning_rate": 2.8912399958475546e-06, - "loss": 0.0543, - "step": 18000 - }, - { - "epoch": 0.8278601938180316, - "grad_norm": 0.012122283689677715, - "learning_rate": 2.8539875118989813e-06, - "loss": 0.0429, - "step": 18025 - }, - { - "epoch": 0.8290084049051578, - "grad_norm": 0.1680118888616562, - "learning_rate": 2.816958131192993e-06, - "loss": 0.0524, - "step": 18050 - }, - { - "epoch": 0.8301566159922841, - "grad_norm": 0.015649737790226936, 
- "learning_rate": 2.7801523355544357e-06, - "loss": 0.0395, - "step": 18075 - }, - { - "epoch": 0.8313048270794102, - "grad_norm": 0.02703564055263996, - "learning_rate": 2.743570603898895e-06, - "loss": 0.0796, - "step": 18100 - }, - { - "epoch": 0.8324530381665365, - "grad_norm": 7.242969989776611, - "learning_rate": 2.707213412226417e-06, - "loss": 0.1004, - "step": 18125 - }, - { - "epoch": 0.8336012492536627, - "grad_norm": 0.03490910306572914, - "learning_rate": 2.6710812336153556e-06, - "loss": 0.0751, - "step": 18150 - }, - { - "epoch": 0.834749460340789, - "grad_norm": 0.08122912794351578, - "learning_rate": 2.635174538216203e-06, - "loss": 0.017, - "step": 18175 - }, - { - "epoch": 0.8358976714279153, - "grad_norm": 0.31337007880210876, - "learning_rate": 2.599493793245478e-06, - "loss": 0.044, - "step": 18200 - }, - { - "epoch": 0.8370458825150415, - "grad_norm": 0.022868860512971878, - "learning_rate": 2.564039462979635e-06, - "loss": 0.1036, - "step": 18225 - }, - { - "epoch": 0.8381940936021678, - "grad_norm": 0.1141880452632904, - "learning_rate": 2.5288120087490263e-06, - "loss": 0.0555, - "step": 18250 - }, - { - "epoch": 0.839342304689294, - "grad_norm": 0.021731365472078323, - "learning_rate": 2.4938118889319074e-06, - "loss": 0.0358, - "step": 18275 - }, - { - "epoch": 0.8404905157764203, - "grad_norm": 0.03330765664577484, - "learning_rate": 2.459039558948464e-06, - "loss": 0.0635, - "step": 18300 - }, - { - "epoch": 0.8416387268635466, - "grad_norm": 0.01710633747279644, - "learning_rate": 2.424495471254895e-06, - "loss": 0.1074, - "step": 18325 - }, - { - "epoch": 0.8427869379506728, - "grad_norm": 0.21491554379463196, - "learning_rate": 2.3901800753375137e-06, - "loss": 0.0803, - "step": 18350 - }, - { - "epoch": 0.8439351490377991, - "grad_norm": 0.03334735333919525, - "learning_rate": 2.356093817706908e-06, - "loss": 0.1059, - "step": 18375 - }, - { - "epoch": 0.8450833601249254, - "grad_norm": 10.869914054870605, - "learning_rate": 2.322237141892123e-06, - "loss": 0.0149, - "step": 18400 - }, - { - "epoch": 0.8462315712120516, - "grad_norm": 0.2710411250591278, - "learning_rate": 2.2886104884349035e-06, - "loss": 0.0454, - "step": 18425 - }, - { - "epoch": 0.8473797822991779, - "grad_norm": 0.3168097734451294, - "learning_rate": 2.255214294883943e-06, - "loss": 0.0519, - "step": 18450 - }, - { - "epoch": 0.8485279933863041, - "grad_norm": 0.020407404750585556, - "learning_rate": 2.2220489957892035e-06, - "loss": 0.0243, - "step": 18475 - }, - { - "epoch": 0.8496762044734304, - "grad_norm": 0.03963831812143326, - "learning_rate": 2.1891150226962577e-06, - "loss": 0.0435, - "step": 18500 - }, - { - "epoch": 0.8508244155605567, - "grad_norm": 0.031030111014842987, - "learning_rate": 2.1564128041406685e-06, - "loss": 0.0417, - "step": 18525 - }, - { - "epoch": 0.8519726266476829, - "grad_norm": 7.520127773284912, - "learning_rate": 2.1239427656424306e-06, - "loss": 0.0576, - "step": 18550 - }, - { - "epoch": 0.8531208377348092, - "grad_norm": 0.009268928319215775, - "learning_rate": 2.0917053297004086e-06, - "loss": 0.0296, - "step": 18575 - }, - { - "epoch": 0.8542690488219354, - "grad_norm": 15.918971061706543, - "learning_rate": 2.059700915786853e-06, - "loss": 0.0514, - "step": 18600 - }, - { - "epoch": 0.8554172599090617, - "grad_norm": 0.021697254851460457, - "learning_rate": 2.0279299403419483e-06, - "loss": 0.0315, - "step": 18625 - }, - { - "epoch": 0.856565470996188, - "grad_norm": 0.029788566753268242, - "learning_rate": 1.9963928167683756e-06, - "loss": 
0.002, - "step": 18650 - }, - { - "epoch": 0.8577136820833142, - "grad_norm": 0.03993965685367584, - "learning_rate": 1.965089955425956e-06, - "loss": 0.0918, - "step": 18675 - }, - { - "epoch": 0.8588618931704405, - "grad_norm": 0.01322225946933031, - "learning_rate": 1.9340217636262948e-06, - "loss": 0.0726, - "step": 18700 - }, - { - "epoch": 0.8600101042575667, - "grad_norm": 8.532318115234375, - "learning_rate": 1.903188645627485e-06, - "loss": 0.121, - "step": 18725 - }, - { - "epoch": 0.861158315344693, - "grad_norm": 0.8324846625328064, - "learning_rate": 1.8725910026288496e-06, - "loss": 0.0393, - "step": 18750 - }, - { - "epoch": 0.8623065264318193, - "grad_norm": 7.0772576332092285, - "learning_rate": 1.8422292327657153e-06, - "loss": 0.0515, - "step": 18775 - }, - { - "epoch": 0.8634547375189455, - "grad_norm": 0.027621662244200706, - "learning_rate": 1.8121037311042512e-06, - "loss": 0.0582, - "step": 18800 - }, - { - "epoch": 0.8646029486060718, - "grad_norm": 0.23945468664169312, - "learning_rate": 1.7822148896363e-06, - "loss": 0.0701, - "step": 18825 - }, - { - "epoch": 0.865751159693198, - "grad_norm": 46.322044372558594, - "learning_rate": 1.7525630972742958e-06, - "loss": 0.0458, - "step": 18850 - }, - { - "epoch": 0.8668993707803243, - "grad_norm": 0.22386154532432556, - "learning_rate": 1.7231487398461943e-06, - "loss": 0.0535, - "step": 18875 - }, - { - "epoch": 0.8680475818674505, - "grad_norm": 0.15120260417461395, - "learning_rate": 1.6939722000904724e-06, - "loss": 0.0679, - "step": 18900 - }, - { - "epoch": 0.8691957929545767, - "grad_norm": 0.014234176836907864, - "learning_rate": 1.6650338576511128e-06, - "loss": 0.0751, - "step": 18925 - }, - { - "epoch": 0.870344004041703, - "grad_norm": 0.3317568600177765, - "learning_rate": 1.6363340890726953e-06, - "loss": 0.0044, - "step": 18950 - }, - { - "epoch": 0.8714922151288292, - "grad_norm": 0.04376505687832832, - "learning_rate": 1.607873267795481e-06, - "loss": 0.0367, - "step": 18975 - }, - { - "epoch": 0.8726404262159555, - "grad_norm": 150.4222869873047, - "learning_rate": 1.5796517641505692e-06, - "loss": 0.1018, - "step": 19000 - }, - { - "epoch": 0.8737886373030818, - "grad_norm": 0.057014692574739456, - "learning_rate": 1.5516699453550542e-06, - "loss": 0.0369, - "step": 19025 - }, - { - "epoch": 0.874936848390208, - "grad_norm": 15.234986305236816, - "learning_rate": 1.5239281755072655e-06, - "loss": 0.0832, - "step": 19050 - }, - { - "epoch": 0.8760850594773343, - "grad_norm": 15.890921592712402, - "learning_rate": 1.4964268155820261e-06, - "loss": 0.0393, - "step": 19075 - }, - { - "epoch": 0.8772332705644605, - "grad_norm": 0.3105641305446625, - "learning_rate": 1.4691662234259507e-06, - "loss": 0.0443, - "step": 19100 - }, - { - "epoch": 0.8783814816515868, - "grad_norm": 0.4761696457862854, - "learning_rate": 1.4421467537528022e-06, - "loss": 0.0697, - "step": 19125 - }, - { - "epoch": 0.8795296927387131, - "grad_norm": 0.06688586622476578, - "learning_rate": 1.4153687581388598e-06, - "loss": 0.0267, - "step": 19150 - }, - { - "epoch": 0.8806779038258393, - "grad_norm": 0.0743216946721077, - "learning_rate": 1.3888325850183494e-06, - "loss": 0.0311, - "step": 19175 - }, - { - "epoch": 0.8818261149129656, - "grad_norm": 26.711551666259766, - "learning_rate": 1.362538579678918e-06, - "loss": 0.0641, - "step": 19200 - }, - { - "epoch": 0.8829743260000918, - "grad_norm": 0.05731752887368202, - "learning_rate": 1.3364870842571298e-06, - "loss": 0.0191, - "step": 19225 - }, - { - "epoch": 
0.8841225370872181, - "grad_norm": 0.12287302315235138, - "learning_rate": 1.3106784377340276e-06, - "loss": 0.0728, - "step": 19250 - }, - { - "epoch": 0.8852707481743444, - "grad_norm": 0.14047569036483765, - "learning_rate": 1.2851129759307047e-06, - "loss": 0.062, - "step": 19275 - }, - { - "epoch": 0.8864189592614706, - "grad_norm": 0.01031449530273676, - "learning_rate": 1.2597910315039496e-06, - "loss": 0.0776, - "step": 19300 - }, - { - "epoch": 0.8875671703485969, - "grad_norm": 0.013590280897915363, - "learning_rate": 1.2347129339419083e-06, - "loss": 0.0013, - "step": 19325 - }, - { - "epoch": 0.8887153814357232, - "grad_norm": 0.09587027877569199, - "learning_rate": 1.209879009559809e-06, - "loss": 0.0635, - "step": 19350 - }, - { - "epoch": 0.8898635925228494, - "grad_norm": 8.050373077392578, - "learning_rate": 1.1852895814956988e-06, - "loss": 0.0532, - "step": 19375 - }, - { - "epoch": 0.8910118036099757, - "grad_norm": 29.888036727905273, - "learning_rate": 1.1609449697062547e-06, - "loss": 0.1204, - "step": 19400 - }, - { - "epoch": 0.8921600146971019, - "grad_norm": 0.007251910865306854, - "learning_rate": 1.1368454909626058e-06, - "loss": 0.0317, - "step": 19425 - }, - { - "epoch": 0.8933082257842282, - "grad_norm": 0.022124579176306725, - "learning_rate": 1.1129914588462288e-06, - "loss": 0.0285, - "step": 19450 - }, - { - "epoch": 0.8944564368713545, - "grad_norm": 0.20724959671497345, - "learning_rate": 1.0893831837448566e-06, - "loss": 0.0535, - "step": 19475 - }, - { - "epoch": 0.8956046479584807, - "grad_norm": 0.03264782205224037, - "learning_rate": 1.0660209728484383e-06, - "loss": 0.0531, - "step": 19500 - }, - { - "epoch": 0.896752859045607, - "grad_norm": 0.30090564489364624, - "learning_rate": 1.0429051301451444e-06, - "loss": 0.0339, - "step": 19525 - }, - { - "epoch": 0.8979010701327332, - "grad_norm": 0.01747279241681099, - "learning_rate": 1.0200359564174157e-06, - "loss": 0.0655, - "step": 19550 - }, - { - "epoch": 0.8990492812198595, - "grad_norm": 0.43753546476364136, - "learning_rate": 9.974137492380431e-07, - "loss": 0.0686, - "step": 19575 - }, - { - "epoch": 0.9001974923069858, - "grad_norm": 0.16258420050144196, - "learning_rate": 9.750388029663061e-07, - "loss": 0.078, - "step": 19600 - }, - { - "epoch": 0.901345703394112, - "grad_norm": 0.1479305922985077, - "learning_rate": 9.529114087441216e-07, - "loss": 0.018, - "step": 19625 - }, - { - "epoch": 0.9024939144812383, - "grad_norm": 0.008134293369948864, - "learning_rate": 9.310318544922791e-07, - "loss": 0.0675, - "step": 19650 - }, - { - "epoch": 0.9036421255683645, - "grad_norm": 0.42792314291000366, - "learning_rate": 9.094004249066812e-07, - "loss": 0.0833, - "step": 19675 - }, - { - "epoch": 0.9047903366554907, - "grad_norm": 21.225181579589844, - "learning_rate": 8.880174014546417e-07, - "loss": 0.0778, - "step": 19700 - }, - { - "epoch": 0.905938547742617, - "grad_norm": 1.8646355867385864, - "learning_rate": 8.668830623712243e-07, - "loss": 0.05, - "step": 19725 - }, - { - "epoch": 0.9070867588297432, - "grad_norm": 0.005776954814791679, - "learning_rate": 8.459976826556194e-07, - "loss": 0.0939, - "step": 19750 - }, - { - "epoch": 0.9082349699168695, - "grad_norm": 0.02450496144592762, - "learning_rate": 8.253615340675658e-07, - "loss": 0.0309, - "step": 19775 - }, - { - "epoch": 0.9093831810039957, - "grad_norm": 0.02992299385368824, - "learning_rate": 8.049748851238304e-07, - "loss": 0.035, - "step": 19800 - }, - { - "epoch": 0.910531392091122, - "grad_norm": 108.27339172363281, - 
"learning_rate": 7.848380010946832e-07, - "loss": 0.0834, - "step": 19825 - }, - { - "epoch": 0.9116796031782483, - "grad_norm": 0.3087676167488098, - "learning_rate": 7.64951144000472e-07, - "loss": 0.1033, - "step": 19850 - }, - { - "epoch": 0.9128278142653745, - "grad_norm": 15.906920433044434, - "learning_rate": 7.453145726082023e-07, - "loss": 0.0817, - "step": 19875 - }, - { - "epoch": 0.9139760253525008, - "grad_norm": 0.0587725006043911, - "learning_rate": 7.25928542428167e-07, - "loss": 0.0329, - "step": 19900 - }, - { - "epoch": 0.915124236439627, - "grad_norm": 0.01723266765475273, - "learning_rate": 7.06793305710638e-07, - "loss": 0.0183, - "step": 19925 - }, - { - "epoch": 0.9162724475267533, - "grad_norm": 0.01341981254518032, - "learning_rate": 6.87909111442564e-07, - "loss": 0.0285, - "step": 19950 - }, - { - "epoch": 0.9174206586138796, - "grad_norm": 0.079460009932518, - "learning_rate": 6.69276205344338e-07, - "loss": 0.0697, - "step": 19975 - }, - { - "epoch": 0.9185688697010058, - "grad_norm": 0.03450249508023262, - "learning_rate": 6.50894829866604e-07, - "loss": 0.0491, - "step": 20000 - }, - { - "epoch": 0.9197170807881321, - "grad_norm": 0.05472569912672043, - "learning_rate": 6.327652241870996e-07, - "loss": 0.0937, - "step": 20025 - }, - { - "epoch": 0.9208652918752583, - "grad_norm": 0.27753645181655884, - "learning_rate": 6.148876242075475e-07, - "loss": 0.0456, - "step": 20050 - }, - { - "epoch": 0.9220135029623846, - "grad_norm": 0.0179009847342968, - "learning_rate": 5.972622625505753e-07, - "loss": 0.0364, - "step": 20075 - }, - { - "epoch": 0.9231617140495109, - "grad_norm": 0.07981289178133011, - "learning_rate": 5.798893685566964e-07, - "loss": 0.061, - "step": 20100 - }, - { - "epoch": 0.9243099251366371, - "grad_norm": 0.020030954852700233, - "learning_rate": 5.627691682813296e-07, - "loss": 0.0784, - "step": 20125 - }, - { - "epoch": 0.9254581362237634, - "grad_norm": 0.01465025544166565, - "learning_rate": 5.459018844918462e-07, - "loss": 0.0563, - "step": 20150 - }, - { - "epoch": 0.9266063473108896, - "grad_norm": 0.14181044697761536, - "learning_rate": 5.292877366646853e-07, - "loss": 0.0499, - "step": 20175 - }, - { - "epoch": 0.9277545583980159, - "grad_norm": 0.02160894311964512, - "learning_rate": 5.129269409824877e-07, - "loss": 0.0837, - "step": 20200 - }, - { - "epoch": 0.9289027694851422, - "grad_norm": 0.0065475874580442905, - "learning_rate": 4.968197103312844e-07, - "loss": 0.043, - "step": 20225 - }, - { - "epoch": 0.9300509805722684, - "grad_norm": 0.27221518754959106, - "learning_rate": 4.809662542977279e-07, - "loss": 0.0252, - "step": 20250 - }, - { - "epoch": 0.9311991916593947, - "grad_norm": 0.10816125571727753, - "learning_rate": 4.653667791663696e-07, - "loss": 0.0313, - "step": 20275 - }, - { - "epoch": 0.932347402746521, - "grad_norm": 0.011103514581918716, - "learning_rate": 4.500214879169651e-07, - "loss": 0.0285, - "step": 20300 - }, - { - "epoch": 0.9334956138336472, - "grad_norm": 0.12883403897285461, - "learning_rate": 4.3493058022184844e-07, - "loss": 0.0435, - "step": 20325 - }, - { - "epoch": 0.9346438249207735, - "grad_norm": 0.6828334927558899, - "learning_rate": 4.2009425244331493e-07, - "loss": 0.0606, - "step": 20350 - }, - { - "epoch": 0.9357920360078997, - "grad_norm": 0.06785145401954651, - "learning_rate": 4.055126976310786e-07, - "loss": 0.0012, - "step": 20375 - }, - { - "epoch": 0.936940247095026, - "grad_norm": 0.00857201311737299, - "learning_rate": 3.91186105519763e-07, - "loss": 0.0316, - "step": 
20400 - }, - { - "epoch": 0.9380884581821523, - "grad_norm": 0.025530261918902397, - "learning_rate": 3.7711466252642324e-07, - "loss": 0.1559, - "step": 20425 - }, - { - "epoch": 0.9392366692692785, - "grad_norm": 0.021817797794938087, - "learning_rate": 3.632985517481213e-07, - "loss": 0.0298, - "step": 20450 - }, - { - "epoch": 0.9403848803564048, - "grad_norm": 0.006989351473748684, - "learning_rate": 3.4973795295955237e-07, - "loss": 0.0314, - "step": 20475 - }, - { - "epoch": 0.941533091443531, - "grad_norm": 0.01993320696055889, - "learning_rate": 3.364330426106932e-07, - "loss": 0.0323, - "step": 20500 - }, - { - "epoch": 0.9426813025306572, - "grad_norm": 7.905085563659668, - "learning_rate": 3.2338399382451977e-07, - "loss": 0.0802, - "step": 20525 - }, - { - "epoch": 0.9438295136177834, - "grad_norm": 0.06555715948343277, - "learning_rate": 3.1059097639474014e-07, - "loss": 0.0775, - "step": 20550 - }, - { - "epoch": 0.9449777247049097, - "grad_norm": 14.530689239501953, - "learning_rate": 2.9805415678359816e-07, - "loss": 0.0629, - "step": 20575 - }, - { - "epoch": 0.946125935792036, - "grad_norm": 0.01627318374812603, - "learning_rate": 2.8577369811969345e-07, - "loss": 0.053, - "step": 20600 - }, - { - "epoch": 0.9472741468791622, - "grad_norm": 0.0996640995144844, - "learning_rate": 2.737497601958827e-07, - "loss": 0.0729, - "step": 20625 - }, - { - "epoch": 0.9484223579662885, - "grad_norm": 0.1124410629272461, - "learning_rate": 2.6198249946716824e-07, - "loss": 0.0693, - "step": 20650 - }, - { - "epoch": 0.9495705690534147, - "grad_norm": 0.039213843643665314, - "learning_rate": 2.5047206904868616e-07, - "loss": 0.0742, - "step": 20675 - }, - { - "epoch": 0.950718780140541, - "grad_norm": 0.014431199990212917, - "learning_rate": 2.392186187137058e-07, - "loss": 0.0113, - "step": 20700 - }, - { - "epoch": 0.9518669912276673, - "grad_norm": 0.02886173129081726, - "learning_rate": 2.282222948916868e-07, - "loss": 0.0144, - "step": 20725 - }, - { - "epoch": 0.9530152023147935, - "grad_norm": 0.0110493628308177, - "learning_rate": 2.174832406663585e-07, - "loss": 0.046, - "step": 20750 - }, - { - "epoch": 0.9541634134019198, - "grad_norm": 0.015410786494612694, - "learning_rate": 2.070015957738747e-07, - "loss": 0.0199, - "step": 20775 - }, - { - "epoch": 0.955311624489046, - "grad_norm": 0.06066295877099037, - "learning_rate": 1.9677749660098831e-07, - "loss": 0.068, - "step": 20800 - }, - { - "epoch": 0.9564598355761723, - "grad_norm": 0.8917893767356873, - "learning_rate": 1.8681107618327755e-07, - "loss": 0.062, - "step": 20825 - }, - { - "epoch": 0.9576080466632986, - "grad_norm": 0.018326930701732635, - "learning_rate": 1.7710246420341582e-07, - "loss": 0.0741, - "step": 20850 - }, - { - "epoch": 0.9587562577504248, - "grad_norm": 0.0710015818476677, - "learning_rate": 1.676517869894778e-07, - "loss": 0.081, - "step": 20875 - }, - { - "epoch": 0.9599044688375511, - "grad_norm": 0.04719538986682892, - "learning_rate": 1.584591675133096e-07, - "loss": 0.029, - "step": 20900 - }, - { - "epoch": 0.9610526799246774, - "grad_norm": 0.012458267621695995, - "learning_rate": 1.4952472538891205e-07, - "loss": 0.0572, - "step": 20925 - }, - { - "epoch": 0.9622008910118036, - "grad_norm": 0.11031362414360046, - "learning_rate": 1.408485768708956e-07, - "loss": 0.0264, - "step": 20950 - }, - { - "epoch": 0.9633491020989299, - "grad_norm": 0.36117517948150635, - "learning_rate": 1.3243083485296793e-07, - "loss": 0.0468, - "step": 20975 - }, - { - "epoch": 0.9644973131860561, - 
"grad_norm": 0.02414921671152115, - "learning_rate": 1.242716088664575e-07, - "loss": 0.0524, - "step": 21000 - }, - { - "epoch": 0.9656455242731824, - "grad_norm": 0.15573085844516754, - "learning_rate": 1.1637100507889243e-07, - "loss": 0.0261, - "step": 21025 - }, - { - "epoch": 0.9667937353603087, - "grad_norm": 17.496049880981445, - "learning_rate": 1.0872912629261934e-07, - "loss": 0.1225, - "step": 21050 - }, - { - "epoch": 0.9679419464474349, - "grad_norm": 0.3148431181907654, - "learning_rate": 1.0134607194346446e-07, - "loss": 0.1067, - "step": 21075 - }, - { - "epoch": 0.9690901575345612, - "grad_norm": 0.05796843022108078, - "learning_rate": 9.422193809944358e-08, - "loss": 0.0683, - "step": 21100 - }, - { - "epoch": 0.9702383686216874, - "grad_norm": 0.009837535209953785, - "learning_rate": 8.735681745950741e-08, - "loss": 0.0158, - "step": 21125 - }, - { - "epoch": 0.9713865797088137, - "grad_norm": 0.031075546517968178, - "learning_rate": 8.075079935233599e-08, - "loss": 0.0333, - "step": 21150 - }, - { - "epoch": 0.97253479079594, - "grad_norm": 0.11271132528781891, - "learning_rate": 7.440396973517727e-08, - "loss": 0.0506, - "step": 21175 - }, - { - "epoch": 0.9736830018830662, - "grad_norm": 0.03392348811030388, - "learning_rate": 6.831641119272814e-08, - "loss": 0.0162, - "step": 21200 - }, - { - "epoch": 0.9748312129701925, - "grad_norm": 0.02314956858754158, - "learning_rate": 6.248820293606628e-08, - "loss": 0.0234, - "step": 21225 - }, - { - "epoch": 0.9759794240573187, - "grad_norm": 0.08869185298681259, - "learning_rate": 5.691942080160662e-08, - "loss": 0.0305, - "step": 21250 - }, - { - "epoch": 0.977127635144445, - "grad_norm": 0.08665268123149872, - "learning_rate": 5.16101372501221e-08, - "loss": 0.0254, - "step": 21275 - }, - { - "epoch": 0.9782758462315713, - "grad_norm": 0.1377086043357849, - "learning_rate": 4.6560421365804406e-08, - "loss": 0.0301, - "step": 21300 - }, - { - "epoch": 0.9794240573186974, - "grad_norm": 112.7161865234375, - "learning_rate": 4.1770338855360305e-08, - "loss": 0.0663, - "step": 21325 - }, - { - "epoch": 0.9805722684058237, - "grad_norm": 0.02316017635166645, - "learning_rate": 3.723995204715225e-08, - "loss": 0.0475, - "step": 21350 - }, - { - "epoch": 0.9817204794929499, - "grad_norm": 0.011443709954619408, - "learning_rate": 3.296931989039909e-08, - "loss": 0.0341, - "step": 21375 - }, - { - "epoch": 0.9828686905800762, - "grad_norm": 0.22276480495929718, - "learning_rate": 2.8958497954396646e-08, - "loss": 0.1059, - "step": 21400 - }, - { - "epoch": 0.9840169016672025, - "grad_norm": 0.2308289110660553, - "learning_rate": 2.520753842780277e-08, - "loss": 0.0883, - "step": 21425 - }, - { - "epoch": 0.9851651127543287, - "grad_norm": 0.011477758176624775, - "learning_rate": 2.1716490117957846e-08, - "loss": 0.062, - "step": 21450 - }, - { - "epoch": 0.986313323841455, - "grad_norm": 0.005774139892309904, - "learning_rate": 1.8485398450243107e-08, - "loss": 0.0689, - "step": 21475 - }, - { - "epoch": 0.9874615349285812, - "grad_norm": 0.10775581747293472, - "learning_rate": 1.551430546749888e-08, - "loss": 0.0417, - "step": 21500 - }, - { - "epoch": 0.9886097460157075, - "grad_norm": 0.013670174404978752, - "learning_rate": 1.2803249829471675e-08, - "loss": 0.0569, - "step": 21525 - }, - { - "epoch": 0.9897579571028338, - "grad_norm": 0.2979724705219269, - "learning_rate": 1.0352266812307943e-08, - "loss": 0.0224, - "step": 21550 - }, - { - "epoch": 0.99090616818996, - "grad_norm": 0.1788400113582611, - "learning_rate": 
8.161388308103314e-09, - "loss": 0.0926, - "step": 21575 - }, - { - "epoch": 0.9920543792770863, - "grad_norm": 0.06912702322006226, - "learning_rate": 6.230642824485156e-09, - "loss": 0.0808, - "step": 21600 - }, - { - "epoch": 0.9932025903642125, - "grad_norm": 0.05865752696990967, - "learning_rate": 4.560055484235104e-09, - "loss": 0.0343, - "step": 21625 - }, - { - "epoch": 0.9943508014513388, - "grad_norm": 0.08185602724552155, - "learning_rate": 3.1496480249737506e-09, - "loss": 0.0328, - "step": 21650 - }, - { - "epoch": 0.9954990125384651, - "grad_norm": 0.564564049243927, - "learning_rate": 1.999438798863107e-09, - "loss": 0.0388, - "step": 21675 - }, - { - "epoch": 0.9966472236255913, - "grad_norm": 0.5557194948196411, - "learning_rate": 1.1094427723845613e-09, - "loss": 0.0493, - "step": 21700 - }, - { - "epoch": 0.9977954347127176, - "grad_norm": 0.0258924663066864, - "learning_rate": 4.796715261323748e-10, - "loss": 0.0509, - "step": 21725 - }, - { - "epoch": 0.9989436457998438, - "grad_norm": 0.021062856540083885, - "learning_rate": 1.1013325466047164e-10, - "loss": 0.0594, - "step": 21750 - }, - { - "epoch": 1.0, - "eval_accuracy": 0.9813904967021133, - "eval_auc": 0.9926618557097038, - "eval_f1": 0.98761561373256, - "eval_loss": 0.0981861874461174, - "eval_precision": 0.982620320855615, - "eval_recall": 0.9926619547112051, - "eval_runtime": 4593.0656, - "eval_samples_per_second": 6.47, - "eval_steps_per_second": 0.27, - "step": 21773 - } - ], - "logging_steps": 25, - "max_steps": 21773, - "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 500, - "stateful_callbacks": { - "EarlyStoppingCallback": { - "args": { - "early_stopping_patience": 5, - "early_stopping_threshold": 0.01 - }, - "attributes": { - "early_stopping_patience_counter": 0 - } - }, - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 6.0162864745331294e+19, - "train_batch_size": 12, - "trial_name": null, - "trial_params": null -}
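
The deleted JSON above is the Trainer's serialized run state (the file HuggingFace Trainer conventionally writes as trainer_state.json inside each checkpoint): each entry under log_history records epoch, grad_norm, learning_rate, loss, and step (logged every 25 steps), the final entry carries the eval_* metrics computed at step 21773, and the trailing keys capture the run configuration (a single epoch, early stopping with patience 5 and threshold 0.01, train batch size 12). The following is a minimal Python sketch of reading such a file back to inspect the logged curves and the final evaluation; the local file path is hypothetical, and only field names visible in the JSON above are assumed.

    import json

    # Hypothetical local path to a restored copy of the removed file.
    path = "trainer_state.json"

    with open(path) as f:
        state = json.load(f)

    # Training entries carry "loss"; the evaluation entry carries "eval_*" keys instead.
    train_logs = [e for e in state["log_history"] if "loss" in e and "eval_loss" not in e]
    eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

    print("logged training points:", len(train_logs))
    print("final training loss:", train_logs[-1]["loss"], "at step", train_logs[-1]["step"])

    for e in eval_logs:
        print("step", e["step"],
              "eval_loss", e["eval_loss"],
              "accuracy", e["eval_accuracy"],
              "f1", e["eval_f1"])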