diff --git "a/robin-7b/trainer_state.json" "b/robin-7b/trainer_state.json" --- "a/robin-7b/trainer_state.json" +++ "b/robin-7b/trainer_state.json" @@ -1,1717 +1,13375 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 9.9867197875166, - "global_step": 5640, + "epoch": 5.0, + "global_step": 44500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.04, - "learning_rate": 9.411764705882353e-05, - "loss": 1.501, + "epoch": 0.0, + "learning_rate": 1.4981273408239701e-06, + "loss": 1.6359, "step": 20 }, { - "epoch": 0.07, - "learning_rate": 0.00018823529411764707, - "loss": 1.4128, + "epoch": 0.0, + "learning_rate": 2.9962546816479402e-06, + "loss": 1.6528, "step": 40 }, { - "epoch": 0.11, - "learning_rate": 0.0002823529411764706, - "loss": 1.325, + "epoch": 0.01, + "learning_rate": 4.49438202247191e-06, + "loss": 1.6445, "step": 60 }, { - "epoch": 0.14, - "learning_rate": 0.00037647058823529414, - "loss": 1.2828, + "epoch": 0.01, + "learning_rate": 5.9925093632958805e-06, + "loss": 1.6479, "step": 80 }, { - "epoch": 0.18, - "learning_rate": 0.00047058823529411766, - "loss": 1.2758, + "epoch": 0.01, + "learning_rate": 7.490636704119851e-06, + "loss": 1.608, "step": 100 }, { - "epoch": 0.21, - "learning_rate": 0.0005647058823529412, - "loss": 1.2667, + "epoch": 0.01, + "learning_rate": 8.98876404494382e-06, + "loss": 1.6177, "step": 120 }, { - "epoch": 0.25, - "learning_rate": 0.0006588235294117648, - "loss": 1.2504, + "epoch": 0.02, + "learning_rate": 1.0486891385767791e-05, + "loss": 1.5548, "step": 140 }, { - "epoch": 0.28, - "learning_rate": 0.0007529411764705883, - "loss": 1.2394, + "epoch": 0.02, + "learning_rate": 1.1985018726591761e-05, + "loss": 1.527, "step": 160 }, { - "epoch": 0.32, - "learning_rate": 0.0007999934028874321, - "loss": 1.228, + "epoch": 0.02, + "learning_rate": 1.348314606741573e-05, + "loss": 1.4841, "step": 180 }, { - "epoch": 0.35, - "learning_rate": 0.0007999406272925394, - "loss": 1.2138, + "epoch": 0.02, + "learning_rate": 1.4981273408239702e-05, + "loss": 1.4651, "step": 200 }, { - "epoch": 0.39, - "learning_rate": 0.0007998350830660272, - "loss": 1.2122, + "epoch": 0.02, + "learning_rate": 1.647940074906367e-05, + "loss": 1.4259, "step": 220 }, { - "epoch": 0.42, - "learning_rate": 0.0007996767841335234, - "loss": 1.219, + "epoch": 0.03, + "learning_rate": 1.797752808988764e-05, + "loss": 1.4087, "step": 240 }, { - "epoch": 0.46, - "learning_rate": 0.0007994657513811737, - "loss": 1.1998, + "epoch": 0.03, + "learning_rate": 1.9475655430711613e-05, + "loss": 1.4239, "step": 260 }, { - "epoch": 0.5, - "learning_rate": 0.0007992020126528848, - "loss": 1.188, + "epoch": 0.03, + "learning_rate": 2.0973782771535582e-05, + "loss": 1.4119, "step": 280 }, { - "epoch": 0.53, - "learning_rate": 0.0007988856027466511, - "loss": 1.1931, + "epoch": 0.03, + "learning_rate": 2.2471910112359552e-05, + "loss": 1.3844, "step": 300 }, { - "epoch": 0.57, - "learning_rate": 0.000798516563409964, - "loss": 1.1753, + "epoch": 0.04, + "learning_rate": 2.3970037453183522e-05, + "loss": 1.3582, "step": 320 }, { - "epoch": 0.6, - "learning_rate": 0.0007980949433343026, - "loss": 1.173, + "epoch": 0.04, + "learning_rate": 2.546816479400749e-05, + "loss": 1.3717, "step": 340 }, { - "epoch": 0.64, - "learning_rate": 0.0007976207981487104, - "loss": 1.1637, + "epoch": 0.04, + "learning_rate": 2.696629213483146e-05, + "loss": 1.3852, "step": 360 }, { - "epoch": 0.67, - "learning_rate": 0.0007970941904124546, - "loss": 1.1651, + "epoch": 0.04, + "learning_rate": 2.846441947565543e-05, + "loss": 1.3896, "step": 380 }, { - "epoch": 0.71, - "learning_rate": 0.0007965151896067728, - "loss": 1.1559, + "epoch": 0.04, + "learning_rate": 2.9962546816479404e-05, + "loss": 1.3512, "step": 400 }, { - "epoch": 0.74, - "learning_rate": 0.0007958838721257046, - "loss": 1.1658, + "epoch": 0.05, + "learning_rate": 3.1460674157303374e-05, + "loss": 1.3857, "step": 420 }, { - "epoch": 0.78, - "learning_rate": 0.0007952003212660127, - "loss": 1.1496, + "epoch": 0.05, + "learning_rate": 3.295880149812734e-05, + "loss": 1.3396, "step": 440 }, { - "epoch": 0.81, - "learning_rate": 0.0007944646272161933, - "loss": 1.1471, + "epoch": 0.05, + "learning_rate": 3.445692883895131e-05, + "loss": 1.3324, "step": 460 }, { - "epoch": 0.85, - "learning_rate": 0.0007936768870445747, - "loss": 1.1311, + "epoch": 0.05, + "learning_rate": 3.595505617977528e-05, + "loss": 1.3522, "step": 480 }, { - "epoch": 0.89, - "learning_rate": 0.0007928372046865116, - "loss": 1.1301, + "epoch": 0.06, + "learning_rate": 3.745318352059925e-05, + "loss": 1.3429, "step": 500 }, { - "epoch": 0.92, - "learning_rate": 0.0007919456909306711, - "loss": 1.134, + "epoch": 0.06, + "learning_rate": 3.8951310861423226e-05, + "loss": 1.3368, "step": 520 }, { - "epoch": 0.96, - "learning_rate": 0.0007910024634044154, - "loss": 1.1235, + "epoch": 0.06, + "learning_rate": 4.044943820224719e-05, + "loss": 1.3165, "step": 540 }, { - "epoch": 0.99, - "learning_rate": 0.0007900076465582816, - "loss": 1.1239, + "epoch": 0.06, + "learning_rate": 4.1947565543071165e-05, + "loss": 1.3356, "step": 560 }, { - "epoch": 1.03, - "learning_rate": 0.0007889613716495616, - "loss": 1.0878, + "epoch": 0.07, + "learning_rate": 4.344569288389513e-05, + "loss": 1.2966, "step": 580 }, { - "epoch": 1.06, - "learning_rate": 0.0007878637767249839, - "loss": 1.0879, + "epoch": 0.07, + "learning_rate": 4.4943820224719104e-05, + "loss": 1.3255, "step": 600 }, { - "epoch": 1.1, - "learning_rate": 0.0007867150066024996, - "loss": 1.0671, + "epoch": 0.07, + "learning_rate": 4.644194756554308e-05, + "loss": 1.2908, "step": 620 }, { - "epoch": 1.13, - "learning_rate": 0.0007855152128521754, - "loss": 1.0689, + "epoch": 0.07, + "learning_rate": 4.7940074906367044e-05, + "loss": 1.2983, "step": 640 }, { - "epoch": 1.17, - "learning_rate": 0.0007842645537761941, - "loss": 1.0794, + "epoch": 0.07, + "learning_rate": 4.943820224719101e-05, + "loss": 1.2979, "step": 660 }, { - "epoch": 1.2, - "learning_rate": 0.0007829631943879694, - "loss": 1.0653, + "epoch": 0.08, + "learning_rate": 5.093632958801498e-05, + "loss": 1.3171, "step": 680 }, { - "epoch": 1.24, - "learning_rate": 0.0007816113063903726, - "loss": 1.066, + "epoch": 0.08, + "learning_rate": 5.243445692883895e-05, + "loss": 1.3026, "step": 700 }, { - "epoch": 1.27, - "learning_rate": 0.0007802090681530788, - "loss": 1.0675, + "epoch": 0.08, + "learning_rate": 5.393258426966292e-05, + "loss": 1.3105, "step": 720 }, { - "epoch": 1.31, - "learning_rate": 0.0007787566646890325, - "loss": 1.0598, + "epoch": 0.08, + "learning_rate": 5.5430711610486895e-05, + "loss": 1.3146, "step": 740 }, { - "epoch": 1.35, - "learning_rate": 0.0007772542876300359, - "loss": 1.0669, + "epoch": 0.09, + "learning_rate": 5.692883895131086e-05, + "loss": 1.2834, "step": 760 }, { - "epoch": 1.38, - "learning_rate": 0.0007757021352014663, - "loss": 1.0558, + "epoch": 0.09, + "learning_rate": 5.8426966292134835e-05, + "loss": 1.2993, "step": 780 }, { - "epoch": 1.42, - "learning_rate": 0.0007741004121961207, - "loss": 1.0578, + "epoch": 0.09, + "learning_rate": 5.992509363295881e-05, + "loss": 1.287, "step": 800 }, { - "epoch": 1.45, - "learning_rate": 0.0007724493299471956, - "loss": 1.056, + "epoch": 0.09, + "learning_rate": 6.142322097378277e-05, + "loss": 1.293, "step": 820 }, { - "epoch": 1.49, - "learning_rate": 0.0007707491063004035, - "loss": 1.0491, + "epoch": 0.09, + "learning_rate": 6.292134831460675e-05, + "loss": 1.2556, "step": 840 }, { - "epoch": 1.52, - "learning_rate": 0.0007689999655852306, - "loss": 1.0497, + "epoch": 0.1, + "learning_rate": 6.441947565543071e-05, + "loss": 1.2979, "step": 860 }, { - "epoch": 1.56, - "learning_rate": 0.0007672021385853376, - "loss": 1.0393, + "epoch": 0.1, + "learning_rate": 6.591760299625468e-05, + "loss": 1.3002, "step": 880 }, { - "epoch": 1.59, - "learning_rate": 0.0007653558625081099, - "loss": 1.0379, + "epoch": 0.1, + "learning_rate": 6.741573033707866e-05, + "loss": 1.2408, "step": 900 }, { - "epoch": 1.63, - "learning_rate": 0.0007634613809533613, - "loss": 1.049, + "epoch": 0.1, + "learning_rate": 6.891385767790263e-05, + "loss": 1.2753, "step": 920 }, { - "epoch": 1.66, - "learning_rate": 0.0007615189438811918, - "loss": 1.0594, + "epoch": 0.11, + "learning_rate": 7.04119850187266e-05, + "loss": 1.2671, "step": 940 }, { - "epoch": 1.7, - "learning_rate": 0.0007595288075790085, - "loss": 1.0375, + "epoch": 0.11, + "learning_rate": 7.191011235955056e-05, + "loss": 1.2925, "step": 960 }, { - "epoch": 1.74, - "learning_rate": 0.0007574912346277103, - "loss": 1.0453, + "epoch": 0.11, + "learning_rate": 7.340823970037454e-05, + "loss": 1.2709, "step": 980 }, { - "epoch": 1.77, - "learning_rate": 0.0007554064938670426, - "loss": 1.0348, + "epoch": 0.11, + "learning_rate": 7.49063670411985e-05, + "loss": 1.2614, "step": 1000 }, { - "epoch": 1.81, - "learning_rate": 0.0007532748603601265, - "loss": 1.0441, + "epoch": 0.11, + "learning_rate": 7.640449438202247e-05, + "loss": 1.2979, "step": 1020 }, { - "epoch": 1.84, - "learning_rate": 0.0007510966153571667, - "loss": 1.0237, + "epoch": 0.12, + "learning_rate": 7.790262172284645e-05, + "loss": 1.2534, "step": 1040 }, { - "epoch": 1.88, - "learning_rate": 0.000748872046258343, - "loss": 1.0478, + "epoch": 0.12, + "learning_rate": 7.940074906367042e-05, + "loss": 1.2425, "step": 1060 }, { - "epoch": 1.91, - "learning_rate": 0.0007466014465758899, - "loss": 1.0289, + "epoch": 0.12, + "learning_rate": 8.089887640449438e-05, + "loss": 1.2671, "step": 1080 }, { - "epoch": 1.95, - "learning_rate": 0.0007442851158953712, - "loss": 1.026, + "epoch": 0.12, + "learning_rate": 8.239700374531836e-05, + "loss": 1.247, "step": 1100 }, { - "epoch": 1.98, - "learning_rate": 0.0007419233598361512, - "loss": 1.0244, + "epoch": 0.13, + "learning_rate": 8.389513108614233e-05, + "loss": 1.2785, "step": 1120 }, { - "epoch": 2.02, - "learning_rate": 0.0007395164900110721, - "loss": 0.9968, + "epoch": 0.13, + "learning_rate": 8.53932584269663e-05, + "loss": 1.2611, "step": 1140 }, { - "epoch": 2.05, - "learning_rate": 0.0007370648239853385, - "loss": 0.9798, + "epoch": 0.13, + "learning_rate": 8.689138576779026e-05, + "loss": 1.238, "step": 1160 }, { - "epoch": 2.09, - "learning_rate": 0.0007345686852346176, - "loss": 0.9529, + "epoch": 0.13, + "learning_rate": 8.838951310861424e-05, + "loss": 1.2352, "step": 1180 }, { - "epoch": 2.12, - "learning_rate": 0.0007320284031023603, - "loss": 0.9666, + "epoch": 0.13, + "learning_rate": 8.988764044943821e-05, + "loss": 1.2389, "step": 1200 }, { - "epoch": 2.16, - "learning_rate": 0.000729444312756346, - "loss": 0.9797, + "epoch": 0.14, + "learning_rate": 9.138576779026217e-05, + "loss": 1.2519, "step": 1220 }, { - "epoch": 2.2, - "learning_rate": 0.0007268167551444611, - "loss": 0.9674, + "epoch": 0.14, + "learning_rate": 9.288389513108615e-05, + "loss": 1.2466, "step": 1240 }, { - "epoch": 2.23, - "learning_rate": 0.0007241460769497138, - "loss": 0.9666, + "epoch": 0.14, + "learning_rate": 9.438202247191012e-05, + "loss": 1.2414, "step": 1260 }, { - "epoch": 2.27, - "learning_rate": 0.0007214326305444917, - "loss": 0.9785, + "epoch": 0.14, + "learning_rate": 9.588014981273409e-05, + "loss": 1.2565, "step": 1280 }, { - "epoch": 2.3, - "learning_rate": 0.0007186767739440701, - "loss": 0.9629, + "epoch": 0.15, + "learning_rate": 9.737827715355807e-05, + "loss": 1.2661, "step": 1300 }, { - "epoch": 2.34, - "learning_rate": 0.0007158788707593748, - "loss": 0.973, + "epoch": 0.15, + "learning_rate": 9.887640449438202e-05, + "loss": 1.2708, "step": 1320 }, { - "epoch": 2.37, - "learning_rate": 0.0007130392901490069, - "loss": 0.9649, + "epoch": 0.15, + "learning_rate": 9.999999668932716e-05, + "loss": 1.2292, "step": 1340 }, { - "epoch": 2.41, - "learning_rate": 0.0007101584067705355, - "loss": 0.9766, + "epoch": 0.15, + "learning_rate": 9.999991723320065e-05, + "loss": 1.2506, "step": 1360 }, { - "epoch": 2.44, - "learning_rate": 0.0007072366007310646, - "loss": 0.954, + "epoch": 0.16, + "learning_rate": 9.999973183573581e-05, + "loss": 1.2434, "step": 1380 }, { - "epoch": 2.48, - "learning_rate": 0.0007042742575370822, - "loss": 0.9576, + "epoch": 0.16, + "learning_rate": 9.999944049732545e-05, + "loss": 1.2425, "step": 1400 }, { - "epoch": 2.51, - "learning_rate": 0.0007012717680435956, - "loss": 0.9783, + "epoch": 0.16, + "learning_rate": 9.99990432185869e-05, + "loss": 1.2279, "step": 1420 }, { - "epoch": 2.55, - "learning_rate": 0.0006982295284025612, - "loss": 0.9553, + "epoch": 0.16, + "learning_rate": 9.999854000036192e-05, + "loss": 1.2039, "step": 1440 }, { - "epoch": 2.59, - "learning_rate": 0.0006951479400106161, - "loss": 0.951, + "epoch": 0.16, + "learning_rate": 9.999793084371672e-05, + "loss": 1.2298, "step": 1460 }, { - "epoch": 2.62, - "learning_rate": 0.000692027409456118, - "loss": 0.9647, + "epoch": 0.17, + "learning_rate": 9.999721574994201e-05, + "loss": 1.2467, "step": 1480 }, { - "epoch": 2.66, - "learning_rate": 0.0006888683484654981, - "loss": 0.9656, + "epoch": 0.17, + "learning_rate": 9.999639472055294e-05, + "loss": 1.2039, "step": 1500 }, { - "epoch": 2.69, - "learning_rate": 0.0006856711738489386, - "loss": 0.9507, + "epoch": 0.17, + "learning_rate": 9.999546775728917e-05, + "loss": 1.2668, "step": 1520 }, { - "epoch": 2.73, - "learning_rate": 0.0006824363074453778, - "loss": 0.9496, + "epoch": 0.17, + "learning_rate": 9.999443486211473e-05, + "loss": 1.2045, "step": 1540 }, { - "epoch": 2.76, - "learning_rate": 0.0006791641760668519, - "loss": 0.9571, + "epoch": 0.18, + "learning_rate": 9.99932960372182e-05, + "loss": 1.2156, "step": 1560 }, { - "epoch": 2.8, - "learning_rate": 0.0006758552114421815, - "loss": 0.9494, + "epoch": 0.18, + "learning_rate": 9.99920512850125e-05, + "loss": 1.2157, "step": 1580 }, { - "epoch": 2.83, - "learning_rate": 0.0006725098501600088, - "loss": 0.9556, + "epoch": 0.18, + "learning_rate": 9.999070060813509e-05, + "loss": 1.2027, "step": 1600 }, { - "epoch": 2.87, - "learning_rate": 0.0006691285336111928, - "loss": 0.9411, + "epoch": 0.18, + "learning_rate": 9.99892440094478e-05, + "loss": 1.222, "step": 1620 }, { - "epoch": 2.9, - "learning_rate": 0.0006657117079305725, - "loss": 0.9501, + "epoch": 0.18, + "learning_rate": 9.998768149203695e-05, + "loss": 1.2139, "step": 1640 }, { - "epoch": 2.94, - "learning_rate": 0.0006622598239381033, - "loss": 0.9598, + "epoch": 0.19, + "learning_rate": 9.998601305921322e-05, + "loss": 1.2042, "step": 1660 }, { - "epoch": 2.97, - "learning_rate": 0.0006587733370793743, - "loss": 0.9599, + "epoch": 0.19, + "learning_rate": 9.998423871451174e-05, + "loss": 1.2379, "step": 1680 }, { - "epoch": 3.01, - "learning_rate": 0.0006552527073655178, - "loss": 0.9306, + "epoch": 0.19, + "learning_rate": 9.998235846169204e-05, + "loss": 1.1764, "step": 1700 }, { - "epoch": 3.05, - "learning_rate": 0.0006516983993125138, - "loss": 0.9013, + "epoch": 0.19, + "learning_rate": 9.998037230473809e-05, + "loss": 1.2254, "step": 1720 }, { - "epoch": 3.08, - "learning_rate": 0.0006481108818799015, - "loss": 0.8798, + "epoch": 0.2, + "learning_rate": 9.997828024785817e-05, + "loss": 1.1891, "step": 1740 }, { - "epoch": 3.12, - "learning_rate": 0.0006444906284089044, - "loss": 0.903, + "epoch": 0.2, + "learning_rate": 9.997608229548504e-05, + "loss": 1.1889, "step": 1760 }, { - "epoch": 3.15, - "learning_rate": 0.000640838116559977, - "loss": 0.9017, + "epoch": 0.2, + "learning_rate": 9.997377845227576e-05, + "loss": 1.2035, "step": 1780 }, { - "epoch": 3.19, - "learning_rate": 0.0006371538282497815, - "loss": 0.9051, + "epoch": 0.2, + "learning_rate": 9.997136872311177e-05, + "loss": 1.2186, "step": 1800 }, { - "epoch": 3.22, - "learning_rate": 0.0006334382495876036, - "loss": 0.8966, + "epoch": 0.2, + "learning_rate": 9.996885311309891e-05, + "loss": 1.1837, "step": 1820 }, { - "epoch": 3.26, - "learning_rate": 0.0006296918708112143, - "loss": 0.8863, + "epoch": 0.21, + "learning_rate": 9.996623162756733e-05, + "loss": 1.183, "step": 1840 }, { - "epoch": 3.29, - "learning_rate": 0.0006259151862221875, - "loss": 0.8926, + "epoch": 0.21, + "learning_rate": 9.996350427207148e-05, + "loss": 1.1707, "step": 1860 }, { - "epoch": 3.33, - "learning_rate": 0.0006221086941206817, - "loss": 0.908, + "epoch": 0.21, + "learning_rate": 9.99606710523902e-05, + "loss": 1.2172, "step": 1880 }, { - "epoch": 3.36, - "learning_rate": 0.0006182728967396925, - "loss": 0.9007, + "epoch": 0.21, + "learning_rate": 9.995773197452657e-05, + "loss": 1.1877, "step": 1900 }, { - "epoch": 3.4, - "learning_rate": 0.0006144083001787886, - "loss": 0.8872, + "epoch": 0.22, + "learning_rate": 9.995468704470802e-05, + "loss": 1.2208, "step": 1920 }, { - "epoch": 3.44, - "learning_rate": 0.0006105154143373362, - "loss": 0.8984, + "epoch": 0.22, + "learning_rate": 9.995153626938623e-05, + "loss": 1.1727, "step": 1940 }, { - "epoch": 3.47, - "learning_rate": 0.0006065947528472215, - "loss": 0.9123, + "epoch": 0.22, + "learning_rate": 9.994827965523716e-05, + "loss": 1.2127, "step": 1960 }, { - "epoch": 3.51, - "learning_rate": 0.0006026468330050827, - "loss": 0.8929, + "epoch": 0.22, + "learning_rate": 9.994491720916102e-05, + "loss": 1.1912, "step": 1980 }, { - "epoch": 3.54, - "learning_rate": 0.0005986721757040564, - "loss": 0.9145, + "epoch": 0.22, + "learning_rate": 9.994144893828226e-05, + "loss": 1.1852, "step": 2000 }, { - "epoch": 3.58, - "learning_rate": 0.0005946713053650507, - "loss": 0.8867, + "epoch": 0.23, + "learning_rate": 9.993787484994957e-05, + "loss": 1.1689, "step": 2020 }, { - "epoch": 3.61, - "learning_rate": 0.0005906447498675521, - "loss": 0.8914, + "epoch": 0.23, + "learning_rate": 9.993419495173582e-05, + "loss": 1.19, "step": 2040 }, { - "epoch": 3.65, - "learning_rate": 0.0005865930404799774, - "loss": 0.8946, + "epoch": 0.23, + "learning_rate": 9.99304092514381e-05, + "loss": 1.1833, "step": 2060 }, { - "epoch": 3.68, - "learning_rate": 0.0005825167117895765, - "loss": 0.892, + "epoch": 0.23, + "learning_rate": 9.992651775707768e-05, + "loss": 1.1944, "step": 2080 }, { - "epoch": 3.72, - "learning_rate": 0.0005784163016318987, - "loss": 0.8875, + "epoch": 0.24, + "learning_rate": 9.992252047689997e-05, + "loss": 1.1592, "step": 2100 }, { - "epoch": 3.75, - "learning_rate": 0.0005742923510198303, - "loss": 0.888, + "epoch": 0.24, + "learning_rate": 9.991841741937448e-05, + "loss": 1.1744, "step": 2120 }, { - "epoch": 3.79, - "learning_rate": 0.0005701454040722124, - "loss": 0.9078, + "epoch": 0.24, + "learning_rate": 9.991420859319496e-05, + "loss": 1.1884, "step": 2140 }, { - "epoch": 3.82, - "learning_rate": 0.0005659760079420498, - "loss": 0.9027, + "epoch": 0.24, + "learning_rate": 9.990989400727916e-05, + "loss": 1.1372, "step": 2160 }, { - "epoch": 3.86, - "learning_rate": 0.000561784712744318, - "loss": 0.8997, + "epoch": 0.24, + "learning_rate": 9.990547367076896e-05, + "loss": 1.1767, "step": 2180 }, { - "epoch": 3.9, - "learning_rate": 0.0005575720714833808, - "loss": 0.9053, + "epoch": 0.25, + "learning_rate": 9.990094759303033e-05, + "loss": 1.1837, "step": 2200 }, { - "epoch": 3.93, - "learning_rate": 0.0005533386399800275, - "loss": 0.9054, + "epoch": 0.25, + "learning_rate": 9.989631578365322e-05, + "loss": 1.1564, "step": 2220 }, { - "epoch": 3.97, - "learning_rate": 0.0005490849767981348, - "loss": 0.8988, + "epoch": 0.25, + "learning_rate": 9.989157825245167e-05, + "loss": 1.1807, "step": 2240 }, { - "epoch": 4.0, - "learning_rate": 0.0005448116431709716, - "loss": 0.8903, + "epoch": 0.25, + "learning_rate": 9.98867350094637e-05, + "loss": 1.1898, "step": 2260 }, { - "epoch": 4.04, - "learning_rate": 0.0005405192029271477, - "loss": 0.8373, + "epoch": 0.26, + "learning_rate": 9.988178606495132e-05, + "loss": 1.2028, "step": 2280 }, { - "epoch": 4.07, - "learning_rate": 0.0005362082224162223, - "loss": 0.8336, + "epoch": 0.26, + "learning_rate": 9.98767314294005e-05, + "loss": 1.1629, "step": 2300 }, { - "epoch": 4.11, - "learning_rate": 0.0005318792704339792, - "loss": 0.8483, + "epoch": 0.26, + "learning_rate": 9.987157111352117e-05, + "loss": 1.1963, "step": 2320 }, { - "epoch": 4.14, - "learning_rate": 0.0005275329181473787, - "loss": 0.8453, + "epoch": 0.26, + "learning_rate": 9.986630512824715e-05, + "loss": 1.1642, "step": 2340 }, { - "epoch": 4.18, - "learning_rate": 0.0005231697390191976, - "loss": 0.8351, + "epoch": 0.27, + "learning_rate": 9.986093348473617e-05, + "loss": 1.1624, "step": 2360 }, { - "epoch": 4.21, - "learning_rate": 0.000518790308732366, - "loss": 0.8329, + "epoch": 0.27, + "learning_rate": 9.985545619436984e-05, + "loss": 1.1595, "step": 2380 }, { - "epoch": 4.25, - "learning_rate": 0.0005143952051140103, - "loss": 0.8394, + "epoch": 0.27, + "learning_rate": 9.984987326875359e-05, + "loss": 1.1985, "step": 2400 }, { - "epoch": 4.29, - "learning_rate": 0.000509985008059215, - "loss": 0.8526, + "epoch": 0.27, + "learning_rate": 9.984418471971671e-05, + "loss": 1.1912, "step": 2420 }, { - "epoch": 4.32, - "learning_rate": 0.0005055602994545098, - "loss": 0.826, + "epoch": 0.27, + "learning_rate": 9.983839055931226e-05, + "loss": 1.2146, "step": 2440 }, { - "epoch": 4.36, - "learning_rate": 0.0005011216631010953, - "loss": 0.849, + "epoch": 0.28, + "learning_rate": 9.983249079981709e-05, + "loss": 1.183, "step": 2460 }, { - "epoch": 4.39, - "learning_rate": 0.0004966696846378156, - "loss": 0.8507, + "epoch": 0.28, + "learning_rate": 9.982648545373177e-05, + "loss": 1.1561, "step": 2480 }, { - "epoch": 4.43, - "learning_rate": 0.000492204951463888, - "loss": 0.8461, + "epoch": 0.28, + "learning_rate": 9.982037453378063e-05, + "loss": 1.1517, "step": 2500 }, { - "epoch": 4.46, - "learning_rate": 0.00048772805266140154, - "loss": 0.8533, + "epoch": 0.28, + "learning_rate": 9.981415805291168e-05, + "loss": 1.1485, "step": 2520 }, { - "epoch": 4.5, - "learning_rate": 0.00048323957891759203, - "loss": 0.8384, + "epoch": 0.29, + "learning_rate": 9.980783602429656e-05, + "loss": 1.1721, "step": 2540 }, { - "epoch": 4.53, - "learning_rate": 0.00047874012244690696, - "loss": 0.842, + "epoch": 0.29, + "learning_rate": 9.98014084613306e-05, + "loss": 1.1825, "step": 2560 }, { - "epoch": 4.57, - "learning_rate": 0.000474230276912867, - "loss": 0.8608, + "epoch": 0.29, + "learning_rate": 9.979487537763269e-05, + "loss": 1.1338, "step": 2580 }, { - "epoch": 4.6, - "learning_rate": 0.00046971063734973833, - "loss": 0.8562, + "epoch": 0.29, + "learning_rate": 9.978823678704533e-05, + "loss": 1.193, "step": 2600 }, { - "epoch": 4.64, - "learning_rate": 0.0004651818000840229, - "loss": 0.8594, + "epoch": 0.29, + "learning_rate": 9.978149270363462e-05, + "loss": 1.1809, "step": 2620 }, { - "epoch": 4.67, - "learning_rate": 0.0004606443626557778, - "loss": 0.8608, + "epoch": 0.3, + "learning_rate": 9.977464314169005e-05, + "loss": 1.1425, "step": 2640 }, { - "epoch": 4.71, - "learning_rate": 0.0004560989237397758, - "loss": 0.8486, + "epoch": 0.3, + "learning_rate": 9.976768811572473e-05, + "loss": 1.1428, "step": 2660 }, { - "epoch": 4.75, - "learning_rate": 0.00045154608306651514, - "loss": 0.869, + "epoch": 0.3, + "learning_rate": 9.976062764047515e-05, + "loss": 1.1511, "step": 2680 }, { - "epoch": 4.78, - "learning_rate": 0.0004469864413430907, - "loss": 0.8482, + "epoch": 0.3, + "learning_rate": 9.975346173090128e-05, + "loss": 1.161, "step": 2700 }, { - "epoch": 4.82, - "learning_rate": 0.00044242060017393573, - "loss": 0.8583, + "epoch": 0.31, + "learning_rate": 9.974619040218644e-05, + "loss": 1.168, "step": 2720 }, { - "epoch": 4.85, - "learning_rate": 0.00043784916198144543, - "loss": 0.8582, + "epoch": 0.31, + "learning_rate": 9.973881366973738e-05, + "loss": 1.1474, "step": 2740 }, { - "epoch": 4.89, - "learning_rate": 0.00043327272992649317, - "loss": 0.8504, + "epoch": 0.31, + "learning_rate": 9.973133154918413e-05, + "loss": 1.1605, "step": 2760 }, { - "epoch": 4.92, - "learning_rate": 0.00042869190782884794, - "loss": 0.8592, + "epoch": 0.31, + "learning_rate": 9.972374405638e-05, + "loss": 1.1591, "step": 2780 }, { - "epoch": 4.96, - "learning_rate": 0.00042410730008750623, - "loss": 0.8545, + "epoch": 0.31, + "learning_rate": 9.971605120740166e-05, + "loss": 1.1494, "step": 2800 }, { - "epoch": 4.99, - "learning_rate": 0.00041951951160094664, - "loss": 0.855, + "epoch": 0.32, + "learning_rate": 9.970825301854889e-05, + "loss": 1.1596, "step": 2820 }, { - "epoch": 5.03, - "learning_rate": 0.00041492914768731927, - "loss": 0.7869, + "epoch": 0.32, + "learning_rate": 9.970034950634478e-05, + "loss": 1.1543, "step": 2840 }, { - "epoch": 5.06, - "learning_rate": 0.0004103368140045789, - "loss": 0.8083, + "epoch": 0.32, + "learning_rate": 9.96923406875355e-05, + "loss": 1.128, "step": 2860 }, { - "epoch": 5.1, - "learning_rate": 0.00040574311647057366, - "loss": 0.8108, + "epoch": 0.32, + "learning_rate": 9.968422657909037e-05, + "loss": 1.1567, "step": 2880 }, { - "epoch": 5.14, - "learning_rate": 0.00040114866118310045, - "loss": 0.7968, + "epoch": 0.33, + "learning_rate": 9.967600719820183e-05, + "loss": 1.1439, "step": 2900 }, { - "epoch": 5.17, - "learning_rate": 0.0003965540543399344, - "loss": 0.8016, + "epoch": 0.33, + "learning_rate": 9.966768256228536e-05, + "loss": 1.1156, "step": 2920 }, { - "epoch": 5.21, - "learning_rate": 0.00039195990215884756, - "loss": 0.7967, + "epoch": 0.33, + "learning_rate": 9.965925268897942e-05, + "loss": 1.1695, "step": 2940 }, { - "epoch": 5.24, - "learning_rate": 0.00038736681079762293, - "loss": 0.8096, + "epoch": 0.33, + "learning_rate": 9.96507175961455e-05, + "loss": 1.1286, "step": 2960 }, { - "epoch": 5.28, - "learning_rate": 0.0003827753862740779, - "loss": 0.8073, + "epoch": 0.33, + "learning_rate": 9.964207730186804e-05, + "loss": 1.1687, "step": 2980 }, { - "epoch": 5.31, - "learning_rate": 0.0003781862343861055, - "loss": 0.804, + "epoch": 0.34, + "learning_rate": 9.963333182445429e-05, + "loss": 1.1401, "step": 3000 }, { - "epoch": 5.35, - "learning_rate": 0.00037359996063174425, - "loss": 0.8119, + "epoch": 0.34, + "learning_rate": 9.962448118243451e-05, + "loss": 1.1419, "step": 3020 }, { - "epoch": 5.38, - "learning_rate": 0.0003690171701292887, - "loss": 0.7997, + "epoch": 0.34, + "learning_rate": 9.961552539456163e-05, + "loss": 1.1224, "step": 3040 }, { - "epoch": 5.42, - "learning_rate": 0.0003644384675374489, - "loss": 0.8202, + "epoch": 0.34, + "learning_rate": 9.96064644798115e-05, + "loss": 1.1506, "step": 3060 }, { - "epoch": 5.45, - "learning_rate": 0.0003598644569755713, - "loss": 0.815, + "epoch": 0.35, + "learning_rate": 9.959729845738264e-05, + "loss": 1.143, "step": 3080 }, { - "epoch": 5.49, - "learning_rate": 0.00035529574194393033, - "loss": 0.825, + "epoch": 0.35, + "learning_rate": 9.958802734669633e-05, + "loss": 1.1684, "step": 3100 }, { - "epoch": 5.52, - "learning_rate": 0.00035073292524410207, - "loss": 0.8171, + "epoch": 0.35, + "learning_rate": 9.957865116739641e-05, + "loss": 1.1226, "step": 3120 }, { - "epoch": 5.56, - "learning_rate": 0.00034617660889943, - "loss": 0.7921, + "epoch": 0.35, + "learning_rate": 9.956916993934947e-05, + "loss": 1.1404, "step": 3140 }, { - "epoch": 5.6, - "learning_rate": 0.00034162739407559285, - "loss": 0.8299, + "epoch": 0.36, + "learning_rate": 9.95595836826446e-05, + "loss": 1.1483, "step": 3160 }, { - "epoch": 5.63, - "learning_rate": 0.0003370858810012869, - "loss": 0.811, + "epoch": 0.36, + "learning_rate": 9.954989241759346e-05, + "loss": 1.137, "step": 3180 }, { - "epoch": 5.67, - "learning_rate": 0.00033255266888903006, - "loss": 0.8093, + "epoch": 0.36, + "learning_rate": 9.954009616473019e-05, + "loss": 1.1019, "step": 3200 }, { - "epoch": 5.7, - "learning_rate": 0.00032802835585610225, - "loss": 0.8106, + "epoch": 0.36, + "learning_rate": 9.95301949448114e-05, + "loss": 1.1063, "step": 3220 }, { - "epoch": 5.74, - "learning_rate": 0.00032351353884562783, - "loss": 0.8053, + "epoch": 0.36, + "learning_rate": 9.952018877881606e-05, + "loss": 1.1487, "step": 3240 }, { - "epoch": 5.77, - "learning_rate": 0.00031900881354781556, - "loss": 0.8161, + "epoch": 0.37, + "learning_rate": 9.951007768794558e-05, + "loss": 1.128, "step": 3260 }, { - "epoch": 5.81, - "learning_rate": 0.00031451477432136154, - "loss": 0.8186, + "epoch": 0.37, + "learning_rate": 9.949986169362362e-05, + "loss": 1.1343, "step": 3280 }, { - "epoch": 5.84, - "learning_rate": 0.0003100320141150293, - "loss": 0.8046, + "epoch": 0.37, + "learning_rate": 9.948954081749616e-05, + "loss": 1.1342, "step": 3300 }, { - "epoch": 5.88, - "learning_rate": 0.00030556112438941526, - "loss": 0.8236, + "epoch": 0.37, + "learning_rate": 9.947911508143135e-05, + "loss": 1.1387, "step": 3320 }, { - "epoch": 5.91, - "learning_rate": 0.00030110269503891084, - "loss": 0.8057, + "epoch": 0.38, + "learning_rate": 9.946858450751958e-05, + "loss": 1.1217, "step": 3340 }, { - "epoch": 5.95, - "learning_rate": 0.0002966573143138713, - "loss": 0.8109, + "epoch": 0.38, + "learning_rate": 9.945794911807334e-05, + "loss": 1.1276, "step": 3360 }, { - "epoch": 5.98, - "learning_rate": 0.00029222556874300036, - "loss": 0.8163, + "epoch": 0.38, + "learning_rate": 9.944720893562722e-05, + "loss": 1.1567, "step": 3380 }, { - "epoch": 6.02, - "learning_rate": 0.0002878080430559646, - "loss": 0.7901, + "epoch": 0.38, + "learning_rate": 9.943636398293785e-05, + "loss": 1.1307, "step": 3400 }, { - "epoch": 6.06, - "learning_rate": 0.0002834053201062417, - "loss": 0.7749, + "epoch": 0.38, + "learning_rate": 9.942541428298384e-05, + "loss": 1.1264, "step": 3420 }, { - "epoch": 6.09, - "learning_rate": 0.00027901798079421977, - "loss": 0.7775, + "epoch": 0.39, + "learning_rate": 9.941435985896573e-05, + "loss": 1.1148, "step": 3440 }, { - "epoch": 6.13, - "learning_rate": 0.0002746466039905513, - "loss": 0.7589, + "epoch": 0.39, + "learning_rate": 9.940320073430598e-05, + "loss": 1.1417, "step": 3460 }, { - "epoch": 6.16, - "learning_rate": 0.000270291766459777, - "loss": 0.7742, + "epoch": 0.39, + "learning_rate": 9.93919369326489e-05, + "loss": 1.1565, "step": 3480 }, { - "epoch": 6.2, - "learning_rate": 0.00026595404278422684, - "loss": 0.7914, + "epoch": 0.39, + "learning_rate": 9.938056847786053e-05, + "loss": 1.1517, "step": 3500 }, { - "epoch": 6.23, - "learning_rate": 0.00026163400528820836, - "loss": 0.7909, + "epoch": 0.4, + "learning_rate": 9.936909539402874e-05, + "loss": 1.1505, "step": 3520 }, { - "epoch": 6.27, - "learning_rate": 0.0002573322239624947, - "loss": 0.7653, + "epoch": 0.4, + "learning_rate": 9.935751770546302e-05, + "loss": 1.1276, "step": 3540 }, { - "epoch": 6.3, - "learning_rate": 0.000253049266389118, - "loss": 0.7779, + "epoch": 0.4, + "learning_rate": 9.934583543669453e-05, + "loss": 1.1169, "step": 3560 }, { - "epoch": 6.34, - "learning_rate": 0.0002487856976664831, - "loss": 0.7771, + "epoch": 0.4, + "learning_rate": 9.933404861247603e-05, + "loss": 1.1185, "step": 3580 }, { - "epoch": 6.37, - "learning_rate": 0.00024454208033480683, - "loss": 0.771, + "epoch": 0.4, + "learning_rate": 9.93221572577818e-05, + "loss": 1.1566, "step": 3600 }, { - "epoch": 6.41, - "learning_rate": 0.00024031897430189695, - "loss": 0.7788, + "epoch": 0.41, + "learning_rate": 9.931016139780758e-05, + "loss": 1.1288, "step": 3620 }, { - "epoch": 6.45, - "learning_rate": 0.00023611693676927606, - "loss": 0.7798, + "epoch": 0.41, + "learning_rate": 9.929806105797058e-05, + "loss": 1.137, "step": 3640 }, { - "epoch": 6.48, - "learning_rate": 0.00023193652215866429, - "loss": 0.7779, + "epoch": 0.41, + "learning_rate": 9.928585626390935e-05, + "loss": 1.1266, "step": 3660 }, { - "epoch": 6.52, - "learning_rate": 0.00022777828203882875, - "loss": 0.7784, + "epoch": 0.41, + "learning_rate": 9.927354704148382e-05, + "loss": 1.1436, "step": 3680 }, { - "epoch": 6.55, - "learning_rate": 0.00022364276505280794, - "loss": 0.7946, + "epoch": 0.42, + "learning_rate": 9.926113341677507e-05, + "loss": 1.0938, "step": 3700 }, { - "epoch": 6.59, - "learning_rate": 0.0002195305168455239, - "loss": 0.774, + "epoch": 0.42, + "learning_rate": 9.924861541608553e-05, + "loss": 1.1159, "step": 3720 }, { - "epoch": 6.62, - "learning_rate": 0.00021544207999178917, - "loss": 0.7823, + "epoch": 0.42, + "learning_rate": 9.92359930659387e-05, + "loss": 1.0964, "step": 3740 }, { - "epoch": 6.66, - "learning_rate": 0.00021137799392471814, - "loss": 0.7779, + "epoch": 0.42, + "learning_rate": 9.922326639307917e-05, + "loss": 1.1329, "step": 3760 }, { - "epoch": 6.69, - "learning_rate": 0.00020733879486455433, - "loss": 0.7867, + "epoch": 0.42, + "learning_rate": 9.921043542447264e-05, + "loss": 1.1393, "step": 3780 }, { - "epoch": 6.73, - "learning_rate": 0.0002033250157479206, - "loss": 0.7738, + "epoch": 0.43, + "learning_rate": 9.919750018730571e-05, + "loss": 1.1251, "step": 3800 }, { - "epoch": 6.76, - "learning_rate": 0.0001993371861575028, - "loss": 0.7814, + "epoch": 0.43, + "learning_rate": 9.918446070898601e-05, + "loss": 1.1018, "step": 3820 }, { - "epoch": 6.8, - "learning_rate": 0.00019537583225217605, - "loss": 0.7695, + "epoch": 0.43, + "learning_rate": 9.917131701714192e-05, + "loss": 1.1376, "step": 3840 }, { - "epoch": 6.83, - "learning_rate": 0.00019144147669758322, - "loss": 0.7846, + "epoch": 0.43, + "learning_rate": 9.915806913962274e-05, + "loss": 1.1901, "step": 3860 }, { - "epoch": 6.87, - "learning_rate": 0.00018753463859717283, - "loss": 0.7779, + "epoch": 0.44, + "learning_rate": 9.914471710449845e-05, + "loss": 1.1236, "step": 3880 }, { - "epoch": 6.91, - "learning_rate": 0.0001836558334237088, - "loss": 0.7769, + "epoch": 0.44, + "learning_rate": 9.913126094005976e-05, + "loss": 1.1188, "step": 3900 }, { - "epoch": 6.94, - "learning_rate": 0.0001798055729512579, - "loss": 0.782, + "epoch": 0.44, + "learning_rate": 9.911770067481798e-05, + "loss": 1.1358, "step": 3920 }, { - "epoch": 6.98, - "learning_rate": 0.00017598436518766596, - "loss": 0.7683, + "epoch": 0.44, + "learning_rate": 9.910403633750502e-05, + "loss": 1.14, "step": 3940 }, { - "epoch": 7.01, - "learning_rate": 0.0001721927143075305, - "loss": 0.7674, + "epoch": 0.44, + "learning_rate": 9.909026795707331e-05, + "loss": 1.1145, "step": 3960 }, { - "epoch": 7.05, - "learning_rate": 0.00016843112058567935, - "loss": 0.7427, + "epoch": 0.45, + "learning_rate": 9.907639556269566e-05, + "loss": 1.1162, "step": 3980 }, { - "epoch": 7.08, - "learning_rate": 0.00016470008033116443, - "loss": 0.7627, + "epoch": 0.45, + "learning_rate": 9.906241918376537e-05, + "loss": 1.1131, "step": 4000 }, { - "epoch": 7.12, - "learning_rate": 0.00016100008582177705, - "loss": 0.7541, + "epoch": 0.45, + "learning_rate": 9.904833884989602e-05, + "loss": 1.1174, "step": 4020 }, { - "epoch": 7.15, - "learning_rate": 0.00015733162523909707, - "loss": 0.7654, + "epoch": 0.45, + "learning_rate": 9.90341545909214e-05, + "loss": 1.1196, "step": 4040 }, { - "epoch": 7.19, - "learning_rate": 0.0001536951826040813, - "loss": 0.7382, + "epoch": 0.46, + "learning_rate": 9.901986643689559e-05, + "loss": 1.1053, "step": 4060 }, { - "epoch": 7.22, - "learning_rate": 0.0001500912377132013, - "loss": 0.7555, + "epoch": 0.46, + "learning_rate": 9.900547441809272e-05, + "loss": 1.1168, "step": 4080 }, { - "epoch": 7.26, - "learning_rate": 0.00014652026607513848, - "loss": 0.748, + "epoch": 0.46, + "learning_rate": 9.899097856500707e-05, + "loss": 1.101, "step": 4100 }, { - "epoch": 7.3, - "learning_rate": 0.00014298273884804478, - "loss": 0.753, + "epoch": 0.46, + "learning_rate": 9.897637890835289e-05, + "loss": 1.1039, "step": 4120 }, { - "epoch": 7.33, - "learning_rate": 0.00013947912277737808, - "loss": 0.7594, + "epoch": 0.47, + "learning_rate": 9.896167547906437e-05, + "loss": 1.1027, "step": 4140 }, { - "epoch": 7.37, - "learning_rate": 0.00013600988013431832, - "loss": 0.7387, + "epoch": 0.47, + "learning_rate": 9.894686830829558e-05, + "loss": 1.1028, "step": 4160 }, { - "epoch": 7.4, - "learning_rate": 0.00013257546865477572, - "loss": 0.7551, + "epoch": 0.47, + "learning_rate": 9.89319574274204e-05, + "loss": 1.0957, "step": 4180 }, { - "epoch": 7.44, - "learning_rate": 0.00012917634147899607, - "loss": 0.7609, + "epoch": 0.47, + "learning_rate": 9.891694286803246e-05, + "loss": 1.1397, "step": 4200 }, { - "epoch": 7.47, - "learning_rate": 0.00012581294709177327, - "loss": 0.762, + "epoch": 0.47, + "learning_rate": 9.890182466194505e-05, + "loss": 1.109, "step": 4220 }, { - "epoch": 7.51, - "learning_rate": 0.00012248572926327537, - "loss": 0.755, + "epoch": 0.48, + "learning_rate": 9.88866028411911e-05, + "loss": 1.1414, "step": 4240 }, { - "epoch": 7.54, - "learning_rate": 0.00011919512699049314, - "loss": 0.753, + "epoch": 0.48, + "learning_rate": 9.887127743802304e-05, + "loss": 1.136, "step": 4260 }, { - "epoch": 7.58, - "learning_rate": 0.00011594157443931872, - "loss": 0.7603, + "epoch": 0.48, + "learning_rate": 9.885584848491285e-05, + "loss": 1.1458, "step": 4280 }, { - "epoch": 7.61, - "learning_rate": 0.0001127255008872604, - "loss": 0.7565, + "epoch": 0.48, + "learning_rate": 9.884031601455179e-05, + "loss": 1.1379, "step": 4300 }, { - "epoch": 7.65, - "learning_rate": 0.00010954733066680401, - "loss": 0.7542, + "epoch": 0.49, + "learning_rate": 9.88246800598506e-05, + "loss": 1.1083, "step": 4320 }, { - "epoch": 7.68, - "learning_rate": 0.00010640748310942559, - "loss": 0.7674, + "epoch": 0.49, + "learning_rate": 9.880894065393915e-05, + "loss": 1.1063, "step": 4340 }, { - "epoch": 7.72, - "learning_rate": 0.00010330637249026445, - "loss": 0.7447, + "epoch": 0.49, + "learning_rate": 9.879309783016663e-05, + "loss": 1.101, "step": 4360 }, { - "epoch": 7.76, - "learning_rate": 0.00010024440797346324, - "loss": 0.7582, + "epoch": 0.49, + "learning_rate": 9.877715162210123e-05, + "loss": 1.1023, "step": 4380 }, { - "epoch": 7.79, - "learning_rate": 9.722199355818227e-05, - "loss": 0.7598, + "epoch": 0.49, + "learning_rate": 9.876110206353033e-05, + "loss": 1.1223, "step": 4400 }, { - "epoch": 7.83, - "learning_rate": 9.423952802529564e-05, - "loss": 0.7389, + "epoch": 0.5, + "learning_rate": 9.874494918846017e-05, + "loss": 1.1348, "step": 4420 }, { - "epoch": 7.86, - "learning_rate": 9.129740488477518e-05, - "loss": 0.759, + "epoch": 0.5, + "learning_rate": 9.872869303111595e-05, + "loss": 1.0998, "step": 4440 }, { - "epoch": 7.9, - "learning_rate": 8.83960123237706e-05, - "loss": 0.7442, + "epoch": 0.5, + "learning_rate": 9.871233362594175e-05, + "loss": 1.0983, "step": 4460 }, { - "epoch": 7.93, - "learning_rate": 8.553573315539188e-05, - "loss": 0.7627, + "epoch": 0.5, + "learning_rate": 9.869587100760034e-05, + "loss": 1.1114, "step": 4480 }, { - "epoch": 7.97, - "learning_rate": 8.271694476819956e-05, - "loss": 0.7531, + "epoch": 0.51, + "learning_rate": 9.86793052109732e-05, + "loss": 1.0765, "step": 4500 }, { - "epoch": 8.0, - "learning_rate": 7.994001907641262e-05, - "loss": 0.7567, + "epoch": 0.51, + "learning_rate": 9.866263627116049e-05, + "loss": 1.089, "step": 4520 }, { - "epoch": 8.04, - "learning_rate": 7.720532247083743e-05, - "loss": 0.7274, + "epoch": 0.51, + "learning_rate": 9.864586422348081e-05, + "loss": 1.101, "step": 4540 }, { - "epoch": 8.07, - "learning_rate": 7.451321577052533e-05, - "loss": 0.7347, + "epoch": 0.51, + "learning_rate": 9.862898910347132e-05, + "loss": 1.1247, "step": 4560 }, { - "epoch": 8.11, - "learning_rate": 7.18640541751661e-05, - "loss": 0.752, + "epoch": 0.51, + "learning_rate": 9.861201094688752e-05, + "loss": 1.113, "step": 4580 }, { - "epoch": 8.15, - "learning_rate": 6.925818721822239e-05, - "loss": 0.7293, + "epoch": 0.52, + "learning_rate": 9.859492978970325e-05, + "loss": 1.1354, "step": 4600 }, { - "epoch": 8.18, - "learning_rate": 6.669595872081211e-05, - "loss": 0.7213, + "epoch": 0.52, + "learning_rate": 9.857774566811058e-05, + "loss": 1.095, "step": 4620 }, { - "epoch": 8.22, - "learning_rate": 6.417770674634365e-05, - "loss": 0.7246, + "epoch": 0.52, + "learning_rate": 9.856045861851975e-05, + "loss": 1.1188, "step": 4640 }, { - "epoch": 8.25, - "learning_rate": 6.170376355591204e-05, - "loss": 0.7353, + "epoch": 0.52, + "learning_rate": 9.854306867755906e-05, + "loss": 1.0938, "step": 4660 }, { - "epoch": 8.29, - "learning_rate": 5.9274455564459896e-05, - "loss": 0.7396, + "epoch": 0.53, + "learning_rate": 9.852557588207487e-05, + "loss": 1.1113, "step": 4680 }, { - "epoch": 8.32, - "learning_rate": 5.689010329770965e-05, - "loss": 0.7401, + "epoch": 0.53, + "learning_rate": 9.850798026913145e-05, + "loss": 1.088, "step": 4700 }, { - "epoch": 8.36, - "learning_rate": 5.455102134987304e-05, - "loss": 0.7399, + "epoch": 0.53, + "learning_rate": 9.849028187601091e-05, + "loss": 1.1256, "step": 4720 }, { - "epoch": 8.39, - "learning_rate": 5.225751834214339e-05, - "loss": 0.7427, + "epoch": 0.53, + "learning_rate": 9.847248074021312e-05, + "loss": 1.0979, "step": 4740 }, { - "epoch": 8.43, - "learning_rate": 5.000989688197555e-05, - "loss": 0.7289, + "epoch": 0.53, + "learning_rate": 9.845457689945567e-05, + "loss": 1.1227, "step": 4760 }, { - "epoch": 8.46, - "learning_rate": 4.780845352315968e-05, - "loss": 0.7398, + "epoch": 0.54, + "learning_rate": 9.84365703916738e-05, + "loss": 1.1265, "step": 4780 }, { - "epoch": 8.5, - "learning_rate": 4.565347872669339e-05, - "loss": 0.7332, + "epoch": 0.54, + "learning_rate": 9.841846125502021e-05, + "loss": 1.0997, "step": 4800 }, { - "epoch": 8.53, - "learning_rate": 4.3545256822458445e-05, - "loss": 0.7365, + "epoch": 0.54, + "learning_rate": 9.840024952786508e-05, + "loss": 1.0951, "step": 4820 }, { - "epoch": 8.57, - "learning_rate": 4.148406597170529e-05, - "loss": 0.7347, + "epoch": 0.54, + "learning_rate": 9.838193524879599e-05, + "loss": 1.1091, "step": 4840 }, { - "epoch": 8.61, - "learning_rate": 3.947017813035254e-05, - "loss": 0.7515, + "epoch": 0.55, + "learning_rate": 9.836351845661777e-05, + "loss": 1.1148, "step": 4860 }, { - "epoch": 8.64, - "learning_rate": 3.7503859013104806e-05, - "loss": 0.7397, + "epoch": 0.55, + "learning_rate": 9.834499919035249e-05, + "loss": 1.1211, "step": 4880 }, { - "epoch": 8.68, - "learning_rate": 3.5585368058393834e-05, - "loss": 0.7597, + "epoch": 0.55, + "learning_rate": 9.832637748923934e-05, + "loss": 1.1033, "step": 4900 }, { - "epoch": 8.71, - "learning_rate": 3.3714958394147975e-05, - "loss": 0.7391, + "epoch": 0.55, + "learning_rate": 9.830765339273454e-05, + "loss": 1.0925, "step": 4920 }, { - "epoch": 8.75, - "learning_rate": 3.1892876804394144e-05, - "loss": 0.7359, + "epoch": 0.56, + "learning_rate": 9.828882694051124e-05, + "loss": 1.1061, "step": 4940 }, { - "epoch": 8.78, - "learning_rate": 3.0119363696697078e-05, - "loss": 0.7412, + "epoch": 0.56, + "learning_rate": 9.826989817245953e-05, + "loss": 1.1083, "step": 4960 }, { - "epoch": 8.82, - "learning_rate": 2.839465307043927e-05, - "loss": 0.7485, + "epoch": 0.56, + "learning_rate": 9.825086712868625e-05, + "loss": 1.0864, "step": 4980 }, { - "epoch": 8.85, - "learning_rate": 2.6718972485947037e-05, - "loss": 0.7534, + "epoch": 0.56, + "learning_rate": 9.823173384951496e-05, + "loss": 1.1202, "step": 5000 }, { - "epoch": 8.89, - "learning_rate": 2.5092543034466264e-05, - "loss": 0.7405, + "epoch": 0.56, + "learning_rate": 9.821249837548582e-05, + "loss": 1.101, "step": 5020 }, { - "epoch": 8.92, - "learning_rate": 2.3515579308990597e-05, - "loss": 0.7415, + "epoch": 0.57, + "learning_rate": 9.819316074735554e-05, + "loss": 1.1191, "step": 5040 }, { - "epoch": 8.96, - "learning_rate": 2.1988289375948524e-05, - "loss": 0.7309, + "epoch": 0.57, + "learning_rate": 9.817372100609726e-05, + "loss": 1.0535, "step": 5060 }, { - "epoch": 9.0, - "learning_rate": 2.0510874747750575e-05, - "loss": 0.7418, + "epoch": 0.57, + "learning_rate": 9.81541791929005e-05, + "loss": 1.1082, "step": 5080 }, { - "epoch": 9.03, - "learning_rate": 1.9083530356201407e-05, - "loss": 0.7469, + "epoch": 0.57, + "learning_rate": 9.813453534917105e-05, + "loss": 1.1366, "step": 5100 }, { - "epoch": 9.07, - "learning_rate": 1.7706444526780585e-05, - "loss": 0.7385, + "epoch": 0.58, + "learning_rate": 9.811478951653088e-05, + "loss": 1.072, "step": 5120 }, { - "epoch": 9.1, - "learning_rate": 1.637979895379429e-05, - "loss": 0.7333, + "epoch": 0.58, + "learning_rate": 9.809494173681804e-05, + "loss": 1.0954, "step": 5140 }, { - "epoch": 9.14, - "learning_rate": 1.5103768676402885e-05, - "loss": 0.7314, + "epoch": 0.58, + "learning_rate": 9.807499205208663e-05, + "loss": 1.1014, "step": 5160 }, { - "epoch": 9.17, - "learning_rate": 1.38785220555254e-05, - "loss": 0.74, + "epoch": 0.58, + "learning_rate": 9.805494050460666e-05, + "loss": 1.0966, "step": 5180 }, { - "epoch": 9.21, - "learning_rate": 1.270422075162645e-05, - "loss": 0.7481, + "epoch": 0.58, + "learning_rate": 9.803478713686391e-05, + "loss": 1.1035, "step": 5200 }, { - "epoch": 9.24, - "learning_rate": 1.1581019703386143e-05, - "loss": 0.7222, + "epoch": 0.59, + "learning_rate": 9.801453199155996e-05, + "loss": 1.1061, "step": 5220 }, { - "epoch": 9.28, - "learning_rate": 1.0509067107257365e-05, - "loss": 0.7259, + "epoch": 0.59, + "learning_rate": 9.799417511161206e-05, + "loss": 1.0943, "step": 5240 }, { - "epoch": 9.31, - "learning_rate": 9.488504397912712e-06, - "loss": 0.7309, + "epoch": 0.59, + "learning_rate": 9.797371654015296e-05, + "loss": 1.0943, "step": 5260 }, { - "epoch": 9.35, - "learning_rate": 8.51946622958324e-06, - "loss": 0.7212, + "epoch": 0.59, + "learning_rate": 9.795315632053088e-05, + "loss": 1.0939, "step": 5280 }, { - "epoch": 9.38, - "learning_rate": 7.602080458292227e-06, - "loss": 0.7254, + "epoch": 0.6, + "learning_rate": 9.793249449630946e-05, + "loss": 1.083, "step": 5300 }, { - "epoch": 9.42, - "learning_rate": 6.7364681249854735e-06, - "loss": 0.728, + "epoch": 0.6, + "learning_rate": 9.791173111126759e-05, + "loss": 1.0958, "step": 5320 }, { - "epoch": 9.46, - "learning_rate": 5.922743439561229e-06, - "loss": 0.7341, + "epoch": 0.6, + "learning_rate": 9.789086620939936e-05, + "loss": 1.0671, "step": 5340 }, { - "epoch": 9.49, - "learning_rate": 5.161013765801137e-06, - "loss": 0.721, + "epoch": 0.6, + "learning_rate": 9.786989983491397e-05, + "loss": 1.0766, "step": 5360 }, { - "epoch": 9.53, - "learning_rate": 4.451379607204453e-06, - "loss": 0.725, + "epoch": 0.6, + "learning_rate": 9.784883203223558e-05, + "loss": 1.1059, "step": 5380 }, { - "epoch": 9.56, - "learning_rate": 3.7939345937275884e-06, - "loss": 0.7399, + "epoch": 0.61, + "learning_rate": 9.782766284600332e-05, + "loss": 1.1136, "step": 5400 }, { - "epoch": 9.6, - "learning_rate": 3.1887654694303883e-06, - "loss": 0.7301, + "epoch": 0.61, + "learning_rate": 9.780639232107108e-05, + "loss": 1.1049, "step": 5420 }, { - "epoch": 9.63, - "learning_rate": 2.635952081031201e-06, - "loss": 0.7222, + "epoch": 0.61, + "learning_rate": 9.778502050250749e-05, + "loss": 1.0934, "step": 5440 }, { - "epoch": 9.67, - "learning_rate": 2.1355673673715716e-06, - "loss": 0.7308, + "epoch": 0.61, + "learning_rate": 9.776354743559583e-05, + "loss": 1.0905, "step": 5460 }, { - "epoch": 9.7, - "learning_rate": 1.6876773497926046e-06, - "loss": 0.7417, + "epoch": 0.62, + "learning_rate": 9.774197316583387e-05, + "loss": 1.0722, "step": 5480 }, { - "epoch": 9.74, - "learning_rate": 1.292341123424423e-06, - "loss": 0.7348, + "epoch": 0.62, + "learning_rate": 9.77202977389338e-05, + "loss": 1.0761, "step": 5500 }, { - "epoch": 9.77, - "learning_rate": 9.496108493884936e-07, - "loss": 0.7278, + "epoch": 0.62, + "learning_rate": 9.769852120082222e-05, + "loss": 1.0705, "step": 5520 }, { - "epoch": 9.81, - "learning_rate": 6.595317479159313e-07, - "loss": 0.7257, + "epoch": 0.62, + "learning_rate": 9.767664359763991e-05, + "loss": 1.0709, "step": 5540 }, { - "epoch": 9.85, - "learning_rate": 4.2214209238085054e-07, - "loss": 0.7296, + "epoch": 0.62, + "learning_rate": 9.765466497574175e-05, + "loss": 1.0927, "step": 5560 }, { - "epoch": 9.88, - "learning_rate": 2.3747320425053786e-07, - "loss": 0.7301, + "epoch": 0.63, + "learning_rate": 9.763258538169675e-05, + "loss": 1.0942, "step": 5580 }, { - "epoch": 9.92, - "learning_rate": 1.0554944895293517e-07, - "loss": 0.739, + "epoch": 0.63, + "learning_rate": 9.761040486228783e-05, + "loss": 1.1113, "step": 5600 }, { - "epoch": 9.95, - "learning_rate": 2.638823266174484e-08, - "loss": 0.7284, + "epoch": 0.63, + "learning_rate": 9.758812346451171e-05, + "loss": 1.0902, "step": 5620 }, { - "epoch": 9.99, - "learning_rate": 0.0, - "loss": 0.7294, + "epoch": 0.63, + "learning_rate": 9.756574123557893e-05, + "loss": 1.1216, "step": 5640 }, { - "epoch": 9.99, - "step": 5640, - "total_flos": 1.4686171215861645e+19, - "train_loss": 0.8784972819876163, - "train_runtime": 13393.5014, - "train_samples_per_second": 53.949, - "train_steps_per_second": 0.421 + "epoch": 0.64, + "learning_rate": 9.754325822291362e-05, + "loss": 1.1029, + "step": 5660 + }, + { + "epoch": 0.64, + "learning_rate": 9.752067447415342e-05, + "loss": 1.1237, + "step": 5680 + }, + { + "epoch": 0.64, + "learning_rate": 9.749799003714954e-05, + "loss": 1.0988, + "step": 5700 + }, + { + "epoch": 0.64, + "learning_rate": 9.747520495996641e-05, + "loss": 1.087, + "step": 5720 + }, + { + "epoch": 0.64, + "learning_rate": 9.745231929088174e-05, + "loss": 1.0668, + "step": 5740 + }, + { + "epoch": 0.65, + "learning_rate": 9.74293330783864e-05, + "loss": 1.0756, + "step": 5760 + }, + { + "epoch": 0.65, + "learning_rate": 9.740624637118425e-05, + "loss": 1.1091, + "step": 5780 + }, + { + "epoch": 0.65, + "learning_rate": 9.73830592181921e-05, + "loss": 1.0985, + "step": 5800 + }, + { + "epoch": 0.65, + "learning_rate": 9.735977166853962e-05, + "loss": 1.0667, + "step": 5820 + }, + { + "epoch": 0.66, + "learning_rate": 9.733638377156915e-05, + "loss": 1.0753, + "step": 5840 + }, + { + "epoch": 0.66, + "learning_rate": 9.731289557683567e-05, + "loss": 1.0869, + "step": 5860 + }, + { + "epoch": 0.66, + "learning_rate": 9.72893071341067e-05, + "loss": 1.0944, + "step": 5880 + }, + { + "epoch": 0.66, + "learning_rate": 9.726561849336216e-05, + "loss": 1.0817, + "step": 5900 + }, + { + "epoch": 0.67, + "learning_rate": 9.724182970479422e-05, + "loss": 1.1044, + "step": 5920 + }, + { + "epoch": 0.67, + "learning_rate": 9.72179408188073e-05, + "loss": 1.1296, + "step": 5940 + }, + { + "epoch": 0.67, + "learning_rate": 9.71939518860179e-05, + "loss": 1.0678, + "step": 5960 + }, + { + "epoch": 0.67, + "learning_rate": 9.71698629572545e-05, + "loss": 1.0837, + "step": 5980 + }, + { + "epoch": 0.67, + "learning_rate": 9.714567408355744e-05, + "loss": 1.0911, + "step": 6000 + }, + { + "epoch": 0.68, + "learning_rate": 9.712138531617883e-05, + "loss": 1.0979, + "step": 6020 + }, + { + "epoch": 0.68, + "learning_rate": 9.709699670658248e-05, + "loss": 1.0742, + "step": 6040 + }, + { + "epoch": 0.68, + "learning_rate": 9.707250830644367e-05, + "loss": 1.0689, + "step": 6060 + }, + { + "epoch": 0.68, + "learning_rate": 9.704792016764922e-05, + "loss": 1.1154, + "step": 6080 + }, + { + "epoch": 0.69, + "learning_rate": 9.702323234229717e-05, + "loss": 1.089, + "step": 6100 + }, + { + "epoch": 0.69, + "learning_rate": 9.699844488269687e-05, + "loss": 1.0787, + "step": 6120 + }, + { + "epoch": 0.69, + "learning_rate": 9.69735578413687e-05, + "loss": 1.0688, + "step": 6140 + }, + { + "epoch": 0.69, + "learning_rate": 9.69485712710441e-05, + "loss": 1.0665, + "step": 6160 + }, + { + "epoch": 0.69, + "learning_rate": 9.692348522466537e-05, + "loss": 1.0686, + "step": 6180 + }, + { + "epoch": 0.7, + "learning_rate": 9.689829975538559e-05, + "loss": 1.0872, + "step": 6200 + }, + { + "epoch": 0.7, + "learning_rate": 9.687301491656849e-05, + "loss": 1.0818, + "step": 6220 + }, + { + "epoch": 0.7, + "learning_rate": 9.684763076178836e-05, + "loss": 1.0844, + "step": 6240 + }, + { + "epoch": 0.7, + "learning_rate": 9.682214734482989e-05, + "loss": 1.0977, + "step": 6260 + }, + { + "epoch": 0.71, + "learning_rate": 9.679656471968814e-05, + "loss": 1.072, + "step": 6280 + }, + { + "epoch": 0.71, + "learning_rate": 9.677088294056833e-05, + "loss": 1.0869, + "step": 6300 + }, + { + "epoch": 0.71, + "learning_rate": 9.674510206188584e-05, + "loss": 1.0839, + "step": 6320 + }, + { + "epoch": 0.71, + "learning_rate": 9.671922213826589e-05, + "loss": 1.077, + "step": 6340 + }, + { + "epoch": 0.71, + "learning_rate": 9.669324322454373e-05, + "loss": 1.0974, + "step": 6360 + }, + { + "epoch": 0.72, + "learning_rate": 9.666716537576422e-05, + "loss": 1.1057, + "step": 6380 + }, + { + "epoch": 0.72, + "learning_rate": 9.664098864718191e-05, + "loss": 1.0543, + "step": 6400 + }, + { + "epoch": 0.72, + "learning_rate": 9.661471309426085e-05, + "loss": 1.0699, + "step": 6420 + }, + { + "epoch": 0.72, + "learning_rate": 9.658833877267448e-05, + "loss": 1.091, + "step": 6440 + }, + { + "epoch": 0.73, + "learning_rate": 9.65618657383055e-05, + "loss": 1.0425, + "step": 6460 + }, + { + "epoch": 0.73, + "learning_rate": 9.653529404724578e-05, + "loss": 1.0519, + "step": 6480 + }, + { + "epoch": 0.73, + "learning_rate": 9.650862375579622e-05, + "loss": 1.1099, + "step": 6500 + }, + { + "epoch": 0.73, + "learning_rate": 9.648185492046663e-05, + "loss": 1.1131, + "step": 6520 + }, + { + "epoch": 0.73, + "learning_rate": 9.645498759797566e-05, + "loss": 1.1014, + "step": 6540 + }, + { + "epoch": 0.74, + "learning_rate": 9.642802184525058e-05, + "loss": 1.0645, + "step": 6560 + }, + { + "epoch": 0.74, + "learning_rate": 9.640095771942725e-05, + "loss": 1.0989, + "step": 6580 + }, + { + "epoch": 0.74, + "learning_rate": 9.637379527784997e-05, + "loss": 1.1347, + "step": 6600 + }, + { + "epoch": 0.74, + "learning_rate": 9.634653457807135e-05, + "loss": 1.1031, + "step": 6620 + }, + { + "epoch": 0.75, + "learning_rate": 9.631917567785213e-05, + "loss": 1.1107, + "step": 6640 + }, + { + "epoch": 0.75, + "learning_rate": 9.629171863516126e-05, + "loss": 1.0845, + "step": 6660 + }, + { + "epoch": 0.75, + "learning_rate": 9.626416350817549e-05, + "loss": 1.0804, + "step": 6680 + }, + { + "epoch": 0.75, + "learning_rate": 9.623651035527947e-05, + "loss": 1.081, + "step": 6700 + }, + { + "epoch": 0.76, + "learning_rate": 9.620875923506556e-05, + "loss": 1.1133, + "step": 6720 + }, + { + "epoch": 0.76, + "learning_rate": 9.618091020633365e-05, + "loss": 1.0605, + "step": 6740 + }, + { + "epoch": 0.76, + "learning_rate": 9.615296332809112e-05, + "loss": 1.034, + "step": 6760 + }, + { + "epoch": 0.76, + "learning_rate": 9.612491865955265e-05, + "loss": 1.0704, + "step": 6780 + }, + { + "epoch": 0.76, + "learning_rate": 9.609677626014015e-05, + "loss": 1.0791, + "step": 6800 + }, + { + "epoch": 0.77, + "learning_rate": 9.606853618948256e-05, + "loss": 1.0931, + "step": 6820 + }, + { + "epoch": 0.77, + "learning_rate": 9.604019850741582e-05, + "loss": 1.0579, + "step": 6840 + }, + { + "epoch": 0.77, + "learning_rate": 9.601176327398267e-05, + "loss": 1.0252, + "step": 6860 + }, + { + "epoch": 0.77, + "learning_rate": 9.598323054943252e-05, + "loss": 1.0754, + "step": 6880 + }, + { + "epoch": 0.78, + "learning_rate": 9.59546003942214e-05, + "loss": 1.0665, + "step": 6900 + }, + { + "epoch": 0.78, + "learning_rate": 9.592587286901172e-05, + "loss": 1.1004, + "step": 6920 + }, + { + "epoch": 0.78, + "learning_rate": 9.589704803467225e-05, + "loss": 1.1064, + "step": 6940 + }, + { + "epoch": 0.78, + "learning_rate": 9.586812595227792e-05, + "loss": 1.0677, + "step": 6960 + }, + { + "epoch": 0.78, + "learning_rate": 9.583910668310971e-05, + "loss": 1.0727, + "step": 6980 + }, + { + "epoch": 0.79, + "learning_rate": 9.580999028865452e-05, + "loss": 1.0799, + "step": 7000 + }, + { + "epoch": 0.79, + "learning_rate": 9.578077683060507e-05, + "loss": 1.0665, + "step": 7020 + }, + { + "epoch": 0.79, + "learning_rate": 9.57514663708597e-05, + "loss": 1.0729, + "step": 7040 + }, + { + "epoch": 0.79, + "learning_rate": 9.572205897152229e-05, + "loss": 1.0472, + "step": 7060 + }, + { + "epoch": 0.8, + "learning_rate": 9.569255469490214e-05, + "loss": 1.0633, + "step": 7080 + }, + { + "epoch": 0.8, + "learning_rate": 9.566295360351383e-05, + "loss": 1.0578, + "step": 7100 + }, + { + "epoch": 0.8, + "learning_rate": 9.563325576007701e-05, + "loss": 1.0679, + "step": 7120 + }, + { + "epoch": 0.8, + "learning_rate": 9.560346122751638e-05, + "loss": 1.0878, + "step": 7140 + }, + { + "epoch": 0.8, + "learning_rate": 9.557357006896152e-05, + "loss": 1.0521, + "step": 7160 + }, + { + "epoch": 0.81, + "learning_rate": 9.554358234774669e-05, + "loss": 1.0543, + "step": 7180 + }, + { + "epoch": 0.81, + "learning_rate": 9.55134981274108e-05, + "loss": 1.0781, + "step": 7200 + }, + { + "epoch": 0.81, + "learning_rate": 9.548331747169719e-05, + "loss": 1.0668, + "step": 7220 + }, + { + "epoch": 0.81, + "learning_rate": 9.545304044455357e-05, + "loss": 1.0908, + "step": 7240 + }, + { + "epoch": 0.82, + "learning_rate": 9.542266711013182e-05, + "loss": 1.0535, + "step": 7260 + }, + { + "epoch": 0.82, + "learning_rate": 9.539219753278785e-05, + "loss": 1.0587, + "step": 7280 + }, + { + "epoch": 0.82, + "learning_rate": 9.536163177708155e-05, + "loss": 1.0406, + "step": 7300 + }, + { + "epoch": 0.82, + "learning_rate": 9.533096990777657e-05, + "loss": 1.0645, + "step": 7320 + }, + { + "epoch": 0.82, + "learning_rate": 9.530021198984019e-05, + "loss": 1.0425, + "step": 7340 + }, + { + "epoch": 0.83, + "learning_rate": 9.526935808844324e-05, + "loss": 1.0564, + "step": 7360 + }, + { + "epoch": 0.83, + "learning_rate": 9.523840826895988e-05, + "loss": 1.0811, + "step": 7380 + }, + { + "epoch": 0.83, + "learning_rate": 9.520736259696753e-05, + "loss": 1.0727, + "step": 7400 + }, + { + "epoch": 0.83, + "learning_rate": 9.51762211382467e-05, + "loss": 1.0687, + "step": 7420 + }, + { + "epoch": 0.84, + "learning_rate": 9.514498395878086e-05, + "loss": 1.0902, + "step": 7440 + }, + { + "epoch": 0.84, + "learning_rate": 9.51136511247563e-05, + "loss": 1.0613, + "step": 7460 + }, + { + "epoch": 0.84, + "learning_rate": 9.508222270256195e-05, + "loss": 1.0809, + "step": 7480 + }, + { + "epoch": 0.84, + "learning_rate": 9.505069875878934e-05, + "loss": 1.0682, + "step": 7500 + }, + { + "epoch": 0.84, + "learning_rate": 9.501907936023231e-05, + "loss": 1.07, + "step": 7520 + }, + { + "epoch": 0.85, + "learning_rate": 9.498736457388703e-05, + "loss": 1.0797, + "step": 7540 + }, + { + "epoch": 0.85, + "learning_rate": 9.495555446695175e-05, + "loss": 1.0468, + "step": 7560 + }, + { + "epoch": 0.85, + "learning_rate": 9.492364910682668e-05, + "loss": 1.0903, + "step": 7580 + }, + { + "epoch": 0.85, + "learning_rate": 9.489164856111387e-05, + "loss": 1.0929, + "step": 7600 + }, + { + "epoch": 0.86, + "learning_rate": 9.485955289761703e-05, + "loss": 1.0669, + "step": 7620 + }, + { + "epoch": 0.86, + "learning_rate": 9.482736218434143e-05, + "loss": 1.0938, + "step": 7640 + }, + { + "epoch": 0.86, + "learning_rate": 9.479507648949372e-05, + "loss": 1.0582, + "step": 7660 + }, + { + "epoch": 0.86, + "learning_rate": 9.476269588148186e-05, + "loss": 1.0793, + "step": 7680 + }, + { + "epoch": 0.87, + "learning_rate": 9.473022042891477e-05, + "loss": 1.0696, + "step": 7700 + }, + { + "epoch": 0.87, + "learning_rate": 9.469765020060251e-05, + "loss": 1.0599, + "step": 7720 + }, + { + "epoch": 0.87, + "learning_rate": 9.46649852655558e-05, + "loss": 1.0667, + "step": 7740 + }, + { + "epoch": 0.87, + "learning_rate": 9.463222569298615e-05, + "loss": 1.0126, + "step": 7760 + }, + { + "epoch": 0.87, + "learning_rate": 9.459937155230549e-05, + "loss": 1.0694, + "step": 7780 + }, + { + "epoch": 0.88, + "learning_rate": 9.456642291312618e-05, + "loss": 1.0566, + "step": 7800 + }, + { + "epoch": 0.88, + "learning_rate": 9.45333798452608e-05, + "loss": 1.0664, + "step": 7820 + }, + { + "epoch": 0.88, + "learning_rate": 9.450024241872201e-05, + "loss": 1.0408, + "step": 7840 + }, + { + "epoch": 0.88, + "learning_rate": 9.446701070372237e-05, + "loss": 1.0734, + "step": 7860 + }, + { + "epoch": 0.89, + "learning_rate": 9.443368477067427e-05, + "loss": 1.0604, + "step": 7880 + }, + { + "epoch": 0.89, + "learning_rate": 9.440026469018968e-05, + "loss": 1.0746, + "step": 7900 + }, + { + "epoch": 0.89, + "learning_rate": 9.43667505330801e-05, + "loss": 1.0358, + "step": 7920 + }, + { + "epoch": 0.89, + "learning_rate": 9.433314237035631e-05, + "loss": 1.0631, + "step": 7940 + }, + { + "epoch": 0.89, + "learning_rate": 9.429944027322834e-05, + "loss": 1.0296, + "step": 7960 + }, + { + "epoch": 0.9, + "learning_rate": 9.426564431310521e-05, + "loss": 1.0412, + "step": 7980 + }, + { + "epoch": 0.9, + "learning_rate": 9.42317545615948e-05, + "loss": 1.0554, + "step": 8000 + }, + { + "epoch": 0.9, + "learning_rate": 9.419777109050376e-05, + "loss": 1.0978, + "step": 8020 + }, + { + "epoch": 0.9, + "learning_rate": 9.416369397183728e-05, + "loss": 1.0215, + "step": 8040 + }, + { + "epoch": 0.91, + "learning_rate": 9.4129523277799e-05, + "loss": 1.0424, + "step": 8060 + }, + { + "epoch": 0.91, + "learning_rate": 9.409525908079082e-05, + "loss": 1.0453, + "step": 8080 + }, + { + "epoch": 0.91, + "learning_rate": 9.406090145341277e-05, + "loss": 1.0588, + "step": 8100 + }, + { + "epoch": 0.91, + "learning_rate": 9.402645046846281e-05, + "loss": 1.0439, + "step": 8120 + }, + { + "epoch": 0.91, + "learning_rate": 9.399190619893676e-05, + "loss": 1.0501, + "step": 8140 + }, + { + "epoch": 0.92, + "learning_rate": 9.395726871802804e-05, + "loss": 1.0565, + "step": 8160 + }, + { + "epoch": 0.92, + "learning_rate": 9.392253809912758e-05, + "loss": 1.0456, + "step": 8180 + }, + { + "epoch": 0.92, + "learning_rate": 9.388771441582369e-05, + "loss": 1.0547, + "step": 8200 + }, + { + "epoch": 0.92, + "learning_rate": 9.385279774190184e-05, + "loss": 1.0344, + "step": 8220 + }, + { + "epoch": 0.93, + "learning_rate": 9.381778815134455e-05, + "loss": 1.0286, + "step": 8240 + }, + { + "epoch": 0.93, + "learning_rate": 9.378268571833116e-05, + "loss": 1.1033, + "step": 8260 + }, + { + "epoch": 0.93, + "learning_rate": 9.374749051723781e-05, + "loss": 1.0667, + "step": 8280 + }, + { + "epoch": 0.93, + "learning_rate": 9.371220262263713e-05, + "loss": 1.0416, + "step": 8300 + }, + { + "epoch": 0.93, + "learning_rate": 9.36768221092982e-05, + "loss": 1.0674, + "step": 8320 + }, + { + "epoch": 0.94, + "learning_rate": 9.364134905218632e-05, + "loss": 1.0305, + "step": 8340 + }, + { + "epoch": 0.94, + "learning_rate": 9.360578352646285e-05, + "loss": 1.0604, + "step": 8360 + }, + { + "epoch": 0.94, + "learning_rate": 9.357012560748513e-05, + "loss": 1.061, + "step": 8380 + }, + { + "epoch": 0.94, + "learning_rate": 9.353437537080625e-05, + "loss": 1.0678, + "step": 8400 + }, + { + "epoch": 0.95, + "learning_rate": 9.349853289217485e-05, + "loss": 1.0767, + "step": 8420 + }, + { + "epoch": 0.95, + "learning_rate": 9.34625982475351e-05, + "loss": 1.0562, + "step": 8440 + }, + { + "epoch": 0.95, + "learning_rate": 9.342657151302637e-05, + "loss": 1.0301, + "step": 8460 + }, + { + "epoch": 0.95, + "learning_rate": 9.339045276498325e-05, + "loss": 1.063, + "step": 8480 + }, + { + "epoch": 0.96, + "learning_rate": 9.33542420799352e-05, + "loss": 1.0157, + "step": 8500 + }, + { + "epoch": 0.96, + "learning_rate": 9.331793953460653e-05, + "loss": 1.0564, + "step": 8520 + }, + { + "epoch": 0.96, + "learning_rate": 9.328154520591614e-05, + "loss": 1.0817, + "step": 8540 + }, + { + "epoch": 0.96, + "learning_rate": 9.324505917097749e-05, + "loss": 1.0453, + "step": 8560 + }, + { + "epoch": 0.96, + "learning_rate": 9.320848150709826e-05, + "loss": 1.0442, + "step": 8580 + }, + { + "epoch": 0.97, + "learning_rate": 9.317181229178031e-05, + "loss": 1.0379, + "step": 8600 + }, + { + "epoch": 0.97, + "learning_rate": 9.313505160271952e-05, + "loss": 1.0686, + "step": 8620 + }, + { + "epoch": 0.97, + "learning_rate": 9.30981995178055e-05, + "loss": 1.0417, + "step": 8640 + }, + { + "epoch": 0.97, + "learning_rate": 9.306125611512159e-05, + "loss": 1.0569, + "step": 8660 + }, + { + "epoch": 0.98, + "learning_rate": 9.302422147294458e-05, + "loss": 1.0646, + "step": 8680 + }, + { + "epoch": 0.98, + "learning_rate": 9.298709566974462e-05, + "loss": 1.0439, + "step": 8700 + }, + { + "epoch": 0.98, + "learning_rate": 9.294987878418495e-05, + "loss": 1.0529, + "step": 8720 + }, + { + "epoch": 0.98, + "learning_rate": 9.291257089512185e-05, + "loss": 1.0369, + "step": 8740 + }, + { + "epoch": 0.98, + "learning_rate": 9.287517208160439e-05, + "loss": 1.0509, + "step": 8760 + }, + { + "epoch": 0.99, + "learning_rate": 9.283768242287433e-05, + "loss": 1.0825, + "step": 8780 + }, + { + "epoch": 0.99, + "learning_rate": 9.280010199836588e-05, + "loss": 1.0583, + "step": 8800 + }, + { + "epoch": 0.99, + "learning_rate": 9.276243088770559e-05, + "loss": 1.0528, + "step": 8820 + }, + { + "epoch": 0.99, + "learning_rate": 9.272466917071216e-05, + "loss": 1.0307, + "step": 8840 + }, + { + "epoch": 1.0, + "learning_rate": 9.268681692739623e-05, + "loss": 1.0538, + "step": 8860 + }, + { + "epoch": 1.0, + "learning_rate": 9.264887423796029e-05, + "loss": 1.0459, + "step": 8880 + }, + { + "epoch": 1.0, + "learning_rate": 9.261084118279847e-05, + "loss": 1.052, + "step": 8900 + }, + { + "epoch": 1.0, + "learning_rate": 9.257271784249635e-05, + "loss": 0.9985, + "step": 8920 + }, + { + "epoch": 1.0, + "learning_rate": 9.253450429783081e-05, + "loss": 1.0312, + "step": 8940 + }, + { + "epoch": 1.01, + "learning_rate": 9.249620062976988e-05, + "loss": 1.0433, + "step": 8960 + }, + { + "epoch": 1.01, + "learning_rate": 9.245780691947252e-05, + "loss": 1.0061, + "step": 8980 + }, + { + "epoch": 1.01, + "learning_rate": 9.24193232482885e-05, + "loss": 1.0237, + "step": 9000 + }, + { + "epoch": 1.01, + "learning_rate": 9.238074969775818e-05, + "loss": 1.0311, + "step": 9020 + }, + { + "epoch": 1.02, + "learning_rate": 9.234208634961236e-05, + "loss": 1.0467, + "step": 9040 + }, + { + "epoch": 1.02, + "learning_rate": 9.230333328577212e-05, + "loss": 1.0217, + "step": 9060 + }, + { + "epoch": 1.02, + "learning_rate": 9.226449058834863e-05, + "loss": 1.024, + "step": 9080 + }, + { + "epoch": 1.02, + "learning_rate": 9.222555833964296e-05, + "loss": 1.0373, + "step": 9100 + }, + { + "epoch": 1.02, + "learning_rate": 9.218653662214593e-05, + "loss": 1.0248, + "step": 9120 + }, + { + "epoch": 1.03, + "learning_rate": 9.214742551853798e-05, + "loss": 1.0597, + "step": 9140 + }, + { + "epoch": 1.03, + "learning_rate": 9.210822511168884e-05, + "loss": 1.0138, + "step": 9160 + }, + { + "epoch": 1.03, + "learning_rate": 9.206893548465758e-05, + "loss": 1.0406, + "step": 9180 + }, + { + "epoch": 1.03, + "learning_rate": 9.20295567206922e-05, + "loss": 1.0399, + "step": 9200 + }, + { + "epoch": 1.04, + "learning_rate": 9.199008890322963e-05, + "loss": 1.0282, + "step": 9220 + }, + { + "epoch": 1.04, + "learning_rate": 9.19505321158955e-05, + "loss": 1.0156, + "step": 9240 + }, + { + "epoch": 1.04, + "learning_rate": 9.191088644250389e-05, + "loss": 1.0146, + "step": 9260 + }, + { + "epoch": 1.04, + "learning_rate": 9.187115196705731e-05, + "loss": 0.9898, + "step": 9280 + }, + { + "epoch": 1.04, + "learning_rate": 9.183132877374631e-05, + "loss": 1.0027, + "step": 9300 + }, + { + "epoch": 1.05, + "learning_rate": 9.17914169469495e-05, + "loss": 1.0047, + "step": 9320 + }, + { + "epoch": 1.05, + "learning_rate": 9.17514165712333e-05, + "loss": 1.0628, + "step": 9340 + }, + { + "epoch": 1.05, + "learning_rate": 9.171132773135165e-05, + "loss": 1.041, + "step": 9360 + }, + { + "epoch": 1.05, + "learning_rate": 9.167115051224606e-05, + "loss": 1.0387, + "step": 9380 + }, + { + "epoch": 1.06, + "learning_rate": 9.16308849990452e-05, + "loss": 1.0521, + "step": 9400 + }, + { + "epoch": 1.06, + "learning_rate": 9.159053127706487e-05, + "loss": 0.9995, + "step": 9420 + }, + { + "epoch": 1.06, + "learning_rate": 9.155008943180776e-05, + "loss": 1.0068, + "step": 9440 + }, + { + "epoch": 1.06, + "learning_rate": 9.150955954896327e-05, + "loss": 1.0396, + "step": 9460 + }, + { + "epoch": 1.07, + "learning_rate": 9.146894171440735e-05, + "loss": 0.9964, + "step": 9480 + }, + { + "epoch": 1.07, + "learning_rate": 9.14282360142023e-05, + "loss": 0.9995, + "step": 9500 + }, + { + "epoch": 1.07, + "learning_rate": 9.138744253459658e-05, + "loss": 1.0396, + "step": 9520 + }, + { + "epoch": 1.07, + "learning_rate": 9.134656136202466e-05, + "loss": 1.0167, + "step": 9540 + }, + { + "epoch": 1.07, + "learning_rate": 9.130559258310679e-05, + "loss": 1.0319, + "step": 9560 + }, + { + "epoch": 1.08, + "learning_rate": 9.126453628464888e-05, + "loss": 1.0222, + "step": 9580 + }, + { + "epoch": 1.08, + "learning_rate": 9.122339255364224e-05, + "loss": 0.9881, + "step": 9600 + }, + { + "epoch": 1.08, + "learning_rate": 9.118216147726347e-05, + "loss": 1.0193, + "step": 9620 + }, + { + "epoch": 1.08, + "learning_rate": 9.11408431428742e-05, + "loss": 1.0327, + "step": 9640 + }, + { + "epoch": 1.09, + "learning_rate": 9.109943763802097e-05, + "loss": 1.0149, + "step": 9660 + }, + { + "epoch": 1.09, + "learning_rate": 9.105794505043505e-05, + "loss": 0.9916, + "step": 9680 + }, + { + "epoch": 1.09, + "learning_rate": 9.101636546803218e-05, + "loss": 1.0443, + "step": 9700 + }, + { + "epoch": 1.09, + "learning_rate": 9.09746989789124e-05, + "loss": 1.0485, + "step": 9720 + }, + { + "epoch": 1.09, + "learning_rate": 9.093294567135998e-05, + "loss": 1.0536, + "step": 9740 + }, + { + "epoch": 1.1, + "learning_rate": 9.089110563384304e-05, + "loss": 1.0167, + "step": 9760 + }, + { + "epoch": 1.1, + "learning_rate": 9.084917895501357e-05, + "loss": 1.0192, + "step": 9780 + }, + { + "epoch": 1.1, + "learning_rate": 9.080716572370704e-05, + "loss": 1.0163, + "step": 9800 + }, + { + "epoch": 1.1, + "learning_rate": 9.07650660289424e-05, + "loss": 1.0041, + "step": 9820 + }, + { + "epoch": 1.11, + "learning_rate": 9.072287995992172e-05, + "loss": 1.0364, + "step": 9840 + }, + { + "epoch": 1.11, + "learning_rate": 9.068060760603014e-05, + "loss": 1.0567, + "step": 9860 + }, + { + "epoch": 1.11, + "learning_rate": 9.063824905683562e-05, + "loss": 1.0234, + "step": 9880 + }, + { + "epoch": 1.11, + "learning_rate": 9.059580440208869e-05, + "loss": 1.0011, + "step": 9900 + }, + { + "epoch": 1.11, + "learning_rate": 9.05532737317224e-05, + "loss": 1.0375, + "step": 9920 + }, + { + "epoch": 1.12, + "learning_rate": 9.051065713585203e-05, + "loss": 1.0142, + "step": 9940 + }, + { + "epoch": 1.12, + "learning_rate": 9.04679547047749e-05, + "loss": 1.0143, + "step": 9960 + }, + { + "epoch": 1.12, + "learning_rate": 9.04251665289702e-05, + "loss": 1.0232, + "step": 9980 + }, + { + "epoch": 1.12, + "learning_rate": 9.038229269909883e-05, + "loss": 1.0121, + "step": 10000 + }, + { + "epoch": 1.13, + "learning_rate": 9.033933330600316e-05, + "loss": 1.0262, + "step": 10020 + }, + { + "epoch": 1.13, + "learning_rate": 9.029628844070686e-05, + "loss": 1.0398, + "step": 10040 + }, + { + "epoch": 1.13, + "learning_rate": 9.025315819441467e-05, + "loss": 1.0101, + "step": 10060 + }, + { + "epoch": 1.13, + "learning_rate": 9.020994265851226e-05, + "loss": 1.0367, + "step": 10080 + }, + { + "epoch": 1.13, + "learning_rate": 9.016664192456604e-05, + "loss": 0.9912, + "step": 10100 + }, + { + "epoch": 1.14, + "learning_rate": 9.012325608432291e-05, + "loss": 1.0516, + "step": 10120 + }, + { + "epoch": 1.14, + "learning_rate": 9.00797852297101e-05, + "loss": 1.0297, + "step": 10140 + }, + { + "epoch": 1.14, + "learning_rate": 9.003622945283496e-05, + "loss": 1.0401, + "step": 10160 + }, + { + "epoch": 1.14, + "learning_rate": 8.99925888459848e-05, + "loss": 1.0365, + "step": 10180 + }, + { + "epoch": 1.15, + "learning_rate": 8.994886350162666e-05, + "loss": 1.0227, + "step": 10200 + }, + { + "epoch": 1.15, + "learning_rate": 8.990505351240714e-05, + "loss": 1.0176, + "step": 10220 + }, + { + "epoch": 1.15, + "learning_rate": 8.986115897115213e-05, + "loss": 1.0419, + "step": 10240 + }, + { + "epoch": 1.15, + "learning_rate": 8.981717997086674e-05, + "loss": 1.0206, + "step": 10260 + }, + { + "epoch": 1.16, + "learning_rate": 8.977311660473499e-05, + "loss": 0.9932, + "step": 10280 + }, + { + "epoch": 1.16, + "learning_rate": 8.972896896611971e-05, + "loss": 1.0083, + "step": 10300 + }, + { + "epoch": 1.16, + "learning_rate": 8.968473714856222e-05, + "loss": 1.0006, + "step": 10320 + }, + { + "epoch": 1.16, + "learning_rate": 8.964042124578224e-05, + "loss": 1.0136, + "step": 10340 + }, + { + "epoch": 1.16, + "learning_rate": 8.959602135167766e-05, + "loss": 1.0254, + "step": 10360 + }, + { + "epoch": 1.17, + "learning_rate": 8.955153756032428e-05, + "loss": 0.9969, + "step": 10380 + }, + { + "epoch": 1.17, + "learning_rate": 8.950696996597576e-05, + "loss": 0.9852, + "step": 10400 + }, + { + "epoch": 1.17, + "learning_rate": 8.946231866306325e-05, + "loss": 0.9993, + "step": 10420 + }, + { + "epoch": 1.17, + "learning_rate": 8.941758374619525e-05, + "loss": 1.019, + "step": 10440 + }, + { + "epoch": 1.18, + "learning_rate": 8.93727653101575e-05, + "loss": 1.0028, + "step": 10460 + }, + { + "epoch": 1.18, + "learning_rate": 8.932786344991264e-05, + "loss": 1.028, + "step": 10480 + }, + { + "epoch": 1.18, + "learning_rate": 8.928287826060009e-05, + "loss": 1.0152, + "step": 10500 + }, + { + "epoch": 1.18, + "learning_rate": 8.923780983753583e-05, + "loss": 1.0209, + "step": 10520 + }, + { + "epoch": 1.18, + "learning_rate": 8.919265827621218e-05, + "loss": 1.0409, + "step": 10540 + }, + { + "epoch": 1.19, + "learning_rate": 8.914742367229768e-05, + "loss": 1.0155, + "step": 10560 + }, + { + "epoch": 1.19, + "learning_rate": 8.910210612163673e-05, + "loss": 0.994, + "step": 10580 + }, + { + "epoch": 1.19, + "learning_rate": 8.905670572024958e-05, + "loss": 1.0145, + "step": 10600 + }, + { + "epoch": 1.19, + "learning_rate": 8.901122256433195e-05, + "loss": 1.0211, + "step": 10620 + }, + { + "epoch": 1.2, + "learning_rate": 8.89656567502549e-05, + "loss": 0.9977, + "step": 10640 + }, + { + "epoch": 1.2, + "learning_rate": 8.89200083745647e-05, + "loss": 0.9889, + "step": 10660 + }, + { + "epoch": 1.2, + "learning_rate": 8.887427753398248e-05, + "loss": 1.0104, + "step": 10680 + }, + { + "epoch": 1.2, + "learning_rate": 8.882846432540413e-05, + "loss": 1.0535, + "step": 10700 + }, + { + "epoch": 1.2, + "learning_rate": 8.87825688459001e-05, + "loss": 1.0043, + "step": 10720 + }, + { + "epoch": 1.21, + "learning_rate": 8.873659119271507e-05, + "loss": 1.0144, + "step": 10740 + }, + { + "epoch": 1.21, + "learning_rate": 8.869053146326793e-05, + "loss": 1.0118, + "step": 10760 + }, + { + "epoch": 1.21, + "learning_rate": 8.864438975515141e-05, + "loss": 1.0366, + "step": 10780 + }, + { + "epoch": 1.21, + "learning_rate": 8.859816616613194e-05, + "loss": 1.0125, + "step": 10800 + }, + { + "epoch": 1.22, + "learning_rate": 8.855186079414949e-05, + "loss": 0.9938, + "step": 10820 + }, + { + "epoch": 1.22, + "learning_rate": 8.850547373731727e-05, + "loss": 1.0046, + "step": 10840 + }, + { + "epoch": 1.22, + "learning_rate": 8.845900509392158e-05, + "loss": 1.0199, + "step": 10860 + }, + { + "epoch": 1.22, + "learning_rate": 8.841245496242157e-05, + "loss": 1.0019, + "step": 10880 + }, + { + "epoch": 1.22, + "learning_rate": 8.836582344144911e-05, + "loss": 0.9985, + "step": 10900 + }, + { + "epoch": 1.23, + "learning_rate": 8.831911062980845e-05, + "loss": 1.0138, + "step": 10920 + }, + { + "epoch": 1.23, + "learning_rate": 8.827231662647611e-05, + "loss": 1.0053, + "step": 10940 + }, + { + "epoch": 1.23, + "learning_rate": 8.822544153060064e-05, + "loss": 1.012, + "step": 10960 + }, + { + "epoch": 1.23, + "learning_rate": 8.817848544150243e-05, + "loss": 1.0178, + "step": 10980 + }, + { + "epoch": 1.24, + "learning_rate": 8.813144845867345e-05, + "loss": 1.0143, + "step": 11000 + }, + { + "epoch": 1.24, + "learning_rate": 8.808433068177708e-05, + "loss": 1.0087, + "step": 11020 + }, + { + "epoch": 1.24, + "learning_rate": 8.80371322106479e-05, + "loss": 1.0242, + "step": 11040 + }, + { + "epoch": 1.24, + "learning_rate": 8.798985314529146e-05, + "loss": 1.0364, + "step": 11060 + }, + { + "epoch": 1.24, + "learning_rate": 8.794249358588407e-05, + "loss": 0.9967, + "step": 11080 + }, + { + "epoch": 1.25, + "learning_rate": 8.789505363277259e-05, + "loss": 1.0144, + "step": 11100 + }, + { + "epoch": 1.25, + "learning_rate": 8.784753338647424e-05, + "loss": 1.016, + "step": 11120 + }, + { + "epoch": 1.25, + "learning_rate": 8.779993294767635e-05, + "loss": 1.0061, + "step": 11140 + }, + { + "epoch": 1.25, + "learning_rate": 8.77522524172362e-05, + "loss": 1.0166, + "step": 11160 + }, + { + "epoch": 1.26, + "learning_rate": 8.770449189618069e-05, + "loss": 1.0228, + "step": 11180 + }, + { + "epoch": 1.26, + "learning_rate": 8.76566514857063e-05, + "loss": 1.0037, + "step": 11200 + }, + { + "epoch": 1.26, + "learning_rate": 8.76087312871787e-05, + "loss": 1.033, + "step": 11220 + }, + { + "epoch": 1.26, + "learning_rate": 8.75607314021327e-05, + "loss": 1.0347, + "step": 11240 + }, + { + "epoch": 1.27, + "learning_rate": 8.751265193227189e-05, + "loss": 0.9958, + "step": 11260 + }, + { + "epoch": 1.27, + "learning_rate": 8.746449297946853e-05, + "loss": 1.0154, + "step": 11280 + }, + { + "epoch": 1.27, + "learning_rate": 8.741625464576322e-05, + "loss": 1.0308, + "step": 11300 + }, + { + "epoch": 1.27, + "learning_rate": 8.736793703336482e-05, + "loss": 1.0114, + "step": 11320 + }, + { + "epoch": 1.27, + "learning_rate": 8.731954024465017e-05, + "loss": 1.0231, + "step": 11340 + }, + { + "epoch": 1.28, + "learning_rate": 8.727106438216384e-05, + "loss": 1.01, + "step": 11360 + }, + { + "epoch": 1.28, + "learning_rate": 8.722250954861795e-05, + "loss": 0.983, + "step": 11380 + }, + { + "epoch": 1.28, + "learning_rate": 8.717387584689195e-05, + "loss": 0.9938, + "step": 11400 + }, + { + "epoch": 1.28, + "learning_rate": 8.712516338003241e-05, + "loss": 0.9918, + "step": 11420 + }, + { + "epoch": 1.29, + "learning_rate": 8.707637225125276e-05, + "loss": 1.0061, + "step": 11440 + }, + { + "epoch": 1.29, + "learning_rate": 8.702750256393316e-05, + "loss": 1.0493, + "step": 11460 + }, + { + "epoch": 1.29, + "learning_rate": 8.697855442162012e-05, + "loss": 0.9973, + "step": 11480 + }, + { + "epoch": 1.29, + "learning_rate": 8.692952792802651e-05, + "loss": 1.0343, + "step": 11500 + }, + { + "epoch": 1.29, + "learning_rate": 8.688042318703111e-05, + "loss": 1.0333, + "step": 11520 + }, + { + "epoch": 1.3, + "learning_rate": 8.683124030267855e-05, + "loss": 1.0276, + "step": 11540 + }, + { + "epoch": 1.3, + "learning_rate": 8.678197937917901e-05, + "loss": 1.0016, + "step": 11560 + }, + { + "epoch": 1.3, + "learning_rate": 8.673264052090801e-05, + "loss": 0.9907, + "step": 11580 + }, + { + "epoch": 1.3, + "learning_rate": 8.668322383240626e-05, + "loss": 1.0153, + "step": 11600 + }, + { + "epoch": 1.31, + "learning_rate": 8.663372941837929e-05, + "loss": 1.0084, + "step": 11620 + }, + { + "epoch": 1.31, + "learning_rate": 8.658415738369737e-05, + "loss": 1.0255, + "step": 11640 + }, + { + "epoch": 1.31, + "learning_rate": 8.653450783339523e-05, + "loss": 0.9996, + "step": 11660 + }, + { + "epoch": 1.31, + "learning_rate": 8.648478087267187e-05, + "loss": 0.9922, + "step": 11680 + }, + { + "epoch": 1.31, + "learning_rate": 8.643497660689024e-05, + "loss": 1.021, + "step": 11700 + }, + { + "epoch": 1.32, + "learning_rate": 8.638509514157715e-05, + "loss": 1.0102, + "step": 11720 + }, + { + "epoch": 1.32, + "learning_rate": 8.633513658242295e-05, + "loss": 1.0281, + "step": 11740 + }, + { + "epoch": 1.32, + "learning_rate": 8.628510103528134e-05, + "loss": 1.0185, + "step": 11760 + }, + { + "epoch": 1.32, + "learning_rate": 8.623498860616918e-05, + "loss": 0.9624, + "step": 11780 + }, + { + "epoch": 1.33, + "learning_rate": 8.618479940126617e-05, + "loss": 1.0486, + "step": 11800 + }, + { + "epoch": 1.33, + "learning_rate": 8.613453352691473e-05, + "loss": 1.0069, + "step": 11820 + }, + { + "epoch": 1.33, + "learning_rate": 8.608419108961971e-05, + "loss": 1.0449, + "step": 11840 + }, + { + "epoch": 1.33, + "learning_rate": 8.603377219604823e-05, + "loss": 0.9983, + "step": 11860 + }, + { + "epoch": 1.33, + "learning_rate": 8.59832769530293e-05, + "loss": 1.0164, + "step": 11880 + }, + { + "epoch": 1.34, + "learning_rate": 8.59327054675538e-05, + "loss": 1.0222, + "step": 11900 + }, + { + "epoch": 1.34, + "learning_rate": 8.588205784677415e-05, + "loss": 1.0372, + "step": 11920 + }, + { + "epoch": 1.34, + "learning_rate": 8.583133419800404e-05, + "loss": 1.0078, + "step": 11940 + }, + { + "epoch": 1.34, + "learning_rate": 8.578053462871827e-05, + "loss": 1.0216, + "step": 11960 + }, + { + "epoch": 1.35, + "learning_rate": 8.57296592465525e-05, + "loss": 1.0257, + "step": 11980 + }, + { + "epoch": 1.35, + "learning_rate": 8.567870815930305e-05, + "loss": 0.9813, + "step": 12000 + }, + { + "epoch": 1.35, + "learning_rate": 8.562768147492662e-05, + "loss": 0.9851, + "step": 12020 + }, + { + "epoch": 1.35, + "learning_rate": 8.557657930154007e-05, + "loss": 1.0091, + "step": 12040 + }, + { + "epoch": 1.36, + "learning_rate": 8.552540174742025e-05, + "loss": 0.9911, + "step": 12060 + }, + { + "epoch": 1.36, + "learning_rate": 8.547414892100373e-05, + "loss": 1.0127, + "step": 12080 + }, + { + "epoch": 1.36, + "learning_rate": 8.542282093088651e-05, + "loss": 0.9931, + "step": 12100 + }, + { + "epoch": 1.36, + "learning_rate": 8.537141788582393e-05, + "loss": 0.9987, + "step": 12120 + }, + { + "epoch": 1.36, + "learning_rate": 8.53199398947303e-05, + "loss": 1.0192, + "step": 12140 + }, + { + "epoch": 1.37, + "learning_rate": 8.526838706667873e-05, + "loss": 0.984, + "step": 12160 + }, + { + "epoch": 1.37, + "learning_rate": 8.521675951090094e-05, + "loss": 0.9841, + "step": 12180 + }, + { + "epoch": 1.37, + "learning_rate": 8.516505733678695e-05, + "loss": 1.0117, + "step": 12200 + }, + { + "epoch": 1.37, + "learning_rate": 8.511328065388488e-05, + "loss": 0.9758, + "step": 12220 + }, + { + "epoch": 1.38, + "learning_rate": 8.506142957190073e-05, + "loss": 1.0073, + "step": 12240 + }, + { + "epoch": 1.38, + "learning_rate": 8.500950420069817e-05, + "loss": 1.0178, + "step": 12260 + }, + { + "epoch": 1.38, + "learning_rate": 8.495750465029821e-05, + "loss": 1.0222, + "step": 12280 + }, + { + "epoch": 1.38, + "learning_rate": 8.490543103087912e-05, + "loss": 0.9812, + "step": 12300 + }, + { + "epoch": 1.38, + "learning_rate": 8.485328345277603e-05, + "loss": 1.0102, + "step": 12320 + }, + { + "epoch": 1.39, + "learning_rate": 8.48010620264808e-05, + "loss": 1.009, + "step": 12340 + }, + { + "epoch": 1.39, + "learning_rate": 8.47487668626418e-05, + "loss": 1.0279, + "step": 12360 + }, + { + "epoch": 1.39, + "learning_rate": 8.469639807206357e-05, + "loss": 1.019, + "step": 12380 + }, + { + "epoch": 1.39, + "learning_rate": 8.46439557657067e-05, + "loss": 1.0064, + "step": 12400 + }, + { + "epoch": 1.4, + "learning_rate": 8.459144005468756e-05, + "loss": 1.0037, + "step": 12420 + }, + { + "epoch": 1.4, + "learning_rate": 8.453885105027802e-05, + "loss": 0.9955, + "step": 12440 + }, + { + "epoch": 1.4, + "learning_rate": 8.448618886390522e-05, + "loss": 0.9949, + "step": 12460 + }, + { + "epoch": 1.4, + "learning_rate": 8.443345360715143e-05, + "loss": 0.9902, + "step": 12480 + }, + { + "epoch": 1.4, + "learning_rate": 8.43806453917537e-05, + "loss": 1.026, + "step": 12500 + }, + { + "epoch": 1.41, + "learning_rate": 8.432776432960366e-05, + "loss": 0.983, + "step": 12520 + }, + { + "epoch": 1.41, + "learning_rate": 8.427481053274734e-05, + "loss": 1.0039, + "step": 12540 + }, + { + "epoch": 1.41, + "learning_rate": 8.422178411338481e-05, + "loss": 0.99, + "step": 12560 + }, + { + "epoch": 1.41, + "learning_rate": 8.416868518387009e-05, + "loss": 1.0346, + "step": 12580 + }, + { + "epoch": 1.42, + "learning_rate": 8.411551385671077e-05, + "loss": 1.0002, + "step": 12600 + }, + { + "epoch": 1.42, + "learning_rate": 8.406227024456788e-05, + "loss": 0.9916, + "step": 12620 + }, + { + "epoch": 1.42, + "learning_rate": 8.400895446025558e-05, + "loss": 1.0303, + "step": 12640 + }, + { + "epoch": 1.42, + "learning_rate": 8.3955566616741e-05, + "loss": 0.982, + "step": 12660 + }, + { + "epoch": 1.42, + "learning_rate": 8.39021068271439e-05, + "loss": 0.996, + "step": 12680 + }, + { + "epoch": 1.43, + "learning_rate": 8.38485752047365e-05, + "loss": 1.0142, + "step": 12700 + }, + { + "epoch": 1.43, + "learning_rate": 8.379497186294322e-05, + "loss": 1.032, + "step": 12720 + }, + { + "epoch": 1.43, + "learning_rate": 8.374129691534046e-05, + "loss": 1.0146, + "step": 12740 + }, + { + "epoch": 1.43, + "learning_rate": 8.36875504756563e-05, + "loss": 1.0217, + "step": 12760 + }, + { + "epoch": 1.44, + "learning_rate": 8.363373265777034e-05, + "loss": 0.9988, + "step": 12780 + }, + { + "epoch": 1.44, + "learning_rate": 8.357984357571337e-05, + "loss": 1.0119, + "step": 12800 + }, + { + "epoch": 1.44, + "learning_rate": 8.352588334366728e-05, + "loss": 0.9801, + "step": 12820 + }, + { + "epoch": 1.44, + "learning_rate": 8.347185207596457e-05, + "loss": 0.9835, + "step": 12840 + }, + { + "epoch": 1.44, + "learning_rate": 8.341774988708837e-05, + "loss": 0.9979, + "step": 12860 + }, + { + "epoch": 1.45, + "learning_rate": 8.336357689167203e-05, + "loss": 1.0108, + "step": 12880 + }, + { + "epoch": 1.45, + "learning_rate": 8.33093332044989e-05, + "loss": 0.9943, + "step": 12900 + }, + { + "epoch": 1.45, + "learning_rate": 8.325501894050218e-05, + "loss": 0.9962, + "step": 12920 + }, + { + "epoch": 1.45, + "learning_rate": 8.320063421476454e-05, + "loss": 1.0326, + "step": 12940 + }, + { + "epoch": 1.46, + "learning_rate": 8.314617914251805e-05, + "loss": 0.9947, + "step": 12960 + }, + { + "epoch": 1.46, + "learning_rate": 8.30916538391437e-05, + "loss": 1.0253, + "step": 12980 + }, + { + "epoch": 1.46, + "learning_rate": 8.30370584201714e-05, + "loss": 0.9905, + "step": 13000 + }, + { + "epoch": 1.46, + "learning_rate": 8.298239300127954e-05, + "loss": 1.0028, + "step": 13020 + }, + { + "epoch": 1.47, + "learning_rate": 8.292765769829487e-05, + "loss": 1.0467, + "step": 13040 + }, + { + "epoch": 1.47, + "learning_rate": 8.287285262719224e-05, + "loss": 1.0042, + "step": 13060 + }, + { + "epoch": 1.47, + "learning_rate": 8.281797790409425e-05, + "loss": 1.0102, + "step": 13080 + }, + { + "epoch": 1.47, + "learning_rate": 8.276303364527116e-05, + "loss": 1.0038, + "step": 13100 + }, + { + "epoch": 1.47, + "learning_rate": 8.270801996714051e-05, + "loss": 1.0028, + "step": 13120 + }, + { + "epoch": 1.48, + "learning_rate": 8.265293698626694e-05, + "loss": 0.9969, + "step": 13140 + }, + { + "epoch": 1.48, + "learning_rate": 8.259778481936197e-05, + "loss": 0.9955, + "step": 13160 + }, + { + "epoch": 1.48, + "learning_rate": 8.254256358328365e-05, + "loss": 1.0106, + "step": 13180 + }, + { + "epoch": 1.48, + "learning_rate": 8.248727339503641e-05, + "loss": 1.0129, + "step": 13200 + }, + { + "epoch": 1.49, + "learning_rate": 8.243191437177077e-05, + "loss": 0.981, + "step": 13220 + }, + { + "epoch": 1.49, + "learning_rate": 8.237648663078314e-05, + "loss": 1.0168, + "step": 13240 + }, + { + "epoch": 1.49, + "learning_rate": 8.232099028951548e-05, + "loss": 0.9942, + "step": 13260 + }, + { + "epoch": 1.49, + "learning_rate": 8.22654254655551e-05, + "loss": 1.0028, + "step": 13280 + }, + { + "epoch": 1.49, + "learning_rate": 8.22097922766344e-05, + "loss": 1.0011, + "step": 13300 + }, + { + "epoch": 1.5, + "learning_rate": 8.215409084063075e-05, + "loss": 1.0141, + "step": 13320 + }, + { + "epoch": 1.5, + "learning_rate": 8.209832127556598e-05, + "loss": 0.9799, + "step": 13340 + }, + { + "epoch": 1.5, + "learning_rate": 8.204248369960634e-05, + "loss": 0.9972, + "step": 13360 + }, + { + "epoch": 1.5, + "learning_rate": 8.198657823106219e-05, + "loss": 0.9737, + "step": 13380 + }, + { + "epoch": 1.51, + "learning_rate": 8.193060498838774e-05, + "loss": 1.0, + "step": 13400 + }, + { + "epoch": 1.51, + "learning_rate": 8.187456409018074e-05, + "loss": 1.0246, + "step": 13420 + }, + { + "epoch": 1.51, + "learning_rate": 8.18184556551824e-05, + "loss": 1.0043, + "step": 13440 + }, + { + "epoch": 1.51, + "learning_rate": 8.176227980227694e-05, + "loss": 0.9859, + "step": 13460 + }, + { + "epoch": 1.51, + "learning_rate": 8.170603665049146e-05, + "loss": 0.9851, + "step": 13480 + }, + { + "epoch": 1.52, + "learning_rate": 8.164972631899566e-05, + "loss": 1.0025, + "step": 13500 + }, + { + "epoch": 1.52, + "learning_rate": 8.159334892710156e-05, + "loss": 0.9988, + "step": 13520 + }, + { + "epoch": 1.52, + "learning_rate": 8.15369045942633e-05, + "loss": 1.0531, + "step": 13540 + }, + { + "epoch": 1.52, + "learning_rate": 8.148039344007685e-05, + "loss": 0.983, + "step": 13560 + }, + { + "epoch": 1.53, + "learning_rate": 8.142381558427974e-05, + "loss": 1.0149, + "step": 13580 + }, + { + "epoch": 1.53, + "learning_rate": 8.136717114675083e-05, + "loss": 1.0205, + "step": 13600 + }, + { + "epoch": 1.53, + "learning_rate": 8.131046024751009e-05, + "loss": 1.015, + "step": 13620 + }, + { + "epoch": 1.53, + "learning_rate": 8.12536830067183e-05, + "loss": 1.0131, + "step": 13640 + }, + { + "epoch": 1.53, + "learning_rate": 8.119683954467677e-05, + "loss": 1.026, + "step": 13660 + }, + { + "epoch": 1.54, + "learning_rate": 8.113992998182715e-05, + "loss": 0.9792, + "step": 13680 + }, + { + "epoch": 1.54, + "learning_rate": 8.108295443875116e-05, + "loss": 1.0232, + "step": 13700 + }, + { + "epoch": 1.54, + "learning_rate": 8.102591303617031e-05, + "loss": 1.0185, + "step": 13720 + }, + { + "epoch": 1.54, + "learning_rate": 8.096880589494563e-05, + "loss": 0.9907, + "step": 13740 + }, + { + "epoch": 1.55, + "learning_rate": 8.091163313607749e-05, + "loss": 1.0145, + "step": 13760 + }, + { + "epoch": 1.55, + "learning_rate": 8.085439488070521e-05, + "loss": 0.9818, + "step": 13780 + }, + { + "epoch": 1.55, + "learning_rate": 8.079709125010699e-05, + "loss": 0.9885, + "step": 13800 + }, + { + "epoch": 1.55, + "learning_rate": 8.073972236569947e-05, + "loss": 1.0026, + "step": 13820 + }, + { + "epoch": 1.56, + "learning_rate": 8.06822883490376e-05, + "loss": 0.9577, + "step": 13840 + }, + { + "epoch": 1.56, + "learning_rate": 8.06247893218143e-05, + "loss": 1.0351, + "step": 13860 + }, + { + "epoch": 1.56, + "learning_rate": 8.056722540586024e-05, + "loss": 0.9972, + "step": 13880 + }, + { + "epoch": 1.56, + "learning_rate": 8.050959672314359e-05, + "loss": 1.0099, + "step": 13900 + }, + { + "epoch": 1.56, + "learning_rate": 8.045190339576978e-05, + "loss": 0.9896, + "step": 13920 + }, + { + "epoch": 1.57, + "learning_rate": 8.039414554598113e-05, + "loss": 1.0083, + "step": 13940 + }, + { + "epoch": 1.57, + "learning_rate": 8.033632329615676e-05, + "loss": 0.9907, + "step": 13960 + }, + { + "epoch": 1.57, + "learning_rate": 8.027843676881218e-05, + "loss": 1.0125, + "step": 13980 + }, + { + "epoch": 1.57, + "learning_rate": 8.022048608659913e-05, + "loss": 1.0153, + "step": 14000 + }, + { + "epoch": 1.58, + "learning_rate": 8.016247137230525e-05, + "loss": 0.9991, + "step": 14020 + }, + { + "epoch": 1.58, + "learning_rate": 8.010439274885391e-05, + "loss": 0.9889, + "step": 14040 + }, + { + "epoch": 1.58, + "learning_rate": 8.004625033930382e-05, + "loss": 1.0172, + "step": 14060 + }, + { + "epoch": 1.58, + "learning_rate": 7.998804426684889e-05, + "loss": 0.9886, + "step": 14080 + }, + { + "epoch": 1.58, + "learning_rate": 7.992977465481793e-05, + "loss": 1.0062, + "step": 14100 + }, + { + "epoch": 1.59, + "learning_rate": 7.987144162667431e-05, + "loss": 0.9952, + "step": 14120 + }, + { + "epoch": 1.59, + "learning_rate": 7.981304530601586e-05, + "loss": 1.0364, + "step": 14140 + }, + { + "epoch": 1.59, + "learning_rate": 7.975458581657446e-05, + "loss": 1.021, + "step": 14160 + }, + { + "epoch": 1.59, + "learning_rate": 7.969606328221583e-05, + "loss": 1.0118, + "step": 14180 + }, + { + "epoch": 1.6, + "learning_rate": 7.96374778269393e-05, + "loss": 1.0187, + "step": 14200 + }, + { + "epoch": 1.6, + "learning_rate": 7.95788295748775e-05, + "loss": 1.0267, + "step": 14220 + }, + { + "epoch": 1.6, + "learning_rate": 7.952011865029614e-05, + "loss": 1.0121, + "step": 14240 + }, + { + "epoch": 1.6, + "learning_rate": 7.946134517759368e-05, + "loss": 0.971, + "step": 14260 + }, + { + "epoch": 1.6, + "learning_rate": 7.940250928130116e-05, + "loss": 1.0182, + "step": 14280 + }, + { + "epoch": 1.61, + "learning_rate": 7.934361108608183e-05, + "loss": 0.9876, + "step": 14300 + }, + { + "epoch": 1.61, + "learning_rate": 7.9284650716731e-05, + "loss": 0.988, + "step": 14320 + }, + { + "epoch": 1.61, + "learning_rate": 7.922562829817564e-05, + "loss": 1.0128, + "step": 14340 + }, + { + "epoch": 1.61, + "learning_rate": 7.916654395547427e-05, + "loss": 1.029, + "step": 14360 + }, + { + "epoch": 1.62, + "learning_rate": 7.91073978138166e-05, + "loss": 0.9795, + "step": 14380 + }, + { + "epoch": 1.62, + "learning_rate": 7.904818999852323e-05, + "loss": 0.9923, + "step": 14400 + }, + { + "epoch": 1.62, + "learning_rate": 7.898892063504548e-05, + "loss": 1.0132, + "step": 14420 + }, + { + "epoch": 1.62, + "learning_rate": 7.89295898489651e-05, + "loss": 0.999, + "step": 14440 + }, + { + "epoch": 1.62, + "learning_rate": 7.887019776599391e-05, + "loss": 0.9659, + "step": 14460 + }, + { + "epoch": 1.63, + "learning_rate": 7.88107445119737e-05, + "loss": 0.9859, + "step": 14480 + }, + { + "epoch": 1.63, + "learning_rate": 7.875123021287579e-05, + "loss": 0.9863, + "step": 14500 + }, + { + "epoch": 1.63, + "learning_rate": 7.869165499480089e-05, + "loss": 1.0014, + "step": 14520 + }, + { + "epoch": 1.63, + "learning_rate": 7.863201898397878e-05, + "loss": 0.9914, + "step": 14540 + }, + { + "epoch": 1.64, + "learning_rate": 7.857232230676802e-05, + "loss": 0.9893, + "step": 14560 + }, + { + "epoch": 1.64, + "learning_rate": 7.851256508965577e-05, + "loss": 0.9932, + "step": 14580 + }, + { + "epoch": 1.64, + "learning_rate": 7.845274745925744e-05, + "loss": 1.0187, + "step": 14600 + }, + { + "epoch": 1.64, + "learning_rate": 7.83928695423164e-05, + "loss": 1.0001, + "step": 14620 + }, + { + "epoch": 1.64, + "learning_rate": 7.83329314657038e-05, + "loss": 0.991, + "step": 14640 + }, + { + "epoch": 1.65, + "learning_rate": 7.827293335641825e-05, + "loss": 1.0057, + "step": 14660 + }, + { + "epoch": 1.65, + "learning_rate": 7.82128753415856e-05, + "loss": 0.9592, + "step": 14680 + }, + { + "epoch": 1.65, + "learning_rate": 7.815275754845854e-05, + "loss": 1.0121, + "step": 14700 + }, + { + "epoch": 1.65, + "learning_rate": 7.809258010441649e-05, + "loss": 0.9967, + "step": 14720 + }, + { + "epoch": 1.66, + "learning_rate": 7.803234313696524e-05, + "loss": 0.9933, + "step": 14740 + }, + { + "epoch": 1.66, + "learning_rate": 7.79720467737367e-05, + "loss": 0.9841, + "step": 14760 + }, + { + "epoch": 1.66, + "learning_rate": 7.791169114248864e-05, + "loss": 1.0147, + "step": 14780 + }, + { + "epoch": 1.66, + "learning_rate": 7.785127637110438e-05, + "loss": 1.0291, + "step": 14800 + }, + { + "epoch": 1.67, + "learning_rate": 7.779080258759259e-05, + "loss": 0.9922, + "step": 14820 + }, + { + "epoch": 1.67, + "learning_rate": 7.773026992008692e-05, + "loss": 1.0002, + "step": 14840 + }, + { + "epoch": 1.67, + "learning_rate": 7.766967849684584e-05, + "loss": 1.0118, + "step": 14860 + }, + { + "epoch": 1.67, + "learning_rate": 7.760902844625228e-05, + "loss": 1.0237, + "step": 14880 + }, + { + "epoch": 1.67, + "learning_rate": 7.754831989681345e-05, + "loss": 1.0038, + "step": 14900 + }, + { + "epoch": 1.68, + "learning_rate": 7.74875529771604e-05, + "loss": 1.0016, + "step": 14920 + }, + { + "epoch": 1.68, + "learning_rate": 7.742672781604794e-05, + "loss": 0.9977, + "step": 14940 + }, + { + "epoch": 1.68, + "learning_rate": 7.736584454235427e-05, + "loss": 0.9864, + "step": 14960 + }, + { + "epoch": 1.68, + "learning_rate": 7.730490328508072e-05, + "loss": 0.9771, + "step": 14980 + }, + { + "epoch": 1.69, + "learning_rate": 7.724390417335144e-05, + "loss": 0.983, + "step": 15000 + }, + { + "epoch": 1.69, + "learning_rate": 7.718284733641323e-05, + "loss": 0.968, + "step": 15020 + }, + { + "epoch": 1.69, + "learning_rate": 7.712173290363514e-05, + "loss": 0.9642, + "step": 15040 + }, + { + "epoch": 1.69, + "learning_rate": 7.706056100450831e-05, + "loss": 1.0196, + "step": 15060 + }, + { + "epoch": 1.69, + "learning_rate": 7.699933176864558e-05, + "loss": 0.9708, + "step": 15080 + }, + { + "epoch": 1.7, + "learning_rate": 7.693804532578131e-05, + "loss": 0.9916, + "step": 15100 + }, + { + "epoch": 1.7, + "learning_rate": 7.687670180577109e-05, + "loss": 1.0076, + "step": 15120 + }, + { + "epoch": 1.7, + "learning_rate": 7.681530133859142e-05, + "loss": 0.9733, + "step": 15140 + }, + { + "epoch": 1.7, + "learning_rate": 7.675384405433947e-05, + "loss": 0.9965, + "step": 15160 + }, + { + "epoch": 1.71, + "learning_rate": 7.66923300832328e-05, + "loss": 0.9941, + "step": 15180 + }, + { + "epoch": 1.71, + "learning_rate": 7.663075955560906e-05, + "loss": 0.9851, + "step": 15200 + }, + { + "epoch": 1.71, + "learning_rate": 7.656913260192574e-05, + "loss": 0.9879, + "step": 15220 + }, + { + "epoch": 1.71, + "learning_rate": 7.650744935275992e-05, + "loss": 1.0127, + "step": 15240 + }, + { + "epoch": 1.71, + "learning_rate": 7.644570993880791e-05, + "loss": 0.9718, + "step": 15260 + }, + { + "epoch": 1.72, + "learning_rate": 7.63839144908851e-05, + "loss": 0.9495, + "step": 15280 + }, + { + "epoch": 1.72, + "learning_rate": 7.632206313992548e-05, + "loss": 0.9745, + "step": 15300 + }, + { + "epoch": 1.72, + "learning_rate": 7.626015601698163e-05, + "loss": 0.9862, + "step": 15320 + }, + { + "epoch": 1.72, + "learning_rate": 7.619819325322422e-05, + "loss": 0.9923, + "step": 15340 + }, + { + "epoch": 1.73, + "learning_rate": 7.613617497994178e-05, + "loss": 0.9779, + "step": 15360 + }, + { + "epoch": 1.73, + "learning_rate": 7.607410132854059e-05, + "loss": 0.9875, + "step": 15380 + }, + { + "epoch": 1.73, + "learning_rate": 7.60119724305441e-05, + "loss": 1.011, + "step": 15400 + }, + { + "epoch": 1.73, + "learning_rate": 7.594978841759297e-05, + "loss": 0.9933, + "step": 15420 + }, + { + "epoch": 1.73, + "learning_rate": 7.588754942144452e-05, + "loss": 0.9842, + "step": 15440 + }, + { + "epoch": 1.74, + "learning_rate": 7.582525557397264e-05, + "loss": 0.9784, + "step": 15460 + }, + { + "epoch": 1.74, + "learning_rate": 7.576290700716742e-05, + "loss": 0.9794, + "step": 15480 + }, + { + "epoch": 1.74, + "learning_rate": 7.570050385313487e-05, + "loss": 1.0136, + "step": 15500 + }, + { + "epoch": 1.74, + "learning_rate": 7.563804624409672e-05, + "loss": 1.0115, + "step": 15520 + }, + { + "epoch": 1.75, + "learning_rate": 7.557553431239002e-05, + "loss": 0.9926, + "step": 15540 + }, + { + "epoch": 1.75, + "learning_rate": 7.551296819046693e-05, + "loss": 0.9946, + "step": 15560 + }, + { + "epoch": 1.75, + "learning_rate": 7.545034801089448e-05, + "loss": 0.9707, + "step": 15580 + }, + { + "epoch": 1.75, + "learning_rate": 7.538767390635416e-05, + "loss": 0.9644, + "step": 15600 + }, + { + "epoch": 1.76, + "learning_rate": 7.53249460096418e-05, + "loss": 0.9909, + "step": 15620 + }, + { + "epoch": 1.76, + "learning_rate": 7.526216445366713e-05, + "loss": 0.994, + "step": 15640 + }, + { + "epoch": 1.76, + "learning_rate": 7.519932937145364e-05, + "loss": 0.9701, + "step": 15660 + }, + { + "epoch": 1.76, + "learning_rate": 7.513644089613818e-05, + "loss": 0.9868, + "step": 15680 + }, + { + "epoch": 1.76, + "learning_rate": 7.507349916097077e-05, + "loss": 1.018, + "step": 15700 + }, + { + "epoch": 1.77, + "learning_rate": 7.501050429931429e-05, + "loss": 0.9759, + "step": 15720 + }, + { + "epoch": 1.77, + "learning_rate": 7.49474564446441e-05, + "loss": 1.0035, + "step": 15740 + }, + { + "epoch": 1.77, + "learning_rate": 7.488435573054795e-05, + "loss": 0.9836, + "step": 15760 + }, + { + "epoch": 1.77, + "learning_rate": 7.482120229072552e-05, + "loss": 0.9725, + "step": 15780 + }, + { + "epoch": 1.78, + "learning_rate": 7.475799625898825e-05, + "loss": 0.9832, + "step": 15800 + }, + { + "epoch": 1.78, + "learning_rate": 7.469473776925897e-05, + "loss": 0.9895, + "step": 15820 + }, + { + "epoch": 1.78, + "learning_rate": 7.463142695557171e-05, + "loss": 0.979, + "step": 15840 + }, + { + "epoch": 1.78, + "learning_rate": 7.456806395207132e-05, + "loss": 0.987, + "step": 15860 + }, + { + "epoch": 1.78, + "learning_rate": 7.450464889301326e-05, + "loss": 0.9911, + "step": 15880 + }, + { + "epoch": 1.79, + "learning_rate": 7.444118191276326e-05, + "loss": 0.9616, + "step": 15900 + }, + { + "epoch": 1.79, + "learning_rate": 7.43776631457971e-05, + "loss": 0.9772, + "step": 15920 + }, + { + "epoch": 1.79, + "learning_rate": 7.431409272670027e-05, + "loss": 0.9851, + "step": 15940 + }, + { + "epoch": 1.79, + "learning_rate": 7.425047079016765e-05, + "loss": 0.9971, + "step": 15960 + }, + { + "epoch": 1.8, + "learning_rate": 7.418679747100339e-05, + "loss": 0.9858, + "step": 15980 + }, + { + "epoch": 1.8, + "learning_rate": 7.412307290412041e-05, + "loss": 0.9759, + "step": 16000 + }, + { + "epoch": 1.8, + "learning_rate": 7.405929722454026e-05, + "loss": 1.0255, + "step": 16020 + }, + { + "epoch": 1.8, + "learning_rate": 7.399547056739278e-05, + "loss": 0.9645, + "step": 16040 + }, + { + "epoch": 1.8, + "learning_rate": 7.39315930679158e-05, + "loss": 0.9821, + "step": 16060 + }, + { + "epoch": 1.81, + "learning_rate": 7.386766486145496e-05, + "loss": 0.9783, + "step": 16080 + }, + { + "epoch": 1.81, + "learning_rate": 7.380368608346322e-05, + "loss": 0.9899, + "step": 16100 + }, + { + "epoch": 1.81, + "learning_rate": 7.373965686950078e-05, + "loss": 0.9705, + "step": 16120 + }, + { + "epoch": 1.81, + "learning_rate": 7.367557735523467e-05, + "loss": 0.9869, + "step": 16140 + }, + { + "epoch": 1.82, + "learning_rate": 7.361144767643849e-05, + "loss": 0.983, + "step": 16160 + }, + { + "epoch": 1.82, + "learning_rate": 7.354726796899219e-05, + "loss": 1.0142, + "step": 16180 + }, + { + "epoch": 1.82, + "learning_rate": 7.348303836888163e-05, + "loss": 0.9991, + "step": 16200 + }, + { + "epoch": 1.82, + "learning_rate": 7.341875901219845e-05, + "loss": 0.9927, + "step": 16220 + }, + { + "epoch": 1.82, + "learning_rate": 7.33544300351397e-05, + "loss": 1.0073, + "step": 16240 + }, + { + "epoch": 1.83, + "learning_rate": 7.329005157400754e-05, + "loss": 0.998, + "step": 16260 + }, + { + "epoch": 1.83, + "learning_rate": 7.322562376520904e-05, + "loss": 0.9921, + "step": 16280 + }, + { + "epoch": 1.83, + "learning_rate": 7.316114674525578e-05, + "loss": 0.9735, + "step": 16300 + }, + { + "epoch": 1.83, + "learning_rate": 7.30966206507636e-05, + "loss": 0.9668, + "step": 16320 + }, + { + "epoch": 1.84, + "learning_rate": 7.303204561845236e-05, + "loss": 0.9921, + "step": 16340 + }, + { + "epoch": 1.84, + "learning_rate": 7.29674217851456e-05, + "loss": 0.9809, + "step": 16360 + }, + { + "epoch": 1.84, + "learning_rate": 7.290274928777024e-05, + "loss": 0.9915, + "step": 16380 + }, + { + "epoch": 1.84, + "learning_rate": 7.283802826335635e-05, + "loss": 0.9805, + "step": 16400 + }, + { + "epoch": 1.84, + "learning_rate": 7.277325884903674e-05, + "loss": 0.9791, + "step": 16420 + }, + { + "epoch": 1.85, + "learning_rate": 7.270844118204688e-05, + "loss": 0.9702, + "step": 16440 + }, + { + "epoch": 1.85, + "learning_rate": 7.264357539972434e-05, + "loss": 0.9717, + "step": 16460 + }, + { + "epoch": 1.85, + "learning_rate": 7.257866163950873e-05, + "loss": 0.9982, + "step": 16480 + }, + { + "epoch": 1.85, + "learning_rate": 7.251370003894133e-05, + "loss": 0.9703, + "step": 16500 + }, + { + "epoch": 1.86, + "learning_rate": 7.244869073566466e-05, + "loss": 0.9482, + "step": 16520 + }, + { + "epoch": 1.86, + "learning_rate": 7.238363386742249e-05, + "loss": 1.0209, + "step": 16540 + }, + { + "epoch": 1.86, + "learning_rate": 7.23185295720592e-05, + "loss": 0.9672, + "step": 16560 + }, + { + "epoch": 1.86, + "learning_rate": 7.225337798751981e-05, + "loss": 1.0046, + "step": 16580 + }, + { + "epoch": 1.87, + "learning_rate": 7.218817925184944e-05, + "loss": 1.0102, + "step": 16600 + }, + { + "epoch": 1.87, + "learning_rate": 7.212293350319313e-05, + "loss": 0.9578, + "step": 16620 + }, + { + "epoch": 1.87, + "learning_rate": 7.205764087979557e-05, + "loss": 0.9835, + "step": 16640 + }, + { + "epoch": 1.87, + "learning_rate": 7.199230152000074e-05, + "loss": 1.0221, + "step": 16660 + }, + { + "epoch": 1.87, + "learning_rate": 7.192691556225167e-05, + "loss": 0.9698, + "step": 16680 + }, + { + "epoch": 1.88, + "learning_rate": 7.186148314509008e-05, + "loss": 0.9918, + "step": 16700 + }, + { + "epoch": 1.88, + "learning_rate": 7.179600440715615e-05, + "loss": 0.9828, + "step": 16720 + }, + { + "epoch": 1.88, + "learning_rate": 7.173047948718826e-05, + "loss": 1.0046, + "step": 16740 + }, + { + "epoch": 1.88, + "learning_rate": 7.166490852402254e-05, + "loss": 0.9624, + "step": 16760 + }, + { + "epoch": 1.89, + "learning_rate": 7.159929165659277e-05, + "loss": 0.9675, + "step": 16780 + }, + { + "epoch": 1.89, + "learning_rate": 7.153362902392994e-05, + "loss": 0.9811, + "step": 16800 + }, + { + "epoch": 1.89, + "learning_rate": 7.146792076516202e-05, + "loss": 0.9725, + "step": 16820 + }, + { + "epoch": 1.89, + "learning_rate": 7.140216701951366e-05, + "loss": 0.9751, + "step": 16840 + }, + { + "epoch": 1.89, + "learning_rate": 7.13363679263059e-05, + "loss": 0.9734, + "step": 16860 + }, + { + "epoch": 1.9, + "learning_rate": 7.127052362495583e-05, + "loss": 0.9879, + "step": 16880 + }, + { + "epoch": 1.9, + "learning_rate": 7.120463425497637e-05, + "loss": 0.9824, + "step": 16900 + }, + { + "epoch": 1.9, + "learning_rate": 7.11386999559759e-05, + "loss": 0.9833, + "step": 16920 + }, + { + "epoch": 1.9, + "learning_rate": 7.1072720867658e-05, + "loss": 0.9703, + "step": 16940 + }, + { + "epoch": 1.91, + "learning_rate": 7.100669712982119e-05, + "loss": 0.9864, + "step": 16960 + }, + { + "epoch": 1.91, + "learning_rate": 7.094062888235852e-05, + "loss": 0.9809, + "step": 16980 + }, + { + "epoch": 1.91, + "learning_rate": 7.087451626525745e-05, + "loss": 0.9887, + "step": 17000 + }, + { + "epoch": 1.91, + "learning_rate": 7.080835941859932e-05, + "loss": 0.9817, + "step": 17020 + }, + { + "epoch": 1.91, + "learning_rate": 7.074215848255933e-05, + "loss": 0.9697, + "step": 17040 + }, + { + "epoch": 1.92, + "learning_rate": 7.067591359740599e-05, + "loss": 0.9717, + "step": 17060 + }, + { + "epoch": 1.92, + "learning_rate": 7.060962490350098e-05, + "loss": 0.9435, + "step": 17080 + }, + { + "epoch": 1.92, + "learning_rate": 7.05432925412988e-05, + "loss": 0.9578, + "step": 17100 + }, + { + "epoch": 1.92, + "learning_rate": 7.047691665134643e-05, + "loss": 0.9845, + "step": 17120 + }, + { + "epoch": 1.93, + "learning_rate": 7.041049737428316e-05, + "loss": 0.9647, + "step": 17140 + }, + { + "epoch": 1.93, + "learning_rate": 7.034403485084014e-05, + "loss": 0.9781, + "step": 17160 + }, + { + "epoch": 1.93, + "learning_rate": 7.027752922184017e-05, + "loss": 0.9348, + "step": 17180 + }, + { + "epoch": 1.93, + "learning_rate": 7.021098062819743e-05, + "loss": 0.9801, + "step": 17200 + }, + { + "epoch": 1.93, + "learning_rate": 7.014438921091703e-05, + "loss": 0.9987, + "step": 17220 + }, + { + "epoch": 1.94, + "learning_rate": 7.007775511109495e-05, + "loss": 0.9797, + "step": 17240 + }, + { + "epoch": 1.94, + "learning_rate": 7.001107846991751e-05, + "loss": 0.9882, + "step": 17260 + }, + { + "epoch": 1.94, + "learning_rate": 6.994435942866117e-05, + "loss": 0.9706, + "step": 17280 + }, + { + "epoch": 1.94, + "learning_rate": 6.98775981286923e-05, + "loss": 0.9733, + "step": 17300 + }, + { + "epoch": 1.95, + "learning_rate": 6.981079471146672e-05, + "loss": 0.9788, + "step": 17320 + }, + { + "epoch": 1.95, + "learning_rate": 6.974394931852956e-05, + "loss": 0.9797, + "step": 17340 + }, + { + "epoch": 1.95, + "learning_rate": 6.967706209151488e-05, + "loss": 0.9357, + "step": 17360 + }, + { + "epoch": 1.95, + "learning_rate": 6.96101331721453e-05, + "loss": 0.9818, + "step": 17380 + }, + { + "epoch": 1.96, + "learning_rate": 6.954316270223189e-05, + "loss": 0.9728, + "step": 17400 + }, + { + "epoch": 1.96, + "learning_rate": 6.94761508236737e-05, + "loss": 1.0087, + "step": 17420 + }, + { + "epoch": 1.96, + "learning_rate": 6.940909767845753e-05, + "loss": 0.9928, + "step": 17440 + }, + { + "epoch": 1.96, + "learning_rate": 6.934200340865761e-05, + "loss": 0.9464, + "step": 17460 + }, + { + "epoch": 1.96, + "learning_rate": 6.927486815643528e-05, + "loss": 0.9691, + "step": 17480 + }, + { + "epoch": 1.97, + "learning_rate": 6.920769206403881e-05, + "loss": 0.9875, + "step": 17500 + }, + { + "epoch": 1.97, + "learning_rate": 6.914047527380288e-05, + "loss": 0.9622, + "step": 17520 + }, + { + "epoch": 1.97, + "learning_rate": 6.907321792814848e-05, + "loss": 0.9906, + "step": 17540 + }, + { + "epoch": 1.97, + "learning_rate": 6.900592016958252e-05, + "loss": 0.9778, + "step": 17560 + }, + { + "epoch": 1.98, + "learning_rate": 6.89385821406975e-05, + "loss": 0.9606, + "step": 17580 + }, + { + "epoch": 1.98, + "learning_rate": 6.887120398417132e-05, + "loss": 0.9551, + "step": 17600 + }, + { + "epoch": 1.98, + "learning_rate": 6.880378584276682e-05, + "loss": 0.9555, + "step": 17620 + }, + { + "epoch": 1.98, + "learning_rate": 6.87363278593316e-05, + "loss": 0.9735, + "step": 17640 + }, + { + "epoch": 1.98, + "learning_rate": 6.866883017679772e-05, + "loss": 0.9791, + "step": 17660 + }, + { + "epoch": 1.99, + "learning_rate": 6.860129293818124e-05, + "loss": 0.9975, + "step": 17680 + }, + { + "epoch": 1.99, + "learning_rate": 6.853371628658217e-05, + "loss": 0.9816, + "step": 17700 + }, + { + "epoch": 1.99, + "learning_rate": 6.846610036518396e-05, + "loss": 0.985, + "step": 17720 + }, + { + "epoch": 1.99, + "learning_rate": 6.839844531725321e-05, + "loss": 0.9667, + "step": 17740 + }, + { + "epoch": 2.0, + "learning_rate": 6.833075128613955e-05, + "loss": 0.95, + "step": 17760 + }, + { + "epoch": 2.0, + "learning_rate": 6.826301841527512e-05, + "loss": 0.9984, + "step": 17780 + }, + { + "epoch": 2.0, + "learning_rate": 6.819524684817438e-05, + "loss": 0.9795, + "step": 17800 + }, + { + "epoch": 2.0, + "learning_rate": 6.812743672843378e-05, + "loss": 0.932, + "step": 17820 + }, + { + "epoch": 2.0, + "learning_rate": 6.805958819973144e-05, + "loss": 0.9332, + "step": 17840 + }, + { + "epoch": 2.01, + "learning_rate": 6.799170140582689e-05, + "loss": 0.9567, + "step": 17860 + }, + { + "epoch": 2.01, + "learning_rate": 6.792377649056071e-05, + "loss": 0.9452, + "step": 17880 + }, + { + "epoch": 2.01, + "learning_rate": 6.785581359785428e-05, + "loss": 0.9466, + "step": 17900 + }, + { + "epoch": 2.01, + "learning_rate": 6.778781287170946e-05, + "loss": 0.9355, + "step": 17920 + }, + { + "epoch": 2.02, + "learning_rate": 6.771977445620818e-05, + "loss": 0.9449, + "step": 17940 + }, + { + "epoch": 2.02, + "learning_rate": 6.765169849551235e-05, + "loss": 0.9313, + "step": 17960 + }, + { + "epoch": 2.02, + "learning_rate": 6.758358513386335e-05, + "loss": 0.9742, + "step": 17980 + }, + { + "epoch": 2.02, + "learning_rate": 6.751543451558186e-05, + "loss": 0.919, + "step": 18000 + }, + { + "epoch": 2.02, + "learning_rate": 6.744724678506746e-05, + "loss": 0.955, + "step": 18020 + }, + { + "epoch": 2.03, + "learning_rate": 6.737902208679837e-05, + "loss": 0.961, + "step": 18040 + }, + { + "epoch": 2.03, + "learning_rate": 6.731076056533114e-05, + "loss": 0.9699, + "step": 18060 + }, + { + "epoch": 2.03, + "learning_rate": 6.724246236530036e-05, + "loss": 0.9497, + "step": 18080 + }, + { + "epoch": 2.03, + "learning_rate": 6.717412763141832e-05, + "loss": 0.9476, + "step": 18100 + }, + { + "epoch": 2.04, + "learning_rate": 6.710575650847474e-05, + "loss": 0.9641, + "step": 18120 + }, + { + "epoch": 2.04, + "learning_rate": 6.70373491413364e-05, + "loss": 0.9534, + "step": 18140 + }, + { + "epoch": 2.04, + "learning_rate": 6.69689056749469e-05, + "loss": 0.9416, + "step": 18160 + }, + { + "epoch": 2.04, + "learning_rate": 6.690042625432635e-05, + "loss": 0.9359, + "step": 18180 + }, + { + "epoch": 2.04, + "learning_rate": 6.6831911024571e-05, + "loss": 0.9538, + "step": 18200 + }, + { + "epoch": 2.05, + "learning_rate": 6.676336013085302e-05, + "loss": 0.9375, + "step": 18220 + }, + { + "epoch": 2.05, + "learning_rate": 6.669477371842008e-05, + "loss": 0.9479, + "step": 18240 + }, + { + "epoch": 2.05, + "learning_rate": 6.662615193259519e-05, + "loss": 0.9275, + "step": 18260 + }, + { + "epoch": 2.05, + "learning_rate": 6.655749491877623e-05, + "loss": 0.9417, + "step": 18280 + }, + { + "epoch": 2.06, + "learning_rate": 6.648880282243579e-05, + "loss": 0.9558, + "step": 18300 + }, + { + "epoch": 2.06, + "learning_rate": 6.642007578912074e-05, + "loss": 0.9539, + "step": 18320 + }, + { + "epoch": 2.06, + "learning_rate": 6.635131396445199e-05, + "loss": 0.955, + "step": 18340 + }, + { + "epoch": 2.06, + "learning_rate": 6.628251749412421e-05, + "loss": 0.9321, + "step": 18360 + }, + { + "epoch": 2.07, + "learning_rate": 6.621368652390542e-05, + "loss": 0.9354, + "step": 18380 + }, + { + "epoch": 2.07, + "learning_rate": 6.614482119963677e-05, + "loss": 0.932, + "step": 18400 + }, + { + "epoch": 2.07, + "learning_rate": 6.607592166723219e-05, + "loss": 0.9318, + "step": 18420 + }, + { + "epoch": 2.07, + "learning_rate": 6.600698807267811e-05, + "loss": 0.9294, + "step": 18440 + }, + { + "epoch": 2.07, + "learning_rate": 6.59380205620331e-05, + "loss": 0.946, + "step": 18460 + }, + { + "epoch": 2.08, + "learning_rate": 6.586901928142761e-05, + "loss": 0.9338, + "step": 18480 + }, + { + "epoch": 2.08, + "learning_rate": 6.579998437706367e-05, + "loss": 0.9615, + "step": 18500 + }, + { + "epoch": 2.08, + "learning_rate": 6.573091599521448e-05, + "loss": 0.9363, + "step": 18520 + }, + { + "epoch": 2.08, + "learning_rate": 6.566181428222424e-05, + "loss": 0.9891, + "step": 18540 + }, + { + "epoch": 2.09, + "learning_rate": 6.559267938450778e-05, + "loss": 0.9679, + "step": 18560 + }, + { + "epoch": 2.09, + "learning_rate": 6.552351144855015e-05, + "loss": 0.9197, + "step": 18580 + }, + { + "epoch": 2.09, + "learning_rate": 6.545431062090653e-05, + "loss": 0.9464, + "step": 18600 + }, + { + "epoch": 2.09, + "learning_rate": 6.538507704820169e-05, + "loss": 0.9829, + "step": 18620 + }, + { + "epoch": 2.09, + "learning_rate": 6.531581087712984e-05, + "loss": 0.9383, + "step": 18640 + }, + { + "epoch": 2.1, + "learning_rate": 6.524651225445423e-05, + "loss": 0.941, + "step": 18660 + }, + { + "epoch": 2.1, + "learning_rate": 6.517718132700689e-05, + "loss": 0.9647, + "step": 18680 + }, + { + "epoch": 2.1, + "learning_rate": 6.510781824168828e-05, + "loss": 0.9517, + "step": 18700 + }, + { + "epoch": 2.1, + "learning_rate": 6.5038423145467e-05, + "loss": 0.9526, + "step": 18720 + }, + { + "epoch": 2.11, + "learning_rate": 6.496899618537947e-05, + "loss": 0.943, + "step": 18740 + }, + { + "epoch": 2.11, + "learning_rate": 6.489953750852966e-05, + "loss": 0.9427, + "step": 18760 + }, + { + "epoch": 2.11, + "learning_rate": 6.483004726208873e-05, + "loss": 0.9405, + "step": 18780 + }, + { + "epoch": 2.11, + "learning_rate": 6.476052559329467e-05, + "loss": 0.9578, + "step": 18800 + }, + { + "epoch": 2.11, + "learning_rate": 6.469097264945214e-05, + "loss": 0.967, + "step": 18820 + }, + { + "epoch": 2.12, + "learning_rate": 6.4621388577932e-05, + "loss": 0.958, + "step": 18840 + }, + { + "epoch": 2.12, + "learning_rate": 6.45517735261711e-05, + "loss": 0.9582, + "step": 18860 + }, + { + "epoch": 2.12, + "learning_rate": 6.448212764167191e-05, + "loss": 0.9493, + "step": 18880 + }, + { + "epoch": 2.12, + "learning_rate": 6.441245107200223e-05, + "loss": 0.9368, + "step": 18900 + }, + { + "epoch": 2.13, + "learning_rate": 6.43427439647949e-05, + "loss": 0.9792, + "step": 18920 + }, + { + "epoch": 2.13, + "learning_rate": 6.427300646774744e-05, + "loss": 0.9427, + "step": 18940 + }, + { + "epoch": 2.13, + "learning_rate": 6.420323872862179e-05, + "loss": 0.9504, + "step": 18960 + }, + { + "epoch": 2.13, + "learning_rate": 6.413344089524393e-05, + "loss": 0.9439, + "step": 18980 + }, + { + "epoch": 2.13, + "learning_rate": 6.406361311550361e-05, + "loss": 0.92, + "step": 19000 + }, + { + "epoch": 2.14, + "learning_rate": 6.399375553735407e-05, + "loss": 0.9736, + "step": 19020 + }, + { + "epoch": 2.14, + "learning_rate": 6.392386830881164e-05, + "loss": 0.9712, + "step": 19040 + }, + { + "epoch": 2.14, + "learning_rate": 6.385395157795552e-05, + "loss": 0.9777, + "step": 19060 + }, + { + "epoch": 2.14, + "learning_rate": 6.378400549292739e-05, + "loss": 0.9232, + "step": 19080 + }, + { + "epoch": 2.15, + "learning_rate": 6.371403020193109e-05, + "loss": 0.9597, + "step": 19100 + }, + { + "epoch": 2.15, + "learning_rate": 6.364402585323245e-05, + "loss": 0.9131, + "step": 19120 + }, + { + "epoch": 2.15, + "learning_rate": 6.357399259515877e-05, + "loss": 0.9555, + "step": 19140 + }, + { + "epoch": 2.15, + "learning_rate": 6.350393057609865e-05, + "loss": 0.9488, + "step": 19160 + }, + { + "epoch": 2.16, + "learning_rate": 6.343383994450158e-05, + "loss": 0.9597, + "step": 19180 + }, + { + "epoch": 2.16, + "learning_rate": 6.336372084887775e-05, + "loss": 0.9153, + "step": 19200 + }, + { + "epoch": 2.16, + "learning_rate": 6.329357343779763e-05, + "loss": 0.9319, + "step": 19220 + }, + { + "epoch": 2.16, + "learning_rate": 6.322339785989163e-05, + "loss": 0.9723, + "step": 19240 + }, + { + "epoch": 2.16, + "learning_rate": 6.315319426384993e-05, + "loss": 0.9426, + "step": 19260 + }, + { + "epoch": 2.17, + "learning_rate": 6.308296279842205e-05, + "loss": 0.9569, + "step": 19280 + }, + { + "epoch": 2.17, + "learning_rate": 6.301270361241649e-05, + "loss": 0.9376, + "step": 19300 + }, + { + "epoch": 2.17, + "learning_rate": 6.294241685470057e-05, + "loss": 0.953, + "step": 19320 + }, + { + "epoch": 2.17, + "learning_rate": 6.287210267420001e-05, + "loss": 0.9552, + "step": 19340 + }, + { + "epoch": 2.18, + "learning_rate": 6.280176121989861e-05, + "loss": 0.941, + "step": 19360 + }, + { + "epoch": 2.18, + "learning_rate": 6.273139264083798e-05, + "loss": 0.9632, + "step": 19380 + }, + { + "epoch": 2.18, + "learning_rate": 6.266099708611719e-05, + "loss": 0.9531, + "step": 19400 + }, + { + "epoch": 2.18, + "learning_rate": 6.259057470489246e-05, + "loss": 0.9426, + "step": 19420 + }, + { + "epoch": 2.18, + "learning_rate": 6.252012564637689e-05, + "loss": 0.9947, + "step": 19440 + }, + { + "epoch": 2.19, + "learning_rate": 6.244965005984008e-05, + "loss": 0.9713, + "step": 19460 + }, + { + "epoch": 2.19, + "learning_rate": 6.23791480946078e-05, + "loss": 0.9208, + "step": 19480 + }, + { + "epoch": 2.19, + "learning_rate": 6.23086199000618e-05, + "loss": 0.9401, + "step": 19500 + }, + { + "epoch": 2.19, + "learning_rate": 6.223806562563929e-05, + "loss": 0.9537, + "step": 19520 + }, + { + "epoch": 2.2, + "learning_rate": 6.216748542083286e-05, + "loss": 0.9889, + "step": 19540 + }, + { + "epoch": 2.2, + "learning_rate": 6.209687943518996e-05, + "loss": 0.9211, + "step": 19560 + }, + { + "epoch": 2.2, + "learning_rate": 6.202624781831268e-05, + "loss": 0.9332, + "step": 19580 + }, + { + "epoch": 2.2, + "learning_rate": 6.195559071985745e-05, + "loss": 0.9656, + "step": 19600 + }, + { + "epoch": 2.2, + "learning_rate": 6.188490828953465e-05, + "loss": 0.9292, + "step": 19620 + }, + { + "epoch": 2.21, + "learning_rate": 6.181420067710838e-05, + "loss": 0.9479, + "step": 19640 + }, + { + "epoch": 2.21, + "learning_rate": 6.174346803239604e-05, + "loss": 0.9307, + "step": 19660 + }, + { + "epoch": 2.21, + "learning_rate": 6.167271050526812e-05, + "loss": 0.9564, + "step": 19680 + }, + { + "epoch": 2.21, + "learning_rate": 6.160192824564778e-05, + "loss": 0.9316, + "step": 19700 + }, + { + "epoch": 2.22, + "learning_rate": 6.153112140351066e-05, + "loss": 0.9171, + "step": 19720 + }, + { + "epoch": 2.22, + "learning_rate": 6.14602901288844e-05, + "loss": 0.9369, + "step": 19740 + }, + { + "epoch": 2.22, + "learning_rate": 6.138943457184847e-05, + "loss": 0.9351, + "step": 19760 + }, + { + "epoch": 2.22, + "learning_rate": 6.131855488253379e-05, + "loss": 0.9421, + "step": 19780 + }, + { + "epoch": 2.22, + "learning_rate": 6.124765121112233e-05, + "loss": 0.9732, + "step": 19800 + }, + { + "epoch": 2.23, + "learning_rate": 6.1176723707847e-05, + "loss": 0.933, + "step": 19820 + }, + { + "epoch": 2.23, + "learning_rate": 6.110577252299108e-05, + "loss": 0.9656, + "step": 19840 + }, + { + "epoch": 2.23, + "learning_rate": 6.103479780688816e-05, + "loss": 0.9369, + "step": 19860 + }, + { + "epoch": 2.23, + "learning_rate": 6.096379970992157e-05, + "loss": 0.9743, + "step": 19880 + }, + { + "epoch": 2.24, + "learning_rate": 6.089277838252422e-05, + "loss": 0.9686, + "step": 19900 + }, + { + "epoch": 2.24, + "learning_rate": 6.0821733975178276e-05, + "loss": 0.927, + "step": 19920 + }, + { + "epoch": 2.24, + "learning_rate": 6.0750666638414765e-05, + "loss": 0.9462, + "step": 19940 + }, + { + "epoch": 2.24, + "learning_rate": 6.067957652281332e-05, + "loss": 0.9591, + "step": 19960 + }, + { + "epoch": 2.24, + "learning_rate": 6.060846377900182e-05, + "loss": 0.9595, + "step": 19980 + }, + { + "epoch": 2.25, + "learning_rate": 6.0537328557656105e-05, + "loss": 0.9518, + "step": 20000 + }, + { + "epoch": 2.25, + "learning_rate": 6.046617100949965e-05, + "loss": 0.9485, + "step": 20020 + }, + { + "epoch": 2.25, + "learning_rate": 6.0394991285303196e-05, + "loss": 0.961, + "step": 20040 + }, + { + "epoch": 2.25, + "learning_rate": 6.03237895358845e-05, + "loss": 0.9536, + "step": 20060 + }, + { + "epoch": 2.26, + "learning_rate": 6.025256591210799e-05, + "loss": 0.9449, + "step": 20080 + }, + { + "epoch": 2.26, + "learning_rate": 6.0181320564884444e-05, + "loss": 0.947, + "step": 20100 + }, + { + "epoch": 2.26, + "learning_rate": 6.011005364517068e-05, + "loss": 0.9491, + "step": 20120 + }, + { + "epoch": 2.26, + "learning_rate": 6.003876530396916e-05, + "loss": 0.9332, + "step": 20140 + }, + { + "epoch": 2.27, + "learning_rate": 5.99674556923278e-05, + "loss": 0.9612, + "step": 20160 + }, + { + "epoch": 2.27, + "learning_rate": 5.989612496133956e-05, + "loss": 0.9536, + "step": 20180 + }, + { + "epoch": 2.27, + "learning_rate": 5.9824773262142165e-05, + "loss": 0.9351, + "step": 20200 + }, + { + "epoch": 2.27, + "learning_rate": 5.975340074591774e-05, + "loss": 0.955, + "step": 20220 + }, + { + "epoch": 2.27, + "learning_rate": 5.968200756389255e-05, + "loss": 0.9472, + "step": 20240 + }, + { + "epoch": 2.28, + "learning_rate": 5.9610593867336614e-05, + "loss": 0.9185, + "step": 20260 + }, + { + "epoch": 2.28, + "learning_rate": 5.9539159807563437e-05, + "loss": 0.9523, + "step": 20280 + }, + { + "epoch": 2.28, + "learning_rate": 5.9467705535929686e-05, + "loss": 0.9177, + "step": 20300 + }, + { + "epoch": 2.28, + "learning_rate": 5.939623120383481e-05, + "loss": 0.9517, + "step": 20320 + }, + { + "epoch": 2.29, + "learning_rate": 5.9324736962720805e-05, + "loss": 0.9449, + "step": 20340 + }, + { + "epoch": 2.29, + "learning_rate": 5.925322296407181e-05, + "loss": 0.9656, + "step": 20360 + }, + { + "epoch": 2.29, + "learning_rate": 5.918168935941388e-05, + "loss": 0.9617, + "step": 20380 + }, + { + "epoch": 2.29, + "learning_rate": 5.911013630031457e-05, + "loss": 0.9479, + "step": 20400 + }, + { + "epoch": 2.29, + "learning_rate": 5.903856393838265e-05, + "loss": 0.9431, + "step": 20420 + }, + { + "epoch": 2.3, + "learning_rate": 5.896697242526785e-05, + "loss": 0.9583, + "step": 20440 + }, + { + "epoch": 2.3, + "learning_rate": 5.8895361912660374e-05, + "loss": 0.9749, + "step": 20460 + }, + { + "epoch": 2.3, + "learning_rate": 5.882373255229081e-05, + "loss": 0.9783, + "step": 20480 + }, + { + "epoch": 2.3, + "learning_rate": 5.875208449592957e-05, + "loss": 0.9316, + "step": 20500 + }, + { + "epoch": 2.31, + "learning_rate": 5.868041789538675e-05, + "loss": 0.9483, + "step": 20520 + }, + { + "epoch": 2.31, + "learning_rate": 5.8608732902511695e-05, + "loss": 0.9384, + "step": 20540 + }, + { + "epoch": 2.31, + "learning_rate": 5.853702966919275e-05, + "loss": 0.9093, + "step": 20560 + }, + { + "epoch": 2.31, + "learning_rate": 5.8465308347356895e-05, + "loss": 0.9331, + "step": 20580 + }, + { + "epoch": 2.31, + "learning_rate": 5.8393569088969425e-05, + "loss": 0.9641, + "step": 20600 + }, + { + "epoch": 2.32, + "learning_rate": 5.8321812046033666e-05, + "loss": 0.9628, + "step": 20620 + }, + { + "epoch": 2.32, + "learning_rate": 5.825003737059062e-05, + "loss": 0.9644, + "step": 20640 + }, + { + "epoch": 2.32, + "learning_rate": 5.81782452147186e-05, + "loss": 0.9429, + "step": 20660 + }, + { + "epoch": 2.32, + "learning_rate": 5.810643573053306e-05, + "loss": 0.9444, + "step": 20680 + }, + { + "epoch": 2.33, + "learning_rate": 5.803460907018607e-05, + "loss": 0.9412, + "step": 20700 + }, + { + "epoch": 2.33, + "learning_rate": 5.796276538586615e-05, + "loss": 0.9411, + "step": 20720 + }, + { + "epoch": 2.33, + "learning_rate": 5.7890904829797856e-05, + "loss": 0.9342, + "step": 20740 + }, + { + "epoch": 2.33, + "learning_rate": 5.781902755424151e-05, + "loss": 0.9188, + "step": 20760 + }, + { + "epoch": 2.33, + "learning_rate": 5.7747133711492895e-05, + "loss": 0.9423, + "step": 20780 + }, + { + "epoch": 2.34, + "learning_rate": 5.767522345388282e-05, + "loss": 0.9363, + "step": 20800 + }, + { + "epoch": 2.34, + "learning_rate": 5.760329693377693e-05, + "loss": 0.9369, + "step": 20820 + }, + { + "epoch": 2.34, + "learning_rate": 5.7531354303575324e-05, + "loss": 0.9655, + "step": 20840 + }, + { + "epoch": 2.34, + "learning_rate": 5.7459395715712205e-05, + "loss": 0.9417, + "step": 20860 + }, + { + "epoch": 2.35, + "learning_rate": 5.738742132265562e-05, + "loss": 0.9504, + "step": 20880 + }, + { + "epoch": 2.35, + "learning_rate": 5.731543127690709e-05, + "loss": 0.9594, + "step": 20900 + }, + { + "epoch": 2.35, + "learning_rate": 5.724342573100131e-05, + "loss": 0.9268, + "step": 20920 + }, + { + "epoch": 2.35, + "learning_rate": 5.7171404837505796e-05, + "loss": 0.9299, + "step": 20940 + }, + { + "epoch": 2.36, + "learning_rate": 5.709936874902061e-05, + "loss": 0.9372, + "step": 20960 + }, + { + "epoch": 2.36, + "learning_rate": 5.702731761817799e-05, + "loss": 0.9087, + "step": 20980 + }, + { + "epoch": 2.36, + "learning_rate": 5.695525159764206e-05, + "loss": 0.9338, + "step": 21000 + }, + { + "epoch": 2.36, + "learning_rate": 5.688317084010847e-05, + "loss": 0.9435, + "step": 21020 + }, + { + "epoch": 2.36, + "learning_rate": 5.681107549830414e-05, + "loss": 0.9552, + "step": 21040 + }, + { + "epoch": 2.37, + "learning_rate": 5.673896572498683e-05, + "loss": 0.9095, + "step": 21060 + }, + { + "epoch": 2.37, + "learning_rate": 5.6666841672944925e-05, + "loss": 0.9398, + "step": 21080 + }, + { + "epoch": 2.37, + "learning_rate": 5.659470349499707e-05, + "loss": 0.949, + "step": 21100 + }, + { + "epoch": 2.37, + "learning_rate": 5.652255134399178e-05, + "loss": 0.9129, + "step": 21120 + }, + { + "epoch": 2.38, + "learning_rate": 5.645038537280726e-05, + "loss": 0.9701, + "step": 21140 + }, + { + "epoch": 2.38, + "learning_rate": 5.6378205734350916e-05, + "loss": 0.9328, + "step": 21160 + }, + { + "epoch": 2.38, + "learning_rate": 5.630601258155917e-05, + "loss": 0.9371, + "step": 21180 + }, + { + "epoch": 2.38, + "learning_rate": 5.623380606739708e-05, + "loss": 0.9384, + "step": 21200 + }, + { + "epoch": 2.38, + "learning_rate": 5.616158634485793e-05, + "loss": 0.9557, + "step": 21220 + }, + { + "epoch": 2.39, + "learning_rate": 5.608935356696313e-05, + "loss": 0.9288, + "step": 21240 + }, + { + "epoch": 2.39, + "learning_rate": 5.6017107886761634e-05, + "loss": 0.9439, + "step": 21260 + }, + { + "epoch": 2.39, + "learning_rate": 5.5944849457329786e-05, + "loss": 0.9502, + "step": 21280 + }, + { + "epoch": 2.39, + "learning_rate": 5.5872578431770936e-05, + "loss": 0.9466, + "step": 21300 + }, + { + "epoch": 2.4, + "learning_rate": 5.5800294963215116e-05, + "loss": 0.9338, + "step": 21320 + }, + { + "epoch": 2.4, + "learning_rate": 5.5727999204818736e-05, + "loss": 0.9466, + "step": 21340 + }, + { + "epoch": 2.4, + "learning_rate": 5.565569130976422e-05, + "loss": 0.9392, + "step": 21360 + }, + { + "epoch": 2.4, + "learning_rate": 5.5583371431259745e-05, + "loss": 0.9885, + "step": 21380 + }, + { + "epoch": 2.4, + "learning_rate": 5.551103972253884e-05, + "loss": 0.9347, + "step": 21400 + }, + { + "epoch": 2.41, + "learning_rate": 5.543869633686013e-05, + "loss": 0.934, + "step": 21420 + }, + { + "epoch": 2.41, + "learning_rate": 5.536634142750699e-05, + "loss": 0.9347, + "step": 21440 + }, + { + "epoch": 2.41, + "learning_rate": 5.529397514778716e-05, + "loss": 0.9379, + "step": 21460 + }, + { + "epoch": 2.41, + "learning_rate": 5.522159765103251e-05, + "loss": 0.9509, + "step": 21480 + }, + { + "epoch": 2.42, + "learning_rate": 5.5149209090598686e-05, + "loss": 0.9212, + "step": 21500 + }, + { + "epoch": 2.42, + "learning_rate": 5.5076809619864754e-05, + "loss": 0.9608, + "step": 21520 + }, + { + "epoch": 2.42, + "learning_rate": 5.5004399392232906e-05, + "loss": 0.9088, + "step": 21540 + }, + { + "epoch": 2.42, + "learning_rate": 5.493197856112812e-05, + "loss": 0.9442, + "step": 21560 + }, + { + "epoch": 2.42, + "learning_rate": 5.485954727999785e-05, + "loss": 0.9389, + "step": 21580 + }, + { + "epoch": 2.43, + "learning_rate": 5.478710570231168e-05, + "loss": 0.9191, + "step": 21600 + }, + { + "epoch": 2.43, + "learning_rate": 5.4714653981561015e-05, + "loss": 0.9181, + "step": 21620 + }, + { + "epoch": 2.43, + "learning_rate": 5.464219227125877e-05, + "loss": 0.9255, + "step": 21640 + }, + { + "epoch": 2.43, + "learning_rate": 5.4569720724939025e-05, + "loss": 0.9249, + "step": 21660 + }, + { + "epoch": 2.44, + "learning_rate": 5.449723949615664e-05, + "loss": 0.9217, + "step": 21680 + }, + { + "epoch": 2.44, + "learning_rate": 5.442474873848706e-05, + "loss": 0.9316, + "step": 21700 + }, + { + "epoch": 2.44, + "learning_rate": 5.43522486055259e-05, + "loss": 0.9689, + "step": 21720 + }, + { + "epoch": 2.44, + "learning_rate": 5.427973925088865e-05, + "loss": 0.931, + "step": 21740 + }, + { + "epoch": 2.44, + "learning_rate": 5.42072208282103e-05, + "loss": 0.9415, + "step": 21760 + }, + { + "epoch": 2.45, + "learning_rate": 5.4134693491145085e-05, + "loss": 0.9149, + "step": 21780 + }, + { + "epoch": 2.45, + "learning_rate": 5.4062157393366134e-05, + "loss": 0.9355, + "step": 21800 + }, + { + "epoch": 2.45, + "learning_rate": 5.398961268856512e-05, + "loss": 0.9458, + "step": 21820 + }, + { + "epoch": 2.45, + "learning_rate": 5.391705953045195e-05, + "loss": 0.9411, + "step": 21840 + }, + { + "epoch": 2.46, + "learning_rate": 5.3844498072754476e-05, + "loss": 0.9501, + "step": 21860 + }, + { + "epoch": 2.46, + "learning_rate": 5.377192846921808e-05, + "loss": 0.9204, + "step": 21880 + }, + { + "epoch": 2.46, + "learning_rate": 5.369935087360547e-05, + "loss": 0.9033, + "step": 21900 + }, + { + "epoch": 2.46, + "learning_rate": 5.362676543969622e-05, + "loss": 0.9327, + "step": 21920 + }, + { + "epoch": 2.47, + "learning_rate": 5.3554172321286576e-05, + "loss": 0.9579, + "step": 21940 + }, + { + "epoch": 2.47, + "learning_rate": 5.348157167218901e-05, + "loss": 0.9313, + "step": 21960 + }, + { + "epoch": 2.47, + "learning_rate": 5.340896364623198e-05, + "loss": 0.913, + "step": 21980 + }, + { + "epoch": 2.47, + "learning_rate": 5.333634839725958e-05, + "loss": 0.9628, + "step": 22000 + }, + { + "epoch": 2.47, + "learning_rate": 5.3263726079131194e-05, + "loss": 0.9603, + "step": 22020 + }, + { + "epoch": 2.48, + "learning_rate": 5.319109684572118e-05, + "loss": 0.9116, + "step": 22040 + }, + { + "epoch": 2.48, + "learning_rate": 5.311846085091856e-05, + "loss": 0.9344, + "step": 22060 + }, + { + "epoch": 2.48, + "learning_rate": 5.3045818248626676e-05, + "loss": 0.9263, + "step": 22080 + }, + { + "epoch": 2.48, + "learning_rate": 5.29731691927629e-05, + "loss": 0.9622, + "step": 22100 + }, + { + "epoch": 2.49, + "learning_rate": 5.29005138372582e-05, + "loss": 0.9481, + "step": 22120 + }, + { + "epoch": 2.49, + "learning_rate": 5.282785233605698e-05, + "loss": 0.9256, + "step": 22140 + }, + { + "epoch": 2.49, + "learning_rate": 5.2755184843116635e-05, + "loss": 0.9808, + "step": 22160 + }, + { + "epoch": 2.49, + "learning_rate": 5.268251151240722e-05, + "loss": 0.968, + "step": 22180 + }, + { + "epoch": 2.49, + "learning_rate": 5.2609832497911215e-05, + "loss": 0.95, + "step": 22200 + }, + { + "epoch": 2.5, + "learning_rate": 5.253714795362309e-05, + "loss": 0.9662, + "step": 22220 + }, + { + "epoch": 2.5, + "learning_rate": 5.246445803354907e-05, + "loss": 0.9352, + "step": 22240 + }, + { + "epoch": 2.5, + "learning_rate": 5.2391762891706764e-05, + "loss": 0.9437, + "step": 22260 + }, + { + "epoch": 2.5, + "learning_rate": 5.231906268212483e-05, + "loss": 0.9409, + "step": 22280 + }, + { + "epoch": 2.51, + "learning_rate": 5.224635755884268e-05, + "loss": 0.9487, + "step": 22300 + }, + { + "epoch": 2.51, + "learning_rate": 5.217364767591014e-05, + "loss": 0.9401, + "step": 22320 + }, + { + "epoch": 2.51, + "learning_rate": 5.210093318738709e-05, + "loss": 0.952, + "step": 22340 + }, + { + "epoch": 2.51, + "learning_rate": 5.20282142473432e-05, + "loss": 0.9752, + "step": 22360 + }, + { + "epoch": 2.51, + "learning_rate": 5.195549100985756e-05, + "loss": 0.9655, + "step": 22380 + }, + { + "epoch": 2.52, + "learning_rate": 5.188276362901836e-05, + "loss": 0.9752, + "step": 22400 + }, + { + "epoch": 2.52, + "learning_rate": 5.1810032258922605e-05, + "loss": 0.9632, + "step": 22420 + }, + { + "epoch": 2.52, + "learning_rate": 5.173729705367568e-05, + "loss": 0.9166, + "step": 22440 + }, + { + "epoch": 2.52, + "learning_rate": 5.166455816739118e-05, + "loss": 0.9433, + "step": 22460 + }, + { + "epoch": 2.53, + "learning_rate": 5.159181575419043e-05, + "loss": 0.9459, + "step": 22480 + }, + { + "epoch": 2.53, + "learning_rate": 5.151906996820227e-05, + "loss": 0.9316, + "step": 22500 + }, + { + "epoch": 2.53, + "learning_rate": 5.144632096356269e-05, + "loss": 0.945, + "step": 22520 + }, + { + "epoch": 2.53, + "learning_rate": 5.137356889441444e-05, + "loss": 0.9192, + "step": 22540 + }, + { + "epoch": 2.53, + "learning_rate": 5.1300813914906853e-05, + "loss": 0.9338, + "step": 22560 + }, + { + "epoch": 2.54, + "learning_rate": 5.122805617919536e-05, + "loss": 0.9607, + "step": 22580 + }, + { + "epoch": 2.54, + "learning_rate": 5.115529584144125e-05, + "loss": 0.9123, + "step": 22600 + }, + { + "epoch": 2.54, + "learning_rate": 5.108253305581134e-05, + "loss": 0.9547, + "step": 22620 + }, + { + "epoch": 2.54, + "learning_rate": 5.100976797647761e-05, + "loss": 0.9258, + "step": 22640 + }, + { + "epoch": 2.55, + "learning_rate": 5.0937000757616934e-05, + "loss": 0.9028, + "step": 22660 + }, + { + "epoch": 2.55, + "learning_rate": 5.086423155341068e-05, + "loss": 0.9422, + "step": 22680 + }, + { + "epoch": 2.55, + "learning_rate": 5.079146051804444e-05, + "loss": 0.9331, + "step": 22700 + }, + { + "epoch": 2.55, + "learning_rate": 5.071868780570772e-05, + "loss": 0.9227, + "step": 22720 + }, + { + "epoch": 2.56, + "learning_rate": 5.0645913570593484e-05, + "loss": 0.9301, + "step": 22740 + }, + { + "epoch": 2.56, + "learning_rate": 5.057313796689804e-05, + "loss": 0.9385, + "step": 22760 + }, + { + "epoch": 2.56, + "learning_rate": 5.050036114882052e-05, + "loss": 0.9192, + "step": 22780 + }, + { + "epoch": 2.56, + "learning_rate": 5.042758327056265e-05, + "loss": 0.9302, + "step": 22800 + }, + { + "epoch": 2.56, + "learning_rate": 5.03548044863284e-05, + "loss": 0.9305, + "step": 22820 + }, + { + "epoch": 2.57, + "learning_rate": 5.028202495032366e-05, + "loss": 0.9267, + "step": 22840 + }, + { + "epoch": 2.57, + "learning_rate": 5.020924481675593e-05, + "loss": 0.9608, + "step": 22860 + }, + { + "epoch": 2.57, + "learning_rate": 5.013646423983392e-05, + "loss": 0.9333, + "step": 22880 + }, + { + "epoch": 2.57, + "learning_rate": 5.006368337376737e-05, + "loss": 0.9643, + "step": 22900 + }, + { + "epoch": 2.58, + "learning_rate": 4.999090237276657e-05, + "loss": 0.9209, + "step": 22920 + }, + { + "epoch": 2.58, + "learning_rate": 4.991812139104207e-05, + "loss": 0.9363, + "step": 22940 + }, + { + "epoch": 2.58, + "learning_rate": 4.984534058280445e-05, + "loss": 0.9439, + "step": 22960 + }, + { + "epoch": 2.58, + "learning_rate": 4.97725601022639e-05, + "loss": 0.9514, + "step": 22980 + }, + { + "epoch": 2.58, + "learning_rate": 4.969978010362989e-05, + "loss": 0.9453, + "step": 23000 + }, + { + "epoch": 2.59, + "learning_rate": 4.9627000741110865e-05, + "loss": 0.9394, + "step": 23020 + }, + { + "epoch": 2.59, + "learning_rate": 4.955422216891397e-05, + "loss": 0.9316, + "step": 23040 + }, + { + "epoch": 2.59, + "learning_rate": 4.9481444541244665e-05, + "loss": 0.9088, + "step": 23060 + }, + { + "epoch": 2.59, + "learning_rate": 4.9408668012306344e-05, + "loss": 0.9212, + "step": 23080 + }, + { + "epoch": 2.6, + "learning_rate": 4.933589273630013e-05, + "loss": 0.9114, + "step": 23100 + }, + { + "epoch": 2.6, + "learning_rate": 4.9263118867424515e-05, + "loss": 0.9269, + "step": 23120 + }, + { + "epoch": 2.6, + "learning_rate": 4.919034655987493e-05, + "loss": 0.9383, + "step": 23140 + }, + { + "epoch": 2.6, + "learning_rate": 4.911757596784357e-05, + "loss": 0.9495, + "step": 23160 + }, + { + "epoch": 2.6, + "learning_rate": 4.904480724551897e-05, + "loss": 0.9556, + "step": 23180 + }, + { + "epoch": 2.61, + "learning_rate": 4.89720405470857e-05, + "loss": 0.9101, + "step": 23200 + }, + { + "epoch": 2.61, + "learning_rate": 4.8899276026724034e-05, + "loss": 0.9385, + "step": 23220 + }, + { + "epoch": 2.61, + "learning_rate": 4.882651383860963e-05, + "loss": 0.9146, + "step": 23240 + }, + { + "epoch": 2.61, + "learning_rate": 4.875375413691327e-05, + "loss": 0.8875, + "step": 23260 + }, + { + "epoch": 2.62, + "learning_rate": 4.868099707580035e-05, + "loss": 0.9435, + "step": 23280 + }, + { + "epoch": 2.62, + "learning_rate": 4.8608242809430744e-05, + "loss": 0.9215, + "step": 23300 + }, + { + "epoch": 2.62, + "learning_rate": 4.8535491491958415e-05, + "loss": 0.9206, + "step": 23320 + }, + { + "epoch": 2.62, + "learning_rate": 4.846274327753107e-05, + "loss": 0.9159, + "step": 23340 + }, + { + "epoch": 2.62, + "learning_rate": 4.8389998320289785e-05, + "loss": 0.9272, + "step": 23360 + }, + { + "epoch": 2.63, + "learning_rate": 4.8317256774368815e-05, + "loss": 0.9164, + "step": 23380 + }, + { + "epoch": 2.63, + "learning_rate": 4.824451879389513e-05, + "loss": 0.9521, + "step": 23400 + }, + { + "epoch": 2.63, + "learning_rate": 4.8171784532988165e-05, + "loss": 0.9555, + "step": 23420 + }, + { + "epoch": 2.63, + "learning_rate": 4.809905414575947e-05, + "loss": 0.9317, + "step": 23440 + }, + { + "epoch": 2.64, + "learning_rate": 4.802632778631241e-05, + "loss": 0.9336, + "step": 23460 + }, + { + "epoch": 2.64, + "learning_rate": 4.795360560874181e-05, + "loss": 0.9299, + "step": 23480 + }, + { + "epoch": 2.64, + "learning_rate": 4.7880887767133565e-05, + "loss": 0.9365, + "step": 23500 + }, + { + "epoch": 2.64, + "learning_rate": 4.7808174415564484e-05, + "loss": 0.9178, + "step": 23520 + }, + { + "epoch": 2.64, + "learning_rate": 4.773546570810182e-05, + "loss": 0.931, + "step": 23540 + }, + { + "epoch": 2.65, + "learning_rate": 4.766276179880296e-05, + "loss": 0.9326, + "step": 23560 + }, + { + "epoch": 2.65, + "learning_rate": 4.759006284171515e-05, + "loss": 0.9195, + "step": 23580 + }, + { + "epoch": 2.65, + "learning_rate": 4.7517368990875146e-05, + "loss": 0.9536, + "step": 23600 + }, + { + "epoch": 2.65, + "learning_rate": 4.744468040030891e-05, + "loss": 0.9326, + "step": 23620 + }, + { + "epoch": 2.66, + "learning_rate": 4.737199722403117e-05, + "loss": 0.9336, + "step": 23640 + }, + { + "epoch": 2.66, + "learning_rate": 4.729931961604529e-05, + "loss": 0.9292, + "step": 23660 + }, + { + "epoch": 2.66, + "learning_rate": 4.722664773034278e-05, + "loss": 0.9481, + "step": 23680 + }, + { + "epoch": 2.66, + "learning_rate": 4.7153981720902997e-05, + "loss": 0.9314, + "step": 23700 + }, + { + "epoch": 2.67, + "learning_rate": 4.7081321741692904e-05, + "loss": 0.9054, + "step": 23720 + }, + { + "epoch": 2.67, + "learning_rate": 4.7008667946666674e-05, + "loss": 0.946, + "step": 23740 + }, + { + "epoch": 2.67, + "learning_rate": 4.693602048976537e-05, + "loss": 0.9389, + "step": 23760 + }, + { + "epoch": 2.67, + "learning_rate": 4.686337952491659e-05, + "loss": 0.9484, + "step": 23780 + }, + { + "epoch": 2.67, + "learning_rate": 4.679074520603423e-05, + "loss": 0.9318, + "step": 23800 + }, + { + "epoch": 2.68, + "learning_rate": 4.671811768701811e-05, + "loss": 0.9421, + "step": 23820 + }, + { + "epoch": 2.68, + "learning_rate": 4.6645497121753564e-05, + "loss": 0.9526, + "step": 23840 + }, + { + "epoch": 2.68, + "learning_rate": 4.657288366411127e-05, + "loss": 0.9352, + "step": 23860 + }, + { + "epoch": 2.68, + "learning_rate": 4.650027746794686e-05, + "loss": 0.9146, + "step": 23880 + }, + { + "epoch": 2.69, + "learning_rate": 4.642767868710045e-05, + "loss": 0.9241, + "step": 23900 + }, + { + "epoch": 2.69, + "learning_rate": 4.635508747539661e-05, + "loss": 0.9173, + "step": 23920 + }, + { + "epoch": 2.69, + "learning_rate": 4.6282503986643775e-05, + "loss": 0.9367, + "step": 23940 + }, + { + "epoch": 2.69, + "learning_rate": 4.6209928374634036e-05, + "loss": 0.939, + "step": 23960 + }, + { + "epoch": 2.69, + "learning_rate": 4.6137360793142794e-05, + "loss": 0.9138, + "step": 23980 + }, + { + "epoch": 2.7, + "learning_rate": 4.606480139592843e-05, + "loss": 0.9526, + "step": 24000 + }, + { + "epoch": 2.7, + "learning_rate": 4.599225033673203e-05, + "loss": 0.9391, + "step": 24020 + }, + { + "epoch": 2.7, + "learning_rate": 4.591970776927692e-05, + "loss": 0.9484, + "step": 24040 + }, + { + "epoch": 2.7, + "learning_rate": 4.584717384726853e-05, + "loss": 0.9413, + "step": 24060 + }, + { + "epoch": 2.71, + "learning_rate": 4.577464872439391e-05, + "loss": 0.9497, + "step": 24080 + }, + { + "epoch": 2.71, + "learning_rate": 4.57021325543215e-05, + "loss": 0.9306, + "step": 24100 + }, + { + "epoch": 2.71, + "learning_rate": 4.562962549070074e-05, + "loss": 0.9218, + "step": 24120 + }, + { + "epoch": 2.71, + "learning_rate": 4.555712768716179e-05, + "loss": 0.9342, + "step": 24140 + }, + { + "epoch": 2.71, + "learning_rate": 4.548463929731522e-05, + "loss": 0.9354, + "step": 24160 + }, + { + "epoch": 2.72, + "learning_rate": 4.5412160474751595e-05, + "loss": 0.9286, + "step": 24180 + }, + { + "epoch": 2.72, + "learning_rate": 4.5339691373041236e-05, + "loss": 0.9458, + "step": 24200 + }, + { + "epoch": 2.72, + "learning_rate": 4.526723214573389e-05, + "loss": 0.956, + "step": 24220 + }, + { + "epoch": 2.72, + "learning_rate": 4.519478294635837e-05, + "loss": 0.9322, + "step": 24240 + }, + { + "epoch": 2.73, + "learning_rate": 4.51223439284222e-05, + "loss": 0.9161, + "step": 24260 + }, + { + "epoch": 2.73, + "learning_rate": 4.504991524541138e-05, + "loss": 0.9273, + "step": 24280 + }, + { + "epoch": 2.73, + "learning_rate": 4.497749705079001e-05, + "loss": 0.9667, + "step": 24300 + }, + { + "epoch": 2.73, + "learning_rate": 4.490508949799993e-05, + "loss": 0.9419, + "step": 24320 + }, + { + "epoch": 2.73, + "learning_rate": 4.483269274046046e-05, + "loss": 0.9533, + "step": 24340 + }, + { + "epoch": 2.74, + "learning_rate": 4.4760306931568044e-05, + "loss": 0.9396, + "step": 24360 + }, + { + "epoch": 2.74, + "learning_rate": 4.468793222469596e-05, + "loss": 0.917, + "step": 24380 + }, + { + "epoch": 2.74, + "learning_rate": 4.461556877319385e-05, + "loss": 0.9475, + "step": 24400 + }, + { + "epoch": 2.74, + "learning_rate": 4.454321673038766e-05, + "loss": 0.9314, + "step": 24420 + }, + { + "epoch": 2.75, + "learning_rate": 4.447087624957906e-05, + "loss": 0.9221, + "step": 24440 + }, + { + "epoch": 2.75, + "learning_rate": 4.4398547484045245e-05, + "loss": 0.9088, + "step": 24460 + }, + { + "epoch": 2.75, + "learning_rate": 4.4326230587038594e-05, + "loss": 0.9398, + "step": 24480 + }, + { + "epoch": 2.75, + "learning_rate": 4.425392571178635e-05, + "loss": 0.9181, + "step": 24500 + }, + { + "epoch": 2.76, + "learning_rate": 4.418163301149027e-05, + "loss": 0.9506, + "step": 24520 + }, + { + "epoch": 2.76, + "learning_rate": 4.41093526393263e-05, + "loss": 0.9425, + "step": 24540 + }, + { + "epoch": 2.76, + "learning_rate": 4.4037084748444284e-05, + "loss": 0.931, + "step": 24560 + }, + { + "epoch": 2.76, + "learning_rate": 4.3964829491967655e-05, + "loss": 0.9209, + "step": 24580 + }, + { + "epoch": 2.76, + "learning_rate": 4.389258702299298e-05, + "loss": 0.9217, + "step": 24600 + }, + { + "epoch": 2.77, + "learning_rate": 4.3820357494589816e-05, + "loss": 0.9193, + "step": 24620 + }, + { + "epoch": 2.77, + "learning_rate": 4.3748141059800276e-05, + "loss": 0.9129, + "step": 24640 + }, + { + "epoch": 2.77, + "learning_rate": 4.367593787163875e-05, + "loss": 0.9262, + "step": 24660 + }, + { + "epoch": 2.77, + "learning_rate": 4.3603748083091495e-05, + "loss": 0.9432, + "step": 24680 + }, + { + "epoch": 2.78, + "learning_rate": 4.353157184711645e-05, + "loss": 0.942, + "step": 24700 + }, + { + "epoch": 2.78, + "learning_rate": 4.34594093166428e-05, + "loss": 0.94, + "step": 24720 + }, + { + "epoch": 2.78, + "learning_rate": 4.33872606445707e-05, + "loss": 0.9462, + "step": 24740 + }, + { + "epoch": 2.78, + "learning_rate": 4.331512598377092e-05, + "loss": 0.9453, + "step": 24760 + }, + { + "epoch": 2.78, + "learning_rate": 4.3243005487084595e-05, + "loss": 0.9504, + "step": 24780 + }, + { + "epoch": 2.79, + "learning_rate": 4.3170899307322826e-05, + "loss": 0.9104, + "step": 24800 + }, + { + "epoch": 2.79, + "learning_rate": 4.309880759726633e-05, + "loss": 0.9403, + "step": 24820 + }, + { + "epoch": 2.79, + "learning_rate": 4.302673050966523e-05, + "loss": 0.9555, + "step": 24840 + }, + { + "epoch": 2.79, + "learning_rate": 4.295466819723864e-05, + "loss": 0.9586, + "step": 24860 + }, + { + "epoch": 2.8, + "learning_rate": 4.288262081267435e-05, + "loss": 0.9811, + "step": 24880 + }, + { + "epoch": 2.8, + "learning_rate": 4.281058850862856e-05, + "loss": 0.929, + "step": 24900 + }, + { + "epoch": 2.8, + "learning_rate": 4.27385714377255e-05, + "loss": 0.9444, + "step": 24920 + }, + { + "epoch": 2.8, + "learning_rate": 4.266656975255709e-05, + "loss": 0.9514, + "step": 24940 + }, + { + "epoch": 2.8, + "learning_rate": 4.259458360568271e-05, + "loss": 0.9328, + "step": 24960 + }, + { + "epoch": 2.81, + "learning_rate": 4.252261314962878e-05, + "loss": 0.9218, + "step": 24980 + }, + { + "epoch": 2.81, + "learning_rate": 4.245065853688848e-05, + "loss": 0.9279, + "step": 25000 + }, + { + "epoch": 2.81, + "learning_rate": 4.237871991992142e-05, + "loss": 0.9317, + "step": 25020 + }, + { + "epoch": 2.81, + "learning_rate": 4.2306797451153314e-05, + "loss": 0.9481, + "step": 25040 + }, + { + "epoch": 2.82, + "learning_rate": 4.223489128297568e-05, + "loss": 0.9363, + "step": 25060 + }, + { + "epoch": 2.82, + "learning_rate": 4.216300156774548e-05, + "loss": 0.9445, + "step": 25080 + }, + { + "epoch": 2.82, + "learning_rate": 4.209112845778481e-05, + "loss": 0.917, + "step": 25100 + }, + { + "epoch": 2.82, + "learning_rate": 4.201927210538058e-05, + "loss": 0.9566, + "step": 25120 + }, + { + "epoch": 2.82, + "learning_rate": 4.194743266278426e-05, + "loss": 0.9185, + "step": 25140 + }, + { + "epoch": 2.83, + "learning_rate": 4.1875610282211364e-05, + "loss": 0.9006, + "step": 25160 + }, + { + "epoch": 2.83, + "learning_rate": 4.1803805115841366e-05, + "loss": 0.92, + "step": 25180 + }, + { + "epoch": 2.83, + "learning_rate": 4.173201731581724e-05, + "loss": 0.9291, + "step": 25200 + }, + { + "epoch": 2.83, + "learning_rate": 4.166024703424511e-05, + "loss": 0.9299, + "step": 25220 + }, + { + "epoch": 2.84, + "learning_rate": 4.1588494423194046e-05, + "loss": 0.9074, + "step": 25240 + }, + { + "epoch": 2.84, + "learning_rate": 4.151675963469565e-05, + "loss": 0.9473, + "step": 25260 + }, + { + "epoch": 2.84, + "learning_rate": 4.1445042820743764e-05, + "loss": 0.9326, + "step": 25280 + }, + { + "epoch": 2.84, + "learning_rate": 4.137334413329414e-05, + "loss": 0.9276, + "step": 25300 + }, + { + "epoch": 2.84, + "learning_rate": 4.130166372426412e-05, + "loss": 0.9333, + "step": 25320 + }, + { + "epoch": 2.85, + "learning_rate": 4.123000174553235e-05, + "loss": 0.9269, + "step": 25340 + }, + { + "epoch": 2.85, + "learning_rate": 4.1158358348938374e-05, + "loss": 0.9264, + "step": 25360 + }, + { + "epoch": 2.85, + "learning_rate": 4.1086733686282395e-05, + "loss": 0.9503, + "step": 25380 + }, + { + "epoch": 2.85, + "learning_rate": 4.1015127909324936e-05, + "loss": 0.9071, + "step": 25400 + }, + { + "epoch": 2.86, + "learning_rate": 4.094354116978647e-05, + "loss": 0.9654, + "step": 25420 + }, + { + "epoch": 2.86, + "learning_rate": 4.087197361934714e-05, + "loss": 0.949, + "step": 25440 + }, + { + "epoch": 2.86, + "learning_rate": 4.0800425409646456e-05, + "loss": 0.9102, + "step": 25460 + }, + { + "epoch": 2.86, + "learning_rate": 4.0728896692282926e-05, + "loss": 0.943, + "step": 25480 + }, + { + "epoch": 2.87, + "learning_rate": 4.065738761881375e-05, + "loss": 0.9392, + "step": 25500 + }, + { + "epoch": 2.87, + "learning_rate": 4.0585898340754506e-05, + "loss": 0.9151, + "step": 25520 + }, + { + "epoch": 2.87, + "learning_rate": 4.051442900957888e-05, + "loss": 0.937, + "step": 25540 + }, + { + "epoch": 2.87, + "learning_rate": 4.0442979776718237e-05, + "loss": 0.9283, + "step": 25560 + }, + { + "epoch": 2.87, + "learning_rate": 4.037155079356137e-05, + "loss": 0.92, + "step": 25580 + }, + { + "epoch": 2.88, + "learning_rate": 4.030014221145417e-05, + "loss": 0.9313, + "step": 25600 + }, + { + "epoch": 2.88, + "learning_rate": 4.022875418169931e-05, + "loss": 0.9367, + "step": 25620 + }, + { + "epoch": 2.88, + "learning_rate": 4.0157386855555906e-05, + "loss": 0.9642, + "step": 25640 + }, + { + "epoch": 2.88, + "learning_rate": 4.00860403842392e-05, + "loss": 0.9027, + "step": 25660 + }, + { + "epoch": 2.89, + "learning_rate": 4.001471491892026e-05, + "loss": 0.9215, + "step": 25680 + }, + { + "epoch": 2.89, + "learning_rate": 3.9943410610725665e-05, + "loss": 0.9546, + "step": 25700 + }, + { + "epoch": 2.89, + "learning_rate": 3.9872127610737095e-05, + "loss": 0.909, + "step": 25720 + }, + { + "epoch": 2.89, + "learning_rate": 3.9800866069991173e-05, + "loss": 0.9495, + "step": 25740 + }, + { + "epoch": 2.89, + "learning_rate": 3.9729626139478995e-05, + "loss": 0.9311, + "step": 25760 + }, + { + "epoch": 2.9, + "learning_rate": 3.965840797014586e-05, + "loss": 0.9387, + "step": 25780 + }, + { + "epoch": 2.9, + "learning_rate": 3.9587211712891005e-05, + "loss": 0.919, + "step": 25800 + }, + { + "epoch": 2.9, + "learning_rate": 3.9516037518567204e-05, + "loss": 0.938, + "step": 25820 + }, + { + "epoch": 2.9, + "learning_rate": 3.9444885537980526e-05, + "loss": 0.929, + "step": 25840 + }, + { + "epoch": 2.91, + "learning_rate": 3.9373755921889886e-05, + "loss": 0.9502, + "step": 25860 + }, + { + "epoch": 2.91, + "learning_rate": 3.93026488210069e-05, + "loss": 0.9294, + "step": 25880 + }, + { + "epoch": 2.91, + "learning_rate": 3.9231564385995476e-05, + "loss": 0.9643, + "step": 25900 + }, + { + "epoch": 2.91, + "learning_rate": 3.91605027674714e-05, + "loss": 0.9125, + "step": 25920 + }, + { + "epoch": 2.91, + "learning_rate": 3.908946411600222e-05, + "loss": 0.9356, + "step": 25940 + }, + { + "epoch": 2.92, + "learning_rate": 3.9018448582106795e-05, + "loss": 0.9298, + "step": 25960 + }, + { + "epoch": 2.92, + "learning_rate": 3.894745631625495e-05, + "loss": 0.9532, + "step": 25980 + }, + { + "epoch": 2.92, + "learning_rate": 3.887648746886727e-05, + "loss": 0.9326, + "step": 26000 + }, + { + "epoch": 2.92, + "learning_rate": 3.8805542190314705e-05, + "loss": 0.9003, + "step": 26020 + }, + { + "epoch": 2.93, + "learning_rate": 3.873462063091825e-05, + "loss": 0.9361, + "step": 26040 + }, + { + "epoch": 2.93, + "learning_rate": 3.866372294094864e-05, + "loss": 0.9186, + "step": 26060 + }, + { + "epoch": 2.93, + "learning_rate": 3.859284927062604e-05, + "loss": 0.9442, + "step": 26080 + }, + { + "epoch": 2.93, + "learning_rate": 3.8521999770119786e-05, + "loss": 0.929, + "step": 26100 + }, + { + "epoch": 2.93, + "learning_rate": 3.845117458954787e-05, + "loss": 0.9241, + "step": 26120 + }, + { + "epoch": 2.94, + "learning_rate": 3.838037387897688e-05, + "loss": 0.9246, + "step": 26140 + }, + { + "epoch": 2.94, + "learning_rate": 3.8309597788421474e-05, + "loss": 0.9379, + "step": 26160 + }, + { + "epoch": 2.94, + "learning_rate": 3.823884646784421e-05, + "loss": 0.9221, + "step": 26180 + }, + { + "epoch": 2.94, + "learning_rate": 3.8168120067155096e-05, + "loss": 0.9069, + "step": 26200 + }, + { + "epoch": 2.95, + "learning_rate": 3.809741873621138e-05, + "loss": 0.9144, + "step": 26220 + }, + { + "epoch": 2.95, + "learning_rate": 3.802674262481719e-05, + "loss": 0.9446, + "step": 26240 + }, + { + "epoch": 2.95, + "learning_rate": 3.79560918827232e-05, + "loss": 0.972, + "step": 26260 + }, + { + "epoch": 2.95, + "learning_rate": 3.7885466659626334e-05, + "loss": 0.9395, + "step": 26280 + }, + { + "epoch": 2.96, + "learning_rate": 3.781486710516948e-05, + "loss": 0.9471, + "step": 26300 + }, + { + "epoch": 2.96, + "learning_rate": 3.77442933689411e-05, + "loss": 0.9502, + "step": 26320 + }, + { + "epoch": 2.96, + "learning_rate": 3.767374560047495e-05, + "loss": 0.9296, + "step": 26340 + }, + { + "epoch": 2.96, + "learning_rate": 3.760322394924979e-05, + "loss": 0.9643, + "step": 26360 + }, + { + "epoch": 2.96, + "learning_rate": 3.753272856468903e-05, + "loss": 0.929, + "step": 26380 + }, + { + "epoch": 2.97, + "learning_rate": 3.746225959616042e-05, + "loss": 0.9233, + "step": 26400 + }, + { + "epoch": 2.97, + "learning_rate": 3.7391817192975745e-05, + "loss": 0.9169, + "step": 26420 + }, + { + "epoch": 2.97, + "learning_rate": 3.732140150439048e-05, + "loss": 0.891, + "step": 26440 + }, + { + "epoch": 2.97, + "learning_rate": 3.725101267960359e-05, + "loss": 0.9535, + "step": 26460 + }, + { + "epoch": 2.98, + "learning_rate": 3.718065086775695e-05, + "loss": 0.9363, + "step": 26480 + }, + { + "epoch": 2.98, + "learning_rate": 3.7110316217935357e-05, + "loss": 0.964, + "step": 26500 + }, + { + "epoch": 2.98, + "learning_rate": 3.7040008879166e-05, + "loss": 0.9386, + "step": 26520 + }, + { + "epoch": 2.98, + "learning_rate": 3.696972900041816e-05, + "loss": 0.9268, + "step": 26540 + }, + { + "epoch": 2.98, + "learning_rate": 3.6899476730603e-05, + "loss": 0.9139, + "step": 26560 + }, + { + "epoch": 2.99, + "learning_rate": 3.682925221857315e-05, + "loss": 0.9274, + "step": 26580 + }, + { + "epoch": 2.99, + "learning_rate": 3.675905561312244e-05, + "loss": 0.9195, + "step": 26600 + }, + { + "epoch": 2.99, + "learning_rate": 3.668888706298554e-05, + "loss": 0.9289, + "step": 26620 + }, + { + "epoch": 2.99, + "learning_rate": 3.66187467168377e-05, + "loss": 0.9429, + "step": 26640 + }, + { + "epoch": 3.0, + "learning_rate": 3.654863472329445e-05, + "loss": 0.9156, + "step": 26660 + }, + { + "epoch": 3.0, + "learning_rate": 3.647855123091115e-05, + "loss": 0.9355, + "step": 26680 + }, + { + "epoch": 3.0, + "learning_rate": 3.640849638818286e-05, + "loss": 0.9313, + "step": 26700 + }, + { + "epoch": 3.0, + "learning_rate": 3.633847034354389e-05, + "loss": 0.86, + "step": 26720 + }, + { + "epoch": 3.0, + "learning_rate": 3.626847324536755e-05, + "loss": 0.9101, + "step": 26740 + }, + { + "epoch": 3.01, + "learning_rate": 3.6198505241965806e-05, + "loss": 0.9232, + "step": 26760 + }, + { + "epoch": 3.01, + "learning_rate": 3.6128566481588977e-05, + "loss": 0.9325, + "step": 26780 + }, + { + "epoch": 3.01, + "learning_rate": 3.605865711242544e-05, + "loss": 0.9364, + "step": 26800 + }, + { + "epoch": 3.01, + "learning_rate": 3.598877728260127e-05, + "loss": 0.8978, + "step": 26820 + }, + { + "epoch": 3.02, + "learning_rate": 3.591892714017995e-05, + "loss": 0.892, + "step": 26840 + }, + { + "epoch": 3.02, + "learning_rate": 3.5849106833162124e-05, + "loss": 0.8974, + "step": 26860 + }, + { + "epoch": 3.02, + "learning_rate": 3.577931650948512e-05, + "loss": 0.8923, + "step": 26880 + }, + { + "epoch": 3.02, + "learning_rate": 3.5709556317022823e-05, + "loss": 0.9018, + "step": 26900 + }, + { + "epoch": 3.02, + "learning_rate": 3.563982640358523e-05, + "loss": 0.9101, + "step": 26920 + }, + { + "epoch": 3.03, + "learning_rate": 3.55701269169182e-05, + "loss": 0.8988, + "step": 26940 + }, + { + "epoch": 3.03, + "learning_rate": 3.550045800470311e-05, + "loss": 0.8896, + "step": 26960 + }, + { + "epoch": 3.03, + "learning_rate": 3.5430819814556544e-05, + "loss": 0.9021, + "step": 26980 + }, + { + "epoch": 3.03, + "learning_rate": 3.536121249403004e-05, + "loss": 0.8667, + "step": 27000 + }, + { + "epoch": 3.04, + "learning_rate": 3.5291636190609665e-05, + "loss": 0.8931, + "step": 27020 + }, + { + "epoch": 3.04, + "learning_rate": 3.52220910517158e-05, + "loss": 0.8927, + "step": 27040 + }, + { + "epoch": 3.04, + "learning_rate": 3.515257722470281e-05, + "loss": 0.9163, + "step": 27060 + }, + { + "epoch": 3.04, + "learning_rate": 3.50830948568587e-05, + "loss": 0.9169, + "step": 27080 + }, + { + "epoch": 3.04, + "learning_rate": 3.50136440954048e-05, + "loss": 0.9031, + "step": 27100 + }, + { + "epoch": 3.05, + "learning_rate": 3.494422508749547e-05, + "loss": 0.9213, + "step": 27120 + }, + { + "epoch": 3.05, + "learning_rate": 3.487483798021785e-05, + "loss": 0.9082, + "step": 27140 + }, + { + "epoch": 3.05, + "learning_rate": 3.480548292059139e-05, + "loss": 0.911, + "step": 27160 + }, + { + "epoch": 3.05, + "learning_rate": 3.473616005556773e-05, + "loss": 0.9075, + "step": 27180 + }, + { + "epoch": 3.06, + "learning_rate": 3.4666869532030224e-05, + "loss": 0.9072, + "step": 27200 + }, + { + "epoch": 3.06, + "learning_rate": 3.459761149679378e-05, + "loss": 0.9181, + "step": 27220 + }, + { + "epoch": 3.06, + "learning_rate": 3.4528386096604366e-05, + "loss": 0.908, + "step": 27240 + }, + { + "epoch": 3.06, + "learning_rate": 3.445919347813888e-05, + "loss": 0.8838, + "step": 27260 + }, + { + "epoch": 3.07, + "learning_rate": 3.439003378800475e-05, + "loss": 0.8977, + "step": 27280 + }, + { + "epoch": 3.07, + "learning_rate": 3.4320907172739594e-05, + "loss": 0.9024, + "step": 27300 + }, + { + "epoch": 3.07, + "learning_rate": 3.425181377881099e-05, + "loss": 0.9174, + "step": 27320 + }, + { + "epoch": 3.07, + "learning_rate": 3.4182753752616094e-05, + "loss": 0.916, + "step": 27340 + }, + { + "epoch": 3.07, + "learning_rate": 3.411372724048144e-05, + "loss": 0.9103, + "step": 27360 + }, + { + "epoch": 3.08, + "learning_rate": 3.4044734388662426e-05, + "loss": 0.8922, + "step": 27380 + }, + { + "epoch": 3.08, + "learning_rate": 3.3975775343343205e-05, + "loss": 0.8991, + "step": 27400 + }, + { + "epoch": 3.08, + "learning_rate": 3.390685025063633e-05, + "loss": 0.8822, + "step": 27420 + }, + { + "epoch": 3.08, + "learning_rate": 3.383795925658233e-05, + "loss": 0.9007, + "step": 27440 + }, + { + "epoch": 3.09, + "learning_rate": 3.376910250714955e-05, + "loss": 0.9058, + "step": 27460 + }, + { + "epoch": 3.09, + "learning_rate": 3.370028014823375e-05, + "loss": 0.9046, + "step": 27480 + }, + { + "epoch": 3.09, + "learning_rate": 3.363149232565785e-05, + "loss": 0.9123, + "step": 27500 + }, + { + "epoch": 3.09, + "learning_rate": 3.356273918517153e-05, + "loss": 0.8856, + "step": 27520 + }, + { + "epoch": 3.09, + "learning_rate": 3.349402087245104e-05, + "loss": 0.9146, + "step": 27540 + }, + { + "epoch": 3.1, + "learning_rate": 3.342533753309887e-05, + "loss": 0.9106, + "step": 27560 + }, + { + "epoch": 3.1, + "learning_rate": 3.335668931264327e-05, + "loss": 0.8902, + "step": 27580 + }, + { + "epoch": 3.1, + "learning_rate": 3.328807635653822e-05, + "loss": 0.8881, + "step": 27600 + }, + { + "epoch": 3.1, + "learning_rate": 3.321949881016293e-05, + "loss": 0.9172, + "step": 27620 + }, + { + "epoch": 3.11, + "learning_rate": 3.315095681882159e-05, + "loss": 0.9076, + "step": 27640 + }, + { + "epoch": 3.11, + "learning_rate": 3.3082450527743014e-05, + "loss": 0.8837, + "step": 27660 + }, + { + "epoch": 3.11, + "learning_rate": 3.301398008208042e-05, + "loss": 0.8725, + "step": 27680 + }, + { + "epoch": 3.11, + "learning_rate": 3.294554562691108e-05, + "loss": 0.9202, + "step": 27700 + }, + { + "epoch": 3.11, + "learning_rate": 3.287714730723596e-05, + "loss": 0.909, + "step": 27720 + }, + { + "epoch": 3.12, + "learning_rate": 3.280878526797948e-05, + "loss": 0.9094, + "step": 27740 + }, + { + "epoch": 3.12, + "learning_rate": 3.274045965398924e-05, + "loss": 0.8797, + "step": 27760 + }, + { + "epoch": 3.12, + "learning_rate": 3.267217061003562e-05, + "loss": 0.8962, + "step": 27780 + }, + { + "epoch": 3.12, + "learning_rate": 3.260391828081147e-05, + "loss": 0.8772, + "step": 27800 + }, + { + "epoch": 3.13, + "learning_rate": 3.253570281093192e-05, + "loss": 0.8907, + "step": 27820 + }, + { + "epoch": 3.13, + "learning_rate": 3.246752434493398e-05, + "loss": 0.8898, + "step": 27840 + }, + { + "epoch": 3.13, + "learning_rate": 3.239938302727622e-05, + "loss": 0.8992, + "step": 27860 + }, + { + "epoch": 3.13, + "learning_rate": 3.233127900233855e-05, + "loss": 0.8948, + "step": 27880 + }, + { + "epoch": 3.13, + "learning_rate": 3.2263212414421846e-05, + "loss": 0.9386, + "step": 27900 + }, + { + "epoch": 3.14, + "learning_rate": 3.219518340774763e-05, + "loss": 0.915, + "step": 27920 + }, + { + "epoch": 3.14, + "learning_rate": 3.2127192126457815e-05, + "loss": 0.9026, + "step": 27940 + }, + { + "epoch": 3.14, + "learning_rate": 3.205923871461442e-05, + "loss": 0.8793, + "step": 27960 + }, + { + "epoch": 3.14, + "learning_rate": 3.19913233161992e-05, + "loss": 0.9182, + "step": 27980 + }, + { + "epoch": 3.15, + "learning_rate": 3.192344607511329e-05, + "loss": 0.8803, + "step": 28000 + }, + { + "epoch": 3.15, + "learning_rate": 3.18556071351771e-05, + "loss": 0.872, + "step": 28020 + }, + { + "epoch": 3.15, + "learning_rate": 3.1787806640129826e-05, + "loss": 0.899, + "step": 28040 + }, + { + "epoch": 3.15, + "learning_rate": 3.1720044733629196e-05, + "loss": 0.9047, + "step": 28060 + }, + { + "epoch": 3.16, + "learning_rate": 3.165232155925118e-05, + "loss": 0.8979, + "step": 28080 + }, + { + "epoch": 3.16, + "learning_rate": 3.15846372604897e-05, + "loss": 0.8833, + "step": 28100 + }, + { + "epoch": 3.16, + "learning_rate": 3.151699198075633e-05, + "loss": 0.908, + "step": 28120 + }, + { + "epoch": 3.16, + "learning_rate": 3.1449385863379866e-05, + "loss": 0.8998, + "step": 28140 + }, + { + "epoch": 3.16, + "learning_rate": 3.138181905160625e-05, + "loss": 0.8975, + "step": 28160 + }, + { + "epoch": 3.17, + "learning_rate": 3.13142916885981e-05, + "loss": 0.8921, + "step": 28180 + }, + { + "epoch": 3.17, + "learning_rate": 3.124680391743438e-05, + "loss": 0.9263, + "step": 28200 + }, + { + "epoch": 3.17, + "learning_rate": 3.117935588111026e-05, + "loss": 0.9153, + "step": 28220 + }, + { + "epoch": 3.17, + "learning_rate": 3.111194772253668e-05, + "loss": 0.9274, + "step": 28240 + }, + { + "epoch": 3.18, + "learning_rate": 3.104457958454009e-05, + "loss": 0.9159, + "step": 28260 + }, + { + "epoch": 3.18, + "learning_rate": 3.097725160986212e-05, + "loss": 0.9314, + "step": 28280 + }, + { + "epoch": 3.18, + "learning_rate": 3.090996394115933e-05, + "loss": 0.9059, + "step": 28300 + }, + { + "epoch": 3.18, + "learning_rate": 3.0842716721002894e-05, + "loss": 0.9248, + "step": 28320 + }, + { + "epoch": 3.18, + "learning_rate": 3.077551009187821e-05, + "loss": 0.9125, + "step": 28340 + }, + { + "epoch": 3.19, + "learning_rate": 3.0708344196184756e-05, + "loss": 0.9084, + "step": 28360 + }, + { + "epoch": 3.19, + "learning_rate": 3.064121917623566e-05, + "loss": 0.9046, + "step": 28380 + }, + { + "epoch": 3.19, + "learning_rate": 3.0574135174257444e-05, + "loss": 0.8961, + "step": 28400 + }, + { + "epoch": 3.19, + "learning_rate": 3.050709233238972e-05, + "loss": 0.9001, + "step": 28420 + }, + { + "epoch": 3.2, + "learning_rate": 3.0440090792684884e-05, + "loss": 0.91, + "step": 28440 + }, + { + "epoch": 3.2, + "learning_rate": 3.037313069710784e-05, + "loss": 0.9225, + "step": 28460 + }, + { + "epoch": 3.2, + "learning_rate": 3.0306212187535653e-05, + "loss": 0.8991, + "step": 28480 + }, + { + "epoch": 3.2, + "learning_rate": 3.0239335405757275e-05, + "loss": 0.8773, + "step": 28500 + }, + { + "epoch": 3.2, + "learning_rate": 3.0172500493473294e-05, + "loss": 0.8922, + "step": 28520 + }, + { + "epoch": 3.21, + "learning_rate": 3.0105707592295528e-05, + "loss": 0.9035, + "step": 28540 + }, + { + "epoch": 3.21, + "learning_rate": 3.003895684374679e-05, + "loss": 0.9027, + "step": 28560 + }, + { + "epoch": 3.21, + "learning_rate": 2.9972248389260593e-05, + "loss": 0.9163, + "step": 28580 + }, + { + "epoch": 3.21, + "learning_rate": 2.9905582370180836e-05, + "loss": 0.909, + "step": 28600 + }, + { + "epoch": 3.22, + "learning_rate": 2.9838958927761477e-05, + "loss": 0.8952, + "step": 28620 + }, + { + "epoch": 3.22, + "learning_rate": 2.9772378203166307e-05, + "loss": 0.9269, + "step": 28640 + }, + { + "epoch": 3.22, + "learning_rate": 2.9705840337468554e-05, + "loss": 0.8917, + "step": 28660 + }, + { + "epoch": 3.22, + "learning_rate": 2.9639345471650716e-05, + "loss": 0.8882, + "step": 28680 + }, + { + "epoch": 3.22, + "learning_rate": 2.9572893746604052e-05, + "loss": 0.9008, + "step": 28700 + }, + { + "epoch": 3.23, + "learning_rate": 2.950648530312854e-05, + "loss": 0.9153, + "step": 28720 + }, + { + "epoch": 3.23, + "learning_rate": 2.9440120281932403e-05, + "loss": 0.8977, + "step": 28740 + }, + { + "epoch": 3.23, + "learning_rate": 2.937379882363183e-05, + "loss": 0.9006, + "step": 28760 + }, + { + "epoch": 3.23, + "learning_rate": 2.9307521068750748e-05, + "loss": 0.921, + "step": 28780 + }, + { + "epoch": 3.24, + "learning_rate": 2.924128715772047e-05, + "loss": 0.8782, + "step": 28800 + }, + { + "epoch": 3.24, + "learning_rate": 2.9175097230879423e-05, + "loss": 0.8994, + "step": 28820 + }, + { + "epoch": 3.24, + "learning_rate": 2.9108951428472804e-05, + "loss": 0.8945, + "step": 28840 + }, + { + "epoch": 3.24, + "learning_rate": 2.9042849890652352e-05, + "loss": 0.8867, + "step": 28860 + }, + { + "epoch": 3.24, + "learning_rate": 2.8976792757476013e-05, + "loss": 0.8793, + "step": 28880 + }, + { + "epoch": 3.25, + "learning_rate": 2.891078016890763e-05, + "loss": 0.9037, + "step": 28900 + }, + { + "epoch": 3.25, + "learning_rate": 2.8844812264816684e-05, + "loss": 0.9293, + "step": 28920 + }, + { + "epoch": 3.25, + "learning_rate": 2.8778889184977986e-05, + "loss": 0.8962, + "step": 28940 + }, + { + "epoch": 3.25, + "learning_rate": 2.8713011069071306e-05, + "loss": 0.886, + "step": 28960 + }, + { + "epoch": 3.26, + "learning_rate": 2.8647178056681194e-05, + "loss": 0.8791, + "step": 28980 + }, + { + "epoch": 3.26, + "learning_rate": 2.8581390287296672e-05, + "loss": 0.9162, + "step": 29000 + }, + { + "epoch": 3.26, + "learning_rate": 2.851564790031086e-05, + "loss": 0.9088, + "step": 29020 + }, + { + "epoch": 3.26, + "learning_rate": 2.8449951035020672e-05, + "loss": 0.9208, + "step": 29040 + }, + { + "epoch": 3.27, + "learning_rate": 2.8384299830626637e-05, + "loss": 0.8747, + "step": 29060 + }, + { + "epoch": 3.27, + "learning_rate": 2.8318694426232516e-05, + "loss": 0.8721, + "step": 29080 + }, + { + "epoch": 3.27, + "learning_rate": 2.825313496084503e-05, + "loss": 0.9281, + "step": 29100 + }, + { + "epoch": 3.27, + "learning_rate": 2.8187621573373544e-05, + "loss": 0.9045, + "step": 29120 + }, + { + "epoch": 3.27, + "learning_rate": 2.8122154402629818e-05, + "loss": 0.8925, + "step": 29140 + }, + { + "epoch": 3.28, + "learning_rate": 2.8056733587327694e-05, + "loss": 0.8958, + "step": 29160 + }, + { + "epoch": 3.28, + "learning_rate": 2.7991359266082717e-05, + "loss": 0.9155, + "step": 29180 + }, + { + "epoch": 3.28, + "learning_rate": 2.7926031577412038e-05, + "loss": 0.8971, + "step": 29200 + }, + { + "epoch": 3.28, + "learning_rate": 2.7860750659733938e-05, + "loss": 0.9249, + "step": 29220 + }, + { + "epoch": 3.29, + "learning_rate": 2.779551665136756e-05, + "loss": 0.9045, + "step": 29240 + }, + { + "epoch": 3.29, + "learning_rate": 2.773032969053273e-05, + "loss": 0.9207, + "step": 29260 + }, + { + "epoch": 3.29, + "learning_rate": 2.7665189915349533e-05, + "loss": 0.8938, + "step": 29280 + }, + { + "epoch": 3.29, + "learning_rate": 2.7600097463838114e-05, + "loss": 0.9088, + "step": 29300 + }, + { + "epoch": 3.29, + "learning_rate": 2.753505247391832e-05, + "loss": 0.9085, + "step": 29320 + }, + { + "epoch": 3.3, + "learning_rate": 2.7470055083409452e-05, + "loss": 0.8978, + "step": 29340 + }, + { + "epoch": 3.3, + "learning_rate": 2.740510543002996e-05, + "loss": 0.8963, + "step": 29360 + }, + { + "epoch": 3.3, + "learning_rate": 2.734020365139708e-05, + "loss": 0.9295, + "step": 29380 + }, + { + "epoch": 3.3, + "learning_rate": 2.727534988502673e-05, + "loss": 0.9076, + "step": 29400 + }, + { + "epoch": 3.31, + "learning_rate": 2.721054426833301e-05, + "loss": 0.9037, + "step": 29420 + }, + { + "epoch": 3.31, + "learning_rate": 2.7145786938628036e-05, + "loss": 0.8821, + "step": 29440 + }, + { + "epoch": 3.31, + "learning_rate": 2.7081078033121577e-05, + "loss": 0.8976, + "step": 29460 + }, + { + "epoch": 3.31, + "learning_rate": 2.7016417688920815e-05, + "loss": 0.9083, + "step": 29480 + }, + { + "epoch": 3.31, + "learning_rate": 2.695180604303007e-05, + "loss": 0.9198, + "step": 29500 + }, + { + "epoch": 3.32, + "learning_rate": 2.6887243232350434e-05, + "loss": 0.9052, + "step": 29520 + }, + { + "epoch": 3.32, + "learning_rate": 2.6822729393679558e-05, + "loss": 0.9033, + "step": 29540 + }, + { + "epoch": 3.32, + "learning_rate": 2.6758264663711306e-05, + "loss": 0.9058, + "step": 29560 + }, + { + "epoch": 3.32, + "learning_rate": 2.6693849179035513e-05, + "loss": 0.889, + "step": 29580 + }, + { + "epoch": 3.33, + "learning_rate": 2.662948307613764e-05, + "loss": 0.9118, + "step": 29600 + }, + { + "epoch": 3.33, + "learning_rate": 2.6565166491398553e-05, + "loss": 0.8994, + "step": 29620 + }, + { + "epoch": 3.33, + "learning_rate": 2.6500899561094184e-05, + "loss": 0.9048, + "step": 29640 + }, + { + "epoch": 3.33, + "learning_rate": 2.643668242139522e-05, + "loss": 0.9, + "step": 29660 + }, + { + "epoch": 3.33, + "learning_rate": 2.63725152083669e-05, + "loss": 0.9199, + "step": 29680 + }, + { + "epoch": 3.34, + "learning_rate": 2.630839805796863e-05, + "loss": 0.9024, + "step": 29700 + }, + { + "epoch": 3.34, + "learning_rate": 2.624433110605383e-05, + "loss": 0.9102, + "step": 29720 + }, + { + "epoch": 3.34, + "learning_rate": 2.6180314488369452e-05, + "loss": 0.8843, + "step": 29740 + }, + { + "epoch": 3.34, + "learning_rate": 2.611634834055585e-05, + "loss": 0.8905, + "step": 29760 + }, + { + "epoch": 3.35, + "learning_rate": 2.6052432798146436e-05, + "loss": 0.8834, + "step": 29780 + }, + { + "epoch": 3.35, + "learning_rate": 2.5988567996567402e-05, + "loss": 0.8973, + "step": 29800 + }, + { + "epoch": 3.35, + "learning_rate": 2.5924754071137415e-05, + "loss": 0.887, + "step": 29820 + }, + { + "epoch": 3.35, + "learning_rate": 2.5860991157067356e-05, + "loss": 0.9162, + "step": 29840 + }, + { + "epoch": 3.36, + "learning_rate": 2.5797279389460037e-05, + "loss": 0.8948, + "step": 29860 + }, + { + "epoch": 3.36, + "learning_rate": 2.5733618903309843e-05, + "loss": 0.897, + "step": 29880 + }, + { + "epoch": 3.36, + "learning_rate": 2.567000983350254e-05, + "loss": 0.875, + "step": 29900 + }, + { + "epoch": 3.36, + "learning_rate": 2.5606452314815e-05, + "loss": 0.9287, + "step": 29920 + }, + { + "epoch": 3.36, + "learning_rate": 2.554294648191477e-05, + "loss": 0.8996, + "step": 29940 + }, + { + "epoch": 3.37, + "learning_rate": 2.5479492469359944e-05, + "loss": 0.8989, + "step": 29960 + }, + { + "epoch": 3.37, + "learning_rate": 2.5416090411598813e-05, + "loss": 0.91, + "step": 29980 + }, + { + "epoch": 3.37, + "learning_rate": 2.535274044296957e-05, + "loss": 0.9104, + "step": 30000 + }, + { + "epoch": 3.37, + "learning_rate": 2.5289442697700043e-05, + "loss": 0.8902, + "step": 30020 + }, + { + "epoch": 3.38, + "learning_rate": 2.5226197309907418e-05, + "loss": 0.8907, + "step": 30040 + }, + { + "epoch": 3.38, + "learning_rate": 2.5163004413597955e-05, + "loss": 0.9099, + "step": 30060 + }, + { + "epoch": 3.38, + "learning_rate": 2.5099864142666642e-05, + "loss": 0.8979, + "step": 30080 + }, + { + "epoch": 3.38, + "learning_rate": 2.5036776630896985e-05, + "loss": 0.9008, + "step": 30100 + }, + { + "epoch": 3.38, + "learning_rate": 2.4973742011960775e-05, + "loss": 0.9105, + "step": 30120 + }, + { + "epoch": 3.39, + "learning_rate": 2.4910760419417616e-05, + "loss": 0.9075, + "step": 30140 + }, + { + "epoch": 3.39, + "learning_rate": 2.4847831986714837e-05, + "loss": 0.9141, + "step": 30160 + }, + { + "epoch": 3.39, + "learning_rate": 2.47849568471871e-05, + "loss": 0.9281, + "step": 30180 + }, + { + "epoch": 3.39, + "learning_rate": 2.472213513405615e-05, + "loss": 0.9085, + "step": 30200 + }, + { + "epoch": 3.4, + "learning_rate": 2.4659366980430547e-05, + "loss": 0.9308, + "step": 30220 + }, + { + "epoch": 3.4, + "learning_rate": 2.4596652519305346e-05, + "loss": 0.9147, + "step": 30240 + }, + { + "epoch": 3.4, + "learning_rate": 2.4533991883561868e-05, + "loss": 0.91, + "step": 30260 + }, + { + "epoch": 3.4, + "learning_rate": 2.4471385205967323e-05, + "loss": 0.8888, + "step": 30280 + }, + { + "epoch": 3.4, + "learning_rate": 2.4408832619174644e-05, + "loss": 0.894, + "step": 30300 + }, + { + "epoch": 3.41, + "learning_rate": 2.4346334255722168e-05, + "loss": 0.8859, + "step": 30320 + }, + { + "epoch": 3.41, + "learning_rate": 2.4283890248033337e-05, + "loss": 0.9136, + "step": 30340 + }, + { + "epoch": 3.41, + "learning_rate": 2.4221500728416356e-05, + "loss": 0.9353, + "step": 30360 + }, + { + "epoch": 3.41, + "learning_rate": 2.415916582906405e-05, + "loss": 0.9154, + "step": 30380 + }, + { + "epoch": 3.42, + "learning_rate": 2.409688568205349e-05, + "loss": 0.9022, + "step": 30400 + }, + { + "epoch": 3.42, + "learning_rate": 2.403466041934574e-05, + "loss": 0.9071, + "step": 30420 + }, + { + "epoch": 3.42, + "learning_rate": 2.3972490172785567e-05, + "loss": 0.8839, + "step": 30440 + }, + { + "epoch": 3.42, + "learning_rate": 2.3910375074101172e-05, + "loss": 0.9102, + "step": 30460 + }, + { + "epoch": 3.42, + "learning_rate": 2.3848315254903924e-05, + "loss": 0.9196, + "step": 30480 + }, + { + "epoch": 3.43, + "learning_rate": 2.3786310846688e-05, + "loss": 0.8863, + "step": 30500 + }, + { + "epoch": 3.43, + "learning_rate": 2.3724361980830257e-05, + "loss": 0.9043, + "step": 30520 + }, + { + "epoch": 3.43, + "learning_rate": 2.366246878858984e-05, + "loss": 0.8813, + "step": 30540 + }, + { + "epoch": 3.43, + "learning_rate": 2.3600631401107882e-05, + "loss": 0.9023, + "step": 30560 + }, + { + "epoch": 3.44, + "learning_rate": 2.353884994940732e-05, + "loss": 0.8872, + "step": 30580 + }, + { + "epoch": 3.44, + "learning_rate": 2.3477124564392572e-05, + "loss": 0.9056, + "step": 30600 + }, + { + "epoch": 3.44, + "learning_rate": 2.3415455376849248e-05, + "loss": 0.8708, + "step": 30620 + }, + { + "epoch": 3.44, + "learning_rate": 2.3353842517443898e-05, + "loss": 0.8727, + "step": 30640 + }, + { + "epoch": 3.44, + "learning_rate": 2.32922861167237e-05, + "loss": 0.8796, + "step": 30660 + }, + { + "epoch": 3.45, + "learning_rate": 2.3230786305116253e-05, + "loss": 0.9369, + "step": 30680 + }, + { + "epoch": 3.45, + "learning_rate": 2.316934321292915e-05, + "loss": 0.9044, + "step": 30700 + }, + { + "epoch": 3.45, + "learning_rate": 2.3107956970349942e-05, + "loss": 0.8945, + "step": 30720 + }, + { + "epoch": 3.45, + "learning_rate": 2.3046627707445635e-05, + "loss": 0.9097, + "step": 30740 + }, + { + "epoch": 3.46, + "learning_rate": 2.2985355554162546e-05, + "loss": 0.908, + "step": 30760 + }, + { + "epoch": 3.46, + "learning_rate": 2.292414064032593e-05, + "loss": 0.8604, + "step": 30780 + }, + { + "epoch": 3.46, + "learning_rate": 2.2862983095639823e-05, + "loss": 0.8698, + "step": 30800 + }, + { + "epoch": 3.46, + "learning_rate": 2.2801883049686678e-05, + "loss": 0.8969, + "step": 30820 + }, + { + "epoch": 3.47, + "learning_rate": 2.2740840631927118e-05, + "loss": 0.8927, + "step": 30840 + }, + { + "epoch": 3.47, + "learning_rate": 2.2679855971699676e-05, + "loss": 0.9017, + "step": 30860 + }, + { + "epoch": 3.47, + "learning_rate": 2.2618929198220513e-05, + "loss": 0.924, + "step": 30880 + }, + { + "epoch": 3.47, + "learning_rate": 2.2558060440583057e-05, + "loss": 0.8936, + "step": 30900 + }, + { + "epoch": 3.47, + "learning_rate": 2.2497249827757933e-05, + "loss": 0.9073, + "step": 30920 + }, + { + "epoch": 3.48, + "learning_rate": 2.2436497488592497e-05, + "loss": 0.9292, + "step": 30940 + }, + { + "epoch": 3.48, + "learning_rate": 2.2375803551810654e-05, + "loss": 0.9278, + "step": 30960 + }, + { + "epoch": 3.48, + "learning_rate": 2.2315168146012527e-05, + "loss": 0.8894, + "step": 30980 + }, + { + "epoch": 3.48, + "learning_rate": 2.225459139967426e-05, + "loss": 0.8936, + "step": 31000 + }, + { + "epoch": 3.49, + "learning_rate": 2.21940734411477e-05, + "loss": 0.9102, + "step": 31020 + }, + { + "epoch": 3.49, + "learning_rate": 2.213361439866013e-05, + "loss": 0.8849, + "step": 31040 + }, + { + "epoch": 3.49, + "learning_rate": 2.2073214400313997e-05, + "loss": 0.8884, + "step": 31060 + }, + { + "epoch": 3.49, + "learning_rate": 2.201287357408665e-05, + "loss": 0.8864, + "step": 31080 + }, + { + "epoch": 3.49, + "learning_rate": 2.1952592047830055e-05, + "loss": 0.94, + "step": 31100 + }, + { + "epoch": 3.5, + "learning_rate": 2.189236994927054e-05, + "loss": 0.8892, + "step": 31120 + }, + { + "epoch": 3.5, + "learning_rate": 2.1832207406008502e-05, + "loss": 0.8934, + "step": 31140 + }, + { + "epoch": 3.5, + "learning_rate": 2.1772104545518185e-05, + "loss": 0.8911, + "step": 31160 + }, + { + "epoch": 3.5, + "learning_rate": 2.171206149514731e-05, + "loss": 0.9029, + "step": 31180 + }, + { + "epoch": 3.51, + "learning_rate": 2.165207838211693e-05, + "loss": 0.9103, + "step": 31200 + }, + { + "epoch": 3.51, + "learning_rate": 2.159215533352106e-05, + "loss": 0.9166, + "step": 31220 + }, + { + "epoch": 3.51, + "learning_rate": 2.153229247632652e-05, + "loss": 0.8993, + "step": 31240 + }, + { + "epoch": 3.51, + "learning_rate": 2.14724899373725e-05, + "loss": 0.9114, + "step": 31260 + }, + { + "epoch": 3.51, + "learning_rate": 2.141274784337044e-05, + "loss": 0.9053, + "step": 31280 + }, + { + "epoch": 3.52, + "learning_rate": 2.1353066320903698e-05, + "loss": 0.8942, + "step": 31300 + }, + { + "epoch": 3.52, + "learning_rate": 2.1293445496427296e-05, + "loss": 0.8935, + "step": 31320 + }, + { + "epoch": 3.52, + "learning_rate": 2.1233885496267634e-05, + "loss": 0.8798, + "step": 31340 + }, + { + "epoch": 3.52, + "learning_rate": 2.117438644662226e-05, + "loss": 0.9204, + "step": 31360 + }, + { + "epoch": 3.53, + "learning_rate": 2.1114948473559554e-05, + "loss": 0.8907, + "step": 31380 + }, + { + "epoch": 3.53, + "learning_rate": 2.1055571703018474e-05, + "loss": 0.8935, + "step": 31400 + }, + { + "epoch": 3.53, + "learning_rate": 2.0996256260808316e-05, + "loss": 0.8761, + "step": 31420 + }, + { + "epoch": 3.53, + "learning_rate": 2.0937002272608493e-05, + "loss": 0.9049, + "step": 31440 + }, + { + "epoch": 3.53, + "learning_rate": 2.087780986396808e-05, + "loss": 0.9202, + "step": 31460 + }, + { + "epoch": 3.54, + "learning_rate": 2.0818679160305776e-05, + "loss": 0.8871, + "step": 31480 + }, + { + "epoch": 3.54, + "learning_rate": 2.0759610286909508e-05, + "loss": 0.8833, + "step": 31500 + }, + { + "epoch": 3.54, + "learning_rate": 2.0700603368936182e-05, + "loss": 0.917, + "step": 31520 + }, + { + "epoch": 3.54, + "learning_rate": 2.064165853141145e-05, + "loss": 0.9222, + "step": 31540 + }, + { + "epoch": 3.55, + "learning_rate": 2.058277589922942e-05, + "loss": 0.9001, + "step": 31560 + }, + { + "epoch": 3.55, + "learning_rate": 2.05239555971524e-05, + "loss": 0.906, + "step": 31580 + }, + { + "epoch": 3.55, + "learning_rate": 2.0465197749810604e-05, + "loss": 0.9021, + "step": 31600 + }, + { + "epoch": 3.55, + "learning_rate": 2.040650248170194e-05, + "loss": 0.8962, + "step": 31620 + }, + { + "epoch": 3.56, + "learning_rate": 2.034786991719174e-05, + "loss": 0.901, + "step": 31640 + }, + { + "epoch": 3.56, + "learning_rate": 2.0289300180512478e-05, + "loss": 0.9213, + "step": 31660 + }, + { + "epoch": 3.56, + "learning_rate": 2.0230793395763426e-05, + "loss": 0.9018, + "step": 31680 + }, + { + "epoch": 3.56, + "learning_rate": 2.0172349686910568e-05, + "loss": 0.8924, + "step": 31700 + }, + { + "epoch": 3.56, + "learning_rate": 2.011396917778619e-05, + "loss": 0.8989, + "step": 31720 + }, + { + "epoch": 3.57, + "learning_rate": 2.0055651992088692e-05, + "loss": 0.8872, + "step": 31740 + }, + { + "epoch": 3.57, + "learning_rate": 1.9997398253382287e-05, + "loss": 0.9258, + "step": 31760 + }, + { + "epoch": 3.57, + "learning_rate": 1.993920808509676e-05, + "loss": 0.9025, + "step": 31780 + }, + { + "epoch": 3.57, + "learning_rate": 1.988108161052722e-05, + "loss": 0.8995, + "step": 31800 + }, + { + "epoch": 3.58, + "learning_rate": 1.9823018952833748e-05, + "loss": 0.9158, + "step": 31820 + }, + { + "epoch": 3.58, + "learning_rate": 1.9765020235041322e-05, + "loss": 0.9092, + "step": 31840 + }, + { + "epoch": 3.58, + "learning_rate": 1.9707085580039365e-05, + "loss": 0.9003, + "step": 31860 + }, + { + "epoch": 3.58, + "learning_rate": 1.9649215110581553e-05, + "loss": 0.8768, + "step": 31880 + }, + { + "epoch": 3.58, + "learning_rate": 1.9591408949285605e-05, + "loss": 0.9147, + "step": 31900 + }, + { + "epoch": 3.59, + "learning_rate": 1.953366721863297e-05, + "loss": 0.9055, + "step": 31920 + }, + { + "epoch": 3.59, + "learning_rate": 1.947599004096856e-05, + "loss": 0.9076, + "step": 31940 + }, + { + "epoch": 3.59, + "learning_rate": 1.9418377538500543e-05, + "loss": 0.8803, + "step": 31960 + }, + { + "epoch": 3.59, + "learning_rate": 1.936082983330002e-05, + "loss": 0.9346, + "step": 31980 + }, + { + "epoch": 3.6, + "learning_rate": 1.9303347047300834e-05, + "loss": 0.9033, + "step": 32000 + }, + { + "epoch": 3.6, + "learning_rate": 1.9245929302299202e-05, + "loss": 0.8857, + "step": 32020 + }, + { + "epoch": 3.6, + "learning_rate": 1.9188576719953633e-05, + "loss": 0.9229, + "step": 32040 + }, + { + "epoch": 3.6, + "learning_rate": 1.913128942178451e-05, + "loss": 0.9216, + "step": 32060 + }, + { + "epoch": 3.6, + "learning_rate": 1.907406752917386e-05, + "loss": 0.8786, + "step": 32080 + }, + { + "epoch": 3.61, + "learning_rate": 1.9016911163365185e-05, + "loss": 0.896, + "step": 32100 + }, + { + "epoch": 3.61, + "learning_rate": 1.89598204454631e-05, + "loss": 0.9214, + "step": 32120 + }, + { + "epoch": 3.61, + "learning_rate": 1.89027954964332e-05, + "loss": 0.8923, + "step": 32140 + }, + { + "epoch": 3.61, + "learning_rate": 1.8845836437101622e-05, + "loss": 0.9045, + "step": 32160 + }, + { + "epoch": 3.62, + "learning_rate": 1.8788943388154962e-05, + "loss": 0.8815, + "step": 32180 + }, + { + "epoch": 3.62, + "learning_rate": 1.873211647013995e-05, + "loss": 0.9081, + "step": 32200 + }, + { + "epoch": 3.62, + "learning_rate": 1.867535580346313e-05, + "loss": 0.8995, + "step": 32220 + }, + { + "epoch": 3.62, + "learning_rate": 1.861866150839078e-05, + "loss": 0.9083, + "step": 32240 + }, + { + "epoch": 3.62, + "learning_rate": 1.856203370504846e-05, + "loss": 0.8916, + "step": 32260 + }, + { + "epoch": 3.63, + "learning_rate": 1.850547251342089e-05, + "loss": 0.9364, + "step": 32280 + }, + { + "epoch": 3.63, + "learning_rate": 1.844897805335162e-05, + "loss": 0.8781, + "step": 32300 + }, + { + "epoch": 3.63, + "learning_rate": 1.8392550444542793e-05, + "loss": 0.9038, + "step": 32320 + }, + { + "epoch": 3.63, + "learning_rate": 1.8336189806555014e-05, + "loss": 0.8806, + "step": 32340 + }, + { + "epoch": 3.64, + "learning_rate": 1.8279896258806844e-05, + "loss": 0.9133, + "step": 32360 + }, + { + "epoch": 3.64, + "learning_rate": 1.8223669920574772e-05, + "loss": 0.9114, + "step": 32380 + }, + { + "epoch": 3.64, + "learning_rate": 1.8167510910992875e-05, + "loss": 0.8787, + "step": 32400 + }, + { + "epoch": 3.64, + "learning_rate": 1.811141934905255e-05, + "loss": 0.8737, + "step": 32420 + }, + { + "epoch": 3.64, + "learning_rate": 1.8055395353602306e-05, + "loss": 0.9233, + "step": 32440 + }, + { + "epoch": 3.65, + "learning_rate": 1.7999439043347476e-05, + "loss": 0.9049, + "step": 32460 + }, + { + "epoch": 3.65, + "learning_rate": 1.7943550536850006e-05, + "loss": 0.893, + "step": 32480 + }, + { + "epoch": 3.65, + "learning_rate": 1.7887729952528117e-05, + "loss": 0.9078, + "step": 32500 + }, + { + "epoch": 3.65, + "learning_rate": 1.7831977408656153e-05, + "loss": 0.9135, + "step": 32520 + }, + { + "epoch": 3.66, + "learning_rate": 1.7776293023364325e-05, + "loss": 0.8959, + "step": 32540 + }, + { + "epoch": 3.66, + "learning_rate": 1.7720676914638407e-05, + "loss": 0.9199, + "step": 32560 + }, + { + "epoch": 3.66, + "learning_rate": 1.766512920031944e-05, + "loss": 0.9057, + "step": 32580 + }, + { + "epoch": 3.66, + "learning_rate": 1.7609649998103634e-05, + "loss": 0.8747, + "step": 32600 + }, + { + "epoch": 3.67, + "learning_rate": 1.755423942554199e-05, + "loss": 0.911, + "step": 32620 + }, + { + "epoch": 3.67, + "learning_rate": 1.749889760004012e-05, + "loss": 0.8998, + "step": 32640 + }, + { + "epoch": 3.67, + "learning_rate": 1.7443624638857954e-05, + "loss": 0.9014, + "step": 32660 + }, + { + "epoch": 3.67, + "learning_rate": 1.7388420659109515e-05, + "loss": 0.8887, + "step": 32680 + }, + { + "epoch": 3.67, + "learning_rate": 1.7333285777762682e-05, + "loss": 0.9168, + "step": 32700 + }, + { + "epoch": 3.68, + "learning_rate": 1.727822011163886e-05, + "loss": 0.9106, + "step": 32720 + }, + { + "epoch": 3.68, + "learning_rate": 1.7223223777412905e-05, + "loss": 0.907, + "step": 32740 + }, + { + "epoch": 3.68, + "learning_rate": 1.7168296891612707e-05, + "loss": 0.906, + "step": 32760 + }, + { + "epoch": 3.68, + "learning_rate": 1.711343957061899e-05, + "loss": 0.8978, + "step": 32780 + }, + { + "epoch": 3.69, + "learning_rate": 1.7058651930665114e-05, + "loss": 0.8732, + "step": 32800 + }, + { + "epoch": 3.69, + "learning_rate": 1.70039340878368e-05, + "loss": 0.909, + "step": 32820 + }, + { + "epoch": 3.69, + "learning_rate": 1.6949286158071858e-05, + "loss": 0.8888, + "step": 32840 + }, + { + "epoch": 3.69, + "learning_rate": 1.689470825715998e-05, + "loss": 0.9104, + "step": 32860 + }, + { + "epoch": 3.69, + "learning_rate": 1.6840200500742482e-05, + "loss": 0.878, + "step": 32880 + }, + { + "epoch": 3.7, + "learning_rate": 1.6785763004312055e-05, + "loss": 0.9154, + "step": 32900 + }, + { + "epoch": 3.7, + "learning_rate": 1.673139588321247e-05, + "loss": 0.8783, + "step": 32920 + }, + { + "epoch": 3.7, + "learning_rate": 1.6677099252638477e-05, + "loss": 0.8909, + "step": 32940 + }, + { + "epoch": 3.7, + "learning_rate": 1.6622873227635428e-05, + "loss": 0.908, + "step": 32960 + }, + { + "epoch": 3.71, + "learning_rate": 1.656871792309902e-05, + "loss": 0.8948, + "step": 32980 + }, + { + "epoch": 3.71, + "learning_rate": 1.651463345377518e-05, + "loss": 0.888, + "step": 33000 + }, + { + "epoch": 3.71, + "learning_rate": 1.6460619934259707e-05, + "loss": 0.9002, + "step": 33020 + }, + { + "epoch": 3.71, + "learning_rate": 1.6406677478998094e-05, + "loss": 0.9047, + "step": 33040 + }, + { + "epoch": 3.71, + "learning_rate": 1.6352806202285244e-05, + "loss": 0.8803, + "step": 33060 + }, + { + "epoch": 3.72, + "learning_rate": 1.6299006218265246e-05, + "loss": 0.8966, + "step": 33080 + }, + { + "epoch": 3.72, + "learning_rate": 1.624527764093115e-05, + "loss": 0.8901, + "step": 33100 + }, + { + "epoch": 3.72, + "learning_rate": 1.619162058412465e-05, + "loss": 0.9045, + "step": 33120 + }, + { + "epoch": 3.72, + "learning_rate": 1.6138035161535986e-05, + "loss": 0.8999, + "step": 33140 + }, + { + "epoch": 3.73, + "learning_rate": 1.608452148670356e-05, + "loss": 0.8836, + "step": 33160 + }, + { + "epoch": 3.73, + "learning_rate": 1.603107967301378e-05, + "loss": 0.9097, + "step": 33180 + }, + { + "epoch": 3.73, + "learning_rate": 1.597770983370075e-05, + "loss": 0.9269, + "step": 33200 + }, + { + "epoch": 3.73, + "learning_rate": 1.5924412081846113e-05, + "loss": 0.9053, + "step": 33220 + }, + { + "epoch": 3.73, + "learning_rate": 1.5871186530378763e-05, + "loss": 0.8981, + "step": 33240 + }, + { + "epoch": 3.74, + "learning_rate": 1.581803329207461e-05, + "loss": 0.8572, + "step": 33260 + }, + { + "epoch": 3.74, + "learning_rate": 1.5764952479556334e-05, + "loss": 0.8934, + "step": 33280 + }, + { + "epoch": 3.74, + "learning_rate": 1.5711944205293185e-05, + "loss": 0.8725, + "step": 33300 + }, + { + "epoch": 3.74, + "learning_rate": 1.565900858160068e-05, + "loss": 0.8948, + "step": 33320 + }, + { + "epoch": 3.75, + "learning_rate": 1.5606145720640442e-05, + "loss": 0.8983, + "step": 33340 + }, + { + "epoch": 3.75, + "learning_rate": 1.555335573441989e-05, + "loss": 0.9201, + "step": 33360 + }, + { + "epoch": 3.75, + "learning_rate": 1.5500638734792055e-05, + "loss": 0.8854, + "step": 33380 + }, + { + "epoch": 3.75, + "learning_rate": 1.5447994833455292e-05, + "loss": 0.8975, + "step": 33400 + }, + { + "epoch": 3.76, + "learning_rate": 1.53954241419531e-05, + "loss": 0.8831, + "step": 33420 + }, + { + "epoch": 3.76, + "learning_rate": 1.5342926771673842e-05, + "loss": 0.8813, + "step": 33440 + }, + { + "epoch": 3.76, + "learning_rate": 1.5290502833850578e-05, + "loss": 0.8986, + "step": 33460 + }, + { + "epoch": 3.76, + "learning_rate": 1.5238152439560693e-05, + "loss": 0.8652, + "step": 33480 + }, + { + "epoch": 3.76, + "learning_rate": 1.5185875699725793e-05, + "loss": 0.9025, + "step": 33500 + }, + { + "epoch": 3.77, + "learning_rate": 1.5133672725111425e-05, + "loss": 0.8958, + "step": 33520 + }, + { + "epoch": 3.77, + "learning_rate": 1.5081543626326833e-05, + "loss": 0.9204, + "step": 33540 + }, + { + "epoch": 3.77, + "learning_rate": 1.5029488513824724e-05, + "loss": 0.8805, + "step": 33560 + }, + { + "epoch": 3.77, + "learning_rate": 1.4977507497901055e-05, + "loss": 0.9058, + "step": 33580 + }, + { + "epoch": 3.78, + "learning_rate": 1.492560068869478e-05, + "loss": 0.9166, + "step": 33600 + }, + { + "epoch": 3.78, + "learning_rate": 1.4873768196187593e-05, + "loss": 0.9029, + "step": 33620 + }, + { + "epoch": 3.78, + "learning_rate": 1.482201013020375e-05, + "loss": 0.883, + "step": 33640 + }, + { + "epoch": 3.78, + "learning_rate": 1.4770326600409851e-05, + "loss": 0.9034, + "step": 33660 + }, + { + "epoch": 3.78, + "learning_rate": 1.471871771631448e-05, + "loss": 0.8594, + "step": 33680 + }, + { + "epoch": 3.79, + "learning_rate": 1.4667183587268118e-05, + "loss": 0.9064, + "step": 33700 + }, + { + "epoch": 3.79, + "learning_rate": 1.4615724322462838e-05, + "loss": 0.9083, + "step": 33720 + }, + { + "epoch": 3.79, + "learning_rate": 1.4564340030932083e-05, + "loss": 0.8579, + "step": 33740 + }, + { + "epoch": 3.79, + "learning_rate": 1.4513030821550449e-05, + "loss": 0.8899, + "step": 33760 + }, + { + "epoch": 3.8, + "learning_rate": 1.4461796803033445e-05, + "loss": 0.9189, + "step": 33780 + }, + { + "epoch": 3.8, + "learning_rate": 1.4410638083937272e-05, + "loss": 0.9012, + "step": 33800 + }, + { + "epoch": 3.8, + "learning_rate": 1.4359554772658552e-05, + "loss": 0.9, + "step": 33820 + }, + { + "epoch": 3.8, + "learning_rate": 1.4308546977434135e-05, + "loss": 0.957, + "step": 33840 + }, + { + "epoch": 3.8, + "learning_rate": 1.4257614806340919e-05, + "loss": 0.8913, + "step": 33860 + }, + { + "epoch": 3.81, + "learning_rate": 1.4206758367295537e-05, + "loss": 0.9182, + "step": 33880 + }, + { + "epoch": 3.81, + "learning_rate": 1.4155977768054113e-05, + "loss": 0.9013, + "step": 33900 + }, + { + "epoch": 3.81, + "learning_rate": 1.4105273116212136e-05, + "loss": 0.9113, + "step": 33920 + }, + { + "epoch": 3.81, + "learning_rate": 1.4054644519204157e-05, + "loss": 0.8801, + "step": 33940 + }, + { + "epoch": 3.82, + "learning_rate": 1.4004092084303583e-05, + "loss": 0.9287, + "step": 33960 + }, + { + "epoch": 3.82, + "learning_rate": 1.3953615918622443e-05, + "loss": 0.9068, + "step": 33980 + }, + { + "epoch": 3.82, + "learning_rate": 1.3903216129111174e-05, + "loss": 0.8831, + "step": 34000 + }, + { + "epoch": 3.82, + "learning_rate": 1.385289282255835e-05, + "loss": 0.9099, + "step": 34020 + }, + { + "epoch": 3.82, + "learning_rate": 1.380264610559051e-05, + "loss": 0.9004, + "step": 34040 + }, + { + "epoch": 3.83, + "learning_rate": 1.3752476084671962e-05, + "loss": 0.8964, + "step": 34060 + }, + { + "epoch": 3.83, + "learning_rate": 1.3702382866104457e-05, + "loss": 0.8801, + "step": 34080 + }, + { + "epoch": 3.83, + "learning_rate": 1.3652366556026996e-05, + "loss": 0.919, + "step": 34100 + }, + { + "epoch": 3.83, + "learning_rate": 1.3602427260415663e-05, + "loss": 0.8887, + "step": 34120 + }, + { + "epoch": 3.84, + "learning_rate": 1.3552565085083352e-05, + "loss": 0.9103, + "step": 34140 + }, + { + "epoch": 3.84, + "learning_rate": 1.350278013567955e-05, + "loss": 0.9001, + "step": 34160 + }, + { + "epoch": 3.84, + "learning_rate": 1.3453072517690107e-05, + "loss": 0.8922, + "step": 34180 + }, + { + "epoch": 3.84, + "learning_rate": 1.3403442336437039e-05, + "loss": 0.8959, + "step": 34200 + }, + { + "epoch": 3.84, + "learning_rate": 1.3353889697078287e-05, + "loss": 0.8825, + "step": 34220 + }, + { + "epoch": 3.85, + "learning_rate": 1.3304414704607443e-05, + "loss": 0.9026, + "step": 34240 + }, + { + "epoch": 3.85, + "learning_rate": 1.3255017463853659e-05, + "loss": 0.8999, + "step": 34260 + }, + { + "epoch": 3.85, + "learning_rate": 1.3205698079481298e-05, + "loss": 0.8748, + "step": 34280 + }, + { + "epoch": 3.85, + "learning_rate": 1.3156456655989746e-05, + "loss": 0.882, + "step": 34300 + }, + { + "epoch": 3.86, + "learning_rate": 1.3107293297713236e-05, + "loss": 0.8574, + "step": 34320 + }, + { + "epoch": 3.86, + "learning_rate": 1.3058208108820574e-05, + "loss": 0.9052, + "step": 34340 + }, + { + "epoch": 3.86, + "learning_rate": 1.3009201193314947e-05, + "loss": 0.9069, + "step": 34360 + }, + { + "epoch": 3.86, + "learning_rate": 1.2960272655033689e-05, + "loss": 0.8821, + "step": 34380 + }, + { + "epoch": 3.87, + "learning_rate": 1.2911422597648077e-05, + "loss": 0.8953, + "step": 34400 + }, + { + "epoch": 3.87, + "learning_rate": 1.2862651124663095e-05, + "loss": 0.8736, + "step": 34420 + }, + { + "epoch": 3.87, + "learning_rate": 1.2813958339417176e-05, + "loss": 0.9102, + "step": 34440 + }, + { + "epoch": 3.87, + "learning_rate": 1.2765344345082114e-05, + "loss": 0.8673, + "step": 34460 + }, + { + "epoch": 3.87, + "learning_rate": 1.2716809244662691e-05, + "loss": 0.9082, + "step": 34480 + }, + { + "epoch": 3.88, + "learning_rate": 1.266835314099657e-05, + "loss": 0.8893, + "step": 34500 + }, + { + "epoch": 3.88, + "learning_rate": 1.261997613675398e-05, + "loss": 0.8961, + "step": 34520 + }, + { + "epoch": 3.88, + "learning_rate": 1.2571678334437591e-05, + "loss": 0.8572, + "step": 34540 + }, + { + "epoch": 3.88, + "learning_rate": 1.252345983638225e-05, + "loss": 0.9225, + "step": 34560 + }, + { + "epoch": 3.89, + "learning_rate": 1.2475320744754776e-05, + "loss": 0.9017, + "step": 34580 + }, + { + "epoch": 3.89, + "learning_rate": 1.2427261161553732e-05, + "loss": 0.9022, + "step": 34600 + }, + { + "epoch": 3.89, + "learning_rate": 1.2379281188609209e-05, + "loss": 0.9, + "step": 34620 + }, + { + "epoch": 3.89, + "learning_rate": 1.2331380927582642e-05, + "loss": 0.8776, + "step": 34640 + }, + { + "epoch": 3.89, + "learning_rate": 1.2283560479966538e-05, + "loss": 0.8804, + "step": 34660 + }, + { + "epoch": 3.9, + "learning_rate": 1.223581994708432e-05, + "loss": 0.8881, + "step": 34680 + }, + { + "epoch": 3.9, + "learning_rate": 1.2188159430090085e-05, + "loss": 0.8949, + "step": 34700 + }, + { + "epoch": 3.9, + "learning_rate": 1.2140579029968352e-05, + "loss": 0.8953, + "step": 34720 + }, + { + "epoch": 3.9, + "learning_rate": 1.2093078847533922e-05, + "loss": 0.8937, + "step": 34740 + }, + { + "epoch": 3.91, + "learning_rate": 1.2045658983431612e-05, + "loss": 0.9329, + "step": 34760 + }, + { + "epoch": 3.91, + "learning_rate": 1.199831953813611e-05, + "loss": 0.8943, + "step": 34780 + }, + { + "epoch": 3.91, + "learning_rate": 1.1951060611951615e-05, + "loss": 0.8774, + "step": 34800 + }, + { + "epoch": 3.91, + "learning_rate": 1.1903882305011793e-05, + "loss": 0.9075, + "step": 34820 + }, + { + "epoch": 3.91, + "learning_rate": 1.1856784717279462e-05, + "loss": 0.8714, + "step": 34840 + }, + { + "epoch": 3.92, + "learning_rate": 1.1809767948546419e-05, + "loss": 0.8841, + "step": 34860 + }, + { + "epoch": 3.92, + "learning_rate": 1.1762832098433219e-05, + "loss": 0.8719, + "step": 34880 + }, + { + "epoch": 3.92, + "learning_rate": 1.1715977266388961e-05, + "loss": 0.8972, + "step": 34900 + }, + { + "epoch": 3.92, + "learning_rate": 1.1669203551691093e-05, + "loss": 0.8943, + "step": 34920 + }, + { + "epoch": 3.93, + "learning_rate": 1.1622511053445156e-05, + "loss": 0.8861, + "step": 34940 + }, + { + "epoch": 3.93, + "learning_rate": 1.1575899870584621e-05, + "loss": 0.9284, + "step": 34960 + }, + { + "epoch": 3.93, + "learning_rate": 1.1529370101870723e-05, + "loss": 0.8943, + "step": 34980 + }, + { + "epoch": 3.93, + "learning_rate": 1.1482921845892098e-05, + "loss": 0.8904, + "step": 35000 + }, + { + "epoch": 3.93, + "learning_rate": 1.143655520106473e-05, + "loss": 0.8703, + "step": 35020 + }, + { + "epoch": 3.94, + "learning_rate": 1.1390270265631675e-05, + "loss": 0.9096, + "step": 35040 + }, + { + "epoch": 3.94, + "learning_rate": 1.134406713766285e-05, + "loss": 0.9049, + "step": 35060 + }, + { + "epoch": 3.94, + "learning_rate": 1.1297945915054842e-05, + "loss": 0.8983, + "step": 35080 + }, + { + "epoch": 3.94, + "learning_rate": 1.1251906695530701e-05, + "loss": 0.9089, + "step": 35100 + }, + { + "epoch": 3.95, + "learning_rate": 1.1205949576639723e-05, + "loss": 0.8768, + "step": 35120 + }, + { + "epoch": 3.95, + "learning_rate": 1.116007465575722e-05, + "loss": 0.9167, + "step": 35140 + }, + { + "epoch": 3.95, + "learning_rate": 1.1114282030084361e-05, + "loss": 0.9169, + "step": 35160 + }, + { + "epoch": 3.95, + "learning_rate": 1.1068571796647992e-05, + "loss": 0.903, + "step": 35180 + }, + { + "epoch": 3.96, + "learning_rate": 1.1022944052300293e-05, + "loss": 0.8746, + "step": 35200 + }, + { + "epoch": 3.96, + "learning_rate": 1.0977398893718732e-05, + "loss": 0.9006, + "step": 35220 + }, + { + "epoch": 3.96, + "learning_rate": 1.0931936417405764e-05, + "loss": 0.8895, + "step": 35240 + }, + { + "epoch": 3.96, + "learning_rate": 1.0886556719688662e-05, + "loss": 0.8928, + "step": 35260 + }, + { + "epoch": 3.96, + "learning_rate": 1.0841259896719297e-05, + "loss": 0.9004, + "step": 35280 + }, + { + "epoch": 3.97, + "learning_rate": 1.0796046044473962e-05, + "loss": 0.9078, + "step": 35300 + }, + { + "epoch": 3.97, + "learning_rate": 1.0750915258753141e-05, + "loss": 0.8804, + "step": 35320 + }, + { + "epoch": 3.97, + "learning_rate": 1.0705867635181278e-05, + "loss": 0.902, + "step": 35340 + }, + { + "epoch": 3.97, + "learning_rate": 1.0660903269206652e-05, + "loss": 0.898, + "step": 35360 + }, + { + "epoch": 3.98, + "learning_rate": 1.0616022256101143e-05, + "loss": 0.8605, + "step": 35380 + }, + { + "epoch": 3.98, + "learning_rate": 1.0571224690960002e-05, + "loss": 0.8795, + "step": 35400 + }, + { + "epoch": 3.98, + "learning_rate": 1.0526510668701633e-05, + "loss": 0.8985, + "step": 35420 + }, + { + "epoch": 3.98, + "learning_rate": 1.0481880284067485e-05, + "loss": 0.9146, + "step": 35440 + }, + { + "epoch": 3.98, + "learning_rate": 1.0437333631621765e-05, + "loss": 0.8778, + "step": 35460 + }, + { + "epoch": 3.99, + "learning_rate": 1.0392870805751265e-05, + "loss": 0.9129, + "step": 35480 + }, + { + "epoch": 3.99, + "learning_rate": 1.0348491900665164e-05, + "loss": 0.9134, + "step": 35500 + }, + { + "epoch": 3.99, + "learning_rate": 1.030419701039484e-05, + "loss": 0.9043, + "step": 35520 + }, + { + "epoch": 3.99, + "learning_rate": 1.025998622879365e-05, + "loss": 0.8896, + "step": 35540 + }, + { + "epoch": 4.0, + "learning_rate": 1.0215859649536702e-05, + "loss": 0.9067, + "step": 35560 + }, + { + "epoch": 4.0, + "learning_rate": 1.0171817366120767e-05, + "loss": 0.9166, + "step": 35580 + }, + { + "epoch": 4.0, + "learning_rate": 1.012785947186397e-05, + "loss": 0.8699, + "step": 35600 + }, + { + "epoch": 4.0, + "learning_rate": 1.0083986059905598e-05, + "loss": 0.8923, + "step": 35620 + }, + { + "epoch": 4.0, + "learning_rate": 1.0040197223205978e-05, + "loss": 0.8432, + "step": 35640 + }, + { + "epoch": 4.01, + "learning_rate": 9.996493054546214e-06, + "loss": 0.8849, + "step": 35660 + }, + { + "epoch": 4.01, + "learning_rate": 9.95287364652806e-06, + "loss": 0.8686, + "step": 35680 + }, + { + "epoch": 4.01, + "learning_rate": 9.909339091573594e-06, + "loss": 0.8646, + "step": 35700 + }, + { + "epoch": 4.01, + "learning_rate": 9.865889481925167e-06, + "loss": 0.8976, + "step": 35720 + }, + { + "epoch": 4.02, + "learning_rate": 9.822524909645137e-06, + "loss": 0.8732, + "step": 35740 + }, + { + "epoch": 4.02, + "learning_rate": 9.779245466615639e-06, + "loss": 0.8954, + "step": 35760 + }, + { + "epoch": 4.02, + "learning_rate": 9.736051244538497e-06, + "loss": 0.912, + "step": 35780 + }, + { + "epoch": 4.02, + "learning_rate": 9.692942334934935e-06, + "loss": 0.891, + "step": 35800 + }, + { + "epoch": 4.02, + "learning_rate": 9.649918829145415e-06, + "loss": 0.8954, + "step": 35820 + }, + { + "epoch": 4.03, + "learning_rate": 9.60698081832943e-06, + "loss": 0.9018, + "step": 35840 + }, + { + "epoch": 4.03, + "learning_rate": 9.564128393465332e-06, + "loss": 0.8627, + "step": 35860 + }, + { + "epoch": 4.03, + "learning_rate": 9.52136164535018e-06, + "loss": 0.9076, + "step": 35880 + }, + { + "epoch": 4.03, + "learning_rate": 9.478680664599404e-06, + "loss": 0.93, + "step": 35900 + }, + { + "epoch": 4.04, + "learning_rate": 9.436085541646783e-06, + "loss": 0.8731, + "step": 35920 + }, + { + "epoch": 4.04, + "learning_rate": 9.393576366744162e-06, + "loss": 0.8885, + "step": 35940 + }, + { + "epoch": 4.04, + "learning_rate": 9.351153229961223e-06, + "loss": 0.8715, + "step": 35960 + }, + { + "epoch": 4.04, + "learning_rate": 9.308816221185441e-06, + "loss": 0.8789, + "step": 35980 + }, + { + "epoch": 4.04, + "learning_rate": 9.266565430121733e-06, + "loss": 0.886, + "step": 36000 + }, + { + "epoch": 4.05, + "learning_rate": 9.224400946292367e-06, + "loss": 0.8862, + "step": 36020 + }, + { + "epoch": 4.05, + "learning_rate": 9.182322859036702e-06, + "loss": 0.9107, + "step": 36040 + }, + { + "epoch": 4.05, + "learning_rate": 9.14033125751107e-06, + "loss": 0.877, + "step": 36060 + }, + { + "epoch": 4.05, + "learning_rate": 9.098426230688578e-06, + "loss": 0.8937, + "step": 36080 + }, + { + "epoch": 4.06, + "learning_rate": 9.056607867358829e-06, + "loss": 0.8663, + "step": 36100 + }, + { + "epoch": 4.06, + "learning_rate": 9.014876256127852e-06, + "loss": 0.8781, + "step": 36120 + }, + { + "epoch": 4.06, + "learning_rate": 8.973231485417849e-06, + "loss": 0.866, + "step": 36140 + }, + { + "epoch": 4.06, + "learning_rate": 8.931673643467014e-06, + "loss": 0.8693, + "step": 36160 + }, + { + "epoch": 4.07, + "learning_rate": 8.890202818329368e-06, + "loss": 0.8629, + "step": 36180 + }, + { + "epoch": 4.07, + "learning_rate": 8.84881909787455e-06, + "loss": 0.8731, + "step": 36200 + }, + { + "epoch": 4.07, + "learning_rate": 8.807522569787653e-06, + "loss": 0.8921, + "step": 36220 + }, + { + "epoch": 4.07, + "learning_rate": 8.76631332156898e-06, + "loss": 0.8864, + "step": 36240 + }, + { + "epoch": 4.07, + "learning_rate": 8.725191440533936e-06, + "loss": 0.8866, + "step": 36260 + }, + { + "epoch": 4.08, + "learning_rate": 8.684157013812839e-06, + "loss": 0.8669, + "step": 36280 + }, + { + "epoch": 4.08, + "learning_rate": 8.643210128350665e-06, + "loss": 0.8555, + "step": 36300 + }, + { + "epoch": 4.08, + "learning_rate": 8.602350870906895e-06, + "loss": 0.8809, + "step": 36320 + }, + { + "epoch": 4.08, + "learning_rate": 8.561579328055375e-06, + "loss": 0.865, + "step": 36340 + }, + { + "epoch": 4.09, + "learning_rate": 8.52089558618408e-06, + "loss": 0.888, + "step": 36360 + }, + { + "epoch": 4.09, + "learning_rate": 8.480299731494956e-06, + "loss": 0.8966, + "step": 36380 + }, + { + "epoch": 4.09, + "learning_rate": 8.439791850003726e-06, + "loss": 0.8947, + "step": 36400 + }, + { + "epoch": 4.09, + "learning_rate": 8.39937202753972e-06, + "loss": 0.8662, + "step": 36420 + }, + { + "epoch": 4.09, + "learning_rate": 8.35904034974569e-06, + "loss": 0.8828, + "step": 36440 + }, + { + "epoch": 4.1, + "learning_rate": 8.31879690207758e-06, + "loss": 0.8679, + "step": 36460 + }, + { + "epoch": 4.1, + "learning_rate": 8.278641769804469e-06, + "loss": 0.8961, + "step": 36480 + }, + { + "epoch": 4.1, + "learning_rate": 8.23857503800825e-06, + "loss": 0.8836, + "step": 36500 + }, + { + "epoch": 4.1, + "learning_rate": 8.198596791583523e-06, + "loss": 0.8672, + "step": 36520 + }, + { + "epoch": 4.11, + "learning_rate": 8.158707115237407e-06, + "loss": 0.8916, + "step": 36540 + }, + { + "epoch": 4.11, + "learning_rate": 8.118906093489358e-06, + "loss": 0.9051, + "step": 36560 + }, + { + "epoch": 4.11, + "learning_rate": 8.079193810670988e-06, + "loss": 0.8754, + "step": 36580 + }, + { + "epoch": 4.11, + "learning_rate": 8.039570350925878e-06, + "loss": 0.895, + "step": 36600 + }, + { + "epoch": 4.11, + "learning_rate": 8.000035798209421e-06, + "loss": 0.8794, + "step": 36620 + }, + { + "epoch": 4.12, + "learning_rate": 7.960590236288633e-06, + "loss": 0.8761, + "step": 36640 + }, + { + "epoch": 4.12, + "learning_rate": 7.921233748741934e-06, + "loss": 0.8677, + "step": 36660 + }, + { + "epoch": 4.12, + "learning_rate": 7.88196641895907e-06, + "loss": 0.8771, + "step": 36680 + }, + { + "epoch": 4.12, + "learning_rate": 7.842788330140838e-06, + "loss": 0.8851, + "step": 36700 + }, + { + "epoch": 4.13, + "learning_rate": 7.803699565298972e-06, + "loss": 0.9103, + "step": 36720 + }, + { + "epoch": 4.13, + "learning_rate": 7.764700207255903e-06, + "loss": 0.8621, + "step": 36740 + }, + { + "epoch": 4.13, + "learning_rate": 7.725790338644673e-06, + "loss": 0.8558, + "step": 36760 + }, + { + "epoch": 4.13, + "learning_rate": 7.686970041908675e-06, + "loss": 0.8793, + "step": 36780 + }, + { + "epoch": 4.13, + "learning_rate": 7.648239399301544e-06, + "loss": 0.9105, + "step": 36800 + }, + { + "epoch": 4.14, + "learning_rate": 7.6095984928869265e-06, + "loss": 0.879, + "step": 36820 + }, + { + "epoch": 4.14, + "learning_rate": 7.571047404538351e-06, + "loss": 0.8657, + "step": 36840 + }, + { + "epoch": 4.14, + "learning_rate": 7.532586215939025e-06, + "loss": 0.8624, + "step": 36860 + }, + { + "epoch": 4.14, + "learning_rate": 7.49421500858169e-06, + "loss": 0.8568, + "step": 36880 + }, + { + "epoch": 4.15, + "learning_rate": 7.45593386376841e-06, + "loss": 0.8805, + "step": 36900 + }, + { + "epoch": 4.15, + "learning_rate": 7.41774286261045e-06, + "loss": 0.8731, + "step": 36920 + }, + { + "epoch": 4.15, + "learning_rate": 7.379642086028038e-06, + "loss": 0.9025, + "step": 36940 + }, + { + "epoch": 4.15, + "learning_rate": 7.341631614750266e-06, + "loss": 0.867, + "step": 36960 + }, + { + "epoch": 4.16, + "learning_rate": 7.303711529314861e-06, + "loss": 0.877, + "step": 36980 + }, + { + "epoch": 4.16, + "learning_rate": 7.265881910068062e-06, + "loss": 0.8611, + "step": 37000 + }, + { + "epoch": 4.16, + "learning_rate": 7.228142837164404e-06, + "loss": 0.8895, + "step": 37020 + }, + { + "epoch": 4.16, + "learning_rate": 7.190494390566571e-06, + "loss": 0.9216, + "step": 37040 + }, + { + "epoch": 4.16, + "learning_rate": 7.152936650045245e-06, + "loss": 0.8817, + "step": 37060 + }, + { + "epoch": 4.17, + "learning_rate": 7.115469695178895e-06, + "loss": 0.8688, + "step": 37080 + }, + { + "epoch": 4.17, + "learning_rate": 7.078093605353642e-06, + "loss": 0.8903, + "step": 37100 + }, + { + "epoch": 4.17, + "learning_rate": 7.040808459763082e-06, + "loss": 0.8687, + "step": 37120 + }, + { + "epoch": 4.17, + "learning_rate": 7.003614337408099e-06, + "loss": 0.8761, + "step": 37140 + }, + { + "epoch": 4.18, + "learning_rate": 6.96651131709673e-06, + "loss": 0.88, + "step": 37160 + }, + { + "epoch": 4.18, + "learning_rate": 6.929499477443962e-06, + "loss": 0.8919, + "step": 37180 + }, + { + "epoch": 4.18, + "learning_rate": 6.892578896871643e-06, + "loss": 0.9064, + "step": 37200 + }, + { + "epoch": 4.18, + "learning_rate": 6.855749653608179e-06, + "loss": 0.8838, + "step": 37220 + }, + { + "epoch": 4.18, + "learning_rate": 6.819011825688498e-06, + "loss": 0.8945, + "step": 37240 + }, + { + "epoch": 4.19, + "learning_rate": 6.782365490953824e-06, + "loss": 0.8609, + "step": 37260 + }, + { + "epoch": 4.19, + "learning_rate": 6.745810727051521e-06, + "loss": 0.8978, + "step": 37280 + }, + { + "epoch": 4.19, + "learning_rate": 6.709347611434924e-06, + "loss": 0.8814, + "step": 37300 + }, + { + "epoch": 4.19, + "learning_rate": 6.672976221363186e-06, + "loss": 0.8896, + "step": 37320 + }, + { + "epoch": 4.2, + "learning_rate": 6.636696633901124e-06, + "loss": 0.9108, + "step": 37340 + }, + { + "epoch": 4.2, + "learning_rate": 6.600508925919008e-06, + "loss": 0.9018, + "step": 37360 + }, + { + "epoch": 4.2, + "learning_rate": 6.564413174092443e-06, + "loss": 0.9047, + "step": 37380 + }, + { + "epoch": 4.2, + "learning_rate": 6.528409454902235e-06, + "loss": 0.8608, + "step": 37400 + }, + { + "epoch": 4.2, + "learning_rate": 6.492497844634121e-06, + "loss": 0.8941, + "step": 37420 + }, + { + "epoch": 4.21, + "learning_rate": 6.4566784193787255e-06, + "loss": 0.8743, + "step": 37440 + }, + { + "epoch": 4.21, + "learning_rate": 6.4209512550313215e-06, + "loss": 0.8991, + "step": 37460 + }, + { + "epoch": 4.21, + "learning_rate": 6.38531642729171e-06, + "loss": 0.8968, + "step": 37480 + }, + { + "epoch": 4.21, + "learning_rate": 6.3497740116640396e-06, + "loss": 0.8719, + "step": 37500 + }, + { + "epoch": 4.22, + "learning_rate": 6.314324083456663e-06, + "loss": 0.9034, + "step": 37520 + }, + { + "epoch": 4.22, + "learning_rate": 6.2789667177819755e-06, + "loss": 0.8603, + "step": 37540 + }, + { + "epoch": 4.22, + "learning_rate": 6.2437019895561995e-06, + "loss": 0.9047, + "step": 37560 + }, + { + "epoch": 4.22, + "learning_rate": 6.208529973499316e-06, + "loss": 0.9057, + "step": 37580 + }, + { + "epoch": 4.22, + "learning_rate": 6.1734507441348785e-06, + "loss": 0.8938, + "step": 37600 + }, + { + "epoch": 4.23, + "learning_rate": 6.138464375789821e-06, + "loss": 0.8755, + "step": 37620 + }, + { + "epoch": 4.23, + "learning_rate": 6.1035709425943e-06, + "loss": 0.8896, + "step": 37640 + }, + { + "epoch": 4.23, + "learning_rate": 6.068770518481582e-06, + "loss": 0.8586, + "step": 37660 + }, + { + "epoch": 4.23, + "learning_rate": 6.034063177187865e-06, + "loss": 0.8803, + "step": 37680 + }, + { + "epoch": 4.24, + "learning_rate": 5.9994489922521155e-06, + "loss": 0.9121, + "step": 37700 + }, + { + "epoch": 4.24, + "learning_rate": 5.96492803701591e-06, + "loss": 0.8782, + "step": 37720 + }, + { + "epoch": 4.24, + "learning_rate": 5.9305003846233e-06, + "loss": 0.9016, + "step": 37740 + }, + { + "epoch": 4.24, + "learning_rate": 5.8961661080206454e-06, + "loss": 0.8687, + "step": 37760 + }, + { + "epoch": 4.24, + "learning_rate": 5.861925279956415e-06, + "loss": 0.885, + "step": 37780 + }, + { + "epoch": 4.25, + "learning_rate": 5.827777972981152e-06, + "loss": 0.853, + "step": 37800 + }, + { + "epoch": 4.25, + "learning_rate": 5.793724259447203e-06, + "loss": 0.8716, + "step": 37820 + }, + { + "epoch": 4.25, + "learning_rate": 5.759764211508578e-06, + "loss": 0.868, + "step": 37840 + }, + { + "epoch": 4.25, + "learning_rate": 5.7258979011208746e-06, + "loss": 0.8453, + "step": 37860 + }, + { + "epoch": 4.26, + "learning_rate": 5.692125400041049e-06, + "loss": 0.8788, + "step": 37880 + }, + { + "epoch": 4.26, + "learning_rate": 5.658446779827309e-06, + "loss": 0.8741, + "step": 37900 + }, + { + "epoch": 4.26, + "learning_rate": 5.624862111838919e-06, + "loss": 0.8998, + "step": 37920 + }, + { + "epoch": 4.26, + "learning_rate": 5.5913714672361065e-06, + "loss": 0.8717, + "step": 37940 + }, + { + "epoch": 4.27, + "learning_rate": 5.557974916979863e-06, + "loss": 0.8747, + "step": 37960 + }, + { + "epoch": 4.27, + "learning_rate": 5.5246725318317815e-06, + "loss": 0.8667, + "step": 37980 + }, + { + "epoch": 4.27, + "learning_rate": 5.491464382354e-06, + "loss": 0.8788, + "step": 38000 + }, + { + "epoch": 4.27, + "learning_rate": 5.458350538908946e-06, + "loss": 0.8948, + "step": 38020 + }, + { + "epoch": 4.27, + "learning_rate": 5.425331071659212e-06, + "loss": 0.8609, + "step": 38040 + }, + { + "epoch": 4.28, + "learning_rate": 5.392406050567455e-06, + "loss": 0.8659, + "step": 38060 + }, + { + "epoch": 4.28, + "learning_rate": 5.3595755453962115e-06, + "loss": 0.8705, + "step": 38080 + }, + { + "epoch": 4.28, + "learning_rate": 5.3268396257077465e-06, + "loss": 0.8849, + "step": 38100 + }, + { + "epoch": 4.28, + "learning_rate": 5.294198360863917e-06, + "loss": 0.8659, + "step": 38120 + }, + { + "epoch": 4.29, + "learning_rate": 5.26165182002602e-06, + "loss": 0.8674, + "step": 38140 + }, + { + "epoch": 4.29, + "learning_rate": 5.229200072154672e-06, + "loss": 0.8786, + "step": 38160 + }, + { + "epoch": 4.29, + "learning_rate": 5.196843186009581e-06, + "loss": 0.8893, + "step": 38180 + }, + { + "epoch": 4.29, + "learning_rate": 5.164581230149529e-06, + "loss": 0.899, + "step": 38200 + }, + { + "epoch": 4.29, + "learning_rate": 5.132414272932107e-06, + "loss": 0.8991, + "step": 38220 + }, + { + "epoch": 4.3, + "learning_rate": 5.100342382513662e-06, + "loss": 0.867, + "step": 38240 + }, + { + "epoch": 4.3, + "learning_rate": 5.068365626849058e-06, + "loss": 0.8965, + "step": 38260 + }, + { + "epoch": 4.3, + "learning_rate": 5.036484073691622e-06, + "loss": 0.9204, + "step": 38280 + }, + { + "epoch": 4.3, + "learning_rate": 5.004697790592961e-06, + "loss": 0.9037, + "step": 38300 + }, + { + "epoch": 4.31, + "learning_rate": 4.9730068449028e-06, + "loss": 0.8833, + "step": 38320 + }, + { + "epoch": 4.31, + "learning_rate": 4.941411303768889e-06, + "loss": 0.88, + "step": 38340 + }, + { + "epoch": 4.31, + "learning_rate": 4.90991123413681e-06, + "loss": 0.8873, + "step": 38360 + }, + { + "epoch": 4.31, + "learning_rate": 4.878506702749869e-06, + "loss": 0.8802, + "step": 38380 + }, + { + "epoch": 4.31, + "learning_rate": 4.847197776148932e-06, + "loss": 0.8771, + "step": 38400 + }, + { + "epoch": 4.32, + "learning_rate": 4.815984520672301e-06, + "loss": 0.8883, + "step": 38420 + }, + { + "epoch": 4.32, + "learning_rate": 4.784867002455584e-06, + "loss": 0.8629, + "step": 38440 + }, + { + "epoch": 4.32, + "learning_rate": 4.753845287431491e-06, + "loss": 0.8824, + "step": 38460 + }, + { + "epoch": 4.32, + "learning_rate": 4.722919441329782e-06, + "loss": 0.882, + "step": 38480 + }, + { + "epoch": 4.33, + "learning_rate": 4.692089529677074e-06, + "loss": 0.8704, + "step": 38500 + }, + { + "epoch": 4.33, + "learning_rate": 4.661355617796742e-06, + "loss": 0.8956, + "step": 38520 + }, + { + "epoch": 4.33, + "learning_rate": 4.630717770808696e-06, + "loss": 0.8867, + "step": 38540 + }, + { + "epoch": 4.33, + "learning_rate": 4.600176053629346e-06, + "loss": 0.8825, + "step": 38560 + }, + { + "epoch": 4.33, + "learning_rate": 4.569730530971411e-06, + "loss": 0.8755, + "step": 38580 + }, + { + "epoch": 4.34, + "learning_rate": 4.5393812673437844e-06, + "loss": 0.8778, + "step": 38600 + }, + { + "epoch": 4.34, + "learning_rate": 4.5091283270513985e-06, + "loss": 0.8921, + "step": 38620 + }, + { + "epoch": 4.34, + "learning_rate": 4.4789717741951065e-06, + "loss": 0.8814, + "step": 38640 + }, + { + "epoch": 4.34, + "learning_rate": 4.448911672671535e-06, + "loss": 0.8655, + "step": 38660 + }, + { + "epoch": 4.35, + "learning_rate": 4.418948086172914e-06, + "loss": 0.9148, + "step": 38680 + }, + { + "epoch": 4.35, + "learning_rate": 4.389081078186996e-06, + "loss": 0.8551, + "step": 38700 + }, + { + "epoch": 4.35, + "learning_rate": 4.359310711996939e-06, + "loss": 0.8897, + "step": 38720 + }, + { + "epoch": 4.35, + "learning_rate": 4.329637050681057e-06, + "loss": 0.859, + "step": 38740 + }, + { + "epoch": 4.36, + "learning_rate": 4.300060157112817e-06, + "loss": 0.8528, + "step": 38760 + }, + { + "epoch": 4.36, + "learning_rate": 4.270580093960641e-06, + "loss": 0.8661, + "step": 38780 + }, + { + "epoch": 4.36, + "learning_rate": 4.241196923687774e-06, + "loss": 0.8779, + "step": 38800 + }, + { + "epoch": 4.36, + "learning_rate": 4.2119107085521636e-06, + "loss": 0.8813, + "step": 38820 + }, + { + "epoch": 4.36, + "learning_rate": 4.18272151060633e-06, + "loss": 0.865, + "step": 38840 + }, + { + "epoch": 4.37, + "learning_rate": 4.153629391697244e-06, + "loss": 0.8753, + "step": 38860 + }, + { + "epoch": 4.37, + "learning_rate": 4.12463441346615e-06, + "loss": 0.9042, + "step": 38880 + }, + { + "epoch": 4.37, + "learning_rate": 4.09573663734848e-06, + "loss": 0.8962, + "step": 38900 + }, + { + "epoch": 4.37, + "learning_rate": 4.066936124573734e-06, + "loss": 0.8484, + "step": 38920 + }, + { + "epoch": 4.38, + "learning_rate": 4.03823293616532e-06, + "loss": 0.8848, + "step": 38940 + }, + { + "epoch": 4.38, + "learning_rate": 4.009627132940397e-06, + "loss": 0.8626, + "step": 38960 + }, + { + "epoch": 4.38, + "learning_rate": 3.981118775509812e-06, + "loss": 0.8792, + "step": 38980 + }, + { + "epoch": 4.38, + "learning_rate": 3.952707924277949e-06, + "loss": 0.8841, + "step": 39000 + }, + { + "epoch": 4.38, + "learning_rate": 3.9243946394425635e-06, + "loss": 0.8447, + "step": 39020 + }, + { + "epoch": 4.39, + "learning_rate": 3.896178980994714e-06, + "loss": 0.8927, + "step": 39040 + }, + { + "epoch": 4.39, + "learning_rate": 3.868061008718593e-06, + "loss": 0.8913, + "step": 39060 + }, + { + "epoch": 4.39, + "learning_rate": 3.840040782191401e-06, + "loss": 0.889, + "step": 39080 + }, + { + "epoch": 4.39, + "learning_rate": 3.8121183607832344e-06, + "loss": 0.869, + "step": 39100 + }, + { + "epoch": 4.4, + "learning_rate": 3.7842938036569854e-06, + "loss": 0.9043, + "step": 39120 + }, + { + "epoch": 4.4, + "learning_rate": 3.756567169768166e-06, + "loss": 0.8772, + "step": 39140 + }, + { + "epoch": 4.4, + "learning_rate": 3.728938517864794e-06, + "loss": 0.8899, + "step": 39160 + }, + { + "epoch": 4.4, + "learning_rate": 3.701407906487303e-06, + "loss": 0.8797, + "step": 39180 + }, + { + "epoch": 4.4, + "learning_rate": 3.6739753939683817e-06, + "loss": 0.8753, + "step": 39200 + }, + { + "epoch": 4.41, + "learning_rate": 3.6466410384328685e-06, + "loss": 0.8659, + "step": 39220 + }, + { + "epoch": 4.41, + "learning_rate": 3.6194048977976237e-06, + "loss": 0.875, + "step": 39240 + }, + { + "epoch": 4.41, + "learning_rate": 3.5922670297714124e-06, + "loss": 0.8816, + "step": 39260 + }, + { + "epoch": 4.41, + "learning_rate": 3.5652274918547724e-06, + "loss": 0.8792, + "step": 39280 + }, + { + "epoch": 4.42, + "learning_rate": 3.5382863413398694e-06, + "loss": 0.872, + "step": 39300 + }, + { + "epoch": 4.42, + "learning_rate": 3.5114436353104574e-06, + "loss": 0.8681, + "step": 39320 + }, + { + "epoch": 4.42, + "learning_rate": 3.4846994306416746e-06, + "loss": 0.855, + "step": 39340 + }, + { + "epoch": 4.42, + "learning_rate": 3.4580537839999374e-06, + "loss": 0.8619, + "step": 39360 + }, + { + "epoch": 4.42, + "learning_rate": 3.43150675184285e-06, + "loss": 0.8799, + "step": 39380 + }, + { + "epoch": 4.43, + "learning_rate": 3.405058390419069e-06, + "loss": 0.8626, + "step": 39400 + }, + { + "epoch": 4.43, + "learning_rate": 3.3787087557681895e-06, + "loss": 0.8733, + "step": 39420 + }, + { + "epoch": 4.43, + "learning_rate": 3.352457903720613e-06, + "loss": 0.8642, + "step": 39440 + }, + { + "epoch": 4.43, + "learning_rate": 3.326305889897435e-06, + "loss": 0.8841, + "step": 39460 + }, + { + "epoch": 4.44, + "learning_rate": 3.3002527697103435e-06, + "loss": 0.8708, + "step": 39480 + }, + { + "epoch": 4.44, + "learning_rate": 3.274298598361458e-06, + "loss": 0.896, + "step": 39500 + }, + { + "epoch": 4.44, + "learning_rate": 3.2484434308432843e-06, + "loss": 0.8654, + "step": 39520 + }, + { + "epoch": 4.44, + "learning_rate": 3.2226873219385224e-06, + "loss": 0.8616, + "step": 39540 + }, + { + "epoch": 4.44, + "learning_rate": 3.197030326220013e-06, + "loss": 0.8735, + "step": 39560 + }, + { + "epoch": 4.45, + "learning_rate": 3.1714724980505484e-06, + "loss": 0.8782, + "step": 39580 + }, + { + "epoch": 4.45, + "learning_rate": 3.146013891582844e-06, + "loss": 0.8788, + "step": 39600 + }, + { + "epoch": 4.45, + "learning_rate": 3.1206545607593616e-06, + "loss": 0.8564, + "step": 39620 + }, + { + "epoch": 4.45, + "learning_rate": 3.095394559312226e-06, + "loss": 0.8777, + "step": 39640 + }, + { + "epoch": 4.46, + "learning_rate": 3.070233940763084e-06, + "loss": 0.8988, + "step": 39660 + }, + { + "epoch": 4.46, + "learning_rate": 3.0451727584230207e-06, + "loss": 0.871, + "step": 39680 + }, + { + "epoch": 4.46, + "learning_rate": 3.020211065392431e-06, + "loss": 0.885, + "step": 39700 + }, + { + "epoch": 4.46, + "learning_rate": 2.995348914560897e-06, + "loss": 0.8604, + "step": 39720 + }, + { + "epoch": 4.47, + "learning_rate": 2.9705863586071027e-06, + "loss": 0.8535, + "step": 39740 + }, + { + "epoch": 4.47, + "learning_rate": 2.945923449998711e-06, + "loss": 0.9172, + "step": 39760 + }, + { + "epoch": 4.47, + "learning_rate": 2.921360240992216e-06, + "loss": 0.8873, + "step": 39780 + }, + { + "epoch": 4.47, + "learning_rate": 2.8968967836329077e-06, + "loss": 0.8798, + "step": 39800 + }, + { + "epoch": 4.47, + "learning_rate": 2.8725331297546786e-06, + "loss": 0.8649, + "step": 39820 + }, + { + "epoch": 4.48, + "learning_rate": 2.8482693309800136e-06, + "loss": 0.8986, + "step": 39840 + }, + { + "epoch": 4.48, + "learning_rate": 2.8241054387197487e-06, + "loss": 0.92, + "step": 39860 + }, + { + "epoch": 4.48, + "learning_rate": 2.8000415041730845e-06, + "loss": 0.8747, + "step": 39880 + }, + { + "epoch": 4.48, + "learning_rate": 2.7760775783274127e-06, + "loss": 0.8853, + "step": 39900 + }, + { + "epoch": 4.49, + "learning_rate": 2.7522137119582238e-06, + "loss": 0.8646, + "step": 39920 + }, + { + "epoch": 4.49, + "learning_rate": 2.7284499556290033e-06, + "loss": 0.8972, + "step": 39940 + }, + { + "epoch": 4.49, + "learning_rate": 2.7047863596911248e-06, + "loss": 0.8619, + "step": 39960 + }, + { + "epoch": 4.49, + "learning_rate": 2.681222974283715e-06, + "loss": 0.8512, + "step": 39980 + }, + { + "epoch": 4.49, + "learning_rate": 2.657759849333591e-06, + "loss": 0.868, + "step": 40000 + }, + { + "epoch": 4.5, + "learning_rate": 2.6343970345551363e-06, + "loss": 0.8721, + "step": 40020 + }, + { + "epoch": 4.5, + "learning_rate": 2.6111345794502027e-06, + "loss": 0.8967, + "step": 40040 + }, + { + "epoch": 4.5, + "learning_rate": 2.587972533307964e-06, + "loss": 0.8751, + "step": 40060 + }, + { + "epoch": 4.5, + "learning_rate": 2.5649109452048735e-06, + "loss": 0.8797, + "step": 40080 + }, + { + "epoch": 4.51, + "learning_rate": 2.541949864004528e-06, + "loss": 0.8924, + "step": 40100 + }, + { + "epoch": 4.51, + "learning_rate": 2.5190893383575498e-06, + "loss": 0.855, + "step": 40120 + }, + { + "epoch": 4.51, + "learning_rate": 2.4963294167015204e-06, + "loss": 0.8679, + "step": 40140 + }, + { + "epoch": 4.51, + "learning_rate": 2.473670147260848e-06, + "loss": 0.8751, + "step": 40160 + }, + { + "epoch": 4.51, + "learning_rate": 2.451111578046689e-06, + "loss": 0.8775, + "step": 40180 + }, + { + "epoch": 4.52, + "learning_rate": 2.4286537568568023e-06, + "loss": 0.8797, + "step": 40200 + }, + { + "epoch": 4.52, + "learning_rate": 2.4062967312755037e-06, + "loss": 0.8527, + "step": 40220 + }, + { + "epoch": 4.52, + "learning_rate": 2.3840405486735607e-06, + "loss": 0.8736, + "step": 40240 + }, + { + "epoch": 4.52, + "learning_rate": 2.3618852562080187e-06, + "loss": 0.8782, + "step": 40260 + }, + { + "epoch": 4.53, + "learning_rate": 2.339830900822193e-06, + "loss": 0.9045, + "step": 40280 + }, + { + "epoch": 4.53, + "learning_rate": 2.3178775292455214e-06, + "loss": 0.8947, + "step": 40300 + }, + { + "epoch": 4.53, + "learning_rate": 2.296025187993467e-06, + "loss": 0.8586, + "step": 40320 + }, + { + "epoch": 4.53, + "learning_rate": 2.274273923367437e-06, + "loss": 0.8814, + "step": 40340 + }, + { + "epoch": 4.53, + "learning_rate": 2.25262378145466e-06, + "loss": 0.8635, + "step": 40360 + }, + { + "epoch": 4.54, + "learning_rate": 2.231074808128131e-06, + "loss": 0.8544, + "step": 40380 + }, + { + "epoch": 4.54, + "learning_rate": 2.2096270490464476e-06, + "loss": 0.8556, + "step": 40400 + }, + { + "epoch": 4.54, + "learning_rate": 2.1882805496537705e-06, + "loss": 0.9008, + "step": 40420 + }, + { + "epoch": 4.54, + "learning_rate": 2.167035355179725e-06, + "loss": 0.8816, + "step": 40440 + }, + { + "epoch": 4.55, + "learning_rate": 2.1458915106392697e-06, + "loss": 0.8555, + "step": 40460 + }, + { + "epoch": 4.55, + "learning_rate": 2.1248490608326123e-06, + "loss": 0.8593, + "step": 40480 + }, + { + "epoch": 4.55, + "learning_rate": 2.1039080503451325e-06, + "loss": 0.8759, + "step": 40500 + }, + { + "epoch": 4.55, + "learning_rate": 2.083068523547288e-06, + "loss": 0.8855, + "step": 40520 + }, + { + "epoch": 4.56, + "learning_rate": 2.0623305245944913e-06, + "loss": 0.8577, + "step": 40540 + }, + { + "epoch": 4.56, + "learning_rate": 2.0416940974270384e-06, + "loss": 0.8959, + "step": 40560 + }, + { + "epoch": 4.56, + "learning_rate": 2.0211592857700303e-06, + "loss": 0.8824, + "step": 40580 + }, + { + "epoch": 4.56, + "learning_rate": 2.0007261331332404e-06, + "loss": 0.9085, + "step": 40600 + }, + { + "epoch": 4.56, + "learning_rate": 1.9803946828110375e-06, + "loss": 0.8982, + "step": 40620 + }, + { + "epoch": 4.57, + "learning_rate": 1.9601649778823337e-06, + "loss": 0.863, + "step": 40640 + }, + { + "epoch": 4.57, + "learning_rate": 1.940037061210437e-06, + "loss": 0.8765, + "step": 40660 + }, + { + "epoch": 4.57, + "learning_rate": 1.920010975442976e-06, + "loss": 0.8977, + "step": 40680 + }, + { + "epoch": 4.57, + "learning_rate": 1.9000867630118324e-06, + "loss": 0.8776, + "step": 40700 + }, + { + "epoch": 4.58, + "learning_rate": 1.8802644661330304e-06, + "loss": 0.862, + "step": 40720 + }, + { + "epoch": 4.58, + "learning_rate": 1.8605441268066625e-06, + "loss": 0.8683, + "step": 40740 + }, + { + "epoch": 4.58, + "learning_rate": 1.8409257868167718e-06, + "loss": 0.8883, + "step": 40760 + }, + { + "epoch": 4.58, + "learning_rate": 1.8214094877312849e-06, + "loss": 0.8897, + "step": 40780 + }, + { + "epoch": 4.58, + "learning_rate": 1.8019952709019404e-06, + "loss": 0.8771, + "step": 40800 + }, + { + "epoch": 4.59, + "learning_rate": 1.7826831774641617e-06, + "loss": 0.8785, + "step": 40820 + }, + { + "epoch": 4.59, + "learning_rate": 1.7634732483369943e-06, + "loss": 0.8497, + "step": 40840 + }, + { + "epoch": 4.59, + "learning_rate": 1.7443655242230238e-06, + "loss": 0.8961, + "step": 40860 + }, + { + "epoch": 4.59, + "learning_rate": 1.7253600456082764e-06, + "loss": 0.8995, + "step": 40880 + }, + { + "epoch": 4.6, + "learning_rate": 1.7064568527621228e-06, + "loss": 0.8858, + "step": 40900 + }, + { + "epoch": 4.6, + "learning_rate": 1.6876559857372243e-06, + "loss": 0.8618, + "step": 40920 + }, + { + "epoch": 4.6, + "learning_rate": 1.6689574843694433e-06, + "loss": 0.8773, + "step": 40940 + }, + { + "epoch": 4.6, + "learning_rate": 1.6503613882777101e-06, + "loss": 0.8614, + "step": 40960 + }, + { + "epoch": 4.6, + "learning_rate": 1.6318677368640067e-06, + "loss": 0.9059, + "step": 40980 + }, + { + "epoch": 4.61, + "learning_rate": 1.6134765693132382e-06, + "loss": 0.9023, + "step": 41000 + }, + { + "epoch": 4.61, + "learning_rate": 1.5951879245931723e-06, + "loss": 0.8583, + "step": 41020 + }, + { + "epoch": 4.61, + "learning_rate": 1.5770018414543342e-06, + "loss": 0.8845, + "step": 41040 + }, + { + "epoch": 4.61, + "learning_rate": 1.5589183584299504e-06, + "loss": 0.8818, + "step": 41060 + }, + { + "epoch": 4.62, + "learning_rate": 1.5409375138358663e-06, + "loss": 0.8754, + "step": 41080 + }, + { + "epoch": 4.62, + "learning_rate": 1.5230593457704168e-06, + "loss": 0.8513, + "step": 41100 + }, + { + "epoch": 4.62, + "learning_rate": 1.505283892114412e-06, + "loss": 0.8949, + "step": 41120 + }, + { + "epoch": 4.62, + "learning_rate": 1.4876111905310408e-06, + "loss": 0.8962, + "step": 41140 + }, + { + "epoch": 4.62, + "learning_rate": 1.4700412784657336e-06, + "loss": 0.8671, + "step": 41160 + }, + { + "epoch": 4.63, + "learning_rate": 1.4525741931461612e-06, + "loss": 0.8822, + "step": 41180 + }, + { + "epoch": 4.63, + "learning_rate": 1.4352099715821133e-06, + "loss": 0.8698, + "step": 41200 + }, + { + "epoch": 4.63, + "learning_rate": 1.4179486505654316e-06, + "loss": 0.8912, + "step": 41220 + }, + { + "epoch": 4.63, + "learning_rate": 1.4007902666699157e-06, + "loss": 0.9107, + "step": 41240 + }, + { + "epoch": 4.64, + "learning_rate": 1.3837348562512842e-06, + "loss": 0.8674, + "step": 41260 + }, + { + "epoch": 4.64, + "learning_rate": 1.3667824554470466e-06, + "loss": 0.8724, + "step": 41280 + }, + { + "epoch": 4.64, + "learning_rate": 1.3499331001764592e-06, + "loss": 0.8747, + "step": 41300 + }, + { + "epoch": 4.64, + "learning_rate": 1.3331868261404479e-06, + "loss": 0.8595, + "step": 41320 + }, + { + "epoch": 4.64, + "learning_rate": 1.316543668821535e-06, + "loss": 0.8836, + "step": 41340 + }, + { + "epoch": 4.65, + "learning_rate": 1.3000036634837398e-06, + "loss": 0.8632, + "step": 41360 + }, + { + "epoch": 4.65, + "learning_rate": 1.2835668451725292e-06, + "loss": 0.8968, + "step": 41380 + }, + { + "epoch": 4.65, + "learning_rate": 1.267233248714722e-06, + "loss": 0.8771, + "step": 41400 + }, + { + "epoch": 4.65, + "learning_rate": 1.251002908718446e-06, + "loss": 0.8829, + "step": 41420 + }, + { + "epoch": 4.66, + "learning_rate": 1.2348758595730314e-06, + "loss": 0.8628, + "step": 41440 + }, + { + "epoch": 4.66, + "learning_rate": 1.2188521354489613e-06, + "loss": 0.862, + "step": 41460 + }, + { + "epoch": 4.66, + "learning_rate": 1.2029317702977882e-06, + "loss": 0.8919, + "step": 41480 + }, + { + "epoch": 4.66, + "learning_rate": 1.187114797852068e-06, + "loss": 0.8733, + "step": 41500 + }, + { + "epoch": 4.67, + "learning_rate": 1.1714012516252648e-06, + "loss": 0.8723, + "step": 41520 + }, + { + "epoch": 4.67, + "learning_rate": 1.1557911649117293e-06, + "loss": 0.8721, + "step": 41540 + }, + { + "epoch": 4.67, + "learning_rate": 1.1402845707865928e-06, + "loss": 0.8917, + "step": 41560 + }, + { + "epoch": 4.67, + "learning_rate": 1.124881502105679e-06, + "loss": 0.871, + "step": 41580 + }, + { + "epoch": 4.67, + "learning_rate": 1.1095819915054872e-06, + "loss": 0.8686, + "step": 41600 + }, + { + "epoch": 4.68, + "learning_rate": 1.094386071403075e-06, + "loss": 0.8621, + "step": 41620 + }, + { + "epoch": 4.68, + "learning_rate": 1.0792937739960262e-06, + "loss": 0.8884, + "step": 41640 + }, + { + "epoch": 4.68, + "learning_rate": 1.0643051312623553e-06, + "loss": 0.8675, + "step": 41660 + }, + { + "epoch": 4.68, + "learning_rate": 1.0494201749604525e-06, + "loss": 0.8882, + "step": 41680 + }, + { + "epoch": 4.69, + "learning_rate": 1.0346389366290122e-06, + "loss": 0.8953, + "step": 41700 + }, + { + "epoch": 4.69, + "learning_rate": 1.0199614475869646e-06, + "loss": 0.8781, + "step": 41720 + }, + { + "epoch": 4.69, + "learning_rate": 1.0053877389334277e-06, + "loss": 0.8719, + "step": 41740 + }, + { + "epoch": 4.69, + "learning_rate": 9.909178415476116e-07, + "loss": 0.8847, + "step": 41760 + }, + { + "epoch": 4.69, + "learning_rate": 9.765517860887808e-07, + "loss": 0.8803, + "step": 41780 + }, + { + "epoch": 4.7, + "learning_rate": 9.62289602996158e-07, + "loss": 0.8687, + "step": 41800 + }, + { + "epoch": 4.7, + "learning_rate": 9.481313224888877e-07, + "loss": 0.8566, + "step": 41820 + }, + { + "epoch": 4.7, + "learning_rate": 9.340769745659672e-07, + "loss": 0.9047, + "step": 41840 + }, + { + "epoch": 4.7, + "learning_rate": 9.201265890061816e-07, + "loss": 0.8534, + "step": 41860 + }, + { + "epoch": 4.71, + "learning_rate": 9.062801953680145e-07, + "loss": 0.9076, + "step": 41880 + }, + { + "epoch": 4.71, + "learning_rate": 8.925378229896364e-07, + "loss": 0.9027, + "step": 41900 + }, + { + "epoch": 4.71, + "learning_rate": 8.788995009888002e-07, + "loss": 0.8772, + "step": 41920 + }, + { + "epoch": 4.71, + "learning_rate": 8.653652582627958e-07, + "loss": 0.8791, + "step": 41940 + }, + { + "epoch": 4.71, + "learning_rate": 8.519351234883787e-07, + "loss": 0.8871, + "step": 41960 + }, + { + "epoch": 4.72, + "learning_rate": 8.386091251217365e-07, + "loss": 0.8808, + "step": 41980 + }, + { + "epoch": 4.72, + "learning_rate": 8.253872913983884e-07, + "loss": 0.9046, + "step": 42000 + }, + { + "epoch": 4.72, + "learning_rate": 8.122696503331583e-07, + "loss": 0.9211, + "step": 42020 + }, + { + "epoch": 4.72, + "learning_rate": 7.992562297201023e-07, + "loss": 0.8927, + "step": 42040 + }, + { + "epoch": 4.73, + "learning_rate": 7.863470571324527e-07, + "loss": 0.8639, + "step": 42060 + }, + { + "epoch": 4.73, + "learning_rate": 7.735421599225467e-07, + "loss": 0.9165, + "step": 42080 + }, + { + "epoch": 4.73, + "learning_rate": 7.608415652217982e-07, + "loss": 0.8829, + "step": 42100 + }, + { + "epoch": 4.73, + "learning_rate": 7.482452999406087e-07, + "loss": 0.8938, + "step": 42120 + }, + { + "epoch": 4.73, + "learning_rate": 7.357533907683234e-07, + "loss": 0.8722, + "step": 42140 + }, + { + "epoch": 4.74, + "learning_rate": 7.23365864173181e-07, + "loss": 0.8738, + "step": 42160 + }, + { + "epoch": 4.74, + "learning_rate": 7.110827464022474e-07, + "loss": 0.8595, + "step": 42180 + }, + { + "epoch": 4.74, + "learning_rate": 6.989040634813648e-07, + "loss": 0.8597, + "step": 42200 + }, + { + "epoch": 4.74, + "learning_rate": 6.868298412150864e-07, + "loss": 0.9116, + "step": 42220 + }, + { + "epoch": 4.75, + "learning_rate": 6.748601051866532e-07, + "loss": 0.8424, + "step": 42240 + }, + { + "epoch": 4.75, + "learning_rate": 6.629948807579001e-07, + "loss": 0.8709, + "step": 42260 + }, + { + "epoch": 4.75, + "learning_rate": 6.512341930692167e-07, + "loss": 0.9012, + "step": 42280 + }, + { + "epoch": 4.75, + "learning_rate": 6.395780670395147e-07, + "loss": 0.869, + "step": 42300 + }, + { + "epoch": 4.76, + "learning_rate": 6.280265273661379e-07, + "loss": 0.8905, + "step": 42320 + }, + { + "epoch": 4.76, + "learning_rate": 6.165795985248413e-07, + "loss": 0.8933, + "step": 42340 + }, + { + "epoch": 4.76, + "learning_rate": 6.052373047697236e-07, + "loss": 0.8983, + "step": 42360 + }, + { + "epoch": 4.76, + "learning_rate": 5.939996701331884e-07, + "loss": 0.9021, + "step": 42380 + }, + { + "epoch": 4.76, + "learning_rate": 5.828667184258673e-07, + "loss": 0.889, + "step": 42400 + }, + { + "epoch": 4.77, + "learning_rate": 5.718384732365967e-07, + "loss": 0.8627, + "step": 42420 + }, + { + "epoch": 4.77, + "learning_rate": 5.609149579323513e-07, + "loss": 0.8778, + "step": 42440 + }, + { + "epoch": 4.77, + "learning_rate": 5.50096195658223e-07, + "loss": 0.8989, + "step": 42460 + }, + { + "epoch": 4.77, + "learning_rate": 5.393822093373135e-07, + "loss": 0.8895, + "step": 42480 + }, + { + "epoch": 4.78, + "learning_rate": 5.287730216707532e-07, + "loss": 0.8777, + "step": 42500 + }, + { + "epoch": 4.78, + "learning_rate": 5.182686551376048e-07, + "loss": 0.895, + "step": 42520 + }, + { + "epoch": 4.78, + "learning_rate": 5.07869131994837e-07, + "loss": 0.8736, + "step": 42540 + }, + { + "epoch": 4.78, + "learning_rate": 4.975744742772848e-07, + "loss": 0.8999, + "step": 42560 + }, + { + "epoch": 4.78, + "learning_rate": 4.873847037975665e-07, + "loss": 0.8983, + "step": 42580 + }, + { + "epoch": 4.79, + "learning_rate": 4.772998421460895e-07, + "loss": 0.8648, + "step": 42600 + }, + { + "epoch": 4.79, + "learning_rate": 4.6731991069094984e-07, + "loss": 0.8712, + "step": 42620 + }, + { + "epoch": 4.79, + "learning_rate": 4.574449305779327e-07, + "loss": 0.8866, + "step": 42640 + }, + { + "epoch": 4.79, + "learning_rate": 4.4767492273045665e-07, + "loss": 0.9028, + "step": 42660 + }, + { + "epoch": 4.8, + "learning_rate": 4.380099078495015e-07, + "loss": 0.88, + "step": 42680 + }, + { + "epoch": 4.8, + "learning_rate": 4.284499064135916e-07, + "loss": 0.8929, + "step": 42700 + }, + { + "epoch": 4.8, + "learning_rate": 4.189949386787462e-07, + "loss": 0.8876, + "step": 42720 + }, + { + "epoch": 4.8, + "learning_rate": 4.0964502467844e-07, + "loss": 0.9048, + "step": 42740 + }, + { + "epoch": 4.8, + "learning_rate": 4.0040018422355385e-07, + "loss": 0.8726, + "step": 42760 + }, + { + "epoch": 4.81, + "learning_rate": 3.9126043690234093e-07, + "loss": 0.8974, + "step": 42780 + }, + { + "epoch": 4.81, + "learning_rate": 3.82225802080366e-07, + "loss": 0.8804, + "step": 42800 + }, + { + "epoch": 4.81, + "learning_rate": 3.7329629890048846e-07, + "loss": 0.8947, + "step": 42820 + }, + { + "epoch": 4.81, + "learning_rate": 3.6447194628281276e-07, + "loss": 0.8923, + "step": 42840 + }, + { + "epoch": 4.82, + "learning_rate": 3.557527629246438e-07, + "loss": 0.8837, + "step": 42860 + }, + { + "epoch": 4.82, + "learning_rate": 3.471387673004534e-07, + "loss": 0.8695, + "step": 42880 + }, + { + "epoch": 4.82, + "learning_rate": 3.3862997766182515e-07, + "loss": 0.8827, + "step": 42900 + }, + { + "epoch": 4.82, + "learning_rate": 3.302264120374543e-07, + "loss": 0.8632, + "step": 42920 + }, + { + "epoch": 4.82, + "learning_rate": 3.219280882330644e-07, + "loss": 0.8822, + "step": 42940 + }, + { + "epoch": 4.83, + "learning_rate": 3.137350238313963e-07, + "loss": 0.9121, + "step": 42960 + }, + { + "epoch": 4.83, + "learning_rate": 3.0564723619215807e-07, + "loss": 0.9014, + "step": 42980 + }, + { + "epoch": 4.83, + "learning_rate": 2.9766474245200847e-07, + "loss": 0.8743, + "step": 43000 + }, + { + "epoch": 4.83, + "learning_rate": 2.8978755952448475e-07, + "loss": 0.8561, + "step": 43020 + }, + { + "epoch": 4.84, + "learning_rate": 2.8201570410000824e-07, + "loss": 0.8845, + "step": 43040 + }, + { + "epoch": 4.84, + "learning_rate": 2.74349192645823e-07, + "loss": 0.8676, + "step": 43060 + }, + { + "epoch": 4.84, + "learning_rate": 2.6678804140596315e-07, + "loss": 0.8449, + "step": 43080 + }, + { + "epoch": 4.84, + "learning_rate": 2.5933226640121875e-07, + "loss": 0.8725, + "step": 43100 + }, + { + "epoch": 4.84, + "learning_rate": 2.5198188342912543e-07, + "loss": 0.876, + "step": 43120 + }, + { + "epoch": 4.85, + "learning_rate": 2.447369080638806e-07, + "loss": 0.8781, + "step": 43140 + }, + { + "epoch": 4.85, + "learning_rate": 2.3759735565637155e-07, + "loss": 0.8812, + "step": 43160 + }, + { + "epoch": 4.85, + "learning_rate": 2.305632413340919e-07, + "loss": 0.8981, + "step": 43180 + }, + { + "epoch": 4.85, + "learning_rate": 2.236345800011308e-07, + "loss": 0.886, + "step": 43200 + }, + { + "epoch": 4.86, + "learning_rate": 2.1681138633816156e-07, + "loss": 0.8873, + "step": 43220 + }, + { + "epoch": 4.86, + "learning_rate": 2.1009367480235298e-07, + "loss": 0.8964, + "step": 43240 + }, + { + "epoch": 4.86, + "learning_rate": 2.0348145962740817e-07, + "loss": 0.8663, + "step": 43260 + }, + { + "epoch": 4.86, + "learning_rate": 1.9697475482349238e-07, + "loss": 0.868, + "step": 43280 + }, + { + "epoch": 4.87, + "learning_rate": 1.9057357417719968e-07, + "loss": 0.881, + "step": 43300 + }, + { + "epoch": 4.87, + "learning_rate": 1.8427793125154747e-07, + "loss": 0.8902, + "step": 43320 + }, + { + "epoch": 4.87, + "learning_rate": 1.7808783938593198e-07, + "loss": 0.8796, + "step": 43340 + }, + { + "epoch": 4.87, + "learning_rate": 1.7200331169611727e-07, + "loss": 0.8813, + "step": 43360 + }, + { + "epoch": 4.87, + "learning_rate": 1.6602436107417408e-07, + "loss": 0.8988, + "step": 43380 + }, + { + "epoch": 4.88, + "learning_rate": 1.6015100018849095e-07, + "loss": 0.8534, + "step": 43400 + }, + { + "epoch": 4.88, + "learning_rate": 1.5438324148371875e-07, + "loss": 0.8964, + "step": 43420 + }, + { + "epoch": 4.88, + "learning_rate": 1.487210971807651e-07, + "loss": 0.8934, + "step": 43440 + }, + { + "epoch": 4.88, + "learning_rate": 1.4316457927674444e-07, + "loss": 0.8765, + "step": 43460 + }, + { + "epoch": 4.89, + "learning_rate": 1.3771369954497793e-07, + "loss": 0.8458, + "step": 43480 + }, + { + "epoch": 4.89, + "learning_rate": 1.323684695349603e-07, + "loss": 0.8656, + "step": 43500 + }, + { + "epoch": 4.89, + "learning_rate": 1.2712890057232085e-07, + "loss": 0.8792, + "step": 43520 + }, + { + "epoch": 4.89, + "learning_rate": 1.2199500375881247e-07, + "loss": 0.8566, + "step": 43540 + }, + { + "epoch": 4.89, + "learning_rate": 1.1696678997230038e-07, + "loss": 0.8954, + "step": 43560 + }, + { + "epoch": 4.9, + "learning_rate": 1.1204426986671235e-07, + "loss": 0.8871, + "step": 43580 + }, + { + "epoch": 4.9, + "learning_rate": 1.0722745387203859e-07, + "loss": 0.875, + "step": 43600 + }, + { + "epoch": 4.9, + "learning_rate": 1.025163521942818e-07, + "loss": 0.8808, + "step": 43620 + }, + { + "epoch": 4.9, + "learning_rate": 9.791097481547939e-08, + "loss": 0.8818, + "step": 43640 + }, + { + "epoch": 4.91, + "learning_rate": 9.34113314936369e-08, + "loss": 0.884, + "step": 43660 + }, + { + "epoch": 4.91, + "learning_rate": 8.901743176273902e-08, + "loss": 0.8735, + "step": 43680 + }, + { + "epoch": 4.91, + "learning_rate": 8.472928493271082e-08, + "loss": 0.883, + "step": 43700 + }, + { + "epoch": 4.91, + "learning_rate": 8.054690008940657e-08, + "loss": 0.8736, + "step": 43720 + }, + { + "epoch": 4.91, + "learning_rate": 7.647028609459317e-08, + "loss": 0.8778, + "step": 43740 + }, + { + "epoch": 4.92, + "learning_rate": 7.249945158592231e-08, + "loss": 0.8444, + "step": 43760 + }, + { + "epoch": 4.92, + "learning_rate": 6.863440497691942e-08, + "loss": 0.8991, + "step": 43780 + }, + { + "epoch": 4.92, + "learning_rate": 6.487515445696146e-08, + "loss": 0.8613, + "step": 43800 + }, + { + "epoch": 4.92, + "learning_rate": 6.122170799126581e-08, + "loss": 0.8667, + "step": 43820 + }, + { + "epoch": 4.93, + "learning_rate": 5.7674073320856945e-08, + "loss": 0.886, + "step": 43840 + }, + { + "epoch": 4.93, + "learning_rate": 5.423225796257758e-08, + "loss": 0.9095, + "step": 43860 + }, + { + "epoch": 4.93, + "learning_rate": 5.089626920904422e-08, + "loss": 0.8674, + "step": 43880 + }, + { + "epoch": 4.93, + "learning_rate": 4.766611412865829e-08, + "loss": 0.9072, + "step": 43900 + }, + { + "epoch": 4.93, + "learning_rate": 4.4541799565567255e-08, + "loss": 0.8989, + "step": 43920 + }, + { + "epoch": 4.94, + "learning_rate": 4.1523332139664636e-08, + "loss": 0.885, + "step": 43940 + }, + { + "epoch": 4.94, + "learning_rate": 3.861071824656226e-08, + "loss": 0.8705, + "step": 43960 + }, + { + "epoch": 4.94, + "learning_rate": 3.5803964057606885e-08, + "loss": 0.8665, + "step": 43980 + }, + { + "epoch": 4.94, + "learning_rate": 3.310307551983027e-08, + "loss": 0.8746, + "step": 44000 + }, + { + "epoch": 4.95, + "learning_rate": 3.050805835594917e-08, + "loss": 0.8846, + "step": 44020 + }, + { + "epoch": 4.95, + "learning_rate": 2.8018918064376398e-08, + "loss": 0.8902, + "step": 44040 + }, + { + "epoch": 4.95, + "learning_rate": 2.563565991916539e-08, + "loss": 0.8422, + "step": 44060 + }, + { + "epoch": 4.95, + "learning_rate": 2.335828897004344e-08, + "loss": 0.8935, + "step": 44080 + }, + { + "epoch": 4.96, + "learning_rate": 2.1186810042372885e-08, + "loss": 0.8708, + "step": 44100 + }, + { + "epoch": 4.96, + "learning_rate": 1.912122773715108e-08, + "loss": 0.8663, + "step": 44120 + }, + { + "epoch": 4.96, + "learning_rate": 1.7161546430988217e-08, + "loss": 0.8972, + "step": 44140 + }, + { + "epoch": 4.96, + "learning_rate": 1.5307770276123956e-08, + "loss": 0.8774, + "step": 44160 + }, + { + "epoch": 4.96, + "learning_rate": 1.3559903200394131e-08, + "loss": 0.9105, + "step": 44180 + }, + { + "epoch": 4.97, + "learning_rate": 1.1917948907225196e-08, + "loss": 0.8849, + "step": 44200 + }, + { + "epoch": 4.97, + "learning_rate": 1.038191087565088e-08, + "loss": 0.8716, + "step": 44220 + }, + { + "epoch": 4.97, + "learning_rate": 8.951792360267775e-09, + "loss": 0.8811, + "step": 44240 + }, + { + "epoch": 4.97, + "learning_rate": 7.62759639125199e-09, + "loss": 0.8778, + "step": 44260 + }, + { + "epoch": 4.98, + "learning_rate": 6.409325774359154e-09, + "loss": 0.8776, + "step": 44280 + }, + { + "epoch": 4.98, + "learning_rate": 5.296983090891106e-09, + "loss": 0.8683, + "step": 44300 + }, + { + "epoch": 4.98, + "learning_rate": 4.2905706977181e-09, + "loss": 0.8893, + "step": 44320 + }, + { + "epoch": 4.98, + "learning_rate": 3.3900907272510497e-09, + "loss": 0.8707, + "step": 44340 + }, + { + "epoch": 4.98, + "learning_rate": 2.5955450874581845e-09, + "loss": 0.879, + "step": 44360 + }, + { + "epoch": 4.99, + "learning_rate": 1.90693546184284e-09, + "loss": 0.857, + "step": 44380 + }, + { + "epoch": 4.99, + "learning_rate": 1.3242633094545654e-09, + "loss": 0.8935, + "step": 44400 + }, + { + "epoch": 4.99, + "learning_rate": 8.475298648835672e-10, + "loss": 0.9124, + "step": 44420 + }, + { + "epoch": 4.99, + "learning_rate": 4.767361382329583e-10, + "loss": 0.9007, + "step": 44440 + }, + { + "epoch": 5.0, + "learning_rate": 2.1188291516316404e-10, + "loss": 0.8689, + "step": 44460 + }, + { + "epoch": 5.0, + "learning_rate": 5.297075684751463e-11, + "loss": 0.8644, + "step": 44480 + }, + { + "epoch": 5.0, + "learning_rate": 0.0, + "loss": 0.8527, + "step": 44500 + }, + { + "epoch": 5.0, + "step": 44500, + "total_flos": 1.4745575028791706e+19, + "train_loss": 0.9734652058462079, + "train_runtime": 41271.4588, + "train_samples_per_second": 17.251, + "train_steps_per_second": 1.078 } ], - "max_steps": 5640, - "num_train_epochs": 10, - "total_flos": 1.4686171215861645e+19, + "max_steps": 44500, + "num_train_epochs": 5, + "total_flos": 1.4745575028791706e+19, "trial_name": null, "trial_params": null }