|
{ |
|
"best_metric": 0.15827356278896332, |
|
"best_model_checkpoint": "finetuned_models/selection/phi_mini/checkpoint-8828", |
|
"epoch": 4.999660287623145, |
|
"eval_steps": 500, |
|
"global_step": 11035, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004529498358056845, |
|
"grad_norm": 2.3706889152526855, |
|
"learning_rate": 3.7735849056603773e-06, |
|
"loss": 3.1411, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00905899671611369, |
|
"grad_norm": 1.1344068050384521, |
|
"learning_rate": 7.547169811320755e-06, |
|
"loss": 2.7341, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.013588495074170535, |
|
"grad_norm": 0.5109199285507202, |
|
"learning_rate": 1.1320754716981132e-05, |
|
"loss": 2.3962, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01811799343222738, |
|
"grad_norm": 0.2340932935476303, |
|
"learning_rate": 1.509433962264151e-05, |
|
"loss": 2.1062, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.022647491790284226, |
|
"grad_norm": 0.16189691424369812, |
|
"learning_rate": 1.8867924528301888e-05, |
|
"loss": 1.9221, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02717699014834107, |
|
"grad_norm": 0.14064399898052216, |
|
"learning_rate": 2.2641509433962265e-05, |
|
"loss": 1.7971, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.031706488506397915, |
|
"grad_norm": 0.1196412444114685, |
|
"learning_rate": 2.641509433962264e-05, |
|
"loss": 1.7255, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03623598686445476, |
|
"grad_norm": 0.15146440267562866, |
|
"learning_rate": 3.018867924528302e-05, |
|
"loss": 1.6767, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.040765485222511604, |
|
"grad_norm": 0.13450802862644196, |
|
"learning_rate": 3.39622641509434e-05, |
|
"loss": 1.5943, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04529498358056845, |
|
"grad_norm": 0.15073299407958984, |
|
"learning_rate": 3.7735849056603776e-05, |
|
"loss": 1.5428, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0498244819386253, |
|
"grad_norm": 0.13764727115631104, |
|
"learning_rate": 4.150943396226415e-05, |
|
"loss": 1.4956, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.05435398029668214, |
|
"grad_norm": 0.23157894611358643, |
|
"learning_rate": 4.528301886792453e-05, |
|
"loss": 1.4492, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05888347865473899, |
|
"grad_norm": 0.1756928712129593, |
|
"learning_rate": 4.9056603773584906e-05, |
|
"loss": 1.4258, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.06341297701279583, |
|
"grad_norm": 0.19877882301807404, |
|
"learning_rate": 5.283018867924528e-05, |
|
"loss": 1.3863, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06794247537085268, |
|
"grad_norm": 0.19395482540130615, |
|
"learning_rate": 5.660377358490566e-05, |
|
"loss": 1.3469, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.07247197372890953, |
|
"grad_norm": 0.2622753083705902, |
|
"learning_rate": 6.037735849056604e-05, |
|
"loss": 1.3177, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07700147208696637, |
|
"grad_norm": 0.47893616557121277, |
|
"learning_rate": 6.415094339622641e-05, |
|
"loss": 1.2414, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.08153097044502321, |
|
"grad_norm": 0.2570054233074188, |
|
"learning_rate": 6.79245283018868e-05, |
|
"loss": 1.2046, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.08606046880308006, |
|
"grad_norm": 0.31944283843040466, |
|
"learning_rate": 7.169811320754717e-05, |
|
"loss": 1.2254, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.0905899671611369, |
|
"grad_norm": 0.35244274139404297, |
|
"learning_rate": 7.547169811320755e-05, |
|
"loss": 1.1671, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09511946551919374, |
|
"grad_norm": 0.23283237218856812, |
|
"learning_rate": 7.924528301886794e-05, |
|
"loss": 1.2043, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.0996489638772506, |
|
"grad_norm": 0.38952431082725525, |
|
"learning_rate": 8.30188679245283e-05, |
|
"loss": 1.202, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.10417846223530744, |
|
"grad_norm": 0.28450387716293335, |
|
"learning_rate": 8.679245283018869e-05, |
|
"loss": 1.1323, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.10870796059336428, |
|
"grad_norm": 0.30833789706230164, |
|
"learning_rate": 9.056603773584906e-05, |
|
"loss": 1.1101, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.11323745895142114, |
|
"grad_norm": 0.31221917271614075, |
|
"learning_rate": 9.433962264150944e-05, |
|
"loss": 1.0949, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.11776695730947798, |
|
"grad_norm": 0.3738393187522888, |
|
"learning_rate": 9.811320754716981e-05, |
|
"loss": 1.1372, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.12229645566753482, |
|
"grad_norm": 0.2999807596206665, |
|
"learning_rate": 0.0001018867924528302, |
|
"loss": 1.1028, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.12682595402559166, |
|
"grad_norm": 0.4104474186897278, |
|
"learning_rate": 0.00010566037735849057, |
|
"loss": 1.0796, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1313554523836485, |
|
"grad_norm": 0.2639298141002655, |
|
"learning_rate": 0.00010943396226415095, |
|
"loss": 1.0626, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.13588495074170537, |
|
"grad_norm": 0.2657984495162964, |
|
"learning_rate": 0.00011320754716981132, |
|
"loss": 1.0418, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1404144490997622, |
|
"grad_norm": 0.2493669092655182, |
|
"learning_rate": 0.0001169811320754717, |
|
"loss": 1.0157, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.14494394745781905, |
|
"grad_norm": 0.21642285585403442, |
|
"learning_rate": 0.00012075471698113207, |
|
"loss": 0.9852, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1494734458158759, |
|
"grad_norm": 0.2093484252691269, |
|
"learning_rate": 0.00012452830188679244, |
|
"loss": 0.9938, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.15400294417393273, |
|
"grad_norm": 0.2212437391281128, |
|
"learning_rate": 0.00012830188679245283, |
|
"loss": 1.0289, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.1585324425319896, |
|
"grad_norm": 0.22111104428768158, |
|
"learning_rate": 0.0001320754716981132, |
|
"loss": 0.9656, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.16306194089004641, |
|
"grad_norm": 0.31839072704315186, |
|
"learning_rate": 0.0001358490566037736, |
|
"loss": 0.9723, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.16759143924810327, |
|
"grad_norm": 0.26599910855293274, |
|
"learning_rate": 0.00013962264150943395, |
|
"loss": 0.9503, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.17212093760616012, |
|
"grad_norm": 0.273809552192688, |
|
"learning_rate": 0.00014339622641509434, |
|
"loss": 0.9786, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.17665043596421695, |
|
"grad_norm": 0.1905912607908249, |
|
"learning_rate": 0.00014716981132075472, |
|
"loss": 0.9271, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1811799343222738, |
|
"grad_norm": 0.21957655251026154, |
|
"learning_rate": 0.0001509433962264151, |
|
"loss": 0.911, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.18570943268033066, |
|
"grad_norm": 0.21992002427577972, |
|
"learning_rate": 0.0001547169811320755, |
|
"loss": 0.9434, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.1902389310383875, |
|
"grad_norm": 0.2033444494009018, |
|
"learning_rate": 0.00015849056603773587, |
|
"loss": 0.9189, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.19476842939644434, |
|
"grad_norm": 0.2479432225227356, |
|
"learning_rate": 0.00016226415094339625, |
|
"loss": 0.9137, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.1992979277545012, |
|
"grad_norm": 0.26578351855278015, |
|
"learning_rate": 0.0001660377358490566, |
|
"loss": 0.9172, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.20382742611255802, |
|
"grad_norm": 0.17441338300704956, |
|
"learning_rate": 0.000169811320754717, |
|
"loss": 0.8783, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.20835692447061488, |
|
"grad_norm": 0.18898604810237885, |
|
"learning_rate": 0.00017358490566037738, |
|
"loss": 0.874, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.21288642282867173, |
|
"grad_norm": 0.18335719406604767, |
|
"learning_rate": 0.00017735849056603776, |
|
"loss": 0.8604, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.21741592118672856, |
|
"grad_norm": 0.20873741805553436, |
|
"learning_rate": 0.00018113207547169812, |
|
"loss": 0.8368, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.22194541954478542, |
|
"grad_norm": 0.2140520066022873, |
|
"learning_rate": 0.0001849056603773585, |
|
"loss": 0.8729, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.22647491790284227, |
|
"grad_norm": 0.20203453302383423, |
|
"learning_rate": 0.00018867924528301889, |
|
"loss": 0.836, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2310044162608991, |
|
"grad_norm": 0.185277059674263, |
|
"learning_rate": 0.00019245283018867927, |
|
"loss": 0.8224, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.23553391461895595, |
|
"grad_norm": 0.207021564245224, |
|
"learning_rate": 0.00019622641509433963, |
|
"loss": 0.8482, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.2400634129770128, |
|
"grad_norm": 0.19016426801681519, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8296, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.24459291133506963, |
|
"grad_norm": 0.20634956657886505, |
|
"learning_rate": 0.00019999983174896345, |
|
"loss": 0.8294, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.2491224096931265, |
|
"grad_norm": 0.16894035041332245, |
|
"learning_rate": 0.00019999932699641984, |
|
"loss": 0.7966, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.2536519080511833, |
|
"grad_norm": 0.21543951332569122, |
|
"learning_rate": 0.00019999848574406778, |
|
"loss": 0.819, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.2581814064092402, |
|
"grad_norm": 0.18474166095256805, |
|
"learning_rate": 0.000199997307994738, |
|
"loss": 0.8073, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.262710904767297, |
|
"grad_norm": 0.1627601534128189, |
|
"learning_rate": 0.0001999957937523937, |
|
"loss": 0.798, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.26724040312535385, |
|
"grad_norm": 0.16344527900218964, |
|
"learning_rate": 0.0001999939430221304, |
|
"loss": 0.7846, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.27176990148341074, |
|
"grad_norm": 0.1784357726573944, |
|
"learning_rate": 0.00019999175581017573, |
|
"loss": 0.7892, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.27629939984146756, |
|
"grad_norm": 0.1735469251871109, |
|
"learning_rate": 0.00019998923212388977, |
|
"loss": 0.7624, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.2808288981995244, |
|
"grad_norm": 0.20232649147510529, |
|
"learning_rate": 0.00019998637197176478, |
|
"loss": 0.7754, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.2853583965575813, |
|
"grad_norm": 0.21980105340480804, |
|
"learning_rate": 0.00019998317536342524, |
|
"loss": 0.7896, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.2898878949156381, |
|
"grad_norm": 0.15072612464427948, |
|
"learning_rate": 0.00019997964230962774, |
|
"loss": 0.7451, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.2944173932736949, |
|
"grad_norm": 0.17559681832790375, |
|
"learning_rate": 0.00019997577282226115, |
|
"loss": 0.719, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.2989468916317518, |
|
"grad_norm": 0.17159104347229004, |
|
"learning_rate": 0.00019997156691434632, |
|
"loss": 0.7356, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.30347638998980864, |
|
"grad_norm": 0.20724473893642426, |
|
"learning_rate": 0.00019996702460003623, |
|
"loss": 0.7257, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.30800588834786546, |
|
"grad_norm": 0.15702813863754272, |
|
"learning_rate": 0.00019996214589461592, |
|
"loss": 0.7104, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.31253538670592235, |
|
"grad_norm": 0.185310959815979, |
|
"learning_rate": 0.00019995693081450227, |
|
"loss": 0.7192, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.3170648850639792, |
|
"grad_norm": 0.17659538984298706, |
|
"learning_rate": 0.00019995137937724413, |
|
"loss": 0.7084, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.321594383422036, |
|
"grad_norm": 0.16541071236133575, |
|
"learning_rate": 0.00019994549160152225, |
|
"loss": 0.7179, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.32612388178009283, |
|
"grad_norm": 0.16881656646728516, |
|
"learning_rate": 0.00019993926750714905, |
|
"loss": 0.7039, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3306533801381497, |
|
"grad_norm": 0.18213717639446259, |
|
"learning_rate": 0.0001999327071150688, |
|
"loss": 0.712, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.33518287849620654, |
|
"grad_norm": 0.16946811974048615, |
|
"learning_rate": 0.00019992581044735736, |
|
"loss": 0.7041, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.33971237685426336, |
|
"grad_norm": 0.20027601718902588, |
|
"learning_rate": 0.00019991857752722208, |
|
"loss": 0.6937, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.34424187521232025, |
|
"grad_norm": 0.17900145053863525, |
|
"learning_rate": 0.000199911008379002, |
|
"loss": 0.689, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.3487713735703771, |
|
"grad_norm": 0.1626042276620865, |
|
"learning_rate": 0.00019990310302816738, |
|
"loss": 0.6923, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.3533008719284339, |
|
"grad_norm": 0.1776456981897354, |
|
"learning_rate": 0.00019989486150131987, |
|
"loss": 0.6725, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.3578303702864908, |
|
"grad_norm": 0.16232900321483612, |
|
"learning_rate": 0.00019988628382619242, |
|
"loss": 0.6621, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.3623598686445476, |
|
"grad_norm": 0.16653478145599365, |
|
"learning_rate": 0.00019987737003164912, |
|
"loss": 0.6825, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.36688936700260444, |
|
"grad_norm": 0.16946111619472504, |
|
"learning_rate": 0.00019986812014768503, |
|
"loss": 0.6634, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.3714188653606613, |
|
"grad_norm": 0.16169489920139313, |
|
"learning_rate": 0.00019985853420542617, |
|
"loss": 0.6592, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.37594836371871815, |
|
"grad_norm": 0.1830553561449051, |
|
"learning_rate": 0.0001998486122371295, |
|
"loss": 0.6661, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.380477862076775, |
|
"grad_norm": 0.18185435235500336, |
|
"learning_rate": 0.00019983835427618262, |
|
"loss": 0.6331, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.38500736043483186, |
|
"grad_norm": 0.17038173973560333, |
|
"learning_rate": 0.0001998277603571038, |
|
"loss": 0.6274, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.3895368587928887, |
|
"grad_norm": 0.15142400562763214, |
|
"learning_rate": 0.00019981683051554174, |
|
"loss": 0.6282, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.3940663571509455, |
|
"grad_norm": 0.18170781433582306, |
|
"learning_rate": 0.00019980556478827564, |
|
"loss": 0.605, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.3985958555090024, |
|
"grad_norm": 0.1576147973537445, |
|
"learning_rate": 0.0001997939632132149, |
|
"loss": 0.6393, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.4031253538670592, |
|
"grad_norm": 0.17267905175685883, |
|
"learning_rate": 0.00019978202582939902, |
|
"loss": 0.6274, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.40765485222511605, |
|
"grad_norm": 0.19358091056346893, |
|
"learning_rate": 0.00019976975267699758, |
|
"loss": 0.5976, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.41218435058317293, |
|
"grad_norm": 0.20368127524852753, |
|
"learning_rate": 0.00019975714379730998, |
|
"loss": 0.637, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.41671384894122976, |
|
"grad_norm": 0.17673739790916443, |
|
"learning_rate": 0.00019974419923276537, |
|
"loss": 0.6014, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.4212433472992866, |
|
"grad_norm": 0.1759296953678131, |
|
"learning_rate": 0.0001997309190269225, |
|
"loss": 0.5822, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.42577284565734347, |
|
"grad_norm": 0.15785963833332062, |
|
"learning_rate": 0.00019971730322446949, |
|
"loss": 0.5856, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.4303023440154003, |
|
"grad_norm": 0.16193810105323792, |
|
"learning_rate": 0.00019970335187122383, |
|
"loss": 0.5854, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.4348318423734571, |
|
"grad_norm": 0.1555752456188202, |
|
"learning_rate": 0.0001996890650141321, |
|
"loss": 0.5852, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.439361340731514, |
|
"grad_norm": 0.17118428647518158, |
|
"learning_rate": 0.00019967444270126988, |
|
"loss": 0.5816, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.44389083908957083, |
|
"grad_norm": 0.15966954827308655, |
|
"learning_rate": 0.00019965948498184153, |
|
"loss": 0.5641, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.44842033744762766, |
|
"grad_norm": 0.20606863498687744, |
|
"learning_rate": 0.0001996441919061801, |
|
"loss": 0.588, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.45294983580568454, |
|
"grad_norm": 0.17158259451389313, |
|
"learning_rate": 0.0001996285635257471, |
|
"loss": 0.5437, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.45747933416374137, |
|
"grad_norm": 0.1764381229877472, |
|
"learning_rate": 0.0001996125998931324, |
|
"loss": 0.5546, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.4620088325217982, |
|
"grad_norm": 0.17307806015014648, |
|
"learning_rate": 0.0001995963010620539, |
|
"loss": 0.5442, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.4665383308798551, |
|
"grad_norm": 0.17395785450935364, |
|
"learning_rate": 0.00019957966708735754, |
|
"loss": 0.5198, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.4710678292379119, |
|
"grad_norm": 0.17280320823192596, |
|
"learning_rate": 0.00019956269802501696, |
|
"loss": 0.5235, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.47559732759596873, |
|
"grad_norm": 0.1894276738166809, |
|
"learning_rate": 0.00019954539393213344, |
|
"loss": 0.539, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.4801268259540256, |
|
"grad_norm": 0.19094131886959076, |
|
"learning_rate": 0.0001995277548669356, |
|
"loss": 0.5445, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.48465632431208244, |
|
"grad_norm": 0.182444766163826, |
|
"learning_rate": 0.00019950978088877923, |
|
"loss": 0.526, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.48918582267013927, |
|
"grad_norm": 0.2150132805109024, |
|
"learning_rate": 0.00019949147205814715, |
|
"loss": 0.5334, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.49371532102819615, |
|
"grad_norm": 0.17609047889709473, |
|
"learning_rate": 0.000199472828436649, |
|
"loss": 0.5239, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.498244819386253, |
|
"grad_norm": 0.18994882702827454, |
|
"learning_rate": 0.0001994538500870209, |
|
"loss": 0.5163, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5027743177443098, |
|
"grad_norm": 0.1678932011127472, |
|
"learning_rate": 0.00019943453707312544, |
|
"loss": 0.5379, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5073038161023666, |
|
"grad_norm": 0.18330644071102142, |
|
"learning_rate": 0.00019941488945995125, |
|
"loss": 0.5037, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5118333144604235, |
|
"grad_norm": 0.1946277767419815, |
|
"learning_rate": 0.00019939490731361298, |
|
"loss": 0.5169, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5163628128184804, |
|
"grad_norm": 0.1769060641527176, |
|
"learning_rate": 0.00019937459070135097, |
|
"loss": 0.5016, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.5208923111765372, |
|
"grad_norm": 0.1812835931777954, |
|
"learning_rate": 0.00019935393969153106, |
|
"loss": 0.4974, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.525421809534594, |
|
"grad_norm": 0.17336933314800262, |
|
"learning_rate": 0.00019933295435364432, |
|
"loss": 0.4936, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.5299513078926509, |
|
"grad_norm": 0.19504410028457642, |
|
"learning_rate": 0.00019931163475830682, |
|
"loss": 0.4892, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.5344808062507077, |
|
"grad_norm": 0.17446300387382507, |
|
"learning_rate": 0.00019928998097725945, |
|
"loss": 0.4851, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.5390103046087645, |
|
"grad_norm": 0.2062528431415558, |
|
"learning_rate": 0.00019926799308336767, |
|
"loss": 0.4796, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.5435398029668215, |
|
"grad_norm": 0.17791499197483063, |
|
"learning_rate": 0.00019924567115062116, |
|
"loss": 0.4704, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5480693013248783, |
|
"grad_norm": 0.20112474262714386, |
|
"learning_rate": 0.00019922301525413368, |
|
"loss": 0.4848, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.5525987996829351, |
|
"grad_norm": 0.1905170977115631, |
|
"learning_rate": 0.00019920002547014283, |
|
"loss": 0.4848, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.557128298040992, |
|
"grad_norm": 0.2167678326368332, |
|
"learning_rate": 0.00019917670187600967, |
|
"loss": 0.475, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.5616577963990488, |
|
"grad_norm": 0.1879906803369522, |
|
"learning_rate": 0.00019915304455021859, |
|
"loss": 0.4661, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.5661872947571056, |
|
"grad_norm": 0.17811033129692078, |
|
"learning_rate": 0.00019912905357237701, |
|
"loss": 0.4758, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.5707167931151625, |
|
"grad_norm": 0.18101903796195984, |
|
"learning_rate": 0.00019910472902321503, |
|
"loss": 0.4668, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.5752462914732194, |
|
"grad_norm": 0.1657211035490036, |
|
"learning_rate": 0.0001990800709845853, |
|
"loss": 0.4645, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.5797757898312762, |
|
"grad_norm": 0.32196566462516785, |
|
"learning_rate": 0.00019905507953946257, |
|
"loss": 0.4442, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.584305288189333, |
|
"grad_norm": 0.2010417878627777, |
|
"learning_rate": 0.00019902975477194363, |
|
"loss": 0.4633, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.5888347865473899, |
|
"grad_norm": 0.18759405612945557, |
|
"learning_rate": 0.00019900409676724682, |
|
"loss": 0.4642, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.5933642849054467, |
|
"grad_norm": 0.19315552711486816, |
|
"learning_rate": 0.00019897810561171189, |
|
"loss": 0.4308, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.5978937832635036, |
|
"grad_norm": 0.194192036986351, |
|
"learning_rate": 0.00019895178139279956, |
|
"loss": 0.4424, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.6024232816215604, |
|
"grad_norm": 0.17403574287891388, |
|
"learning_rate": 0.00019892512419909138, |
|
"loss": 0.4491, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.6069527799796173, |
|
"grad_norm": 0.20866619050502777, |
|
"learning_rate": 0.00019889813412028942, |
|
"loss": 0.4546, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.6114822783376741, |
|
"grad_norm": 0.1847338080406189, |
|
"learning_rate": 0.00019887081124721583, |
|
"loss": 0.4354, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.6160117766957309, |
|
"grad_norm": 0.20528827607631683, |
|
"learning_rate": 0.00019884315567181263, |
|
"loss": 0.432, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.6205412750537878, |
|
"grad_norm": 0.19688895344734192, |
|
"learning_rate": 0.00019881516748714137, |
|
"loss": 0.4256, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.6250707734118447, |
|
"grad_norm": 0.1834789514541626, |
|
"learning_rate": 0.00019878684678738295, |
|
"loss": 0.4142, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.6296002717699015, |
|
"grad_norm": 0.1904083490371704, |
|
"learning_rate": 0.00019875819366783705, |
|
"loss": 0.4072, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.6341297701279583, |
|
"grad_norm": 0.24558007717132568, |
|
"learning_rate": 0.00019872920822492206, |
|
"loss": 0.4168, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.6386592684860152, |
|
"grad_norm": 0.19825737178325653, |
|
"learning_rate": 0.0001986998905561745, |
|
"loss": 0.4102, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.643188766844072, |
|
"grad_norm": 0.2427905946969986, |
|
"learning_rate": 0.00019867024076024908, |
|
"loss": 0.4266, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.6477182652021288, |
|
"grad_norm": 0.20517700910568237, |
|
"learning_rate": 0.00019864025893691784, |
|
"loss": 0.4155, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.6522477635601857, |
|
"grad_norm": 0.19519874453544617, |
|
"learning_rate": 0.00019860994518707036, |
|
"loss": 0.4093, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.6567772619182426, |
|
"grad_norm": 0.17730577290058136, |
|
"learning_rate": 0.0001985792996127129, |
|
"loss": 0.3932, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.6613067602762994, |
|
"grad_norm": 0.1811046451330185, |
|
"learning_rate": 0.00019854832231696855, |
|
"loss": 0.3953, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.6658362586343562, |
|
"grad_norm": 0.18473340570926666, |
|
"learning_rate": 0.00019851701340407654, |
|
"loss": 0.3846, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.6703657569924131, |
|
"grad_norm": 0.1876707524061203, |
|
"learning_rate": 0.000198485372979392, |
|
"loss": 0.3947, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.6748952553504699, |
|
"grad_norm": 0.21453642845153809, |
|
"learning_rate": 0.00019845340114938562, |
|
"loss": 0.3893, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.6794247537085267, |
|
"grad_norm": 0.19314515590667725, |
|
"learning_rate": 0.00019842109802164327, |
|
"loss": 0.3857, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6839542520665837, |
|
"grad_norm": 0.18713776767253876, |
|
"learning_rate": 0.0001983884637048656, |
|
"loss": 0.3945, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.6884837504246405, |
|
"grad_norm": 0.18545708060264587, |
|
"learning_rate": 0.00019835549830886785, |
|
"loss": 0.3829, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.6930132487826973, |
|
"grad_norm": 0.163354754447937, |
|
"learning_rate": 0.00019832220194457919, |
|
"loss": 0.3681, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.6975427471407541, |
|
"grad_norm": 0.19729359447956085, |
|
"learning_rate": 0.0001982885747240426, |
|
"loss": 0.376, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.702072245498811, |
|
"grad_norm": 0.19601188600063324, |
|
"learning_rate": 0.00019825461676041436, |
|
"loss": 0.3738, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.7066017438568678, |
|
"grad_norm": 0.184451162815094, |
|
"learning_rate": 0.00019822032816796376, |
|
"loss": 0.3689, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.7111312422149247, |
|
"grad_norm": 0.16905899345874786, |
|
"learning_rate": 0.0001981857090620726, |
|
"loss": 0.3667, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.7156607405729816, |
|
"grad_norm": 0.17829935252666473, |
|
"learning_rate": 0.0001981507595592349, |
|
"loss": 0.3718, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.7201902389310384, |
|
"grad_norm": 0.17314116656780243, |
|
"learning_rate": 0.0001981154797770564, |
|
"loss": 0.3711, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.7247197372890952, |
|
"grad_norm": 0.17752452194690704, |
|
"learning_rate": 0.0001980798698342544, |
|
"loss": 0.3711, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.729249235647152, |
|
"grad_norm": 0.16267523169517517, |
|
"learning_rate": 0.00019804392985065702, |
|
"loss": 0.3461, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.7337787340052089, |
|
"grad_norm": 0.1715889424085617, |
|
"learning_rate": 0.00019800765994720308, |
|
"loss": 0.3542, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.7383082323632658, |
|
"grad_norm": 0.2011169195175171, |
|
"learning_rate": 0.00019797106024594153, |
|
"loss": 0.3602, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.7428377307213226, |
|
"grad_norm": 0.16859227418899536, |
|
"learning_rate": 0.00019793413087003115, |
|
"loss": 0.3509, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.7473672290793795, |
|
"grad_norm": 0.18904465436935425, |
|
"learning_rate": 0.0001978968719437401, |
|
"loss": 0.3619, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.7518967274374363, |
|
"grad_norm": 0.1918095499277115, |
|
"learning_rate": 0.00019785928359244533, |
|
"loss": 0.3529, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.7564262257954931, |
|
"grad_norm": 0.16930030286312103, |
|
"learning_rate": 0.0001978213659426325, |
|
"loss": 0.3505, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.76095572415355, |
|
"grad_norm": 0.19345726072788239, |
|
"learning_rate": 0.00019778311912189528, |
|
"loss": 0.3548, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.7654852225116069, |
|
"grad_norm": 0.1755731701850891, |
|
"learning_rate": 0.000197744543258935, |
|
"loss": 0.3549, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.7700147208696637, |
|
"grad_norm": 0.17827914655208588, |
|
"learning_rate": 0.00019770563848356024, |
|
"loss": 0.3622, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.7745442192277205, |
|
"grad_norm": 0.1955813765525818, |
|
"learning_rate": 0.0001976664049266864, |
|
"loss": 0.3412, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.7790737175857774, |
|
"grad_norm": 0.18960636854171753, |
|
"learning_rate": 0.00019762684272033515, |
|
"loss": 0.3438, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.7836032159438342, |
|
"grad_norm": 0.20935559272766113, |
|
"learning_rate": 0.00019758695199763418, |
|
"loss": 0.3497, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.788132714301891, |
|
"grad_norm": 0.18760916590690613, |
|
"learning_rate": 0.00019754673289281663, |
|
"loss": 0.3299, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.792662212659948, |
|
"grad_norm": 0.2013741135597229, |
|
"learning_rate": 0.0001975061855412206, |
|
"loss": 0.3395, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.7971917110180048, |
|
"grad_norm": 0.18885807693004608, |
|
"learning_rate": 0.0001974653100792887, |
|
"loss": 0.3321, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.8017212093760616, |
|
"grad_norm": 0.18193817138671875, |
|
"learning_rate": 0.00019742410664456777, |
|
"loss": 0.3387, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.8062507077341184, |
|
"grad_norm": 0.16840125620365143, |
|
"learning_rate": 0.00019738257537570822, |
|
"loss": 0.3302, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.8107802060921753, |
|
"grad_norm": 0.1618867665529251, |
|
"learning_rate": 0.00019734071641246365, |
|
"loss": 0.3212, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.8153097044502321, |
|
"grad_norm": 0.20026183128356934, |
|
"learning_rate": 0.00019729852989569028, |
|
"loss": 0.3274, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.819839202808289, |
|
"grad_norm": 0.18741321563720703, |
|
"learning_rate": 0.00019725601596734668, |
|
"loss": 0.3267, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.8243687011663459, |
|
"grad_norm": 0.17450092732906342, |
|
"learning_rate": 0.000197213174770493, |
|
"loss": 0.3193, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.8288981995244027, |
|
"grad_norm": 0.1721801608800888, |
|
"learning_rate": 0.00019717000644929087, |
|
"loss": 0.3127, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.8334276978824595, |
|
"grad_norm": 0.18926140666007996, |
|
"learning_rate": 0.00019712651114900257, |
|
"loss": 0.3214, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.8379571962405163, |
|
"grad_norm": 0.17309771478176117, |
|
"learning_rate": 0.0001970826890159906, |
|
"loss": 0.318, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.8424866945985732, |
|
"grad_norm": 0.18818823993206024, |
|
"learning_rate": 0.00019703854019771742, |
|
"loss": 0.3154, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.84701619295663, |
|
"grad_norm": 0.18680931627750397, |
|
"learning_rate": 0.00019699406484274468, |
|
"loss": 0.3104, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.8515456913146869, |
|
"grad_norm": 0.16489103436470032, |
|
"learning_rate": 0.0001969492631007329, |
|
"loss": 0.3232, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.8560751896727438, |
|
"grad_norm": 0.17721644043922424, |
|
"learning_rate": 0.0001969041351224409, |
|
"loss": 0.3034, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.8606046880308006, |
|
"grad_norm": 0.19497451186180115, |
|
"learning_rate": 0.00019685868105972517, |
|
"loss": 0.3092, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.8651341863888574, |
|
"grad_norm": 0.20427413284778595, |
|
"learning_rate": 0.00019681290106553969, |
|
"loss": 0.3158, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.8696636847469142, |
|
"grad_norm": 0.18642422556877136, |
|
"learning_rate": 0.00019676679529393498, |
|
"loss": 0.3058, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.8741931831049711, |
|
"grad_norm": 0.16172035038471222, |
|
"learning_rate": 0.00019672036390005798, |
|
"loss": 0.3069, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.878722681463028, |
|
"grad_norm": 0.15888796746730804, |
|
"learning_rate": 0.00019667360704015127, |
|
"loss": 0.3075, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.8832521798210848, |
|
"grad_norm": 0.16608227789402008, |
|
"learning_rate": 0.0001966265248715527, |
|
"loss": 0.295, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.8877816781791417, |
|
"grad_norm": 0.18529315292835236, |
|
"learning_rate": 0.00019657911755269466, |
|
"loss": 0.3087, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.8923111765371985, |
|
"grad_norm": 0.1623723804950714, |
|
"learning_rate": 0.0001965313852431038, |
|
"loss": 0.318, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.8968406748952553, |
|
"grad_norm": 0.18999403715133667, |
|
"learning_rate": 0.0001964833281034004, |
|
"loss": 0.3013, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.9013701732533121, |
|
"grad_norm": 0.1742704212665558, |
|
"learning_rate": 0.0001964349462952976, |
|
"loss": 0.2906, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.9058996716113691, |
|
"grad_norm": 0.15007524192333221, |
|
"learning_rate": 0.00019638623998160127, |
|
"loss": 0.2909, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.9104291699694259, |
|
"grad_norm": 0.18087700009346008, |
|
"learning_rate": 0.00019633720932620916, |
|
"loss": 0.2852, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.9149586683274827, |
|
"grad_norm": 0.172203928232193, |
|
"learning_rate": 0.0001962878544941104, |
|
"loss": 0.2894, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.9194881666855396, |
|
"grad_norm": 0.1811007559299469, |
|
"learning_rate": 0.00019623817565138512, |
|
"loss": 0.2905, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.9240176650435964, |
|
"grad_norm": 0.17736268043518066, |
|
"learning_rate": 0.00019618817296520355, |
|
"loss": 0.2855, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.9285471634016532, |
|
"grad_norm": 0.1875537484884262, |
|
"learning_rate": 0.00019613784660382582, |
|
"loss": 0.3006, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.9330766617597102, |
|
"grad_norm": 0.16459111869335175, |
|
"learning_rate": 0.00019608719673660117, |
|
"loss": 0.2928, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.937606160117767, |
|
"grad_norm": 0.19852280616760254, |
|
"learning_rate": 0.00019603622353396745, |
|
"loss": 0.2877, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.9421356584758238, |
|
"grad_norm": 0.1441079080104828, |
|
"learning_rate": 0.00019598492716745055, |
|
"loss": 0.2722, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.9466651568338806, |
|
"grad_norm": 0.17091263830661774, |
|
"learning_rate": 0.00019593330780966377, |
|
"loss": 0.2845, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.9511946551919375, |
|
"grad_norm": 0.17907531559467316, |
|
"learning_rate": 0.00019588136563430735, |
|
"loss": 0.2881, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.9557241535499943, |
|
"grad_norm": 0.18411681056022644, |
|
"learning_rate": 0.00019582910081616782, |
|
"loss": 0.2906, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.9602536519080512, |
|
"grad_norm": 0.19341252744197845, |
|
"learning_rate": 0.00019577651353111733, |
|
"loss": 0.2926, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.9647831502661081, |
|
"grad_norm": 0.17022013664245605, |
|
"learning_rate": 0.00019572360395611317, |
|
"loss": 0.2728, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.9693126486241649, |
|
"grad_norm": 0.17077523469924927, |
|
"learning_rate": 0.00019567037226919721, |
|
"loss": 0.2754, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.9738421469822217, |
|
"grad_norm": 0.16188162565231323, |
|
"learning_rate": 0.00019561681864949514, |
|
"loss": 0.2761, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.9783716453402785, |
|
"grad_norm": 0.16258101165294647, |
|
"learning_rate": 0.00019556294327721603, |
|
"loss": 0.2724, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.9829011436983354, |
|
"grad_norm": 0.1751459836959839, |
|
"learning_rate": 0.00019550874633365162, |
|
"loss": 0.2844, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.9874306420563923, |
|
"grad_norm": 0.14674732089042664, |
|
"learning_rate": 0.0001954542280011757, |
|
"loss": 0.2818, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.9919601404144491, |
|
"grad_norm": 0.1843065619468689, |
|
"learning_rate": 0.00019539938846324363, |
|
"loss": 0.2736, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.996489638772506, |
|
"grad_norm": 0.18449115753173828, |
|
"learning_rate": 0.00019534422790439164, |
|
"loss": 0.2828, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.9996602876231457, |
|
"eval_loss": 0.26604515314102173, |
|
"eval_runtime": 617.1505, |
|
"eval_samples_per_second": 12.752, |
|
"eval_steps_per_second": 1.594, |
|
"step": 2207 |
|
}, |
|
{ |
|
"epoch": 1.001358849507417, |
|
"grad_norm": 0.15234586596488953, |
|
"learning_rate": 0.00019528874651023606, |
|
"loss": 0.2608, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.0058883478654739, |
|
"grad_norm": 0.15887659788131714, |
|
"learning_rate": 0.00019523294446747297, |
|
"loss": 0.2417, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.0104178462235307, |
|
"grad_norm": 0.16629189252853394, |
|
"learning_rate": 0.00019517682196387744, |
|
"loss": 0.2306, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.0149473445815875, |
|
"grad_norm": 0.17960551381111145, |
|
"learning_rate": 0.00019512037918830282, |
|
"loss": 0.2279, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.0194768429396444, |
|
"grad_norm": 0.1671302169561386, |
|
"learning_rate": 0.0001950636163306802, |
|
"loss": 0.2181, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.0240063412977012, |
|
"grad_norm": 0.16400860249996185, |
|
"learning_rate": 0.0001950065335820178, |
|
"loss": 0.2333, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.0285358396557582, |
|
"grad_norm": 0.15259268879890442, |
|
"learning_rate": 0.00019494913113440022, |
|
"loss": 0.2307, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.033065338013815, |
|
"grad_norm": 0.1612786501646042, |
|
"learning_rate": 0.00019489140918098796, |
|
"loss": 0.2349, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.0375948363718719, |
|
"grad_norm": 0.15766066312789917, |
|
"learning_rate": 0.00019483336791601655, |
|
"loss": 0.23, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.0421243347299287, |
|
"grad_norm": 0.16044190526008606, |
|
"learning_rate": 0.00019477500753479603, |
|
"loss": 0.2234, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.0466538330879855, |
|
"grad_norm": 0.18357709050178528, |
|
"learning_rate": 0.00019471632823371028, |
|
"loss": 0.2208, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.0511833314460424, |
|
"grad_norm": 0.1702904850244522, |
|
"learning_rate": 0.00019465733021021645, |
|
"loss": 0.2248, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.0557128298040992, |
|
"grad_norm": 0.15621191263198853, |
|
"learning_rate": 0.00019459801366284403, |
|
"loss": 0.2286, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.060242328162156, |
|
"grad_norm": 0.1782391220331192, |
|
"learning_rate": 0.00019453837879119444, |
|
"loss": 0.2304, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.0647718265202128, |
|
"grad_norm": 0.16530479490756989, |
|
"learning_rate": 0.00019447842579594027, |
|
"loss": 0.2306, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.0693013248782697, |
|
"grad_norm": 0.16082873940467834, |
|
"learning_rate": 0.00019441815487882463, |
|
"loss": 0.2252, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.0738308232363265, |
|
"grad_norm": 0.15404802560806274, |
|
"learning_rate": 0.00019435756624266035, |
|
"loss": 0.216, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.0783603215943833, |
|
"grad_norm": 0.14842167496681213, |
|
"learning_rate": 0.00019429666009132944, |
|
"loss": 0.2218, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.0828898199524404, |
|
"grad_norm": 0.16312135756015778, |
|
"learning_rate": 0.00019423543662978245, |
|
"loss": 0.212, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.0874193183104972, |
|
"grad_norm": 0.17386338114738464, |
|
"learning_rate": 0.00019417389606403752, |
|
"loss": 0.2251, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.091948816668554, |
|
"grad_norm": 0.17737415432929993, |
|
"learning_rate": 0.00019411203860117995, |
|
"loss": 0.2304, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.0964783150266109, |
|
"grad_norm": 0.16693584620952606, |
|
"learning_rate": 0.00019404986444936136, |
|
"loss": 0.2175, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.1010078133846677, |
|
"grad_norm": 0.1775166392326355, |
|
"learning_rate": 0.00019398737381779913, |
|
"loss": 0.2209, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.1055373117427245, |
|
"grad_norm": 0.1629152148962021, |
|
"learning_rate": 0.00019392456691677546, |
|
"loss": 0.2113, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.1100668101007813, |
|
"grad_norm": 0.1428159475326538, |
|
"learning_rate": 0.0001938614439576369, |
|
"loss": 0.2141, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.1145963084588382, |
|
"grad_norm": 0.1580020934343338, |
|
"learning_rate": 0.0001937980051527935, |
|
"loss": 0.2193, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.119125806816895, |
|
"grad_norm": 0.13861976563930511, |
|
"learning_rate": 0.0001937342507157182, |
|
"loss": 0.2091, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.1236553051749518, |
|
"grad_norm": 0.16170430183410645, |
|
"learning_rate": 0.00019367018086094594, |
|
"loss": 0.2175, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.1281848035330087, |
|
"grad_norm": 0.15579678118228912, |
|
"learning_rate": 0.00019360579580407315, |
|
"loss": 0.2091, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.1327143018910655, |
|
"grad_norm": 0.15239351987838745, |
|
"learning_rate": 0.00019354109576175685, |
|
"loss": 0.2189, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.1372438002491223, |
|
"grad_norm": 0.16122813522815704, |
|
"learning_rate": 0.00019347608095171407, |
|
"loss": 0.2159, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.1417732986071791, |
|
"grad_norm": 0.14791563153266907, |
|
"learning_rate": 0.00019341075159272096, |
|
"loss": 0.2093, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.1463027969652362, |
|
"grad_norm": 0.138755664229393, |
|
"learning_rate": 0.0001933451079046122, |
|
"loss": 0.2231, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.150832295323293, |
|
"grad_norm": 0.15061049163341522, |
|
"learning_rate": 0.0001932791501082801, |
|
"loss": 0.2067, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.1553617936813498, |
|
"grad_norm": 0.17541393637657166, |
|
"learning_rate": 0.00019321287842567408, |
|
"loss": 0.2197, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.1598912920394067, |
|
"grad_norm": 0.17274054884910583, |
|
"learning_rate": 0.00019314629307979968, |
|
"loss": 0.2179, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.1644207903974635, |
|
"grad_norm": 0.16083642840385437, |
|
"learning_rate": 0.000193079394294718, |
|
"loss": 0.2139, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.1689502887555203, |
|
"grad_norm": 0.16815818846225739, |
|
"learning_rate": 0.00019301218229554482, |
|
"loss": 0.2158, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.1734797871135771, |
|
"grad_norm": 0.15939727425575256, |
|
"learning_rate": 0.0001929446573084499, |
|
"loss": 0.2139, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.178009285471634, |
|
"grad_norm": 0.14855942130088806, |
|
"learning_rate": 0.00019287681956065624, |
|
"loss": 0.2156, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.1825387838296908, |
|
"grad_norm": 0.16065727174282074, |
|
"learning_rate": 0.00019280866928043927, |
|
"loss": 0.2131, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.1870682821877476, |
|
"grad_norm": 0.15156914293766022, |
|
"learning_rate": 0.00019274020669712608, |
|
"loss": 0.2133, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.1915977805458047, |
|
"grad_norm": 0.15163294970989227, |
|
"learning_rate": 0.00019267143204109469, |
|
"loss": 0.2172, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.1961272789038615, |
|
"grad_norm": 0.14060626924037933, |
|
"learning_rate": 0.00019260234554377325, |
|
"loss": 0.2048, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.2006567772619183, |
|
"grad_norm": 0.16215626895427704, |
|
"learning_rate": 0.00019253294743763925, |
|
"loss": 0.2077, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.2051862756199752, |
|
"grad_norm": 0.13906173408031464, |
|
"learning_rate": 0.00019246323795621875, |
|
"loss": 0.2125, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.209715773978032, |
|
"grad_norm": 0.15761959552764893, |
|
"learning_rate": 0.0001923932173340856, |
|
"loss": 0.2104, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.2142452723360888, |
|
"grad_norm": 0.16140113770961761, |
|
"learning_rate": 0.00019232288580686068, |
|
"loss": 0.2131, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.2187747706941456, |
|
"grad_norm": 0.13611847162246704, |
|
"learning_rate": 0.000192252243611211, |
|
"loss": 0.2042, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.2233042690522025, |
|
"grad_norm": 0.14395853877067566, |
|
"learning_rate": 0.00019218129098484902, |
|
"loss": 0.2144, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.2278337674102593, |
|
"grad_norm": 0.14826107025146484, |
|
"learning_rate": 0.0001921100281665318, |
|
"loss": 0.2119, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.2323632657683161, |
|
"grad_norm": 0.1515769064426422, |
|
"learning_rate": 0.0001920384553960602, |
|
"loss": 0.2051, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.236892764126373, |
|
"grad_norm": 0.15898488461971283, |
|
"learning_rate": 0.00019196657291427807, |
|
"loss": 0.2127, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.2414222624844298, |
|
"grad_norm": 0.13833607733249664, |
|
"learning_rate": 0.00019189438096307146, |
|
"loss": 0.2097, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.2459517608424866, |
|
"grad_norm": 0.14516334235668182, |
|
"learning_rate": 0.0001918218797853678, |
|
"loss": 0.1958, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.2504812592005434, |
|
"grad_norm": 0.13684655725955963, |
|
"learning_rate": 0.00019174906962513504, |
|
"loss": 0.2196, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.2550107575586003, |
|
"grad_norm": 0.16645090281963348, |
|
"learning_rate": 0.00019167595072738084, |
|
"loss": 0.2095, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.2595402559166573, |
|
"grad_norm": 0.1568327695131302, |
|
"learning_rate": 0.00019160252333815187, |
|
"loss": 0.2046, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.2640697542747141, |
|
"grad_norm": 0.15349489450454712, |
|
"learning_rate": 0.00019152878770453279, |
|
"loss": 0.2124, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.268599252632771, |
|
"grad_norm": 0.16242361068725586, |
|
"learning_rate": 0.00019145474407464554, |
|
"loss": 0.2059, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.2731287509908278, |
|
"grad_norm": 0.15133287012577057, |
|
"learning_rate": 0.00019138039269764846, |
|
"loss": 0.2068, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.2776582493488846, |
|
"grad_norm": 0.1698140799999237, |
|
"learning_rate": 0.00019130573382373549, |
|
"loss": 0.2165, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.2821877477069414, |
|
"grad_norm": 0.16591964662075043, |
|
"learning_rate": 0.00019123076770413526, |
|
"loss": 0.2052, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.2867172460649983, |
|
"grad_norm": 0.14136140048503876, |
|
"learning_rate": 0.00019115549459111034, |
|
"loss": 0.1972, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.291246744423055, |
|
"grad_norm": 0.15886986255645752, |
|
"learning_rate": 0.0001910799147379563, |
|
"loss": 0.2178, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.295776242781112, |
|
"grad_norm": 0.143589586019516, |
|
"learning_rate": 0.00019100402839900097, |
|
"loss": 0.2139, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.300305741139169, |
|
"grad_norm": 0.16037988662719727, |
|
"learning_rate": 0.0001909278358296034, |
|
"loss": 0.2073, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.3048352394972258, |
|
"grad_norm": 0.1397211104631424, |
|
"learning_rate": 0.00019085133728615313, |
|
"loss": 0.2045, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.3093647378552826, |
|
"grad_norm": 0.1394536942243576, |
|
"learning_rate": 0.00019077453302606944, |
|
"loss": 0.194, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.3138942362133395, |
|
"grad_norm": 0.1598595380783081, |
|
"learning_rate": 0.00019069742330780014, |
|
"loss": 0.205, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.3184237345713963, |
|
"grad_norm": 0.16302059590816498, |
|
"learning_rate": 0.00019062000839082115, |
|
"loss": 0.2044, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.322953232929453, |
|
"grad_norm": 0.15237270295619965, |
|
"learning_rate": 0.0001905422885356352, |
|
"loss": 0.2061, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.32748273128751, |
|
"grad_norm": 0.16175110638141632, |
|
"learning_rate": 0.00019046426400377123, |
|
"loss": 0.2127, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.3320122296455668, |
|
"grad_norm": 0.17352445423603058, |
|
"learning_rate": 0.00019038593505778343, |
|
"loss": 0.2121, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.3365417280036236, |
|
"grad_norm": 0.15539845824241638, |
|
"learning_rate": 0.0001903073019612503, |
|
"loss": 0.1996, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.3410712263616804, |
|
"grad_norm": 0.1654234081506729, |
|
"learning_rate": 0.00019022836497877382, |
|
"loss": 0.1982, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.3456007247197372, |
|
"grad_norm": 0.15698087215423584, |
|
"learning_rate": 0.00019014912437597862, |
|
"loss": 0.2006, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.350130223077794, |
|
"grad_norm": 0.15171001851558685, |
|
"learning_rate": 0.00019006958041951094, |
|
"loss": 0.2066, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.354659721435851, |
|
"grad_norm": 0.15153132379055023, |
|
"learning_rate": 0.00018998973337703784, |
|
"loss": 0.1969, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.3591892197939077, |
|
"grad_norm": 0.14000695943832397, |
|
"learning_rate": 0.00018990958351724634, |
|
"loss": 0.2081, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.3637187181519645, |
|
"grad_norm": 0.14371009171009064, |
|
"learning_rate": 0.00018982913110984225, |
|
"loss": 0.1964, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.3682482165100216, |
|
"grad_norm": 0.1594901829957962, |
|
"learning_rate": 0.0001897483764255497, |
|
"loss": 0.2004, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.3727777148680784, |
|
"grad_norm": 0.15266938507556915, |
|
"learning_rate": 0.00018966731973610985, |
|
"loss": 0.2081, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.3773072132261353, |
|
"grad_norm": 0.17764367163181305, |
|
"learning_rate": 0.0001895859613142801, |
|
"loss": 0.2028, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.381836711584192, |
|
"grad_norm": 0.15086011588573456, |
|
"learning_rate": 0.0001895043014338333, |
|
"loss": 0.1984, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.386366209942249, |
|
"grad_norm": 0.1648501455783844, |
|
"learning_rate": 0.00018942234036955659, |
|
"loss": 0.2019, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.3908957083003057, |
|
"grad_norm": 0.1467510610818863, |
|
"learning_rate": 0.00018934007839725063, |
|
"loss": 0.1972, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.3954252066583626, |
|
"grad_norm": 0.17046092450618744, |
|
"learning_rate": 0.0001892575157937287, |
|
"loss": 0.2053, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.3999547050164194, |
|
"grad_norm": 0.14983297884464264, |
|
"learning_rate": 0.0001891746528368157, |
|
"loss": 0.1986, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.4044842033744762, |
|
"grad_norm": 0.16196715831756592, |
|
"learning_rate": 0.00018909148980534712, |
|
"loss": 0.1982, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.409013701732533, |
|
"grad_norm": 0.15527282655239105, |
|
"learning_rate": 0.00018900802697916836, |
|
"loss": 0.2028, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.41354320009059, |
|
"grad_norm": 0.1645379364490509, |
|
"learning_rate": 0.0001889242646391335, |
|
"loss": 0.1939, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.418072698448647, |
|
"grad_norm": 0.1684643030166626, |
|
"learning_rate": 0.0001888402030671046, |
|
"loss": 0.1931, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.4226021968067037, |
|
"grad_norm": 0.15814268589019775, |
|
"learning_rate": 0.00018875584254595055, |
|
"loss": 0.1951, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.4271316951647606, |
|
"grad_norm": 0.1520155966281891, |
|
"learning_rate": 0.00018867118335954625, |
|
"loss": 0.1886, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.4316611935228174, |
|
"grad_norm": 0.16438494622707367, |
|
"learning_rate": 0.0001885862257927717, |
|
"loss": 0.2015, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.4361906918808742, |
|
"grad_norm": 0.15568524599075317, |
|
"learning_rate": 0.00018850097013151077, |
|
"loss": 0.1898, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.440720190238931, |
|
"grad_norm": 0.15463340282440186, |
|
"learning_rate": 0.00018841541666265058, |
|
"loss": 0.1988, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.4452496885969879, |
|
"grad_norm": 0.14274995028972626, |
|
"learning_rate": 0.00018832956567408032, |
|
"loss": 0.1884, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.4497791869550447, |
|
"grad_norm": 0.17546044290065765, |
|
"learning_rate": 0.00018824341745469033, |
|
"loss": 0.1959, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.4543086853131015, |
|
"grad_norm": 0.14111734926700592, |
|
"learning_rate": 0.0001881569722943712, |
|
"loss": 0.1929, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.4588381836711584, |
|
"grad_norm": 0.1645372211933136, |
|
"learning_rate": 0.00018807023048401263, |
|
"loss": 0.1913, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.4633676820292152, |
|
"grad_norm": 0.16762864589691162, |
|
"learning_rate": 0.00018798319231550265, |
|
"loss": 0.1876, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.467897180387272, |
|
"grad_norm": 0.14765408635139465, |
|
"learning_rate": 0.00018789585808172649, |
|
"loss": 0.1935, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.4724266787453288, |
|
"grad_norm": 0.16272325813770294, |
|
"learning_rate": 0.0001878082280765656, |
|
"loss": 0.199, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.4769561771033857, |
|
"grad_norm": 0.14496152102947235, |
|
"learning_rate": 0.0001877203025948969, |
|
"loss": 0.1987, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.4814856754614427, |
|
"grad_norm": 0.1556200087070465, |
|
"learning_rate": 0.00018763208193259132, |
|
"loss": 0.1938, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.4860151738194995, |
|
"grad_norm": 0.14785943925380707, |
|
"learning_rate": 0.00018754356638651332, |
|
"loss": 0.1905, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.4905446721775564, |
|
"grad_norm": 0.14636161923408508, |
|
"learning_rate": 0.00018745475625451947, |
|
"loss": 0.1928, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.4950741705356132, |
|
"grad_norm": 0.16059593856334686, |
|
"learning_rate": 0.00018736565183545773, |
|
"loss": 0.1967, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.49960366889367, |
|
"grad_norm": 0.15864983201026917, |
|
"learning_rate": 0.00018727625342916633, |
|
"loss": 0.1984, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.5041331672517269, |
|
"grad_norm": 0.14578469097614288, |
|
"learning_rate": 0.00018718656133647277, |
|
"loss": 0.1848, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.5086626656097837, |
|
"grad_norm": 0.16975462436676025, |
|
"learning_rate": 0.00018709657585919275, |
|
"loss": 0.1914, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.5131921639678405, |
|
"grad_norm": 0.14356206357479095, |
|
"learning_rate": 0.00018700629730012934, |
|
"loss": 0.1978, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.5177216623258976, |
|
"grad_norm": 0.14980971813201904, |
|
"learning_rate": 0.00018691572596307173, |
|
"loss": 0.1993, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.5222511606839544, |
|
"grad_norm": 0.1422482430934906, |
|
"learning_rate": 0.00018682486215279435, |
|
"loss": 0.187, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.5267806590420112, |
|
"grad_norm": 0.1586323380470276, |
|
"learning_rate": 0.00018673370617505576, |
|
"loss": 0.1843, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.531310157400068, |
|
"grad_norm": 0.1464434564113617, |
|
"learning_rate": 0.00018664225833659777, |
|
"loss": 0.1973, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.5358396557581249, |
|
"grad_norm": 0.16265639662742615, |
|
"learning_rate": 0.00018655051894514424, |
|
"loss": 0.1873, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.5403691541161817, |
|
"grad_norm": 0.13967713713645935, |
|
"learning_rate": 0.00018645848830940013, |
|
"loss": 0.1834, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.5448986524742385, |
|
"grad_norm": 0.12256325781345367, |
|
"learning_rate": 0.0001863661667390504, |
|
"loss": 0.1849, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.5494281508322953, |
|
"grad_norm": 0.14708378911018372, |
|
"learning_rate": 0.00018627355454475908, |
|
"loss": 0.1921, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.5539576491903522, |
|
"grad_norm": 0.14427697658538818, |
|
"learning_rate": 0.00018618065203816812, |
|
"loss": 0.1863, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.558487147548409, |
|
"grad_norm": 0.1333187371492386, |
|
"learning_rate": 0.0001860874595318964, |
|
"loss": 0.1927, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.5630166459064658, |
|
"grad_norm": 0.15604457259178162, |
|
"learning_rate": 0.00018599397733953858, |
|
"loss": 0.1841, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.5675461442645227, |
|
"grad_norm": 0.147917240858078, |
|
"learning_rate": 0.00018590020577566424, |
|
"loss": 0.1886, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.5720756426225795, |
|
"grad_norm": 0.14821654558181763, |
|
"learning_rate": 0.0001858061451558166, |
|
"loss": 0.1833, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.5766051409806363, |
|
"grad_norm": 0.12086760997772217, |
|
"learning_rate": 0.00018571179579651159, |
|
"loss": 0.1918, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.5811346393386931, |
|
"grad_norm": 0.16424959897994995, |
|
"learning_rate": 0.0001856171580152368, |
|
"loss": 0.1792, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.58566413769675, |
|
"grad_norm": 0.14219975471496582, |
|
"learning_rate": 0.00018552223213045028, |
|
"loss": 0.1946, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.5901936360548068, |
|
"grad_norm": 0.1768968552350998, |
|
"learning_rate": 0.00018542701846157962, |
|
"loss": 0.1843, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.5947231344128636, |
|
"grad_norm": 0.12454737722873688, |
|
"learning_rate": 0.0001853315173290208, |
|
"loss": 0.1836, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.5992526327709207, |
|
"grad_norm": 0.14064136147499084, |
|
"learning_rate": 0.00018523572905413709, |
|
"loss": 0.1841, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.6037821311289775, |
|
"grad_norm": 0.15816141664981842, |
|
"learning_rate": 0.00018513965395925802, |
|
"loss": 0.1882, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.6083116294870343, |
|
"grad_norm": 0.15514902770519257, |
|
"learning_rate": 0.00018504329236767832, |
|
"loss": 0.1881, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.6128411278450911, |
|
"grad_norm": 0.15803417563438416, |
|
"learning_rate": 0.00018494664460365668, |
|
"loss": 0.1859, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.617370626203148, |
|
"grad_norm": 0.12781353294849396, |
|
"learning_rate": 0.00018484971099241485, |
|
"loss": 0.1832, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.6219001245612048, |
|
"grad_norm": 0.16309882700443268, |
|
"learning_rate": 0.0001847524918601365, |
|
"loss": 0.1962, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.6264296229192619, |
|
"grad_norm": 0.12590362131595612, |
|
"learning_rate": 0.00018465498753396595, |
|
"loss": 0.1928, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.6309591212773187, |
|
"grad_norm": 0.1451760232448578, |
|
"learning_rate": 0.00018455719834200728, |
|
"loss": 0.1837, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.6354886196353755, |
|
"grad_norm": 0.14908108115196228, |
|
"learning_rate": 0.0001844591246133232, |
|
"loss": 0.1866, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.6400181179934323, |
|
"grad_norm": 0.13437342643737793, |
|
"learning_rate": 0.00018436076667793382, |
|
"loss": 0.1886, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.6445476163514892, |
|
"grad_norm": 0.13465970754623413, |
|
"learning_rate": 0.00018426212486681562, |
|
"loss": 0.183, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.649077114709546, |
|
"grad_norm": 0.13650234043598175, |
|
"learning_rate": 0.00018416319951190032, |
|
"loss": 0.177, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.6536066130676028, |
|
"grad_norm": 0.1663140207529068, |
|
"learning_rate": 0.00018406399094607386, |
|
"loss": 0.187, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.6581361114256596, |
|
"grad_norm": 0.16565509140491486, |
|
"learning_rate": 0.00018396449950317504, |
|
"loss": 0.1837, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.6626656097837165, |
|
"grad_norm": 0.18802668154239655, |
|
"learning_rate": 0.0001838647255179947, |
|
"loss": 0.1814, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.6671951081417733, |
|
"grad_norm": 0.17005442082881927, |
|
"learning_rate": 0.0001837646693262743, |
|
"loss": 0.1871, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.6717246064998301, |
|
"grad_norm": 0.14796973764896393, |
|
"learning_rate": 0.00018366433126470506, |
|
"loss": 0.1781, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.676254104857887, |
|
"grad_norm": 0.1405303180217743, |
|
"learning_rate": 0.0001835637116709266, |
|
"loss": 0.1792, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.6807836032159438, |
|
"grad_norm": 0.1343483328819275, |
|
"learning_rate": 0.00018346281088352592, |
|
"loss": 0.1807, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.6853131015740006, |
|
"grad_norm": 0.14635176956653595, |
|
"learning_rate": 0.00018336162924203632, |
|
"loss": 0.176, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.6898425999320574, |
|
"grad_norm": 0.13452979922294617, |
|
"learning_rate": 0.0001832601670869361, |
|
"loss": 0.1822, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.6943720982901143, |
|
"grad_norm": 0.14736182987689972, |
|
"learning_rate": 0.00018315842475964748, |
|
"loss": 0.1828, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.698901596648171, |
|
"grad_norm": 0.13288873434066772, |
|
"learning_rate": 0.00018305640260253553, |
|
"loss": 0.1749, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.703431095006228, |
|
"grad_norm": 0.146206796169281, |
|
"learning_rate": 0.00018295410095890696, |
|
"loss": 0.191, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.7079605933642847, |
|
"grad_norm": 0.13878855109214783, |
|
"learning_rate": 0.00018285152017300885, |
|
"loss": 0.1827, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.7124900917223418, |
|
"grad_norm": 0.14912264049053192, |
|
"learning_rate": 0.00018275895908433733, |
|
"loss": 0.173, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.7170195900803986, |
|
"grad_norm": 0.14632469415664673, |
|
"learning_rate": 0.0001826558488798913, |
|
"loss": 0.1776, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.7215490884384554, |
|
"grad_norm": 0.14830105006694794, |
|
"learning_rate": 0.0001825524605368002, |
|
"loss": 0.1762, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.7260785867965123, |
|
"grad_norm": 0.15307176113128662, |
|
"learning_rate": 0.00018244879440296793, |
|
"loss": 0.1753, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.730608085154569, |
|
"grad_norm": 0.15168583393096924, |
|
"learning_rate": 0.0001823448508272332, |
|
"loss": 0.1774, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.735137583512626, |
|
"grad_norm": 0.14207693934440613, |
|
"learning_rate": 0.0001822406301593683, |
|
"loss": 0.1765, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.739667081870683, |
|
"grad_norm": 0.15022936463356018, |
|
"learning_rate": 0.0001821361327500779, |
|
"loss": 0.1852, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.7441965802287398, |
|
"grad_norm": 0.14267757534980774, |
|
"learning_rate": 0.00018203135895099797, |
|
"loss": 0.1788, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.7487260785867966, |
|
"grad_norm": 0.13068848848342896, |
|
"learning_rate": 0.00018192630911469454, |
|
"loss": 0.1834, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.7532555769448535, |
|
"grad_norm": 0.13527341187000275, |
|
"learning_rate": 0.00018182098359466244, |
|
"loss": 0.1878, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.7577850753029103, |
|
"grad_norm": 0.14090019464492798, |
|
"learning_rate": 0.00018171538274532428, |
|
"loss": 0.1825, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.762314573660967, |
|
"grad_norm": 0.16419830918312073, |
|
"learning_rate": 0.00018160950692202907, |
|
"loss": 0.1735, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.766844072019024, |
|
"grad_norm": 0.13737310469150543, |
|
"learning_rate": 0.00018150335648105118, |
|
"loss": 0.1798, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.7713735703770808, |
|
"grad_norm": 0.13491977751255035, |
|
"learning_rate": 0.00018139693177958902, |
|
"loss": 0.1814, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.7759030687351376, |
|
"grad_norm": 0.13069839775562286, |
|
"learning_rate": 0.0001812902331757639, |
|
"loss": 0.1795, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.7804325670931944, |
|
"grad_norm": 0.14693836867809296, |
|
"learning_rate": 0.0001811832610286189, |
|
"loss": 0.1798, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.7849620654512512, |
|
"grad_norm": 0.15298062562942505, |
|
"learning_rate": 0.00018107601569811746, |
|
"loss": 0.1717, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.789491563809308, |
|
"grad_norm": 0.1533603072166443, |
|
"learning_rate": 0.0001809684975451423, |
|
"loss": 0.1825, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.794021062167365, |
|
"grad_norm": 0.15522614121437073, |
|
"learning_rate": 0.00018086070693149435, |
|
"loss": 0.1843, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.7985505605254217, |
|
"grad_norm": 0.12531672418117523, |
|
"learning_rate": 0.00018075264421989117, |
|
"loss": 0.1839, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.8030800588834786, |
|
"grad_norm": 0.1647823303937912, |
|
"learning_rate": 0.00018064430977396607, |
|
"loss": 0.1842, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.8076095572415354, |
|
"grad_norm": 0.14417417347431183, |
|
"learning_rate": 0.00018053570395826666, |
|
"loss": 0.17, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.8121390555995922, |
|
"grad_norm": 0.14394541084766388, |
|
"learning_rate": 0.00018042682713825377, |
|
"loss": 0.181, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.816668553957649, |
|
"grad_norm": 0.13082464039325714, |
|
"learning_rate": 0.0001803176796803002, |
|
"loss": 0.1759, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.821198052315706, |
|
"grad_norm": 0.13551370799541473, |
|
"learning_rate": 0.00018020826195168938, |
|
"loss": 0.1737, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.825727550673763, |
|
"grad_norm": 0.16460978984832764, |
|
"learning_rate": 0.00018009857432061424, |
|
"loss": 0.1788, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.8302570490318197, |
|
"grad_norm": 0.1246340349316597, |
|
"learning_rate": 0.00017998861715617595, |
|
"loss": 0.1648, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.8347865473898766, |
|
"grad_norm": 0.14473074674606323, |
|
"learning_rate": 0.00017987839082838264, |
|
"loss": 0.1683, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.8393160457479334, |
|
"grad_norm": 0.13617068529129028, |
|
"learning_rate": 0.00017976789570814812, |
|
"loss": 0.1731, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.8438455441059902, |
|
"grad_norm": 0.1399005949497223, |
|
"learning_rate": 0.00017965713216729084, |
|
"loss": 0.1726, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.8483750424640473, |
|
"grad_norm": 0.15167087316513062, |
|
"learning_rate": 0.00017954610057853242, |
|
"loss": 0.1769, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.852904540822104, |
|
"grad_norm": 0.1486155092716217, |
|
"learning_rate": 0.00017943480131549637, |
|
"loss": 0.1735, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.857434039180161, |
|
"grad_norm": 0.12672476470470428, |
|
"learning_rate": 0.00017932323475270713, |
|
"loss": 0.1692, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.8619635375382178, |
|
"grad_norm": 0.12943005561828613, |
|
"learning_rate": 0.0001792114012655884, |
|
"loss": 0.1736, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.8664930358962746, |
|
"grad_norm": 0.1305234730243683, |
|
"learning_rate": 0.00017909930123046226, |
|
"loss": 0.1693, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.8710225342543314, |
|
"grad_norm": 0.15078797936439514, |
|
"learning_rate": 0.00017898693502454757, |
|
"loss": 0.1714, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.8755520326123882, |
|
"grad_norm": 0.13605743646621704, |
|
"learning_rate": 0.00017887430302595902, |
|
"loss": 0.1742, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.880081530970445, |
|
"grad_norm": 0.15072084963321686, |
|
"learning_rate": 0.0001787614056137056, |
|
"loss": 0.1761, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.8846110293285019, |
|
"grad_norm": 0.12788626551628113, |
|
"learning_rate": 0.0001786482431676894, |
|
"loss": 0.1698, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.8891405276865587, |
|
"grad_norm": 0.11104808747768402, |
|
"learning_rate": 0.00017853481606870447, |
|
"loss": 0.1673, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.8936700260446155, |
|
"grad_norm": 0.15082287788391113, |
|
"learning_rate": 0.00017842112469843526, |
|
"loss": 0.1771, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.8981995244026724, |
|
"grad_norm": 0.13250093162059784, |
|
"learning_rate": 0.00017830716943945566, |
|
"loss": 0.1693, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.9027290227607292, |
|
"grad_norm": 0.1345834881067276, |
|
"learning_rate": 0.00017819295067522746, |
|
"loss": 0.1657, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.907258521118786, |
|
"grad_norm": 0.12472589313983917, |
|
"learning_rate": 0.00017807846879009916, |
|
"loss": 0.1673, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.9117880194768428, |
|
"grad_norm": 0.14480777084827423, |
|
"learning_rate": 0.00017796372416930466, |
|
"loss": 0.1617, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.9163175178348997, |
|
"grad_norm": 0.12188120186328888, |
|
"learning_rate": 0.00017784871719896207, |
|
"loss": 0.1697, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.9208470161929565, |
|
"grad_norm": 0.13561968505382538, |
|
"learning_rate": 0.0001777334482660721, |
|
"loss": 0.1675, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.9253765145510133, |
|
"grad_norm": 0.1565788984298706, |
|
"learning_rate": 0.0001776179177585172, |
|
"loss": 0.1695, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.9299060129090702, |
|
"grad_norm": 0.13274118304252625, |
|
"learning_rate": 0.00017750212606505988, |
|
"loss": 0.173, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.9344355112671272, |
|
"grad_norm": 0.13509687781333923, |
|
"learning_rate": 0.0001773860735753416, |
|
"loss": 0.1711, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.938965009625184, |
|
"grad_norm": 0.14307665824890137, |
|
"learning_rate": 0.0001772697606798814, |
|
"loss": 0.1752, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.9434945079832409, |
|
"grad_norm": 0.14142999053001404, |
|
"learning_rate": 0.0001771531877700746, |
|
"loss": 0.1746, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.9480240063412977, |
|
"grad_norm": 0.13015881180763245, |
|
"learning_rate": 0.0001770363552381914, |
|
"loss": 0.1624, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.9525535046993545, |
|
"grad_norm": 0.15056206285953522, |
|
"learning_rate": 0.00017691926347737573, |
|
"loss": 0.1683, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.9570830030574113, |
|
"grad_norm": 0.1449085772037506, |
|
"learning_rate": 0.00017680191288164382, |
|
"loss": 0.1652, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.9616125014154684, |
|
"grad_norm": 0.13363459706306458, |
|
"learning_rate": 0.00017668430384588278, |
|
"loss": 0.1755, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.9661419997735252, |
|
"grad_norm": 0.11182225495576859, |
|
"learning_rate": 0.00017656643676584955, |
|
"loss": 0.1649, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.970671498131582, |
|
"grad_norm": 0.1344953030347824, |
|
"learning_rate": 0.00017644831203816926, |
|
"loss": 0.1699, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.9752009964896389, |
|
"grad_norm": 0.14654122292995453, |
|
"learning_rate": 0.000176329930060334, |
|
"loss": 0.1646, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.9797304948476957, |
|
"grad_norm": 0.12001664191484451, |
|
"learning_rate": 0.00017621129123070167, |
|
"loss": 0.1732, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.9842599932057525, |
|
"grad_norm": 0.12289103865623474, |
|
"learning_rate": 0.00017609239594849435, |
|
"loss": 0.1665, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.9887894915638094, |
|
"grad_norm": 0.15383568406105042, |
|
"learning_rate": 0.00017597324461379716, |
|
"loss": 0.1668, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.9933189899218662, |
|
"grad_norm": 0.11333877593278885, |
|
"learning_rate": 0.0001758538376275568, |
|
"loss": 0.1699, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.997848488279923, |
|
"grad_norm": 0.13718217611312866, |
|
"learning_rate": 0.00017573417539158017, |
|
"loss": 0.1674, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.9996602876231457, |
|
"eval_loss": 0.17693181335926056, |
|
"eval_runtime": 617.1958, |
|
"eval_samples_per_second": 12.751, |
|
"eval_steps_per_second": 1.594, |
|
"step": 4414 |
|
}, |
|
{ |
|
"epoch": 2.002717699014834, |
|
"grad_norm": 0.12558519840240479, |
|
"learning_rate": 0.0001756142583085333, |
|
"loss": 0.1601, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.007247197372891, |
|
"grad_norm": 0.171942800283432, |
|
"learning_rate": 0.00017549408678193962, |
|
"loss": 0.1325, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.0117766957309478, |
|
"grad_norm": 0.12557823956012726, |
|
"learning_rate": 0.0001753736612161788, |
|
"loss": 0.1337, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.0163061940890046, |
|
"grad_norm": 0.1112385243177414, |
|
"learning_rate": 0.00017525298201648534, |
|
"loss": 0.1353, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.0208356924470614, |
|
"grad_norm": 0.10396666824817657, |
|
"learning_rate": 0.00017513204958894728, |
|
"loss": 0.1344, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.0253651908051182, |
|
"grad_norm": 0.11958423256874084, |
|
"learning_rate": 0.0001750108643405047, |
|
"loss": 0.1325, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.029894689163175, |
|
"grad_norm": 0.13883349299430847, |
|
"learning_rate": 0.00017488942667894856, |
|
"loss": 0.1308, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.034424187521232, |
|
"grad_norm": 0.12778469920158386, |
|
"learning_rate": 0.00017476773701291905, |
|
"loss": 0.1285, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.0389536858792887, |
|
"grad_norm": 0.12921588122844696, |
|
"learning_rate": 0.00017464579575190444, |
|
"loss": 0.1286, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.0434831842373455, |
|
"grad_norm": 0.14378762245178223, |
|
"learning_rate": 0.00017452360330623957, |
|
"loss": 0.1389, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 2.0480126825954024, |
|
"grad_norm": 0.13812440633773804, |
|
"learning_rate": 0.00017440116008710457, |
|
"loss": 0.1342, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.052542180953459, |
|
"grad_norm": 0.15414589643478394, |
|
"learning_rate": 0.00017427846650652342, |
|
"loss": 0.1381, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.0570716793115165, |
|
"grad_norm": 0.11771693825721741, |
|
"learning_rate": 0.00017415552297736256, |
|
"loss": 0.1344, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.0616011776695733, |
|
"grad_norm": 0.13729040324687958, |
|
"learning_rate": 0.00017403232991332953, |
|
"loss": 0.1323, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.06613067602763, |
|
"grad_norm": 0.11777821183204651, |
|
"learning_rate": 0.00017390888772897148, |
|
"loss": 0.1354, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.070660174385687, |
|
"grad_norm": 0.11759165674448013, |
|
"learning_rate": 0.00017378519683967399, |
|
"loss": 0.1359, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.0751896727437438, |
|
"grad_norm": 0.14665256440639496, |
|
"learning_rate": 0.00017366125766165943, |
|
"loss": 0.1295, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.0797191711018006, |
|
"grad_norm": 0.12388816475868225, |
|
"learning_rate": 0.00017353707061198574, |
|
"loss": 0.1366, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.0842486694598574, |
|
"grad_norm": 0.12518715858459473, |
|
"learning_rate": 0.00017341263610854487, |
|
"loss": 0.1372, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.0887781678179143, |
|
"grad_norm": 0.1429567039012909, |
|
"learning_rate": 0.00017328795457006153, |
|
"loss": 0.1326, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.093307666175971, |
|
"grad_norm": 0.11989770084619522, |
|
"learning_rate": 0.00017316302641609167, |
|
"loss": 0.134, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.097837164534028, |
|
"grad_norm": 0.11995401233434677, |
|
"learning_rate": 0.00017303785206702115, |
|
"loss": 0.136, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.1023666628920847, |
|
"grad_norm": 0.11321832239627838, |
|
"learning_rate": 0.0001729124319440642, |
|
"loss": 0.1371, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.1068961612501416, |
|
"grad_norm": 0.11317916214466095, |
|
"learning_rate": 0.00017278676646926219, |
|
"loss": 0.1303, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.1114256596081984, |
|
"grad_norm": 0.11971450597047806, |
|
"learning_rate": 0.00017266085606548197, |
|
"loss": 0.1363, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.115955157966255, |
|
"grad_norm": 0.12779143452644348, |
|
"learning_rate": 0.00017253470115641473, |
|
"loss": 0.1395, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.120484656324312, |
|
"grad_norm": 0.12094374746084213, |
|
"learning_rate": 0.00017240830216657432, |
|
"loss": 0.1337, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.125014154682369, |
|
"grad_norm": 0.11902227252721786, |
|
"learning_rate": 0.00017228165952129601, |
|
"loss": 0.1342, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 2.1295436530404257, |
|
"grad_norm": 0.12663759291172028, |
|
"learning_rate": 0.00017215477364673486, |
|
"loss": 0.1356, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.1340731513984825, |
|
"grad_norm": 0.12311159074306488, |
|
"learning_rate": 0.0001720276449698645, |
|
"loss": 0.1364, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 2.1386026497565394, |
|
"grad_norm": 0.134132981300354, |
|
"learning_rate": 0.00017190027391847555, |
|
"loss": 0.1352, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.143132148114596, |
|
"grad_norm": 0.1177242249250412, |
|
"learning_rate": 0.00017177266092117428, |
|
"loss": 0.132, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 2.147661646472653, |
|
"grad_norm": 0.11641071736812592, |
|
"learning_rate": 0.00017164480640738101, |
|
"loss": 0.1359, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 2.15219114483071, |
|
"grad_norm": 0.1303935945034027, |
|
"learning_rate": 0.00017151671080732888, |
|
"loss": 0.1354, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.1567206431887667, |
|
"grad_norm": 0.13929632306098938, |
|
"learning_rate": 0.0001713883745520622, |
|
"loss": 0.1303, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 2.1612501415468235, |
|
"grad_norm": 0.13775485754013062, |
|
"learning_rate": 0.00017125979807343519, |
|
"loss": 0.1379, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 2.1657796399048808, |
|
"grad_norm": 0.10667065531015396, |
|
"learning_rate": 0.00017113098180411026, |
|
"loss": 0.1323, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 2.1703091382629376, |
|
"grad_norm": 0.12592215836048126, |
|
"learning_rate": 0.00017100192617755693, |
|
"loss": 0.1326, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 2.1748386366209944, |
|
"grad_norm": 0.12523461878299713, |
|
"learning_rate": 0.00017087263162805, |
|
"loss": 0.1361, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.1793681349790512, |
|
"grad_norm": 0.13614587485790253, |
|
"learning_rate": 0.00017074309859066837, |
|
"loss": 0.136, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 2.183897633337108, |
|
"grad_norm": 0.13419945538043976, |
|
"learning_rate": 0.00017061332750129332, |
|
"loss": 0.1299, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 2.188427131695165, |
|
"grad_norm": 0.10393204540014267, |
|
"learning_rate": 0.00017048331879660733, |
|
"loss": 0.1334, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 2.1929566300532217, |
|
"grad_norm": 0.12654437124729156, |
|
"learning_rate": 0.00017035307291409234, |
|
"loss": 0.138, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 2.1974861284112785, |
|
"grad_norm": 0.12029164284467697, |
|
"learning_rate": 0.00017022259029202843, |
|
"loss": 0.1329, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.2020156267693354, |
|
"grad_norm": 0.1427529752254486, |
|
"learning_rate": 0.00017009187136949238, |
|
"loss": 0.1314, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 2.206545125127392, |
|
"grad_norm": 0.10956190526485443, |
|
"learning_rate": 0.00016996091658635603, |
|
"loss": 0.1324, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 2.211074623485449, |
|
"grad_norm": 0.12758436799049377, |
|
"learning_rate": 0.00016982972638328496, |
|
"loss": 0.1326, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 2.215604121843506, |
|
"grad_norm": 0.10729292035102844, |
|
"learning_rate": 0.00016969830120173692, |
|
"loss": 0.1317, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 2.2201336202015627, |
|
"grad_norm": 0.14230488240718842, |
|
"learning_rate": 0.0001695666414839604, |
|
"loss": 0.1387, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.2246631185596195, |
|
"grad_norm": 0.13682898879051208, |
|
"learning_rate": 0.00016943474767299298, |
|
"loss": 0.1341, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 2.2291926169176763, |
|
"grad_norm": 0.14022116363048553, |
|
"learning_rate": 0.0001693026202126602, |
|
"loss": 0.1345, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 2.233722115275733, |
|
"grad_norm": 0.12787717580795288, |
|
"learning_rate": 0.00016917025954757365, |
|
"loss": 0.138, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 2.23825161363379, |
|
"grad_norm": 0.12592186033725739, |
|
"learning_rate": 0.00016903766612312967, |
|
"loss": 0.135, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 2.242781111991847, |
|
"grad_norm": 0.12485472112894058, |
|
"learning_rate": 0.00016890484038550792, |
|
"loss": 0.1305, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.2473106103499036, |
|
"grad_norm": 0.12487582862377167, |
|
"learning_rate": 0.0001687717827816698, |
|
"loss": 0.1352, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 2.2518401087079605, |
|
"grad_norm": 0.1367800235748291, |
|
"learning_rate": 0.0001686384937593568, |
|
"loss": 0.1377, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 2.2563696070660173, |
|
"grad_norm": 0.12008614093065262, |
|
"learning_rate": 0.00016850497376708935, |
|
"loss": 0.1399, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 2.260899105424074, |
|
"grad_norm": 0.1453281044960022, |
|
"learning_rate": 0.00016837122325416494, |
|
"loss": 0.134, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 2.265428603782131, |
|
"grad_norm": 0.1182338148355484, |
|
"learning_rate": 0.00016823724267065683, |
|
"loss": 0.1386, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.269958102140188, |
|
"grad_norm": 0.1372307538986206, |
|
"learning_rate": 0.00016810303246741245, |
|
"loss": 0.1336, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 2.2744876004982446, |
|
"grad_norm": 0.1213153526186943, |
|
"learning_rate": 0.00016796859309605195, |
|
"loss": 0.1345, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 2.2790170988563014, |
|
"grad_norm": 0.12057512998580933, |
|
"learning_rate": 0.00016783392500896652, |
|
"loss": 0.1324, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 2.2835465972143583, |
|
"grad_norm": 0.13681593537330627, |
|
"learning_rate": 0.00016769902865931718, |
|
"loss": 0.1377, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.2880760955724155, |
|
"grad_norm": 0.12073809653520584, |
|
"learning_rate": 0.00016756390450103285, |
|
"loss": 0.1358, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.2926055939304724, |
|
"grad_norm": 0.1260959357023239, |
|
"learning_rate": 0.00016742855298880916, |
|
"loss": 0.1327, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.297135092288529, |
|
"grad_norm": 0.12705475091934204, |
|
"learning_rate": 0.0001672929745781068, |
|
"loss": 0.1326, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 2.301664590646586, |
|
"grad_norm": 0.12451212108135223, |
|
"learning_rate": 0.00016715716972514984, |
|
"loss": 0.1357, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.306194089004643, |
|
"grad_norm": 0.10446886718273163, |
|
"learning_rate": 0.00016702113888692448, |
|
"loss": 0.1346, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 2.3107235873626997, |
|
"grad_norm": 0.1240820363163948, |
|
"learning_rate": 0.0001668848825211773, |
|
"loss": 0.1376, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.3152530857207565, |
|
"grad_norm": 0.11466921865940094, |
|
"learning_rate": 0.00016674840108641382, |
|
"loss": 0.1347, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 2.3197825840788133, |
|
"grad_norm": 0.12086183577775955, |
|
"learning_rate": 0.00016661169504189686, |
|
"loss": 0.1392, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 2.32431208243687, |
|
"grad_norm": 0.12020442634820938, |
|
"learning_rate": 0.0001664747648476451, |
|
"loss": 0.1326, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 2.328841580794927, |
|
"grad_norm": 0.1300458312034607, |
|
"learning_rate": 0.0001663376109644315, |
|
"loss": 0.1382, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.333371079152984, |
|
"grad_norm": 0.11588041484355927, |
|
"learning_rate": 0.00016620023385378172, |
|
"loss": 0.1348, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.3379005775110406, |
|
"grad_norm": 0.11398044973611832, |
|
"learning_rate": 0.0001660626339779726, |
|
"loss": 0.1335, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.3424300758690975, |
|
"grad_norm": 0.10993365198373795, |
|
"learning_rate": 0.0001659248118000305, |
|
"loss": 0.1314, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 2.3469595742271543, |
|
"grad_norm": 0.11220837384462357, |
|
"learning_rate": 0.00016578676778373, |
|
"loss": 0.1376, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.351489072585211, |
|
"grad_norm": 0.12188950926065445, |
|
"learning_rate": 0.000165648502393592, |
|
"loss": 0.1371, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 2.356018570943268, |
|
"grad_norm": 0.11867307126522064, |
|
"learning_rate": 0.00016551001609488246, |
|
"loss": 0.1335, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.3605480693013248, |
|
"grad_norm": 0.14046625792980194, |
|
"learning_rate": 0.00016537130935361064, |
|
"loss": 0.1392, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 2.3650775676593816, |
|
"grad_norm": 0.11454641073942184, |
|
"learning_rate": 0.00016523238263652757, |
|
"loss": 0.139, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 2.3696070660174384, |
|
"grad_norm": 0.1256382018327713, |
|
"learning_rate": 0.00016509323641112456, |
|
"loss": 0.1366, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 2.3741365643754953, |
|
"grad_norm": 0.11187759041786194, |
|
"learning_rate": 0.00016495387114563153, |
|
"loss": 0.1338, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 2.378666062733552, |
|
"grad_norm": 0.14559686183929443, |
|
"learning_rate": 0.0001648142873090155, |
|
"loss": 0.136, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.3831955610916093, |
|
"grad_norm": 0.12695267796516418, |
|
"learning_rate": 0.00016467448537097894, |
|
"loss": 0.1365, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 2.387725059449666, |
|
"grad_norm": 0.1341744363307953, |
|
"learning_rate": 0.0001645344658019583, |
|
"loss": 0.1354, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 2.392254557807723, |
|
"grad_norm": 0.12615807354450226, |
|
"learning_rate": 0.0001643942290731223, |
|
"loss": 0.1317, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 2.39678405616578, |
|
"grad_norm": 0.1132565289735794, |
|
"learning_rate": 0.00016425377565637054, |
|
"loss": 0.1322, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 2.4013135545238367, |
|
"grad_norm": 0.11671450734138489, |
|
"learning_rate": 0.00016411310602433156, |
|
"loss": 0.1296, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.4058430528818935, |
|
"grad_norm": 0.1351209580898285, |
|
"learning_rate": 0.00016397222065036164, |
|
"loss": 0.1304, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 2.4103725512399503, |
|
"grad_norm": 0.1276492178440094, |
|
"learning_rate": 0.000163831120008543, |
|
"loss": 0.1361, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 2.414902049598007, |
|
"grad_norm": 0.13524995744228363, |
|
"learning_rate": 0.00016368980457368216, |
|
"loss": 0.133, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 2.419431547956064, |
|
"grad_norm": 0.1324642449617386, |
|
"learning_rate": 0.00016354827482130855, |
|
"loss": 0.1373, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 2.423961046314121, |
|
"grad_norm": 0.13200613856315613, |
|
"learning_rate": 0.0001634065312276727, |
|
"loss": 0.1367, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.4284905446721776, |
|
"grad_norm": 0.12052213400602341, |
|
"learning_rate": 0.00016326457426974475, |
|
"loss": 0.1335, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 2.4330200430302344, |
|
"grad_norm": 0.1289413571357727, |
|
"learning_rate": 0.00016312240442521278, |
|
"loss": 0.1358, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 2.4375495413882913, |
|
"grad_norm": 0.11921897530555725, |
|
"learning_rate": 0.00016298002217248131, |
|
"loss": 0.1322, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 2.442079039746348, |
|
"grad_norm": 0.14872752130031586, |
|
"learning_rate": 0.00016283742799066953, |
|
"loss": 0.1385, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 2.446608538104405, |
|
"grad_norm": 0.11772260814905167, |
|
"learning_rate": 0.00016269462235960985, |
|
"loss": 0.1336, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.4511380364624618, |
|
"grad_norm": 0.13925409317016602, |
|
"learning_rate": 0.00016255160575984616, |
|
"loss": 0.137, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 2.4556675348205186, |
|
"grad_norm": 0.1357075273990631, |
|
"learning_rate": 0.00016240837867263227, |
|
"loss": 0.1349, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 2.4601970331785754, |
|
"grad_norm": 0.1274648904800415, |
|
"learning_rate": 0.00016226494157993036, |
|
"loss": 0.1307, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 2.4647265315366322, |
|
"grad_norm": 0.1424674391746521, |
|
"learning_rate": 0.00016212129496440914, |
|
"loss": 0.1359, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 2.469256029894689, |
|
"grad_norm": 0.1157744899392128, |
|
"learning_rate": 0.00016197743930944247, |
|
"loss": 0.1371, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.473785528252746, |
|
"grad_norm": 0.1353282928466797, |
|
"learning_rate": 0.00016183337509910762, |
|
"loss": 0.1399, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.4783150266108027, |
|
"grad_norm": 0.11779867857694626, |
|
"learning_rate": 0.00016168910281818367, |
|
"loss": 0.1348, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 2.4828445249688595, |
|
"grad_norm": 0.11190491169691086, |
|
"learning_rate": 0.00016154462295214984, |
|
"loss": 0.1341, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.4873740233269164, |
|
"grad_norm": 0.1286158561706543, |
|
"learning_rate": 0.0001613999359871838, |
|
"loss": 0.1323, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 2.491903521684973, |
|
"grad_norm": 0.12542322278022766, |
|
"learning_rate": 0.0001612550424101603, |
|
"loss": 0.1365, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.49643302004303, |
|
"grad_norm": 0.12170036882162094, |
|
"learning_rate": 0.00016110994270864912, |
|
"loss": 0.1344, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 2.500962518401087, |
|
"grad_norm": 0.13724590837955475, |
|
"learning_rate": 0.00016096463737091382, |
|
"loss": 0.1325, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.5054920167591437, |
|
"grad_norm": 0.11381508409976959, |
|
"learning_rate": 0.00016081912688590988, |
|
"loss": 0.1339, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 2.5100215151172005, |
|
"grad_norm": 0.12289192527532578, |
|
"learning_rate": 0.00016067341174328306, |
|
"loss": 0.1302, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 2.514551013475258, |
|
"grad_norm": 0.12465256452560425, |
|
"learning_rate": 0.00016052749243336786, |
|
"loss": 0.1354, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.5190805118333146, |
|
"grad_norm": 0.12437895685434341, |
|
"learning_rate": 0.0001603813694471858, |
|
"loss": 0.1321, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 2.5236100101913714, |
|
"grad_norm": 0.12177952378988266, |
|
"learning_rate": 0.00016023504327644376, |
|
"loss": 0.1387, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 2.5281395085494283, |
|
"grad_norm": 0.12667645514011383, |
|
"learning_rate": 0.00016008851441353232, |
|
"loss": 0.1383, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 2.532669006907485, |
|
"grad_norm": 0.13816499710083008, |
|
"learning_rate": 0.00015994178335152412, |
|
"loss": 0.1419, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 2.537198505265542, |
|
"grad_norm": 0.13884486258029938, |
|
"learning_rate": 0.00015979485058417226, |
|
"loss": 0.1345, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.5417280036235987, |
|
"grad_norm": 0.13231264054775238, |
|
"learning_rate": 0.0001596477166059085, |
|
"loss": 0.1386, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 2.5462575019816556, |
|
"grad_norm": 0.10923223942518234, |
|
"learning_rate": 0.00015950038191184178, |
|
"loss": 0.1382, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 2.5507870003397124, |
|
"grad_norm": 0.1239657923579216, |
|
"learning_rate": 0.00015935284699775638, |
|
"loss": 0.1345, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 2.5553164986977692, |
|
"grad_norm": 0.11910531669855118, |
|
"learning_rate": 0.00015920511236011038, |
|
"loss": 0.1321, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 2.559845997055826, |
|
"grad_norm": 0.1176079511642456, |
|
"learning_rate": 0.00015905717849603384, |
|
"loss": 0.1379, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.564375495413883, |
|
"grad_norm": 0.10820971429347992, |
|
"learning_rate": 0.0001589090459033273, |
|
"loss": 0.1353, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 2.5689049937719397, |
|
"grad_norm": 0.11455655097961426, |
|
"learning_rate": 0.00015876071508046002, |
|
"loss": 0.1375, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 2.5734344921299965, |
|
"grad_norm": 0.13477309048175812, |
|
"learning_rate": 0.00015861218652656826, |
|
"loss": 0.1345, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 2.5779639904880534, |
|
"grad_norm": 0.1447640061378479, |
|
"learning_rate": 0.00015846346074145374, |
|
"loss": 0.1398, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 2.58249348884611, |
|
"grad_norm": 0.11953482776880264, |
|
"learning_rate": 0.00015831453822558178, |
|
"loss": 0.1323, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.587022987204167, |
|
"grad_norm": 0.11846103519201279, |
|
"learning_rate": 0.00015816541948007967, |
|
"loss": 0.1359, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 2.591552485562224, |
|
"grad_norm": 0.1382216066122055, |
|
"learning_rate": 0.00015801610500673524, |
|
"loss": 0.1406, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 2.5960819839202807, |
|
"grad_norm": 0.12505120038986206, |
|
"learning_rate": 0.0001578665953079946, |
|
"loss": 0.1315, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 2.600611482278338, |
|
"grad_norm": 0.13036322593688965, |
|
"learning_rate": 0.00015771689088696112, |
|
"loss": 0.1322, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 2.6051409806363948, |
|
"grad_norm": 0.10827736556529999, |
|
"learning_rate": 0.00015756699224739323, |
|
"loss": 0.1346, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.6096704789944516, |
|
"grad_norm": 0.12595966458320618, |
|
"learning_rate": 0.00015741689989370294, |
|
"loss": 0.1318, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 2.6141999773525084, |
|
"grad_norm": 0.12824150919914246, |
|
"learning_rate": 0.0001572666143309542, |
|
"loss": 0.1287, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 2.6187294757105652, |
|
"grad_norm": 0.12415400892496109, |
|
"learning_rate": 0.00015711613606486096, |
|
"loss": 0.1329, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 2.623258974068622, |
|
"grad_norm": 0.1439315378665924, |
|
"learning_rate": 0.0001569654656017858, |
|
"loss": 0.1307, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 2.627788472426679, |
|
"grad_norm": 0.11085296422243118, |
|
"learning_rate": 0.00015681460344873786, |
|
"loss": 0.1343, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.6323179707847357, |
|
"grad_norm": 0.12394888699054718, |
|
"learning_rate": 0.00015666355011337147, |
|
"loss": 0.132, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 2.6368474691427926, |
|
"grad_norm": 0.1326746642589569, |
|
"learning_rate": 0.0001565123061039842, |
|
"loss": 0.1354, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 2.6413769675008494, |
|
"grad_norm": 0.11657778173685074, |
|
"learning_rate": 0.00015636087192951527, |
|
"loss": 0.1354, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 2.645906465858906, |
|
"grad_norm": 0.12350430339574814, |
|
"learning_rate": 0.0001562092480995439, |
|
"loss": 0.137, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 2.650435964216963, |
|
"grad_norm": 0.1291380524635315, |
|
"learning_rate": 0.0001560574351242873, |
|
"loss": 0.1332, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.65496546257502, |
|
"grad_norm": 0.13578584790229797, |
|
"learning_rate": 0.00015590543351459937, |
|
"loss": 0.1338, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 2.6594949609330767, |
|
"grad_norm": 0.11825544387102127, |
|
"learning_rate": 0.00015575324378196866, |
|
"loss": 0.1304, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 2.6640244592911335, |
|
"grad_norm": 0.11767857521772385, |
|
"learning_rate": 0.00015560086643851676, |
|
"loss": 0.1346, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 2.6685539576491903, |
|
"grad_norm": 0.12600229680538177, |
|
"learning_rate": 0.00015544830199699662, |
|
"loss": 0.1335, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 2.673083456007247, |
|
"grad_norm": 0.11990875750780106, |
|
"learning_rate": 0.00015529555097079065, |
|
"loss": 0.1341, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.677612954365304, |
|
"grad_norm": 0.10967559367418289, |
|
"learning_rate": 0.00015514261387390935, |
|
"loss": 0.1305, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 2.682142452723361, |
|
"grad_norm": 0.1208115667104721, |
|
"learning_rate": 0.00015498949122098914, |
|
"loss": 0.1329, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 2.6866719510814177, |
|
"grad_norm": 0.12302912771701813, |
|
"learning_rate": 0.00015483618352729093, |
|
"loss": 0.141, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 2.6912014494394745, |
|
"grad_norm": 0.14282426238059998, |
|
"learning_rate": 0.00015468269130869834, |
|
"loss": 0.1312, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 2.6957309477975313, |
|
"grad_norm": 0.1203923374414444, |
|
"learning_rate": 0.0001545290150817158, |
|
"loss": 0.1327, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.700260446155588, |
|
"grad_norm": 0.141504168510437, |
|
"learning_rate": 0.00015437515536346704, |
|
"loss": 0.1307, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 2.704789944513645, |
|
"grad_norm": 0.12170039117336273, |
|
"learning_rate": 0.00015422111267169322, |
|
"loss": 0.139, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 2.709319442871702, |
|
"grad_norm": 0.13064149022102356, |
|
"learning_rate": 0.0001540668875247511, |
|
"loss": 0.1358, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 2.7138489412297586, |
|
"grad_norm": 0.11947247385978699, |
|
"learning_rate": 0.00015391248044161162, |
|
"loss": 0.1301, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 2.7183784395878154, |
|
"grad_norm": 0.10719356685876846, |
|
"learning_rate": 0.00015375789194185772, |
|
"loss": 0.1296, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.7229079379458723, |
|
"grad_norm": 0.11288373172283173, |
|
"learning_rate": 0.00015360312254568295, |
|
"loss": 0.1336, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 2.727437436303929, |
|
"grad_norm": 0.12122143059968948, |
|
"learning_rate": 0.00015344817277388955, |
|
"loss": 0.1293, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 2.731966934661986, |
|
"grad_norm": 0.11723847687244415, |
|
"learning_rate": 0.0001532930431478867, |
|
"loss": 0.133, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 2.736496433020043, |
|
"grad_norm": 0.11670687049627304, |
|
"learning_rate": 0.00015313773418968878, |
|
"loss": 0.127, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 2.7410259313781, |
|
"grad_norm": 0.13267673552036285, |
|
"learning_rate": 0.00015298224642191368, |
|
"loss": 0.1287, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.745555429736157, |
|
"grad_norm": 0.12557269632816315, |
|
"learning_rate": 0.00015282658036778094, |
|
"loss": 0.1371, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 2.7500849280942137, |
|
"grad_norm": 0.12416243553161621, |
|
"learning_rate": 0.0001526707365511101, |
|
"loss": 0.1339, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 2.7546144264522705, |
|
"grad_norm": 0.13237670063972473, |
|
"learning_rate": 0.00015251471549631882, |
|
"loss": 0.1307, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 2.7591439248103273, |
|
"grad_norm": 0.10942938178777695, |
|
"learning_rate": 0.00015235851772842115, |
|
"loss": 0.1325, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 2.763673423168384, |
|
"grad_norm": 0.12319351732730865, |
|
"learning_rate": 0.00015220214377302586, |
|
"loss": 0.1346, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.768202921526441, |
|
"grad_norm": 0.11745291203260422, |
|
"learning_rate": 0.00015204559415633452, |
|
"loss": 0.1358, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 2.772732419884498, |
|
"grad_norm": 0.12627694010734558, |
|
"learning_rate": 0.00015188886940513987, |
|
"loss": 0.1314, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 2.7772619182425546, |
|
"grad_norm": 0.12790648639202118, |
|
"learning_rate": 0.0001517319700468239, |
|
"loss": 0.1314, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 2.7817914166006115, |
|
"grad_norm": 0.12807555496692657, |
|
"learning_rate": 0.00015157489660935625, |
|
"loss": 0.1368, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 2.7863209149586683, |
|
"grad_norm": 0.114469513297081, |
|
"learning_rate": 0.00015141764962129227, |
|
"loss": 0.1364, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 2.790850413316725, |
|
"grad_norm": 0.12749959528446198, |
|
"learning_rate": 0.00015126022961177134, |
|
"loss": 0.133, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 2.795379911674782, |
|
"grad_norm": 0.12623634934425354, |
|
"learning_rate": 0.00015110263711051505, |
|
"loss": 0.1341, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 2.7999094100328388, |
|
"grad_norm": 0.10407795011997223, |
|
"learning_rate": 0.00015094487264782544, |
|
"loss": 0.1373, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 2.8044389083908956, |
|
"grad_norm": 0.11660348623991013, |
|
"learning_rate": 0.0001507869367545832, |
|
"loss": 0.1336, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 2.8089684067489524, |
|
"grad_norm": 0.13876129686832428, |
|
"learning_rate": 0.00015062882996224586, |
|
"loss": 0.1282, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.8134979051070093, |
|
"grad_norm": 0.12573808431625366, |
|
"learning_rate": 0.0001504705528028461, |
|
"loss": 0.1345, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 2.818027403465066, |
|
"grad_norm": 0.12007986009120941, |
|
"learning_rate": 0.0001503121058089898, |
|
"loss": 0.1342, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 2.8225569018231234, |
|
"grad_norm": 0.10775137692689896, |
|
"learning_rate": 0.00015015348951385443, |
|
"loss": 0.1352, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 2.82708640018118, |
|
"grad_norm": 0.10959987342357635, |
|
"learning_rate": 0.00014999470445118705, |
|
"loss": 0.1299, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 2.831615898539237, |
|
"grad_norm": 0.11662711948156357, |
|
"learning_rate": 0.00014983575115530272, |
|
"loss": 0.136, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.836145396897294, |
|
"grad_norm": 0.11882171779870987, |
|
"learning_rate": 0.00014967663016108258, |
|
"loss": 0.1336, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 2.8406748952553507, |
|
"grad_norm": 0.12361105531454086, |
|
"learning_rate": 0.00014951734200397204, |
|
"loss": 0.1363, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 2.8452043936134075, |
|
"grad_norm": 0.11306975781917572, |
|
"learning_rate": 0.0001493578872199791, |
|
"loss": 0.1315, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 2.8497338919714643, |
|
"grad_norm": 0.10558556020259857, |
|
"learning_rate": 0.0001491982663456724, |
|
"loss": 0.1293, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 2.854263390329521, |
|
"grad_norm": 0.11685465276241302, |
|
"learning_rate": 0.00014903847991817946, |
|
"loss": 0.1309, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.858792888687578, |
|
"grad_norm": 0.10772823542356491, |
|
"learning_rate": 0.00014887852847518497, |
|
"loss": 0.1306, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 2.863322387045635, |
|
"grad_norm": 0.13630211353302002, |
|
"learning_rate": 0.0001487184125549288, |
|
"loss": 0.1301, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 2.8678518854036916, |
|
"grad_norm": 0.11658801138401031, |
|
"learning_rate": 0.0001485581326962044, |
|
"loss": 0.1301, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 2.8723813837617485, |
|
"grad_norm": 0.14447173476219177, |
|
"learning_rate": 0.00014839768943835676, |
|
"loss": 0.1364, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 2.8769108821198053, |
|
"grad_norm": 0.10343156009912491, |
|
"learning_rate": 0.00014823708332128077, |
|
"loss": 0.1305, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 2.881440380477862, |
|
"grad_norm": 0.14246292412281036, |
|
"learning_rate": 0.00014807631488541938, |
|
"loss": 0.1322, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 2.885969878835919, |
|
"grad_norm": 0.13046808540821075, |
|
"learning_rate": 0.00014791538467176174, |
|
"loss": 0.1327, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 2.8904993771939758, |
|
"grad_norm": 0.1174997016787529, |
|
"learning_rate": 0.00014775429322184128, |
|
"loss": 0.1319, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 2.8950288755520326, |
|
"grad_norm": 0.11900872737169266, |
|
"learning_rate": 0.0001475930410777341, |
|
"loss": 0.1346, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 2.8995583739100894, |
|
"grad_norm": 0.10685596615076065, |
|
"learning_rate": 0.000147431628782057, |
|
"loss": 0.1309, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.9040878722681462, |
|
"grad_norm": 0.1201610341668129, |
|
"learning_rate": 0.00014727005687796573, |
|
"loss": 0.1334, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 2.908617370626203, |
|
"grad_norm": 0.1042858362197876, |
|
"learning_rate": 0.00014710832590915306, |
|
"loss": 0.1305, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 2.91314686898426, |
|
"grad_norm": 0.11404233425855637, |
|
"learning_rate": 0.00014694643641984708, |
|
"loss": 0.1264, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 2.9176763673423167, |
|
"grad_norm": 0.09692881256341934, |
|
"learning_rate": 0.0001467843889548093, |
|
"loss": 0.1356, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 2.9222058657003736, |
|
"grad_norm": 0.11369141191244125, |
|
"learning_rate": 0.0001466221840593327, |
|
"loss": 0.1281, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 2.9267353640584304, |
|
"grad_norm": 0.12543022632598877, |
|
"learning_rate": 0.0001464598222792402, |
|
"loss": 0.1344, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 2.931264862416487, |
|
"grad_norm": 0.09960107505321503, |
|
"learning_rate": 0.00014629730416088256, |
|
"loss": 0.1347, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 2.935794360774544, |
|
"grad_norm": 0.11416647583246231, |
|
"learning_rate": 0.00014613463025113662, |
|
"loss": 0.128, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 2.940323859132601, |
|
"grad_norm": 0.13363508880138397, |
|
"learning_rate": 0.0001459718010974034, |
|
"loss": 0.1362, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 2.9448533574906577, |
|
"grad_norm": 0.12580367922782898, |
|
"learning_rate": 0.00014580881724760638, |
|
"loss": 0.1331, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.9493828558487145, |
|
"grad_norm": 0.1310282200574875, |
|
"learning_rate": 0.00014564567925018967, |
|
"loss": 0.137, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 2.9539123542067713, |
|
"grad_norm": 0.12097878754138947, |
|
"learning_rate": 0.000145482387654116, |
|
"loss": 0.1327, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 2.9584418525648286, |
|
"grad_norm": 0.11536047607660294, |
|
"learning_rate": 0.0001453189430088649, |
|
"loss": 0.1383, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 2.9629713509228854, |
|
"grad_norm": 0.11799097061157227, |
|
"learning_rate": 0.00014515534586443104, |
|
"loss": 0.1365, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 2.9675008492809423, |
|
"grad_norm": 0.10550688207149506, |
|
"learning_rate": 0.00014499159677132219, |
|
"loss": 0.1304, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.972030347638999, |
|
"grad_norm": 0.13376198709011078, |
|
"learning_rate": 0.00014482769628055748, |
|
"loss": 0.1317, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 2.976559845997056, |
|
"grad_norm": 0.1147933304309845, |
|
"learning_rate": 0.0001446636449436654, |
|
"loss": 0.1317, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 2.9810893443551127, |
|
"grad_norm": 0.12273435294628143, |
|
"learning_rate": 0.00014449944331268216, |
|
"loss": 0.1302, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 2.9856188427131696, |
|
"grad_norm": 0.12308023869991302, |
|
"learning_rate": 0.00014433509194014963, |
|
"loss": 0.1284, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 2.9901483410712264, |
|
"grad_norm": 0.11716390401124954, |
|
"learning_rate": 0.00014417059137911356, |
|
"loss": 0.1286, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.9946778394292832, |
|
"grad_norm": 0.1330905556678772, |
|
"learning_rate": 0.00014400594218312178, |
|
"loss": 0.1321, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 2.99920733778734, |
|
"grad_norm": 0.12336422502994537, |
|
"learning_rate": 0.00014384114490622221, |
|
"loss": 0.1327, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 2.9996602876231457, |
|
"eval_loss": 0.16021211445331573, |
|
"eval_runtime": 617.3452, |
|
"eval_samples_per_second": 12.748, |
|
"eval_steps_per_second": 1.594, |
|
"step": 6621 |
|
}, |
|
{ |
|
"epoch": 3.004076548522251, |
|
"grad_norm": 0.1117822602391243, |
|
"learning_rate": 0.00014367620010296114, |
|
"loss": 0.1199, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 3.008606046880308, |
|
"grad_norm": 0.10662990808486938, |
|
"learning_rate": 0.00014351110832838123, |
|
"loss": 0.1082, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 3.013135545238365, |
|
"grad_norm": 0.09254604578018188, |
|
"learning_rate": 0.00014334587013801976, |
|
"loss": 0.1106, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 3.0176650435964216, |
|
"grad_norm": 0.10764751583337784, |
|
"learning_rate": 0.00014318048608790663, |
|
"loss": 0.1087, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 3.0221945419544785, |
|
"grad_norm": 0.10320322960615158, |
|
"learning_rate": 0.00014301495673456262, |
|
"loss": 0.1072, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 3.0267240403125353, |
|
"grad_norm": 0.09786458313465118, |
|
"learning_rate": 0.00014284928263499742, |
|
"loss": 0.1052, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 3.031253538670592, |
|
"grad_norm": 0.0940663069486618, |
|
"learning_rate": 0.00014268346434670782, |
|
"loss": 0.1141, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 3.035783037028649, |
|
"grad_norm": 0.12340737879276276, |
|
"learning_rate": 0.0001425175024276758, |
|
"loss": 0.1099, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 3.0403125353867058, |
|
"grad_norm": 0.10877358913421631, |
|
"learning_rate": 0.00014235139743636662, |
|
"loss": 0.1066, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 3.0448420337447626, |
|
"grad_norm": 0.09268616884946823, |
|
"learning_rate": 0.00014218514993172705, |
|
"loss": 0.105, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 3.0493715321028194, |
|
"grad_norm": 0.09083138406276703, |
|
"learning_rate": 0.00014201876047318342, |
|
"loss": 0.1103, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 3.0539010304608762, |
|
"grad_norm": 0.10291367769241333, |
|
"learning_rate": 0.00014185222962063965, |
|
"loss": 0.1072, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 3.0584305288189335, |
|
"grad_norm": 0.10415250808000565, |
|
"learning_rate": 0.00014168555793447554, |
|
"loss": 0.1114, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 3.0629600271769903, |
|
"grad_norm": 0.10135282576084137, |
|
"learning_rate": 0.00014151874597554477, |
|
"loss": 0.1086, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 3.067489525535047, |
|
"grad_norm": 0.10510314255952835, |
|
"learning_rate": 0.00014135179430517305, |
|
"loss": 0.1117, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 3.072019023893104, |
|
"grad_norm": 0.11414755135774612, |
|
"learning_rate": 0.0001411847034851562, |
|
"loss": 0.1102, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 3.076548522251161, |
|
"grad_norm": 0.0981656014919281, |
|
"learning_rate": 0.0001410174740777583, |
|
"loss": 0.1112, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 3.0810780206092176, |
|
"grad_norm": 0.09286178648471832, |
|
"learning_rate": 0.00014085010664570974, |
|
"loss": 0.1085, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.0856075189672745, |
|
"grad_norm": 0.10993903875350952, |
|
"learning_rate": 0.00014068260175220546, |
|
"loss": 0.1121, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 3.0901370173253313, |
|
"grad_norm": 0.10415517538785934, |
|
"learning_rate": 0.00014051495996090285, |
|
"loss": 0.109, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 3.094666515683388, |
|
"grad_norm": 0.09917622059583664, |
|
"learning_rate": 0.00014034718183592, |
|
"loss": 0.1085, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 3.099196014041445, |
|
"grad_norm": 0.09848062694072723, |
|
"learning_rate": 0.00014017926794183383, |
|
"loss": 0.1047, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 3.103725512399502, |
|
"grad_norm": 0.12383636087179184, |
|
"learning_rate": 0.00014001121884367804, |
|
"loss": 0.1105, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 3.1082550107575586, |
|
"grad_norm": 0.10345660895109177, |
|
"learning_rate": 0.00013984303510694134, |
|
"loss": 0.1108, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 3.1127845091156154, |
|
"grad_norm": 0.08951733261346817, |
|
"learning_rate": 0.0001396747172975655, |
|
"loss": 0.1117, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 3.1173140074736723, |
|
"grad_norm": 0.09321026504039764, |
|
"learning_rate": 0.00013950626598194346, |
|
"loss": 0.1095, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 3.121843505831729, |
|
"grad_norm": 0.09075412154197693, |
|
"learning_rate": 0.0001393376817269173, |
|
"loss": 0.1111, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 3.126373004189786, |
|
"grad_norm": 0.08038198202848434, |
|
"learning_rate": 0.0001391689650997766, |
|
"loss": 0.1085, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 3.1309025025478427, |
|
"grad_norm": 0.09946314990520477, |
|
"learning_rate": 0.00013900011666825632, |
|
"loss": 0.1079, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 3.1354320009058996, |
|
"grad_norm": 0.083831787109375, |
|
"learning_rate": 0.00013883113700053493, |
|
"loss": 0.108, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 3.1399614992639564, |
|
"grad_norm": 0.09110364317893982, |
|
"learning_rate": 0.00013866202666523245, |
|
"loss": 0.1074, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 3.1444909976220132, |
|
"grad_norm": 0.09342263638973236, |
|
"learning_rate": 0.00013849278623140874, |
|
"loss": 0.1102, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 3.14902049598007, |
|
"grad_norm": 0.10097695142030716, |
|
"learning_rate": 0.00013832341626856135, |
|
"loss": 0.1091, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 3.153549994338127, |
|
"grad_norm": 0.10724612325429916, |
|
"learning_rate": 0.0001381539173466237, |
|
"loss": 0.1095, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 3.1580794926961837, |
|
"grad_norm": 0.113038569688797, |
|
"learning_rate": 0.0001379842900359632, |
|
"loss": 0.1101, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 3.1626089910542405, |
|
"grad_norm": 0.10871588438749313, |
|
"learning_rate": 0.00013781453490737918, |
|
"loss": 0.1074, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 3.167138489412298, |
|
"grad_norm": 0.09797286987304688, |
|
"learning_rate": 0.0001376446525321013, |
|
"loss": 0.1107, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 3.1716679877703546, |
|
"grad_norm": 0.10018666833639145, |
|
"learning_rate": 0.0001374746434817872, |
|
"loss": 0.1112, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.1761974861284115, |
|
"grad_norm": 0.09767764061689377, |
|
"learning_rate": 0.00013730450832852086, |
|
"loss": 0.1117, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 3.1807269844864683, |
|
"grad_norm": 0.10807600617408752, |
|
"learning_rate": 0.00013713424764481066, |
|
"loss": 0.1069, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 3.185256482844525, |
|
"grad_norm": 0.11085067689418793, |
|
"learning_rate": 0.00013696386200358723, |
|
"loss": 0.1098, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 3.189785981202582, |
|
"grad_norm": 0.11777514964342117, |
|
"learning_rate": 0.0001367933519782018, |
|
"loss": 0.1095, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 3.1943154795606388, |
|
"grad_norm": 0.08946658670902252, |
|
"learning_rate": 0.00013662271814242422, |
|
"loss": 0.1091, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 3.1988449779186956, |
|
"grad_norm": 0.10264267772436142, |
|
"learning_rate": 0.0001364519610704408, |
|
"loss": 0.1116, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 3.2033744762767524, |
|
"grad_norm": 0.0933040976524353, |
|
"learning_rate": 0.00013628108133685273, |
|
"loss": 0.1091, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 3.2079039746348093, |
|
"grad_norm": 0.10949963331222534, |
|
"learning_rate": 0.00013611007951667376, |
|
"loss": 0.1122, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 3.212433472992866, |
|
"grad_norm": 0.10518185049295425, |
|
"learning_rate": 0.0001359389561853286, |
|
"loss": 0.1112, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 3.216962971350923, |
|
"grad_norm": 0.10346280038356781, |
|
"learning_rate": 0.00013576771191865078, |
|
"loss": 0.109, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 3.2214924697089797, |
|
"grad_norm": 0.09324981272220612, |
|
"learning_rate": 0.00013559634729288088, |
|
"loss": 0.1092, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 3.2260219680670366, |
|
"grad_norm": 0.10806597769260406, |
|
"learning_rate": 0.00013542486288466428, |
|
"loss": 0.1103, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 3.2305514664250934, |
|
"grad_norm": 0.10441877692937851, |
|
"learning_rate": 0.00013525325927104973, |
|
"loss": 0.1095, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 3.23508096478315, |
|
"grad_norm": 0.08796998858451843, |
|
"learning_rate": 0.00013508153702948683, |
|
"loss": 0.1104, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 3.239610463141207, |
|
"grad_norm": 0.12072450667619705, |
|
"learning_rate": 0.00013490969673782453, |
|
"loss": 0.1095, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 3.244139961499264, |
|
"grad_norm": 0.10589967668056488, |
|
"learning_rate": 0.00013473773897430903, |
|
"loss": 0.107, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 3.2486694598573207, |
|
"grad_norm": 0.10880044102668762, |
|
"learning_rate": 0.00013456566431758164, |
|
"loss": 0.1101, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 3.2531989582153775, |
|
"grad_norm": 0.10041461884975433, |
|
"learning_rate": 0.00013439347334667722, |
|
"loss": 0.1103, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 3.2577284565734344, |
|
"grad_norm": 0.11079218983650208, |
|
"learning_rate": 0.000134221166641022, |
|
"loss": 0.1112, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 3.262257954931491, |
|
"grad_norm": 0.10900229215621948, |
|
"learning_rate": 0.00013404874478043153, |
|
"loss": 0.1117, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.266787453289548, |
|
"grad_norm": 0.10362094640731812, |
|
"learning_rate": 0.000133876208345109, |
|
"loss": 0.1114, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 3.271316951647605, |
|
"grad_norm": 0.10555779188871384, |
|
"learning_rate": 0.00013370355791564306, |
|
"loss": 0.1123, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 3.2758464500056617, |
|
"grad_norm": 0.09255950897932053, |
|
"learning_rate": 0.00013353079407300603, |
|
"loss": 0.1131, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 3.2803759483637185, |
|
"grad_norm": 0.09914428740739822, |
|
"learning_rate": 0.00013335791739855176, |
|
"loss": 0.1113, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 3.2849054467217758, |
|
"grad_norm": 0.10521331429481506, |
|
"learning_rate": 0.0001331849284740139, |
|
"loss": 0.11, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 3.2894349450798326, |
|
"grad_norm": 0.09139056503772736, |
|
"learning_rate": 0.00013301182788150374, |
|
"loss": 0.1109, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 3.2939644434378894, |
|
"grad_norm": 0.09516976028680801, |
|
"learning_rate": 0.00013283861620350836, |
|
"loss": 0.1096, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 3.2984939417959462, |
|
"grad_norm": 0.09153826534748077, |
|
"learning_rate": 0.00013266529402288866, |
|
"loss": 0.1093, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 3.303023440154003, |
|
"grad_norm": 0.11171313375234604, |
|
"learning_rate": 0.00013249186192287735, |
|
"loss": 0.113, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 3.30755293851206, |
|
"grad_norm": 0.1110367551445961, |
|
"learning_rate": 0.00013231832048707712, |
|
"loss": 0.1146, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 3.3120824368701167, |
|
"grad_norm": 0.10271560400724411, |
|
"learning_rate": 0.00013214467029945835, |
|
"loss": 0.1096, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 3.3166119352281735, |
|
"grad_norm": 0.10005812346935272, |
|
"learning_rate": 0.00013197091194435767, |
|
"loss": 0.1089, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 3.3211414335862304, |
|
"grad_norm": 0.09489379823207855, |
|
"learning_rate": 0.00013179704600647547, |
|
"loss": 0.1119, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 3.325670931944287, |
|
"grad_norm": 0.10342545807361603, |
|
"learning_rate": 0.00013162307307087423, |
|
"loss": 0.1128, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 3.330200430302344, |
|
"grad_norm": 0.10697804391384125, |
|
"learning_rate": 0.0001314489937229765, |
|
"loss": 0.1126, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 3.334729928660401, |
|
"grad_norm": 0.11575332283973694, |
|
"learning_rate": 0.00013127480854856295, |
|
"loss": 0.1133, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 3.3392594270184577, |
|
"grad_norm": 0.10017456859350204, |
|
"learning_rate": 0.00013110051813377025, |
|
"loss": 0.1091, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 3.3437889253765145, |
|
"grad_norm": 0.11635085195302963, |
|
"learning_rate": 0.00013092612306508922, |
|
"loss": 0.1139, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 3.3483184237345713, |
|
"grad_norm": 0.09450142085552216, |
|
"learning_rate": 0.00013075162392936295, |
|
"loss": 0.1119, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 3.352847922092628, |
|
"grad_norm": 0.09203408658504486, |
|
"learning_rate": 0.0001305770213137846, |
|
"loss": 0.1088, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 3.357377420450685, |
|
"grad_norm": 0.09736169874668121, |
|
"learning_rate": 0.00013040231580589565, |
|
"loss": 0.1099, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 3.361906918808742, |
|
"grad_norm": 0.09759002178907394, |
|
"learning_rate": 0.0001302275079935837, |
|
"loss": 0.1149, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 3.3664364171667986, |
|
"grad_norm": 0.09410129487514496, |
|
"learning_rate": 0.00013005259846508068, |
|
"loss": 0.1132, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 3.3709659155248555, |
|
"grad_norm": 0.09184587746858597, |
|
"learning_rate": 0.0001298775878089608, |
|
"loss": 0.1099, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 3.3754954138829123, |
|
"grad_norm": 0.10475565493106842, |
|
"learning_rate": 0.00012970247661413855, |
|
"loss": 0.1109, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 3.380024912240969, |
|
"grad_norm": 0.10369405895471573, |
|
"learning_rate": 0.00012952726546986668, |
|
"loss": 0.1144, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 3.3845544105990264, |
|
"grad_norm": 0.1000487357378006, |
|
"learning_rate": 0.00012935195496573435, |
|
"loss": 0.1093, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 3.3890839089570832, |
|
"grad_norm": 0.1104254201054573, |
|
"learning_rate": 0.00012917654569166503, |
|
"loss": 0.1093, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 3.39361340731514, |
|
"grad_norm": 0.10195254534482956, |
|
"learning_rate": 0.0001290010382379146, |
|
"loss": 0.1104, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 3.398142905673197, |
|
"grad_norm": 0.10613837838172913, |
|
"learning_rate": 0.00012882543319506925, |
|
"loss": 0.115, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.4026724040312537, |
|
"grad_norm": 0.10054861009120941, |
|
"learning_rate": 0.0001286497311540436, |
|
"loss": 0.1093, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 3.4072019023893105, |
|
"grad_norm": 0.1072639673948288, |
|
"learning_rate": 0.0001284739327060787, |
|
"loss": 0.114, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 3.4117314007473674, |
|
"grad_norm": 0.09658465534448624, |
|
"learning_rate": 0.00012829803844273987, |
|
"loss": 0.1088, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 3.416260899105424, |
|
"grad_norm": 0.09596540778875351, |
|
"learning_rate": 0.00012812204895591505, |
|
"loss": 0.1124, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 3.420790397463481, |
|
"grad_norm": 0.08748818188905716, |
|
"learning_rate": 0.00012794596483781248, |
|
"loss": 0.1125, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 3.425319895821538, |
|
"grad_norm": 0.09352606534957886, |
|
"learning_rate": 0.00012776978668095884, |
|
"loss": 0.1134, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 3.4298493941795947, |
|
"grad_norm": 0.11329905688762665, |
|
"learning_rate": 0.0001275935150781973, |
|
"loss": 0.1138, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 3.4343788925376515, |
|
"grad_norm": 0.09285202622413635, |
|
"learning_rate": 0.00012741715062268547, |
|
"loss": 0.1096, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 3.4389083908957083, |
|
"grad_norm": 0.10598818957805634, |
|
"learning_rate": 0.00012724069390789342, |
|
"loss": 0.113, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 3.443437889253765, |
|
"grad_norm": 0.11264318972826004, |
|
"learning_rate": 0.0001270641455276016, |
|
"loss": 0.1135, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 3.447967387611822, |
|
"grad_norm": 0.09473126381635666, |
|
"learning_rate": 0.00012688750607589897, |
|
"loss": 0.1106, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 3.452496885969879, |
|
"grad_norm": 0.09131330251693726, |
|
"learning_rate": 0.000126710776147181, |
|
"loss": 0.1149, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 3.4570263843279356, |
|
"grad_norm": 0.10694695264101028, |
|
"learning_rate": 0.0001265339563361475, |
|
"loss": 0.1126, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 3.4615558826859925, |
|
"grad_norm": 0.1015838012099266, |
|
"learning_rate": 0.00012635704723780087, |
|
"loss": 0.1135, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 3.4660853810440493, |
|
"grad_norm": 0.10224758833646774, |
|
"learning_rate": 0.00012618004944744385, |
|
"loss": 0.1155, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 3.470614879402106, |
|
"grad_norm": 0.11169352382421494, |
|
"learning_rate": 0.00012600296356067768, |
|
"loss": 0.1092, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 3.475144377760163, |
|
"grad_norm": 0.10369731485843658, |
|
"learning_rate": 0.00012582579017340003, |
|
"loss": 0.1107, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 3.4796738761182198, |
|
"grad_norm": 0.09245746582746506, |
|
"learning_rate": 0.00012564852988180305, |
|
"loss": 0.1093, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 3.4842033744762766, |
|
"grad_norm": 0.09676039218902588, |
|
"learning_rate": 0.0001254711832823713, |
|
"loss": 0.1117, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 3.4887328728343334, |
|
"grad_norm": 0.10541850328445435, |
|
"learning_rate": 0.0001252937509718797, |
|
"loss": 0.1119, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 3.4932623711923902, |
|
"grad_norm": 0.08481086790561676, |
|
"learning_rate": 0.0001251162335473917, |
|
"loss": 0.1103, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 3.497791869550447, |
|
"grad_norm": 0.09966452419757843, |
|
"learning_rate": 0.00012493863160625713, |
|
"loss": 0.1147, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 3.502321367908504, |
|
"grad_norm": 0.09558738023042679, |
|
"learning_rate": 0.00012476094574611016, |
|
"loss": 0.1123, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 3.5068508662665607, |
|
"grad_norm": 0.10436621308326721, |
|
"learning_rate": 0.00012458317656486746, |
|
"loss": 0.1129, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 3.5113803646246176, |
|
"grad_norm": 0.10191968828439713, |
|
"learning_rate": 0.00012440532466072597, |
|
"loss": 0.1099, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 3.515909862982675, |
|
"grad_norm": 0.10766720771789551, |
|
"learning_rate": 0.000124227390632161, |
|
"loss": 0.1121, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 3.5204393613407317, |
|
"grad_norm": 0.08841870725154877, |
|
"learning_rate": 0.0001240493750779243, |
|
"loss": 0.1103, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 3.5249688596987885, |
|
"grad_norm": 0.1090930923819542, |
|
"learning_rate": 0.00012387127859704187, |
|
"loss": 0.1164, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 3.5294983580568453, |
|
"grad_norm": 0.10451924055814743, |
|
"learning_rate": 0.00012369310178881205, |
|
"loss": 0.1112, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 3.534027856414902, |
|
"grad_norm": 0.09721478819847107, |
|
"learning_rate": 0.0001235148452528035, |
|
"loss": 0.1135, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 3.538557354772959, |
|
"grad_norm": 0.0975523293018341, |
|
"learning_rate": 0.00012333650958885322, |
|
"loss": 0.1105, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 3.543086853131016, |
|
"grad_norm": 0.08713623881340027, |
|
"learning_rate": 0.00012315809539706436, |
|
"loss": 0.1103, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 3.5476163514890726, |
|
"grad_norm": 0.09232752025127411, |
|
"learning_rate": 0.00012297960327780437, |
|
"loss": 0.1128, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 3.5521458498471294, |
|
"grad_norm": 0.09094680100679398, |
|
"learning_rate": 0.00012280103383170295, |
|
"loss": 0.1104, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 3.5566753482051863, |
|
"grad_norm": 0.09738276153802872, |
|
"learning_rate": 0.00012262238765964995, |
|
"loss": 0.1059, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 3.561204846563243, |
|
"grad_norm": 0.0989813581109047, |
|
"learning_rate": 0.0001224436653627935, |
|
"loss": 0.112, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 3.5657343449213, |
|
"grad_norm": 0.09522037208080292, |
|
"learning_rate": 0.0001222648675425378, |
|
"loss": 0.1081, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 3.5702638432793568, |
|
"grad_norm": 0.10340669006109238, |
|
"learning_rate": 0.00012208599480054125, |
|
"loss": 0.1117, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 3.5747933416374136, |
|
"grad_norm": 0.11090776324272156, |
|
"learning_rate": 0.0001219070477387143, |
|
"loss": 0.1097, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 3.5793228399954704, |
|
"grad_norm": 0.08626790344715118, |
|
"learning_rate": 0.00012172802695921754, |
|
"loss": 0.1128, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 3.5838523383535272, |
|
"grad_norm": 0.09012069553136826, |
|
"learning_rate": 0.00012154893306445961, |
|
"loss": 0.1137, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 3.588381836711584, |
|
"grad_norm": 0.07982558012008667, |
|
"learning_rate": 0.00012136976665709516, |
|
"loss": 0.1117, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 3.592911335069641, |
|
"grad_norm": 0.09850164502859116, |
|
"learning_rate": 0.00012119052834002289, |
|
"loss": 0.1088, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 3.597440833427698, |
|
"grad_norm": 0.09800245612859726, |
|
"learning_rate": 0.00012101121871638343, |
|
"loss": 0.1153, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 3.601970331785755, |
|
"grad_norm": 0.09477314352989197, |
|
"learning_rate": 0.0001208318383895574, |
|
"loss": 0.1104, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 3.606499830143812, |
|
"grad_norm": 0.10447141528129578, |
|
"learning_rate": 0.00012065238796316331, |
|
"loss": 0.1115, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 3.6110293285018686, |
|
"grad_norm": 0.10505667328834534, |
|
"learning_rate": 0.00012047286804105557, |
|
"loss": 0.1096, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 3.6155588268599255, |
|
"grad_norm": 0.0925762876868248, |
|
"learning_rate": 0.00012029327922732242, |
|
"loss": 0.1146, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 3.6200883252179823, |
|
"grad_norm": 0.12217893451452255, |
|
"learning_rate": 0.00012011362212628397, |
|
"loss": 0.1105, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 3.624617823576039, |
|
"grad_norm": 0.09887892752885818, |
|
"learning_rate": 0.00011993389734249006, |
|
"loss": 0.1098, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.629147321934096, |
|
"grad_norm": 0.10694731771945953, |
|
"learning_rate": 0.00011975410548071832, |
|
"loss": 0.1129, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 3.6336768202921528, |
|
"grad_norm": 0.08971285820007324, |
|
"learning_rate": 0.00011957424714597212, |
|
"loss": 0.1084, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 3.6382063186502096, |
|
"grad_norm": 0.08375135064125061, |
|
"learning_rate": 0.00011939432294347848, |
|
"loss": 0.1098, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 3.6427358170082664, |
|
"grad_norm": 0.09610874205827713, |
|
"learning_rate": 0.00011921433347868602, |
|
"loss": 0.1109, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 3.6472653153663233, |
|
"grad_norm": 0.09743242710828781, |
|
"learning_rate": 0.00011903427935726308, |
|
"loss": 0.1176, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 3.65179481372438, |
|
"grad_norm": 0.09157928824424744, |
|
"learning_rate": 0.00011885416118509549, |
|
"loss": 0.1116, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 3.656324312082437, |
|
"grad_norm": 0.10359596461057663, |
|
"learning_rate": 0.00011867397956828463, |
|
"loss": 0.1117, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 3.6608538104404937, |
|
"grad_norm": 0.08667086809873581, |
|
"learning_rate": 0.00011849373511314537, |
|
"loss": 0.1126, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 3.6653833087985506, |
|
"grad_norm": 0.0973113626241684, |
|
"learning_rate": 0.00011831342842620405, |
|
"loss": 0.1099, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 3.6699128071566074, |
|
"grad_norm": 0.09472218155860901, |
|
"learning_rate": 0.00011813306011419642, |
|
"loss": 0.1117, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 3.674442305514664, |
|
"grad_norm": 0.10071218013763428, |
|
"learning_rate": 0.00011795263078406558, |
|
"loss": 0.1096, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 3.678971803872721, |
|
"grad_norm": 0.08343309164047241, |
|
"learning_rate": 0.00011777214104295995, |
|
"loss": 0.1118, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 3.683501302230778, |
|
"grad_norm": 0.0963587686419487, |
|
"learning_rate": 0.00011759159149823127, |
|
"loss": 0.1099, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 3.6880308005888347, |
|
"grad_norm": 0.09920413792133331, |
|
"learning_rate": 0.00011741098275743247, |
|
"loss": 0.1132, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 3.6925602989468915, |
|
"grad_norm": 0.12149636447429657, |
|
"learning_rate": 0.00011723031542831578, |
|
"loss": 0.1146, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 3.6970897973049484, |
|
"grad_norm": 0.09953594207763672, |
|
"learning_rate": 0.00011704959011883043, |
|
"loss": 0.1078, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 3.701619295663005, |
|
"grad_norm": 0.11264549940824509, |
|
"learning_rate": 0.0001168688074371209, |
|
"loss": 0.1098, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 3.706148794021062, |
|
"grad_norm": 0.10793278366327286, |
|
"learning_rate": 0.00011668796799152457, |
|
"loss": 0.1123, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 3.710678292379119, |
|
"grad_norm": 0.10062643885612488, |
|
"learning_rate": 0.00011650707239057, |
|
"loss": 0.1136, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 3.7152077907371757, |
|
"grad_norm": 0.09304151684045792, |
|
"learning_rate": 0.00011632612124297461, |
|
"loss": 0.1126, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 3.7197372890952325, |
|
"grad_norm": 0.10045602172613144, |
|
"learning_rate": 0.00011614511515764277, |
|
"loss": 0.1092, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 3.7242667874532893, |
|
"grad_norm": 0.09587648510932922, |
|
"learning_rate": 0.00011596405474366372, |
|
"loss": 0.1115, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 3.728796285811346, |
|
"grad_norm": 0.10631423443555832, |
|
"learning_rate": 0.00011578294061030947, |
|
"loss": 0.111, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 3.733325784169403, |
|
"grad_norm": 0.09861784428358078, |
|
"learning_rate": 0.00011560177336703291, |
|
"loss": 0.11, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 3.7378552825274602, |
|
"grad_norm": 0.0921064168214798, |
|
"learning_rate": 0.00011542055362346549, |
|
"loss": 0.1109, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 3.742384780885517, |
|
"grad_norm": 0.10424584895372391, |
|
"learning_rate": 0.00011523928198941543, |
|
"loss": 0.11, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 3.746914279243574, |
|
"grad_norm": 0.10199391096830368, |
|
"learning_rate": 0.00011505795907486551, |
|
"loss": 0.112, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 3.7514437776016307, |
|
"grad_norm": 0.09731689840555191, |
|
"learning_rate": 0.00011487658548997115, |
|
"loss": 0.1125, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 3.7559732759596876, |
|
"grad_norm": 0.07730797678232193, |
|
"learning_rate": 0.00011469516184505821, |
|
"loss": 0.1096, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 3.7605027743177444, |
|
"grad_norm": 0.09512131661176682, |
|
"learning_rate": 0.00011451368875062101, |
|
"loss": 0.1115, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 3.765032272675801, |
|
"grad_norm": 0.08450417220592499, |
|
"learning_rate": 0.00011433216681732027, |
|
"loss": 0.1135, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 3.769561771033858, |
|
"grad_norm": 0.08709891885519028, |
|
"learning_rate": 0.00011415059665598105, |
|
"loss": 0.111, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 3.774091269391915, |
|
"grad_norm": 0.12575045228004456, |
|
"learning_rate": 0.00011396897887759071, |
|
"loss": 0.1145, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 3.7786207677499717, |
|
"grad_norm": 0.09050168097019196, |
|
"learning_rate": 0.00011378731409329684, |
|
"loss": 0.1108, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 3.7831502661080285, |
|
"grad_norm": 0.0824236199259758, |
|
"learning_rate": 0.00011360560291440526, |
|
"loss": 0.1137, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 3.7876797644660853, |
|
"grad_norm": 0.10261125862598419, |
|
"learning_rate": 0.00011342384595237776, |
|
"loss": 0.1089, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 3.792209262824142, |
|
"grad_norm": 0.08885115385055542, |
|
"learning_rate": 0.00011324204381883033, |
|
"loss": 0.1109, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 3.796738761182199, |
|
"grad_norm": 0.10409918427467346, |
|
"learning_rate": 0.00011306019712553094, |
|
"loss": 0.1142, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 3.801268259540256, |
|
"grad_norm": 0.0991046279668808, |
|
"learning_rate": 0.00011287830648439746, |
|
"loss": 0.115, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 3.8057977578983126, |
|
"grad_norm": 0.10309819132089615, |
|
"learning_rate": 0.00011269637250749565, |
|
"loss": 0.1112, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 3.8103272562563695, |
|
"grad_norm": 0.09360276162624359, |
|
"learning_rate": 0.00011251439580703716, |
|
"loss": 0.1115, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 3.8148567546144263, |
|
"grad_norm": 0.09267252683639526, |
|
"learning_rate": 0.0001123323769953773, |
|
"loss": 0.1106, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 3.819386252972483, |
|
"grad_norm": 0.11334355920553207, |
|
"learning_rate": 0.00011215031668501322, |
|
"loss": 0.1086, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 3.8239157513305404, |
|
"grad_norm": 0.09532047063112259, |
|
"learning_rate": 0.00011196821548858156, |
|
"loss": 0.1091, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 3.8284452496885972, |
|
"grad_norm": 0.08060566335916519, |
|
"learning_rate": 0.00011178607401885668, |
|
"loss": 0.1102, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 3.832974748046654, |
|
"grad_norm": 0.09655016660690308, |
|
"learning_rate": 0.0001116038928887484, |
|
"loss": 0.1124, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 3.837504246404711, |
|
"grad_norm": 0.10175477713346481, |
|
"learning_rate": 0.00011142167271129996, |
|
"loss": 0.1108, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 3.8420337447627677, |
|
"grad_norm": 0.08714988827705383, |
|
"learning_rate": 0.00011123941409968606, |
|
"loss": 0.111, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 3.8465632431208245, |
|
"grad_norm": 0.08987358957529068, |
|
"learning_rate": 0.00011105711766721067, |
|
"loss": 0.1096, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 3.8510927414788814, |
|
"grad_norm": 0.10814320296049118, |
|
"learning_rate": 0.00011087478402730514, |
|
"loss": 0.1151, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.855622239836938, |
|
"grad_norm": 0.09886670112609863, |
|
"learning_rate": 0.00011069241379352588, |
|
"loss": 0.1078, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 3.860151738194995, |
|
"grad_norm": 0.09303957968950272, |
|
"learning_rate": 0.00011051000757955257, |
|
"loss": 0.113, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 3.864681236553052, |
|
"grad_norm": 0.10088100284337997, |
|
"learning_rate": 0.00011032756599918584, |
|
"loss": 0.1112, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 3.8692107349111087, |
|
"grad_norm": 0.11249160021543503, |
|
"learning_rate": 0.0001101450896663454, |
|
"loss": 0.1124, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 3.8737402332691655, |
|
"grad_norm": 0.0930514931678772, |
|
"learning_rate": 0.00010996257919506794, |
|
"loss": 0.1115, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 3.8782697316272223, |
|
"grad_norm": 0.09656676650047302, |
|
"learning_rate": 0.00010978003519950493, |
|
"loss": 0.1098, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 3.882799229985279, |
|
"grad_norm": 0.091661736369133, |
|
"learning_rate": 0.00010959745829392069, |
|
"loss": 0.1135, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 3.887328728343336, |
|
"grad_norm": 0.09262984991073608, |
|
"learning_rate": 0.00010941484909269036, |
|
"loss": 0.1115, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 3.891858226701393, |
|
"grad_norm": 0.11751729995012283, |
|
"learning_rate": 0.00010923220821029762, |
|
"loss": 0.1132, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 3.8963877250594496, |
|
"grad_norm": 0.10761595517396927, |
|
"learning_rate": 0.00010904953626133287, |
|
"loss": 0.1126, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 3.9009172234175065, |
|
"grad_norm": 0.08337333053350449, |
|
"learning_rate": 0.00010886683386049099, |
|
"loss": 0.111, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 3.9054467217755633, |
|
"grad_norm": 0.10421154648065567, |
|
"learning_rate": 0.00010868410162256935, |
|
"loss": 0.1108, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 3.90997622013362, |
|
"grad_norm": 0.10565438121557236, |
|
"learning_rate": 0.0001085013401624657, |
|
"loss": 0.112, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 3.914505718491677, |
|
"grad_norm": 0.08946827799081802, |
|
"learning_rate": 0.00010831855009517613, |
|
"loss": 0.1101, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 3.9190352168497338, |
|
"grad_norm": 0.08507835865020752, |
|
"learning_rate": 0.00010813573203579306, |
|
"loss": 0.11, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 3.9235647152077906, |
|
"grad_norm": 0.07897284626960754, |
|
"learning_rate": 0.00010795288659950303, |
|
"loss": 0.1111, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 3.9280942135658474, |
|
"grad_norm": 0.09554194658994675, |
|
"learning_rate": 0.00010777001440158472, |
|
"loss": 0.1126, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 3.9326237119239043, |
|
"grad_norm": 0.11981197446584702, |
|
"learning_rate": 0.00010758711605740683, |
|
"loss": 0.1105, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 3.937153210281961, |
|
"grad_norm": 0.11121747642755508, |
|
"learning_rate": 0.00010740419218242615, |
|
"loss": 0.112, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 3.941682708640018, |
|
"grad_norm": 0.10044469684362411, |
|
"learning_rate": 0.00010722124339218524, |
|
"loss": 0.1097, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 3.9462122069980747, |
|
"grad_norm": 0.07444220036268234, |
|
"learning_rate": 0.00010703827030231065, |
|
"loss": 0.1096, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 3.9507417053561316, |
|
"grad_norm": 0.08997642993927002, |
|
"learning_rate": 0.00010685527352851054, |
|
"loss": 0.1098, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 3.9552712037141884, |
|
"grad_norm": 0.09852538257837296, |
|
"learning_rate": 0.0001066722536865729, |
|
"loss": 0.1112, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 3.9598007020722457, |
|
"grad_norm": 0.0946199893951416, |
|
"learning_rate": 0.00010648921139236328, |
|
"loss": 0.113, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 3.9643302004303025, |
|
"grad_norm": 0.10738665610551834, |
|
"learning_rate": 0.0001063061472618228, |
|
"loss": 0.1105, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 3.9688596987883593, |
|
"grad_norm": 0.09911846369504929, |
|
"learning_rate": 0.00010612306191096602, |
|
"loss": 0.1092, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 3.973389197146416, |
|
"grad_norm": 0.09100183844566345, |
|
"learning_rate": 0.00010593995595587898, |
|
"loss": 0.1075, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 3.977918695504473, |
|
"grad_norm": 0.08540119975805283, |
|
"learning_rate": 0.00010575683001271701, |
|
"loss": 0.11, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 3.98244819386253, |
|
"grad_norm": 0.1455107182264328, |
|
"learning_rate": 0.00010557368469770268, |
|
"loss": 0.1072, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 3.9869776922205866, |
|
"grad_norm": 0.09040206670761108, |
|
"learning_rate": 0.0001053905206271238, |
|
"loss": 0.112, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 3.9915071905786434, |
|
"grad_norm": 0.08172180503606796, |
|
"learning_rate": 0.00010520733841733125, |
|
"loss": 0.1128, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 3.9960366889367003, |
|
"grad_norm": 0.09760237485170364, |
|
"learning_rate": 0.000105024138684737, |
|
"loss": 0.1119, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 3.9996602876231457, |
|
"eval_loss": 0.15827356278896332, |
|
"eval_runtime": 617.6968, |
|
"eval_samples_per_second": 12.741, |
|
"eval_steps_per_second": 1.593, |
|
"step": 8828 |
|
}, |
|
{ |
|
"epoch": 4.000905899671611, |
|
"grad_norm": 0.0798049345612526, |
|
"learning_rate": 0.00010484092204581189, |
|
"loss": 0.1153, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 4.005435398029668, |
|
"grad_norm": 0.07974246889352798, |
|
"learning_rate": 0.00010465768911708373, |
|
"loss": 0.0957, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 4.009964896387725, |
|
"grad_norm": 0.08676203340291977, |
|
"learning_rate": 0.00010447444051513513, |
|
"loss": 0.0962, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 4.014494394745782, |
|
"grad_norm": 0.07175087183713913, |
|
"learning_rate": 0.00010429117685660146, |
|
"loss": 0.0961, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 4.019023893103839, |
|
"grad_norm": 0.06814973056316376, |
|
"learning_rate": 0.00010410789875816866, |
|
"loss": 0.0963, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 4.0235533914618955, |
|
"grad_norm": 0.09090814739465714, |
|
"learning_rate": 0.00010392460683657142, |
|
"loss": 0.0994, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 4.028082889819952, |
|
"grad_norm": 0.08229593187570572, |
|
"learning_rate": 0.0001037413017085908, |
|
"loss": 0.0967, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 4.032612388178009, |
|
"grad_norm": 0.07398311048746109, |
|
"learning_rate": 0.00010355798399105235, |
|
"loss": 0.096, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 4.037141886536066, |
|
"grad_norm": 0.06932748854160309, |
|
"learning_rate": 0.00010337465430082403, |
|
"loss": 0.0969, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 4.041671384894123, |
|
"grad_norm": 0.09156011044979095, |
|
"learning_rate": 0.000103191313254814, |
|
"loss": 0.098, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 4.04620088325218, |
|
"grad_norm": 0.07946418970823288, |
|
"learning_rate": 0.00010300796146996874, |
|
"loss": 0.0962, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 4.0507303816102365, |
|
"grad_norm": 0.08557803928852081, |
|
"learning_rate": 0.00010282459956327073, |
|
"loss": 0.0948, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 4.055259879968293, |
|
"grad_norm": 0.0721755251288414, |
|
"learning_rate": 0.00010264122815173665, |
|
"loss": 0.0981, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 4.05978937832635, |
|
"grad_norm": 0.069907546043396, |
|
"learning_rate": 0.0001024578478524151, |
|
"loss": 0.0973, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 4.064318876684407, |
|
"grad_norm": 0.07597635686397552, |
|
"learning_rate": 0.00010227445928238455, |
|
"loss": 0.0985, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 4.068848375042464, |
|
"grad_norm": 0.08416584879159927, |
|
"learning_rate": 0.00010209106305875139, |
|
"loss": 0.0954, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 4.073377873400521, |
|
"grad_norm": 0.08617585897445679, |
|
"learning_rate": 0.00010190765979864764, |
|
"loss": 0.0977, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 4.077907371758577, |
|
"grad_norm": 0.07779661566019058, |
|
"learning_rate": 0.00010172425011922915, |
|
"loss": 0.0968, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.082436870116634, |
|
"grad_norm": 0.08647850900888443, |
|
"learning_rate": 0.00010154083463767323, |
|
"loss": 0.0964, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 4.086966368474691, |
|
"grad_norm": 0.08829203248023987, |
|
"learning_rate": 0.00010135741397117684, |
|
"loss": 0.0992, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 4.091495866832748, |
|
"grad_norm": 0.08579693734645844, |
|
"learning_rate": 0.00010117398873695429, |
|
"loss": 0.0987, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 4.096025365190805, |
|
"grad_norm": 0.06886789947748184, |
|
"learning_rate": 0.00010099055955223531, |
|
"loss": 0.0983, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 4.100554863548862, |
|
"grad_norm": 0.0997413694858551, |
|
"learning_rate": 0.0001008071270342629, |
|
"loss": 0.0956, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 4.105084361906918, |
|
"grad_norm": 0.07166160643100739, |
|
"learning_rate": 0.00010062369180029125, |
|
"loss": 0.0968, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 4.109613860264975, |
|
"grad_norm": 0.07676910609006882, |
|
"learning_rate": 0.00010044025446758381, |
|
"loss": 0.097, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 4.114143358623033, |
|
"grad_norm": 0.08378776907920837, |
|
"learning_rate": 0.00010025681565341091, |
|
"loss": 0.0964, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 4.11867285698109, |
|
"grad_norm": 0.0725962296128273, |
|
"learning_rate": 0.00010007337597504804, |
|
"loss": 0.0982, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 4.123202355339147, |
|
"grad_norm": 0.0860457792878151, |
|
"learning_rate": 9.988993604977352e-05, |
|
"loss": 0.0974, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 4.127731853697203, |
|
"grad_norm": 0.08629846572875977, |
|
"learning_rate": 9.970649649486644e-05, |
|
"loss": 0.0981, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 4.13226135205526, |
|
"grad_norm": 0.08496873825788498, |
|
"learning_rate": 9.952305792760475e-05, |
|
"loss": 0.0991, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 4.136790850413317, |
|
"grad_norm": 0.07953400164842606, |
|
"learning_rate": 9.933962096526302e-05, |
|
"loss": 0.0953, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 4.141320348771374, |
|
"grad_norm": 0.08169267326593399, |
|
"learning_rate": 9.915618622511044e-05, |
|
"loss": 0.0985, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 4.145849847129431, |
|
"grad_norm": 0.09323912113904953, |
|
"learning_rate": 9.897275432440872e-05, |
|
"loss": 0.0955, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 4.1503793454874875, |
|
"grad_norm": 0.07836610078811646, |
|
"learning_rate": 9.878932588040997e-05, |
|
"loss": 0.0983, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 4.154908843845544, |
|
"grad_norm": 0.06795407086610794, |
|
"learning_rate": 9.860590151035473e-05, |
|
"loss": 0.097, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 4.159438342203601, |
|
"grad_norm": 0.082821324467659, |
|
"learning_rate": 9.84224818314698e-05, |
|
"loss": 0.0972, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 4.163967840561658, |
|
"grad_norm": 0.06650907546281815, |
|
"learning_rate": 9.823906746096622e-05, |
|
"loss": 0.0973, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 4.168497338919715, |
|
"grad_norm": 0.07272431999444962, |
|
"learning_rate": 9.805565901603714e-05, |
|
"loss": 0.0974, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 4.173026837277772, |
|
"grad_norm": 0.07406030595302582, |
|
"learning_rate": 9.78722571138558e-05, |
|
"loss": 0.0968, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 4.1775563356358285, |
|
"grad_norm": 0.06534506380558014, |
|
"learning_rate": 9.768886237157337e-05, |
|
"loss": 0.0977, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 4.182085833993885, |
|
"grad_norm": 0.08346185088157654, |
|
"learning_rate": 9.750547540631697e-05, |
|
"loss": 0.0966, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 4.186615332351942, |
|
"grad_norm": 0.0646069347858429, |
|
"learning_rate": 9.732209683518753e-05, |
|
"loss": 0.0957, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 4.191144830709999, |
|
"grad_norm": 0.07642305642366409, |
|
"learning_rate": 9.713872727525778e-05, |
|
"loss": 0.0948, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 4.195674329068056, |
|
"grad_norm": 0.07574049383401871, |
|
"learning_rate": 9.695536734357005e-05, |
|
"loss": 0.0977, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 4.200203827426113, |
|
"grad_norm": 0.08899475634098053, |
|
"learning_rate": 9.677201765713435e-05, |
|
"loss": 0.0979, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 4.2047333257841695, |
|
"grad_norm": 0.07823716104030609, |
|
"learning_rate": 9.658867883292615e-05, |
|
"loss": 0.0986, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 4.209262824142226, |
|
"grad_norm": 0.07970847934484482, |
|
"learning_rate": 9.640535148788443e-05, |
|
"loss": 0.0965, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 4.213792322500283, |
|
"grad_norm": 0.07121343910694122, |
|
"learning_rate": 9.622203623890944e-05, |
|
"loss": 0.098, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 4.21832182085834, |
|
"grad_norm": 0.08438264578580856, |
|
"learning_rate": 9.603873370286083e-05, |
|
"loss": 0.0975, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 4.222851319216397, |
|
"grad_norm": 0.07344311475753784, |
|
"learning_rate": 9.585544449655543e-05, |
|
"loss": 0.0995, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 4.227380817574454, |
|
"grad_norm": 0.08449902385473251, |
|
"learning_rate": 9.567216923676526e-05, |
|
"loss": 0.1, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 4.23191031593251, |
|
"grad_norm": 0.08021081984043121, |
|
"learning_rate": 9.548890854021529e-05, |
|
"loss": 0.0966, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 4.236439814290567, |
|
"grad_norm": 0.08234046399593353, |
|
"learning_rate": 9.530566302358162e-05, |
|
"loss": 0.0948, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 4.240969312648624, |
|
"grad_norm": 0.09645576030015945, |
|
"learning_rate": 9.512243330348917e-05, |
|
"loss": 0.0952, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 4.245498811006681, |
|
"grad_norm": 0.07178854942321777, |
|
"learning_rate": 9.493921999650981e-05, |
|
"loss": 0.0928, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 4.250028309364738, |
|
"grad_norm": 0.08183001726865768, |
|
"learning_rate": 9.475602371916006e-05, |
|
"loss": 0.0969, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 4.254557807722795, |
|
"grad_norm": 0.07914981991052628, |
|
"learning_rate": 9.457284508789922e-05, |
|
"loss": 0.0967, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 4.259087306080851, |
|
"grad_norm": 0.07766249775886536, |
|
"learning_rate": 9.438968471912718e-05, |
|
"loss": 0.0973, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 4.263616804438908, |
|
"grad_norm": 0.06642225384712219, |
|
"learning_rate": 9.420654322918234e-05, |
|
"loss": 0.0972, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 4.268146302796965, |
|
"grad_norm": 0.10396700352430344, |
|
"learning_rate": 9.402342123433968e-05, |
|
"loss": 0.0992, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 4.272675801155022, |
|
"grad_norm": 0.0772017240524292, |
|
"learning_rate": 9.384031935080849e-05, |
|
"loss": 0.0955, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 4.277205299513079, |
|
"grad_norm": 0.08579739928245544, |
|
"learning_rate": 9.365723819473034e-05, |
|
"loss": 0.0999, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 4.2817347978711355, |
|
"grad_norm": 0.07170093059539795, |
|
"learning_rate": 9.347417838217719e-05, |
|
"loss": 0.0978, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 4.286264296229192, |
|
"grad_norm": 0.09926804155111313, |
|
"learning_rate": 9.329114052914905e-05, |
|
"loss": 0.0975, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 4.290793794587249, |
|
"grad_norm": 0.0870131105184555, |
|
"learning_rate": 9.310812525157211e-05, |
|
"loss": 0.0976, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 4.295323292945306, |
|
"grad_norm": 0.09447421133518219, |
|
"learning_rate": 9.29251331652966e-05, |
|
"loss": 0.0978, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 4.299852791303363, |
|
"grad_norm": 0.06886494159698486, |
|
"learning_rate": 9.274216488609465e-05, |
|
"loss": 0.0956, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 4.30438228966142, |
|
"grad_norm": 0.06958340108394623, |
|
"learning_rate": 9.255922102965835e-05, |
|
"loss": 0.0978, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.3089117880194765, |
|
"grad_norm": 0.09395691007375717, |
|
"learning_rate": 9.237630221159751e-05, |
|
"loss": 0.0999, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 4.313441286377533, |
|
"grad_norm": 0.08615806698799133, |
|
"learning_rate": 9.219340904743781e-05, |
|
"loss": 0.0971, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 4.31797078473559, |
|
"grad_norm": 0.09322655200958252, |
|
"learning_rate": 9.201054215261849e-05, |
|
"loss": 0.1008, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 4.322500283093647, |
|
"grad_norm": 0.08992312103509903, |
|
"learning_rate": 9.182770214249046e-05, |
|
"loss": 0.0992, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 4.327029781451705, |
|
"grad_norm": 0.08701404929161072, |
|
"learning_rate": 9.164488963231415e-05, |
|
"loss": 0.0969, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 4.3315592798097615, |
|
"grad_norm": 0.07870589941740036, |
|
"learning_rate": 9.146210523725744e-05, |
|
"loss": 0.0989, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 4.336088778167818, |
|
"grad_norm": 0.061097387224435806, |
|
"learning_rate": 9.127934957239367e-05, |
|
"loss": 0.0986, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 4.340618276525875, |
|
"grad_norm": 0.08281367272138596, |
|
"learning_rate": 9.109662325269932e-05, |
|
"loss": 0.0988, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 4.345147774883932, |
|
"grad_norm": 0.09463726729154587, |
|
"learning_rate": 9.091392689305233e-05, |
|
"loss": 0.0977, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 4.349677273241989, |
|
"grad_norm": 0.07657352089881897, |
|
"learning_rate": 9.073126110822969e-05, |
|
"loss": 0.0995, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 4.354206771600046, |
|
"grad_norm": 0.08821120113134384, |
|
"learning_rate": 9.054862651290559e-05, |
|
"loss": 0.0972, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 4.3587362699581025, |
|
"grad_norm": 0.09997398406267166, |
|
"learning_rate": 9.036602372164922e-05, |
|
"loss": 0.0987, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 4.363265768316159, |
|
"grad_norm": 0.08112788945436478, |
|
"learning_rate": 9.018345334892275e-05, |
|
"loss": 0.0974, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 4.367795266674216, |
|
"grad_norm": 0.07112699747085571, |
|
"learning_rate": 9.000091600907928e-05, |
|
"loss": 0.0977, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 4.372324765032273, |
|
"grad_norm": 0.09066987037658691, |
|
"learning_rate": 8.981841231636073e-05, |
|
"loss": 0.0989, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 4.37685426339033, |
|
"grad_norm": 0.08122070878744125, |
|
"learning_rate": 8.96359428848958e-05, |
|
"loss": 0.0997, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 4.381383761748387, |
|
"grad_norm": 0.08035853505134583, |
|
"learning_rate": 8.945350832869795e-05, |
|
"loss": 0.0979, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 4.3859132601064434, |
|
"grad_norm": 0.07366472482681274, |
|
"learning_rate": 8.927110926166324e-05, |
|
"loss": 0.0969, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 4.3904427584645, |
|
"grad_norm": 0.0794186070561409, |
|
"learning_rate": 8.908874629756827e-05, |
|
"loss": 0.0983, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 4.394972256822557, |
|
"grad_norm": 0.06437776982784271, |
|
"learning_rate": 8.890642005006822e-05, |
|
"loss": 0.0984, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 4.399501755180614, |
|
"grad_norm": 0.07162316143512726, |
|
"learning_rate": 8.872413113269468e-05, |
|
"loss": 0.0975, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 4.404031253538671, |
|
"grad_norm": 0.07623278349637985, |
|
"learning_rate": 8.854188015885368e-05, |
|
"loss": 0.0998, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 4.408560751896728, |
|
"grad_norm": 0.07586734741926193, |
|
"learning_rate": 8.835966774182349e-05, |
|
"loss": 0.0973, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 4.413090250254784, |
|
"grad_norm": 0.0751037672162056, |
|
"learning_rate": 8.817749449475266e-05, |
|
"loss": 0.099, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 4.417619748612841, |
|
"grad_norm": 0.07702226936817169, |
|
"learning_rate": 8.799536103065794e-05, |
|
"loss": 0.098, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 4.422149246970898, |
|
"grad_norm": 0.07942003011703491, |
|
"learning_rate": 8.781326796242222e-05, |
|
"loss": 0.0982, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 4.426678745328955, |
|
"grad_norm": 0.07305794209241867, |
|
"learning_rate": 8.763121590279249e-05, |
|
"loss": 0.0964, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 4.431208243687012, |
|
"grad_norm": 0.07927001267671585, |
|
"learning_rate": 8.744920546437764e-05, |
|
"loss": 0.0985, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 4.4357377420450685, |
|
"grad_norm": 0.08005883544683456, |
|
"learning_rate": 8.726723725964662e-05, |
|
"loss": 0.0996, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 4.440267240403125, |
|
"grad_norm": 0.07482803612947464, |
|
"learning_rate": 8.708531190092619e-05, |
|
"loss": 0.1007, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 4.444796738761182, |
|
"grad_norm": 0.08192785084247589, |
|
"learning_rate": 8.690343000039895e-05, |
|
"loss": 0.1008, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 4.449326237119239, |
|
"grad_norm": 0.07693403214216232, |
|
"learning_rate": 8.67215921701013e-05, |
|
"loss": 0.0982, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 4.453855735477296, |
|
"grad_norm": 0.0875929445028305, |
|
"learning_rate": 8.653979902192125e-05, |
|
"loss": 0.1003, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 4.458385233835353, |
|
"grad_norm": 0.07676168531179428, |
|
"learning_rate": 8.635805116759656e-05, |
|
"loss": 0.0964, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 4.4629147321934095, |
|
"grad_norm": 0.0706658735871315, |
|
"learning_rate": 8.617634921871252e-05, |
|
"loss": 0.0996, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 4.467444230551466, |
|
"grad_norm": 0.08421318978071213, |
|
"learning_rate": 8.599469378669997e-05, |
|
"loss": 0.1004, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 4.471973728909523, |
|
"grad_norm": 0.06626369804143906, |
|
"learning_rate": 8.581308548283313e-05, |
|
"loss": 0.0961, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 4.47650322726758, |
|
"grad_norm": 0.10955769568681717, |
|
"learning_rate": 8.563152491822777e-05, |
|
"loss": 0.0989, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 4.481032725625637, |
|
"grad_norm": 0.07062443345785141, |
|
"learning_rate": 8.545001270383896e-05, |
|
"loss": 0.0996, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 4.485562223983694, |
|
"grad_norm": 0.09103110432624817, |
|
"learning_rate": 8.526854945045903e-05, |
|
"loss": 0.0969, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 4.4900917223417505, |
|
"grad_norm": 0.08335482329130173, |
|
"learning_rate": 8.508713576871564e-05, |
|
"loss": 0.0988, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 4.494621220699807, |
|
"grad_norm": 0.08251272886991501, |
|
"learning_rate": 8.490577226906952e-05, |
|
"loss": 0.1002, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 4.499150719057864, |
|
"grad_norm": 0.0790376290678978, |
|
"learning_rate": 8.472445956181266e-05, |
|
"loss": 0.0959, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 4.503680217415921, |
|
"grad_norm": 0.07596680521965027, |
|
"learning_rate": 8.454319825706607e-05, |
|
"loss": 0.0957, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 4.508209715773978, |
|
"grad_norm": 0.07809595763683319, |
|
"learning_rate": 8.436198896477777e-05, |
|
"loss": 0.0966, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 4.512739214132035, |
|
"grad_norm": 0.0959998071193695, |
|
"learning_rate": 8.418083229472081e-05, |
|
"loss": 0.0983, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 4.517268712490091, |
|
"grad_norm": 0.0705457404255867, |
|
"learning_rate": 8.399972885649115e-05, |
|
"loss": 0.0985, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 4.521798210848148, |
|
"grad_norm": 0.07132048159837723, |
|
"learning_rate": 8.381867925950558e-05, |
|
"loss": 0.0966, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 4.526327709206205, |
|
"grad_norm": 0.08615089952945709, |
|
"learning_rate": 8.363768411299978e-05, |
|
"loss": 0.097, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 4.530857207564262, |
|
"grad_norm": 0.07540059089660645, |
|
"learning_rate": 8.345674402602617e-05, |
|
"loss": 0.1016, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.535386705922319, |
|
"grad_norm": 0.0691477432847023, |
|
"learning_rate": 8.32758596074519e-05, |
|
"loss": 0.1008, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 4.539916204280376, |
|
"grad_norm": 0.07377701252698898, |
|
"learning_rate": 8.309503146595674e-05, |
|
"loss": 0.0995, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 4.544445702638432, |
|
"grad_norm": 0.06582989543676376, |
|
"learning_rate": 8.291426021003117e-05, |
|
"loss": 0.0974, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 4.548975200996489, |
|
"grad_norm": 0.07520575076341629, |
|
"learning_rate": 8.273354644797421e-05, |
|
"loss": 0.0995, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 4.553504699354546, |
|
"grad_norm": 0.0851583182811737, |
|
"learning_rate": 8.255289078789141e-05, |
|
"loss": 0.097, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 4.558034197712603, |
|
"grad_norm": 0.08124125748872757, |
|
"learning_rate": 8.237229383769283e-05, |
|
"loss": 0.1001, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 4.56256369607066, |
|
"grad_norm": 0.08267924189567566, |
|
"learning_rate": 8.219175620509092e-05, |
|
"loss": 0.0969, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 4.5670931944287165, |
|
"grad_norm": 0.07254312187433243, |
|
"learning_rate": 8.201127849759861e-05, |
|
"loss": 0.0993, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 4.571622692786774, |
|
"grad_norm": 0.08983401954174042, |
|
"learning_rate": 8.183086132252706e-05, |
|
"loss": 0.1003, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 4.576152191144831, |
|
"grad_norm": 0.06914500892162323, |
|
"learning_rate": 8.165050528698385e-05, |
|
"loss": 0.1002, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 4.580681689502888, |
|
"grad_norm": 0.06419195234775543, |
|
"learning_rate": 8.147021099787075e-05, |
|
"loss": 0.099, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 4.585211187860945, |
|
"grad_norm": 0.0637657642364502, |
|
"learning_rate": 8.12899790618818e-05, |
|
"loss": 0.0986, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 4.5897406862190016, |
|
"grad_norm": 0.06946605443954468, |
|
"learning_rate": 8.11098100855012e-05, |
|
"loss": 0.1003, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 4.594270184577058, |
|
"grad_norm": 0.06739254295825958, |
|
"learning_rate": 8.092970467500129e-05, |
|
"loss": 0.1002, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 4.598799682935115, |
|
"grad_norm": 0.058849554508924484, |
|
"learning_rate": 8.074966343644056e-05, |
|
"loss": 0.0991, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 4.603329181293172, |
|
"grad_norm": 0.07838159799575806, |
|
"learning_rate": 8.056968697566141e-05, |
|
"loss": 0.0986, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 4.607858679651229, |
|
"grad_norm": 0.06857123970985413, |
|
"learning_rate": 8.038977589828841e-05, |
|
"loss": 0.0995, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 4.612388178009286, |
|
"grad_norm": 0.06318482011556625, |
|
"learning_rate": 8.020993080972607e-05, |
|
"loss": 0.0993, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 4.6169176763673425, |
|
"grad_norm": 0.06283606588840485, |
|
"learning_rate": 8.003015231515683e-05, |
|
"loss": 0.0986, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 4.621447174725399, |
|
"grad_norm": 0.07274708896875381, |
|
"learning_rate": 7.985044101953905e-05, |
|
"loss": 0.0967, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 4.625976673083456, |
|
"grad_norm": 0.0730716809630394, |
|
"learning_rate": 7.967079752760498e-05, |
|
"loss": 0.0998, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 4.630506171441513, |
|
"grad_norm": 0.08666019141674042, |
|
"learning_rate": 7.949122244385869e-05, |
|
"loss": 0.0997, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 4.63503566979957, |
|
"grad_norm": 0.07280432432889938, |
|
"learning_rate": 7.931171637257407e-05, |
|
"loss": 0.098, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 4.639565168157627, |
|
"grad_norm": 0.07623490691184998, |
|
"learning_rate": 7.913227991779275e-05, |
|
"loss": 0.0972, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 4.6440946665156835, |
|
"grad_norm": 0.08786217123270035, |
|
"learning_rate": 7.895291368332213e-05, |
|
"loss": 0.0984, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 4.64862416487374, |
|
"grad_norm": 0.06460744142532349, |
|
"learning_rate": 7.877361827273333e-05, |
|
"loss": 0.1003, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 4.653153663231797, |
|
"grad_norm": 0.0875258669257164, |
|
"learning_rate": 7.859439428935907e-05, |
|
"loss": 0.0973, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 4.657683161589854, |
|
"grad_norm": 0.0640462338924408, |
|
"learning_rate": 7.841524233629182e-05, |
|
"loss": 0.097, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 4.662212659947911, |
|
"grad_norm": 0.08805970847606659, |
|
"learning_rate": 7.823616301638158e-05, |
|
"loss": 0.0977, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 4.666742158305968, |
|
"grad_norm": 0.08403537422418594, |
|
"learning_rate": 7.805715693223403e-05, |
|
"loss": 0.0974, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 4.671271656664024, |
|
"grad_norm": 0.08450974524021149, |
|
"learning_rate": 7.787822468620831e-05, |
|
"loss": 0.0996, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 4.675801155022081, |
|
"grad_norm": 0.06727894395589828, |
|
"learning_rate": 7.76993668804151e-05, |
|
"loss": 0.0968, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 4.680330653380138, |
|
"grad_norm": 0.07860536128282547, |
|
"learning_rate": 7.752058411671469e-05, |
|
"loss": 0.098, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 4.684860151738195, |
|
"grad_norm": 0.0783989354968071, |
|
"learning_rate": 7.734187699671475e-05, |
|
"loss": 0.1001, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 4.689389650096252, |
|
"grad_norm": 0.09318368136882782, |
|
"learning_rate": 7.716324612176848e-05, |
|
"loss": 0.102, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 4.693919148454309, |
|
"grad_norm": 0.06499195098876953, |
|
"learning_rate": 7.698469209297243e-05, |
|
"loss": 0.0972, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 4.698448646812365, |
|
"grad_norm": 0.08642645180225372, |
|
"learning_rate": 7.680621551116464e-05, |
|
"loss": 0.0976, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 4.702978145170422, |
|
"grad_norm": 0.08057048916816711, |
|
"learning_rate": 7.662781697692251e-05, |
|
"loss": 0.1001, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 4.707507643528479, |
|
"grad_norm": 0.07037744671106339, |
|
"learning_rate": 7.644949709056081e-05, |
|
"loss": 0.0954, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 4.712037141886536, |
|
"grad_norm": 0.07643935829401016, |
|
"learning_rate": 7.627125645212962e-05, |
|
"loss": 0.0988, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 4.716566640244593, |
|
"grad_norm": 0.06035691127181053, |
|
"learning_rate": 7.609309566141242e-05, |
|
"loss": 0.0951, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 4.7210961386026495, |
|
"grad_norm": 0.06654711812734604, |
|
"learning_rate": 7.591501531792394e-05, |
|
"loss": 0.0978, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 4.725625636960706, |
|
"grad_norm": 0.0829191505908966, |
|
"learning_rate": 7.573701602090826e-05, |
|
"loss": 0.0974, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 4.730155135318763, |
|
"grad_norm": 0.06532509624958038, |
|
"learning_rate": 7.555909836933668e-05, |
|
"loss": 0.1, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 4.73468463367682, |
|
"grad_norm": 0.07426194101572037, |
|
"learning_rate": 7.538126296190578e-05, |
|
"loss": 0.0978, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 4.739214132034877, |
|
"grad_norm": 0.07493621110916138, |
|
"learning_rate": 7.520351039703539e-05, |
|
"loss": 0.0982, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 4.743743630392934, |
|
"grad_norm": 0.07495691627264023, |
|
"learning_rate": 7.50258412728666e-05, |
|
"loss": 0.0988, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 4.7482731287509905, |
|
"grad_norm": 0.08136378973722458, |
|
"learning_rate": 7.484825618725968e-05, |
|
"loss": 0.097, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 4.752802627109047, |
|
"grad_norm": 0.06776054948568344, |
|
"learning_rate": 7.467075573779215e-05, |
|
"loss": 0.099, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 4.757332125467104, |
|
"grad_norm": 0.06532083451747894, |
|
"learning_rate": 7.449334052175665e-05, |
|
"loss": 0.1008, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 4.761861623825161, |
|
"grad_norm": 0.08907100558280945, |
|
"learning_rate": 7.431601113615909e-05, |
|
"loss": 0.0995, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 4.766391122183219, |
|
"grad_norm": 0.07240644842386246, |
|
"learning_rate": 7.413876817771655e-05, |
|
"loss": 0.0998, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 4.7709206205412755, |
|
"grad_norm": 0.07485652714967728, |
|
"learning_rate": 7.396161224285521e-05, |
|
"loss": 0.0964, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 4.775450118899332, |
|
"grad_norm": 0.07228762656450272, |
|
"learning_rate": 7.378454392770851e-05, |
|
"loss": 0.0999, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 4.779979617257389, |
|
"grad_norm": 0.08463383466005325, |
|
"learning_rate": 7.360756382811498e-05, |
|
"loss": 0.0962, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 4.784509115615446, |
|
"grad_norm": 0.08021671324968338, |
|
"learning_rate": 7.343067253961633e-05, |
|
"loss": 0.0982, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 4.789038613973503, |
|
"grad_norm": 0.0640299916267395, |
|
"learning_rate": 7.325387065745542e-05, |
|
"loss": 0.0987, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 4.79356811233156, |
|
"grad_norm": 0.08146077394485474, |
|
"learning_rate": 7.307715877657428e-05, |
|
"loss": 0.1004, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 4.7980976106896165, |
|
"grad_norm": 0.0729324147105217, |
|
"learning_rate": 7.290053749161197e-05, |
|
"loss": 0.098, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 4.802627109047673, |
|
"grad_norm": 0.08027558028697968, |
|
"learning_rate": 7.272400739690281e-05, |
|
"loss": 0.1003, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 4.80715660740573, |
|
"grad_norm": 0.07233118265867233, |
|
"learning_rate": 7.254756908647424e-05, |
|
"loss": 0.0969, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 4.811686105763787, |
|
"grad_norm": 0.08703139424324036, |
|
"learning_rate": 7.237122315404483e-05, |
|
"loss": 0.0978, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 4.816215604121844, |
|
"grad_norm": 0.09773527085781097, |
|
"learning_rate": 7.219497019302231e-05, |
|
"loss": 0.1006, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 4.820745102479901, |
|
"grad_norm": 0.07498451322317123, |
|
"learning_rate": 7.201881079650153e-05, |
|
"loss": 0.0953, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 4.8252746008379575, |
|
"grad_norm": 0.08071410655975342, |
|
"learning_rate": 7.184274555726251e-05, |
|
"loss": 0.0997, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 4.829804099196014, |
|
"grad_norm": 0.09239617735147476, |
|
"learning_rate": 7.166677506776847e-05, |
|
"loss": 0.0966, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 4.834333597554071, |
|
"grad_norm": 0.06160885840654373, |
|
"learning_rate": 7.149089992016369e-05, |
|
"loss": 0.0996, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 4.838863095912128, |
|
"grad_norm": 0.06242508441209793, |
|
"learning_rate": 7.131512070627174e-05, |
|
"loss": 0.0971, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 4.843392594270185, |
|
"grad_norm": 0.07087717205286026, |
|
"learning_rate": 7.113943801759328e-05, |
|
"loss": 0.0981, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 4.847922092628242, |
|
"grad_norm": 0.09145446121692657, |
|
"learning_rate": 7.096385244530421e-05, |
|
"loss": 0.1018, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 4.852451590986298, |
|
"grad_norm": 0.06915028393268585, |
|
"learning_rate": 7.078836458025367e-05, |
|
"loss": 0.0975, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 4.856981089344355, |
|
"grad_norm": 0.0731835886836052, |
|
"learning_rate": 7.06129750129619e-05, |
|
"loss": 0.0983, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 4.861510587702412, |
|
"grad_norm": 0.07754811644554138, |
|
"learning_rate": 7.043768433361848e-05, |
|
"loss": 0.0987, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 4.866040086060469, |
|
"grad_norm": 0.07234437018632889, |
|
"learning_rate": 7.026249313208013e-05, |
|
"loss": 0.0999, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 4.870569584418526, |
|
"grad_norm": 0.06629019230604172, |
|
"learning_rate": 7.008740199786891e-05, |
|
"loss": 0.0982, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 4.8750990827765825, |
|
"grad_norm": 0.07004278153181076, |
|
"learning_rate": 6.991241152017009e-05, |
|
"loss": 0.0984, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 4.879628581134639, |
|
"grad_norm": 0.07674950361251831, |
|
"learning_rate": 6.973752228783028e-05, |
|
"loss": 0.0967, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 4.884158079492696, |
|
"grad_norm": 0.08505762368440628, |
|
"learning_rate": 6.956273488935537e-05, |
|
"loss": 0.1013, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 4.888687577850753, |
|
"grad_norm": 0.07949452847242355, |
|
"learning_rate": 6.938804991290856e-05, |
|
"loss": 0.0985, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 4.89321707620881, |
|
"grad_norm": 0.08295728266239166, |
|
"learning_rate": 6.921346794630843e-05, |
|
"loss": 0.0989, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 4.897746574566867, |
|
"grad_norm": 0.06370176374912262, |
|
"learning_rate": 6.903898957702694e-05, |
|
"loss": 0.0973, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 4.9022760729249235, |
|
"grad_norm": 0.07928381115198135, |
|
"learning_rate": 6.886461539218739e-05, |
|
"loss": 0.0997, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 4.90680557128298, |
|
"grad_norm": 0.07781045138835907, |
|
"learning_rate": 6.870776818850459e-05, |
|
"loss": 0.1002, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 4.911335069641037, |
|
"grad_norm": 0.06968411058187485, |
|
"learning_rate": 6.853359357037234e-05, |
|
"loss": 0.0967, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 4.915864567999094, |
|
"grad_norm": 0.08793435990810394, |
|
"learning_rate": 6.835952483735004e-05, |
|
"loss": 0.0985, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 4.920394066357151, |
|
"grad_norm": 0.07273527979850769, |
|
"learning_rate": 6.818556257518263e-05, |
|
"loss": 0.1007, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 4.924923564715208, |
|
"grad_norm": 0.0791454091668129, |
|
"learning_rate": 6.80117073692567e-05, |
|
"loss": 0.0966, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 4.9294530630732645, |
|
"grad_norm": 0.07608039677143097, |
|
"learning_rate": 6.783795980459867e-05, |
|
"loss": 0.1012, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 4.933982561431321, |
|
"grad_norm": 0.07776329666376114, |
|
"learning_rate": 6.766432046587266e-05, |
|
"loss": 0.1003, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 4.938512059789378, |
|
"grad_norm": 0.0679519921541214, |
|
"learning_rate": 6.749078993737871e-05, |
|
"loss": 0.0991, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 4.943041558147435, |
|
"grad_norm": 0.07100383937358856, |
|
"learning_rate": 6.731736880305054e-05, |
|
"loss": 0.0988, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 4.947571056505492, |
|
"grad_norm": 0.0812440738081932, |
|
"learning_rate": 6.714405764645391e-05, |
|
"loss": 0.0998, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 4.952100554863549, |
|
"grad_norm": 0.07612130790948868, |
|
"learning_rate": 6.697085705078447e-05, |
|
"loss": 0.1007, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 4.956630053221605, |
|
"grad_norm": 0.112273670732975, |
|
"learning_rate": 6.679776759886581e-05, |
|
"loss": 0.0987, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 4.961159551579662, |
|
"grad_norm": 0.07123211026191711, |
|
"learning_rate": 6.662478987314751e-05, |
|
"loss": 0.0987, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 4.965689049937719, |
|
"grad_norm": 0.0752432569861412, |
|
"learning_rate": 6.645192445570321e-05, |
|
"loss": 0.0986, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 4.970218548295776, |
|
"grad_norm": 0.08591726422309875, |
|
"learning_rate": 6.627917192822862e-05, |
|
"loss": 0.0987, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 4.974748046653833, |
|
"grad_norm": 0.0789419561624527, |
|
"learning_rate": 6.610653287203959e-05, |
|
"loss": 0.1001, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 4.97927754501189, |
|
"grad_norm": 0.07303869724273682, |
|
"learning_rate": 6.593400786807011e-05, |
|
"loss": 0.1005, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 4.983807043369946, |
|
"grad_norm": 0.062059495598077774, |
|
"learning_rate": 6.57615974968704e-05, |
|
"loss": 0.0993, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.988336541728003, |
|
"grad_norm": 0.07526618242263794, |
|
"learning_rate": 6.558930233860497e-05, |
|
"loss": 0.0994, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 4.99286604008606, |
|
"grad_norm": 0.05961596965789795, |
|
"learning_rate": 6.541712297305054e-05, |
|
"loss": 0.0994, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 4.997395538444117, |
|
"grad_norm": 0.08421042561531067, |
|
"learning_rate": 6.524505997959425e-05, |
|
"loss": 0.0992, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 4.999660287623145, |
|
"eval_loss": 0.1612485647201538, |
|
"eval_runtime": 617.4712, |
|
"eval_samples_per_second": 12.746, |
|
"eval_steps_per_second": 1.594, |
|
"step": 11035 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 17656, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.476002265936691e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|