|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9987217724755006, |
|
"eval_steps": 500, |
|
"global_step": 2346, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008521516829995739, |
|
"grad_norm": 0.6813573837280273, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7024, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.017043033659991477, |
|
"grad_norm": 0.6651060581207275, |
|
"learning_rate": 4.978595890410959e-05, |
|
"loss": 1.4561, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02556455048998722, |
|
"grad_norm": 1.1946938037872314, |
|
"learning_rate": 4.957191780821918e-05, |
|
"loss": 1.4456, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.034086067319982954, |
|
"grad_norm": 1.7951576709747314, |
|
"learning_rate": 4.9357876712328774e-05, |
|
"loss": 1.7177, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04260758414997869, |
|
"grad_norm": 2.754934549331665, |
|
"learning_rate": 4.914383561643836e-05, |
|
"loss": 1.9612, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05112910097997444, |
|
"grad_norm": 0.9254423975944519, |
|
"learning_rate": 4.892979452054795e-05, |
|
"loss": 1.3975, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05965061780997018, |
|
"grad_norm": 0.9828574657440186, |
|
"learning_rate": 4.871575342465753e-05, |
|
"loss": 1.2984, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06817213463996591, |
|
"grad_norm": 1.291460394859314, |
|
"learning_rate": 4.850171232876712e-05, |
|
"loss": 1.5636, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07669365146996165, |
|
"grad_norm": 2.218918800354004, |
|
"learning_rate": 4.8287671232876716e-05, |
|
"loss": 1.765, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08521516829995739, |
|
"grad_norm": 2.730905771255493, |
|
"learning_rate": 4.8073630136986304e-05, |
|
"loss": 1.3425, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09373668512995313, |
|
"grad_norm": 0.9995436668395996, |
|
"learning_rate": 4.785958904109589e-05, |
|
"loss": 1.2667, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.10225820195994888, |
|
"grad_norm": 0.877986490726471, |
|
"learning_rate": 4.764554794520548e-05, |
|
"loss": 1.2679, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.11077971878994461, |
|
"grad_norm": 1.7854726314544678, |
|
"learning_rate": 4.743150684931507e-05, |
|
"loss": 1.6378, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.11930123561994035, |
|
"grad_norm": 2.3612940311431885, |
|
"learning_rate": 4.7217465753424664e-05, |
|
"loss": 1.7922, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1278227524499361, |
|
"grad_norm": 2.323775053024292, |
|
"learning_rate": 4.700342465753425e-05, |
|
"loss": 1.2947, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.13634426927993182, |
|
"grad_norm": 1.2070249319076538, |
|
"learning_rate": 4.678938356164384e-05, |
|
"loss": 1.269, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.14486578610992756, |
|
"grad_norm": 1.1411925554275513, |
|
"learning_rate": 4.657534246575342e-05, |
|
"loss": 1.3021, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1533873029399233, |
|
"grad_norm": 1.3789585828781128, |
|
"learning_rate": 4.636130136986302e-05, |
|
"loss": 1.3572, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.16190881976991905, |
|
"grad_norm": 2.145219326019287, |
|
"learning_rate": 4.6147260273972605e-05, |
|
"loss": 1.6292, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.17043033659991477, |
|
"grad_norm": 3.3627982139587402, |
|
"learning_rate": 4.5933219178082194e-05, |
|
"loss": 1.4358, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.17895185342991052, |
|
"grad_norm": 1.116105318069458, |
|
"learning_rate": 4.571917808219178e-05, |
|
"loss": 1.2568, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.18747337025990626, |
|
"grad_norm": 1.2009385824203491, |
|
"learning_rate": 4.550513698630137e-05, |
|
"loss": 1.1746, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.195994887089902, |
|
"grad_norm": 2.0525238513946533, |
|
"learning_rate": 4.529109589041096e-05, |
|
"loss": 1.5463, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.20451640391989775, |
|
"grad_norm": 2.649242639541626, |
|
"learning_rate": 4.5077054794520553e-05, |
|
"loss": 1.7533, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.21303792074989347, |
|
"grad_norm": 3.1068289279937744, |
|
"learning_rate": 4.486301369863014e-05, |
|
"loss": 1.3994, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.22155943757988922, |
|
"grad_norm": 1.2716392278671265, |
|
"learning_rate": 4.464897260273973e-05, |
|
"loss": 1.2382, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.23008095440988496, |
|
"grad_norm": 1.20390784740448, |
|
"learning_rate": 4.443493150684932e-05, |
|
"loss": 1.1434, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2386024712398807, |
|
"grad_norm": 1.7929986715316772, |
|
"learning_rate": 4.422089041095891e-05, |
|
"loss": 1.5121, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.24712398806987643, |
|
"grad_norm": 2.645399570465088, |
|
"learning_rate": 4.4006849315068495e-05, |
|
"loss": 1.4523, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2556455048998722, |
|
"grad_norm": 3.460209846496582, |
|
"learning_rate": 4.379280821917808e-05, |
|
"loss": 1.3166, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2641670217298679, |
|
"grad_norm": 1.2120425701141357, |
|
"learning_rate": 4.357876712328767e-05, |
|
"loss": 1.2265, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.27268853855986364, |
|
"grad_norm": 1.2804960012435913, |
|
"learning_rate": 4.336472602739726e-05, |
|
"loss": 1.15, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2812100553898594, |
|
"grad_norm": 2.300981044769287, |
|
"learning_rate": 4.3150684931506855e-05, |
|
"loss": 1.5719, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2897315722198551, |
|
"grad_norm": 2.3201756477355957, |
|
"learning_rate": 4.293664383561644e-05, |
|
"loss": 1.5963, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2982530890498509, |
|
"grad_norm": 3.4875683784484863, |
|
"learning_rate": 4.272260273972603e-05, |
|
"loss": 1.1126, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3067746058798466, |
|
"grad_norm": 1.2687772512435913, |
|
"learning_rate": 4.250856164383562e-05, |
|
"loss": 1.1099, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.31529612270984236, |
|
"grad_norm": 1.367256760597229, |
|
"learning_rate": 4.229452054794521e-05, |
|
"loss": 1.1648, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3238176395398381, |
|
"grad_norm": 1.5560370683670044, |
|
"learning_rate": 4.2080479452054796e-05, |
|
"loss": 1.3107, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.33233915636983385, |
|
"grad_norm": 2.6812076568603516, |
|
"learning_rate": 4.1866438356164385e-05, |
|
"loss": 1.4966, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.34086067319982954, |
|
"grad_norm": 4.098489284515381, |
|
"learning_rate": 4.165239726027397e-05, |
|
"loss": 1.2831, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3493821900298253, |
|
"grad_norm": 1.2586500644683838, |
|
"learning_rate": 4.143835616438356e-05, |
|
"loss": 1.152, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.35790370685982104, |
|
"grad_norm": 1.3851690292358398, |
|
"learning_rate": 4.122431506849315e-05, |
|
"loss": 1.2035, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3664252236898168, |
|
"grad_norm": 1.9357365369796753, |
|
"learning_rate": 4.1010273972602745e-05, |
|
"loss": 1.4145, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3749467405198125, |
|
"grad_norm": 2.84523344039917, |
|
"learning_rate": 4.079623287671233e-05, |
|
"loss": 1.5173, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3834682573498083, |
|
"grad_norm": 2.4506237506866455, |
|
"learning_rate": 4.058219178082192e-05, |
|
"loss": 1.1763, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.391989774179804, |
|
"grad_norm": 1.1524243354797363, |
|
"learning_rate": 4.036815068493151e-05, |
|
"loss": 1.316, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.40051129100979976, |
|
"grad_norm": 1.289616346359253, |
|
"learning_rate": 4.01541095890411e-05, |
|
"loss": 1.102, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.4090328078397955, |
|
"grad_norm": 1.482630968093872, |
|
"learning_rate": 3.9940068493150686e-05, |
|
"loss": 1.3212, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.4175543246697912, |
|
"grad_norm": 2.3927276134490967, |
|
"learning_rate": 3.9726027397260274e-05, |
|
"loss": 1.6163, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.42607584149978694, |
|
"grad_norm": 3.26775860786438, |
|
"learning_rate": 3.951198630136986e-05, |
|
"loss": 1.1924, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4345973583297827, |
|
"grad_norm": 1.2278869152069092, |
|
"learning_rate": 3.929794520547945e-05, |
|
"loss": 1.1865, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.44311887515977844, |
|
"grad_norm": 1.3176724910736084, |
|
"learning_rate": 3.908390410958904e-05, |
|
"loss": 1.2366, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.4516403919897742, |
|
"grad_norm": 4.769649505615234, |
|
"learning_rate": 3.8869863013698634e-05, |
|
"loss": 1.4883, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.4601619088197699, |
|
"grad_norm": 3.0388917922973633, |
|
"learning_rate": 3.865582191780822e-05, |
|
"loss": 1.4851, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4686834256497657, |
|
"grad_norm": 3.100656270980835, |
|
"learning_rate": 3.844178082191781e-05, |
|
"loss": 1.0437, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4772049424797614, |
|
"grad_norm": 3.1867423057556152, |
|
"learning_rate": 3.82277397260274e-05, |
|
"loss": 1.1181, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.48572645930975716, |
|
"grad_norm": 1.2426332235336304, |
|
"learning_rate": 3.801369863013699e-05, |
|
"loss": 1.1024, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.49424797613975285, |
|
"grad_norm": 1.2760354280471802, |
|
"learning_rate": 3.779965753424658e-05, |
|
"loss": 1.1686, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5027694929697486, |
|
"grad_norm": 2.6802961826324463, |
|
"learning_rate": 3.7585616438356164e-05, |
|
"loss": 1.4582, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5112910097997444, |
|
"grad_norm": 3.5295393466949463, |
|
"learning_rate": 3.737157534246575e-05, |
|
"loss": 1.1565, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5198125266297401, |
|
"grad_norm": 1.3644486665725708, |
|
"learning_rate": 3.715753424657534e-05, |
|
"loss": 1.0841, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5283340434597358, |
|
"grad_norm": 1.268506407737732, |
|
"learning_rate": 3.6943493150684936e-05, |
|
"loss": 1.0859, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5368555602897316, |
|
"grad_norm": 2.1879653930664062, |
|
"learning_rate": 3.6729452054794524e-05, |
|
"loss": 1.415, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5453770771197273, |
|
"grad_norm": 2.482619047164917, |
|
"learning_rate": 3.651541095890411e-05, |
|
"loss": 1.5743, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5538985939497231, |
|
"grad_norm": 3.3282759189605713, |
|
"learning_rate": 3.63013698630137e-05, |
|
"loss": 1.203, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5624201107797188, |
|
"grad_norm": 1.288682460784912, |
|
"learning_rate": 3.608732876712329e-05, |
|
"loss": 1.1415, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5709416276097146, |
|
"grad_norm": 1.232367753982544, |
|
"learning_rate": 3.587328767123288e-05, |
|
"loss": 1.0992, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5794631444397103, |
|
"grad_norm": 2.4475719928741455, |
|
"learning_rate": 3.565924657534247e-05, |
|
"loss": 1.6888, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.587984661269706, |
|
"grad_norm": 2.75734543800354, |
|
"learning_rate": 3.5445205479452054e-05, |
|
"loss": 1.479, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5965061780997017, |
|
"grad_norm": 3.1947202682495117, |
|
"learning_rate": 3.523116438356164e-05, |
|
"loss": 1.0327, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6050276949296974, |
|
"grad_norm": 1.2636553049087524, |
|
"learning_rate": 3.501712328767123e-05, |
|
"loss": 1.0756, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.6135492117596932, |
|
"grad_norm": 1.2693008184432983, |
|
"learning_rate": 3.4803082191780825e-05, |
|
"loss": 1.2125, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.6220707285896889, |
|
"grad_norm": 2.964484691619873, |
|
"learning_rate": 3.4589041095890414e-05, |
|
"loss": 1.3954, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.6305922454196847, |
|
"grad_norm": 2.6809933185577393, |
|
"learning_rate": 3.4375e-05, |
|
"loss": 1.3599, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6391137622496804, |
|
"grad_norm": 3.059370994567871, |
|
"learning_rate": 3.416095890410959e-05, |
|
"loss": 1.2158, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6476352790796762, |
|
"grad_norm": 1.3807661533355713, |
|
"learning_rate": 3.394691780821918e-05, |
|
"loss": 1.0973, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6561567959096719, |
|
"grad_norm": 1.525608777999878, |
|
"learning_rate": 3.373287671232877e-05, |
|
"loss": 1.1659, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.6646783127396677, |
|
"grad_norm": 2.267289161682129, |
|
"learning_rate": 3.351883561643836e-05, |
|
"loss": 1.6013, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.6731998295696634, |
|
"grad_norm": 2.4192111492156982, |
|
"learning_rate": 3.330479452054795e-05, |
|
"loss": 1.5129, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6817213463996591, |
|
"grad_norm": 4.045669078826904, |
|
"learning_rate": 3.309075342465753e-05, |
|
"loss": 1.1527, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6902428632296549, |
|
"grad_norm": 1.4586423635482788, |
|
"learning_rate": 3.287671232876712e-05, |
|
"loss": 1.0746, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.6987643800596506, |
|
"grad_norm": 1.3621727228164673, |
|
"learning_rate": 3.2662671232876715e-05, |
|
"loss": 1.0083, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.7072858968896464, |
|
"grad_norm": 2.002037763595581, |
|
"learning_rate": 3.2448630136986303e-05, |
|
"loss": 1.4296, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.7158074137196421, |
|
"grad_norm": 24.786273956298828, |
|
"learning_rate": 3.223458904109589e-05, |
|
"loss": 1.5759, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.7243289305496379, |
|
"grad_norm": 3.176041841506958, |
|
"learning_rate": 3.202054794520548e-05, |
|
"loss": 1.34, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.7328504473796336, |
|
"grad_norm": 1.4174227714538574, |
|
"learning_rate": 3.180650684931507e-05, |
|
"loss": 1.0625, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.7413719642096294, |
|
"grad_norm": 1.2621195316314697, |
|
"learning_rate": 3.1592465753424663e-05, |
|
"loss": 1.0369, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.749893481039625, |
|
"grad_norm": 2.297351360321045, |
|
"learning_rate": 3.137842465753425e-05, |
|
"loss": 1.4817, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.7584149978696207, |
|
"grad_norm": 2.9399614334106445, |
|
"learning_rate": 3.116438356164384e-05, |
|
"loss": 1.4127, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.7669365146996165, |
|
"grad_norm": 3.5914995670318604, |
|
"learning_rate": 3.095034246575342e-05, |
|
"loss": 1.0546, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7754580315296122, |
|
"grad_norm": 1.2652535438537598, |
|
"learning_rate": 3.073630136986301e-05, |
|
"loss": 1.1101, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.783979548359608, |
|
"grad_norm": 1.644640564918518, |
|
"learning_rate": 3.0522260273972605e-05, |
|
"loss": 1.1791, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.7925010651896037, |
|
"grad_norm": 2.482304096221924, |
|
"learning_rate": 3.0308219178082193e-05, |
|
"loss": 1.512, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.8010225820195995, |
|
"grad_norm": 2.763471841812134, |
|
"learning_rate": 3.009417808219178e-05, |
|
"loss": 1.477, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.8095440988495952, |
|
"grad_norm": 3.675570011138916, |
|
"learning_rate": 2.988013698630137e-05, |
|
"loss": 1.1064, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.818065615679591, |
|
"grad_norm": 1.302946925163269, |
|
"learning_rate": 2.966609589041096e-05, |
|
"loss": 1.0189, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.8265871325095867, |
|
"grad_norm": 1.492253303527832, |
|
"learning_rate": 2.945205479452055e-05, |
|
"loss": 1.0817, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.8351086493395824, |
|
"grad_norm": 3.072866439819336, |
|
"learning_rate": 2.923801369863014e-05, |
|
"loss": 1.4999, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.8436301661695782, |
|
"grad_norm": 2.7219836711883545, |
|
"learning_rate": 2.902397260273973e-05, |
|
"loss": 1.4183, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.8521516829995739, |
|
"grad_norm": 3.425384044647217, |
|
"learning_rate": 2.8809931506849318e-05, |
|
"loss": 1.0067, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8606731998295697, |
|
"grad_norm": 1.6666113138198853, |
|
"learning_rate": 2.8595890410958903e-05, |
|
"loss": 1.1709, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.8691947166595654, |
|
"grad_norm": 1.3914527893066406, |
|
"learning_rate": 2.838184931506849e-05, |
|
"loss": 1.0248, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.8777162334895612, |
|
"grad_norm": 2.082874298095703, |
|
"learning_rate": 2.8167808219178083e-05, |
|
"loss": 1.2266, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.8862377503195569, |
|
"grad_norm": 2.5057151317596436, |
|
"learning_rate": 2.795376712328767e-05, |
|
"loss": 1.4517, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.8947592671495527, |
|
"grad_norm": 3.3986401557922363, |
|
"learning_rate": 2.7739726027397263e-05, |
|
"loss": 1.1483, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.9032807839795484, |
|
"grad_norm": 1.3847638368606567, |
|
"learning_rate": 2.752568493150685e-05, |
|
"loss": 1.0509, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.911802300809544, |
|
"grad_norm": 1.515759825706482, |
|
"learning_rate": 2.731164383561644e-05, |
|
"loss": 1.1062, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.9203238176395399, |
|
"grad_norm": 1.9089744091033936, |
|
"learning_rate": 2.709760273972603e-05, |
|
"loss": 1.2536, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.9288453344695355, |
|
"grad_norm": 3.3122973442077637, |
|
"learning_rate": 2.688356164383562e-05, |
|
"loss": 1.6613, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.9373668512995313, |
|
"grad_norm": 4.078606128692627, |
|
"learning_rate": 2.6669520547945208e-05, |
|
"loss": 1.1644, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.945888368129527, |
|
"grad_norm": 1.3247941732406616, |
|
"learning_rate": 2.6455479452054793e-05, |
|
"loss": 0.999, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.9544098849595228, |
|
"grad_norm": 1.617491364479065, |
|
"learning_rate": 2.6241438356164384e-05, |
|
"loss": 1.0226, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.9629314017895185, |
|
"grad_norm": 2.2223851680755615, |
|
"learning_rate": 2.6027397260273973e-05, |
|
"loss": 1.5095, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.9714529186195143, |
|
"grad_norm": 2.7325618267059326, |
|
"learning_rate": 2.581335616438356e-05, |
|
"loss": 1.5794, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.97997443544951, |
|
"grad_norm": 3.270749568939209, |
|
"learning_rate": 2.5599315068493153e-05, |
|
"loss": 1.2044, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.9884959522795057, |
|
"grad_norm": 1.6444586515426636, |
|
"learning_rate": 2.538527397260274e-05, |
|
"loss": 1.136, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.9970174691095015, |
|
"grad_norm": 3.6478381156921387, |
|
"learning_rate": 2.517123287671233e-05, |
|
"loss": 1.3373, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.0051129100979974, |
|
"grad_norm": 1.398633599281311, |
|
"learning_rate": 2.495719178082192e-05, |
|
"loss": 0.9056, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.013634426927993, |
|
"grad_norm": 1.3859210014343262, |
|
"learning_rate": 2.4743150684931506e-05, |
|
"loss": 0.9212, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.022155943757989, |
|
"grad_norm": 2.0104875564575195, |
|
"learning_rate": 2.4529109589041097e-05, |
|
"loss": 1.0748, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.0306774605879847, |
|
"grad_norm": 3.378269672393799, |
|
"learning_rate": 2.4315068493150686e-05, |
|
"loss": 1.1882, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.0391989774179804, |
|
"grad_norm": 3.0901708602905273, |
|
"learning_rate": 2.4101027397260274e-05, |
|
"loss": 0.8874, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.047720494247976, |
|
"grad_norm": 1.6910735368728638, |
|
"learning_rate": 2.3886986301369866e-05, |
|
"loss": 0.888, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.0562420110779718, |
|
"grad_norm": 1.5407036542892456, |
|
"learning_rate": 2.367294520547945e-05, |
|
"loss": 0.9438, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.0647635279079677, |
|
"grad_norm": 1.9005461931228638, |
|
"learning_rate": 2.3458904109589042e-05, |
|
"loss": 0.9209, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.0732850447379634, |
|
"grad_norm": 2.394400119781494, |
|
"learning_rate": 2.324486301369863e-05, |
|
"loss": 1.2207, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.081806561567959, |
|
"grad_norm": 2.9217231273651123, |
|
"learning_rate": 2.3030821917808222e-05, |
|
"loss": 1.0554, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.0903280783979548, |
|
"grad_norm": 1.6549851894378662, |
|
"learning_rate": 2.281678082191781e-05, |
|
"loss": 0.9341, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.0988495952279507, |
|
"grad_norm": 1.7708053588867188, |
|
"learning_rate": 2.2602739726027396e-05, |
|
"loss": 0.9768, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.1073711120579464, |
|
"grad_norm": 1.953326940536499, |
|
"learning_rate": 2.2388698630136987e-05, |
|
"loss": 0.9628, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.115892628887942, |
|
"grad_norm": 3.4446678161621094, |
|
"learning_rate": 2.2174657534246575e-05, |
|
"loss": 1.2085, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.1244141457179377, |
|
"grad_norm": 3.5323126316070557, |
|
"learning_rate": 2.1960616438356167e-05, |
|
"loss": 1.124, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.1329356625479337, |
|
"grad_norm": 1.861324429512024, |
|
"learning_rate": 2.1746575342465755e-05, |
|
"loss": 0.8991, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.1414571793779293, |
|
"grad_norm": 1.6703088283538818, |
|
"learning_rate": 2.1532534246575344e-05, |
|
"loss": 0.9776, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.149978696207925, |
|
"grad_norm": 1.834876537322998, |
|
"learning_rate": 2.1318493150684932e-05, |
|
"loss": 0.9773, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.1585002130379207, |
|
"grad_norm": 3.600705862045288, |
|
"learning_rate": 2.110445205479452e-05, |
|
"loss": 1.2392, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.1670217298679164, |
|
"grad_norm": 3.9731180667877197, |
|
"learning_rate": 2.0890410958904112e-05, |
|
"loss": 0.9947, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.1755432466979123, |
|
"grad_norm": 2.0116519927978516, |
|
"learning_rate": 2.06763698630137e-05, |
|
"loss": 0.9025, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.184064763527908, |
|
"grad_norm": 1.8098217248916626, |
|
"learning_rate": 2.046232876712329e-05, |
|
"loss": 0.8956, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.1925862803579037, |
|
"grad_norm": 2.4669318199157715, |
|
"learning_rate": 2.0248287671232877e-05, |
|
"loss": 1.0979, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.2011077971878994, |
|
"grad_norm": 3.1824028491973877, |
|
"learning_rate": 2.0034246575342465e-05, |
|
"loss": 1.2918, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.2096293140178953, |
|
"grad_norm": 3.60017991065979, |
|
"learning_rate": 1.9820205479452057e-05, |
|
"loss": 0.9279, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.218150830847891, |
|
"grad_norm": 6.929809093475342, |
|
"learning_rate": 1.9606164383561645e-05, |
|
"loss": 0.8606, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.2266723476778867, |
|
"grad_norm": 2.342393636703491, |
|
"learning_rate": 1.9392123287671233e-05, |
|
"loss": 0.963, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.2351938645078824, |
|
"grad_norm": 2.6814820766448975, |
|
"learning_rate": 1.9178082191780822e-05, |
|
"loss": 1.0806, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.243715381337878, |
|
"grad_norm": 4.10792350769043, |
|
"learning_rate": 1.896404109589041e-05, |
|
"loss": 1.1623, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.2522368981678738, |
|
"grad_norm": 3.0713534355163574, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.8909, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.2607584149978697, |
|
"grad_norm": 1.9831465482711792, |
|
"learning_rate": 1.853595890410959e-05, |
|
"loss": 0.8631, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.2692799318278654, |
|
"grad_norm": 1.8906506299972534, |
|
"learning_rate": 1.832191780821918e-05, |
|
"loss": 0.9484, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.277801448657861, |
|
"grad_norm": 2.1991984844207764, |
|
"learning_rate": 1.8107876712328767e-05, |
|
"loss": 1.0584, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.286322965487857, |
|
"grad_norm": 3.851630449295044, |
|
"learning_rate": 1.7893835616438355e-05, |
|
"loss": 1.3179, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.2948444823178527, |
|
"grad_norm": 4.507850646972656, |
|
"learning_rate": 1.7679794520547947e-05, |
|
"loss": 1.0782, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.3033659991478483, |
|
"grad_norm": 1.7349963188171387, |
|
"learning_rate": 1.7465753424657535e-05, |
|
"loss": 0.8217, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.311887515977844, |
|
"grad_norm": 1.8865768909454346, |
|
"learning_rate": 1.7251712328767127e-05, |
|
"loss": 0.9012, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.3204090328078397, |
|
"grad_norm": 1.9257206916809082, |
|
"learning_rate": 1.703767123287671e-05, |
|
"loss": 0.8876, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.3289305496378354, |
|
"grad_norm": 3.2287306785583496, |
|
"learning_rate": 1.68236301369863e-05, |
|
"loss": 1.2605, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.3374520664678313, |
|
"grad_norm": 3.4038004875183105, |
|
"learning_rate": 1.660958904109589e-05, |
|
"loss": 0.9881, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.345973583297827, |
|
"grad_norm": 1.9077774286270142, |
|
"learning_rate": 1.639554794520548e-05, |
|
"loss": 0.913, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.3544951001278227, |
|
"grad_norm": 2.548600196838379, |
|
"learning_rate": 1.618150684931507e-05, |
|
"loss": 0.9404, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.3630166169578186, |
|
"grad_norm": 2.9931490421295166, |
|
"learning_rate": 1.596746575342466e-05, |
|
"loss": 1.0795, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.3715381337878143, |
|
"grad_norm": 4.5465216636657715, |
|
"learning_rate": 1.5753424657534248e-05, |
|
"loss": 1.2289, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.38005965061781, |
|
"grad_norm": 4.3074631690979, |
|
"learning_rate": 1.5539383561643836e-05, |
|
"loss": 0.8165, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.3885811674478057, |
|
"grad_norm": 1.9459553956985474, |
|
"learning_rate": 1.5325342465753425e-05, |
|
"loss": 0.8087, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.3971026842778014, |
|
"grad_norm": 1.8323142528533936, |
|
"learning_rate": 1.5111301369863015e-05, |
|
"loss": 0.8715, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.405624201107797, |
|
"grad_norm": 2.3462753295898438, |
|
"learning_rate": 1.4897260273972605e-05, |
|
"loss": 0.8655, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.414145717937793, |
|
"grad_norm": 3.0974302291870117, |
|
"learning_rate": 1.4683219178082191e-05, |
|
"loss": 1.1704, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.4226672347677887, |
|
"grad_norm": 3.1823673248291016, |
|
"learning_rate": 1.4469178082191781e-05, |
|
"loss": 0.9444, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.4311887515977844, |
|
"grad_norm": 2.3283894062042236, |
|
"learning_rate": 1.4255136986301371e-05, |
|
"loss": 0.8847, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.4397102684277803, |
|
"grad_norm": 1.8633939027786255, |
|
"learning_rate": 1.404109589041096e-05, |
|
"loss": 0.8843, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.448231785257776, |
|
"grad_norm": 2.870725393295288, |
|
"learning_rate": 1.382705479452055e-05, |
|
"loss": 0.9102, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.4567533020877717, |
|
"grad_norm": 3.6491854190826416, |
|
"learning_rate": 1.3613013698630136e-05, |
|
"loss": 1.1743, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.4652748189177673, |
|
"grad_norm": 4.350872039794922, |
|
"learning_rate": 1.3398972602739726e-05, |
|
"loss": 0.9763, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.473796335747763, |
|
"grad_norm": 2.080191135406494, |
|
"learning_rate": 1.3184931506849316e-05, |
|
"loss": 0.8528, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.4823178525777587, |
|
"grad_norm": 2.2544877529144287, |
|
"learning_rate": 1.2970890410958906e-05, |
|
"loss": 0.8987, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.4908393694077546, |
|
"grad_norm": 2.3497424125671387, |
|
"learning_rate": 1.2756849315068494e-05, |
|
"loss": 1.0171, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.4993608862377503, |
|
"grad_norm": 3.6866824626922607, |
|
"learning_rate": 1.2542808219178081e-05, |
|
"loss": 1.1774, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.507882403067746, |
|
"grad_norm": 4.5758891105651855, |
|
"learning_rate": 1.2328767123287671e-05, |
|
"loss": 0.9464, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.516403919897742, |
|
"grad_norm": 2.159677743911743, |
|
"learning_rate": 1.2114726027397261e-05, |
|
"loss": 0.8089, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.5249254367277376, |
|
"grad_norm": 2.345613479614258, |
|
"learning_rate": 1.1900684931506851e-05, |
|
"loss": 0.9699, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.5334469535577333, |
|
"grad_norm": 2.187382936477661, |
|
"learning_rate": 1.168664383561644e-05, |
|
"loss": 1.0533, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.541968470387729, |
|
"grad_norm": 6.11014986038208, |
|
"learning_rate": 1.1472602739726027e-05, |
|
"loss": 1.3963, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.5504899872177247, |
|
"grad_norm": 4.699113368988037, |
|
"learning_rate": 1.1258561643835617e-05, |
|
"loss": 0.8968, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.5590115040477204, |
|
"grad_norm": 1.8584600687026978, |
|
"learning_rate": 1.1044520547945206e-05, |
|
"loss": 0.8661, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.5675330208777163, |
|
"grad_norm": 2.1716010570526123, |
|
"learning_rate": 1.0830479452054796e-05, |
|
"loss": 0.9446, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.576054537707712, |
|
"grad_norm": 2.649498701095581, |
|
"learning_rate": 1.0616438356164384e-05, |
|
"loss": 0.9392, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.5845760545377077, |
|
"grad_norm": 5.051261901855469, |
|
"learning_rate": 1.0402397260273972e-05, |
|
"loss": 1.4026, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.5930975713677036, |
|
"grad_norm": 4.0868425369262695, |
|
"learning_rate": 1.0188356164383562e-05, |
|
"loss": 1.0371, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.6016190881976993, |
|
"grad_norm": 2.242595672607422, |
|
"learning_rate": 9.97431506849315e-06, |
|
"loss": 0.7793, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.610140605027695, |
|
"grad_norm": 2.0395429134368896, |
|
"learning_rate": 9.76027397260274e-06, |
|
"loss": 0.9659, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.6186621218576907, |
|
"grad_norm": 3.6967060565948486, |
|
"learning_rate": 9.54623287671233e-06, |
|
"loss": 1.3609, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.6271836386876863, |
|
"grad_norm": 3.8495938777923584, |
|
"learning_rate": 9.332191780821919e-06, |
|
"loss": 0.9838, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.635705155517682, |
|
"grad_norm": 4.630374431610107, |
|
"learning_rate": 9.118150684931507e-06, |
|
"loss": 0.9471, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.6442266723476777, |
|
"grad_norm": 2.2313318252563477, |
|
"learning_rate": 8.904109589041095e-06, |
|
"loss": 0.8377, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.6527481891776736, |
|
"grad_norm": 2.623538017272949, |
|
"learning_rate": 8.690068493150685e-06, |
|
"loss": 0.9168, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.6612697060076693, |
|
"grad_norm": 2.4369919300079346, |
|
"learning_rate": 8.476027397260275e-06, |
|
"loss": 1.1145, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.6697912228376652, |
|
"grad_norm": 3.5387771129608154, |
|
"learning_rate": 8.261986301369864e-06, |
|
"loss": 1.2709, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.678312739667661, |
|
"grad_norm": 3.519103527069092, |
|
"learning_rate": 8.047945205479452e-06, |
|
"loss": 0.8001, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.6868342564976566, |
|
"grad_norm": 2.0191633701324463, |
|
"learning_rate": 7.83390410958904e-06, |
|
"loss": 0.9001, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.6953557733276523, |
|
"grad_norm": 2.1604294776916504, |
|
"learning_rate": 7.61986301369863e-06, |
|
"loss": 0.8404, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.703877290157648, |
|
"grad_norm": 2.4887239933013916, |
|
"learning_rate": 7.40582191780822e-06, |
|
"loss": 1.0109, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.7123988069876437, |
|
"grad_norm": 3.2208356857299805, |
|
"learning_rate": 7.191780821917809e-06, |
|
"loss": 1.135, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.7209203238176394, |
|
"grad_norm": 3.4077229499816895, |
|
"learning_rate": 6.977739726027398e-06, |
|
"loss": 1.0031, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.7294418406476353, |
|
"grad_norm": 1.9562416076660156, |
|
"learning_rate": 6.763698630136987e-06, |
|
"loss": 0.8598, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.737963357477631, |
|
"grad_norm": 2.2411820888519287, |
|
"learning_rate": 6.549657534246575e-06, |
|
"loss": 0.8489, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.7464848743076269, |
|
"grad_norm": 3.026580333709717, |
|
"learning_rate": 6.335616438356165e-06, |
|
"loss": 1.0326, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.7550063911376226, |
|
"grad_norm": 4.257705211639404, |
|
"learning_rate": 6.121575342465754e-06, |
|
"loss": 1.2792, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.7635279079676183, |
|
"grad_norm": 2.8991031646728516, |
|
"learning_rate": 5.907534246575343e-06, |
|
"loss": 0.9655, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.772049424797614, |
|
"grad_norm": 2.1200027465820312, |
|
"learning_rate": 5.693493150684932e-06, |
|
"loss": 0.9003, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.7805709416276096, |
|
"grad_norm": 2.8092291355133057, |
|
"learning_rate": 5.479452054794521e-06, |
|
"loss": 0.9486, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.7890924584576053, |
|
"grad_norm": 2.481590747833252, |
|
"learning_rate": 5.26541095890411e-06, |
|
"loss": 0.8927, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.797613975287601, |
|
"grad_norm": 4.080635070800781, |
|
"learning_rate": 5.051369863013699e-06, |
|
"loss": 1.0988, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.806135492117597, |
|
"grad_norm": 5.39976692199707, |
|
"learning_rate": 4.8373287671232874e-06, |
|
"loss": 0.923, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.8146570089475926, |
|
"grad_norm": 2.044147253036499, |
|
"learning_rate": 4.623287671232877e-06, |
|
"loss": 0.914, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.8231785257775885, |
|
"grad_norm": 2.0422821044921875, |
|
"learning_rate": 4.4092465753424666e-06, |
|
"loss": 0.8518, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.8317000426075842, |
|
"grad_norm": 2.9914565086364746, |
|
"learning_rate": 4.195205479452055e-06, |
|
"loss": 1.1411, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.84022155943758, |
|
"grad_norm": 3.7009713649749756, |
|
"learning_rate": 3.981164383561644e-06, |
|
"loss": 1.1454, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.8487430762675756, |
|
"grad_norm": 5.263967514038086, |
|
"learning_rate": 3.7671232876712327e-06, |
|
"loss": 0.8432, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.8572645930975713, |
|
"grad_norm": 2.337414026260376, |
|
"learning_rate": 3.5530821917808223e-06, |
|
"loss": 0.8604, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.865786109927567, |
|
"grad_norm": 2.2294089794158936, |
|
"learning_rate": 3.3390410958904114e-06, |
|
"loss": 0.933, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.8743076267575627, |
|
"grad_norm": 2.332831621170044, |
|
"learning_rate": 3.125e-06, |
|
"loss": 0.9133, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.8828291435875586, |
|
"grad_norm": 3.756347179412842, |
|
"learning_rate": 2.910958904109589e-06, |
|
"loss": 1.3043, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.8913506604175543, |
|
"grad_norm": 4.380275249481201, |
|
"learning_rate": 2.6969178082191784e-06, |
|
"loss": 0.9552, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.8998721772475502, |
|
"grad_norm": 2.2383973598480225, |
|
"learning_rate": 2.482876712328767e-06, |
|
"loss": 0.8783, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.9083936940775459, |
|
"grad_norm": 2.0851542949676514, |
|
"learning_rate": 2.2688356164383563e-06, |
|
"loss": 0.8612, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.9169152109075416, |
|
"grad_norm": 2.3459975719451904, |
|
"learning_rate": 2.054794520547945e-06, |
|
"loss": 0.9008, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.9254367277375373, |
|
"grad_norm": 4.169739246368408, |
|
"learning_rate": 1.8407534246575344e-06, |
|
"loss": 1.2114, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.933958244567533, |
|
"grad_norm": 4.250594615936279, |
|
"learning_rate": 1.6267123287671233e-06, |
|
"loss": 1.0262, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.9424797613975286, |
|
"grad_norm": 2.187901020050049, |
|
"learning_rate": 1.4126712328767122e-06, |
|
"loss": 0.8497, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.9510012782275243, |
|
"grad_norm": 2.139758348464966, |
|
"learning_rate": 1.1986301369863014e-06, |
|
"loss": 0.8855, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.9595227950575203, |
|
"grad_norm": 3.2545394897460938, |
|
"learning_rate": 9.845890410958905e-07, |
|
"loss": 1.0541, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.968044311887516, |
|
"grad_norm": 4.898044586181641, |
|
"learning_rate": 7.705479452054794e-07, |
|
"loss": 1.1848, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.9765658287175119, |
|
"grad_norm": 5.535640716552734, |
|
"learning_rate": 5.565068493150685e-07, |
|
"loss": 0.8679, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.9850873455475075, |
|
"grad_norm": 1.8863285779953003, |
|
"learning_rate": 3.4246575342465755e-07, |
|
"loss": 0.7762, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.9936088623775032, |
|
"grad_norm": 4.670785427093506, |
|
"learning_rate": 1.2842465753424656e-07, |
|
"loss": 0.9951, |
|
"step": 2340 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2346, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4355734448273408e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|