|
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.995695839311334,
"eval_steps": 500,
"global_step": 696,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
|
{ |
|
"epoch": 0.00430416068866571, |
|
"grad_norm": 5.988248348236084, |
|
"learning_rate": 1.4285714285714287e-07, |
|
"loss": 0.8064, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00860832137733142, |
|
"grad_norm": 5.868161678314209, |
|
"learning_rate": 2.8571428571428575e-07, |
|
"loss": 0.7963, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01291248206599713, |
|
"grad_norm": 6.096586227416992, |
|
"learning_rate": 4.285714285714286e-07, |
|
"loss": 0.8413, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01721664275466284, |
|
"grad_norm": 6.065792083740234, |
|
"learning_rate": 5.714285714285715e-07, |
|
"loss": 0.81, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.021520803443328552, |
|
"grad_norm": 5.93134069442749, |
|
"learning_rate": 7.142857142857143e-07, |
|
"loss": 0.8204, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02582496413199426, |
|
"grad_norm": 5.619973182678223, |
|
"learning_rate": 8.571428571428572e-07, |
|
"loss": 0.7774, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03012912482065997, |
|
"grad_norm": 5.30891227722168, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7689, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.03443328550932568, |
|
"grad_norm": 5.255387306213379, |
|
"learning_rate": 1.142857142857143e-06, |
|
"loss": 0.7698, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.03873744619799139, |
|
"grad_norm": 4.594310283660889, |
|
"learning_rate": 1.2857142857142856e-06, |
|
"loss": 0.7616, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.043041606886657105, |
|
"grad_norm": 4.302555084228516, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 0.7349, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.047345767575322814, |
|
"grad_norm": 4.202669143676758, |
|
"learning_rate": 1.5714285714285714e-06, |
|
"loss": 0.7766, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.05164992826398852, |
|
"grad_norm": 2.575376033782959, |
|
"learning_rate": 1.7142857142857145e-06, |
|
"loss": 0.7242, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.05595408895265423, |
|
"grad_norm": 2.4199719429016113, |
|
"learning_rate": 1.8571428571428573e-06, |
|
"loss": 0.7208, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.06025824964131994, |
|
"grad_norm": 2.2063937187194824, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.6931, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.06456241032998565, |
|
"grad_norm": 2.0030524730682373, |
|
"learning_rate": 2.1428571428571427e-06, |
|
"loss": 0.6554, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.06886657101865136, |
|
"grad_norm": 1.7058610916137695, |
|
"learning_rate": 2.285714285714286e-06, |
|
"loss": 0.69, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.07317073170731707, |
|
"grad_norm": 2.8892016410827637, |
|
"learning_rate": 2.428571428571429e-06, |
|
"loss": 0.6773, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.07747489239598278, |
|
"grad_norm": 3.1138792037963867, |
|
"learning_rate": 2.571428571428571e-06, |
|
"loss": 0.6556, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.08177905308464849, |
|
"grad_norm": 3.2966556549072266, |
|
"learning_rate": 2.7142857142857144e-06, |
|
"loss": 0.6714, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.08608321377331421, |
|
"grad_norm": 3.1827149391174316, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.6798, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09038737446197992, |
|
"grad_norm": 2.7425405979156494, |
|
"learning_rate": 3e-06, |
|
"loss": 0.6626, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.09469153515064563, |
|
"grad_norm": 2.32190203666687, |
|
"learning_rate": 3.142857142857143e-06, |
|
"loss": 0.6339, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.09899569583931134, |
|
"grad_norm": 1.6165233850479126, |
|
"learning_rate": 3.285714285714286e-06, |
|
"loss": 0.6199, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.10329985652797705, |
|
"grad_norm": 1.4535725116729736, |
|
"learning_rate": 3.428571428571429e-06, |
|
"loss": 0.6393, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.10760401721664276, |
|
"grad_norm": 1.3143177032470703, |
|
"learning_rate": 3.5714285714285718e-06, |
|
"loss": 0.6285, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.11190817790530846, |
|
"grad_norm": 1.0768671035766602, |
|
"learning_rate": 3.7142857142857146e-06, |
|
"loss": 0.6097, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.11621233859397417, |
|
"grad_norm": 1.0377610921859741, |
|
"learning_rate": 3.857142857142858e-06, |
|
"loss": 0.6034, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.12051649928263988, |
|
"grad_norm": 1.0033104419708252, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.5705, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.12482065997130559, |
|
"grad_norm": 1.0018284320831299, |
|
"learning_rate": 4.1428571428571435e-06, |
|
"loss": 0.5675, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.1291248206599713, |
|
"grad_norm": 0.8010370135307312, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 0.5903, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.133428981348637, |
|
"grad_norm": 0.6782485246658325, |
|
"learning_rate": 4.428571428571429e-06, |
|
"loss": 0.5644, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.13773314203730272, |
|
"grad_norm": 0.6411944627761841, |
|
"learning_rate": 4.571428571428572e-06, |
|
"loss": 0.5627, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.14203730272596843, |
|
"grad_norm": 0.727299153804779, |
|
"learning_rate": 4.714285714285715e-06, |
|
"loss": 0.5247, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.14634146341463414, |
|
"grad_norm": 0.8326959609985352, |
|
"learning_rate": 4.857142857142858e-06, |
|
"loss": 0.571, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.15064562410329985, |
|
"grad_norm": 0.7021209597587585, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5347, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.15494978479196556, |
|
"grad_norm": 0.5911878943443298, |
|
"learning_rate": 5.142857142857142e-06, |
|
"loss": 0.5576, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.15925394548063126, |
|
"grad_norm": 0.5217288136482239, |
|
"learning_rate": 5.285714285714286e-06, |
|
"loss": 0.52, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.16355810616929697, |
|
"grad_norm": 0.6184452772140503, |
|
"learning_rate": 5.428571428571429e-06, |
|
"loss": 0.5418, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.1678622668579627, |
|
"grad_norm": 0.6969144344329834, |
|
"learning_rate": 5.571428571428572e-06, |
|
"loss": 0.5394, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.17216642754662842, |
|
"grad_norm": 0.5691121816635132, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 0.532, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.17647058823529413, |
|
"grad_norm": 0.497399240732193, |
|
"learning_rate": 5.857142857142858e-06, |
|
"loss": 0.5591, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.18077474892395984, |
|
"grad_norm": 0.5312875509262085, |
|
"learning_rate": 6e-06, |
|
"loss": 0.5396, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.18507890961262555, |
|
"grad_norm": 0.5770351886749268, |
|
"learning_rate": 6.142857142857144e-06, |
|
"loss": 0.511, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.18938307030129126, |
|
"grad_norm": 0.5931875109672546, |
|
"learning_rate": 6.285714285714286e-06, |
|
"loss": 0.5501, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.19368723098995697, |
|
"grad_norm": 0.45138663053512573, |
|
"learning_rate": 6.4285714285714295e-06, |
|
"loss": 0.5348, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.19799139167862267, |
|
"grad_norm": 0.5075214505195618, |
|
"learning_rate": 6.571428571428572e-06, |
|
"loss": 0.5244, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.20229555236728838, |
|
"grad_norm": 0.5202081203460693, |
|
"learning_rate": 6.714285714285714e-06, |
|
"loss": 0.5368, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.2065997130559541, |
|
"grad_norm": 0.4660142660140991, |
|
"learning_rate": 6.857142857142858e-06, |
|
"loss": 0.5289, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.2109038737446198, |
|
"grad_norm": 0.4269562065601349, |
|
"learning_rate": 7e-06, |
|
"loss": 0.5287, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.2152080344332855, |
|
"grad_norm": 0.4298643469810486, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.5133, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.21951219512195122, |
|
"grad_norm": 0.4547773599624634, |
|
"learning_rate": 7.285714285714286e-06, |
|
"loss": 0.5201, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.22381635581061693, |
|
"grad_norm": 0.48563310503959656, |
|
"learning_rate": 7.428571428571429e-06, |
|
"loss": 0.5327, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.22812051649928264, |
|
"grad_norm": 0.4938199818134308, |
|
"learning_rate": 7.571428571428572e-06, |
|
"loss": 0.5184, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.23242467718794835, |
|
"grad_norm": 0.45804429054260254, |
|
"learning_rate": 7.714285714285716e-06, |
|
"loss": 0.5168, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.23672883787661406, |
|
"grad_norm": 0.4584444761276245, |
|
"learning_rate": 7.857142857142858e-06, |
|
"loss": 0.5104, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.24103299856527977, |
|
"grad_norm": 0.4324899911880493, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.4843, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.24533715925394547, |
|
"grad_norm": 0.513468861579895, |
|
"learning_rate": 8.142857142857143e-06, |
|
"loss": 0.5426, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.24964131994261118, |
|
"grad_norm": 0.4595116972923279, |
|
"learning_rate": 8.285714285714287e-06, |
|
"loss": 0.5056, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.2539454806312769, |
|
"grad_norm": 0.4523639380931854, |
|
"learning_rate": 8.428571428571429e-06, |
|
"loss": 0.4841, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.2582496413199426, |
|
"grad_norm": 0.4841617941856384, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 0.5114, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.26255380200860834, |
|
"grad_norm": 0.47025153040885925, |
|
"learning_rate": 8.714285714285715e-06, |
|
"loss": 0.4994, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.266857962697274, |
|
"grad_norm": 0.4940146207809448, |
|
"learning_rate": 8.857142857142858e-06, |
|
"loss": 0.5052, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.27116212338593976, |
|
"grad_norm": 0.47717440128326416, |
|
"learning_rate": 9e-06, |
|
"loss": 0.4856, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.27546628407460544, |
|
"grad_norm": 0.44342952966690063, |
|
"learning_rate": 9.142857142857144e-06, |
|
"loss": 0.4889, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.2797704447632712, |
|
"grad_norm": 0.47397580742836, |
|
"learning_rate": 9.285714285714288e-06, |
|
"loss": 0.4723, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.28407460545193686, |
|
"grad_norm": 0.46283072233200073, |
|
"learning_rate": 9.42857142857143e-06, |
|
"loss": 0.5035, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.2883787661406026, |
|
"grad_norm": 0.4001065194606781, |
|
"learning_rate": 9.571428571428573e-06, |
|
"loss": 0.4608, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.2926829268292683, |
|
"grad_norm": 0.5248638987541199, |
|
"learning_rate": 9.714285714285715e-06, |
|
"loss": 0.5025, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.296987087517934, |
|
"grad_norm": 0.41630035638809204, |
|
"learning_rate": 9.857142857142859e-06, |
|
"loss": 0.4712, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.3012912482065997, |
|
"grad_norm": 0.4507600665092468, |
|
"learning_rate": 1e-05, |
|
"loss": 0.5068, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.30559540889526543, |
|
"grad_norm": 0.537525475025177, |
|
"learning_rate": 9.999937036309402e-06, |
|
"loss": 0.4913, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.3098995695839311, |
|
"grad_norm": 0.4286635220050812, |
|
"learning_rate": 9.999748146823376e-06, |
|
"loss": 0.488, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.31420373027259685, |
|
"grad_norm": 0.4658080041408539, |
|
"learning_rate": 9.999433336299195e-06, |
|
"loss": 0.4808, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.31850789096126253, |
|
"grad_norm": 0.47707945108413696, |
|
"learning_rate": 9.99899261266551e-06, |
|
"loss": 0.5032, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.32281205164992827, |
|
"grad_norm": 0.5236383080482483, |
|
"learning_rate": 9.99842598702216e-06, |
|
"loss": 0.4748, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.32711621233859395, |
|
"grad_norm": 0.43928998708724976, |
|
"learning_rate": 9.997733473639876e-06, |
|
"loss": 0.4755, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.3314203730272597, |
|
"grad_norm": 0.4532296657562256, |
|
"learning_rate": 9.996915089959942e-06, |
|
"loss": 0.4868, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.3357245337159254, |
|
"grad_norm": 0.4567766785621643, |
|
"learning_rate": 9.995970856593739e-06, |
|
"loss": 0.4919, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.3400286944045911, |
|
"grad_norm": 0.48937737941741943, |
|
"learning_rate": 9.994900797322233e-06, |
|
"loss": 0.495, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.34433285509325684, |
|
"grad_norm": 0.48139920830726624, |
|
"learning_rate": 9.993704939095376e-06, |
|
"loss": 0.4902, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.3486370157819225, |
|
"grad_norm": 0.46625345945358276, |
|
"learning_rate": 9.99238331203143e-06, |
|
"loss": 0.475, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.35294117647058826, |
|
"grad_norm": 0.455159991979599, |
|
"learning_rate": 9.9909359494162e-06, |
|
"loss": 0.4568, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.35724533715925394, |
|
"grad_norm": 0.482522189617157, |
|
"learning_rate": 9.989362887702203e-06, |
|
"loss": 0.5028, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.3615494978479197, |
|
"grad_norm": 0.5009233355522156, |
|
"learning_rate": 9.987664166507749e-06, |
|
"loss": 0.4727, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.36585365853658536, |
|
"grad_norm": 0.4649881422519684, |
|
"learning_rate": 9.985839828615937e-06, |
|
"loss": 0.4589, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.3701578192252511, |
|
"grad_norm": 0.49685046076774597, |
|
"learning_rate": 9.983889919973586e-06, |
|
"loss": 0.4782, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.3744619799139168, |
|
"grad_norm": 0.4575681984424591, |
|
"learning_rate": 9.981814489690077e-06, |
|
"loss": 0.4526, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.3787661406025825, |
|
"grad_norm": 0.4114251434803009, |
|
"learning_rate": 9.979613590036108e-06, |
|
"loss": 0.4472, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.3830703012912482, |
|
"grad_norm": 0.5058827996253967, |
|
"learning_rate": 9.977287276442385e-06, |
|
"loss": 0.4867, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.38737446197991393, |
|
"grad_norm": 0.4725145697593689, |
|
"learning_rate": 9.974835607498224e-06, |
|
"loss": 0.4679, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.3916786226685796, |
|
"grad_norm": 0.48240530490875244, |
|
"learning_rate": 9.972258644950074e-06, |
|
"loss": 0.4587, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.39598278335724535, |
|
"grad_norm": 0.515557050704956, |
|
"learning_rate": 9.969556453699966e-06, |
|
"loss": 0.4654, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.40028694404591103, |
|
"grad_norm": 0.4809204638004303, |
|
"learning_rate": 9.966729101803872e-06, |
|
"loss": 0.4667, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.40459110473457677, |
|
"grad_norm": 0.5204639434814453, |
|
"learning_rate": 9.963776660469996e-06, |
|
"loss": 0.4775, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.40889526542324245, |
|
"grad_norm": 0.47317999601364136, |
|
"learning_rate": 9.960699204056978e-06, |
|
"loss": 0.4365, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.4131994261119082, |
|
"grad_norm": 0.4386410117149353, |
|
"learning_rate": 9.957496810072027e-06, |
|
"loss": 0.4728, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.41750358680057387, |
|
"grad_norm": 0.5309630632400513, |
|
"learning_rate": 9.954169559168958e-06, |
|
"loss": 0.4643, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.4218077474892396, |
|
"grad_norm": 0.4550637900829315, |
|
"learning_rate": 9.95071753514617e-06, |
|
"loss": 0.4462, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.4261119081779053, |
|
"grad_norm": 0.46194523572921753, |
|
"learning_rate": 9.947140824944533e-06, |
|
"loss": 0.4656, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.430416068866571, |
|
"grad_norm": 0.4840027689933777, |
|
"learning_rate": 9.943439518645193e-06, |
|
"loss": 0.4684, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4347202295552367, |
|
"grad_norm": 0.4388582706451416, |
|
"learning_rate": 9.939613709467317e-06, |
|
"loss": 0.4806, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.43902439024390244, |
|
"grad_norm": 0.44998958706855774, |
|
"learning_rate": 9.935663493765726e-06, |
|
"loss": 0.4464, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.4433285509325681, |
|
"grad_norm": 0.41248953342437744, |
|
"learning_rate": 9.93158897102849e-06, |
|
"loss": 0.4678, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.44763271162123386, |
|
"grad_norm": 0.5117968916893005, |
|
"learning_rate": 9.9273902438744e-06, |
|
"loss": 0.4844, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.4519368723098996, |
|
"grad_norm": 0.42773979902267456, |
|
"learning_rate": 9.923067418050399e-06, |
|
"loss": 0.4786, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.4562410329985653, |
|
"grad_norm": 0.4874267876148224, |
|
"learning_rate": 9.918620602428916e-06, |
|
"loss": 0.4718, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.460545193687231, |
|
"grad_norm": 0.48003584146499634, |
|
"learning_rate": 9.91404990900512e-06, |
|
"loss": 0.4654, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.4648493543758967, |
|
"grad_norm": 0.48111721873283386, |
|
"learning_rate": 9.909355452894098e-06, |
|
"loss": 0.4657, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.46915351506456243, |
|
"grad_norm": 0.5006975531578064, |
|
"learning_rate": 9.904537352327968e-06, |
|
"loss": 0.435, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.4734576757532281, |
|
"grad_norm": 0.4520009756088257, |
|
"learning_rate": 9.899595728652883e-06, |
|
"loss": 0.4405, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.47776183644189385, |
|
"grad_norm": 0.43760401010513306, |
|
"learning_rate": 9.894530706325994e-06, |
|
"loss": 0.4649, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.48206599713055953, |
|
"grad_norm": 0.4725401997566223, |
|
"learning_rate": 9.889342412912296e-06, |
|
"loss": 0.4803, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.48637015781922527, |
|
"grad_norm": 0.4176185429096222, |
|
"learning_rate": 9.88403097908143e-06, |
|
"loss": 0.4395, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.49067431850789095, |
|
"grad_norm": 0.4956033229827881, |
|
"learning_rate": 9.878596538604388e-06, |
|
"loss": 0.4865, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.4949784791965567, |
|
"grad_norm": 0.4664049744606018, |
|
"learning_rate": 9.87303922835014e-06, |
|
"loss": 0.481, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.49928263988522237, |
|
"grad_norm": 0.4784802496433258, |
|
"learning_rate": 9.867359188282193e-06, |
|
"loss": 0.4477, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.503586800573888, |
|
"grad_norm": 0.4246116280555725, |
|
"learning_rate": 9.861556561455061e-06, |
|
"loss": 0.4474, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.5078909612625538, |
|
"grad_norm": 0.4217904508113861, |
|
"learning_rate": 9.855631494010661e-06, |
|
"loss": 0.4519, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.5121951219512195, |
|
"grad_norm": 0.45506662130355835, |
|
"learning_rate": 9.849584135174642e-06, |
|
"loss": 0.4738, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.5164992826398852, |
|
"grad_norm": 0.5048141479492188, |
|
"learning_rate": 9.843414637252615e-06, |
|
"loss": 0.4667, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5208034433285509, |
|
"grad_norm": 0.4061611294746399, |
|
"learning_rate": 9.837123155626323e-06, |
|
"loss": 0.4627, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.5251076040172167, |
|
"grad_norm": 0.5009416937828064, |
|
"learning_rate": 9.830709848749727e-06, |
|
"loss": 0.4492, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.5294117647058824, |
|
"grad_norm": 0.3940972685813904, |
|
"learning_rate": 9.824174878145017e-06, |
|
"loss": 0.4575, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.533715925394548, |
|
"grad_norm": 0.4693102240562439, |
|
"learning_rate": 9.817518408398536e-06, |
|
"loss": 0.476, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.5380200860832137, |
|
"grad_norm": 0.4415923058986664, |
|
"learning_rate": 9.810740607156647e-06, |
|
"loss": 0.4524, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.5423242467718795, |
|
"grad_norm": 0.4520181119441986, |
|
"learning_rate": 9.803841645121505e-06, |
|
"loss": 0.4929, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.5466284074605452, |
|
"grad_norm": 0.4665045142173767, |
|
"learning_rate": 9.796821696046748e-06, |
|
"loss": 0.4666, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.5509325681492109, |
|
"grad_norm": 0.48491621017456055, |
|
"learning_rate": 9.78968093673314e-06, |
|
"loss": 0.4667, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.5552367288378766, |
|
"grad_norm": 0.6082088351249695, |
|
"learning_rate": 9.782419547024108e-06, |
|
"loss": 0.4698, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.5595408895265424, |
|
"grad_norm": 0.4258078932762146, |
|
"learning_rate": 9.775037709801206e-06, |
|
"loss": 0.4683, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.563845050215208, |
|
"grad_norm": 0.45697230100631714, |
|
"learning_rate": 9.76753561097952e-06, |
|
"loss": 0.4294, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.5681492109038737, |
|
"grad_norm": 0.4770420789718628, |
|
"learning_rate": 9.759913439502982e-06, |
|
"loss": 0.4744, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.5724533715925395, |
|
"grad_norm": 0.4091593623161316, |
|
"learning_rate": 9.752171387339612e-06, |
|
"loss": 0.4905, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.5767575322812052, |
|
"grad_norm": 0.513836145401001, |
|
"learning_rate": 9.74430964947668e-06, |
|
"loss": 0.463, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.5810616929698709, |
|
"grad_norm": 0.42678093910217285, |
|
"learning_rate": 9.736328423915797e-06, |
|
"loss": 0.4579, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.5853658536585366, |
|
"grad_norm": 0.4890856146812439, |
|
"learning_rate": 9.728227911667934e-06, |
|
"loss": 0.4589, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.5896700143472023, |
|
"grad_norm": 0.4562450349330902, |
|
"learning_rate": 9.720008316748344e-06, |
|
"loss": 0.4649, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.593974175035868, |
|
"grad_norm": 0.42691197991371155, |
|
"learning_rate": 9.711669846171443e-06, |
|
"loss": 0.4434, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.5982783357245337, |
|
"grad_norm": 0.4402090609073639, |
|
"learning_rate": 9.703212709945583e-06, |
|
"loss": 0.4557, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.6025824964131994, |
|
"grad_norm": 0.47748616337776184, |
|
"learning_rate": 9.694637121067764e-06, |
|
"loss": 0.4711, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6068866571018652, |
|
"grad_norm": 0.45207276940345764, |
|
"learning_rate": 9.685943295518283e-06, |
|
"loss": 0.4519, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.6111908177905309, |
|
"grad_norm": 0.4485815167427063, |
|
"learning_rate": 9.677131452255272e-06, |
|
"loss": 0.461, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.6154949784791965, |
|
"grad_norm": 0.47839170694351196, |
|
"learning_rate": 9.668201813209202e-06, |
|
"loss": 0.4747, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.6197991391678622, |
|
"grad_norm": 0.45290523767471313, |
|
"learning_rate": 9.659154603277283e-06, |
|
"loss": 0.4605, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.624103299856528, |
|
"grad_norm": 0.41288816928863525, |
|
"learning_rate": 9.649990050317806e-06, |
|
"loss": 0.4748, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.6284074605451937, |
|
"grad_norm": 0.3928135335445404, |
|
"learning_rate": 9.640708385144403e-06, |
|
"loss": 0.4435, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.6327116212338594, |
|
"grad_norm": 0.46720483899116516, |
|
"learning_rate": 9.631309841520233e-06, |
|
"loss": 0.452, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.6370157819225251, |
|
"grad_norm": 0.42169883847236633, |
|
"learning_rate": 9.62179465615209e-06, |
|
"loss": 0.4544, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.6413199426111909, |
|
"grad_norm": 0.5521188974380493, |
|
"learning_rate": 9.612163068684453e-06, |
|
"loss": 0.4507, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.6456241032998565, |
|
"grad_norm": 0.46388116478919983, |
|
"learning_rate": 9.602415321693434e-06, |
|
"loss": 0.465, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6499282639885222, |
|
"grad_norm": 0.4932452440261841, |
|
"learning_rate": 9.592551660680687e-06, |
|
"loss": 0.4592, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.6542324246771879, |
|
"grad_norm": 0.42455729842185974, |
|
"learning_rate": 9.582572334067213e-06, |
|
"loss": 0.4617, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.6585365853658537, |
|
"grad_norm": 0.4208996593952179, |
|
"learning_rate": 9.572477593187101e-06, |
|
"loss": 0.4629, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.6628407460545194, |
|
"grad_norm": 0.4985635578632355, |
|
"learning_rate": 9.562267692281212e-06, |
|
"loss": 0.4744, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.667144906743185, |
|
"grad_norm": 0.402473121881485, |
|
"learning_rate": 9.551942888490759e-06, |
|
"loss": 0.4369, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.6714490674318508, |
|
"grad_norm": 0.4076909124851227, |
|
"learning_rate": 9.541503441850844e-06, |
|
"loss": 0.4687, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.6757532281205165, |
|
"grad_norm": 0.5572343468666077, |
|
"learning_rate": 9.530949615283902e-06, |
|
"loss": 0.4809, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.6800573888091822, |
|
"grad_norm": 0.416309654712677, |
|
"learning_rate": 9.520281674593084e-06, |
|
"loss": 0.4406, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.6843615494978479, |
|
"grad_norm": 0.41716885566711426, |
|
"learning_rate": 9.509499888455554e-06, |
|
"loss": 0.4687, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.6886657101865137, |
|
"grad_norm": 0.4318557679653168, |
|
"learning_rate": 9.498604528415731e-06, |
|
"loss": 0.4547, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.6929698708751794, |
|
"grad_norm": 0.46745797991752625, |
|
"learning_rate": 9.487595868878447e-06, |
|
"loss": 0.4477, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.697274031563845, |
|
"grad_norm": 0.4273889362812042, |
|
"learning_rate": 9.476474187102033e-06, |
|
"loss": 0.4687, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.7015781922525107, |
|
"grad_norm": 0.40328237414360046, |
|
"learning_rate": 9.465239763191345e-06, |
|
"loss": 0.4547, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.7058823529411765, |
|
"grad_norm": 0.40811076760292053, |
|
"learning_rate": 9.453892880090696e-06, |
|
"loss": 0.4593, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.7101865136298422, |
|
"grad_norm": 0.4333866238594055, |
|
"learning_rate": 9.442433823576741e-06, |
|
"loss": 0.447, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.7144906743185079, |
|
"grad_norm": 0.4304181933403015, |
|
"learning_rate": 9.430862882251279e-06, |
|
"loss": 0.4388, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.7187948350071736, |
|
"grad_norm": 0.4919649660587311, |
|
"learning_rate": 9.419180347533976e-06, |
|
"loss": 0.4457, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.7230989956958394, |
|
"grad_norm": 0.3899565041065216, |
|
"learning_rate": 9.40738651365503e-06, |
|
"loss": 0.4673, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.727403156384505, |
|
"grad_norm": 0.44213035702705383, |
|
"learning_rate": 9.395481677647767e-06, |
|
"loss": 0.4514, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.7317073170731707, |
|
"grad_norm": 0.4894409775733948, |
|
"learning_rate": 9.38346613934115e-06, |
|
"loss": 0.4603, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7360114777618364, |
|
"grad_norm": 0.38324883580207825, |
|
"learning_rate": 9.371340201352234e-06, |
|
"loss": 0.4506, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.7403156384505022, |
|
"grad_norm": 0.5055360198020935, |
|
"learning_rate": 9.359104169078541e-06, |
|
"loss": 0.4528, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.7446197991391679, |
|
"grad_norm": 0.45433783531188965, |
|
"learning_rate": 9.346758350690373e-06, |
|
"loss": 0.4572, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.7489239598278336, |
|
"grad_norm": 0.45139914751052856, |
|
"learning_rate": 9.334303057123044e-06, |
|
"loss": 0.4399, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.7532281205164992, |
|
"grad_norm": 0.44593778252601624, |
|
"learning_rate": 9.321738602069057e-06, |
|
"loss": 0.475, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.757532281205165, |
|
"grad_norm": 0.5288325548171997, |
|
"learning_rate": 9.309065301970193e-06, |
|
"loss": 0.4577, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.7618364418938307, |
|
"grad_norm": 0.45763787627220154, |
|
"learning_rate": 9.296283476009551e-06, |
|
"loss": 0.4614, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.7661406025824964, |
|
"grad_norm": 0.4407108724117279, |
|
"learning_rate": 9.283393446103506e-06, |
|
"loss": 0.4518, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.7704447632711621, |
|
"grad_norm": 0.5397217869758606, |
|
"learning_rate": 9.270395536893599e-06, |
|
"loss": 0.4698, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.7747489239598279, |
|
"grad_norm": 0.4697708189487457, |
|
"learning_rate": 9.257290075738365e-06, |
|
"loss": 0.4505, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.7790530846484935, |
|
"grad_norm": 0.4129229187965393, |
|
"learning_rate": 9.244077392705085e-06, |
|
"loss": 0.4336, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.7833572453371592, |
|
"grad_norm": 0.45845144987106323, |
|
"learning_rate": 9.23075782056147e-06, |
|
"loss": 0.4492, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.787661406025825, |
|
"grad_norm": 0.4498468339443207, |
|
"learning_rate": 9.217331694767291e-06, |
|
"loss": 0.45, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.7919655667144907, |
|
"grad_norm": 0.5015835165977478, |
|
"learning_rate": 9.20379935346592e-06, |
|
"loss": 0.4644, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.7962697274031564, |
|
"grad_norm": 0.39411643147468567, |
|
"learning_rate": 9.190161137475814e-06, |
|
"loss": 0.4346, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.8005738880918221, |
|
"grad_norm": 0.44030869007110596, |
|
"learning_rate": 9.176417390281944e-06, |
|
"loss": 0.4266, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.8048780487804879, |
|
"grad_norm": 0.4082520604133606, |
|
"learning_rate": 9.162568458027122e-06, |
|
"loss": 0.4401, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.8091822094691535, |
|
"grad_norm": 0.44995787739753723, |
|
"learning_rate": 9.148614689503307e-06, |
|
"loss": 0.4394, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.8134863701578192, |
|
"grad_norm": 0.4435669183731079, |
|
"learning_rate": 9.134556436142801e-06, |
|
"loss": 0.4533, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.8177905308464849, |
|
"grad_norm": 0.4199334383010864, |
|
"learning_rate": 9.120394052009412e-06, |
|
"loss": 0.4515, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8220946915351507, |
|
"grad_norm": 0.46623045206069946, |
|
"learning_rate": 9.10612789378953e-06, |
|
"loss": 0.4532, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.8263988522238164, |
|
"grad_norm": 0.44973841309547424, |
|
"learning_rate": 9.091758320783139e-06, |
|
"loss": 0.4528, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.830703012912482, |
|
"grad_norm": 0.433490514755249, |
|
"learning_rate": 9.077285694894786e-06, |
|
"loss": 0.4468, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.8350071736011477, |
|
"grad_norm": 0.5686512589454651, |
|
"learning_rate": 9.062710380624439e-06, |
|
"loss": 0.4685, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.8393113342898135, |
|
"grad_norm": 0.43695539236068726, |
|
"learning_rate": 9.048032745058335e-06, |
|
"loss": 0.4653, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.8436154949784792, |
|
"grad_norm": 0.45285752415657043, |
|
"learning_rate": 9.033253157859715e-06, |
|
"loss": 0.4362, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.8479196556671449, |
|
"grad_norm": 0.4667205214500427, |
|
"learning_rate": 9.018371991259516e-06, |
|
"loss": 0.4605, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.8522238163558106, |
|
"grad_norm": 0.39525923132896423, |
|
"learning_rate": 9.003389620047012e-06, |
|
"loss": 0.4515, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.8565279770444764, |
|
"grad_norm": 0.49101290106773376, |
|
"learning_rate": 8.988306421560354e-06, |
|
"loss": 0.4777, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.860832137733142, |
|
"grad_norm": 0.44344207644462585, |
|
"learning_rate": 8.973122775677078e-06, |
|
"loss": 0.449, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8651362984218077, |
|
"grad_norm": 0.4382517337799072, |
|
"learning_rate": 8.957839064804542e-06, |
|
"loss": 0.4584, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.8694404591104734, |
|
"grad_norm": 0.4284365177154541, |
|
"learning_rate": 8.942455673870278e-06, |
|
"loss": 0.441, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.8737446197991392, |
|
"grad_norm": 0.3757075071334839, |
|
"learning_rate": 8.926972990312314e-06, |
|
"loss": 0.4408, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.8780487804878049, |
|
"grad_norm": 0.484001100063324, |
|
"learning_rate": 8.91139140406941e-06, |
|
"loss": 0.4446, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.8823529411764706, |
|
"grad_norm": 0.46470439434051514, |
|
"learning_rate": 8.895711307571235e-06, |
|
"loss": 0.4541, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.8866571018651362, |
|
"grad_norm": 0.39970001578330994, |
|
"learning_rate": 8.879933095728485e-06, |
|
"loss": 0.4382, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.890961262553802, |
|
"grad_norm": 0.5493953824043274, |
|
"learning_rate": 8.864057165922944e-06, |
|
"loss": 0.4737, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.8952654232424677, |
|
"grad_norm": 0.3999284505844116, |
|
"learning_rate": 8.848083917997463e-06, |
|
"loss": 0.4428, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.8995695839311334, |
|
"grad_norm": 0.4151836633682251, |
|
"learning_rate": 8.832013754245895e-06, |
|
"loss": 0.4416, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.9038737446197992, |
|
"grad_norm": 0.5059155225753784, |
|
"learning_rate": 8.815847079402972e-06, |
|
"loss": 0.4445, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.9081779053084649, |
|
"grad_norm": 0.39089852571487427, |
|
"learning_rate": 8.799584300634096e-06, |
|
"loss": 0.44, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.9124820659971306, |
|
"grad_norm": 0.4415070712566376, |
|
"learning_rate": 8.783225827525098e-06, |
|
"loss": 0.4423, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.9167862266857962, |
|
"grad_norm": 0.49563416838645935, |
|
"learning_rate": 8.766772072071911e-06, |
|
"loss": 0.492, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.921090387374462, |
|
"grad_norm": 0.4825840890407562, |
|
"learning_rate": 8.750223448670204e-06, |
|
"loss": 0.4745, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.9253945480631277, |
|
"grad_norm": 0.41341516375541687, |
|
"learning_rate": 8.733580374104936e-06, |
|
"loss": 0.4607, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.9296987087517934, |
|
"grad_norm": 0.4265018701553345, |
|
"learning_rate": 8.716843267539868e-06, |
|
"loss": 0.4277, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.9340028694404591, |
|
"grad_norm": 0.4438970386981964, |
|
"learning_rate": 8.700012550507e-06, |
|
"loss": 0.4489, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.9383070301291249, |
|
"grad_norm": 0.427259624004364, |
|
"learning_rate": 8.683088646895955e-06, |
|
"loss": 0.4715, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.9426111908177905, |
|
"grad_norm": 0.4407413899898529, |
|
"learning_rate": 8.666071982943306e-06, |
|
"loss": 0.4482, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.9469153515064562, |
|
"grad_norm": 0.42888280749320984, |
|
"learning_rate": 8.648962987221837e-06, |
|
"loss": 0.4584, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.9512195121951219, |
|
"grad_norm": 0.43822959065437317, |
|
"learning_rate": 8.631762090629756e-06, |
|
"loss": 0.4603, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.9555236728837877, |
|
"grad_norm": 0.4983186423778534, |
|
"learning_rate": 8.614469726379833e-06, |
|
"loss": 0.4278, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.9598278335724534, |
|
"grad_norm": 0.42594388127326965, |
|
"learning_rate": 8.597086329988498e-06, |
|
"loss": 0.4565, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.9641319942611191, |
|
"grad_norm": 0.4089045822620392, |
|
"learning_rate": 8.579612339264867e-06, |
|
"loss": 0.3956, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.9684361549497847, |
|
"grad_norm": 0.5381679534912109, |
|
"learning_rate": 8.562048194299719e-06, |
|
"loss": 0.469, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.9727403156384505, |
|
"grad_norm": 0.46227994561195374, |
|
"learning_rate": 8.544394337454409e-06, |
|
"loss": 0.4566, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.9770444763271162, |
|
"grad_norm": 0.4622877836227417, |
|
"learning_rate": 8.52665121334973e-06, |
|
"loss": 0.4523, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.9813486370157819, |
|
"grad_norm": 0.4579131603240967, |
|
"learning_rate": 8.508819268854713e-06, |
|
"loss": 0.4389, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.9856527977044476, |
|
"grad_norm": 0.47624289989471436, |
|
"learning_rate": 8.49089895307537e-06, |
|
"loss": 0.4619, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.9899569583931134, |
|
"grad_norm": 0.4958518445491791, |
|
"learning_rate": 8.472890717343391e-06, |
|
"loss": 0.4549, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.994261119081779, |
|
"grad_norm": 0.4146155118942261, |
|
"learning_rate": 8.454795015204767e-06, |
|
"loss": 0.4366, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.9985652797704447, |
|
"grad_norm": 0.5480135083198547, |
|
"learning_rate": 8.436612302408376e-06, |
|
"loss": 0.4537, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.0028694404591105, |
|
"grad_norm": 0.814457356929779, |
|
"learning_rate": 8.418343036894497e-06, |
|
"loss": 0.7295, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.007173601147776, |
|
"grad_norm": 0.4416688084602356, |
|
"learning_rate": 8.399987678783285e-06, |
|
"loss": 0.3523, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.011477761836442, |
|
"grad_norm": 0.683137059211731, |
|
"learning_rate": 8.381546690363174e-06, |
|
"loss": 0.4444, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.0157819225251077, |
|
"grad_norm": 0.5394715070724487, |
|
"learning_rate": 8.36302053607924e-06, |
|
"loss": 0.447, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.0200860832137733, |
|
"grad_norm": 0.5086341500282288, |
|
"learning_rate": 8.344409682521499e-06, |
|
"loss": 0.4487, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.024390243902439, |
|
"grad_norm": 0.6077011227607727, |
|
"learning_rate": 8.325714598413169e-06, |
|
"loss": 0.3777, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.0286944045911048, |
|
"grad_norm": 0.4829539358615875, |
|
"learning_rate": 8.306935754598838e-06, |
|
"loss": 0.4263, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.0329985652797704, |
|
"grad_norm": 0.5820494294166565, |
|
"learning_rate": 8.288073624032634e-06, |
|
"loss": 0.4574, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0373027259684362, |
|
"grad_norm": 0.4590955376625061, |
|
"learning_rate": 8.269128681766296e-06, |
|
"loss": 0.3603, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.0416068866571018, |
|
"grad_norm": 0.5867065191268921, |
|
"learning_rate": 8.250101404937223e-06, |
|
"loss": 0.4503, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.0459110473457676, |
|
"grad_norm": 0.4794483184814453, |
|
"learning_rate": 8.230992272756438e-06, |
|
"loss": 0.4189, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.0502152080344334, |
|
"grad_norm": 0.520908534526825, |
|
"learning_rate": 8.211801766496537e-06, |
|
"loss": 0.448, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.054519368723099, |
|
"grad_norm": 0.4657638967037201, |
|
"learning_rate": 8.192530369479562e-06, |
|
"loss": 0.4121, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.0588235294117647, |
|
"grad_norm": 0.5529608130455017, |
|
"learning_rate": 8.17317856706482e-06, |
|
"loss": 0.4463, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.0631276901004305, |
|
"grad_norm": 0.5230234861373901, |
|
"learning_rate": 8.153746846636675e-06, |
|
"loss": 0.4075, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.067431850789096, |
|
"grad_norm": 0.4816892147064209, |
|
"learning_rate": 8.13423569759226e-06, |
|
"loss": 0.4085, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.0717360114777619, |
|
"grad_norm": 0.475556880235672, |
|
"learning_rate": 8.114645611329152e-06, |
|
"loss": 0.4149, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.0760401721664274, |
|
"grad_norm": 0.42703869938850403, |
|
"learning_rate": 8.094977081233006e-06, |
|
"loss": 0.395, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.0803443328550932, |
|
"grad_norm": 0.4868505895137787, |
|
"learning_rate": 8.075230602665118e-06, |
|
"loss": 0.399, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.084648493543759, |
|
"grad_norm": 0.4027485251426697, |
|
"learning_rate": 8.055406672949957e-06, |
|
"loss": 0.3854, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.0889526542324246, |
|
"grad_norm": 0.4265587627887726, |
|
"learning_rate": 8.03550579136263e-06, |
|
"loss": 0.4077, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.0932568149210904, |
|
"grad_norm": 0.43399757146835327, |
|
"learning_rate": 8.015528459116321e-06, |
|
"loss": 0.4204, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.0975609756097562, |
|
"grad_norm": 0.46295246481895447, |
|
"learning_rate": 7.995475179349657e-06, |
|
"loss": 0.4132, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.1018651362984218, |
|
"grad_norm": 0.3826509118080139, |
|
"learning_rate": 7.975346457114034e-06, |
|
"loss": 0.4238, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.1061692969870875, |
|
"grad_norm": 0.45687171816825867, |
|
"learning_rate": 7.955142799360914e-06, |
|
"loss": 0.4395, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.1104734576757531, |
|
"grad_norm": 0.41050204634666443, |
|
"learning_rate": 7.934864714929036e-06, |
|
"loss": 0.4315, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.114777618364419, |
|
"grad_norm": 0.42092767357826233, |
|
"learning_rate": 7.914512714531612e-06, |
|
"loss": 0.4195, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.1190817790530847, |
|
"grad_norm": 0.4156297445297241, |
|
"learning_rate": 7.894087310743468e-06, |
|
"loss": 0.4628, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.1233859397417503, |
|
"grad_norm": 0.415634423494339, |
|
"learning_rate": 7.873589017988124e-06, |
|
"loss": 0.3867, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.127690100430416, |
|
"grad_norm": 0.40684717893600464, |
|
"learning_rate": 7.853018352524845e-06, |
|
"loss": 0.4505, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.1319942611190819, |
|
"grad_norm": 0.40677744150161743, |
|
"learning_rate": 7.832375832435637e-06, |
|
"loss": 0.4158, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.1362984218077474, |
|
"grad_norm": 0.40581533312797546, |
|
"learning_rate": 7.811661977612202e-06, |
|
"loss": 0.4406, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.1406025824964132, |
|
"grad_norm": 0.3579067587852478, |
|
"learning_rate": 7.790877309742833e-06, |
|
"loss": 0.418, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.144906743185079, |
|
"grad_norm": 0.39150920510292053, |
|
"learning_rate": 7.770022352299294e-06, |
|
"loss": 0.3976, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.1492109038737446, |
|
"grad_norm": 0.47496986389160156, |
|
"learning_rate": 7.749097630523618e-06, |
|
"loss": 0.4337, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.1535150645624104, |
|
"grad_norm": 0.3943747878074646, |
|
"learning_rate": 7.728103671414889e-06, |
|
"loss": 0.3984, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.157819225251076, |
|
"grad_norm": 0.34318605065345764, |
|
"learning_rate": 7.707041003715962e-06, |
|
"loss": 0.3682, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.1621233859397417, |
|
"grad_norm": 0.3580489456653595, |
|
"learning_rate": 7.685910157900158e-06, |
|
"loss": 0.4177, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.1664275466284075, |
|
"grad_norm": 0.4793737232685089, |
|
"learning_rate": 7.66471166615789e-06, |
|
"loss": 0.4102, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.170731707317073, |
|
"grad_norm": 0.39403393864631653, |
|
"learning_rate": 7.643446062383273e-06, |
|
"loss": 0.3908, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.175035868005739, |
|
"grad_norm": 0.39132651686668396, |
|
"learning_rate": 7.622113882160658e-06, |
|
"loss": 0.4313, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.1793400286944047, |
|
"grad_norm": 0.4182010293006897, |
|
"learning_rate": 7.600715662751166e-06, |
|
"loss": 0.3992, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.1836441893830703, |
|
"grad_norm": 0.4303508400917053, |
|
"learning_rate": 7.579251943079145e-06, |
|
"loss": 0.4271, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.187948350071736, |
|
"grad_norm": 0.3482970893383026, |
|
"learning_rate": 7.557723263718596e-06, |
|
"loss": 0.3602, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.1922525107604018, |
|
"grad_norm": 0.45806214213371277, |
|
"learning_rate": 7.536130166879561e-06, |
|
"loss": 0.4503, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.1965566714490674, |
|
"grad_norm": 0.41952845454216003, |
|
"learning_rate": 7.514473196394467e-06, |
|
"loss": 0.4377, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.2008608321377332, |
|
"grad_norm": 0.3832322657108307, |
|
"learning_rate": 7.492752897704432e-06, |
|
"loss": 0.4034, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.2051649928263988, |
|
"grad_norm": 0.4345414340496063, |
|
"learning_rate": 7.470969817845518e-06, |
|
"loss": 0.4549, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.2094691535150646, |
|
"grad_norm": 0.4060963988304138, |
|
"learning_rate": 7.4491245054349716e-06, |
|
"loss": 0.3924, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.2137733142037304, |
|
"grad_norm": 0.3915734887123108, |
|
"learning_rate": 7.427217510657383e-06, |
|
"loss": 0.434, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.218077474892396, |
|
"grad_norm": 0.3792259097099304, |
|
"learning_rate": 7.405249385250854e-06, |
|
"loss": 0.4219, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.2223816355810617, |
|
"grad_norm": 0.39508765935897827, |
|
"learning_rate": 7.383220682493081e-06, |
|
"loss": 0.4042, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.2266857962697273, |
|
"grad_norm": 0.39781150221824646, |
|
"learning_rate": 7.361131957187435e-06, |
|
"loss": 0.4149, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.230989956958393, |
|
"grad_norm": 0.34590664505958557, |
|
"learning_rate": 7.338983765648985e-06, |
|
"loss": 0.3643, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.2352941176470589, |
|
"grad_norm": 0.39236798882484436, |
|
"learning_rate": 7.31677666569048e-06, |
|
"loss": 0.4362, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.2395982783357244, |
|
"grad_norm": 0.36852768063545227, |
|
"learning_rate": 7.294511216608308e-06, |
|
"loss": 0.3771, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.2439024390243902, |
|
"grad_norm": 0.4464952051639557, |
|
"learning_rate": 7.272187979168408e-06, |
|
"loss": 0.4386, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.248206599713056, |
|
"grad_norm": 0.382019579410553, |
|
"learning_rate": 7.249807515592149e-06, |
|
"loss": 0.4192, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.2525107604017216, |
|
"grad_norm": 0.4468196630477905, |
|
"learning_rate": 7.227370389542161e-06, |
|
"loss": 0.4278, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.2568149210903874, |
|
"grad_norm": 0.4168325662612915, |
|
"learning_rate": 7.2048771661081515e-06, |
|
"loss": 0.4011, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.2611190817790532, |
|
"grad_norm": 0.4351046085357666, |
|
"learning_rate": 7.182328411792664e-06, |
|
"loss": 0.4019, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.2654232424677188, |
|
"grad_norm": 0.41555511951446533, |
|
"learning_rate": 7.159724694496815e-06, |
|
"loss": 0.3819, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.2697274031563845, |
|
"grad_norm": 0.4706554710865021, |
|
"learning_rate": 7.137066583505987e-06, |
|
"loss": 0.4372, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.2740315638450501, |
|
"grad_norm": 0.34467652440071106, |
|
"learning_rate": 7.114354649475499e-06, |
|
"loss": 0.3756, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.278335724533716, |
|
"grad_norm": 0.4327693581581116, |
|
"learning_rate": 7.091589464416225e-06, |
|
"loss": 0.4385, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.2826398852223817, |
|
"grad_norm": 0.409885048866272, |
|
"learning_rate": 7.068771601680191e-06, |
|
"loss": 0.426, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.2869440459110473, |
|
"grad_norm": 0.4596419036388397, |
|
"learning_rate": 7.04590163594614e-06, |
|
"loss": 0.4109, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.291248206599713, |
|
"grad_norm": 0.3838300108909607, |
|
"learning_rate": 7.022980143205046e-06, |
|
"loss": 0.391, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.2955523672883786, |
|
"grad_norm": 0.37262919545173645, |
|
"learning_rate": 7.000007700745622e-06, |
|
"loss": 0.4314, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.2998565279770444, |
|
"grad_norm": 0.44159433245658875, |
|
"learning_rate": 6.976984887139775e-06, |
|
"loss": 0.4172, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.3041606886657102, |
|
"grad_norm": 0.4745006859302521, |
|
"learning_rate": 6.9539122822280246e-06, |
|
"loss": 0.4023, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.308464849354376, |
|
"grad_norm": 0.3681146204471588, |
|
"learning_rate": 6.930790467104916e-06, |
|
"loss": 0.3768, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.3127690100430416, |
|
"grad_norm": 0.4388069212436676, |
|
"learning_rate": 6.907620024104377e-06, |
|
"loss": 0.4551, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.3170731707317074, |
|
"grad_norm": 0.40693527460098267, |
|
"learning_rate": 6.884401536785045e-06, |
|
"loss": 0.4001, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.321377331420373, |
|
"grad_norm": 0.47269055247306824, |
|
"learning_rate": 6.861135589915583e-06, |
|
"loss": 0.4123, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.3256814921090387, |
|
"grad_norm": 0.34925857186317444, |
|
"learning_rate": 6.837822769459942e-06, |
|
"loss": 0.3817, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.3299856527977045, |
|
"grad_norm": 0.3714245557785034, |
|
"learning_rate": 6.814463662562609e-06, |
|
"loss": 0.4048, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.33428981348637, |
|
"grad_norm": 0.46117159724235535, |
|
"learning_rate": 6.791058857533814e-06, |
|
"loss": 0.4096, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.338593974175036, |
|
"grad_norm": 0.3898836374282837, |
|
"learning_rate": 6.767608943834721e-06, |
|
"loss": 0.4042, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.3428981348637015, |
|
"grad_norm": 0.4022074043750763, |
|
"learning_rate": 6.744114512062571e-06, |
|
"loss": 0.3824, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.3472022955523673, |
|
"grad_norm": 0.43312016129493713, |
|
"learning_rate": 6.720576153935818e-06, |
|
"loss": 0.4576, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.351506456241033, |
|
"grad_norm": 0.4213770627975464, |
|
"learning_rate": 6.696994462279223e-06, |
|
"loss": 0.428, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.3558106169296988, |
|
"grad_norm": 0.4370782971382141, |
|
"learning_rate": 6.673370031008919e-06, |
|
"loss": 0.4351, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.3601147776183644, |
|
"grad_norm": 0.3750602900981903, |
|
"learning_rate": 6.6497034551174585e-06, |
|
"loss": 0.423, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.3644189383070302, |
|
"grad_norm": 0.3736652135848999, |
|
"learning_rate": 6.625995330658828e-06, |
|
"loss": 0.3869, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.3687230989956958, |
|
"grad_norm": 0.4050619304180145, |
|
"learning_rate": 6.602246254733431e-06, |
|
"loss": 0.4052, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.3730272596843616, |
|
"grad_norm": 0.3807079493999481, |
|
"learning_rate": 6.578456825473055e-06, |
|
"loss": 0.4164, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.3773314203730274, |
|
"grad_norm": 0.39028626680374146, |
|
"learning_rate": 6.554627642025807e-06, |
|
"loss": 0.3715, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.381635581061693, |
|
"grad_norm": 0.4095914959907532, |
|
"learning_rate": 6.53075930454102e-06, |
|
"loss": 0.4294, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.3859397417503587, |
|
"grad_norm": 0.3441252112388611, |
|
"learning_rate": 6.506852414154138e-06, |
|
"loss": 0.395, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.3902439024390243, |
|
"grad_norm": 0.38136348128318787, |
|
"learning_rate": 6.482907572971584e-06, |
|
"loss": 0.4512, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.39454806312769, |
|
"grad_norm": 0.3387623429298401, |
|
"learning_rate": 6.4589253840555856e-06, |
|
"loss": 0.3324, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.3988522238163559, |
|
"grad_norm": 0.3917059600353241, |
|
"learning_rate": 6.434906451408991e-06, |
|
"loss": 0.4544, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.4031563845050214, |
|
"grad_norm": 0.35180503129959106, |
|
"learning_rate": 6.41085137996006e-06, |
|
"loss": 0.3851, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.4074605451936872, |
|
"grad_norm": 0.35865458846092224, |
|
"learning_rate": 6.386760775547221e-06, |
|
"loss": 0.4026, |
|
"step": 327 |
|
}, |
|
    {
      "epoch": 1.4117647058823528,
      "grad_norm": 0.34770017862319946,
      "learning_rate": 6.362635244903818e-06,
      "loss": 0.397,
      "step": 328
    },
    {
      "epoch": 1.4160688665710186,
      "grad_norm": 0.397268682718277,
      "learning_rate": 6.338475395642834e-06,
      "loss": 0.4378,
      "step": 329
    },
    {
      "epoch": 1.4203730272596844,
      "grad_norm": 0.35202059149742126,
      "learning_rate": 6.314281836241573e-06,
      "loss": 0.4169,
      "step": 330
    },
    {
      "epoch": 1.4246771879483502,
      "grad_norm": 0.35058972239494324,
      "learning_rate": 6.2900551760263564e-06,
      "loss": 0.4158,
      "step": 331
    },
    {
      "epoch": 1.4289813486370158,
      "grad_norm": 0.4056157171726227,
      "learning_rate": 6.265796025157154e-06,
      "loss": 0.376,
      "step": 332
    },
    {
      "epoch": 1.4332855093256816,
      "grad_norm": 0.3331606090068817,
      "learning_rate": 6.241504994612237e-06,
      "loss": 0.4236,
      "step": 333
    },
    {
      "epoch": 1.4375896700143471,
      "grad_norm": 0.3854994475841522,
      "learning_rate": 6.217182696172776e-06,
      "loss": 0.4023,
      "step": 334
    },
    {
      "epoch": 1.441893830703013,
      "grad_norm": 0.36590859293937683,
      "learning_rate": 6.192829742407442e-06,
      "loss": 0.4245,
      "step": 335
    },
    {
      "epoch": 1.4461979913916787,
      "grad_norm": 0.3405327796936035,
      "learning_rate": 6.168446746656973e-06,
      "loss": 0.4134,
      "step": 336
    },
    {
      "epoch": 1.4505021520803443,
      "grad_norm": 0.35657453536987305,
      "learning_rate": 6.144034323018728e-06,
      "loss": 0.4235,
      "step": 337
    },
    {
      "epoch": 1.45480631276901,
      "grad_norm": 0.3618459701538086,
      "learning_rate": 6.119593086331225e-06,
      "loss": 0.3759,
      "step": 338
    },
    {
      "epoch": 1.4591104734576756,
      "grad_norm": 0.3598412871360779,
      "learning_rate": 6.095123652158648e-06,
      "loss": 0.4016,
      "step": 339
    },
    {
      "epoch": 1.4634146341463414,
      "grad_norm": 0.3681644797325134,
      "learning_rate": 6.070626636775349e-06,
      "loss": 0.4307,
      "step": 340
    },
    {
      "epoch": 1.4677187948350072,
      "grad_norm": 0.32633137702941895,
      "learning_rate": 6.046102657150328e-06,
      "loss": 0.3716,
      "step": 341
    },
    {
      "epoch": 1.472022955523673,
      "grad_norm": 0.3902093172073364,
      "learning_rate": 6.021552330931693e-06,
      "loss": 0.4195,
      "step": 342
    },
    {
      "epoch": 1.4763271162123386,
      "grad_norm": 0.397809237241745,
      "learning_rate": 5.996976276431097e-06,
      "loss": 0.4444,
      "step": 343
    },
    {
      "epoch": 1.4806312769010044,
      "grad_norm": 0.36122214794158936,
      "learning_rate": 5.972375112608182e-06,
      "loss": 0.3855,
      "step": 344
    },
    {
      "epoch": 1.48493543758967,
      "grad_norm": 0.3352508842945099,
      "learning_rate": 5.947749459054972e-06,
      "loss": 0.4506,
      "step": 345
    },
    {
      "epoch": 1.4892395982783357,
      "grad_norm": 0.36873918771743774,
      "learning_rate": 5.923099935980278e-06,
      "loss": 0.419,
      "step": 346
    },
    {
      "epoch": 1.4935437589670015,
      "grad_norm": 0.3478635847568512,
      "learning_rate": 5.898427164194084e-06,
      "loss": 0.3614,
      "step": 347
    },
    {
      "epoch": 1.497847919655667,
      "grad_norm": 0.3391861915588379,
      "learning_rate": 5.8737317650918905e-06,
      "loss": 0.3903,
      "step": 348
    },
    {
      "epoch": 1.502152080344333,
      "grad_norm": 0.3982401490211487,
      "learning_rate": 5.849014360639087e-06,
      "loss": 0.4654,
      "step": 349
    },
    {
      "epoch": 1.5064562410329985,
      "grad_norm": 0.3000711500644684,
      "learning_rate": 5.824275573355278e-06,
      "loss": 0.3453,
      "step": 350
    },
    {
      "epoch": 1.5107604017216643,
      "grad_norm": 0.39935576915740967,
      "learning_rate": 5.799516026298601e-06,
      "loss": 0.4482,
      "step": 351
    },
    {
      "epoch": 1.51506456241033,
      "grad_norm": 0.33801084756851196,
      "learning_rate": 5.7747363430500395e-06,
      "loss": 0.3708,
      "step": 352
    },
    {
      "epoch": 1.5193687230989958,
      "grad_norm": 0.40248093008995056,
      "learning_rate": 5.74993714769772e-06,
      "loss": 0.4319,
      "step": 353
    },
    {
      "epoch": 1.5236728837876614,
      "grad_norm": 0.38056331872940063,
      "learning_rate": 5.725119064821185e-06,
      "loss": 0.4125,
      "step": 354
    },
    {
      "epoch": 1.527977044476327,
      "grad_norm": 0.34441474080085754,
      "learning_rate": 5.700282719475672e-06,
      "loss": 0.3856,
      "step": 355
    },
    {
      "epoch": 1.5322812051649928,
      "grad_norm": 0.36836621165275574,
      "learning_rate": 5.675428737176367e-06,
      "loss": 0.4293,
      "step": 356
    },
    {
      "epoch": 1.5365853658536586,
      "grad_norm": 0.33872583508491516,
      "learning_rate": 5.65055774388265e-06,
      "loss": 0.4381,
      "step": 357
    },
    {
      "epoch": 1.5408895265423244,
      "grad_norm": 0.3220119774341583,
      "learning_rate": 5.625670365982332e-06,
      "loss": 0.3737,
      "step": 358
    },
    {
      "epoch": 1.54519368723099,
      "grad_norm": 0.41862717270851135,
      "learning_rate": 5.600767230275878e-06,
      "loss": 0.4366,
      "step": 359
    },
    {
      "epoch": 1.5494978479196555,
      "grad_norm": 0.36884069442749023,
      "learning_rate": 5.575848963960621e-06,
      "loss": 0.3808,
      "step": 360
    },
    {
      "epoch": 1.5538020086083213,
      "grad_norm": 0.37247779965400696,
      "learning_rate": 5.5509161946149635e-06,
      "loss": 0.436,
      "step": 361
    },
    {
      "epoch": 1.558106169296987,
      "grad_norm": 0.3505042791366577,
      "learning_rate": 5.525969550182577e-06,
      "loss": 0.3985,
      "step": 362
    },
    {
      "epoch": 1.5624103299856529,
      "grad_norm": 0.37073713541030884,
      "learning_rate": 5.501009658956583e-06,
      "loss": 0.3789,
      "step": 363
    },
    {
      "epoch": 1.5667144906743187,
      "grad_norm": 0.3942989706993103,
      "learning_rate": 5.4760371495637256e-06,
      "loss": 0.4073,
      "step": 364
    },
    {
      "epoch": 1.5710186513629842,
      "grad_norm": 0.3741658329963684,
      "learning_rate": 5.451052650948549e-06,
      "loss": 0.4109,
      "step": 365
    },
    {
      "epoch": 1.5753228120516498,
      "grad_norm": 0.3425599932670593,
      "learning_rate": 5.426056792357552e-06,
      "loss": 0.3824,
      "step": 366
    },
    {
      "epoch": 1.5796269727403156,
      "grad_norm": 0.4195639491081238,
      "learning_rate": 5.40105020332333e-06,
      "loss": 0.407,
      "step": 367
    },
    {
      "epoch": 1.5839311334289814,
      "grad_norm": 0.37894967198371887,
      "learning_rate": 5.376033513648743e-06,
      "loss": 0.4505,
      "step": 368
    },
    {
      "epoch": 1.5882352941176472,
      "grad_norm": 0.3418775498867035,
      "learning_rate": 5.3510073533910344e-06,
      "loss": 0.363,
      "step": 369
    },
    {
      "epoch": 1.5925394548063128,
      "grad_norm": 0.40504151582717896,
      "learning_rate": 5.325972352845965e-06,
      "loss": 0.4566,
      "step": 370
    },
    {
      "epoch": 1.5968436154949783,
      "grad_norm": 0.37350550293922424,
      "learning_rate": 5.30092914253195e-06,
      "loss": 0.4217,
      "step": 371
    },
    {
      "epoch": 1.6011477761836441,
      "grad_norm": 0.3756239414215088,
      "learning_rate": 5.2758783531741655e-06,
      "loss": 0.3634,
      "step": 372
    },
    {
      "epoch": 1.60545193687231,
      "grad_norm": 0.4232361316680908,
      "learning_rate": 5.25082061568867e-06,
      "loss": 0.4412,
      "step": 373
    },
    {
      "epoch": 1.6097560975609757,
      "grad_norm": 0.376746267080307,
      "learning_rate": 5.225756561166521e-06,
      "loss": 0.4105,
      "step": 374
    },
    {
      "epoch": 1.6140602582496413,
      "grad_norm": 0.36558979749679565,
      "learning_rate": 5.200686820857862e-06,
      "loss": 0.414,
      "step": 375
    },
    {
      "epoch": 1.618364418938307,
      "grad_norm": 0.31528496742248535,
      "learning_rate": 5.175612026156045e-06,
      "loss": 0.375,
      "step": 376
    },
    {
      "epoch": 1.6226685796269726,
      "grad_norm": 0.3867856562137604,
      "learning_rate": 5.150532808581718e-06,
      "loss": 0.4215,
      "step": 377
    },
    {
      "epoch": 1.6269727403156384,
      "grad_norm": 0.36438819766044617,
      "learning_rate": 5.125449799766916e-06,
      "loss": 0.3651,
      "step": 378
    },
    {
      "epoch": 1.6312769010043042,
      "grad_norm": 0.34973829984664917,
      "learning_rate": 5.100363631439162e-06,
      "loss": 0.4132,
      "step": 379
    },
    {
      "epoch": 1.63558106169297,
      "grad_norm": 0.32271939516067505,
      "learning_rate": 5.075274935405554e-06,
      "loss": 0.3972,
      "step": 380
    },
    {
      "epoch": 1.6398852223816356,
      "grad_norm": 0.36957699060440063,
      "learning_rate": 5.0501843435368495e-06,
      "loss": 0.3961,
      "step": 381
    },
    {
      "epoch": 1.6441893830703012,
      "grad_norm": 0.3583492338657379,
      "learning_rate": 5.025092487751552e-06,
      "loss": 0.4226,
      "step": 382
    },
    {
      "epoch": 1.648493543758967,
      "grad_norm": 0.3203399181365967,
      "learning_rate": 5e-06,
      "loss": 0.3844,
      "step": 383
    },
    {
      "epoch": 1.6527977044476327,
      "grad_norm": 0.3899810314178467,
      "learning_rate": 4.974907512248451e-06,
      "loss": 0.412,
      "step": 384
    },
    {
      "epoch": 1.6571018651362985,
      "grad_norm": 0.3584853410720825,
      "learning_rate": 4.949815656463151e-06,
      "loss": 0.4106,
      "step": 385
    },
    {
      "epoch": 1.661406025824964,
      "grad_norm": 0.3646269142627716,
      "learning_rate": 4.924725064594448e-06,
      "loss": 0.3887,
      "step": 386
    },
    {
      "epoch": 1.6657101865136297,
      "grad_norm": 0.3446120321750641,
      "learning_rate": 4.89963636856084e-06,
      "loss": 0.4095,
      "step": 387
    },
    {
      "epoch": 1.6700143472022955,
      "grad_norm": 0.36433911323547363,
      "learning_rate": 4.874550200233085e-06,
      "loss": 0.4447,
      "step": 388
    },
    {
      "epoch": 1.6743185078909613,
      "grad_norm": 0.3468761444091797,
      "learning_rate": 4.8494671914182835e-06,
      "loss": 0.4033,
      "step": 389
    },
    {
      "epoch": 1.678622668579627,
      "grad_norm": 0.34094858169555664,
      "learning_rate": 4.824387973843957e-06,
      "loss": 0.4097,
      "step": 390
    },
    {
      "epoch": 1.6829268292682928,
      "grad_norm": 0.348934531211853,
      "learning_rate": 4.7993131791421385e-06,
      "loss": 0.376,
      "step": 391
    },
    {
      "epoch": 1.6872309899569584,
      "grad_norm": 0.36609116196632385,
      "learning_rate": 4.7742434388334815e-06,
      "loss": 0.4128,
      "step": 392
    },
    {
      "epoch": 1.691535150645624,
      "grad_norm": 0.3412748873233795,
      "learning_rate": 4.749179384311331e-06,
      "loss": 0.4038,
      "step": 393
    },
    {
      "epoch": 1.6958393113342898,
      "grad_norm": 0.3432169556617737,
      "learning_rate": 4.724121646825838e-06,
      "loss": 0.3885,
      "step": 394
    },
    {
      "epoch": 1.7001434720229556,
      "grad_norm": 0.3838341236114502,
      "learning_rate": 4.699070857468052e-06,
      "loss": 0.4334,
      "step": 395
    },
    {
      "epoch": 1.7044476327116214,
      "grad_norm": 0.32072073221206665,
      "learning_rate": 4.674027647154037e-06,
      "loss": 0.3766,
      "step": 396
    },
    {
      "epoch": 1.708751793400287,
      "grad_norm": 0.3909914493560791,
      "learning_rate": 4.648992646608968e-06,
      "loss": 0.4301,
      "step": 397
    },
    {
      "epoch": 1.7130559540889525,
      "grad_norm": 0.3556109666824341,
      "learning_rate": 4.623966486351257e-06,
      "loss": 0.3922,
      "step": 398
    },
    {
      "epoch": 1.7173601147776183,
      "grad_norm": 0.31979143619537354,
      "learning_rate": 4.598949796676672e-06,
      "loss": 0.3612,
      "step": 399
    },
    {
      "epoch": 1.721664275466284,
      "grad_norm": 0.3717595040798187,
      "learning_rate": 4.573943207642452e-06,
      "loss": 0.4423,
      "step": 400
    },
    {
      "epoch": 1.7259684361549499,
      "grad_norm": 0.3407822847366333,
      "learning_rate": 4.548947349051452e-06,
      "loss": 0.406,
      "step": 401
    },
    {
      "epoch": 1.7302725968436155,
      "grad_norm": 0.3208092451095581,
      "learning_rate": 4.523962850436276e-06,
      "loss": 0.3839,
      "step": 402
    },
    {
      "epoch": 1.7345767575322812,
      "grad_norm": 0.3565741181373596,
      "learning_rate": 4.498990341043419e-06,
      "loss": 0.4238,
      "step": 403
    },
    {
      "epoch": 1.7388809182209468,
      "grad_norm": 0.3671400547027588,
      "learning_rate": 4.474030449817423e-06,
      "loss": 0.416,
      "step": 404
    },
    {
      "epoch": 1.7431850789096126,
      "grad_norm": 0.3624553382396698,
      "learning_rate": 4.449083805385037e-06,
      "loss": 0.4226,
      "step": 405
    },
    {
      "epoch": 1.7474892395982784,
      "grad_norm": 0.30359649658203125,
      "learning_rate": 4.424151036039381e-06,
      "loss": 0.3579,
      "step": 406
    },
    {
      "epoch": 1.7517934002869442,
      "grad_norm": 0.38856279850006104,
      "learning_rate": 4.3992327697241225e-06,
      "loss": 0.419,
      "step": 407
    },
    {
      "epoch": 1.7560975609756098,
      "grad_norm": 0.370222806930542,
      "learning_rate": 4.3743296340176694e-06,
      "loss": 0.3893,
      "step": 408
    },
    {
      "epoch": 1.7604017216642753,
      "grad_norm": 0.3267267048358917,
      "learning_rate": 4.3494422561173515e-06,
      "loss": 0.3741,
      "step": 409
    },
    {
      "epoch": 1.7647058823529411,
      "grad_norm": 0.3811343014240265,
      "learning_rate": 4.3245712628236356e-06,
      "loss": 0.4255,
      "step": 410
    },
    {
      "epoch": 1.769010043041607,
      "grad_norm": 0.3775186538696289,
      "learning_rate": 4.299717280524329e-06,
      "loss": 0.4033,
      "step": 411
    },
    {
      "epoch": 1.7733142037302727,
      "grad_norm": 0.34413671493530273,
      "learning_rate": 4.274880935178817e-06,
      "loss": 0.4254,
      "step": 412
    },
    {
      "epoch": 1.7776183644189383,
      "grad_norm": 0.3962193429470062,
      "learning_rate": 4.250062852302283e-06,
      "loss": 0.4121,
      "step": 413
    },
    {
      "epoch": 1.781922525107604,
      "grad_norm": 0.35180673003196716,
      "learning_rate": 4.225263656949961e-06,
      "loss": 0.3635,
      "step": 414
    },
    {
      "epoch": 1.7862266857962696,
      "grad_norm": 0.3940863311290741,
      "learning_rate": 4.200483973701401e-06,
      "loss": 0.4167,
      "step": 415
    },
    {
      "epoch": 1.7905308464849354,
      "grad_norm": 0.3632669448852539,
      "learning_rate": 4.175724426644724e-06,
      "loss": 0.368,
      "step": 416
    },
    {
      "epoch": 1.7948350071736012,
      "grad_norm": 0.40205034613609314,
      "learning_rate": 4.150985639360914e-06,
      "loss": 0.4814,
      "step": 417
    },
    {
      "epoch": 1.799139167862267,
      "grad_norm": 0.38346531987190247,
      "learning_rate": 4.12626823490811e-06,
      "loss": 0.377,
      "step": 418
    },
    {
      "epoch": 1.8034433285509326,
      "grad_norm": 0.3857615888118744,
      "learning_rate": 4.1015728358059185e-06,
      "loss": 0.3983,
      "step": 419
    },
    {
      "epoch": 1.8077474892395982,
      "grad_norm": 0.36002928018569946,
      "learning_rate": 4.076900064019721e-06,
      "loss": 0.4242,
      "step": 420
    },
    {
      "epoch": 1.812051649928264,
      "grad_norm": 0.3512604832649231,
      "learning_rate": 4.052250540945029e-06,
      "loss": 0.3908,
      "step": 421
    },
    {
      "epoch": 1.8163558106169297,
      "grad_norm": 0.37411588430404663,
      "learning_rate": 4.02762488739182e-06,
      "loss": 0.4181,
      "step": 422
    },
    {
      "epoch": 1.8206599713055955,
      "grad_norm": 0.3773100674152374,
      "learning_rate": 4.003023723568903e-06,
      "loss": 0.4013,
      "step": 423
    },
    {
      "epoch": 1.824964131994261,
      "grad_norm": 0.3702990710735321,
      "learning_rate": 3.978447669068309e-06,
      "loss": 0.4012,
      "step": 424
    },
    {
      "epoch": 1.8292682926829267,
      "grad_norm": 0.38589340448379517,
      "learning_rate": 3.953897342849673e-06,
      "loss": 0.4213,
      "step": 425
    },
    {
      "epoch": 1.8335724533715925,
      "grad_norm": 0.3330689072608948,
      "learning_rate": 3.929373363224654e-06,
      "loss": 0.4151,
      "step": 426
    },
    {
      "epoch": 1.8378766140602583,
      "grad_norm": 0.3233312666416168,
      "learning_rate": 3.904876347841354e-06,
      "loss": 0.36,
      "step": 427
    },
    {
      "epoch": 1.842180774748924,
      "grad_norm": 0.3806193172931671,
      "learning_rate": 3.8804069136687775e-06,
      "loss": 0.4324,
      "step": 428
    },
    {
      "epoch": 1.8464849354375896,
      "grad_norm": 0.792082667350769,
      "learning_rate": 3.8559656769812746e-06,
      "loss": 0.396,
      "step": 429
    },
    {
      "epoch": 1.8507890961262554,
      "grad_norm": 0.342756986618042,
      "learning_rate": 3.8315532533430285e-06,
      "loss": 0.4244,
      "step": 430
    },
    {
      "epoch": 1.855093256814921,
      "grad_norm": 0.38503968715667725,
      "learning_rate": 3.8071702575925594e-06,
      "loss": 0.4343,
      "step": 431
    },
    {
      "epoch": 1.8593974175035868,
      "grad_norm": 0.36260154843330383,
      "learning_rate": 3.7828173038272266e-06,
      "loss": 0.3749,
      "step": 432
    },
    {
      "epoch": 1.8637015781922526,
      "grad_norm": 0.3732585310935974,
      "learning_rate": 3.7584950053877646e-06,
      "loss": 0.402,
      "step": 433
    },
    {
      "epoch": 1.8680057388809184,
      "grad_norm": 0.31354090571403503,
      "learning_rate": 3.7342039748428473e-06,
      "loss": 0.3647,
      "step": 434
    },
    {
      "epoch": 1.872309899569584,
      "grad_norm": 0.3549199402332306,
      "learning_rate": 3.709944823973647e-06,
      "loss": 0.4235,
      "step": 435
    },
    {
      "epoch": 1.8766140602582495,
      "grad_norm": 0.354219913482666,
      "learning_rate": 3.685718163758427e-06,
      "loss": 0.4093,
      "step": 436
    },
    {
      "epoch": 1.8809182209469153,
      "grad_norm": 0.35462522506713867,
      "learning_rate": 3.6615246043571674e-06,
      "loss": 0.4102,
      "step": 437
    },
    {
      "epoch": 1.885222381635581,
      "grad_norm": 0.3581220805644989,
      "learning_rate": 3.6373647550961834e-06,
      "loss": 0.4017,
      "step": 438
    },
    {
      "epoch": 1.8895265423242469,
      "grad_norm": 0.31649893522262573,
      "learning_rate": 3.61323922445278e-06,
      "loss": 0.398,
      "step": 439
    },
    {
      "epoch": 1.8938307030129125,
      "grad_norm": 0.35182642936706543,
      "learning_rate": 3.5891486200399413e-06,
      "loss": 0.4358,
      "step": 440
    },
    {
      "epoch": 1.8981348637015782,
      "grad_norm": 0.34824714064598083,
      "learning_rate": 3.5650935485910103e-06,
      "loss": 0.3855,
      "step": 441
    },
    {
      "epoch": 1.9024390243902438,
      "grad_norm": 0.34262511134147644,
      "learning_rate": 3.5410746159444165e-06,
      "loss": 0.3932,
      "step": 442
    },
    {
      "epoch": 1.9067431850789096,
      "grad_norm": 0.3388313353061676,
      "learning_rate": 3.5170924270284166e-06,
      "loss": 0.4004,
      "step": 443
    },
    {
      "epoch": 1.9110473457675754,
      "grad_norm": 0.3349505364894867,
      "learning_rate": 3.4931475858458634e-06,
      "loss": 0.411,
      "step": 444
    },
    {
      "epoch": 1.9153515064562412,
      "grad_norm": 0.3396126329898834,
      "learning_rate": 3.469240695458983e-06,
      "loss": 0.4102,
      "step": 445
    },
    {
      "epoch": 1.9196556671449068,
      "grad_norm": 0.3140477240085602,
      "learning_rate": 3.445372357974194e-06,
      "loss": 0.3899,
      "step": 446
    },
    {
      "epoch": 1.9239598278335723,
      "grad_norm": 0.37466076016426086,
      "learning_rate": 3.4215431745269463e-06,
      "loss": 0.4131,
      "step": 447
    },
    {
      "epoch": 1.9282639885222381,
      "grad_norm": 0.3352051079273224,
      "learning_rate": 3.397753745266571e-06,
      "loss": 0.4151,
      "step": 448
    },
    {
      "epoch": 1.932568149210904,
      "grad_norm": 0.3039771020412445,
      "learning_rate": 3.374004669341173e-06,
      "loss": 0.3685,
      "step": 449
    },
    {
      "epoch": 1.9368723098995697,
      "grad_norm": 0.36881503462791443,
      "learning_rate": 3.350296544882543e-06,
      "loss": 0.4254,
      "step": 450
    },
    {
      "epoch": 1.9411764705882353,
      "grad_norm": 0.31767529249191284,
      "learning_rate": 3.326629968991083e-06,
      "loss": 0.3936,
      "step": 451
    },
    {
      "epoch": 1.9454806312769009,
      "grad_norm": 0.3398596942424774,
      "learning_rate": 3.303005537720778e-06,
      "loss": 0.4302,
      "step": 452
    },
    {
      "epoch": 1.9497847919655666,
      "grad_norm": 0.33376285433769226,
      "learning_rate": 3.2794238460641837e-06,
      "loss": 0.3669,
      "step": 453
    },
    {
      "epoch": 1.9540889526542324,
      "grad_norm": 0.391971617937088,
      "learning_rate": 3.255885487937431e-06,
      "loss": 0.4454,
      "step": 454
    },
    {
      "epoch": 1.9583931133428982,
      "grad_norm": 0.34021979570388794,
      "learning_rate": 3.2323910561652798e-06,
      "loss": 0.4036,
      "step": 455
    },
    {
      "epoch": 1.9626972740315638,
      "grad_norm": 0.35812684893608093,
      "learning_rate": 3.2089411424661864e-06,
      "loss": 0.3968,
      "step": 456
    },
    {
      "epoch": 1.9670014347202296,
      "grad_norm": 0.35335344076156616,
      "learning_rate": 3.185536337437393e-06,
      "loss": 0.3989,
      "step": 457
    },
    {
      "epoch": 1.9713055954088952,
      "grad_norm": 0.33904704451560974,
      "learning_rate": 3.1621772305400603e-06,
      "loss": 0.4135,
      "step": 458
    },
    {
      "epoch": 1.975609756097561,
      "grad_norm": 0.3260011374950409,
      "learning_rate": 3.138864410084419e-06,
      "loss": 0.4434,
      "step": 459
    },
    {
      "epoch": 1.9799139167862267,
      "grad_norm": 0.3138202428817749,
      "learning_rate": 3.1155984632149565e-06,
      "loss": 0.3821,
      "step": 460
    },
    {
      "epoch": 1.9842180774748925,
      "grad_norm": 0.33707910776138306,
      "learning_rate": 3.0923799758956265e-06,
      "loss": 0.4051,
      "step": 461
    },
    {
      "epoch": 1.988522238163558,
      "grad_norm": 0.350344181060791,
      "learning_rate": 3.0692095328950843e-06,
      "loss": 0.422,
      "step": 462
    },
    {
      "epoch": 1.9928263988522237,
      "grad_norm": 0.3441988527774811,
      "learning_rate": 3.0460877177719763e-06,
      "loss": 0.4065,
      "step": 463
    },
    {
      "epoch": 1.9971305595408895,
      "grad_norm": 0.35449886322021484,
      "learning_rate": 3.023015112860228e-06,
      "loss": 0.4182,
      "step": 464
    },
    {
      "epoch": 2.0014347202295553,
      "grad_norm": 0.6956499218940735,
      "learning_rate": 2.9999922992543777e-06,
      "loss": 0.6638,
      "step": 465
    },
    {
      "epoch": 2.005738880918221,
      "grad_norm": 0.4010846018791199,
      "learning_rate": 2.977019856794955e-06,
      "loss": 0.3728,
      "step": 466
    },
    {
      "epoch": 2.010043041606887,
      "grad_norm": 0.4007827043533325,
      "learning_rate": 2.9540983640538635e-06,
      "loss": 0.3776,
      "step": 467
    },
    {
      "epoch": 2.014347202295552,
      "grad_norm": 0.38452622294425964,
      "learning_rate": 2.93122839831981e-06,
      "loss": 0.3774,
      "step": 468
    },
    {
      "epoch": 2.018651362984218,
      "grad_norm": 0.3884631097316742,
      "learning_rate": 2.908410535583777e-06,
      "loss": 0.401,
      "step": 469
    },
    {
      "epoch": 2.022955523672884,
      "grad_norm": 0.37267711758613586,
      "learning_rate": 2.8856453505245018e-06,
      "loss": 0.3421,
      "step": 470
    },
    {
      "epoch": 2.0272596843615496,
      "grad_norm": 0.41387253999710083,
      "learning_rate": 2.8629334164940127e-06,
      "loss": 0.3963,
      "step": 471
    },
    {
      "epoch": 2.0315638450502154,
      "grad_norm": 0.38894036412239075,
      "learning_rate": 2.840275305503186e-06,
      "loss": 0.411,
      "step": 472
    },
    {
      "epoch": 2.0358680057388807,
      "grad_norm": 0.3363220989704132,
      "learning_rate": 2.817671588207338e-06,
      "loss": 0.3524,
      "step": 473
    },
    {
      "epoch": 2.0401721664275465,
      "grad_norm": 0.3584243357181549,
      "learning_rate": 2.7951228338918506e-06,
      "loss": 0.4025,
      "step": 474
    },
    {
      "epoch": 2.0444763271162123,
      "grad_norm": 0.34380313754081726,
      "learning_rate": 2.77262961045784e-06,
      "loss": 0.3464,
      "step": 475
    },
    {
      "epoch": 2.048780487804878,
      "grad_norm": 0.34843042492866516,
      "learning_rate": 2.7501924844078538e-06,
      "loss": 0.3732,
      "step": 476
    },
    {
      "epoch": 2.053084648493544,
      "grad_norm": 0.37281617522239685,
      "learning_rate": 2.7278120208315927e-06,
      "loss": 0.3836,
      "step": 477
    },
    {
      "epoch": 2.0573888091822097,
      "grad_norm": 0.38437455892562866,
      "learning_rate": 2.7054887833916933e-06,
      "loss": 0.4123,
      "step": 478
    },
    {
      "epoch": 2.061692969870875,
      "grad_norm": 0.3231838643550873,
      "learning_rate": 2.6832233343095225e-06,
      "loss": 0.3418,
      "step": 479
    },
    {
      "epoch": 2.065997130559541,
      "grad_norm": 0.3335508406162262,
      "learning_rate": 2.6610162343510183e-06,
      "loss": 0.3768,
      "step": 480
    },
    {
      "epoch": 2.0703012912482066,
      "grad_norm": 0.38742804527282715,
      "learning_rate": 2.6388680428125657e-06,
      "loss": 0.3966,
      "step": 481
    },
    {
      "epoch": 2.0746054519368724,
      "grad_norm": 0.3358624577522278,
      "learning_rate": 2.616779317506921e-06,
      "loss": 0.3589,
      "step": 482
    },
    {
      "epoch": 2.078909612625538,
      "grad_norm": 0.32186657190322876,
      "learning_rate": 2.594750614749148e-06,
      "loss": 0.3813,
      "step": 483
    },
    {
      "epoch": 2.0832137733142035,
      "grad_norm": 0.3297998607158661,
      "learning_rate": 2.572782489342617e-06,
      "loss": 0.3774,
      "step": 484
    },
    {
      "epoch": 2.0875179340028693,
      "grad_norm": 0.33904680609703064,
      "learning_rate": 2.5508754945650305e-06,
      "loss": 0.3792,
      "step": 485
    },
    {
      "epoch": 2.091822094691535,
      "grad_norm": 0.34405916929244995,
      "learning_rate": 2.5290301821544826e-06,
      "loss": 0.3801,
      "step": 486
    },
    {
      "epoch": 2.096126255380201,
      "grad_norm": 0.30022576451301575,
      "learning_rate": 2.5072471022955703e-06,
      "loss": 0.3291,
      "step": 487
    },
    {
      "epoch": 2.1004304160688667,
      "grad_norm": 0.33434441685676575,
      "learning_rate": 2.4855268036055346e-06,
      "loss": 0.3989,
      "step": 488
    },
    {
      "epoch": 2.104734576757532,
      "grad_norm": 0.31754809617996216,
      "learning_rate": 2.4638698331204404e-06,
      "loss": 0.3622,
      "step": 489
    },
    {
      "epoch": 2.109038737446198,
      "grad_norm": 0.343357115983963,
      "learning_rate": 2.4422767362814045e-06,
      "loss": 0.4263,
      "step": 490
    },
    {
      "epoch": 2.1133428981348636,
      "grad_norm": 0.30014854669570923,
      "learning_rate": 2.420748056920856e-06,
      "loss": 0.326,
      "step": 491
    },
    {
      "epoch": 2.1176470588235294,
      "grad_norm": 0.33290165662765503,
      "learning_rate": 2.3992843372488357e-06,
      "loss": 0.3947,
      "step": 492
    },
    {
      "epoch": 2.1219512195121952,
      "grad_norm": 0.33419376611709595,
      "learning_rate": 2.3778861178393453e-06,
      "loss": 0.3595,
      "step": 493
    },
    {
      "epoch": 2.126255380200861,
      "grad_norm": 0.34245628118515015,
      "learning_rate": 2.3565539376167295e-06,
      "loss": 0.4118,
      "step": 494
    },
    {
      "epoch": 2.1305595408895264,
      "grad_norm": 0.3388068377971649,
      "learning_rate": 2.3352883338421085e-06,
      "loss": 0.3537,
      "step": 495
    },
    {
      "epoch": 2.134863701578192,
      "grad_norm": 0.336579829454422,
      "learning_rate": 2.3140898420998425e-06,
      "loss": 0.3787,
      "step": 496
    },
    {
      "epoch": 2.139167862266858,
      "grad_norm": 0.3296073377132416,
      "learning_rate": 2.2929589962840375e-06,
      "loss": 0.3837,
      "step": 497
    },
    {
      "epoch": 2.1434720229555237,
      "grad_norm": 0.3224649131298065,
      "learning_rate": 2.271896328585114e-06,
      "loss": 0.3662,
      "step": 498
    },
    {
      "epoch": 2.1477761836441895,
      "grad_norm": 0.34240302443504333,
      "learning_rate": 2.2509023694763844e-06,
      "loss": 0.3793,
      "step": 499
    },
    {
      "epoch": 2.152080344332855,
      "grad_norm": 0.3357837200164795,
      "learning_rate": 2.2299776477007073e-06,
      "loss": 0.4106,
      "step": 500
    },
    {
      "epoch": 2.1563845050215207,
      "grad_norm": 0.33356544375419617,
      "learning_rate": 2.2091226902571673e-06,
      "loss": 0.3712,
      "step": 501
    },
    {
      "epoch": 2.1606886657101865,
      "grad_norm": 0.3245741128921509,
      "learning_rate": 2.1883380223878004e-06,
      "loss": 0.3951,
      "step": 502
    },
    {
      "epoch": 2.1649928263988523,
      "grad_norm": 0.3323533236980438,
      "learning_rate": 2.1676241675643627e-06,
      "loss": 0.3972,
      "step": 503
    },
    {
      "epoch": 2.169296987087518,
      "grad_norm": 0.30774545669555664,
      "learning_rate": 2.1469816474751566e-06,
      "loss": 0.3501,
      "step": 504
    },
    {
      "epoch": 2.173601147776184,
      "grad_norm": 0.343365877866745,
      "learning_rate": 2.1264109820118783e-06,
      "loss": 0.3766,
      "step": 505
    },
    {
      "epoch": 2.177905308464849,
      "grad_norm": 0.3731037378311157,
      "learning_rate": 2.105912689256533e-06,
      "loss": 0.3999,
      "step": 506
    },
    {
      "epoch": 2.182209469153515,
      "grad_norm": 0.34139010310173035,
      "learning_rate": 2.0854872854683877e-06,
      "loss": 0.3815,
      "step": 507
    },
    {
      "epoch": 2.186513629842181,
      "grad_norm": 0.3012133240699768,
      "learning_rate": 2.0651352850709656e-06,
      "loss": 0.3492,
      "step": 508
    },
    {
      "epoch": 2.1908177905308466,
      "grad_norm": 0.2982615530490875,
      "learning_rate": 2.0448572006390875e-06,
      "loss": 0.3643,
      "step": 509
    },
    {
      "epoch": 2.1951219512195124,
      "grad_norm": 0.29477861523628235,
      "learning_rate": 2.0246535428859652e-06,
      "loss": 0.3599,
      "step": 510
    },
    {
      "epoch": 2.1994261119081777,
      "grad_norm": 0.3607404828071594,
      "learning_rate": 2.0045248206503454e-06,
      "loss": 0.3724,
      "step": 511
    },
    {
      "epoch": 2.2037302725968435,
      "grad_norm": 0.34111490845680237,
      "learning_rate": 1.984471540883679e-06,
      "loss": 0.3729,
      "step": 512
    },
    {
      "epoch": 2.2080344332855093,
      "grad_norm": 0.3487270772457123,
      "learning_rate": 1.964494208637369e-06,
      "loss": 0.3818,
      "step": 513
    },
    {
      "epoch": 2.212338593974175,
      "grad_norm": 0.32086241245269775,
      "learning_rate": 1.9445933270500444e-06,
      "loss": 0.3413,
      "step": 514
    },
    {
      "epoch": 2.216642754662841,
      "grad_norm": 0.3221076428890228,
      "learning_rate": 1.9247693973348834e-06,
      "loss": 0.4038,
      "step": 515
    },
    {
      "epoch": 2.2209469153515062,
      "grad_norm": 0.3006744086742401,
      "learning_rate": 1.905022918766995e-06,
      "loss": 0.3628,
      "step": 516
    },
    {
      "epoch": 2.225251076040172,
      "grad_norm": 0.3235909044742584,
      "learning_rate": 1.8853543886708498e-06,
      "loss": 0.3845,
      "step": 517
    },
    {
      "epoch": 2.229555236728838,
      "grad_norm": 0.324224591255188,
      "learning_rate": 1.8657643024077431e-06,
      "loss": 0.3699,
      "step": 518
    },
    {
      "epoch": 2.2338593974175036,
      "grad_norm": 0.3157562017440796,
      "learning_rate": 1.8462531533633238e-06,
      "loss": 0.3697,
      "step": 519
    },
    {
      "epoch": 2.2381635581061694,
      "grad_norm": 0.28793036937713623,
      "learning_rate": 1.8268214329351797e-06,
      "loss": 0.3522,
      "step": 520
    },
    {
      "epoch": 2.242467718794835,
      "grad_norm": 0.33378034830093384,
      "learning_rate": 1.8074696305204397e-06,
      "loss": 0.4332,
      "step": 521
    },
    {
      "epoch": 2.2467718794835005,
      "grad_norm": 0.3230198323726654,
      "learning_rate": 1.7881982335034625e-06,
      "loss": 0.3762,
      "step": 522
    },
    {
      "epoch": 2.2510760401721663,
      "grad_norm": 0.3061073422431946,
      "learning_rate": 1.7690077272435636e-06,
      "loss": 0.3588,
      "step": 523
    },
    {
      "epoch": 2.255380200860832,
      "grad_norm": 0.34299224615097046,
      "learning_rate": 1.7498985950627794e-06,
      "loss": 0.4348,
      "step": 524
    },
    {
      "epoch": 2.259684361549498,
      "grad_norm": 0.3042338192462921,
      "learning_rate": 1.7308713182337044e-06,
      "loss": 0.3497,
      "step": 525
    },
    {
      "epoch": 2.2639885222381637,
      "grad_norm": 0.2962125837802887,
      "learning_rate": 1.7119263759673677e-06,
      "loss": 0.3568,
      "step": 526
    },
    {
      "epoch": 2.2682926829268295,
      "grad_norm": 0.3360259532928467,
      "learning_rate": 1.6930642454011647e-06,
      "loss": 0.3752,
      "step": 527
    },
    {
      "epoch": 2.272596843615495,
      "grad_norm": 0.31935688853263855,
      "learning_rate": 1.6742854015868349e-06,
      "loss": 0.4088,
      "step": 528
    },
    {
      "epoch": 2.2769010043041606,
      "grad_norm": 0.3136122226715088,
      "learning_rate": 1.655590317478501e-06,
      "loss": 0.3581,
      "step": 529
    },
    {
      "epoch": 2.2812051649928264,
      "grad_norm": 0.3090389370918274,
      "learning_rate": 1.6369794639207626e-06,
      "loss": 0.3688,
      "step": 530
    },
    {
      "epoch": 2.2855093256814922,
      "grad_norm": 0.3279600441455841,
      "learning_rate": 1.6184533096368277e-06,
      "loss": 0.3735,
      "step": 531
    },
    {
      "epoch": 2.289813486370158,
      "grad_norm": 0.30414533615112305,
      "learning_rate": 1.6000123212167158e-06,
      "loss": 0.3703,
      "step": 532
    },
    {
      "epoch": 2.2941176470588234,
      "grad_norm": 0.32438209652900696,
      "learning_rate": 1.581656963105504e-06,
      "loss": 0.3871,
      "step": 533
    },
    {
      "epoch": 2.298421807747489,
      "grad_norm": 0.3222774863243103,
      "learning_rate": 1.5633876975916261e-06,
      "loss": 0.3851,
      "step": 534
    },
    {
      "epoch": 2.302725968436155,
      "grad_norm": 0.33032676577568054,
      "learning_rate": 1.5452049847952338e-06,
      "loss": 0.3581,
      "step": 535
    },
    {
      "epoch": 2.3070301291248207,
      "grad_norm": 0.3343183994293213,
      "learning_rate": 1.5271092826566108e-06,
      "loss": 0.3837,
      "step": 536
    },
    {
      "epoch": 2.3113342898134865,
      "grad_norm": 0.30717727541923523,
      "learning_rate": 1.5091010469246303e-06,
      "loss": 0.3783,
      "step": 537
    },
    {
      "epoch": 2.315638450502152,
      "grad_norm": 0.3004843294620514,
      "learning_rate": 1.4911807311452874e-06,
      "loss": 0.3855,
      "step": 538
    },
    {
      "epoch": 2.3199426111908177,
      "grad_norm": 0.3067179322242737,
      "learning_rate": 1.4733487866502698e-06,
      "loss": 0.3829,
      "step": 539
    },
    {
      "epoch": 2.3242467718794835,
      "grad_norm": 0.29565826058387756,
      "learning_rate": 1.4556056625455922e-06,
      "loss": 0.3657,
      "step": 540
    },
    {
      "epoch": 2.3285509325681493,
      "grad_norm": 0.303459495306015,
      "learning_rate": 1.4379518057002834e-06,
      "loss": 0.37,
      "step": 541
    },
    {
      "epoch": 2.332855093256815,
      "grad_norm": 0.3352012038230896,
      "learning_rate": 1.4203876607351347e-06,
      "loss": 0.3895,
      "step": 542
    },
    {
      "epoch": 2.3371592539454804,
      "grad_norm": 0.31783953309059143,
      "learning_rate": 1.4029136700115031e-06,
      "loss": 0.3876,
      "step": 543
    },
    {
      "epoch": 2.341463414634146,
      "grad_norm": 0.2909916341304779,
      "learning_rate": 1.3855302736201686e-06,
      "loss": 0.3556,
      "step": 544
    },
    {
      "epoch": 2.345767575322812,
      "grad_norm": 0.30111250281333923,
      "learning_rate": 1.3682379093702447e-06,
      "loss": 0.3431,
      "step": 545
    },
    {
      "epoch": 2.350071736011478,
      "grad_norm": 0.30051347613334656,
      "learning_rate": 1.3510370127781635e-06,
      "loss": 0.3874,
      "step": 546
    },
    {
      "epoch": 2.3543758967001436,
      "grad_norm": 0.31402793526649475,
      "learning_rate": 1.3339280170566959e-06,
      "loss": 0.3869,
      "step": 547
    },
    {
      "epoch": 2.3586800573888094,
      "grad_norm": 0.2965995967388153,
      "learning_rate": 1.3169113531040462e-06,
      "loss": 0.3299,
      "step": 548
    },
    {
      "epoch": 2.3629842180774747,
      "grad_norm": 0.36196231842041016,
      "learning_rate": 1.2999874494930004e-06,
      "loss": 0.445,
      "step": 549
    },
    {
      "epoch": 2.3672883787661405,
      "grad_norm": 0.3123576045036316,
      "learning_rate": 1.2831567324601325e-06,
      "loss": 0.3626,
      "step": 550
    },
    {
      "epoch": 2.3715925394548063,
      "grad_norm": 0.28615859150886536,
      "learning_rate": 1.266419625895064e-06,
      "loss": 0.3355,
      "step": 551
    },
    {
      "epoch": 2.375896700143472,
      "grad_norm": 0.30293789505958557,
      "learning_rate": 1.2497765513297976e-06,
      "loss": 0.3665,
      "step": 552
    },
    {
      "epoch": 2.380200860832138,
      "grad_norm": 0.29470351338386536,
      "learning_rate": 1.2332279279280907e-06,
      "loss": 0.3904,
      "step": 553
    },
    {
      "epoch": 2.3845050215208037,
      "grad_norm": 0.31690046191215515,
      "learning_rate": 1.2167741724749026e-06,
      "loss": 0.3656,
      "step": 554
    },
    {
      "epoch": 2.388809182209469,
      "grad_norm": 0.30208268761634827,
      "learning_rate": 1.2004156993659028e-06,
      "loss": 0.3829,
      "step": 555
    },
    {
      "epoch": 2.393113342898135,
      "grad_norm": 0.32322484254837036,
      "learning_rate": 1.1841529205970281e-06,
      "loss": 0.4277,
      "step": 556
    },
    {
      "epoch": 2.3974175035868006,
      "grad_norm": 0.29321643710136414,
      "learning_rate": 1.1679862457541052e-06,
      "loss": 0.3543,
      "step": 557
    },
    {
      "epoch": 2.4017216642754664,
      "grad_norm": 0.329012006521225,
      "learning_rate": 1.1519160820025382e-06,
      "loss": 0.384,
      "step": 558
    },
    {
      "epoch": 2.406025824964132,
      "grad_norm": 0.31029197573661804,
      "learning_rate": 1.1359428340770567e-06,
      "loss": 0.3625,
      "step": 559
    },
    {
      "epoch": 2.4103299856527975,
      "grad_norm": 0.29086750745773315,
      "learning_rate": 1.1200669042715163e-06,
      "loss": 0.3786,
      "step": 560
    },
    {
      "epoch": 2.4146341463414633,
      "grad_norm": 0.3073953092098236,
      "learning_rate": 1.104288692428766e-06,
      "loss": 0.3716,
      "step": 561
    },
    {
      "epoch": 2.418938307030129,
      "grad_norm": 0.29252809286117554,
      "learning_rate": 1.0886085959305915e-06,
      "loss": 0.3679,
      "step": 562
    },
    {
      "epoch": 2.423242467718795,
      "grad_norm": 0.32353729009628296,
      "learning_rate": 1.0730270096876876e-06,
      "loss": 0.3757,
      "step": 563
    },
    {
      "epoch": 2.4275466284074607,
      "grad_norm": 0.3193880617618561,
      "learning_rate": 1.057544326129723e-06,
      "loss": 0.3637,
      "step": 564
    },
    {
      "epoch": 2.431850789096126,
      "grad_norm": 0.3040056824684143,
      "learning_rate": 1.0421609351954599e-06,
      "loss": 0.3614,
      "step": 565
    },
    {
      "epoch": 2.436154949784792,
      "grad_norm": 0.30761218070983887,
      "learning_rate": 1.026877224322923e-06,
      "loss": 0.3713,
      "step": 566
    },
    {
      "epoch": 2.4404591104734576,
      "grad_norm": 0.3160081207752228,
      "learning_rate": 1.0116935784396482e-06,
      "loss": 0.3988,
      "step": 567
    },
    {
      "epoch": 2.4447632711621234,
      "grad_norm": 0.3210393488407135,
      "learning_rate": 9.966103799529891e-07,
      "loss": 0.3731,
      "step": 568
    },
    {
      "epoch": 2.4490674318507892,
      "grad_norm": 0.30738240480422974,
      "learning_rate": 9.816280087404851e-07,
      "loss": 0.3778,
      "step": 569
    },
    {
      "epoch": 2.4533715925394546,
      "grad_norm": 0.2967623770236969,
      "learning_rate": 9.66746842140287e-07,
      "loss": 0.3615,
      "step": 570
    },
    {
      "epoch": 2.4576757532281204,
      "grad_norm": 0.31555816531181335,
      "learning_rate": 9.519672549416659e-07,
      "loss": 0.3846,
      "step": 571
    },
    {
      "epoch": 2.461979913916786,
      "grad_norm": 0.3220941722393036,
      "learning_rate": 9.372896193755621e-07,
      "loss": 0.3698,
      "step": 572
    },
    {
      "epoch": 2.466284074605452,
      "grad_norm": 0.31963497400283813,
      "learning_rate": 9.227143051052162e-07,
      "loss": 0.3646,
      "step": 573
    },
    {
      "epoch": 2.4705882352941178,
      "grad_norm": 0.3254527449607849,
      "learning_rate": 9.082416792168608e-07,
      "loss": 0.4002,
      "step": 574
    },
    {
      "epoch": 2.4748923959827835,
      "grad_norm": 0.301438570022583,
      "learning_rate": 8.938721062104727e-07,
      "loss": 0.3935,
      "step": 575
    },
    {
      "epoch": 2.479196556671449,
      "grad_norm": 0.27928128838539124,
      "learning_rate": 8.7960594799059e-07,
      "loss": 0.3782,
      "step": 576
    },
    {
      "epoch": 2.4835007173601147,
      "grad_norm": 0.3043065667152405,
      "learning_rate": 8.654435638572e-07,
      "loss": 0.392,
      "step": 577
    },
    {
      "epoch": 2.4878048780487805,
      "grad_norm": 0.2972443699836731,
      "learning_rate": 8.513853104966951e-07,
      "loss": 0.3619,
      "step": 578
    },
    {
      "epoch": 2.4921090387374463,
      "grad_norm": 0.3183661997318268,
      "learning_rate": 8.374315419728784e-07,
      "loss": 0.4039,
      "step": 579
    },
    {
      "epoch": 2.496413199426112,
      "grad_norm": 0.28181061148643494,
      "learning_rate": 8.235826097180566e-07,
      "loss": 0.3168,
      "step": 580
    },
    {
      "epoch": 2.500717360114778,
      "grad_norm": 0.3265518546104431,
      "learning_rate": 8.098388625241854e-07,
      "loss": 0.3845,
      "step": 581
    },
    {
      "epoch": 2.505021520803443,
      "grad_norm": 0.31692567467689514,
      "learning_rate": 7.962006465340821e-07,
      "loss": 0.3945,
      "step": 582
    },
    {
      "epoch": 2.509325681492109,
      "grad_norm": 0.2771775722503662,
      "learning_rate": 7.8266830523271e-07,
      "loss": 0.3473,
      "step": 583
    },
    {
      "epoch": 2.513629842180775,
      "grad_norm": 0.32277750968933105,
      "learning_rate": 7.692421794385313e-07,
      "loss": 0.4079,
      "step": 584
    },
    {
      "epoch": 2.5179340028694406,
      "grad_norm": 0.31330087780952454,
      "learning_rate": 7.559226072949166e-07,
      "loss": 0.393,
      "step": 585
    },
    {
      "epoch": 2.5222381635581064,
      "grad_norm": 0.41533592343330383,
      "learning_rate": 7.427099242616348e-07,
      "loss": 0.3804,
      "step": 586
    },
    {
      "epoch": 2.5265423242467717,
      "grad_norm": 0.29984721541404724,
      "learning_rate": 7.296044631064014e-07,
      "loss": 0.3765,
      "step": 587
    },
    {
      "epoch": 2.5308464849354375,
      "grad_norm": 0.30083853006362915,
      "learning_rate": 7.166065538964955e-07,
      "loss": 0.3867,
      "step": 588
    },
    {
      "epoch": 2.5351506456241033,
      "grad_norm": 0.30099427700042725,
      "learning_rate": 7.037165239904514e-07,
      "loss": 0.3831,
      "step": 589
    },
    {
      "epoch": 2.539454806312769,
      "grad_norm": 0.3282623589038849,
      "learning_rate": 6.909346980298093e-07,
      "loss": 0.3948,
      "step": 590
    },
    {
      "epoch": 2.543758967001435,
      "grad_norm": 0.27795639634132385,
      "learning_rate": 6.782613979309443e-07,
      "loss": 0.3888,
      "step": 591
    },
    {
      "epoch": 2.5480631276901002,
      "grad_norm": 0.2930023968219757,
      "learning_rate": 6.656969428769567e-07,
      "loss": 0.3468,
      "step": 592
    },
    {
      "epoch": 2.552367288378766,
      "grad_norm": 0.3128500282764435,
      "learning_rate": 6.532416493096272e-07,
      "loss": 0.3889,
      "step": 593
    },
    {
      "epoch": 2.556671449067432,
      "grad_norm": 0.3318694829940796,
      "learning_rate": 6.408958309214597e-07,
      "loss": 0.3928,
      "step": 594
    },
    {
      "epoch": 2.5609756097560976,
      "grad_norm": 0.335523784160614,
      "learning_rate": 6.286597986477683e-07,
      "loss": 0.3666,
      "step": 595
    },
    {
      "epoch": 2.5652797704447634,
      "grad_norm": 0.29514452815055847,
      "learning_rate": 6.165338606588517e-07,
      "loss": 0.3416,
      "step": 596
    },
    {
      "epoch": 2.5695839311334288,
      "grad_norm": 0.35578909516334534,
      "learning_rate": 6.045183223522339e-07,
      "loss": 0.4163,
      "step": 597
    },
    {
      "epoch": 2.5738880918220945,
      "grad_norm": 0.3049773871898651,
      "learning_rate": 5.926134863449712e-07,
      "loss": 0.3584,
      "step": 598
    },
    {
      "epoch": 2.5781922525107603,
      "grad_norm": 0.2862030267715454,
      "learning_rate": 5.808196524660253e-07,
      "loss": 0.3552,
      "step": 599
    },
    {
      "epoch": 2.582496413199426,
      "grad_norm": 0.32195690274238586,
      "learning_rate": 5.691371177487215e-07,
      "loss": 0.4089,
      "step": 600
    },
    {
      "epoch": 2.586800573888092,
      "grad_norm": 0.3421590030193329,
      "learning_rate": 5.575661764232593e-07,
      "loss": 0.3779,
      "step": 601
    },
    {
      "epoch": 2.5911047345767573,
      "grad_norm": 0.30208882689476013,
      "learning_rate": 5.461071199093048e-07,
      "loss": 0.4002,
      "step": 602
    },
    {
      "epoch": 2.5954088952654235,
      "grad_norm": 0.2821211516857147,
      "learning_rate": 5.347602368086563e-07,
      "loss": 0.3484,
      "step": 603
    },
    {
      "epoch": 2.599713055954089,
      "grad_norm": 0.2918562889099121,
      "learning_rate": 5.235258128979676e-07,
      "loss": 0.3596,
      "step": 604
    },
    {
      "epoch": 2.6040172166427547,
      "grad_norm": 0.2854442596435547,
      "learning_rate": 5.124041311215544e-07,
      "loss": 0.3605,
      "step": 605
    },
    {
      "epoch": 2.6083213773314204,
      "grad_norm": 0.2967503070831299,
      "learning_rate": 5.0139547158427e-07,
      "loss": 0.4133,
      "step": 606
    },
    {
      "epoch": 2.6126255380200862,
      "grad_norm": 0.29674726724624634,
      "learning_rate": 4.905001115444475e-07,
      "loss": 0.3752,
      "step": 607
    },
    {
      "epoch": 2.616929698708752,
      "grad_norm": 0.30667829513549805,
      "learning_rate": 4.797183254069176e-07,
      "loss": 0.3829,
      "step": 608
    },
    {
      "epoch": 2.6212338593974174,
      "grad_norm": 0.3153044879436493,
      "learning_rate": 4.690503847160982e-07,
      "loss": 0.4159,
      "step": 609
    },
    {
      "epoch": 2.625538020086083,
      "grad_norm": 0.3004702925682068,
      "learning_rate": 4.5849655814915683e-07,
      "loss": 0.3548,
      "step": 610
    },
    {
      "epoch": 2.629842180774749,
      "grad_norm": 0.2953874468803406,
      "learning_rate": 4.4805711150924304e-07,
      "loss": 0.3677,
      "step": 611
    },
    {
      "epoch": 2.6341463414634148,
      "grad_norm": 0.30996379256248474,
      "learning_rate": 4.3773230771879004e-07,
      "loss": 0.4144,
      "step": 612
    },
    {
      "epoch": 2.6384505021520805,
      "grad_norm": 0.2848236560821533,
      "learning_rate": 4.2752240681290027e-07,
      "loss": 0.3533,
      "step": 613
    },
    {
      "epoch": 2.642754662840746,
      "grad_norm": 0.31599825620651245,
      "learning_rate": 4.1742766593278974e-07,
      "loss": 0.3949,
      "step": 614
    },
    {
      "epoch": 2.6470588235294117,
      "grad_norm": 0.2851794362068176,
      "learning_rate": 4.074483393193135e-07,
      "loss": 0.3824,
      "step": 615
    },
    {
      "epoch": 2.6513629842180775,
      "grad_norm": 0.2868153154850006,
      "learning_rate": 3.9758467830656623e-07,
      "loss": 0.3311,
      "step": 616
    },
    {
      "epoch": 2.6556671449067433,
      "grad_norm": 0.312959223985672,
      "learning_rate": 3.8783693131554836e-07,
      "loss": 0.3832,
      "step": 617
    },
    {
      "epoch": 2.659971305595409,
      "grad_norm": 0.320313423871994,
      "learning_rate": 3.782053438479094e-07,
      "loss": 0.3637,
      "step": 618
    },
    {
      "epoch": 2.6642754662840744,
      "grad_norm": 0.2905493974685669,
      "learning_rate": 3.686901584797675e-07,
      "loss": 0.3638,
      "step": 619
    },
    {
      "epoch": 2.66857962697274,
      "grad_norm": 0.2887527644634247,
      "learning_rate": 3.5929161485559694e-07,
      "loss": 0.376,
      "step": 620
    },
    {
      "epoch": 2.672883787661406,
      "grad_norm": 0.2931526303291321,
      "learning_rate": 3.5000994968219406e-07,
      "loss": 0.372,
      "step": 621
    },
    {
      "epoch": 2.677187948350072,
      "grad_norm": 0.2868293821811676,
      "learning_rate": 3.4084539672271764e-07,
      "loss": 0.361,
      "step": 622
    },
    {
      "epoch": 2.6814921090387376,
      "grad_norm": 0.3189477026462555,
      "learning_rate": 3.3179818679079936e-07,
      "loss": 0.3799,
      "step": 623
    },
    {
      "epoch": 2.685796269727403,
      "grad_norm": 0.30111587047576904,
      "learning_rate": 3.228685477447291e-07,
      "loss": 0.3515,
      "step": 624
    },
    {
      "epoch": 2.6901004304160687,
      "grad_norm": 0.31718146800994873,
      "learning_rate": 3.140567044817172e-07,
      "loss": 0.383,
      "step": 625
    },
    {
      "epoch": 2.6944045911047345,
      "grad_norm": 0.3003697991371155,
      "learning_rate": 3.0536287893223603e-07,
      "loss": 0.3733,
      "step": 626
    },
    {
      "epoch": 2.6987087517934003,
      "grad_norm": 0.2880096435546875,
      "learning_rate": 2.967872900544194e-07,
      "loss": 0.3751,
      "step": 627
    },
    {
      "epoch": 2.703012912482066,
      "grad_norm": 0.27889522910118103,
      "learning_rate": 2.883301538285582e-07,
      "loss": 0.3578,
      "step": 628
    },
    {
      "epoch": 2.7073170731707314,
      "grad_norm": 0.29290032386779785,
      "learning_rate": 2.799916832516575e-07,
      "loss": 0.4041,
      "step": 629
    },
    {
      "epoch": 2.7116212338593977,
      "grad_norm": 0.2837107181549072,
      "learning_rate": 2.717720883320685e-07,
      "loss": 0.3779,
      "step": 630
    },
    {
      "epoch": 2.715925394548063,
      "grad_norm": 0.2823951840400696,
      "learning_rate": 2.6367157608420347e-07,
      "loss": 0.3444,
      "step": 631
    },
    {
      "epoch": 2.720229555236729,
      "grad_norm": 0.32553017139434814,
      "learning_rate": 2.556903505233216e-07,
      "loss": 0.3881,
      "step": 632
    },
    {
      "epoch": 2.7245337159253946,
      "grad_norm": 0.28817129135131836,
      "learning_rate": 2.4782861266038904e-07,
      "loss": 0.3657,
      "step": 633
    },
    {
      "epoch": 2.7288378766140604,
      "grad_norm": 0.29924365878105164,
      "learning_rate": 2.4008656049701875e-07,
      "loss": 0.3845,
      "step": 634
    },
    {
      "epoch": 2.733142037302726,
      "grad_norm": 0.2882951498031616,
      "learning_rate": 2.3246438902048196e-07,
      "loss": 0.3815,
      "step": 635
    },
    {
      "epoch": 2.7374461979913915,
      "grad_norm": 0.2708081305027008,
      "learning_rate": 2.2496229019879635e-07,
      "loss": 0.3297,
      "step": 636
    },
    {
      "epoch": 2.7417503586800573,
      "grad_norm": 0.2893579602241516,
      "learning_rate": 2.175804529758929e-07,
      "loss": 0.3956,
      "step": 637
    },
    {
      "epoch": 2.746054519368723,
      "grad_norm": 0.30808335542678833,
      "learning_rate": 2.1031906326685946e-07,
      "loss": 0.3644,
      "step": 638
    },
    {
      "epoch": 2.750358680057389,
      "grad_norm": 0.3239193856716156,
      "learning_rate": 2.0317830395325255e-07,
      "loss": 0.4271,
      "step": 639
    },
    {
      "epoch": 2.7546628407460547,
      "grad_norm": 0.29798826575279236,
      "learning_rate": 1.9615835487849677e-07,
      "loss": 0.3978,
      "step": 640
    },
    {
      "epoch": 2.75896700143472,
      "grad_norm": 0.31455305218696594,
      "learning_rate": 1.8925939284335225e-07,
      "loss": 0.4061,
      "step": 641
    },
    {
      "epoch": 2.763271162123386,
      "grad_norm": 0.283496618270874,
      "learning_rate": 1.824815916014644e-07,
      "loss": 0.3499,
      "step": 642
    },
    {
      "epoch": 2.7675753228120517,
      "grad_norm": 0.2762824296951294,
      "learning_rate": 1.7582512185498446e-07,
      "loss": 0.3588,
      "step": 643
    },
    {
      "epoch": 2.7718794835007174,
      "grad_norm": 0.30749961733818054,
      "learning_rate": 1.6929015125027314e-07,
      "loss": 0.3768,
      "step": 644
    },
    {
      "epoch": 2.7761836441893832,
      "grad_norm": 0.2810935974121094,
      "learning_rate": 1.6287684437367724e-07,
      "loss": 0.3517,
      "step": 645
    },
    {
      "epoch": 2.7804878048780486,
      "grad_norm": 0.2916150689125061,
      "learning_rate": 1.5658536274738623e-07,
      "loss": 0.3764,
      "step": 646
    },
    {
      "epoch": 2.7847919655667144,
      "grad_norm": 0.2854636013507843,
      "learning_rate": 1.504158648253584e-07,
      "loss": 0.3823,
      "step": 647
    },
    {
      "epoch": 2.78909612625538,
      "grad_norm": 0.2888132929801941,
      "learning_rate": 1.443685059893396e-07,
      "loss": 0.3456,
      "step": 648
    },
    {
      "epoch": 2.793400286944046,
      "grad_norm": 0.2825074791908264,
      "learning_rate": 1.3844343854494123e-07,
      "loss": 0.3695,
      "step": 649
    },
    {
      "epoch": 2.7977044476327118,
      "grad_norm": 0.2972644865512848,
      "learning_rate": 1.3264081171780797e-07,
      "loss": 0.3661,
      "step": 650
    },
    {
      "epoch": 2.802008608321377,
      "grad_norm": 0.2751018702983856,
      "learning_rate": 1.2696077164986e-07,
      "loss": 0.3741,
      "step": 651
    },
    {
      "epoch": 2.806312769010043,
      "grad_norm": 0.2982390224933624,
      "learning_rate": 1.2140346139561277e-07,
      "loss": 0.3896,
      "step": 652
    },
    {
      "epoch": 2.8106169296987087,
      "grad_norm": 0.29119473695755005,
      "learning_rate": 1.1596902091857043e-07,
      "loss": 0.3667,
      "step": 653
    },
    {
      "epoch": 2.8149210903873745,
      "grad_norm": 0.30065134167671204,
      "learning_rate": 1.1065758708770468e-07,
      "loss": 0.3591,
      "step": 654
    },
    {
      "epoch": 2.8192252510760403,
      "grad_norm": 0.28981831669807434,
      "learning_rate": 1.0546929367400705e-07,
      "loss": 0.3288,
      "step": 655
    },
    {
      "epoch": 2.8235294117647056,
      "grad_norm": 0.3213971257209778,
      "learning_rate": 1.004042713471165e-07,
      "loss": 0.393,
      "step": 656
    },
    {
      "epoch": 2.827833572453372,
      "grad_norm": 0.27952906489372253,
      "learning_rate": 9.546264767203328e-08,
      "loss": 0.3538,
      "step": 657
    },
    {
      "epoch": 2.832137733142037,
      "grad_norm": 0.32866302132606506,
      "learning_rate": 9.064454710590253e-08,
      "loss": 0.3927,
      "step": 658
    },
    {
      "epoch": 2.836441893830703,
      "grad_norm": 0.3011215329170227,
      "learning_rate": 8.595009099488238e-08,
      "loss": 0.377,
      "step": 659
    },
    {
      "epoch": 2.840746054519369,
      "grad_norm": 0.2855226397514343,
      "learning_rate": 8.137939757108526e-08,
      "loss": 0.3939,
      "step": 660
    },
    {
      "epoch": 2.8450502152080346,
      "grad_norm": 0.2855052649974823,
      "learning_rate": 7.693258194960252e-08,
      "loss": 0.3562,
      "step": 661
    },
    {
      "epoch": 2.8493543758967004,
      "grad_norm": 0.30429205298423767,
      "learning_rate": 7.260975612560173e-08,
      "loss": 0.3841,
      "step": 662
    },
    {
      "epoch": 2.8536585365853657,
      "grad_norm": 0.28108206391334534,
      "learning_rate": 6.84110289715112e-08,
      "loss": 0.3377,
      "step": 663
    },
    {
      "epoch": 2.8579626972740315,
      "grad_norm": 0.2989685833454132,
      "learning_rate": 6.433650623427379e-08,
      "loss": 0.4037,
      "step": 664
    },
    {
      "epoch": 2.8622668579626973,
      "grad_norm": 0.2865199148654938,
      "learning_rate": 6.038629053268464e-08,
      "loss": 0.3728,
      "step": 665
    },
    {
      "epoch": 2.866571018651363,
      "grad_norm": 0.2750672698020935,
      "learning_rate": 5.6560481354807625e-08,
      "loss": 0.356,
      "step": 666
    },
    {
      "epoch": 2.870875179340029,
      "grad_norm": 0.2924894094467163,
      "learning_rate": 5.285917505546967e-08,
      "loss": 0.3998,
      "step": 667
    },
    {
      "epoch": 2.8751793400286942,
      "grad_norm": 0.2777416706085205,
      "learning_rate": 4.928246485383148e-08,
      "loss": 0.3753,
      "step": 668
    },
    {
      "epoch": 2.87948350071736,
      "grad_norm": 0.3033868968486786,
      "learning_rate": 4.583044083104282e-08,
      "loss": 0.393,
      "step": 669
    },
    {
      "epoch": 2.883787661406026,
      "grad_norm": 0.32299816608428955,
      "learning_rate": 4.250318992797375e-08,
      "loss": 0.3897,
      "step": 670
    },
    {
      "epoch": 2.8880918220946916,
      "grad_norm": 0.2930081784725189,
      "learning_rate": 3.9300795943021943e-08,
      "loss": 0.3355,
      "step": 671
    },
    {
      "epoch": 2.8923959827833574,
      "grad_norm": 0.30189839005470276,
      "learning_rate": 3.622333953000601e-08,
      "loss": 0.379,
      "step": 672
    },
    {
      "epoch": 2.8967001434720228,
      "grad_norm": 0.29947370290756226,
      "learning_rate": 3.3270898196129944e-08,
      "loss": 0.3706,
      "step": 673
    },
    {
      "epoch": 2.9010043041606886,
      "grad_norm": 0.29969486594200134,
      "learning_rate": 3.0443546300035764e-08,
      "loss": 0.3586,
      "step": 674
    },
    {
      "epoch": 2.9053084648493543,
      "grad_norm": 0.3195127546787262,
      "learning_rate": 2.77413550499267e-08,
      "loss": 0.4084,
      "step": 675
    },
    {
      "epoch": 2.90961262553802,
      "grad_norm": 0.27233725786209106,
      "learning_rate": 2.516439250177749e-08,
      "loss": 0.3481,
      "step": 676
    },
    {
      "epoch": 2.913916786226686,
      "grad_norm": 0.2925049960613251,
      "learning_rate": 2.2712723557616335e-08,
      "loss": 0.378,
      "step": 677
    },
    {
      "epoch": 2.9182209469153513,
      "grad_norm": 0.2842983603477478,
      "learning_rate": 2.038640996389285e-08,
      "loss": 0.3678,
      "step": 678
    },
    {
      "epoch": 2.922525107604017,
      "grad_norm": 0.2895757853984833,
      "learning_rate": 1.818551030992377e-08,
      "loss": 0.3775,
      "step": 679
    },
    {
      "epoch": 2.926829268292683,
      "grad_norm": 0.2898154854774475,
      "learning_rate": 1.6110080026414123e-08,
      "loss": 0.3711,
      "step": 680
    },
    {
      "epoch": 2.9311334289813487,
      "grad_norm": 0.27027469873428345,
      "learning_rate": 1.4160171384064447e-08,
      "loss": 0.3636,
      "step": 681
    },
    {
      "epoch": 2.9354375896700144,
      "grad_norm": 0.29894378781318665,
      "learning_rate": 1.2335833492252425e-08,
      "loss": 0.3957,
      "step": 682
    },
    {
      "epoch": 2.93974175035868,
      "grad_norm": 0.29910367727279663,
      "learning_rate": 1.063711229779718e-08,
      "loss": 0.382,
      "step": 683
    },
    {
      "epoch": 2.944045911047346,
      "grad_norm": 0.28962647914886475,
      "learning_rate": 9.06405058380022e-09,
      "loss": 0.3698,
      "step": 684
    },
    {
      "epoch": 2.9483500717360114,
      "grad_norm": 0.28313586115837097,
      "learning_rate": 7.61668796857018e-09,
      "loss": 0.3711,
      "step": 685
    },
    {
      "epoch": 2.952654232424677,
      "grad_norm": 0.29572346806526184,
      "learning_rate": 6.295060904623618e-09,
      "loss": 0.3881,
      "step": 686
    },
    {
      "epoch": 2.956958393113343,
      "grad_norm": 0.29672494530677795,
      "learning_rate": 5.099202677767978e-09,
      "loss": 0.4193,
      "step": 687
    },
    {
      "epoch": 2.9612625538020088,
      "grad_norm": 0.27398431301116943,
      "learning_rate": 4.02914340626226e-09,
      "loss": 0.3409,
      "step": 688
    },
    {
      "epoch": 2.9655667144906745,
      "grad_norm": 0.2979412376880646,
      "learning_rate": 3.0849100400587307e-09,
      "loss": 0.336,
      "step": 689
    },
    {
      "epoch": 2.96987087517934,
      "grad_norm": 0.311142235994339,
      "learning_rate": 2.2665263601240328e-09,
      "loss": 0.3638,
      "step": 690
    },
    {
      "epoch": 2.9741750358680057,
      "grad_norm": 0.27928242087364197,
      "learning_rate": 1.5740129778413215e-09,
      "loss": 0.3601,
      "step": 691
    },
    {
      "epoch": 2.9784791965566715,
      "grad_norm": 0.30028945207595825,
      "learning_rate": 1.0073873344895735e-09,
      "loss": 0.4064,
      "step": 692
    },
    {
      "epoch": 2.9827833572453373,
      "grad_norm": 0.30556008219718933,
      "learning_rate": 5.666637008061582e-10,
      "loss": 0.3881,
      "step": 693
    },
    {
      "epoch": 2.987087517934003,
      "grad_norm": 0.29040220379829407,
      "learning_rate": 2.5185317662490547e-10,
      "loss": 0.358,
      "step": 694
    },
    {
      "epoch": 2.9913916786226684,
      "grad_norm": 0.2974533140659332,
      "learning_rate": 6.296369059854978e-11,
      "loss": 0.3728,
      "step": 695
    },
    {
      "epoch": 2.995695839311334,
      "grad_norm": 0.28847527503967285,
      "learning_rate": 0.0,
      "loss": 0.3634,
      "step": 696
    },
    {
      "epoch": 2.995695839311334,
      "step": 696,
      "total_flos": 890435552804864.0,
      "train_loss": 0.4294309826760456,
      "train_runtime": 14084.5907,
      "train_samples_per_second": 4.746,
      "train_steps_per_second": 0.049
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 696,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 890435552804864.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}