{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9995664614584236,
  "eval_steps": 500,
  "global_step": 1441,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0006936616665221538,
      "grad_norm": 43.466719125161504,
      "learning_rate": 0.0,
      "loss": 2.7095,
      "step": 1
    },
    {
      "epoch": 0.0013873233330443076,
      "grad_norm": 6.96466711942635,
      "learning_rate": 2.5595802480981545e-06,
      "loss": 2.0266,
      "step": 2
    },
    {
      "epoch": 0.0020809849995664614,
      "grad_norm": 5.728387691489433,
      "learning_rate": 4.056838710822129e-06,
      "loss": 2.0082,
      "step": 3
    },
    {
      "epoch": 0.0027746466660886152,
      "grad_norm": 4.686811256036109,
      "learning_rate": 5.119160496196309e-06,
      "loss": 1.9589,
      "step": 4
    },
    {
      "epoch": 0.003468308332610769,
      "grad_norm": 3.3529589095838874,
      "learning_rate": 5.943161289177871e-06,
      "loss": 1.9014,
      "step": 5
    },
    {
      "epoch": 0.004161969999132923,
      "grad_norm": 3.5493873116106522,
      "learning_rate": 6.616418958920285e-06,
      "loss": 1.8478,
      "step": 6
    },
    {
      "epoch": 0.004855631665655077,
      "grad_norm": 3.2248318636630753,
      "learning_rate": 7.185650207899778e-06,
      "loss": 1.8631,
      "step": 7
    },
    {
      "epoch": 0.0055492933321772304,
      "grad_norm": 2.5559675536274558,
      "learning_rate": 7.678740744294464e-06,
      "loss": 1.8771,
      "step": 8
    },
    {
      "epoch": 0.006242954998699385,
      "grad_norm": 2.852602324925445,
      "learning_rate": 8.113677421644258e-06,
      "loss": 1.8517,
      "step": 9
    },
    {
      "epoch": 0.006936616665221538,
      "grad_norm": 2.8475514523205687,
      "learning_rate": 8.502741537276027e-06,
      "loss": 1.8444,
      "step": 10
    },
    {
      "epoch": 0.007630278331743692,
      "grad_norm": 2.4445595331823116,
      "learning_rate": 8.854692840710254e-06,
      "loss": 1.8173,
      "step": 11
    },
    {
      "epoch": 0.008323939998265846,
      "grad_norm": 2.6236339242092606,
      "learning_rate": 9.175999207018439e-06,
      "loss": 1.797,
      "step": 12
    },
    {
      "epoch": 0.009017601664787999,
      "grad_norm": 2.7480536817929178,
      "learning_rate": 9.471572411831843e-06,
      "loss": 1.7525,
      "step": 13
    },
    {
      "epoch": 0.009711263331310154,
      "grad_norm": 2.8195069251274267,
      "learning_rate": 9.745230455997932e-06,
      "loss": 1.7421,
      "step": 14
    },
    {
      "epoch": 0.010404924997832308,
      "grad_norm": 2.574781912417627,
      "learning_rate": 9.999999999999999e-06,
      "loss": 1.7563,
      "step": 15
    },
    {
      "epoch": 0.011098586664354461,
      "grad_norm": 3.018787395539229,
      "learning_rate": 1e-05,
      "loss": 1.7235,
      "step": 16
    },
    {
      "epoch": 0.011792248330876614,
      "grad_norm": 2.7321515111278085,
      "learning_rate": 1e-05,
      "loss": 1.7372,
      "step": 17
    },
    {
      "epoch": 0.01248590999739877,
      "grad_norm": 2.535876566743008,
      "learning_rate": 1e-05,
      "loss": 1.7513,
      "step": 18
    },
    {
      "epoch": 0.013179571663920923,
      "grad_norm": 2.503169241059693,
      "learning_rate": 1e-05,
      "loss": 1.7598,
      "step": 19
    },
    {
      "epoch": 0.013873233330443076,
      "grad_norm": 2.5731073684349752,
      "learning_rate": 1e-05,
      "loss": 1.7947,
      "step": 20
    },
    {
      "epoch": 0.01456689499696523,
      "grad_norm": 2.5424918030748245,
      "learning_rate": 1e-05,
      "loss": 1.7304,
      "step": 21
    },
    {
      "epoch": 0.015260556663487385,
      "grad_norm": 2.280575429076331,
      "learning_rate": 1e-05,
      "loss": 1.6995,
      "step": 22
    },
    {
      "epoch": 0.015954218330009536,
      "grad_norm": 2.493960760059663,
      "learning_rate": 1e-05,
      "loss": 1.6744,
      "step": 23
    },
    {
      "epoch": 0.01664787999653169,
      "grad_norm": 2.238029250091602,
      "learning_rate": 1e-05,
      "loss": 1.7026,
      "step": 24
    },
    {
      "epoch": 0.017341541663053846,
      "grad_norm": 2.2414764681865362,
      "learning_rate": 1e-05,
      "loss": 1.6437,
      "step": 25
    },
    {
      "epoch": 0.018035203329575998,
      "grad_norm": 2.3762425127817135,
      "learning_rate": 1e-05,
      "loss": 1.7258,
      "step": 26
    },
    {
      "epoch": 0.018728864996098153,
      "grad_norm": 2.471220653637295,
      "learning_rate": 1e-05,
      "loss": 1.7413,
      "step": 27
    },
    {
      "epoch": 0.01942252666262031,
      "grad_norm": 2.236115513685021,
      "learning_rate": 1e-05,
      "loss": 1.7788,
      "step": 28
    },
    {
      "epoch": 0.02011618832914246,
      "grad_norm": 2.1158621654066128,
      "learning_rate": 1e-05,
      "loss": 1.6955,
      "step": 29
    },
    {
      "epoch": 0.020809849995664615,
      "grad_norm": 2.438210038803027,
      "learning_rate": 1e-05,
      "loss": 1.7435,
      "step": 30
    },
    {
      "epoch": 0.021503511662186767,
      "grad_norm": 2.2049092103165484,
      "learning_rate": 1e-05,
      "loss": 1.7103,
      "step": 31
    },
    {
      "epoch": 0.022197173328708922,
      "grad_norm": 1.992834120231878,
      "learning_rate": 1e-05,
      "loss": 1.6941,
      "step": 32
    },
    {
      "epoch": 0.022890834995231077,
      "grad_norm": 2.3021272374064767,
      "learning_rate": 1e-05,
      "loss": 1.667,
      "step": 33
    },
    {
      "epoch": 0.02358449666175323,
      "grad_norm": 2.1119253371859426,
      "learning_rate": 1e-05,
      "loss": 1.6849,
      "step": 34
    },
    {
      "epoch": 0.024278158328275384,
      "grad_norm": 2.3243314568193445,
      "learning_rate": 1e-05,
      "loss": 1.6466,
      "step": 35
    },
    {
      "epoch": 0.02497181999479754,
      "grad_norm": 2.2531985009302176,
      "learning_rate": 1e-05,
      "loss": 1.6459,
      "step": 36
    },
    {
      "epoch": 0.02566548166131969,
      "grad_norm": 2.1844804086893777,
      "learning_rate": 1e-05,
      "loss": 1.6794,
      "step": 37
    },
    {
      "epoch": 0.026359143327841845,
      "grad_norm": 2.2530966998426245,
      "learning_rate": 1e-05,
      "loss": 1.6601,
      "step": 38
    },
    {
      "epoch": 0.027052804994364,
      "grad_norm": 2.1421735569008686,
      "learning_rate": 1e-05,
      "loss": 1.6335,
      "step": 39
    },
    {
      "epoch": 0.027746466660886152,
      "grad_norm": 2.181725062044003,
      "learning_rate": 1e-05,
      "loss": 1.6319,
      "step": 40
    },
    {
      "epoch": 0.028440128327408307,
      "grad_norm": 2.193692520161033,
      "learning_rate": 1e-05,
      "loss": 1.6363,
      "step": 41
    },
    {
      "epoch": 0.02913378999393046,
      "grad_norm": 2.1086393378123818,
      "learning_rate": 1e-05,
      "loss": 1.6735,
      "step": 42
    },
    {
      "epoch": 0.029827451660452614,
      "grad_norm": 2.0131860874790175,
      "learning_rate": 1e-05,
      "loss": 1.5967,
      "step": 43
    },
    {
      "epoch": 0.03052111332697477,
      "grad_norm": 2.1758701969638,
      "learning_rate": 1e-05,
      "loss": 1.667,
      "step": 44
    },
    {
      "epoch": 0.03121477499349692,
      "grad_norm": 2.10881747915129,
      "learning_rate": 1e-05,
      "loss": 1.6002,
      "step": 45
    },
    {
      "epoch": 0.03190843666001907,
      "grad_norm": 2.1929235643249707,
      "learning_rate": 1e-05,
      "loss": 1.6109,
      "step": 46
    },
    {
      "epoch": 0.03260209832654123,
      "grad_norm": 2.2382019559100503,
      "learning_rate": 1e-05,
      "loss": 1.6309,
      "step": 47
    },
    {
      "epoch": 0.03329575999306338,
      "grad_norm": 2.204542258967164,
      "learning_rate": 1e-05,
      "loss": 1.6123,
      "step": 48
    },
    {
      "epoch": 0.03398942165958554,
      "grad_norm": 2.1983829138121194,
      "learning_rate": 1e-05,
      "loss": 1.6371,
      "step": 49
    },
    {
      "epoch": 0.03468308332610769,
      "grad_norm": 2.010616851796152,
      "learning_rate": 1e-05,
      "loss": 1.648,
      "step": 50
    },
    {
      "epoch": 0.03537674499262985,
      "grad_norm": 2.109337350234141,
      "learning_rate": 1e-05,
      "loss": 1.5939,
      "step": 51
    },
    {
      "epoch": 0.036070406659151996,
      "grad_norm": 2.5304099594684657,
      "learning_rate": 1e-05,
      "loss": 1.5806,
      "step": 52
    },
    {
      "epoch": 0.03676406832567415,
      "grad_norm": 2.159949455039585,
      "learning_rate": 1e-05,
      "loss": 1.6898,
      "step": 53
    },
    {
      "epoch": 0.037457729992196306,
      "grad_norm": 2.3028114991190227,
      "learning_rate": 1e-05,
      "loss": 1.6102,
      "step": 54
    },
    {
      "epoch": 0.03815139165871846,
      "grad_norm": 2.137637210948916,
      "learning_rate": 1e-05,
      "loss": 1.5976,
      "step": 55
    },
    {
      "epoch": 0.03884505332524062,
      "grad_norm": 2.283279122337232,
      "learning_rate": 1e-05,
      "loss": 1.6182,
      "step": 56
    },
    {
      "epoch": 0.039538714991762765,
      "grad_norm": 2.352555898736503,
      "learning_rate": 1e-05,
      "loss": 1.5764,
      "step": 57
    },
    {
      "epoch": 0.04023237665828492,
      "grad_norm": 2.2640721232017578,
      "learning_rate": 1e-05,
      "loss": 1.606,
      "step": 58
    },
    {
      "epoch": 0.040926038324807075,
      "grad_norm": 2.2046947158269052,
      "learning_rate": 1e-05,
      "loss": 1.6621,
      "step": 59
    },
    {
      "epoch": 0.04161969999132923,
      "grad_norm": 2.2356515653560955,
      "learning_rate": 1e-05,
      "loss": 1.5783,
      "step": 60
    },
    {
      "epoch": 0.042313361657851385,
      "grad_norm": 2.0914853090761656,
      "learning_rate": 1e-05,
      "loss": 1.6386,
      "step": 61
    },
    {
      "epoch": 0.04300702332437353,
      "grad_norm": 2.0219313193549335,
      "learning_rate": 1e-05,
      "loss": 1.5409,
      "step": 62
    },
    {
      "epoch": 0.04370068499089569,
      "grad_norm": 2.0579915512686675,
      "learning_rate": 1e-05,
      "loss": 1.6024,
      "step": 63
    },
    {
      "epoch": 0.044394346657417844,
      "grad_norm": 2.1350362922236563,
      "learning_rate": 1e-05,
      "loss": 1.5979,
      "step": 64
    },
    {
      "epoch": 0.04508800832394,
      "grad_norm": 2.1574547660781493,
      "learning_rate": 1e-05,
      "loss": 1.5968,
      "step": 65
    },
    {
      "epoch": 0.045781669990462154,
      "grad_norm": 2.032927708489895,
      "learning_rate": 1e-05,
      "loss": 1.623,
      "step": 66
    },
    {
      "epoch": 0.04647533165698431,
      "grad_norm": 2.310697906396082,
      "learning_rate": 1e-05,
      "loss": 1.58,
      "step": 67
    },
    {
      "epoch": 0.04716899332350646,
      "grad_norm": 2.082166726223734,
      "learning_rate": 1e-05,
      "loss": 1.5855,
      "step": 68
    },
    {
      "epoch": 0.04786265499002861,
      "grad_norm": 2.1805698151273205,
      "learning_rate": 1e-05,
      "loss": 1.5406,
      "step": 69
    },
    {
      "epoch": 0.04855631665655077,
      "grad_norm": 2.1517290677451366,
      "learning_rate": 1e-05,
      "loss": 1.5794,
      "step": 70
    },
    {
      "epoch": 0.04924997832307292,
      "grad_norm": 2.2324944455303286,
      "learning_rate": 1e-05,
      "loss": 1.5897,
      "step": 71
    },
    {
      "epoch": 0.04994363998959508,
      "grad_norm": 2.117363841822313,
      "learning_rate": 1e-05,
      "loss": 1.6344,
      "step": 72
    },
    {
      "epoch": 0.050637301656117226,
      "grad_norm": 2.0840589709094055,
      "learning_rate": 1e-05,
      "loss": 1.5593,
      "step": 73
    },
    {
      "epoch": 0.05133096332263938,
      "grad_norm": 2.139635042522081,
      "learning_rate": 1e-05,
      "loss": 1.5604,
      "step": 74
    },
    {
      "epoch": 0.052024624989161536,
      "grad_norm": 1.9773852269088779,
      "learning_rate": 1e-05,
      "loss": 1.5132,
      "step": 75
    },
    {
      "epoch": 0.05271828665568369,
      "grad_norm": 2.2497342140467227,
      "learning_rate": 1e-05,
      "loss": 1.5689,
      "step": 76
    },
    {
      "epoch": 0.053411948322205846,
      "grad_norm": 2.1828363324950515,
      "learning_rate": 1e-05,
      "loss": 1.5775,
      "step": 77
    },
    {
      "epoch": 0.054105609988728,
      "grad_norm": 2.397270079677964,
      "learning_rate": 1e-05,
      "loss": 1.5876,
      "step": 78
    },
    {
      "epoch": 0.05479927165525015,
      "grad_norm": 2.132037167524842,
      "learning_rate": 1e-05,
      "loss": 1.5388,
      "step": 79
    },
    {
      "epoch": 0.055492933321772304,
      "grad_norm": 2.105011522745257,
      "learning_rate": 1e-05,
      "loss": 1.5878,
      "step": 80
    },
    {
      "epoch": 0.05618659498829446,
      "grad_norm": 2.331199900780172,
      "learning_rate": 1e-05,
      "loss": 1.6095,
      "step": 81
    },
    {
      "epoch": 0.056880256654816615,
      "grad_norm": 2.0679915620822915,
      "learning_rate": 1e-05,
      "loss": 1.5489,
      "step": 82
    },
    {
      "epoch": 0.05757391832133877,
      "grad_norm": 2.1615822894325154,
      "learning_rate": 1e-05,
      "loss": 1.6244,
      "step": 83
    },
    {
      "epoch": 0.05826757998786092,
      "grad_norm": 1.979313705736586,
      "learning_rate": 1e-05,
      "loss": 1.5881,
      "step": 84
    },
    {
      "epoch": 0.05896124165438307,
      "grad_norm": 2.1629527862643063,
      "learning_rate": 1e-05,
      "loss": 1.5775,
      "step": 85
    },
    {
      "epoch": 0.05965490332090523,
      "grad_norm": 2.3681176161050956,
      "learning_rate": 1e-05,
      "loss": 1.5714,
      "step": 86
    },
    {
      "epoch": 0.06034856498742738,
      "grad_norm": 1.9456880551179383,
      "learning_rate": 1e-05,
      "loss": 1.5516,
      "step": 87
    },
    {
      "epoch": 0.06104222665394954,
      "grad_norm": 2.1286899349051343,
      "learning_rate": 1e-05,
      "loss": 1.5368,
      "step": 88
    },
    {
      "epoch": 0.06173588832047169,
      "grad_norm": 2.124774583311733,
      "learning_rate": 1e-05,
      "loss": 1.5244,
      "step": 89
    },
    {
      "epoch": 0.06242954998699384,
      "grad_norm": 2.255844207300473,
      "learning_rate": 1e-05,
      "loss": 1.6468,
      "step": 90
    },
    {
      "epoch": 0.063123211653516,
      "grad_norm": 2.253221543014536,
      "learning_rate": 1e-05,
      "loss": 1.5459,
      "step": 91
    },
    {
      "epoch": 0.06381687332003814,
      "grad_norm": 2.161784717901113,
      "learning_rate": 1e-05,
      "loss": 1.5295,
      "step": 92
    },
    {
      "epoch": 0.0645105349865603,
      "grad_norm": 2.100171711959203,
      "learning_rate": 1e-05,
      "loss": 1.5432,
      "step": 93
    },
    {
      "epoch": 0.06520419665308246,
      "grad_norm": 2.0472421305517776,
      "learning_rate": 1e-05,
      "loss": 1.5722,
      "step": 94
    },
    {
      "epoch": 0.06589785831960461,
      "grad_norm": 2.174127269142891,
      "learning_rate": 1e-05,
      "loss": 1.5301,
      "step": 95
    },
    {
      "epoch": 0.06659151998612677,
      "grad_norm": 2.302981491215993,
      "learning_rate": 1e-05,
      "loss": 1.5495,
      "step": 96
    },
    {
      "epoch": 0.06728518165264892,
      "grad_norm": 2.093167418727749,
      "learning_rate": 1e-05,
      "loss": 1.5604,
      "step": 97
    },
    {
      "epoch": 0.06797884331917108,
      "grad_norm": 2.0951541746401263,
      "learning_rate": 1e-05,
      "loss": 1.5769,
      "step": 98
    },
    {
      "epoch": 0.06867250498569323,
      "grad_norm": 1.9576369412503245,
      "learning_rate": 1e-05,
      "loss": 1.6003,
      "step": 99
    },
    {
      "epoch": 0.06936616665221539,
      "grad_norm": 2.3419839237311266,
      "learning_rate": 1e-05,
      "loss": 1.4925,
      "step": 100
    },
    {
      "epoch": 0.07005982831873754,
      "grad_norm": 2.0633427358605774,
      "learning_rate": 1e-05,
      "loss": 1.5698,
      "step": 101
    },
    {
      "epoch": 0.0707534899852597,
      "grad_norm": 2.1113080337620795,
      "learning_rate": 1e-05,
      "loss": 1.593,
      "step": 102
    },
    {
      "epoch": 0.07144715165178184,
      "grad_norm": 2.200709143893939,
      "learning_rate": 1e-05,
      "loss": 1.5487,
      "step": 103
    },
    {
      "epoch": 0.07214081331830399,
      "grad_norm": 2.115437173154835,
      "learning_rate": 1e-05,
      "loss": 1.5184,
      "step": 104
    },
    {
      "epoch": 0.07283447498482615,
      "grad_norm": 1.9592926067021565,
      "learning_rate": 1e-05,
      "loss": 1.5824,
      "step": 105
    },
    {
      "epoch": 0.0735281366513483,
      "grad_norm": 2.0541106318447366,
      "learning_rate": 1e-05,
      "loss": 1.5265,
      "step": 106
    },
    {
      "epoch": 0.07422179831787046,
      "grad_norm": 1.9381187865210794,
      "learning_rate": 1e-05,
      "loss": 1.5628,
      "step": 107
    },
    {
      "epoch": 0.07491545998439261,
      "grad_norm": 2.1035504116541084,
      "learning_rate": 1e-05,
      "loss": 1.6001,
      "step": 108
    },
    {
      "epoch": 0.07560912165091477,
      "grad_norm": 2.143235938125612,
      "learning_rate": 1e-05,
      "loss": 1.5153,
      "step": 109
    },
    {
      "epoch": 0.07630278331743692,
      "grad_norm": 2.0039392778273357,
      "learning_rate": 1e-05,
      "loss": 1.5603,
      "step": 110
    },
    {
      "epoch": 0.07699644498395908,
      "grad_norm": 2.0158397924903233,
      "learning_rate": 1e-05,
      "loss": 1.5216,
      "step": 111
    },
    {
      "epoch": 0.07769010665048123,
      "grad_norm": 1.894029714001099,
      "learning_rate": 1e-05,
      "loss": 1.5587,
      "step": 112
    },
    {
      "epoch": 0.07838376831700339,
      "grad_norm": 2.432706928463119,
      "learning_rate": 1e-05,
      "loss": 1.5707,
      "step": 113
    },
    {
      "epoch": 0.07907742998352553,
      "grad_norm": 2.036785190887291,
      "learning_rate": 1e-05,
      "loss": 1.4926,
      "step": 114
    },
    {
      "epoch": 0.07977109165004768,
      "grad_norm": 1.9400189433970951,
      "learning_rate": 1e-05,
      "loss": 1.6043,
      "step": 115
    },
    {
      "epoch": 0.08046475331656984,
      "grad_norm": 1.919202983721404,
      "learning_rate": 1e-05,
      "loss": 1.537,
      "step": 116
    },
    {
      "epoch": 0.081158414983092,
      "grad_norm": 2.0977172125022707,
      "learning_rate": 1e-05,
      "loss": 1.5141,
      "step": 117
    },
    {
      "epoch": 0.08185207664961415,
      "grad_norm": 2.06407983728045,
      "learning_rate": 1e-05,
      "loss": 1.5672,
      "step": 118
    },
    {
      "epoch": 0.0825457383161363,
      "grad_norm": 2.078141165316271,
      "learning_rate": 1e-05,
      "loss": 1.5411,
      "step": 119
    },
    {
      "epoch": 0.08323939998265846,
      "grad_norm": 2.128515674818184,
      "learning_rate": 1e-05,
      "loss": 1.574,
      "step": 120
    },
    {
      "epoch": 0.08393306164918062,
      "grad_norm": 2.204639065305811,
      "learning_rate": 1e-05,
      "loss": 1.5411,
      "step": 121
    },
    {
      "epoch": 0.08462672331570277,
      "grad_norm": 2.028431294158661,
      "learning_rate": 1e-05,
      "loss": 1.4471,
      "step": 122
    },
    {
      "epoch": 0.08532038498222493,
      "grad_norm": 1.9634386333120701,
      "learning_rate": 1e-05,
      "loss": 1.5498,
      "step": 123
    },
    {
      "epoch": 0.08601404664874707,
      "grad_norm": 2.029215017631285,
      "learning_rate": 1e-05,
      "loss": 1.5568,
      "step": 124
    },
    {
      "epoch": 0.08670770831526922,
      "grad_norm": 2.0663413738174397,
      "learning_rate": 1e-05,
      "loss": 1.5426,
      "step": 125
    },
    {
      "epoch": 0.08740136998179138,
      "grad_norm": 1.9619014687764207,
      "learning_rate": 1e-05,
      "loss": 1.5133,
      "step": 126
    },
    {
      "epoch": 0.08809503164831353,
      "grad_norm": 1.9162219051787666,
      "learning_rate": 1e-05,
      "loss": 1.5464,
      "step": 127
    },
    {
      "epoch": 0.08878869331483569,
      "grad_norm": 2.261542371230024,
      "learning_rate": 1e-05,
      "loss": 1.4646,
      "step": 128
    },
    {
      "epoch": 0.08948235498135784,
      "grad_norm": 1.9715291734543514,
      "learning_rate": 1e-05,
      "loss": 1.5132,
      "step": 129
    },
    {
      "epoch": 0.09017601664788,
      "grad_norm": 2.0203777348766834,
      "learning_rate": 1e-05,
      "loss": 1.5354,
      "step": 130
    },
    {
      "epoch": 0.09086967831440215,
      "grad_norm": 1.9138585711617677,
      "learning_rate": 1e-05,
      "loss": 1.5407,
      "step": 131
    },
    {
      "epoch": 0.09156333998092431,
      "grad_norm": 2.0173322994738596,
      "learning_rate": 1e-05,
      "loss": 1.5439,
      "step": 132
    },
    {
      "epoch": 0.09225700164744646,
      "grad_norm": 2.107269356193457,
      "learning_rate": 1e-05,
      "loss": 1.5155,
      "step": 133
    },
    {
      "epoch": 0.09295066331396862,
      "grad_norm": 1.8484907386064835,
      "learning_rate": 1e-05,
      "loss": 1.5765,
      "step": 134
    },
    {
      "epoch": 0.09364432498049076,
      "grad_norm": 1.9245702524265067,
      "learning_rate": 1e-05,
      "loss": 1.5584,
      "step": 135
    },
    {
      "epoch": 0.09433798664701291,
      "grad_norm": 2.01491446609071,
      "learning_rate": 1e-05,
      "loss": 1.5813,
      "step": 136
    },
    {
      "epoch": 0.09503164831353507,
      "grad_norm": 2.139603451563103,
      "learning_rate": 1e-05,
      "loss": 1.5156,
      "step": 137
    },
    {
      "epoch": 0.09572530998005722,
      "grad_norm": 2.02926570002898,
      "learning_rate": 1e-05,
      "loss": 1.5331,
      "step": 138
    },
    {
      "epoch": 0.09641897164657938,
      "grad_norm": 2.0788419828866314,
      "learning_rate": 1e-05,
      "loss": 1.5504,
      "step": 139
    },
    {
      "epoch": 0.09711263331310153,
      "grad_norm": 2.1919839551775016,
      "learning_rate": 1e-05,
      "loss": 1.519,
      "step": 140
    },
    {
      "epoch": 0.09780629497962369,
      "grad_norm": 1.905930415266768,
      "learning_rate": 1e-05,
      "loss": 1.5084,
      "step": 141
    },
    {
      "epoch": 0.09849995664614584,
      "grad_norm": 2.107652533544824,
      "learning_rate": 1e-05,
      "loss": 1.5004,
      "step": 142
    },
    {
      "epoch": 0.099193618312668,
      "grad_norm": 1.9142412206590709,
      "learning_rate": 1e-05,
      "loss": 1.5488,
      "step": 143
    },
    {
      "epoch": 0.09988727997919015,
      "grad_norm": 1.8910378379533608,
      "learning_rate": 1e-05,
      "loss": 1.4912,
      "step": 144
    },
    {
      "epoch": 0.10058094164571231,
      "grad_norm": 2.0531944494577385,
      "learning_rate": 1e-05,
      "loss": 1.5011,
      "step": 145
    },
    {
      "epoch": 0.10127460331223445,
      "grad_norm": 1.9561470527096887,
      "learning_rate": 1e-05,
      "loss": 1.483,
      "step": 146
    },
    {
      "epoch": 0.1019682649787566,
      "grad_norm": 2.0182745726837186,
      "learning_rate": 1e-05,
      "loss": 1.4969,
      "step": 147
    },
    {
      "epoch": 0.10266192664527876,
      "grad_norm": 2.1655704406766305,
      "learning_rate": 1e-05,
      "loss": 1.5151,
      "step": 148
    },
    {
      "epoch": 0.10335558831180092,
      "grad_norm": 2.067383515526746,
      "learning_rate": 1e-05,
      "loss": 1.4932,
      "step": 149
    },
    {
      "epoch": 0.10404924997832307,
      "grad_norm": 2.16565372834465,
      "learning_rate": 1e-05,
      "loss": 1.5211,
      "step": 150
    },
    {
      "epoch": 0.10474291164484523,
      "grad_norm": 2.067199972931285,
      "learning_rate": 1e-05,
      "loss": 1.5182,
      "step": 151
    },
    {
      "epoch": 0.10543657331136738,
      "grad_norm": 2.021560871683723,
      "learning_rate": 1e-05,
      "loss": 1.5216,
      "step": 152
    },
    {
      "epoch": 0.10613023497788954,
      "grad_norm": 2.0837036440044914,
      "learning_rate": 1e-05,
      "loss": 1.4789,
      "step": 153
    },
    {
      "epoch": 0.10682389664441169,
      "grad_norm": 2.105326579649833,
      "learning_rate": 1e-05,
      "loss": 1.5532,
      "step": 154
    },
    {
      "epoch": 0.10751755831093385,
      "grad_norm": 1.9531358624572481,
      "learning_rate": 1e-05,
      "loss": 1.5709,
      "step": 155
    },
    {
      "epoch": 0.108211219977456,
      "grad_norm": 2.064713519765923,
      "learning_rate": 1e-05,
      "loss": 1.5378,
      "step": 156
    },
    {
      "epoch": 0.10890488164397814,
      "grad_norm": 2.1618499671142226,
      "learning_rate": 1e-05,
      "loss": 1.5176,
      "step": 157
    },
    {
      "epoch": 0.1095985433105003,
      "grad_norm": 1.9086208868139072,
      "learning_rate": 1e-05,
      "loss": 1.5546,
      "step": 158
    },
    {
      "epoch": 0.11029220497702245,
      "grad_norm": 2.02664550739396,
      "learning_rate": 1e-05,
      "loss": 1.4997,
      "step": 159
    },
    {
      "epoch": 0.11098586664354461,
      "grad_norm": 1.999544656281972,
      "learning_rate": 1e-05,
      "loss": 1.5003,
      "step": 160
    },
    {
      "epoch": 0.11167952831006676,
      "grad_norm": 2.053756926485644,
      "learning_rate": 1e-05,
      "loss": 1.5292,
      "step": 161
    },
    {
      "epoch": 0.11237318997658892,
      "grad_norm": 2.094168834382519,
      "learning_rate": 1e-05,
      "loss": 1.4676,
      "step": 162
    },
    {
      "epoch": 0.11306685164311107,
      "grad_norm": 2.002754028656303,
      "learning_rate": 1e-05,
      "loss": 1.5105,
      "step": 163
    },
    {
      "epoch": 0.11376051330963323,
      "grad_norm": 1.9434708295091558,
      "learning_rate": 1e-05,
      "loss": 1.5053,
      "step": 164
    },
    {
      "epoch": 0.11445417497615538,
      "grad_norm": 2.0330157669650815,
      "learning_rate": 1e-05,
      "loss": 1.4888,
      "step": 165
    },
    {
      "epoch": 0.11514783664267754,
      "grad_norm": 1.9289370322291217,
      "learning_rate": 1e-05,
      "loss": 1.5277,
      "step": 166
    },
    {
      "epoch": 0.1158414983091997,
      "grad_norm": 1.959995924753835,
      "learning_rate": 1e-05,
      "loss": 1.4825,
      "step": 167
    },
    {
      "epoch": 0.11653515997572184,
      "grad_norm": 1.7991848893944669,
      "learning_rate": 1e-05,
      "loss": 1.5005,
      "step": 168
    },
    {
      "epoch": 0.11722882164224399,
      "grad_norm": 1.8734306548761093,
      "learning_rate": 1e-05,
      "loss": 1.515,
      "step": 169
    },
    {
      "epoch": 0.11792248330876615,
      "grad_norm": 2.0015044635043218,
      "learning_rate": 1e-05,
      "loss": 1.5252,
      "step": 170
    },
    {
      "epoch": 0.1186161449752883,
      "grad_norm": 2.2665936004262273,
      "learning_rate": 1e-05,
      "loss": 1.4838,
      "step": 171
    },
    {
      "epoch": 0.11930980664181046,
      "grad_norm": 1.9544216259159037,
      "learning_rate": 1e-05,
      "loss": 1.4925,
      "step": 172
    },
    {
      "epoch": 0.12000346830833261,
      "grad_norm": 2.1154532935532715,
      "learning_rate": 1e-05,
      "loss": 1.5146,
      "step": 173
    },
    {
      "epoch": 0.12069712997485477,
      "grad_norm": 2.1021548296617927,
      "learning_rate": 1e-05,
      "loss": 1.4915,
      "step": 174
    },
    {
      "epoch": 0.12139079164137692,
      "grad_norm": 2.0834121176679634,
      "learning_rate": 1e-05,
      "loss": 1.5075,
      "step": 175
    },
    {
      "epoch": 0.12208445330789908,
      "grad_norm": 2.0016972202996213,
      "learning_rate": 1e-05,
      "loss": 1.5255,
      "step": 176
    },
    {
      "epoch": 0.12277811497442123,
      "grad_norm": 2.0612598236678523,
      "learning_rate": 1e-05,
      "loss": 1.5149,
      "step": 177
    },
    {
      "epoch": 0.12347177664094337,
      "grad_norm": 2.0229183358200484,
      "learning_rate": 1e-05,
      "loss": 1.5249,
      "step": 178
    },
    {
      "epoch": 0.12416543830746553,
      "grad_norm": 2.263550407359551,
      "learning_rate": 1e-05,
      "loss": 1.5244,
      "step": 179
    },
    {
      "epoch": 0.12485909997398768,
      "grad_norm": 2.08456086134308,
      "learning_rate": 1e-05,
      "loss": 1.4981,
      "step": 180
    },
    {
      "epoch": 0.12555276164050985,
      "grad_norm": 2.0774621427372386,
      "learning_rate": 1e-05,
      "loss": 1.5143,
      "step": 181
    },
    {
      "epoch": 0.126246423307032,
      "grad_norm": 2.1010285933058626,
      "learning_rate": 1e-05,
      "loss": 1.4504,
      "step": 182
    },
    {
      "epoch": 0.12694008497355416,
      "grad_norm": 2.0102176748558405,
      "learning_rate": 1e-05,
      "loss": 1.4146,
      "step": 183
    },
    {
      "epoch": 0.1276337466400763,
      "grad_norm": 2.095717278113951,
      "learning_rate": 1e-05,
      "loss": 1.5395,
      "step": 184
    },
    {
      "epoch": 0.12832740830659844,
      "grad_norm": 2.193298827450061,
      "learning_rate": 1e-05,
      "loss": 1.4769,
      "step": 185
    },
    {
      "epoch": 0.1290210699731206,
      "grad_norm": 1.9388355574681662,
      "learning_rate": 1e-05,
      "loss": 1.5375,
      "step": 186
    },
    {
      "epoch": 0.12971473163964276,
      "grad_norm": 2.0877632629967913,
      "learning_rate": 1e-05,
      "loss": 1.5471,
      "step": 187
    },
    {
      "epoch": 0.1304083933061649,
      "grad_norm": 2.22367106492369,
      "learning_rate": 1e-05,
      "loss": 1.4583,
      "step": 188
    },
    {
      "epoch": 0.13110205497268707,
      "grad_norm": 1.9943605205254191,
      "learning_rate": 1e-05,
      "loss": 1.5008,
      "step": 189
    },
    {
      "epoch": 0.13179571663920922,
      "grad_norm": 2.096604510058919,
      "learning_rate": 1e-05,
      "loss": 1.5449,
      "step": 190
    },
    {
      "epoch": 0.13248937830573138,
      "grad_norm": 1.9264619546423505,
      "learning_rate": 1e-05,
      "loss": 1.5068,
      "step": 191
    },
    {
      "epoch": 0.13318303997225353,
      "grad_norm": 2.250824707812072,
      "learning_rate": 1e-05,
      "loss": 1.4505,
      "step": 192
    },
    {
      "epoch": 0.13387670163877569,
      "grad_norm": 2.166944357294215,
      "learning_rate": 1e-05,
      "loss": 1.5341,
      "step": 193
    },
    {
      "epoch": 0.13457036330529784,
      "grad_norm": 2.0250424027409673,
      "learning_rate": 1e-05,
      "loss": 1.4852,
      "step": 194
    },
    {
      "epoch": 0.13526402497182,
      "grad_norm": 2.122488575543949,
      "learning_rate": 1e-05,
      "loss": 1.4973,
      "step": 195
    },
    {
      "epoch": 0.13595768663834215,
      "grad_norm": 2.1098754788199647,
      "learning_rate": 1e-05,
      "loss": 1.5069,
      "step": 196
    },
    {
      "epoch": 0.1366513483048643,
      "grad_norm": 2.088689323771004,
      "learning_rate": 1e-05,
      "loss": 1.4936,
      "step": 197
    },
    {
      "epoch": 0.13734500997138646,
      "grad_norm": 1.88466378266683,
      "learning_rate": 1e-05,
      "loss": 1.5068,
      "step": 198
    },
    {
      "epoch": 0.13803867163790862,
      "grad_norm": 2.0160265524905845,
      "learning_rate": 1e-05,
      "loss": 1.5368,
      "step": 199
    },
    {
      "epoch": 0.13873233330443077,
      "grad_norm": 1.8858982582224784,
      "learning_rate": 1e-05,
      "loss": 1.4676,
      "step": 200
    },
    {
      "epoch": 0.13942599497095293,
      "grad_norm": 1.8632802765837246,
      "learning_rate": 1e-05,
      "loss": 1.5151,
      "step": 201
    },
    {
      "epoch": 0.14011965663747508,
      "grad_norm": 1.9258461198592782,
      "learning_rate": 1e-05,
      "loss": 1.501,
      "step": 202
    },
    {
      "epoch": 0.14081331830399724,
      "grad_norm": 1.9872609586669983,
      "learning_rate": 1e-05,
      "loss": 1.4688,
      "step": 203
    },
    {
      "epoch": 0.1415069799705194,
      "grad_norm": 1.8678557794614834,
      "learning_rate": 1e-05,
      "loss": 1.5344,
      "step": 204
    },
    {
      "epoch": 0.14220064163704152,
      "grad_norm": 1.8914203459451417,
      "learning_rate": 1e-05,
      "loss": 1.4892,
      "step": 205
    },
    {
      "epoch": 0.14289430330356367,
      "grad_norm": 1.9911065628954558,
      "learning_rate": 1e-05,
      "loss": 1.5092,
      "step": 206
    },
    {
      "epoch": 0.14358796497008583,
      "grad_norm": 2.0485687976317966,
      "learning_rate": 1e-05,
      "loss": 1.5167,
      "step": 207
    },
    {
      "epoch": 0.14428162663660798,
      "grad_norm": 1.9496266310348234,
      "learning_rate": 1e-05,
      "loss": 1.4602,
      "step": 208
    },
    {
      "epoch": 0.14497528830313014,
      "grad_norm": 2.2871818269739754,
      "learning_rate": 1e-05,
      "loss": 1.4685,
      "step": 209
    },
    {
      "epoch": 0.1456689499696523,
      "grad_norm": 1.984619214022057,
      "learning_rate": 1e-05,
      "loss": 1.4409,
      "step": 210
    },
    {
      "epoch": 0.14636261163617445,
      "grad_norm": 1.9696667123704634,
      "learning_rate": 1e-05,
      "loss": 1.5176,
      "step": 211
    },
    {
      "epoch": 0.1470562733026966,
      "grad_norm": 1.933399558736106,
      "learning_rate": 1e-05,
      "loss": 1.5327,
      "step": 212
    },
    {
      "epoch": 0.14774993496921876,
      "grad_norm": 2.1861585612655885,
      "learning_rate": 1e-05,
      "loss": 1.4685,
      "step": 213
    },
    {
      "epoch": 0.14844359663574092,
      "grad_norm": 2.0747570404469804,
      "learning_rate": 1e-05,
      "loss": 1.4925,
      "step": 214
    },
    {
      "epoch": 0.14913725830226307,
      "grad_norm": 2.0747837540555096,
      "learning_rate": 1e-05,
      "loss": 1.5019,
      "step": 215
    },
    {
      "epoch": 0.14983091996878523,
      "grad_norm": 2.0326660466120297,
      "learning_rate": 1e-05,
      "loss": 1.4597,
      "step": 216
    },
    {
      "epoch": 0.15052458163530738,
      "grad_norm": 1.7823779998232254,
      "learning_rate": 1e-05,
      "loss": 1.4539,
      "step": 217
    },
    {
      "epoch": 0.15121824330182954,
      "grad_norm": 2.157188291473536,
      "learning_rate": 1e-05,
      "loss": 1.5397,
      "step": 218
    },
    {
      "epoch": 0.1519119049683517,
      "grad_norm": 2.0183890562420905,
      "learning_rate": 1e-05,
      "loss": 1.4753,
      "step": 219
    },
    {
      "epoch": 0.15260556663487385,
      "grad_norm": 1.9531135223586058,
      "learning_rate": 1e-05,
      "loss": 1.4315,
      "step": 220
    },
    {
      "epoch": 0.153299228301396,
      "grad_norm": 2.038347402060851,
      "learning_rate": 1e-05,
      "loss": 1.4246,
      "step": 221
    },
    {
      "epoch": 0.15399288996791816,
      "grad_norm": 2.0488940765067967,
      "learning_rate": 1e-05,
      "loss": 1.4461,
      "step": 222
    },
    {
      "epoch": 0.1546865516344403,
      "grad_norm": 2.231661501840028,
      "learning_rate": 1e-05,
      "loss": 1.472,
      "step": 223
    },
    {
      "epoch": 0.15538021330096247,
      "grad_norm": 1.9308119943251083,
      "learning_rate": 1e-05,
      "loss": 1.5128,
      "step": 224
    },
    {
      "epoch": 0.15607387496748462,
      "grad_norm": 2.229149512340474,
      "learning_rate": 1e-05,
      "loss": 1.4479,
      "step": 225
    },
    {
      "epoch": 0.15676753663400678,
      "grad_norm": 2.0928578049018483,
      "learning_rate": 1e-05,
      "loss": 1.5009,
      "step": 226
    },
    {
      "epoch": 0.1574611983005289,
      "grad_norm": 1.9663752374718868,
      "learning_rate": 1e-05,
      "loss": 1.4818,
      "step": 227
    },
    {
      "epoch": 0.15815485996705106,
      "grad_norm": 2.1034138941034786,
      "learning_rate": 1e-05,
      "loss": 1.4859,
      "step": 228
    },
    {
      "epoch": 0.15884852163357321,
      "grad_norm": 2.3560738411841626,
      "learning_rate": 1e-05,
      "loss": 1.4779,
      "step": 229
    },
    {
      "epoch": 0.15954218330009537,
      "grad_norm": 2.4332643261654403,
      "learning_rate": 1e-05,
      "loss": 1.4809,
      "step": 230
    },
    {
      "epoch": 0.16023584496661752,
      "grad_norm": 2.0278596830202757,
      "learning_rate": 1e-05,
      "loss": 1.4559,
      "step": 231
    },
    {
      "epoch": 0.16092950663313968,
      "grad_norm": 2.3341820259604984,
      "learning_rate": 1e-05,
      "loss": 1.479,
      "step": 232
    },
    {
      "epoch": 0.16162316829966183,
      "grad_norm": 1.9872750396163408,
      "learning_rate": 1e-05,
      "loss": 1.4715,
      "step": 233
    },
    {
      "epoch": 0.162316829966184,
      "grad_norm": 1.9999402472599845,
      "learning_rate": 1e-05,
      "loss": 1.5459,
      "step": 234
    },
    {
      "epoch": 0.16301049163270614,
      "grad_norm": 2.128103039977492,
      "learning_rate": 1e-05,
      "loss": 1.5096,
      "step": 235
    },
    {
      "epoch": 0.1637041532992283,
      "grad_norm": 1.9768244155184505,
      "learning_rate": 1e-05,
      "loss": 1.5127,
      "step": 236
    },
    {
      "epoch": 0.16439781496575046,
      "grad_norm": 1.788660125975532,
      "learning_rate": 1e-05,
      "loss": 1.4742,
      "step": 237
    },
    {
      "epoch": 0.1650914766322726,
      "grad_norm": 2.091066870692497,
      "learning_rate": 1e-05,
      "loss": 1.4978,
      "step": 238
    },
    {
      "epoch": 0.16578513829879477,
      "grad_norm": 1.9576004965176508,
      "learning_rate": 1e-05,
      "loss": 1.45,
      "step": 239
    },
    {
      "epoch": 0.16647879996531692,
      "grad_norm": 2.0698620965680528,
      "learning_rate": 1e-05,
      "loss": 1.4502,
      "step": 240
    },
    {
      "epoch": 0.16717246163183908,
      "grad_norm": 2.07030748449516,
      "learning_rate": 1e-05,
      "loss": 1.5028,
      "step": 241
    },
    {
      "epoch": 0.16786612329836123,
      "grad_norm": 2.0470194343255455,
      "learning_rate": 1e-05,
      "loss": 1.4714,
      "step": 242
    },
    {
      "epoch": 0.16855978496488339,
      "grad_norm": 2.0084641094309794,
      "learning_rate": 1e-05,
      "loss": 1.4819,
      "step": 243
    },
    {
      "epoch": 0.16925344663140554,
      "grad_norm": 2.0146863278209106,
      "learning_rate": 1e-05,
      "loss": 1.4604,
      "step": 244
    },
    {
      "epoch": 0.1699471082979277,
      "grad_norm": 2.068474055525701,
      "learning_rate": 1e-05,
      "loss": 1.4494,
      "step": 245
    },
    {
      "epoch": 0.17064076996444985,
      "grad_norm": 1.9451803439502662,
      "learning_rate": 1e-05,
      "loss": 1.4424,
      "step": 246
    },
    {
      "epoch": 0.171334431630972,
      "grad_norm": 2.0197781186907835,
      "learning_rate": 1e-05,
      "loss": 1.4614,
      "step": 247
    },
    {
      "epoch": 0.17202809329749413,
      "grad_norm": 1.9032139101620693,
      "learning_rate": 1e-05,
      "loss": 1.4731,
      "step": 248
    },
    {
      "epoch": 0.1727217549640163,
      "grad_norm": 2.115774791694279,
      "learning_rate": 1e-05,
      "loss": 1.416,
      "step": 249
    },
    {
      "epoch": 0.17341541663053844,
      "grad_norm": 2.0399470329500047,
      "learning_rate": 1e-05,
      "loss": 1.4481,
      "step": 250
    },
    {
      "epoch": 0.1741090782970606,
      "grad_norm": 2.2047084098275027,
      "learning_rate": 1e-05,
      "loss": 1.4822,
      "step": 251
    },
    {
      "epoch": 0.17480273996358275,
      "grad_norm": 2.0604716275395374,
      "learning_rate": 1e-05,
      "loss": 1.4698,
      "step": 252
    },
    {
      "epoch": 0.1754964016301049,
      "grad_norm": 2.0292160669190107,
      "learning_rate": 1e-05,
      "loss": 1.4292,
      "step": 253
    },
    {
      "epoch": 0.17619006329662706,
      "grad_norm": 1.8814965372884114,
      "learning_rate": 1e-05,
      "loss": 1.448,
      "step": 254
    },
    {
      "epoch": 0.17688372496314922,
      "grad_norm": 2.1057756496393805,
      "learning_rate": 1e-05,
      "loss": 1.4335,
      "step": 255
    },
    {
      "epoch": 0.17757738662967137,
      "grad_norm": 2.3132846447554227,
      "learning_rate": 1e-05,
      "loss": 1.4856,
      "step": 256
    },
    {
      "epoch": 0.17827104829619353,
      "grad_norm": 2.0736038589358734,
      "learning_rate": 1e-05,
      "loss": 1.4483,
      "step": 257
    },
    {
      "epoch": 0.17896470996271568,
      "grad_norm": 2.1910317153749985,
      "learning_rate": 1e-05,
      "loss": 1.4896,
      "step": 258
    },
    {
      "epoch": 0.17965837162923784,
      "grad_norm": 2.054248882735396,
      "learning_rate": 1e-05,
      "loss": 1.4202,
      "step": 259
    },
    {
      "epoch": 0.18035203329576,
      "grad_norm": 1.9735586840820536,
      "learning_rate": 1e-05,
      "loss": 1.492,
      "step": 260
    },
    {
      "epoch": 0.18104569496228215,
      "grad_norm": 2.0486128891661357,
      "learning_rate": 1e-05,
      "loss": 1.4842,
      "step": 261
    },
    {
      "epoch": 0.1817393566288043,
      "grad_norm": 2.0487679336561535,
      "learning_rate": 1e-05,
      "loss": 1.3914,
      "step": 262
    },
    {
      "epoch": 0.18243301829532646,
      "grad_norm": 2.122214099406227,
      "learning_rate": 1e-05,
      "loss": 1.5043,
      "step": 263
    },
    {
      "epoch": 0.18312667996184862,
      "grad_norm": 1.9819401665450367,
      "learning_rate": 1e-05,
      "loss": 1.461,
      "step": 264
    },
    {
      "epoch": 0.18382034162837077,
      "grad_norm": 1.8894079827925012,
      "learning_rate": 1e-05,
      "loss": 1.5248,
      "step": 265
    },
    {
      "epoch": 0.18451400329489293,
      "grad_norm": 1.9721789820827966,
      "learning_rate": 1e-05,
      "loss": 1.4289,
      "step": 266
    },
    {
      "epoch": 0.18520766496141508,
      "grad_norm": 1.9579977527698131,
      "learning_rate": 1e-05,
      "loss": 1.4313,
      "step": 267
    },
    {
      "epoch": 0.18590132662793724,
      "grad_norm": 2.0444647256611885,
      "learning_rate": 1e-05,
      "loss": 1.4645,
      "step": 268
    },
    {
      "epoch": 0.1865949882944594,
      "grad_norm": 2.1214502305742275,
      "learning_rate": 1e-05,
      "loss": 1.4712,
      "step": 269
    },
    {
      "epoch": 0.18728864996098152,
      "grad_norm": 1.9384086787313417,
      "learning_rate": 1e-05,
      "loss": 1.4641,
      "step": 270
    },
    {
      "epoch": 0.18798231162750367,
      "grad_norm": 1.9903323950267982,
      "learning_rate": 1e-05,
      "loss": 1.4438,
      "step": 271
    },
    {
      "epoch": 0.18867597329402583,
      "grad_norm": 2.2552584575632264,
      "learning_rate": 1e-05,
      "loss": 1.4954,
      "step": 272
    },
    {
      "epoch": 0.18936963496054798,
      "grad_norm": 2.0400461455788075,
      "learning_rate": 1e-05,
      "loss": 1.4574,
      "step": 273
    },
    {
      "epoch": 0.19006329662707014,
      "grad_norm": 1.985049295968063,
      "learning_rate": 1e-05,
      "loss": 1.4714,
      "step": 274
    },
    {
      "epoch": 0.1907569582935923,
      "grad_norm": 2.0084753961869173,
      "learning_rate": 1e-05,
      "loss": 1.4557,
      "step": 275
    },
    {
      "epoch": 0.19145061996011445,
      "grad_norm": 2.0212614192536473,
      "learning_rate": 1e-05,
      "loss": 1.4841,
      "step": 276
    },
    {
      "epoch": 0.1921442816266366,
      "grad_norm": 2.1484850741149035,
      "learning_rate": 1e-05,
      "loss": 1.5103,
      "step": 277
    },
    {
      "epoch": 0.19283794329315876,
      "grad_norm": 1.9196012631959583,
      "learning_rate": 1e-05,
      "loss": 1.4313,
      "step": 278
    },
    {
      "epoch": 0.19353160495968091,
      "grad_norm": 1.77676382629001,
      "learning_rate": 1e-05,
      "loss": 1.4468,
      "step": 279
    },
    {
      "epoch": 0.19422526662620307,
      "grad_norm": 1.9938658500301698,
      "learning_rate": 1e-05,
      "loss": 1.4,
      "step": 280
    },
    {
      "epoch": 0.19491892829272522,
      "grad_norm": 2.00675288394433,
      "learning_rate": 1e-05,
      "loss": 1.393,
      "step": 281
    },
    {
      "epoch": 0.19561258995924738,
      "grad_norm": 1.9133179166179877,
      "learning_rate": 1e-05,
      "loss": 1.4856,
      "step": 282
    },
    {
      "epoch": 0.19630625162576953,
      "grad_norm": 2.059048555946398,
      "learning_rate": 1e-05,
      "loss": 1.4583,
      "step": 283
    },
    {
      "epoch": 0.1969999132922917,
      "grad_norm": 1.9633303202331707,
      "learning_rate": 1e-05,
      "loss": 1.4713,
      "step": 284
    },
    {
      "epoch": 0.19769357495881384,
      "grad_norm": 2.062438630313021,
      "learning_rate": 1e-05,
      "loss": 1.4695,
      "step": 285
    },
    {
      "epoch": 0.198387236625336,
      "grad_norm": 2.0881034716487017,
      "learning_rate": 1e-05,
      "loss": 1.4088,
      "step": 286
    },
    {
      "epoch": 0.19908089829185815,
      "grad_norm": 2.1703550326161416,
      "learning_rate": 1e-05,
      "loss": 1.5003,
      "step": 287
    },
    {
      "epoch": 0.1997745599583803,
      "grad_norm": 2.074897325155734,
      "learning_rate": 1e-05,
      "loss": 1.4233,
      "step": 288
    },
    {
      "epoch": 0.20046822162490247,
      "grad_norm": 2.1455316048865667,
      "learning_rate": 1e-05,
      "loss": 1.499,
      "step": 289
    },
    {
      "epoch": 0.20116188329142462,
      "grad_norm": 2.101826626794352,
      "learning_rate": 1e-05,
      "loss": 1.423,
      "step": 290
    },
    {
      "epoch": 0.20185554495794678,
      "grad_norm": 1.9433232261964963,
      "learning_rate": 1e-05,
      "loss": 1.4258,
      "step": 291
    },
    {
      "epoch": 0.2025492066244689,
      "grad_norm": 2.0443499477866878,
      "learning_rate": 1e-05,
      "loss": 1.4755,
      "step": 292
    },
    {
      "epoch": 0.20324286829099106,
      "grad_norm": 2.0522507133871795,
      "learning_rate": 1e-05,
      "loss": 1.4472,
      "step": 293
    },
    {
      "epoch": 0.2039365299575132,
      "grad_norm": 2.138230646253678,
      "learning_rate": 1e-05,
      "loss": 1.49,
      "step": 294
    },
    {
      "epoch": 0.20463019162403537,
      "grad_norm": 1.8226561062473423,
      "learning_rate": 1e-05,
      "loss": 1.4556,
      "step": 295
    },
    {
      "epoch": 0.20532385329055752,
      "grad_norm": 2.038783861493903,
      "learning_rate": 1e-05,
      "loss": 1.4549,
      "step": 296
    },
    {
      "epoch": 0.20601751495707968,
      "grad_norm": 2.0515757165248054,
      "learning_rate": 1e-05,
      "loss": 1.4662,
      "step": 297
    },
    {
      "epoch": 0.20671117662360183,
      "grad_norm": 2.0235486233300564,
      "learning_rate": 1e-05,
      "loss": 1.4422,
      "step": 298
    },
    {
      "epoch": 0.207404838290124,
      "grad_norm": 1.9481327456484323,
      "learning_rate": 1e-05,
      "loss": 1.4871,
      "step": 299
    },
    {
      "epoch": 0.20809849995664614,
      "grad_norm": 2.1490389171940962,
      "learning_rate": 1e-05,
      "loss": 1.4438,
      "step": 300
    },
    {
      "epoch": 0.2087921616231683,
      "grad_norm": 1.8636272783701093,
      "learning_rate": 1e-05,
      "loss": 1.3833,
      "step": 301
    },
    {
      "epoch": 0.20948582328969045,
      "grad_norm": 2.0132879418102525,
      "learning_rate": 1e-05,
      "loss": 1.5079,
      "step": 302
    },
    {
      "epoch": 0.2101794849562126,
      "grad_norm": 1.982488095685725,
      "learning_rate": 1e-05,
      "loss": 1.4497,
      "step": 303
    },
    {
      "epoch": 0.21087314662273476,
      "grad_norm": 1.8238922950218728,
      "learning_rate": 1e-05,
      "loss": 1.4535,
      "step": 304
    },
    {
      "epoch": 0.21156680828925692,
      "grad_norm": 1.93657599411593,
      "learning_rate": 1e-05,
      "loss": 1.413,
      "step": 305
    },
    {
      "epoch": 0.21226046995577907,
      "grad_norm": 1.8702247940122898,
      "learning_rate": 1e-05,
      "loss": 1.4419,
      "step": 306
    },
    {
      "epoch": 0.21295413162230123,
      "grad_norm": 1.888122691714043,
      "learning_rate": 1e-05,
      "loss": 1.4674,
      "step": 307
    },
    {
      "epoch": 0.21364779328882338,
      "grad_norm": 1.937294530390563,
      "learning_rate": 1e-05,
      "loss": 1.5099,
      "step": 308
    },
    {
      "epoch": 0.21434145495534554,
      "grad_norm": 1.7678460926752717,
      "learning_rate": 1e-05,
      "loss": 1.4653,
      "step": 309
    },
    {
      "epoch": 0.2150351166218677,
      "grad_norm": 1.9151083060534004,
      "learning_rate": 1e-05,
      "loss": 1.4605,
      "step": 310
    },
    {
      "epoch": 0.21572877828838985,
      "grad_norm": 1.7787361825304597,
      "learning_rate": 1e-05,
      "loss": 1.4693,
      "step": 311
    },
    {
      "epoch": 0.216422439954912,
      "grad_norm": 1.9120724944581113,
      "learning_rate": 1e-05,
      "loss": 1.4881,
      "step": 312
    },
    {
      "epoch": 0.21711610162143413,
      "grad_norm": 1.779761445996506,
      "learning_rate": 1e-05,
      "loss": 1.4091,
      "step": 313
    },
    {
      "epoch": 0.2178097632879563,
      "grad_norm": 2.0485121226808527,
      "learning_rate": 1e-05,
      "loss": 1.434,
      "step": 314
    },
    {
      "epoch": 0.21850342495447844,
      "grad_norm": 1.8510715617371953,
      "learning_rate": 1e-05,
      "loss": 1.4502,
      "step": 315
    },
    {
      "epoch": 0.2191970866210006,
      "grad_norm": 2.0639411090008717,
      "learning_rate": 1e-05,
      "loss": 1.4663,
      "step": 316
    },
    {
      "epoch": 0.21989074828752275,
      "grad_norm": 1.8671225612100188,
      "learning_rate": 1e-05,
      "loss": 1.4679,
      "step": 317
    },
    {
      "epoch": 0.2205844099540449,
      "grad_norm": 2.089367239670572,
      "learning_rate": 1e-05,
      "loss": 1.4195,
      "step": 318
    },
    {
      "epoch": 0.22127807162056706,
      "grad_norm": 1.989736808079142,
      "learning_rate": 1e-05,
      "loss": 1.4397,
      "step": 319
    },
    {
      "epoch": 0.22197173328708922,
      "grad_norm": 1.9101605077692294,
      "learning_rate": 1e-05,
      "loss": 1.3985,
      "step": 320
    },
    {
      "epoch": 0.22266539495361137,
      "grad_norm": 1.9384620738328688,
      "learning_rate": 1e-05,
      "loss": 1.4223,
      "step": 321
    },
    {
      "epoch": 0.22335905662013353,
      "grad_norm": 2.1584155527633198,
      "learning_rate": 1e-05,
      "loss": 1.4836,
      "step": 322
    },
    {
      "epoch": 0.22405271828665568,
      "grad_norm": 2.0224370906370694,
      "learning_rate": 1e-05,
      "loss": 1.4454,
      "step": 323
    },
    {
      "epoch": 0.22474637995317784,
      "grad_norm": 2.0788397833125765,
      "learning_rate": 1e-05,
      "loss": 1.5232,
      "step": 324
    },
    {
      "epoch": 0.2254400416197,
      "grad_norm": 2.0341494419793427,
      "learning_rate": 1e-05,
      "loss": 1.4617,
      "step": 325
    },
    {
      "epoch": 0.22613370328622215,
      "grad_norm": 2.008192362429119,
      "learning_rate": 1e-05,
      "loss": 1.4375,
      "step": 326
    },
    {
      "epoch": 0.2268273649527443,
      "grad_norm": 1.7720050916534584,
      "learning_rate": 1e-05,
      "loss": 1.4517,
      "step": 327
    },
    {
      "epoch": 0.22752102661926646,
      "grad_norm": 1.9703464005250477,
      "learning_rate": 1e-05,
      "loss": 1.4201,
      "step": 328
    },
    {
      "epoch": 0.2282146882857886,
      "grad_norm": 1.8046683048934846,
      "learning_rate": 1e-05,
      "loss": 1.4441,
      "step": 329
    },
    {
      "epoch": 0.22890834995231077,
      "grad_norm": 1.996448453239083,
      "learning_rate": 1e-05,
      "loss": 1.411,
      "step": 330
    },
    {
      "epoch": 0.22960201161883292,
      "grad_norm": 2.202209909202205,
      "learning_rate": 1e-05,
      "loss": 1.4443,
      "step": 331
    },
    {
      "epoch": 0.23029567328535508,
      "grad_norm": 1.9288611183096158,
      "learning_rate": 1e-05,
      "loss": 1.4261,
      "step": 332
    },
    {
      "epoch": 0.23098933495187723,
      "grad_norm": 1.9931802186149232,
      "learning_rate": 1e-05,
      "loss": 1.4228,
      "step": 333
    },
    {
      "epoch": 0.2316829966183994,
      "grad_norm": 1.9300657348149677,
      "learning_rate": 1e-05,
      "loss": 1.4791,
      "step": 334
    },
    {
      "epoch": 0.23237665828492152,
      "grad_norm": 1.952656400463476,
      "learning_rate": 1e-05,
      "loss": 1.4682,
      "step": 335
    },
    {
      "epoch": 0.23307031995144367,
      "grad_norm": 1.920902486845839,
      "learning_rate": 1e-05,
      "loss": 1.4286,
      "step": 336
    },
    {
      "epoch": 0.23376398161796583,
      "grad_norm": 1.9689037515231558,
      "learning_rate": 1e-05,
      "loss": 1.439,
      "step": 337
    },
    {
      "epoch": 0.23445764328448798,
      "grad_norm": 1.928911178735969,
      "learning_rate": 1e-05,
      "loss": 1.4464,
      "step": 338
    },
    {
      "epoch": 0.23515130495101014,
      "grad_norm": 2.1288190614130134,
      "learning_rate": 1e-05,
      "loss": 1.4386,
      "step": 339
    },
    {
      "epoch": 0.2358449666175323,
      "grad_norm": 2.076072187290826,
      "learning_rate": 1e-05,
      "loss": 1.4928,
      "step": 340
    },
    {
      "epoch": 0.23653862828405445,
      "grad_norm": 1.8705047682268778,
      "learning_rate": 1e-05,
      "loss": 1.4536,
      "step": 341
    },
    {
      "epoch": 0.2372322899505766,
      "grad_norm": 1.9565604386223752,
      "learning_rate": 1e-05,
      "loss": 1.4783,
      "step": 342
    },
    {
      "epoch": 0.23792595161709876,
      "grad_norm": 2.028892379215102,
      "learning_rate": 1e-05,
      "loss": 1.4272,
      "step": 343
    },
    {
      "epoch": 0.2386196132836209,
      "grad_norm": 1.8650321597118786,
      "learning_rate": 1e-05,
      "loss": 1.4742,
      "step": 344
    },
    {
      "epoch": 0.23931327495014307,
      "grad_norm": 2.1612306768116083,
      "learning_rate": 1e-05,
      "loss": 1.4405,
      "step": 345
    },
    {
      "epoch": 0.24000693661666522,
      "grad_norm": 2.0151472915975748,
      "learning_rate": 1e-05,
      "loss": 1.4246,
      "step": 346
    },
    {
      "epoch": 0.24070059828318738,
      "grad_norm": 2.1329263531073472,
      "learning_rate": 1e-05,
      "loss": 1.4483,
      "step": 347
    },
    {
      "epoch": 0.24139425994970953,
      "grad_norm": 1.8503952674647635,
      "learning_rate": 1e-05,
      "loss": 1.3852,
      "step": 348
    },
    {
      "epoch": 0.2420879216162317,
      "grad_norm": 1.9232118601702997,
      "learning_rate": 1e-05,
      "loss": 1.4704,
      "step": 349
    },
    {
      "epoch": 0.24278158328275384,
      "grad_norm": 1.8955902858682858,
      "learning_rate": 1e-05,
      "loss": 1.4472,
      "step": 350
    },
    {
      "epoch": 0.243475244949276,
      "grad_norm": 2.0209412893438503,
      "learning_rate": 1e-05,
      "loss": 1.453,
      "step": 351
    },
    {
      "epoch": 0.24416890661579815,
      "grad_norm": 2.0049406945887736,
      "learning_rate": 1e-05,
      "loss": 1.4429,
      "step": 352
    },
    {
      "epoch": 0.2448625682823203,
      "grad_norm": 1.8970813615917181,
      "learning_rate": 1e-05,
      "loss": 1.4145,
      "step": 353
    },
    {
      "epoch": 0.24555622994884246,
      "grad_norm": 1.998898929474657,
      "learning_rate": 1e-05,
      "loss": 1.3887,
      "step": 354
    },
    {
      "epoch": 0.24624989161536462,
      "grad_norm": 2.0390603685379944,
      "learning_rate": 1e-05,
      "loss": 1.477,
      "step": 355
    },
    {
      "epoch": 0.24694355328188675,
      "grad_norm": 1.9473625966599237,
      "learning_rate": 1e-05,
      "loss": 1.422,
      "step": 356
    },
    {
      "epoch": 0.2476372149484089,
      "grad_norm": 1.8621154509930413,
      "learning_rate": 1e-05,
      "loss": 1.4551,
      "step": 357
    },
    {
      "epoch": 0.24833087661493106,
      "grad_norm": 2.079525202308318,
      "learning_rate": 1e-05,
      "loss": 1.496,
      "step": 358
    },
    {
      "epoch": 0.2490245382814532,
      "grad_norm": 2.0606010137774162,
      "learning_rate": 1e-05,
      "loss": 1.3982,
      "step": 359
    },
    {
      "epoch": 0.24971819994797537,
      "grad_norm": 2.4565497491600015,
      "learning_rate": 1e-05,
      "loss": 1.4833,
      "step": 360
    },
    {
      "epoch": 0.2504118616144975,
      "grad_norm": 1.9825019010439706,
      "learning_rate": 1e-05,
      "loss": 1.4271,
      "step": 361
    },
    {
      "epoch": 0.2511055232810197,
      "grad_norm": 1.9086652448767583,
      "learning_rate": 1e-05,
      "loss": 1.4125,
      "step": 362
    },
    {
      "epoch": 0.25179918494754183,
      "grad_norm": 2.0999318583687625,
      "learning_rate": 1e-05,
      "loss": 1.4235,
      "step": 363
    },
    {
      "epoch": 0.252492846614064,
      "grad_norm": 1.985190014569069,
      "learning_rate": 1e-05,
      "loss": 1.4501,
      "step": 364
    },
    {
      "epoch": 0.25318650828058614,
      "grad_norm": 1.910068963847788,
      "learning_rate": 1e-05,
      "loss": 1.4658,
      "step": 365
    },
    {
      "epoch": 0.2538801699471083,
      "grad_norm": 2.1692090204353764,
      "learning_rate": 1e-05,
      "loss": 1.4418,
      "step": 366
    },
    {
      "epoch": 0.25457383161363045,
      "grad_norm": 1.9609194594229094,
      "learning_rate": 1e-05,
      "loss": 1.4975,
      "step": 367
    },
    {
      "epoch": 0.2552674932801526,
      "grad_norm": 1.9921583707202606,
      "learning_rate": 1e-05,
      "loss": 1.46,
      "step": 368
    },
    {
      "epoch": 0.25596115494667476,
      "grad_norm": 1.946762435726228,
      "learning_rate": 1e-05,
      "loss": 1.4346,
      "step": 369
    },
    {
      "epoch": 0.2566548166131969,
      "grad_norm": 1.9131156156498506,
      "learning_rate": 1e-05,
      "loss": 1.4145,
      "step": 370
    },
    {
      "epoch": 0.2573484782797191,
      "grad_norm": 2.0667230542895885,
      "learning_rate": 1e-05,
      "loss": 1.4428,
      "step": 371
    },
    {
      "epoch": 0.2580421399462412,
      "grad_norm": 1.8834877477820704,
      "learning_rate": 1e-05,
      "loss": 1.4198,
      "step": 372
    },
    {
      "epoch": 0.2587358016127634,
      "grad_norm": 2.192277081033706,
      "learning_rate": 1e-05,
      "loss": 1.4732,
      "step": 373
    },
    {
      "epoch": 0.2594294632792855,
      "grad_norm": 2.0923048016266406,
      "learning_rate": 1e-05,
      "loss": 1.4748,
      "step": 374
    },
    {
      "epoch": 0.2601231249458077,
      "grad_norm": 2.086280677890939,
      "learning_rate": 1e-05,
      "loss": 1.4405,
      "step": 375
    },
    {
      "epoch": 0.2608167866123298,
      "grad_norm": 1.905489769808509,
      "learning_rate": 1e-05,
      "loss": 1.4445,
      "step": 376
    },
    {
      "epoch": 0.261510448278852,
      "grad_norm": 2.0450091978264466,
      "learning_rate": 1e-05,
      "loss": 1.4186,
      "step": 377
    },
    {
      "epoch": 0.26220410994537413,
      "grad_norm": 2.133929617755548,
      "learning_rate": 1e-05,
      "loss": 1.4572,
      "step": 378
    },
    {
      "epoch": 0.2628977716118963,
      "grad_norm": 2.040274726964717,
      "learning_rate": 1e-05,
      "loss": 1.4238,
      "step": 379
    },
    {
      "epoch": 0.26359143327841844,
      "grad_norm": 2.031802523503596,
      "learning_rate": 1e-05,
      "loss": 1.485,
      "step": 380
    },
    {
      "epoch": 0.2642850949449406,
      "grad_norm": 1.8546387757797609,
      "learning_rate": 1e-05,
      "loss": 1.4579,
      "step": 381
    },
    {
      "epoch": 0.26497875661146275,
      "grad_norm": 2.0419344295588893,
      "learning_rate": 1e-05,
      "loss": 1.4663,
      "step": 382
    },
    {
      "epoch": 0.26567241827798493,
      "grad_norm": 1.9135093773488887,
      "learning_rate": 1e-05,
      "loss": 1.3924,
      "step": 383
    },
    {
      "epoch": 0.26636607994450706,
      "grad_norm": 1.9016139415831943,
      "learning_rate": 1e-05,
      "loss": 1.4308,
      "step": 384
    },
    {
      "epoch": 0.26705974161102924,
      "grad_norm": 1.9082693062876364,
      "learning_rate": 1e-05,
      "loss": 1.3594,
      "step": 385
    },
    {
      "epoch": 0.26775340327755137,
      "grad_norm": 1.9587042651997673,
      "learning_rate": 1e-05,
      "loss": 1.4879,
      "step": 386
    },
    {
      "epoch": 0.26844706494407355,
      "grad_norm": 1.8806230175431726,
      "learning_rate": 1e-05,
      "loss": 1.4101,
      "step": 387
    },
    {
      "epoch": 0.2691407266105957,
      "grad_norm": 1.7730356951796558,
      "learning_rate": 1e-05,
      "loss": 1.4196,
      "step": 388
    },
    {
      "epoch": 0.2698343882771178,
      "grad_norm": 2.0320726341653255,
      "learning_rate": 1e-05,
      "loss": 1.4207,
      "step": 389
    },
    {
      "epoch": 0.27052804994364,
      "grad_norm": 1.9758337171264837,
      "learning_rate": 1e-05,
      "loss": 1.3527,
      "step": 390
    },
    {
      "epoch": 0.2712217116101621,
      "grad_norm": 1.8958218154268665,
      "learning_rate": 1e-05,
      "loss": 1.4802,
      "step": 391
    },
    {
      "epoch": 0.2719153732766843,
      "grad_norm": 2.0207609598652,
      "learning_rate": 1e-05,
      "loss": 1.4304,
      "step": 392
    },
    {
      "epoch": 0.27260903494320643,
      "grad_norm": 1.9475691858544313,
      "learning_rate": 1e-05,
      "loss": 1.4018,
      "step": 393
    },
    {
      "epoch": 0.2733026966097286,
      "grad_norm": 1.9389041388735107,
      "learning_rate": 1e-05,
      "loss": 1.4343,
      "step": 394
    },
    {
      "epoch": 0.27399635827625074,
      "grad_norm": 1.905336185660673,
      "learning_rate": 1e-05,
      "loss": 1.4283,
      "step": 395
    },
    {
      "epoch": 0.2746900199427729,
      "grad_norm": 1.729440202579088,
      "learning_rate": 1e-05,
      "loss": 1.4066,
      "step": 396
    },
    {
      "epoch": 0.27538368160929505,
      "grad_norm": 1.7537140656073078,
      "learning_rate": 1e-05,
      "loss": 1.3722,
      "step": 397
    },
    {
      "epoch": 0.27607734327581723,
      "grad_norm": 1.8075317974729948,
      "learning_rate": 1e-05,
      "loss": 1.3875,
      "step": 398
    },
    {
      "epoch": 0.27677100494233936,
      "grad_norm": 1.8284022154266728,
      "learning_rate": 1e-05,
      "loss": 1.401,
      "step": 399
    },
    {
      "epoch": 0.27746466660886154,
      "grad_norm": 2.0107348857071563,
      "learning_rate": 1e-05,
      "loss": 1.4018,
      "step": 400
    },
{ |
|
"epoch": 0.27815832827538367, |
|
"grad_norm": 2.117911173802508, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4517, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.27885198994190585, |
|
"grad_norm": 1.8033673093925715, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3847, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.279545651608428, |
|
"grad_norm": 1.7714377429685437, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4638, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.28023931327495016, |
|
"grad_norm": 1.8489883317533833, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4183, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.2809329749414723, |
|
"grad_norm": 2.0970756827183625, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4531, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.2816266366079945, |
|
"grad_norm": 2.0300925713263247, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4129, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.2823202982745166, |
|
"grad_norm": 2.0694538236815365, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4346, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.2830139599410388, |
|
"grad_norm": 2.125152836002329, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4636, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.2837076216075609, |
|
"grad_norm": 1.783820266730962, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4327, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.28440128327408304, |
|
"grad_norm": 2.0534367028415943, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4691, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.2850949449406052, |
|
"grad_norm": 1.876558701718411, |
|
"learning_rate": 1e-05, |
|
"loss": 1.383, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.28578860660712735, |
|
"grad_norm": 2.0521158454990855, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4213, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.28648226827364953, |
|
"grad_norm": 2.0220971535707877, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4658, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.28717592994017166, |
|
"grad_norm": 2.038148661267878, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4389, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.28786959160669384, |
|
"grad_norm": 1.9405516561969085, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4077, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.28856325327321597, |
|
"grad_norm": 1.7499414272985196, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4215, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.28925691493973815, |
|
"grad_norm": 1.8287074775541738, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4734, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.2899505766062603, |
|
"grad_norm": 2.029489052381792, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3857, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.29064423827278246, |
|
"grad_norm": 1.9688645010655113, |
|
"learning_rate": 1e-05, |
|
"loss": 1.377, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.2913378999393046, |
|
"grad_norm": 1.925094261068192, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4246, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2920315616058268, |
|
"grad_norm": 2.117190679742464, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4595, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.2927252232723489, |
|
"grad_norm": 2.076005784675454, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4242, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.2934188849388711, |
|
"grad_norm": 1.9173678536303644, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4209, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.2941125466053932, |
|
"grad_norm": 1.9453593461528418, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4067, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.2948062082719154, |
|
"grad_norm": 2.067561860466064, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4637, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.2954998699384375, |
|
"grad_norm": 1.899343422880033, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4355, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.2961935316049597, |
|
"grad_norm": 1.9847242096071747, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4292, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.29688719327148183, |
|
"grad_norm": 1.854907399156512, |
|
"learning_rate": 1e-05, |
|
"loss": 1.443, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.297580854938004, |
|
"grad_norm": 1.955263267566865, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3928, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.29827451660452614, |
|
"grad_norm": 2.053105452037698, |
|
"learning_rate": 1e-05, |
|
"loss": 1.378, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2989681782710483, |
|
"grad_norm": 1.9109805438113896, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3645, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.29966183993757045, |
|
"grad_norm": 2.043933400997838, |
|
"learning_rate": 1e-05, |
|
"loss": 1.435, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.3003555016040926, |
|
"grad_norm": 2.1971134552732785, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4391, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.30104916327061476, |
|
"grad_norm": 1.9910023447928422, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4084, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.3017428249371369, |
|
"grad_norm": 2.0432125795374607, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4423, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.30243648660365907, |
|
"grad_norm": 1.9609905480132668, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4511, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.3031301482701812, |
|
"grad_norm": 1.8755027815135972, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3937, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.3038238099367034, |
|
"grad_norm": 1.9069584207191346, |
|
"learning_rate": 1e-05, |
|
"loss": 1.38, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.3045174716032255, |
|
"grad_norm": 2.174136818483425, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4535, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.3052111332697477, |
|
"grad_norm": 1.8297620514780848, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4285, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3059047949362698, |
|
"grad_norm": 1.8949261520180545, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4336, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.306598456602792, |
|
"grad_norm": 1.9549110409659416, |
|
"learning_rate": 1e-05, |
|
"loss": 1.373, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.30729211826931413, |
|
"grad_norm": 1.9504761362373537, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3915, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.3079857799358363, |
|
"grad_norm": 1.8994399368824695, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3887, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.30867944160235844, |
|
"grad_norm": 1.8796327717601744, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4075, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.3093731032688806, |
|
"grad_norm": 1.9681787692068275, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4476, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.31006676493540275, |
|
"grad_norm": 2.0373492235623285, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3909, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.31076042660192493, |
|
"grad_norm": 1.8891756577788732, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4804, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.31145408826844706, |
|
"grad_norm": 1.833019021556368, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4207, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.31214774993496924, |
|
"grad_norm": 1.836006077142845, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3933, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.31284141160149137, |
|
"grad_norm": 1.8756183495188887, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4081, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.31353507326801355, |
|
"grad_norm": 1.8832247399343314, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3978, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.3142287349345357, |
|
"grad_norm": 1.9411565103780286, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4346, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.3149223966010578, |
|
"grad_norm": 1.9226021145167342, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4192, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.31561605826758, |
|
"grad_norm": 1.864849847975618, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4023, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.3163097199341021, |
|
"grad_norm": 1.841130556375779, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3843, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.3170033816006243, |
|
"grad_norm": 1.9910619133587744, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4196, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.31769704326714643, |
|
"grad_norm": 1.871071007237103, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4398, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.3183907049336686, |
|
"grad_norm": 1.9525027065397538, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4221, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.31908436660019074, |
|
"grad_norm": 1.875959289372641, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4568, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3197780282667129, |
|
"grad_norm": 1.896488018271282, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4443, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.32047168993323505, |
|
"grad_norm": 1.819017143897324, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3804, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.32116535159975723, |
|
"grad_norm": 1.904368280434257, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3803, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.32185901326627936, |
|
"grad_norm": 1.9630080846645839, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4099, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.32255267493280154, |
|
"grad_norm": 1.8933783131596658, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4426, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.32324633659932367, |
|
"grad_norm": 1.9919230915473398, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4099, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.32393999826584585, |
|
"grad_norm": 2.0057958567267606, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4409, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.324633659932368, |
|
"grad_norm": 2.0237082745853088, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4331, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.32532732159889016, |
|
"grad_norm": 1.9072494572387, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4298, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.3260209832654123, |
|
"grad_norm": 2.1235561783712718, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4125, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3267146449319345, |
|
"grad_norm": 1.844374731285956, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3672, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.3274083065984566, |
|
"grad_norm": 2.1204902498109326, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3909, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.3281019682649788, |
|
"grad_norm": 2.084912862868831, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4521, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.3287956299315009, |
|
"grad_norm": 1.9363382366089963, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4166, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.32948929159802304, |
|
"grad_norm": 2.043516332073307, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4221, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.3301829532645452, |
|
"grad_norm": 2.125873099222709, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4894, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.33087661493106735, |
|
"grad_norm": 2.042031475330511, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4025, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.33157027659758953, |
|
"grad_norm": 1.7849937086119454, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4132, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.33226393826411166, |
|
"grad_norm": 1.7931574592397888, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4273, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.33295759993063384, |
|
"grad_norm": 2.826745637249205, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3552, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.33365126159715597, |
|
"grad_norm": 1.9644473691572903, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3728, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.33434492326367815, |
|
"grad_norm": 1.8815785092027932, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3836, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.3350385849302003, |
|
"grad_norm": 2.1707103250407265, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3872, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.33573224659672246, |
|
"grad_norm": 1.805124910060426, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4269, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.3364259082632446, |
|
"grad_norm": 2.0566289424785165, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4353, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.33711956992976677, |
|
"grad_norm": 1.9392776766197952, |
|
"learning_rate": 1e-05, |
|
"loss": 1.42, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.3378132315962889, |
|
"grad_norm": 1.7990589474342267, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4232, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.3385068932628111, |
|
"grad_norm": 2.042243634171937, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4094, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.3392005549293332, |
|
"grad_norm": 1.9990437442788238, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4349, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.3398942165958554, |
|
"grad_norm": 2.1051212689157777, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3838, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3405878782623775, |
|
"grad_norm": 2.1229600713115238, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3403, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.3412815399288997, |
|
"grad_norm": 1.8779948313961718, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3938, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.34197520159542183, |
|
"grad_norm": 1.9140202797636157, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3917, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.342668863261944, |
|
"grad_norm": 1.974739574014709, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4054, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.34336252492846614, |
|
"grad_norm": 1.9199035763474215, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3922, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.34405618659498827, |
|
"grad_norm": 1.9177941872465927, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4168, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.34474984826151045, |
|
"grad_norm": 2.00337000954214, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3949, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.3454435099280326, |
|
"grad_norm": 1.998945535857092, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3929, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.34613717159455476, |
|
"grad_norm": 2.0182449229484636, |
|
"learning_rate": 1e-05, |
|
"loss": 1.422, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.3468308332610769, |
|
"grad_norm": 1.9361126702422482, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3964, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.34752449492759907, |
|
"grad_norm": 2.0248490486946316, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4293, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.3482181565941212, |
|
"grad_norm": 1.8982822396985153, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3778, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.3489118182606434, |
|
"grad_norm": 2.059259100223746, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4509, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.3496054799271655, |
|
"grad_norm": 2.00329460831796, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3878, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.3502991415936877, |
|
"grad_norm": 1.9719718567850673, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3829, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.3509928032602098, |
|
"grad_norm": 1.9623427987164561, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4036, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.351686464926732, |
|
"grad_norm": 2.022806123443883, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4068, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.35238012659325413, |
|
"grad_norm": 2.00015503601285, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4248, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.3530737882597763, |
|
"grad_norm": 1.904741684713311, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3519, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.35376744992629844, |
|
"grad_norm": 1.9455166972699935, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4152, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3544611115928206, |
|
"grad_norm": 1.9409989480916887, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3916, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.35515477325934275, |
|
"grad_norm": 1.8707742492583141, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4362, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.35584843492586493, |
|
"grad_norm": 1.8314588678913148, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4279, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.35654209659238706, |
|
"grad_norm": 1.913107053098294, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4686, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.35723575825890924, |
|
"grad_norm": 1.8781008328794606, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4257, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.35792941992543137, |
|
"grad_norm": 1.858897618474902, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3299, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.35862308159195355, |
|
"grad_norm": 1.7902966802183116, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3806, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.3593167432584757, |
|
"grad_norm": 1.9406550842391148, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4069, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.3600104049249978, |
|
"grad_norm": 2.032374189763469, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3893, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.36070406659152, |
|
"grad_norm": 2.0622944109637946, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4044, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3613977282580421, |
|
"grad_norm": 1.7694621919787776, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3781, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.3620913899245643, |
|
"grad_norm": 1.9876645276332312, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3658, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.3627850515910864, |
|
"grad_norm": 1.8286541300136883, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3954, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.3634787132576086, |
|
"grad_norm": 2.1598158167597785, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4233, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.36417237492413074, |
|
"grad_norm": 2.0125775815004308, |
|
"learning_rate": 1e-05, |
|
"loss": 1.425, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.3648660365906529, |
|
"grad_norm": 1.8220999673991007, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4689, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.36555969825717505, |
|
"grad_norm": 1.9067444062678136, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4184, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.36625335992369723, |
|
"grad_norm": 1.8244010981958079, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4307, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.36694702159021936, |
|
"grad_norm": 1.9491392457779768, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4046, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.36764068325674154, |
|
"grad_norm": 1.8922109612389026, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4312, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.36833434492326367, |
|
"grad_norm": 1.8416689512552131, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3785, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.36902800658978585, |
|
"grad_norm": 1.8783492413280818, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4109, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.369721668256308, |
|
"grad_norm": 1.849885383573315, |
|
"learning_rate": 1e-05, |
|
"loss": 1.378, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.37041532992283016, |
|
"grad_norm": 2.071142129836207, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3897, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.3711089915893523, |
|
"grad_norm": 1.955558194146127, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3781, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.37180265325587447, |
|
"grad_norm": 1.8539973789480575, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3616, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.3724963149223966, |
|
"grad_norm": 1.9837890849438713, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3713, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.3731899765889188, |
|
"grad_norm": 1.9070579057354713, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3479, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.3738836382554409, |
|
"grad_norm": 1.9425946819156692, |
|
"learning_rate": 1e-05, |
|
"loss": 1.376, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.37457729992196304, |
|
"grad_norm": 1.8771696216581655, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3827, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3752709615884852, |
|
"grad_norm": 1.9186911492371799, |
|
"learning_rate": 1e-05, |
|
"loss": 1.376, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.37596462325500735, |
|
"grad_norm": 1.7294504716898789, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3706, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.37665828492152953, |
|
"grad_norm": 1.7712039048530457, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4438, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.37735194658805166, |
|
"grad_norm": 1.83847300072347, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4161, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.37804560825457384, |
|
"grad_norm": 1.825146012889819, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4118, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.37873926992109597, |
|
"grad_norm": 1.7989485574696917, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3689, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.37943293158761815, |
|
"grad_norm": 2.1484169058996816, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3477, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.3801265932541403, |
|
"grad_norm": 2.0781089621700777, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4372, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.38082025492066246, |
|
"grad_norm": 1.8367820389064522, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4556, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.3815139165871846, |
|
"grad_norm": 1.8748672667110173, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4059, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.38220757825370677, |
|
"grad_norm": 1.8956025199638242, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4102, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.3829012399202289, |
|
"grad_norm": 1.9601967006624061, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3879, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.3835949015867511, |
|
"grad_norm": 2.0406526524881707, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3885, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.3842885632532732, |
|
"grad_norm": 2.091444436822791, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3999, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.3849822249197954, |
|
"grad_norm": 1.7714596927341815, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3452, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.3856758865863175, |
|
"grad_norm": 1.825942444423705, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4142, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.3863695482528397, |
|
"grad_norm": 1.871025102921204, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3421, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.38706320991936183, |
|
"grad_norm": 1.9540951936439066, |
|
"learning_rate": 1e-05, |
|
"loss": 1.384, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.387756871585884, |
|
"grad_norm": 1.8147562505586048, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3794, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.38845053325240614, |
|
"grad_norm": 1.8138847080198641, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4251, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.38914419491892827, |
|
"grad_norm": 1.8462886417351692, |
|
"learning_rate": 1e-05, |
|
"loss": 1.398, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.38983785658545045, |
|
"grad_norm": 2.076297894089803, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3529, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.3905315182519726, |
|
"grad_norm": 1.8009871240759965, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3739, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.39122517991849476, |
|
"grad_norm": 1.872935136959733, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3842, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.3919188415850169, |
|
"grad_norm": 1.8957537468315788, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3633, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.39261250325153907, |
|
"grad_norm": 1.991998761167588, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3742, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.3933061649180612, |
|
"grad_norm": 1.7016217238658489, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4253, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.3939998265845834, |
|
"grad_norm": 1.8800648961699629, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4451, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.3946934882511055, |
|
"grad_norm": 2.0019042611698774, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3997, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.3953871499176277, |
|
"grad_norm": 2.215323946324701, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3577, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3960808115841498, |
|
"grad_norm": 2.0407346613187016, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3808, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.396774473250672, |
|
"grad_norm": 1.874235605467884, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3883, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.3974681349171941, |
|
"grad_norm": 1.9076714486170196, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3785, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.3981617965837163, |
|
"grad_norm": 1.9438453292084767, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3888, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.39885545825023844, |
|
"grad_norm": 1.9010804186629797, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4044, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.3995491199167606, |
|
"grad_norm": 2.1173409005527213, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4043, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.40024278158328275, |
|
"grad_norm": 1.924471913084561, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3636, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.40093644324980493, |
|
"grad_norm": 2.129867365540973, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4308, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.40163010491632706, |
|
"grad_norm": 1.9302914340159718, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3937, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.40232376658284924, |
|
"grad_norm": 1.978021194477141, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4076, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.40301742824937137, |
|
"grad_norm": 1.9801917793272694, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3756, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.40371108991589355, |
|
"grad_norm": 1.8510216673051632, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3643, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.4044047515824157, |
|
"grad_norm": 2.183109714099149, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4537, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.4050984132489378, |
|
"grad_norm": 2.349408814867446, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4192, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.40579207491546, |
|
"grad_norm": 2.2006000600981106, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4079, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.4064857365819821, |
|
"grad_norm": 1.8981822287744043, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4069, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.4071793982485043, |
|
"grad_norm": 1.760320500086242, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3738, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.4078730599150264, |
|
"grad_norm": 1.8312063491211514, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3887, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.4085667215815486, |
|
"grad_norm": 1.760204313519952, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3332, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.40926038324807074, |
|
"grad_norm": 1.7343834044934423, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3435, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.4099540449145929, |
|
"grad_norm": 2.2088224043427687, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4162, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.41064770658111505, |
|
"grad_norm": 1.8200743654853602, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3931, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.41134136824763723, |
|
"grad_norm": 1.8435586030279356, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4093, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.41203502991415936, |
|
"grad_norm": 1.916392749662975, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3894, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.41272869158068154, |
|
"grad_norm": 1.9161401196518564, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3726, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.41342235324720367, |
|
"grad_norm": 1.8756544580848145, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3697, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.41411601491372585, |
|
"grad_norm": 1.898787166818739, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3941, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.414809676580248, |
|
"grad_norm": 1.8044701136277606, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3999, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.41550333824677016, |
|
"grad_norm": 1.8310418953275842, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3379, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.4161969999132923, |
|
"grad_norm": 1.9227996432956476, |
|
"learning_rate": 1e-05, |
|
"loss": 1.362, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.41689066157981447, |
|
"grad_norm": 1.8826360241413953, |
|
"learning_rate": 1e-05, |
|
"loss": 1.413, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.4175843232463366, |
|
"grad_norm": 1.6984816355671049, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3703, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.4182779849128588, |
|
"grad_norm": 1.8568350133313958, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3909, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.4189716465793809, |
|
"grad_norm": 1.917415585499782, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3698, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.41966530824590303, |
|
"grad_norm": 1.8859603577321011, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4034, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.4203589699124252, |
|
"grad_norm": 1.8889156912270977, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4125, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.42105263157894735, |
|
"grad_norm": 1.911197022952282, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3655, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.42174629324546953, |
|
"grad_norm": 1.9782065265119402, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4201, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.42243995491199166, |
|
"grad_norm": 1.8747816354205493, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3664, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.42313361657851384, |
|
"grad_norm": 1.9012852889769853, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3259, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.42382727824503597, |
|
"grad_norm": 1.8979118524613148, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3971, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.42452093991155815, |
|
"grad_norm": 1.8520667588064368, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4135, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.4252146015780803, |
|
"grad_norm": 1.8342529385215973, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3863, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.42590826324460246, |
|
"grad_norm": 1.8295048253836184, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3087, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.4266019249111246, |
|
"grad_norm": 1.7695063050348687, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4162, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.42729558657764677, |
|
"grad_norm": 2.082777123696616, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4278, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.4279892482441689, |
|
"grad_norm": 1.7051067845957866, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4095, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.4286829099106911, |
|
"grad_norm": 1.8695270037035383, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4503, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.4293765715772132, |
|
"grad_norm": 1.9281893067578681, |
|
"learning_rate": 1e-05, |
|
"loss": 1.38, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.4300702332437354, |
|
"grad_norm": 1.777014355958319, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3984, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4307638949102575, |
|
"grad_norm": 1.842917760375874, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3853, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.4314575565767797, |
|
"grad_norm": 1.8267047634185034, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3524, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.4321512182433018, |
|
"grad_norm": 2.0773027372115394, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3901, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.432844879909824, |
|
"grad_norm": 1.8770974629057278, |
|
"learning_rate": 1e-05, |
|
"loss": 1.376, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.43353854157634614, |
|
"grad_norm": 1.8429345410006643, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3877, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.43423220324286826, |
|
"grad_norm": 1.7713210328211393, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3976, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.43492586490939045, |
|
"grad_norm": 1.8286568674310186, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3465, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.4356195265759126, |
|
"grad_norm": 1.8690248103614588, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3826, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.43631318824243476, |
|
"grad_norm": 2.025467675895459, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3671, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.4370068499089569, |
|
"grad_norm": 1.8903327597767532, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3548, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.43770051157547907, |
|
"grad_norm": 1.7825184036448585, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3978, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.4383941732420012, |
|
"grad_norm": 1.9191594921321666, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4214, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.4390878349085234, |
|
"grad_norm": 1.8710466935982961, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4685, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.4397814965750455, |
|
"grad_norm": 1.9252588373442325, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3529, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.4404751582415677, |
|
"grad_norm": 1.8307810199559515, |
|
"learning_rate": 1e-05, |
|
"loss": 1.379, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.4411688199080898, |
|
"grad_norm": 1.9458457674285412, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3734, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.441862481574612, |
|
"grad_norm": 1.8137288582058262, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3309, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.4425561432411341, |
|
"grad_norm": 1.972371773276373, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4303, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.4432498049076563, |
|
"grad_norm": 1.9163578588020878, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3191, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.44394346657417844, |
|
"grad_norm": 1.8861727336930172, |
|
"learning_rate": 1e-05, |
|
"loss": 1.374, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.4446371282407006, |
|
"grad_norm": 1.7493814291306995, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3992, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.44533078990722275, |
|
"grad_norm": 1.8289477836679608, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3472, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.44602445157374493, |
|
"grad_norm": 1.9323084827259125, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3966, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.44671811324026706, |
|
"grad_norm": 1.866327691734681, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3793, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.44741177490678924, |
|
"grad_norm": 1.8217937158784923, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4244, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.44810543657331137, |
|
"grad_norm": 1.8364372691697852, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3755, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.4487990982398335, |
|
"grad_norm": 2.013774748739019, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3807, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.4494927599063557, |
|
"grad_norm": 1.805702981754065, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3942, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.4501864215728778, |
|
"grad_norm": 1.9948282199322953, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4366, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.4508800832394, |
|
"grad_norm": 1.978073570947312, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4254, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4515737449059221, |
|
"grad_norm": 1.9232595674423625, |
|
"learning_rate": 1e-05, |
|
"loss": 1.404, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.4522674065724443, |
|
"grad_norm": 1.855166028756208, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3791, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.4529610682389664, |
|
"grad_norm": 1.7786637265688048, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3936, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.4536547299054886, |
|
"grad_norm": 1.6650125213330889, |
|
"learning_rate": 1e-05, |
|
"loss": 1.38, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.45434839157201073, |
|
"grad_norm": 1.9212024581799898, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3951, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.4550420532385329, |
|
"grad_norm": 1.855795867809702, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3563, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.45573571490505504, |
|
"grad_norm": 1.8769724131475898, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3974, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.4564293765715772, |
|
"grad_norm": 1.7926551468562113, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3974, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.45712303823809936, |
|
"grad_norm": 1.9178863763832097, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4146, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.45781669990462154, |
|
"grad_norm": 1.9353453542577745, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4187, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.45851036157114367, |
|
"grad_norm": 1.952403141808128, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4008, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.45920402323766585, |
|
"grad_norm": 1.757904596732749, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4264, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.459897684904188, |
|
"grad_norm": 1.9395774430498347, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3671, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.46059134657071016, |
|
"grad_norm": 1.8890203107447723, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3715, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.4612850082372323, |
|
"grad_norm": 1.8286704559781526, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3941, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.46197866990375447, |
|
"grad_norm": 1.810001367913328, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3883, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.4626723315702766, |
|
"grad_norm": 1.9152160013729407, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3813, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.4633659932367988, |
|
"grad_norm": 2.0094067199071546, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3176, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.4640596549033209, |
|
"grad_norm": 1.8734070756332013, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4233, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.46475331656984303, |
|
"grad_norm": 1.7612782018456719, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3438, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4654469782363652, |
|
"grad_norm": 1.8683597363973128, |
|
"learning_rate": 1e-05, |
|
"loss": 1.355, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.46614063990288734, |
|
"grad_norm": 2.0012042847679914, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4169, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.4668343015694095, |
|
"grad_norm": 1.9416276311234992, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3742, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.46752796323593165, |
|
"grad_norm": 1.8922567854833168, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3779, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.46822162490245384, |
|
"grad_norm": 1.7793620481094168, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3915, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.46891528656897596, |
|
"grad_norm": 2.167051856102186, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3989, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.46960894823549815, |
|
"grad_norm": 1.871191468625491, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3547, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.4703026099020203, |
|
"grad_norm": 1.7304516085856814, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3808, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.47099627156854246, |
|
"grad_norm": 1.745983949797038, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3425, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.4716899332350646, |
|
"grad_norm": 1.8779422010813842, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3592, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.47238359490158677, |
|
"grad_norm": 1.9934611197767778, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3538, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.4730772565681089, |
|
"grad_norm": 1.9623233741400323, |
|
"learning_rate": 1e-05, |
|
"loss": 1.346, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.4737709182346311, |
|
"grad_norm": 1.796679910714133, |
|
"learning_rate": 1e-05, |
|
"loss": 1.355, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.4744645799011532, |
|
"grad_norm": 1.7722903042578, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4071, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.4751582415676754, |
|
"grad_norm": 1.8052196535090335, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3814, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.4758519032341975, |
|
"grad_norm": 1.7948553541752041, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3729, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.4765455649007197, |
|
"grad_norm": 2.007593236556849, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3673, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.4772392265672418, |
|
"grad_norm": 1.9797683213286288, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3629, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.477932888233764, |
|
"grad_norm": 1.9548220267121557, |
|
"learning_rate": 1e-05, |
|
"loss": 1.383, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.47862654990028614, |
|
"grad_norm": 1.929009442664775, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3921, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.47932021156680826, |
|
"grad_norm": 1.8421018984754722, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3672, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.48001387323333045, |
|
"grad_norm": 1.8234045076467948, |
|
"learning_rate": 1e-05, |
|
"loss": 1.398, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.4807075348998526, |
|
"grad_norm": 2.066405568328085, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3348, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.48140119656637476, |
|
"grad_norm": 1.9963663982661528, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3662, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.4820948582328969, |
|
"grad_norm": 1.883319503377311, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3835, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.48278851989941907, |
|
"grad_norm": 1.56186845290527, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3386, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.4834821815659412, |
|
"grad_norm": 1.8687876803046082, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3501, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.4841758432324634, |
|
"grad_norm": 2.0432055137936036, |
|
"learning_rate": 1e-05, |
|
"loss": 1.366, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.4848695048989855, |
|
"grad_norm": 1.7989863129663937, |
|
"learning_rate": 1e-05, |
|
"loss": 1.387, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.4855631665655077, |
|
"grad_norm": 1.9153016999718706, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4391, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4862568282320298, |
|
"grad_norm": 1.8256041578604432, |
|
"learning_rate": 1e-05, |
|
"loss": 1.385, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.486950489898552, |
|
"grad_norm": 1.8391212949021127, |
|
"learning_rate": 1e-05, |
|
"loss": 1.388, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.4876441515650741, |
|
"grad_norm": 1.9978982769762854, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3841, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.4883378132315963, |
|
"grad_norm": 1.9247875553761613, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3514, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.48903147489811843, |
|
"grad_norm": 1.8114868285875125, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3659, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.4897251365646406, |
|
"grad_norm": 1.8506801571462923, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4349, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.49041879823116274, |
|
"grad_norm": 1.8480926190821445, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3768, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.4911124598976849, |
|
"grad_norm": 1.9547558985555982, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3767, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.49180612156420705, |
|
"grad_norm": 1.7241605811559706, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4277, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.49249978323072924, |
|
"grad_norm": 1.9597827919247104, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2981, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.49319344489725137, |
|
"grad_norm": 2.241085697740784, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3738, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.4938871065637735, |
|
"grad_norm": 2.1107944914928023, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3788, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.4945807682302957, |
|
"grad_norm": 1.9819856716802244, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3805, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.4952744298968178, |
|
"grad_norm": 1.7818814238988587, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3864, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.49596809156334, |
|
"grad_norm": 1.8028425771808247, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4047, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.4966617532298621, |
|
"grad_norm": 1.810766896499592, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3899, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.4973554148963843, |
|
"grad_norm": 1.8166160127003885, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3603, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.4980490765629064, |
|
"grad_norm": 2.13361803754948, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4022, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.4987427382294286, |
|
"grad_norm": 2.016997065796454, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4034, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.49943639989595073, |
|
"grad_norm": 2.0748032742705793, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4105, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5001300615624729, |
|
"grad_norm": 1.8523948307863285, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3697, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.500823723228995, |
|
"grad_norm": 1.8122879610484914, |
|
"learning_rate": 1e-05, |
|
"loss": 1.404, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.5015173848955172, |
|
"grad_norm": 1.8832267152617044, |
|
"learning_rate": 1e-05, |
|
"loss": 1.412, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.5022110465620394, |
|
"grad_norm": 1.8257618111117269, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4016, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.5029047082285615, |
|
"grad_norm": 1.7942051170674276, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3686, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5035983698950837, |
|
"grad_norm": 1.97042982772308, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3575, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.5042920315616058, |
|
"grad_norm": 2.053740555710897, |
|
"learning_rate": 1e-05, |
|
"loss": 1.379, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.504985693228128, |
|
"grad_norm": 1.977833812394873, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3174, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.5056793548946501, |
|
"grad_norm": 1.9322288272504544, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4038, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.5063730165611723, |
|
"grad_norm": 2.0400420303295275, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3953, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5070666782276945, |
|
"grad_norm": 1.976799409278783, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3895, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.5077603398942167, |
|
"grad_norm": 1.9306400432028785, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4228, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.5084540015607387, |
|
"grad_norm": 1.7347175465813025, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3704, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.5091476632272609, |
|
"grad_norm": 1.7180782433090167, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3547, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.5098413248937831, |
|
"grad_norm": 1.852662073693016, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4228, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5105349865603052, |
|
"grad_norm": 1.8075356657945834, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3887, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.5112286482268273, |
|
"grad_norm": 1.7367287190868546, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3752, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.5119223098933495, |
|
"grad_norm": 1.8286379353529887, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3963, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.5126159715598717, |
|
"grad_norm": 1.7825873791247597, |
|
"learning_rate": 1e-05, |
|
"loss": 1.397, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.5133096332263938, |
|
"grad_norm": 2.0430860649068396, |
|
"learning_rate": 1e-05, |
|
"loss": 1.368, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.514003294892916, |
|
"grad_norm": 1.82138787437088, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3888, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.5146969565594381, |
|
"grad_norm": 1.9949710877946953, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3221, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.5153906182259603, |
|
"grad_norm": 1.7999499915500219, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3214, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.5160842798924824, |
|
"grad_norm": 1.944101461453592, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3641, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.5167779415590046, |
|
"grad_norm": 1.9484142602052452, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4183, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.5174716032255268, |
|
"grad_norm": 1.9261361736136446, |
|
"learning_rate": 1e-05, |
|
"loss": 1.39, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.518165264892049, |
|
"grad_norm": 1.873564918154059, |
|
"learning_rate": 1e-05, |
      "loss": 1.3511,
      "step": 747
    },
    {
      "epoch": 0.518858926558571,
      "grad_norm": 1.975733457802349,
      "learning_rate": 1e-05,
      "loss": 1.3786,
      "step": 748
    },
    {
      "epoch": 0.5195525882250932,
      "grad_norm": 1.9396312909048352,
      "learning_rate": 1e-05,
      "loss": 1.3161,
      "step": 749
    },
    {
      "epoch": 0.5202462498916154,
      "grad_norm": 1.736368973000078,
      "learning_rate": 1e-05,
      "loss": 1.3704,
      "step": 750
    },
    {
      "epoch": 0.5209399115581376,
      "grad_norm": 1.7911382730465684,
      "learning_rate": 1e-05,
      "loss": 1.3379,
      "step": 751
    },
    {
      "epoch": 0.5216335732246596,
      "grad_norm": 1.8590253300206483,
      "learning_rate": 1e-05,
      "loss": 1.3531,
      "step": 752
    },
    {
      "epoch": 0.5223272348911818,
      "grad_norm": 1.9765690927121422,
      "learning_rate": 1e-05,
      "loss": 1.3728,
      "step": 753
    },
    {
      "epoch": 0.523020896557704,
      "grad_norm": 2.0025745088354148,
      "learning_rate": 1e-05,
      "loss": 1.3939,
      "step": 754
    },
    {
      "epoch": 0.5237145582242261,
      "grad_norm": 1.9192921819712603,
      "learning_rate": 1e-05,
      "loss": 1.3914,
      "step": 755
    },
    {
      "epoch": 0.5244082198907483,
      "grad_norm": 1.8600023632893699,
      "learning_rate": 1e-05,
      "loss": 1.3499,
      "step": 756
    },
    {
      "epoch": 0.5251018815572704,
      "grad_norm": 1.9355189855324415,
      "learning_rate": 1e-05,
      "loss": 1.3471,
      "step": 757
    },
    {
      "epoch": 0.5257955432237926,
      "grad_norm": 2.0191814963451495,
      "learning_rate": 1e-05,
      "loss": 1.4244,
      "step": 758
    },
    {
      "epoch": 0.5264892048903147,
      "grad_norm": 1.883726645686763,
      "learning_rate": 1e-05,
      "loss": 1.3619,
      "step": 759
    },
    {
      "epoch": 0.5271828665568369,
      "grad_norm": 1.7947607621987598,
      "learning_rate": 1e-05,
      "loss": 1.3404,
      "step": 760
    },
    {
      "epoch": 0.5278765282233591,
      "grad_norm": 1.8889256123493143,
      "learning_rate": 1e-05,
      "loss": 1.4012,
      "step": 761
    },
    {
      "epoch": 0.5285701898898812,
      "grad_norm": 1.9230513498083945,
      "learning_rate": 1e-05,
      "loss": 1.3475,
      "step": 762
    },
    {
      "epoch": 0.5292638515564033,
      "grad_norm": 1.8067019812009846,
      "learning_rate": 1e-05,
      "loss": 1.3911,
      "step": 763
    },
    {
      "epoch": 0.5299575132229255,
      "grad_norm": 1.8380178891570427,
      "learning_rate": 1e-05,
      "loss": 1.3614,
      "step": 764
    },
    {
      "epoch": 0.5306511748894477,
      "grad_norm": 1.8235814437026021,
      "learning_rate": 1e-05,
      "loss": 1.3556,
      "step": 765
    },
    {
      "epoch": 0.5313448365559699,
      "grad_norm": 1.830617817421823,
      "learning_rate": 1e-05,
      "loss": 1.3844,
      "step": 766
    },
    {
      "epoch": 0.5320384982224919,
      "grad_norm": 1.9168498268351752,
      "learning_rate": 1e-05,
      "loss": 1.3669,
      "step": 767
    },
    {
      "epoch": 0.5327321598890141,
      "grad_norm": 1.945357782268639,
      "learning_rate": 1e-05,
      "loss": 1.3595,
      "step": 768
    },
    {
      "epoch": 0.5334258215555363,
      "grad_norm": 2.237537090117557,
      "learning_rate": 1e-05,
      "loss": 1.3683,
      "step": 769
    },
    {
      "epoch": 0.5341194832220585,
      "grad_norm": 1.8612240335487669,
      "learning_rate": 1e-05,
      "loss": 1.3937,
      "step": 770
    },
    {
      "epoch": 0.5348131448885806,
      "grad_norm": 2.0804814565765266,
      "learning_rate": 1e-05,
      "loss": 1.3706,
      "step": 771
    },
    {
      "epoch": 0.5355068065551027,
      "grad_norm": 1.9951415692701584,
      "learning_rate": 1e-05,
      "loss": 1.3696,
      "step": 772
    },
    {
      "epoch": 0.5362004682216249,
      "grad_norm": 1.7248104660180126,
      "learning_rate": 1e-05,
      "loss": 1.3747,
      "step": 773
    },
    {
      "epoch": 0.5368941298881471,
      "grad_norm": 1.7711620914058126,
      "learning_rate": 1e-05,
      "loss": 1.3776,
      "step": 774
    },
    {
      "epoch": 0.5375877915546692,
      "grad_norm": 1.7502844737194139,
      "learning_rate": 1e-05,
      "loss": 1.3719,
      "step": 775
    },
    {
      "epoch": 0.5382814532211914,
      "grad_norm": 1.975837716180595,
      "learning_rate": 1e-05,
      "loss": 1.3392,
      "step": 776
    },
    {
      "epoch": 0.5389751148877135,
      "grad_norm": 2.079903617254406,
      "learning_rate": 1e-05,
      "loss": 1.3789,
      "step": 777
    },
    {
      "epoch": 0.5396687765542356,
      "grad_norm": 1.8776752694265728,
      "learning_rate": 1e-05,
      "loss": 1.3968,
      "step": 778
    },
    {
      "epoch": 0.5403624382207578,
      "grad_norm": 1.7524287384136423,
      "learning_rate": 1e-05,
      "loss": 1.3538,
      "step": 779
    },
    {
      "epoch": 0.54105609988728,
      "grad_norm": 2.034871602096054,
      "learning_rate": 1e-05,
      "loss": 1.3546,
      "step": 780
    },
    {
      "epoch": 0.5417497615538022,
      "grad_norm": 2.0262231787193117,
      "learning_rate": 1e-05,
      "loss": 1.351,
      "step": 781
    },
    {
      "epoch": 0.5424434232203242,
      "grad_norm": 1.8800790132697096,
      "learning_rate": 1e-05,
      "loss": 1.4063,
      "step": 782
    },
    {
      "epoch": 0.5431370848868464,
      "grad_norm": 1.8397602468821888,
      "learning_rate": 1e-05,
      "loss": 1.3284,
      "step": 783
    },
    {
      "epoch": 0.5438307465533686,
      "grad_norm": 1.7812165456068076,
      "learning_rate": 1e-05,
      "loss": 1.3116,
      "step": 784
    },
    {
      "epoch": 0.5445244082198908,
      "grad_norm": 1.7794765249417972,
      "learning_rate": 1e-05,
      "loss": 1.3796,
      "step": 785
    },
    {
      "epoch": 0.5452180698864129,
      "grad_norm": 1.9715381587594996,
      "learning_rate": 1e-05,
      "loss": 1.3771,
      "step": 786
    },
    {
      "epoch": 0.545911731552935,
      "grad_norm": 1.7931337412184942,
      "learning_rate": 1e-05,
      "loss": 1.4108,
      "step": 787
    },
    {
      "epoch": 0.5466053932194572,
      "grad_norm": 1.8048639309386612,
      "learning_rate": 1e-05,
      "loss": 1.3689,
      "step": 788
    },
    {
      "epoch": 0.5472990548859794,
      "grad_norm": 1.8622884245084281,
      "learning_rate": 1e-05,
      "loss": 1.3513,
      "step": 789
    },
    {
      "epoch": 0.5479927165525015,
      "grad_norm": 1.816207257972127,
      "learning_rate": 1e-05,
      "loss": 1.3331,
      "step": 790
    },
    {
      "epoch": 0.5486863782190237,
      "grad_norm": 1.7508374499546198,
      "learning_rate": 1e-05,
      "loss": 1.3594,
      "step": 791
    },
    {
      "epoch": 0.5493800398855458,
      "grad_norm": 1.7086134745690817,
      "learning_rate": 1e-05,
      "loss": 1.343,
      "step": 792
    },
    {
      "epoch": 0.550073701552068,
      "grad_norm": 1.8826776634822224,
      "learning_rate": 1e-05,
      "loss": 1.4312,
      "step": 793
    },
    {
      "epoch": 0.5507673632185901,
      "grad_norm": 1.85161413953065,
      "learning_rate": 1e-05,
      "loss": 1.403,
      "step": 794
    },
    {
      "epoch": 0.5514610248851123,
      "grad_norm": 2.169026140246512,
      "learning_rate": 1e-05,
      "loss": 1.3514,
      "step": 795
    },
    {
      "epoch": 0.5521546865516345,
      "grad_norm": 1.8974393651417674,
      "learning_rate": 1e-05,
      "loss": 1.376,
      "step": 796
    },
    {
      "epoch": 0.5528483482181566,
      "grad_norm": 1.7818922230701455,
      "learning_rate": 1e-05,
      "loss": 1.3365,
      "step": 797
    },
    {
      "epoch": 0.5535420098846787,
      "grad_norm": 1.8268621071764144,
      "learning_rate": 1e-05,
      "loss": 1.3267,
      "step": 798
    },
    {
      "epoch": 0.5542356715512009,
      "grad_norm": 1.741645703166344,
      "learning_rate": 1e-05,
      "loss": 1.3401,
      "step": 799
    },
    {
      "epoch": 0.5549293332177231,
      "grad_norm": 1.7684014664169725,
      "learning_rate": 1e-05,
      "loss": 1.3453,
      "step": 800
    },
    {
      "epoch": 0.5556229948842452,
      "grad_norm": 1.872082247187476,
      "learning_rate": 1e-05,
      "loss": 1.3849,
      "step": 801
    },
    {
      "epoch": 0.5563166565507673,
      "grad_norm": 1.843965544279036,
      "learning_rate": 1e-05,
      "loss": 1.3515,
      "step": 802
    },
    {
      "epoch": 0.5570103182172895,
      "grad_norm": 1.7503097471047913,
      "learning_rate": 1e-05,
      "loss": 1.3772,
      "step": 803
    },
    {
      "epoch": 0.5577039798838117,
      "grad_norm": 2.152027612117084,
      "learning_rate": 1e-05,
      "loss": 1.3369,
      "step": 804
    },
    {
      "epoch": 0.5583976415503338,
      "grad_norm": 1.9678830918650134,
      "learning_rate": 1e-05,
      "loss": 1.3363,
      "step": 805
    },
    {
      "epoch": 0.559091303216856,
      "grad_norm": 1.7813746189463364,
      "learning_rate": 1e-05,
      "loss": 1.3268,
      "step": 806
    },
    {
      "epoch": 0.5597849648833781,
      "grad_norm": 1.835262281512248,
      "learning_rate": 1e-05,
      "loss": 1.3155,
      "step": 807
    },
    {
      "epoch": 0.5604786265499003,
      "grad_norm": 1.778985993964162,
      "learning_rate": 1e-05,
      "loss": 1.3913,
      "step": 808
    },
    {
      "epoch": 0.5611722882164224,
      "grad_norm": 2.07464881345254,
      "learning_rate": 1e-05,
      "loss": 1.35,
      "step": 809
    },
    {
      "epoch": 0.5618659498829446,
      "grad_norm": 2.186610515953725,
      "learning_rate": 1e-05,
      "loss": 1.3954,
      "step": 810
    },
    {
      "epoch": 0.5625596115494668,
      "grad_norm": 2.129937538785471,
      "learning_rate": 1e-05,
      "loss": 1.3644,
      "step": 811
    },
    {
      "epoch": 0.563253273215989,
      "grad_norm": 2.113811860287663,
      "learning_rate": 1e-05,
      "loss": 1.3397,
      "step": 812
    },
    {
      "epoch": 0.563946934882511,
      "grad_norm": 2.0550884583810487,
      "learning_rate": 1e-05,
      "loss": 1.3316,
      "step": 813
    },
    {
      "epoch": 0.5646405965490332,
      "grad_norm": 1.85558729084722,
      "learning_rate": 1e-05,
      "loss": 1.3586,
      "step": 814
    },
    {
      "epoch": 0.5653342582155554,
      "grad_norm": 1.7940205357205186,
      "learning_rate": 1e-05,
      "loss": 1.3328,
      "step": 815
    },
    {
      "epoch": 0.5660279198820776,
      "grad_norm": 1.8867105415827423,
      "learning_rate": 1e-05,
      "loss": 1.4099,
      "step": 816
    },
    {
      "epoch": 0.5667215815485996,
      "grad_norm": 1.9561860862534044,
      "learning_rate": 1e-05,
      "loss": 1.4491,
      "step": 817
    },
    {
      "epoch": 0.5674152432151218,
      "grad_norm": 2.0876714983856313,
      "learning_rate": 1e-05,
      "loss": 1.364,
      "step": 818
    },
    {
      "epoch": 0.568108904881644,
      "grad_norm": 1.99399680820796,
      "learning_rate": 1e-05,
      "loss": 1.3033,
      "step": 819
    },
    {
      "epoch": 0.5688025665481661,
      "grad_norm": 1.976493240480479,
      "learning_rate": 1e-05,
      "loss": 1.333,
      "step": 820
    },
    {
      "epoch": 0.5694962282146883,
      "grad_norm": 1.95662043100926,
      "learning_rate": 1e-05,
      "loss": 1.3136,
      "step": 821
    },
    {
      "epoch": 0.5701898898812104,
      "grad_norm": 1.6497733053601713,
      "learning_rate": 1e-05,
      "loss": 1.3861,
      "step": 822
    },
    {
      "epoch": 0.5708835515477326,
      "grad_norm": 1.818935374314111,
      "learning_rate": 1e-05,
      "loss": 1.3857,
      "step": 823
    },
    {
      "epoch": 0.5715772132142547,
      "grad_norm": 1.747967079872631,
      "learning_rate": 1e-05,
      "loss": 1.349,
      "step": 824
    },
    {
      "epoch": 0.5722708748807769,
      "grad_norm": 1.8908128266598878,
      "learning_rate": 1e-05,
      "loss": 1.3616,
      "step": 825
    },
    {
      "epoch": 0.5729645365472991,
      "grad_norm": 1.8720455934356435,
      "learning_rate": 1e-05,
      "loss": 1.3238,
      "step": 826
    },
    {
      "epoch": 0.5736581982138212,
      "grad_norm": 1.7833794579975666,
      "learning_rate": 1e-05,
      "loss": 1.3956,
      "step": 827
    },
    {
      "epoch": 0.5743518598803433,
      "grad_norm": 2.1032994082238203,
      "learning_rate": 1e-05,
      "loss": 1.3575,
      "step": 828
    },
    {
      "epoch": 0.5750455215468655,
      "grad_norm": 2.0134774144141487,
      "learning_rate": 1e-05,
      "loss": 1.3464,
      "step": 829
    },
    {
      "epoch": 0.5757391832133877,
      "grad_norm": 1.8711603789528202,
      "learning_rate": 1e-05,
      "loss": 1.4115,
      "step": 830
    },
    {
      "epoch": 0.5764328448799099,
      "grad_norm": 1.7819151783946192,
      "learning_rate": 1e-05,
      "loss": 1.3615,
      "step": 831
    },
    {
      "epoch": 0.5771265065464319,
      "grad_norm": 1.9690584032628007,
      "learning_rate": 1e-05,
      "loss": 1.3901,
      "step": 832
    },
    {
      "epoch": 0.5778201682129541,
      "grad_norm": 1.9642954115887026,
      "learning_rate": 1e-05,
      "loss": 1.3518,
      "step": 833
    },
    {
      "epoch": 0.5785138298794763,
      "grad_norm": 1.744497422769626,
      "learning_rate": 1e-05,
      "loss": 1.3469,
      "step": 834
    },
    {
      "epoch": 0.5792074915459985,
      "grad_norm": 2.0755337336634767,
      "learning_rate": 1e-05,
      "loss": 1.3823,
      "step": 835
    },
    {
      "epoch": 0.5799011532125206,
      "grad_norm": 1.8340533083393944,
      "learning_rate": 1e-05,
      "loss": 1.374,
      "step": 836
    },
    {
      "epoch": 0.5805948148790427,
      "grad_norm": 1.7560881859736863,
      "learning_rate": 1e-05,
      "loss": 1.4051,
      "step": 837
    },
    {
      "epoch": 0.5812884765455649,
      "grad_norm": 1.7470644935895128,
      "learning_rate": 1e-05,
      "loss": 1.3751,
      "step": 838
    },
    {
      "epoch": 0.5819821382120871,
      "grad_norm": 1.6498957999043187,
      "learning_rate": 1e-05,
      "loss": 1.3425,
      "step": 839
    },
    {
      "epoch": 0.5826757998786092,
      "grad_norm": 1.708284581084529,
      "learning_rate": 1e-05,
      "loss": 1.3772,
      "step": 840
    },
    {
      "epoch": 0.5833694615451314,
      "grad_norm": 1.7341358375022322,
      "learning_rate": 1e-05,
      "loss": 1.4271,
      "step": 841
    },
    {
      "epoch": 0.5840631232116535,
      "grad_norm": 1.96090088265045,
      "learning_rate": 1e-05,
      "loss": 1.3811,
      "step": 842
    },
    {
      "epoch": 0.5847567848781756,
      "grad_norm": 1.8309156039915615,
      "learning_rate": 1e-05,
      "loss": 1.3518,
      "step": 843
    },
    {
      "epoch": 0.5854504465446978,
      "grad_norm": 1.766087750027872,
      "learning_rate": 1e-05,
      "loss": 1.2791,
      "step": 844
    },
    {
      "epoch": 0.58614410821122,
      "grad_norm": 1.840859949308765,
      "learning_rate": 1e-05,
      "loss": 1.3364,
      "step": 845
    },
    {
      "epoch": 0.5868377698777422,
      "grad_norm": 1.8811612648079954,
      "learning_rate": 1e-05,
      "loss": 1.3866,
      "step": 846
    },
    {
      "epoch": 0.5875314315442642,
      "grad_norm": 1.7882571243218808,
      "learning_rate": 1e-05,
      "loss": 1.3818,
      "step": 847
    },
    {
      "epoch": 0.5882250932107864,
      "grad_norm": 1.7751021276239416,
      "learning_rate": 1e-05,
      "loss": 1.3877,
      "step": 848
    },
    {
      "epoch": 0.5889187548773086,
      "grad_norm": 1.863598117479808,
      "learning_rate": 1e-05,
      "loss": 1.3283,
      "step": 849
    },
    {
      "epoch": 0.5896124165438308,
      "grad_norm": 1.7934939156618281,
      "learning_rate": 1e-05,
      "loss": 1.3909,
      "step": 850
    },
    {
      "epoch": 0.5903060782103529,
      "grad_norm": 1.8701501570430885,
      "learning_rate": 1e-05,
      "loss": 1.3911,
      "step": 851
    },
    {
      "epoch": 0.590999739876875,
      "grad_norm": 1.8075702135316054,
      "learning_rate": 1e-05,
      "loss": 1.3267,
      "step": 852
    },
    {
      "epoch": 0.5916934015433972,
      "grad_norm": 1.7844322891888966,
      "learning_rate": 1e-05,
      "loss": 1.3749,
      "step": 853
    },
    {
      "epoch": 0.5923870632099194,
      "grad_norm": 1.6364137503955962,
      "learning_rate": 1e-05,
      "loss": 1.3903,
      "step": 854
    },
    {
      "epoch": 0.5930807248764415,
      "grad_norm": 1.845018525439836,
      "learning_rate": 1e-05,
      "loss": 1.3638,
      "step": 855
    },
    {
      "epoch": 0.5937743865429637,
      "grad_norm": 1.6438686492795926,
      "learning_rate": 1e-05,
      "loss": 1.3356,
      "step": 856
    },
    {
      "epoch": 0.5944680482094858,
      "grad_norm": 1.8461296262824984,
      "learning_rate": 1e-05,
      "loss": 1.318,
      "step": 857
    },
    {
      "epoch": 0.595161709876008,
      "grad_norm": 1.7523581116541502,
      "learning_rate": 1e-05,
      "loss": 1.3278,
      "step": 858
    },
    {
      "epoch": 0.5958553715425301,
      "grad_norm": 1.7821675206089143,
      "learning_rate": 1e-05,
      "loss": 1.3257,
      "step": 859
    },
    {
      "epoch": 0.5965490332090523,
      "grad_norm": 1.8952855386403753,
      "learning_rate": 1e-05,
      "loss": 1.352,
      "step": 860
    },
    {
      "epoch": 0.5972426948755745,
      "grad_norm": 1.9061495251459173,
      "learning_rate": 1e-05,
      "loss": 1.361,
      "step": 861
    },
    {
      "epoch": 0.5979363565420966,
      "grad_norm": 1.807002944323855,
      "learning_rate": 1e-05,
      "loss": 1.3498,
      "step": 862
    },
    {
      "epoch": 0.5986300182086187,
      "grad_norm": 1.8427398989259318,
      "learning_rate": 1e-05,
      "loss": 1.3442,
      "step": 863
    },
    {
      "epoch": 0.5993236798751409,
      "grad_norm": 2.159582705739885,
      "learning_rate": 1e-05,
      "loss": 1.4137,
      "step": 864
    },
    {
      "epoch": 0.6000173415416631,
      "grad_norm": 1.8186698531077894,
      "learning_rate": 1e-05,
      "loss": 1.3743,
      "step": 865
    },
    {
      "epoch": 0.6007110032081852,
      "grad_norm": 1.7754924011854336,
      "learning_rate": 1e-05,
      "loss": 1.3758,
      "step": 866
    },
    {
      "epoch": 0.6014046648747073,
      "grad_norm": 1.7146434615480912,
      "learning_rate": 1e-05,
      "loss": 1.3655,
      "step": 867
    },
    {
      "epoch": 0.6020983265412295,
      "grad_norm": 1.6864187498892416,
      "learning_rate": 1e-05,
      "loss": 1.3842,
      "step": 868
    },
    {
      "epoch": 0.6027919882077517,
      "grad_norm": 1.7625940182372917,
      "learning_rate": 1e-05,
      "loss": 1.3563,
      "step": 869
    },
    {
      "epoch": 0.6034856498742738,
      "grad_norm": 1.8385401005573274,
      "learning_rate": 1e-05,
      "loss": 1.317,
      "step": 870
    },
    {
      "epoch": 0.604179311540796,
      "grad_norm": 1.8249865396470937,
      "learning_rate": 1e-05,
      "loss": 1.3666,
      "step": 871
    },
    {
      "epoch": 0.6048729732073181,
      "grad_norm": 1.774024094473835,
      "learning_rate": 1e-05,
      "loss": 1.3677,
      "step": 872
    },
    {
      "epoch": 0.6055666348738403,
      "grad_norm": 1.9300061255791403,
      "learning_rate": 1e-05,
      "loss": 1.3668,
      "step": 873
    },
    {
      "epoch": 0.6062602965403624,
      "grad_norm": 1.8538055069719046,
      "learning_rate": 1e-05,
      "loss": 1.3639,
      "step": 874
    },
    {
      "epoch": 0.6069539582068846,
      "grad_norm": 1.8234801513872547,
      "learning_rate": 1e-05,
      "loss": 1.3183,
      "step": 875
    },
    {
      "epoch": 0.6076476198734068,
      "grad_norm": 1.8887104952070137,
      "learning_rate": 1e-05,
      "loss": 1.3798,
      "step": 876
    },
    {
      "epoch": 0.608341281539929,
      "grad_norm": 1.967685873156897,
      "learning_rate": 1e-05,
      "loss": 1.3733,
      "step": 877
    },
    {
      "epoch": 0.609034943206451,
      "grad_norm": 1.8930452243619467,
      "learning_rate": 1e-05,
      "loss": 1.3627,
      "step": 878
    },
    {
      "epoch": 0.6097286048729732,
      "grad_norm": 1.9917808148339706,
      "learning_rate": 1e-05,
      "loss": 1.3637,
      "step": 879
    },
    {
      "epoch": 0.6104222665394954,
      "grad_norm": 1.7953716355637714,
      "learning_rate": 1e-05,
      "loss": 1.3552,
      "step": 880
    },
    {
      "epoch": 0.6111159282060176,
      "grad_norm": 2.054090380165633,
      "learning_rate": 1e-05,
      "loss": 1.3309,
      "step": 881
    },
    {
      "epoch": 0.6118095898725396,
      "grad_norm": 1.774125479388038,
      "learning_rate": 1e-05,
      "loss": 1.3346,
      "step": 882
    },
    {
      "epoch": 0.6125032515390618,
      "grad_norm": 1.788110427730304,
      "learning_rate": 1e-05,
      "loss": 1.3907,
      "step": 883
    },
    {
      "epoch": 0.613196913205584,
      "grad_norm": 1.9117105836931287,
      "learning_rate": 1e-05,
      "loss": 1.3662,
      "step": 884
    },
    {
      "epoch": 0.6138905748721061,
      "grad_norm": 1.9761438291924842,
      "learning_rate": 1e-05,
      "loss": 1.3487,
      "step": 885
    },
    {
      "epoch": 0.6145842365386283,
      "grad_norm": 1.7939497044726074,
      "learning_rate": 1e-05,
      "loss": 1.3617,
      "step": 886
    },
    {
      "epoch": 0.6152778982051504,
      "grad_norm": 1.8137308254099254,
      "learning_rate": 1e-05,
      "loss": 1.4017,
      "step": 887
    },
    {
      "epoch": 0.6159715598716726,
      "grad_norm": 1.9358335601206476,
      "learning_rate": 1e-05,
      "loss": 1.3221,
      "step": 888
    },
    {
      "epoch": 0.6166652215381947,
      "grad_norm": 1.8023857684374447,
      "learning_rate": 1e-05,
      "loss": 1.3645,
      "step": 889
    },
    {
      "epoch": 0.6173588832047169,
      "grad_norm": 1.7858631263519855,
      "learning_rate": 1e-05,
      "loss": 1.3915,
      "step": 890
    },
    {
      "epoch": 0.6180525448712391,
      "grad_norm": 1.8447027974665438,
      "learning_rate": 1e-05,
      "loss": 1.3331,
      "step": 891
    },
    {
      "epoch": 0.6187462065377612,
      "grad_norm": 1.8973843442097094,
      "learning_rate": 1e-05,
      "loss": 1.3053,
      "step": 892
    },
    {
      "epoch": 0.6194398682042833,
      "grad_norm": 1.7623453790808643,
      "learning_rate": 1e-05,
      "loss": 1.2926,
      "step": 893
    },
    {
      "epoch": 0.6201335298708055,
      "grad_norm": 1.8017523463560905,
      "learning_rate": 1e-05,
      "loss": 1.3647,
      "step": 894
    },
    {
      "epoch": 0.6208271915373277,
      "grad_norm": 1.7450544611778176,
      "learning_rate": 1e-05,
      "loss": 1.3658,
      "step": 895
    },
    {
      "epoch": 0.6215208532038499,
      "grad_norm": 1.823024100294463,
      "learning_rate": 1e-05,
      "loss": 1.3508,
      "step": 896
    },
    {
      "epoch": 0.6222145148703719,
      "grad_norm": 1.8524166096635302,
      "learning_rate": 1e-05,
      "loss": 1.3446,
      "step": 897
    },
    {
      "epoch": 0.6229081765368941,
      "grad_norm": 1.7203042262118677,
      "learning_rate": 1e-05,
      "loss": 1.3838,
      "step": 898
    },
    {
      "epoch": 0.6236018382034163,
      "grad_norm": 1.7896042667571013,
      "learning_rate": 1e-05,
      "loss": 1.3401,
      "step": 899
    },
    {
      "epoch": 0.6242954998699385,
      "grad_norm": 1.8566869792112495,
      "learning_rate": 1e-05,
      "loss": 1.3856,
      "step": 900
    },
    {
      "epoch": 0.6249891615364606,
      "grad_norm": 1.7236631672011284,
      "learning_rate": 1e-05,
      "loss": 1.3455,
      "step": 901
    },
    {
      "epoch": 0.6256828232029827,
      "grad_norm": 1.9857281332079058,
      "learning_rate": 1e-05,
      "loss": 1.3347,
      "step": 902
    },
    {
      "epoch": 0.6263764848695049,
      "grad_norm": 1.916490049064551,
      "learning_rate": 1e-05,
      "loss": 1.2964,
      "step": 903
    },
    {
      "epoch": 0.6270701465360271,
      "grad_norm": 1.9713572657543152,
      "learning_rate": 1e-05,
      "loss": 1.3302,
      "step": 904
    },
    {
      "epoch": 0.6277638082025492,
      "grad_norm": 1.7610441348646735,
      "learning_rate": 1e-05,
      "loss": 1.3344,
      "step": 905
    },
    {
      "epoch": 0.6284574698690714,
      "grad_norm": 1.799617843727853,
      "learning_rate": 1e-05,
      "loss": 1.3284,
      "step": 906
    },
    {
      "epoch": 0.6291511315355935,
      "grad_norm": 1.7832143262655586,
      "learning_rate": 1e-05,
      "loss": 1.3356,
      "step": 907
    },
    {
      "epoch": 0.6298447932021156,
      "grad_norm": 1.7971828284576865,
      "learning_rate": 1e-05,
      "loss": 1.3207,
      "step": 908
    },
    {
      "epoch": 0.6305384548686378,
      "grad_norm": 1.7536423641131738,
      "learning_rate": 1e-05,
      "loss": 1.3499,
      "step": 909
    },
    {
      "epoch": 0.63123211653516,
      "grad_norm": 1.7401477745871217,
      "learning_rate": 1e-05,
      "loss": 1.3398,
      "step": 910
    },
    {
      "epoch": 0.6319257782016822,
      "grad_norm": 1.7218362910164169,
      "learning_rate": 1e-05,
      "loss": 1.4193,
      "step": 911
    },
    {
      "epoch": 0.6326194398682042,
      "grad_norm": 1.9808528889101304,
      "learning_rate": 1e-05,
      "loss": 1.392,
      "step": 912
    },
    {
      "epoch": 0.6333131015347264,
      "grad_norm": 1.9064408882431807,
      "learning_rate": 1e-05,
      "loss": 1.3615,
      "step": 913
    },
    {
      "epoch": 0.6340067632012486,
      "grad_norm": 1.7248366549906144,
      "learning_rate": 1e-05,
      "loss": 1.3231,
      "step": 914
    },
    {
      "epoch": 0.6347004248677708,
      "grad_norm": 1.795395048611617,
      "learning_rate": 1e-05,
      "loss": 1.3851,
      "step": 915
    },
    {
      "epoch": 0.6353940865342929,
      "grad_norm": 1.7344884424253888,
      "learning_rate": 1e-05,
      "loss": 1.3528,
      "step": 916
    },
    {
      "epoch": 0.636087748200815,
      "grad_norm": 1.700061571947052,
      "learning_rate": 1e-05,
      "loss": 1.3348,
      "step": 917
    },
    {
      "epoch": 0.6367814098673372,
      "grad_norm": 1.8646566885856952,
      "learning_rate": 1e-05,
      "loss": 1.357,
      "step": 918
    },
    {
      "epoch": 0.6374750715338594,
      "grad_norm": 1.8652265942281396,
      "learning_rate": 1e-05,
      "loss": 1.3343,
      "step": 919
    },
    {
      "epoch": 0.6381687332003815,
      "grad_norm": 1.7925656802981118,
      "learning_rate": 1e-05,
      "loss": 1.3308,
      "step": 920
    },
    {
      "epoch": 0.6388623948669037,
      "grad_norm": 1.7896253293176538,
      "learning_rate": 1e-05,
      "loss": 1.3813,
      "step": 921
    },
    {
      "epoch": 0.6395560565334258,
      "grad_norm": 1.6534489942501098,
      "learning_rate": 1e-05,
      "loss": 1.3233,
      "step": 922
    },
    {
      "epoch": 0.640249718199948,
      "grad_norm": 1.8033026812852484,
      "learning_rate": 1e-05,
      "loss": 1.3074,
      "step": 923
    },
    {
      "epoch": 0.6409433798664701,
      "grad_norm": 1.822451647210804,
      "learning_rate": 1e-05,
      "loss": 1.3217,
      "step": 924
    },
    {
      "epoch": 0.6416370415329923,
      "grad_norm": 1.7821886850533442,
      "learning_rate": 1e-05,
      "loss": 1.3531,
      "step": 925
    },
    {
      "epoch": 0.6423307031995145,
      "grad_norm": 1.8446137766083273,
      "learning_rate": 1e-05,
      "loss": 1.3552,
      "step": 926
    },
    {
      "epoch": 0.6430243648660365,
      "grad_norm": 1.7365041612964318,
      "learning_rate": 1e-05,
      "loss": 1.3472,
      "step": 927
    },
    {
      "epoch": 0.6437180265325587,
      "grad_norm": 1.9116647427342783,
      "learning_rate": 1e-05,
      "loss": 1.3356,
      "step": 928
    },
    {
      "epoch": 0.6444116881990809,
      "grad_norm": 1.6818488752250975,
      "learning_rate": 1e-05,
      "loss": 1.35,
      "step": 929
    },
    {
      "epoch": 0.6451053498656031,
      "grad_norm": 1.8566305164008303,
      "learning_rate": 1e-05,
      "loss": 1.3643,
      "step": 930
    },
    {
      "epoch": 0.6457990115321252,
      "grad_norm": 1.7420953544687154,
      "learning_rate": 1e-05,
      "loss": 1.3148,
      "step": 931
    },
    {
      "epoch": 0.6464926731986473,
      "grad_norm": 1.885744413844102,
      "learning_rate": 1e-05,
      "loss": 1.3492,
      "step": 932
    },
    {
      "epoch": 0.6471863348651695,
      "grad_norm": 1.7944270298154161,
      "learning_rate": 1e-05,
      "loss": 1.3513,
      "step": 933
    },
    {
      "epoch": 0.6478799965316917,
      "grad_norm": 1.7535218523742484,
      "learning_rate": 1e-05,
      "loss": 1.4101,
      "step": 934
    },
    {
      "epoch": 0.6485736581982138,
      "grad_norm": 1.7229852034518358,
      "learning_rate": 1e-05,
      "loss": 1.3437,
      "step": 935
    },
    {
      "epoch": 0.649267319864736,
      "grad_norm": 1.896304422647214,
      "learning_rate": 1e-05,
      "loss": 1.3156,
      "step": 936
    },
    {
      "epoch": 0.6499609815312581,
      "grad_norm": 1.8055244846850502,
      "learning_rate": 1e-05,
      "loss": 1.3691,
      "step": 937
    },
    {
      "epoch": 0.6506546431977803,
      "grad_norm": 1.684941036557295,
      "learning_rate": 1e-05,
      "loss": 1.3579,
      "step": 938
    },
    {
      "epoch": 0.6513483048643024,
      "grad_norm": 1.8888571415510795,
      "learning_rate": 1e-05,
      "loss": 1.3632,
      "step": 939
    },
    {
      "epoch": 0.6520419665308246,
      "grad_norm": 1.8160274262290288,
      "learning_rate": 1e-05,
      "loss": 1.3099,
      "step": 940
    },
    {
      "epoch": 0.6527356281973468,
      "grad_norm": 1.755049632438486,
      "learning_rate": 1e-05,
      "loss": 1.3519,
      "step": 941
    },
    {
      "epoch": 0.653429289863869,
      "grad_norm": 1.89712944315266,
      "learning_rate": 1e-05,
      "loss": 1.3494,
      "step": 942
    },
    {
      "epoch": 0.654122951530391,
      "grad_norm": 1.765188366032801,
      "learning_rate": 1e-05,
      "loss": 1.3856,
      "step": 943
    },
    {
      "epoch": 0.6548166131969132,
      "grad_norm": 1.9497117765562002,
      "learning_rate": 1e-05,
      "loss": 1.3665,
      "step": 944
    },
    {
      "epoch": 0.6555102748634354,
      "grad_norm": 1.799101172711031,
      "learning_rate": 1e-05,
      "loss": 1.3445,
      "step": 945
    },
    {
      "epoch": 0.6562039365299576,
      "grad_norm": 1.712616408878392,
      "learning_rate": 1e-05,
      "loss": 1.3491,
      "step": 946
    },
    {
      "epoch": 0.6568975981964796,
      "grad_norm": 1.7946208261432808,
      "learning_rate": 1e-05,
      "loss": 1.3731,
      "step": 947
    },
    {
      "epoch": 0.6575912598630018,
      "grad_norm": 1.7262699314904466,
      "learning_rate": 1e-05,
      "loss": 1.3585,
      "step": 948
    },
    {
      "epoch": 0.658284921529524,
      "grad_norm": 1.9628450564778277,
      "learning_rate": 1e-05,
      "loss": 1.301,
      "step": 949
    },
    {
      "epoch": 0.6589785831960461,
      "grad_norm": 1.8061202922829884,
      "learning_rate": 1e-05,
      "loss": 1.3783,
      "step": 950
    },
    {
      "epoch": 0.6596722448625683,
      "grad_norm": 1.7121738551781767,
      "learning_rate": 1e-05,
      "loss": 1.357,
      "step": 951
    },
    {
      "epoch": 0.6603659065290904,
      "grad_norm": 1.8850562141039617,
      "learning_rate": 1e-05,
      "loss": 1.2819,
      "step": 952
    },
    {
      "epoch": 0.6610595681956126,
      "grad_norm": 1.867193802881424,
      "learning_rate": 1e-05,
      "loss": 1.3156,
      "step": 953
    },
    {
      "epoch": 0.6617532298621347,
      "grad_norm": 1.7443179431377005,
      "learning_rate": 1e-05,
      "loss": 1.2751,
      "step": 954
    },
    {
      "epoch": 0.6624468915286569,
      "grad_norm": 1.8733131607506688,
      "learning_rate": 1e-05,
      "loss": 1.3534,
      "step": 955
    },
    {
      "epoch": 0.6631405531951791,
      "grad_norm": 1.9784306105729255,
      "learning_rate": 1e-05,
      "loss": 1.2742,
      "step": 956
    },
    {
      "epoch": 0.6638342148617012,
      "grad_norm": 1.8959702823237385,
      "learning_rate": 1e-05,
      "loss": 1.4094,
      "step": 957
    },
    {
      "epoch": 0.6645278765282233,
      "grad_norm": 1.664080974193892,
      "learning_rate": 1e-05,
      "loss": 1.3658,
      "step": 958
    },
    {
      "epoch": 0.6652215381947455,
      "grad_norm": 1.782008443874851,
      "learning_rate": 1e-05,
      "loss": 1.347,
      "step": 959
    },
    {
      "epoch": 0.6659151998612677,
      "grad_norm": 1.8460350587229146,
      "learning_rate": 1e-05,
      "loss": 1.3639,
      "step": 960
    },
    {
      "epoch": 0.6666088615277899,
      "grad_norm": 1.6425972064330443,
      "learning_rate": 1e-05,
      "loss": 1.3693,
      "step": 961
    },
    {
      "epoch": 0.6673025231943119,
      "grad_norm": 1.8565662897573758,
      "learning_rate": 1e-05,
      "loss": 1.3636,
      "step": 962
    },
    {
      "epoch": 0.6679961848608341,
      "grad_norm": 1.7302118661778385,
      "learning_rate": 1e-05,
      "loss": 1.3566,
      "step": 963
    },
    {
      "epoch": 0.6686898465273563,
      "grad_norm": 1.9616014623863918,
      "learning_rate": 1e-05,
      "loss": 1.374,
      "step": 964
    },
    {
      "epoch": 0.6693835081938785,
      "grad_norm": 1.84734239559959,
      "learning_rate": 1e-05,
      "loss": 1.2604,
      "step": 965
    },
    {
      "epoch": 0.6700771698604006,
      "grad_norm": 1.6679061078794732,
      "learning_rate": 1e-05,
      "loss": 1.344,
      "step": 966
    },
    {
      "epoch": 0.6707708315269227,
      "grad_norm": 1.9249982922421873,
      "learning_rate": 1e-05,
      "loss": 1.3773,
      "step": 967
    },
    {
      "epoch": 0.6714644931934449,
      "grad_norm": 1.9130662887594385,
      "learning_rate": 1e-05,
      "loss": 1.3389,
      "step": 968
    },
    {
      "epoch": 0.6721581548599671,
      "grad_norm": 2.0490408445129553,
      "learning_rate": 1e-05,
      "loss": 1.3271,
      "step": 969
    },
    {
      "epoch": 0.6728518165264892,
      "grad_norm": 1.7966765927847634,
      "learning_rate": 1e-05,
      "loss": 1.3082,
      "step": 970
    },
    {
      "epoch": 0.6735454781930114,
      "grad_norm": 1.8365578777762348,
      "learning_rate": 1e-05,
      "loss": 1.3813,
      "step": 971
    },
    {
      "epoch": 0.6742391398595335,
      "grad_norm": 1.7856135676874048,
      "learning_rate": 1e-05,
      "loss": 1.3791,
      "step": 972
    },
    {
      "epoch": 0.6749328015260556,
      "grad_norm": 1.7189842050358264,
      "learning_rate": 1e-05,
      "loss": 1.3859,
      "step": 973
    },
    {
      "epoch": 0.6756264631925778,
      "grad_norm": 1.697736604178591,
      "learning_rate": 1e-05,
      "loss": 1.3531,
      "step": 974
    },
    {
      "epoch": 0.6763201248591,
      "grad_norm": 1.85446433118358,
      "learning_rate": 1e-05,
      "loss": 1.3377,
      "step": 975
    },
    {
      "epoch": 0.6770137865256222,
      "grad_norm": 1.817734345502182,
      "learning_rate": 1e-05,
      "loss": 1.3316,
      "step": 976
    },
    {
      "epoch": 0.6777074481921442,
      "grad_norm": 1.6362921639652548,
      "learning_rate": 1e-05,
      "loss": 1.2879,
      "step": 977
    },
    {
      "epoch": 0.6784011098586664,
      "grad_norm": 1.7236748146694982,
      "learning_rate": 1e-05,
      "loss": 1.3023,
      "step": 978
    },
    {
      "epoch": 0.6790947715251886,
      "grad_norm": 1.874808634354032,
      "learning_rate": 1e-05,
      "loss": 1.3291,
      "step": 979
    },
    {
      "epoch": 0.6797884331917108,
      "grad_norm": 2.143605571091092,
      "learning_rate": 1e-05,
      "loss": 1.3305,
      "step": 980
    },
    {
      "epoch": 0.6804820948582329,
      "grad_norm": 1.7140845838212821,
      "learning_rate": 1e-05,
      "loss": 1.2762,
      "step": 981
    },
    {
      "epoch": 0.681175756524755,
      "grad_norm": 1.7627892247793258,
      "learning_rate": 1e-05,
      "loss": 1.3793,
      "step": 982
    },
    {
      "epoch": 0.6818694181912772,
      "grad_norm": 1.9327806260640557,
      "learning_rate": 1e-05,
      "loss": 1.3314,
      "step": 983
    },
    {
      "epoch": 0.6825630798577994,
      "grad_norm": 1.9570006600139125,
      "learning_rate": 1e-05,
      "loss": 1.3092,
      "step": 984
    },
    {
      "epoch": 0.6832567415243215,
      "grad_norm": 1.857815246395867,
      "learning_rate": 1e-05,
      "loss": 1.328,
      "step": 985
    },
    {
      "epoch": 0.6839504031908437,
      "grad_norm": 1.6521408115836034,
      "learning_rate": 1e-05,
      "loss": 1.3649,
      "step": 986
    },
    {
      "epoch": 0.6846440648573658,
      "grad_norm": 1.6883217532733774,
      "learning_rate": 1e-05,
      "loss": 1.3742,
      "step": 987
    },
    {
      "epoch": 0.685337726523888,
      "grad_norm": 1.7657705434647315,
      "learning_rate": 1e-05,
      "loss": 1.4009,
      "step": 988
    },
    {
      "epoch": 0.6860313881904101,
      "grad_norm": 1.8388470770976078,
      "learning_rate": 1e-05,
      "loss": 1.3385,
      "step": 989
    },
    {
      "epoch": 0.6867250498569323,
      "grad_norm": 1.6446394362553027,
      "learning_rate": 1e-05,
      "loss": 1.3747,
      "step": 990
    },
    {
      "epoch": 0.6874187115234545,
      "grad_norm": 1.76332053954708,
      "learning_rate": 1e-05,
      "loss": 1.3744,
      "step": 991
    },
    {
      "epoch": 0.6881123731899765,
      "grad_norm": 1.8551504155963352,
      "learning_rate": 1e-05,
      "loss": 1.3461,
      "step": 992
    },
    {
      "epoch": 0.6888060348564987,
      "grad_norm": 1.8242720423216203,
      "learning_rate": 1e-05,
      "loss": 1.3949,
      "step": 993
    },
    {
      "epoch": 0.6894996965230209,
      "grad_norm": 1.8728688560334699,
      "learning_rate": 1e-05,
      "loss": 1.4202,
      "step": 994
    },
    {
      "epoch": 0.6901933581895431,
      "grad_norm": 1.8128080132317514,
      "learning_rate": 1e-05,
      "loss": 1.3528,
      "step": 995
    },
    {
      "epoch": 0.6908870198560652,
      "grad_norm": 1.7706897683233593,
      "learning_rate": 1e-05,
      "loss": 1.3772,
      "step": 996
    },
    {
      "epoch": 0.6915806815225873,
      "grad_norm": 1.867842253838222,
      "learning_rate": 1e-05,
      "loss": 1.3735,
      "step": 997
    },
    {
      "epoch": 0.6922743431891095,
      "grad_norm": 1.908519372133083,
      "learning_rate": 1e-05,
      "loss": 1.3093,
      "step": 998
    },
    {
      "epoch": 0.6929680048556317,
      "grad_norm": 1.7680738442803956,
      "learning_rate": 1e-05,
      "loss": 1.3205,
      "step": 999
    },
    {
      "epoch": 0.6936616665221538,
      "grad_norm": 1.752432098173259,
      "learning_rate": 1e-05,
      "loss": 1.3451,
      "step": 1000
    },
    {
      "epoch": 0.694355328188676,
      "grad_norm": 1.7866959543838525,
      "learning_rate": 1e-05,
      "loss": 1.3288,
      "step": 1001
    },
    {
      "epoch": 0.6950489898551981,
      "grad_norm": 1.6320555911640122,
      "learning_rate": 1e-05,
      "loss": 1.3157,
      "step": 1002
    },
    {
      "epoch": 0.6957426515217203,
      "grad_norm": 1.7766379486245896,
      "learning_rate": 1e-05,
      "loss": 1.3559,
      "step": 1003
    },
    {
      "epoch": 0.6964363131882424,
      "grad_norm": 1.7801227267982318,
      "learning_rate": 1e-05,
      "loss": 1.389,
      "step": 1004
    },
    {
      "epoch": 0.6971299748547646,
      "grad_norm": 1.7763939485733111,
      "learning_rate": 1e-05,
      "loss": 1.4214,
      "step": 1005
    },
    {
      "epoch": 0.6978236365212868,
      "grad_norm": 1.7466154961438336,
      "learning_rate": 1e-05,
      "loss": 1.3974,
      "step": 1006
    },
    {
      "epoch": 0.6985172981878089,
      "grad_norm": 1.8018054751465553,
      "learning_rate": 1e-05,
      "loss": 1.3146,
      "step": 1007
    },
    {
      "epoch": 0.699210959854331,
      "grad_norm": 1.7015362135443022,
      "learning_rate": 1e-05,
      "loss": 1.3521,
      "step": 1008
    },
    {
      "epoch": 0.6999046215208532,
      "grad_norm": 1.8044732359887248,
      "learning_rate": 1e-05,
      "loss": 1.3298,
      "step": 1009
    },
    {
      "epoch": 0.7005982831873754,
      "grad_norm": 1.7248926110752036,
      "learning_rate": 1e-05,
      "loss": 1.311,
      "step": 1010
    },
    {
      "epoch": 0.7012919448538976,
      "grad_norm": 1.7408526444267358,
      "learning_rate": 1e-05,
      "loss": 1.3346,
      "step": 1011
    },
    {
      "epoch": 0.7019856065204196,
      "grad_norm": 1.756432069962424,
      "learning_rate": 1e-05,
      "loss": 1.3608,
      "step": 1012
    },
    {
      "epoch": 0.7026792681869418,
      "grad_norm": 1.9004070384049725,
      "learning_rate": 1e-05,
      "loss": 1.3061,
      "step": 1013
    },
    {
      "epoch": 0.703372929853464,
      "grad_norm": 1.7674772646104595,
      "learning_rate": 1e-05,
      "loss": 1.3365,
      "step": 1014
    },
    {
      "epoch": 0.7040665915199861,
      "grad_norm": 1.815286596926447,
      "learning_rate": 1e-05,
      "loss": 1.3112,
      "step": 1015
    },
    {
      "epoch": 0.7047602531865083,
      "grad_norm": 1.9200058514873535,
      "learning_rate": 1e-05,
      "loss": 1.3702,
      "step": 1016
    },
    {
      "epoch": 0.7054539148530304,
      "grad_norm": 1.7499367861528972,
      "learning_rate": 1e-05,
      "loss": 1.3707,
      "step": 1017
    },
    {
      "epoch": 0.7061475765195526,
      "grad_norm": 1.925251587075512,
      "learning_rate": 1e-05,
      "loss": 1.3208,
      "step": 1018
    },
    {
      "epoch": 0.7068412381860747,
      "grad_norm": 1.7154198796482498,
      "learning_rate": 1e-05,
      "loss": 1.3336,
      "step": 1019
    },
    {
      "epoch": 0.7075348998525969,
      "grad_norm": 1.837360393002266,
      "learning_rate": 1e-05,
      "loss": 1.3328,
      "step": 1020
    },
    {
      "epoch": 0.7082285615191191,
      "grad_norm": 1.6211349139215232,
      "learning_rate": 1e-05,
      "loss": 1.3284,
      "step": 1021
    },
    {
      "epoch": 0.7089222231856412,
      "grad_norm": 1.866016563395064,
      "learning_rate": 1e-05,
      "loss": 1.3198,
      "step": 1022
    },
    {
      "epoch": 0.7096158848521633,
      "grad_norm": 1.6839566806665383,
      "learning_rate": 1e-05,
      "loss": 1.3651,
      "step": 1023
    },
    {
      "epoch": 0.7103095465186855,
      "grad_norm": 1.7159632620855965,
      "learning_rate": 1e-05,
      "loss": 1.3268,
      "step": 1024
    },
    {
      "epoch": 0.7110032081852077,
      "grad_norm": 1.806422188485046,
      "learning_rate": 1e-05,
      "loss": 1.2901,
      "step": 1025
    },
    {
      "epoch": 0.7116968698517299,
      "grad_norm": 1.7329697047767731,
      "learning_rate": 1e-05,
      "loss": 1.278,
      "step": 1026
    },
    {
      "epoch": 0.7123905315182519,
      "grad_norm": 1.7358387057504157,
      "learning_rate": 1e-05,
      "loss": 1.3959,
      "step": 1027
    },
    {
      "epoch": 0.7130841931847741,
      "grad_norm": 1.7843805164975317,
      "learning_rate": 1e-05,
      "loss": 1.3158,
      "step": 1028
    },
    {
      "epoch": 0.7137778548512963,
      "grad_norm": 1.8034487032951743,
      "learning_rate": 1e-05,
      "loss": 1.3239,
      "step": 1029
    },
    {
      "epoch": 0.7144715165178185,
      "grad_norm": 1.7806257674138806,
      "learning_rate": 1e-05,
      "loss": 1.348,
      "step": 1030
    },
    {
      "epoch": 0.7151651781843406,
      "grad_norm": 1.8562808148693768,
      "learning_rate": 1e-05,
      "loss": 1.3675,
      "step": 1031
    },
    {
      "epoch": 0.7158588398508627,
      "grad_norm": 1.9354803733254098,
      "learning_rate": 1e-05,
      "loss": 1.3331,
      "step": 1032
    },
    {
      "epoch": 0.7165525015173849,
      "grad_norm": 1.7581325487197559,
      "learning_rate": 1e-05,
      "loss": 1.3468,
      "step": 1033
    },
    {
      "epoch": 0.7172461631839071,
      "grad_norm": 1.802989571777425,
      "learning_rate": 1e-05,
      "loss": 1.3429,
      "step": 1034
    },
    {
      "epoch": 0.7179398248504292,
      "grad_norm": 1.7427548981299272,
      "learning_rate": 1e-05,
      "loss": 1.3555,
      "step": 1035
    },
    {
      "epoch": 0.7186334865169514,
      "grad_norm": 1.9113368723599877,
      "learning_rate": 1e-05,
      "loss": 1.3458,
      "step": 1036
    },
    {
      "epoch": 0.7193271481834735,
      "grad_norm": 1.746074482528233,
      "learning_rate": 1e-05,
      "loss": 1.256,
      "step": 1037
    },
    {
      "epoch": 0.7200208098499956,
      "grad_norm": 1.8354380987152568,
      "learning_rate": 1e-05,
      "loss": 1.336,
      "step": 1038
    },
    {
      "epoch": 0.7207144715165178,
      "grad_norm": 1.945273024066637,
      "learning_rate": 1e-05,
      "loss": 1.3985,
      "step": 1039
    },
    {
      "epoch": 0.72140813318304,
      "grad_norm": 1.843452934712379,
      "learning_rate": 1e-05,
      "loss": 1.3193,
      "step": 1040
    },
    {
      "epoch": 0.7221017948495622,
      "grad_norm": 1.9504440192269308,
      "learning_rate": 1e-05,
      "loss": 1.3296,
      "step": 1041
    },
    {
      "epoch": 0.7227954565160842,
      "grad_norm": 1.8706540071171702,
      "learning_rate": 1e-05,
      "loss": 1.3403,
      "step": 1042
    },
    {
      "epoch": 0.7234891181826064,
      "grad_norm": 1.9488283001981537,
      "learning_rate": 1e-05,
      "loss": 1.3245,
      "step": 1043
    },
    {
      "epoch": 0.7241827798491286,
      "grad_norm": 1.6969025009984984,
      "learning_rate": 1e-05,
      "loss": 1.3041,
      "step": 1044
    },
    {
      "epoch": 0.7248764415156508,
      "grad_norm": 2.0072497667921443,
      "learning_rate": 1e-05,
      "loss": 1.3447,
      "step": 1045
    },
    {
      "epoch": 0.7255701031821729,
      "grad_norm": 1.8741053070227567,
      "learning_rate": 1e-05,
      "loss": 1.3814,
      "step": 1046
    },
    {
      "epoch": 0.726263764848695,
      "grad_norm": 1.8874635982516978,
      "learning_rate": 1e-05,
      "loss": 1.328,
      "step": 1047
    },
    {
      "epoch": 0.7269574265152172,
      "grad_norm": 1.8011724514294678,
      "learning_rate": 1e-05,
      "loss": 1.3165,
      "step": 1048
    },
    {
      "epoch": 0.7276510881817394,
      "grad_norm": 1.7615037862154284,
      "learning_rate": 1e-05,
      "loss": 1.3405,
      "step": 1049
    },
    {
      "epoch": 0.7283447498482615,
      "grad_norm": 1.8936590686337285,
      "learning_rate": 1e-05,
      "loss": 1.3304,
      "step": 1050
    },
    {
      "epoch": 0.7290384115147837,
      "grad_norm": 1.7561903582794731,
      "learning_rate": 1e-05,
      "loss": 1.3135,
      "step": 1051
    },
    {
      "epoch": 0.7297320731813058,
      "grad_norm": 1.9910837825376402,
      "learning_rate": 1e-05,
      "loss": 1.3328,
      "step": 1052
    },
    {
      "epoch": 0.730425734847828,
      "grad_norm": 1.7932757018881984,
      "learning_rate": 1e-05,
      "loss": 1.3244,
      "step": 1053
    },
    {
      "epoch": 0.7311193965143501,
      "grad_norm": 1.8115170033234536,
      "learning_rate": 1e-05,
      "loss": 1.3256,
      "step": 1054
    },
    {
      "epoch": 0.7318130581808723,
      "grad_norm": 1.846025824397134,
      "learning_rate": 1e-05,
      "loss": 1.2897,
      "step": 1055
    },
    {
      "epoch": 0.7325067198473945,
      "grad_norm": 1.8145673138145253,
      "learning_rate": 1e-05,
      "loss": 1.3333,
      "step": 1056
    },
    {
      "epoch": 0.7332003815139165,
      "grad_norm": 1.6430330564527496,
      "learning_rate": 1e-05,
      "loss": 1.3569,
      "step": 1057
    },
    {
      "epoch": 0.7338940431804387,
      "grad_norm": 1.7140027949989802,
      "learning_rate": 1e-05,
      "loss": 1.3096,
      "step": 1058
    },
    {
      "epoch": 0.7345877048469609,
      "grad_norm": 1.9676800557132401,
      "learning_rate": 1e-05,
      "loss": 1.3327,
      "step": 1059
    },
    {
      "epoch": 0.7352813665134831,
      "grad_norm": 1.76220089185509,
      "learning_rate": 1e-05,
      "loss": 1.3837,
      "step": 1060
    },
    {
      "epoch": 0.7359750281800052,
      "grad_norm": 1.8650314152543974,
      "learning_rate": 1e-05,
      "loss": 1.31,
      "step": 1061
    },
    {
      "epoch": 0.7366686898465273,
      "grad_norm": 2.1105903583633054,
      "learning_rate": 1e-05,
      "loss": 1.3189,
      "step": 1062
    },
    {
      "epoch": 0.7373623515130495,
      "grad_norm": 1.9330298633028662,
      "learning_rate": 1e-05,
      "loss": 1.3116,
      "step": 1063
    },
    {
      "epoch": 0.7380560131795717,
      "grad_norm": 1.8697841477985335,
      "learning_rate": 1e-05,
      "loss": 1.3059,
      "step": 1064
    },
    {
      "epoch": 0.7387496748460938,
      "grad_norm": 1.8167855532087722,
      "learning_rate": 1e-05,
      "loss": 1.3725,
      "step": 1065
    },
    {
      "epoch": 0.739443336512616,
      "grad_norm": 1.9237163410520044,
      "learning_rate": 1e-05,
      "loss": 1.3216,
      "step": 1066
    },
    {
      "epoch": 0.7401369981791381,
      "grad_norm": 1.6945312309384704,
      "learning_rate": 1e-05,
      "loss": 1.3205,
      "step": 1067
    },
    {
      "epoch": 0.7408306598456603,
      "grad_norm": 1.8403636035002944,
      "learning_rate": 1e-05,
      "loss": 1.323,
      "step": 1068
    },
    {
      "epoch": 0.7415243215121824,
      "grad_norm": 1.8463721460630584,
      "learning_rate": 1e-05,
      "loss": 1.3215,
      "step": 1069
    },
    {
      "epoch": 0.7422179831787046,
      "grad_norm": 1.9058089022783489,
      "learning_rate": 1e-05,
      "loss": 1.3117,
      "step": 1070
    },
    {
      "epoch": 0.7429116448452268,
      "grad_norm": 2.1180556320179935,
      "learning_rate": 1e-05,
      "loss": 1.3762,
      "step": 1071
    },
    {
      "epoch": 0.7436053065117489,
      "grad_norm": 1.8334895855944495,
      "learning_rate": 1e-05,
      "loss": 1.3443,
      "step": 1072
    },
    {
      "epoch": 0.744298968178271,
      "grad_norm": 2.1232912269113573,
      "learning_rate": 1e-05,
      "loss": 1.3149,
      "step": 1073
    },
    {
      "epoch": 0.7449926298447932,
      "grad_norm": 1.933335743238272,
      "learning_rate": 1e-05,
      "loss": 1.3036,
      "step": 1074
    },
    {
      "epoch": 0.7456862915113154,
      "grad_norm": 1.7161884340751359,
      "learning_rate": 1e-05,
      "loss": 1.337,
      "step": 1075
    },
    {
      "epoch": 0.7463799531778376,
      "grad_norm": 1.8821904641272937,
      "learning_rate": 1e-05,
      "loss": 1.3484,
      "step": 1076
    },
    {
      "epoch": 0.7470736148443596,
      "grad_norm": 1.8420209136091366,
      "learning_rate": 1e-05,
      "loss": 1.3255,
      "step": 1077
    },
    {
      "epoch": 0.7477672765108818,
      "grad_norm": 1.6844962997358441,
      "learning_rate": 1e-05,
      "loss": 1.3763,
      "step": 1078
    },
    {
      "epoch": 0.748460938177404,
      "grad_norm": 1.6853533211643357,
      "learning_rate": 1e-05,
      "loss": 1.334,
      "step": 1079
    },
    {
      "epoch": 0.7491545998439261,
      "grad_norm": 1.7019512963652867,
      "learning_rate": 1e-05,
      "loss": 1.2932,
      "step": 1080
    },
    {
      "epoch": 0.7498482615104483,
      "grad_norm": 1.811962898514401,
      "learning_rate": 1e-05,
      "loss": 1.3587,
      "step": 1081
    },
    {
      "epoch": 0.7505419231769704,
      "grad_norm": 1.7755093149292678,
      "learning_rate": 1e-05,
      "loss": 1.2892,
      "step": 1082
    },
    {
      "epoch": 0.7512355848434926,
      "grad_norm": 1.8629164456603942,
      "learning_rate": 1e-05,
      "loss": 1.3037,
      "step": 1083
    },
    {
      "epoch": 0.7519292465100147,
      "grad_norm": 1.7886710463953541,
      "learning_rate": 1e-05,
      "loss": 1.2803,
      "step": 1084
    },
    {
      "epoch": 0.7526229081765369,
      "grad_norm": 1.695860963172146,
      "learning_rate": 1e-05,
      "loss": 1.4153,
      "step": 1085
    },
    {
      "epoch": 0.7533165698430591,
      "grad_norm": 1.9667102987846012,
      "learning_rate": 1e-05,
      "loss": 1.3436,
      "step": 1086
    },
    {
      "epoch": 0.7540102315095812,
      "grad_norm": 1.7749867238254802,
      "learning_rate": 1e-05,
      "loss": 1.3543,
      "step": 1087
    },
    {
      "epoch": 0.7547038931761033,
      "grad_norm": 1.8601842356859748,
      "learning_rate": 1e-05,
      "loss": 1.3261,
      "step": 1088
    },
    {
      "epoch": 0.7553975548426255,
      "grad_norm": 1.7933714883604228,
      "learning_rate": 1e-05,
      "loss": 1.375,
      "step": 1089
    },
    {
      "epoch": 0.7560912165091477,
      "grad_norm": 1.9108510451623624,
      "learning_rate": 1e-05,
      "loss": 1.3306,
      "step": 1090
    },
    {
      "epoch": 0.7567848781756699,
      "grad_norm": 1.7031275623765503,
      "learning_rate": 1e-05,
      "loss": 1.3776,
      "step": 1091
    },
    {
      "epoch": 0.7574785398421919,
      "grad_norm": 1.723756241244783,
      "learning_rate": 1e-05,
      "loss": 1.3069,
      "step": 1092
    },
    {
      "epoch": 0.7581722015087141,
      "grad_norm": 1.790196818419589,
      "learning_rate": 1e-05,
      "loss": 1.2887,
      "step": 1093
    },
    {
      "epoch": 0.7588658631752363,
      "grad_norm": 1.6853767916962972,
      "learning_rate": 1e-05,
      "loss": 1.3328,
      "step": 1094
    },
    {
      "epoch": 0.7595595248417585,
      "grad_norm": 1.8283907475641092,
      "learning_rate": 1e-05,
      "loss": 1.3394,
      "step": 1095
    },
    {
      "epoch": 0.7602531865082806,
      "grad_norm": 1.7686661299844872,
      "learning_rate": 1e-05,
      "loss": 1.3213,
      "step": 1096
    },
    {
      "epoch": 0.7609468481748027,
      "grad_norm": 2.0255015885014678,
      "learning_rate": 1e-05,
      "loss": 1.3553,
      "step": 1097
    },
    {
      "epoch": 0.7616405098413249,
      "grad_norm": 1.887116897314805,
      "learning_rate": 1e-05,
      "loss": 1.2652,
      "step": 1098
    },
    {
      "epoch": 0.7623341715078471,
      "grad_norm": 2.0827192956718816,
      "learning_rate": 1e-05,
      "loss": 1.2931,
      "step": 1099
    },
    {
      "epoch": 0.7630278331743692,
      "grad_norm": 1.6696475938133672,
      "learning_rate": 1e-05,
      "loss": 1.344,
      "step": 1100
    },
    {
      "epoch": 0.7637214948408914,
      "grad_norm": 1.8696319148514944,
      "learning_rate": 1e-05,
      "loss": 1.2875,
      "step": 1101
    },
    {
      "epoch": 0.7644151565074135,
      "grad_norm": 1.9161318005356125,
      "learning_rate": 1e-05,
      "loss": 1.3039,
      "step": 1102
    },
    {
      "epoch": 0.7651088181739356,
      "grad_norm": 1.7357326775365562,
      "learning_rate": 1e-05,
      "loss": 1.327,
      "step": 1103
    },
    {
      "epoch": 0.7658024798404578,
      "grad_norm": 1.9374495795431432,
      "learning_rate": 1e-05,
      "loss": 1.3118,
      "step": 1104
    },
    {
      "epoch": 0.76649614150698,
      "grad_norm": 1.8970035705740447,
      "learning_rate": 1e-05,
      "loss": 1.3518,
      "step": 1105
    },
    {
      "epoch": 0.7671898031735022,
      "grad_norm": 1.7310701480408488,
      "learning_rate": 1e-05,
      "loss": 1.3011,
      "step": 1106
    },
    {
      "epoch": 0.7678834648400242,
      "grad_norm": 1.6347319795022848,
      "learning_rate": 1e-05,
      "loss": 1.3885,
      "step": 1107
    },
    {
      "epoch": 0.7685771265065464,
      "grad_norm": 1.7389954204005627,
      "learning_rate": 1e-05,
      "loss": 1.3407,
      "step": 1108
    },
    {
      "epoch": 0.7692707881730686,
      "grad_norm": 1.8545763638618038,
      "learning_rate": 1e-05,
      "loss": 1.3475,
      "step": 1109
    },
    {
      "epoch": 0.7699644498395908,
      "grad_norm": 1.830290361513446,
      "learning_rate": 1e-05,
      "loss": 1.3372,
      "step": 1110
    },
    {
      "epoch": 0.7706581115061129,
      "grad_norm": 1.7554211037949135,
      "learning_rate": 1e-05,
      "loss": 1.3382,
      "step": 1111
    },
    {
      "epoch": 0.771351773172635,
      "grad_norm": 1.791182051330342,
      "learning_rate": 1e-05,
      "loss": 1.2865,
      "step": 1112
    },
    {
      "epoch": 0.7720454348391572,
      "grad_norm": 1.7944697955298015,
      "learning_rate": 1e-05,
      "loss": 1.3521,
      "step": 1113
    },
    {
      "epoch": 0.7727390965056794,
      "grad_norm": 1.7280244820641575,
      "learning_rate": 1e-05,
      "loss": 1.2855,
      "step": 1114
    },
    {
      "epoch": 0.7734327581722015,
      "grad_norm": 1.646134246431486,
      "learning_rate": 1e-05,
      "loss": 1.3443,
      "step": 1115
    },
    {
      "epoch": 0.7741264198387237,
      "grad_norm": 1.9614831482482202,
      "learning_rate": 1e-05,
      "loss": 1.3511,
      "step": 1116
    },
    {
      "epoch": 0.7748200815052458,
      "grad_norm": 1.8931116828060321,
      "learning_rate": 1e-05,
      "loss": 1.3014,
      "step": 1117
    },
    {
      "epoch": 0.775513743171768,
      "grad_norm": 1.766409312274434,
      "learning_rate": 1e-05,
      "loss": 1.3368,
      "step": 1118
    },
    {
      "epoch": 0.7762074048382901,
      "grad_norm": 1.7681164259480207,
      "learning_rate": 1e-05,
      "loss": 1.3277,
      "step": 1119
    },
    {
      "epoch": 0.7769010665048123,
      "grad_norm": 1.8795478880896714,
      "learning_rate": 1e-05,
      "loss": 1.358,
      "step": 1120
    },
    {
      "epoch": 0.7775947281713345,
      "grad_norm": 1.7552042041280342,
      "learning_rate": 1e-05,
      "loss": 1.338,
      "step": 1121
    },
    {
      "epoch": 0.7782883898378565,
      "grad_norm": 1.6825666179566667,
      "learning_rate": 1e-05,
      "loss": 1.3468,
      "step": 1122
    },
    {
      "epoch": 0.7789820515043787,
      "grad_norm": 2.069237558777561,
      "learning_rate": 1e-05,
      "loss": 1.3285,
      "step": 1123
    },
    {
      "epoch": 0.7796757131709009,
      "grad_norm": 1.8945277365950668,
      "learning_rate": 1e-05,
      "loss": 1.3457,
      "step": 1124
    },
    {
      "epoch": 0.7803693748374231,
      "grad_norm": 1.804604515643157,
      "learning_rate": 1e-05,
      "loss": 1.3198,
      "step": 1125
    },
    {
      "epoch": 0.7810630365039452,
      "grad_norm": 1.6687410426653992,
      "learning_rate": 1e-05,
      "loss": 1.3571,
      "step": 1126
    },
    {
      "epoch": 0.7817566981704673,
      "grad_norm": 1.8408255487647456,
      "learning_rate": 1e-05,
      "loss": 1.348,
      "step": 1127
    },
    {
      "epoch": 0.7824503598369895,
      "grad_norm": 1.7473823693404393,
      "learning_rate": 1e-05,
      "loss": 1.3449,
      "step": 1128
    },
    {
      "epoch": 0.7831440215035117,
      "grad_norm": 1.762367934431706,
      "learning_rate": 1e-05,
      "loss": 1.3296,
      "step": 1129
    },
    {
      "epoch": 0.7838376831700338,
      "grad_norm": 1.8065045373977573,
      "learning_rate": 1e-05,
      "loss": 1.3088,
      "step": 1130
    },
    {
      "epoch": 0.784531344836556,
      "grad_norm": 1.7702834178449094,
      "learning_rate": 1e-05,
      "loss": 1.3251,
      "step": 1131
    },
    {
      "epoch": 0.7852250065030781,
      "grad_norm": 1.684651956430982,
      "learning_rate": 1e-05,
      "loss": 1.3221,
      "step": 1132
    },
    {
      "epoch": 0.7859186681696003,
      "grad_norm": 1.718401813004295,
      "learning_rate": 1e-05,
      "loss": 1.3583,
      "step": 1133
    },
    {
      "epoch": 0.7866123298361224,
      "grad_norm": 1.7593076318939966,
      "learning_rate": 1e-05,
      "loss": 1.3481,
      "step": 1134
    },
    {
      "epoch": 0.7873059915026446,
      "grad_norm": 1.9066726353857328,
      "learning_rate": 1e-05,
      "loss": 1.298,
      "step": 1135
    },
    {
      "epoch": 0.7879996531691668,
      "grad_norm": 1.666639956360219,
      "learning_rate": 1e-05,
      "loss": 1.3321,
      "step": 1136
    },
    {
      "epoch": 0.7886933148356889,
      "grad_norm": 1.722161549449429,
      "learning_rate": 1e-05,
      "loss": 1.3081,
      "step": 1137
    },
    {
      "epoch": 0.789386976502211,
      "grad_norm": 1.8556050317918311,
      "learning_rate": 1e-05,
      "loss": 1.3155,
      "step": 1138
    },
    {
      "epoch": 0.7900806381687332,
      "grad_norm": 1.906163646274444,
      "learning_rate": 1e-05,
      "loss": 1.3127,
      "step": 1139
    },
    {
      "epoch": 0.7907742998352554,
      "grad_norm": 1.91777805112317,
      "learning_rate": 1e-05,
      "loss": 1.3267,
      "step": 1140
    },
    {
      "epoch": 0.7914679615017776,
      "grad_norm": 1.758622480343078,
      "learning_rate": 1e-05,
      "loss": 1.3604,
      "step": 1141
    },
    {
      "epoch": 0.7921616231682996,
      "grad_norm": 1.818304243791579,
      "learning_rate": 1e-05,
      "loss": 1.3744,
      "step": 1142
    },
    {
      "epoch": 0.7928552848348218,
      "grad_norm": 1.7096779670968696,
      "learning_rate": 1e-05,
      "loss": 1.2967,
      "step": 1143
    },
    {
      "epoch": 0.793548946501344,
      "grad_norm": 1.8288634798052434,
      "learning_rate": 1e-05,
      "loss": 1.3836,
      "step": 1144
    },
    {
      "epoch": 0.7942426081678661,
      "grad_norm": 1.837569087516797,
      "learning_rate": 1e-05,
      "loss": 1.3306,
      "step": 1145
    },
    {
      "epoch": 0.7949362698343883,
      "grad_norm": 1.757663683708377,
      "learning_rate": 1e-05,
      "loss": 1.3289,
      "step": 1146
    },
    {
      "epoch": 0.7956299315009104,
      "grad_norm": 1.5733908350037635,
      "learning_rate": 1e-05,
      "loss": 1.3293,
      "step": 1147
    },
    {
      "epoch": 0.7963235931674326,
      "grad_norm": 1.7006097263550368,
      "learning_rate": 1e-05,
      "loss": 1.3544,
      "step": 1148
    },
    {
      "epoch": 0.7970172548339547,
      "grad_norm": 1.852767855910649,
      "learning_rate": 1e-05,
      "loss": 1.367,
      "step": 1149
    },
    {
      "epoch": 0.7977109165004769,
      "grad_norm": 1.8236237376393378,
      "learning_rate": 1e-05,
      "loss": 1.3298,
      "step": 1150
    },
    {
      "epoch": 0.7984045781669991,
      "grad_norm": 1.7542840584318944,
      "learning_rate": 1e-05,
      "loss": 1.3305,
      "step": 1151
    },
    {
      "epoch": 0.7990982398335212,
      "grad_norm": 1.7363578108911824,
      "learning_rate": 1e-05,
      "loss": 1.312,
      "step": 1152
    },
    {
      "epoch": 0.7997919015000433,
      "grad_norm": 1.7447796524158012,
      "learning_rate": 1e-05,
      "loss": 1.3146,
      "step": 1153
    },
    {
      "epoch": 0.8004855631665655,
      "grad_norm": 1.7998354010324689,
      "learning_rate": 1e-05,
      "loss": 1.2651,
      "step": 1154
    },
    {
      "epoch": 0.8011792248330877,
      "grad_norm": 1.72370764035216,
      "learning_rate": 1e-05,
      "loss": 1.3313,
      "step": 1155
    },
    {
      "epoch": 0.8018728864996099,
      "grad_norm": 1.9027302235993584,
      "learning_rate": 1e-05,
      "loss": 1.3275,
      "step": 1156
    },
    {
      "epoch": 0.8025665481661319,
      "grad_norm": 1.767361577842723,
      "learning_rate": 1e-05,
      "loss": 1.281,
      "step": 1157
    },
    {
      "epoch": 0.8032602098326541,
      "grad_norm": 1.6957352595013901,
      "learning_rate": 1e-05,
      "loss": 1.339,
      "step": 1158
    },
    {
      "epoch": 0.8039538714991763,
      "grad_norm": 1.8811200117431954,
      "learning_rate": 1e-05,
      "loss": 1.3694,
      "step": 1159
    },
    {
      "epoch": 0.8046475331656985,
      "grad_norm": 1.8732110913975033,
      "learning_rate": 1e-05,
      "loss": 1.3828,
      "step": 1160
    },
    {
      "epoch": 0.8053411948322206,
      "grad_norm": 1.806929633499466,
      "learning_rate": 1e-05,
      "loss": 1.293,
      "step": 1161
    },
    {
      "epoch": 0.8060348564987427,
      "grad_norm": 1.9234019762346322,
      "learning_rate": 1e-05,
      "loss": 1.356,
      "step": 1162
    },
    {
      "epoch": 0.8067285181652649,
      "grad_norm": 1.8612299857621706,
      "learning_rate": 1e-05,
      "loss": 1.2944,
      "step": 1163
    },
    {
      "epoch": 0.8074221798317871,
      "grad_norm": 1.8949079530850956,
      "learning_rate": 1e-05,
      "loss": 1.2759,
      "step": 1164
    },
    {
      "epoch": 0.8081158414983092,
      "grad_norm": 1.835747159847785,
      "learning_rate": 1e-05,
      "loss": 1.2959,
      "step": 1165
    },
    {
      "epoch": 0.8088095031648314,
      "grad_norm": 1.6994420208999754,
      "learning_rate": 1e-05,
      "loss": 1.3546,
      "step": 1166
    },
    {
      "epoch": 0.8095031648313535,
      "grad_norm": 1.7058565978885374,
      "learning_rate": 1e-05,
      "loss": 1.3016,
      "step": 1167
    },
    {
      "epoch": 0.8101968264978756,
      "grad_norm": 1.7202328196552465,
      "learning_rate": 1e-05,
      "loss": 1.3244,
      "step": 1168
    },
    {
      "epoch": 0.8108904881643978,
      "grad_norm": 1.5969257126998517,
      "learning_rate": 1e-05,
      "loss": 1.2941,
      "step": 1169
    },
    {
      "epoch": 0.81158414983092,
      "grad_norm": 2.033122314469678,
      "learning_rate": 1e-05,
      "loss": 1.3112,
      "step": 1170
    },
    {
      "epoch": 0.8122778114974422,
      "grad_norm": 1.818607135695885,
      "learning_rate": 1e-05,
      "loss": 1.3372,
      "step": 1171
    },
    {
      "epoch": 0.8129714731639642,
      "grad_norm": 1.7332347851944176,
      "learning_rate": 1e-05,
      "loss": 1.2963,
      "step": 1172
    },
    {
      "epoch": 0.8136651348304864,
      "grad_norm": 1.79912386221675,
      "learning_rate": 1e-05,
      "loss": 1.3192,
      "step": 1173
    },
    {
      "epoch": 0.8143587964970086,
      "grad_norm": 1.759247644611021,
      "learning_rate": 1e-05,
      "loss": 1.3087,
      "step": 1174
    },
    {
      "epoch": 0.8150524581635308,
      "grad_norm": 1.81196721719679,
      "learning_rate": 1e-05,
      "loss": 1.3347,
      "step": 1175
    },
    {
      "epoch": 0.8157461198300529,
      "grad_norm": 1.8329350171829653,
      "learning_rate": 1e-05,
      "loss": 1.347,
      "step": 1176
    },
    {
      "epoch": 0.816439781496575,
      "grad_norm": 2.097683322497983,
      "learning_rate": 1e-05,
      "loss": 1.3187,
      "step": 1177
    },
    {
      "epoch": 0.8171334431630972,
      "grad_norm": 1.682730633916439,
      "learning_rate": 1e-05,
      "loss": 1.326,
      "step": 1178
    },
    {
      "epoch": 0.8178271048296194,
      "grad_norm": 1.7967406525079352,
      "learning_rate": 1e-05,
      "loss": 1.3229,
      "step": 1179
    },
    {
      "epoch": 0.8185207664961415,
      "grad_norm": 1.7155776890591734,
      "learning_rate": 1e-05,
      "loss": 1.3143,
      "step": 1180
    },
    {
      "epoch": 0.8192144281626637,
      "grad_norm": 1.731948891871397,
      "learning_rate": 1e-05,
      "loss": 1.3044,
      "step": 1181
    },
    {
      "epoch": 0.8199080898291858,
      "grad_norm": 1.6258267218121343,
      "learning_rate": 1e-05,
      "loss": 1.2966,
      "step": 1182
    },
    {
      "epoch": 0.820601751495708,
      "grad_norm": 1.8947923997247118,
|
"learning_rate": 1e-05, |
|
"loss": 1.25, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.8212954131622301, |
|
"grad_norm": 1.9268221611589265, |
|
"learning_rate": 1e-05, |
|
"loss": 1.363, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.8219890748287523, |
|
"grad_norm": 1.6151381049081766, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3323, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.8226827364952745, |
|
"grad_norm": 1.6754745238527986, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2736, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.8233763981617965, |
|
"grad_norm": 1.6566597340456615, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3292, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.8240700598283187, |
|
"grad_norm": 1.7868911126447826, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3146, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.8247637214948409, |
|
"grad_norm": 1.7395269678979228, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3172, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.8254573831613631, |
|
"grad_norm": 1.846976443991522, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2845, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8261510448278852, |
|
"grad_norm": 1.9263142545199925, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3218, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.8268447064944073, |
|
"grad_norm": 1.6753572265693735, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3347, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.8275383681609295, |
|
"grad_norm": 1.8402922863638769, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3389, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.8282320298274517, |
|
"grad_norm": 1.7003805951507205, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3016, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.8289256914939738, |
|
"grad_norm": 1.7016887764707231, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3312, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.829619353160496, |
|
"grad_norm": 1.7450143183218212, |
|
"learning_rate": 1e-05, |
|
"loss": 1.303, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.8303130148270181, |
|
"grad_norm": 1.7676264585702774, |
|
"learning_rate": 1e-05, |
|
"loss": 1.317, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.8310066764935403, |
|
"grad_norm": 2.0267151482602177, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2687, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.8317003381600624, |
|
"grad_norm": 1.6648828369964448, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3436, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.8323939998265846, |
|
"grad_norm": 1.7437721633894316, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2719, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8330876614931068, |
|
"grad_norm": 1.76080267027574, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3364, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.8337813231596289, |
|
"grad_norm": 1.6023127093441858, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3567, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.834474984826151, |
|
"grad_norm": 1.807984298617728, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3243, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.8351686464926732, |
|
"grad_norm": 1.7507073872441543, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2748, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.8358623081591954, |
|
"grad_norm": 1.7340226078649954, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3236, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.8365559698257176, |
|
"grad_norm": 1.8128370813992278, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3398, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.8372496314922396, |
|
"grad_norm": 1.8385313973464554, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3244, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.8379432931587618, |
|
"grad_norm": 1.8202487072236857, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2841, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.838636954825284, |
|
"grad_norm": 1.8226882804903315, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3251, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.8393306164918061, |
|
"grad_norm": 1.7796315284012894, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3503, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8400242781583283, |
|
"grad_norm": 1.770507934678552, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2873, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.8407179398248504, |
|
"grad_norm": 1.7983739916772261, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3309, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.8414116014913726, |
|
"grad_norm": 1.8581265442393458, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2976, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.8421052631578947, |
|
"grad_norm": 1.9709414587672909, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3244, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.8427989248244169, |
|
"grad_norm": 1.934195079670126, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3197, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.8434925864909391, |
|
"grad_norm": 1.5905446332751805, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3144, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.8441862481574612, |
|
"grad_norm": 1.8012514681125382, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2984, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.8448799098239833, |
|
"grad_norm": 1.8322906369289444, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3365, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.8455735714905055, |
|
"grad_norm": 1.7098942463292028, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2994, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.8462672331570277, |
|
"grad_norm": 1.806262096500155, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3135, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8469608948235499, |
|
"grad_norm": 1.8088243417346301, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2548, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.8476545564900719, |
|
"grad_norm": 1.7615794277621235, |
|
"learning_rate": 1e-05, |
|
"loss": 1.369, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.8483482181565941, |
|
"grad_norm": 1.7446487987023735, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3417, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.8490418798231163, |
|
"grad_norm": 1.9292547174186359, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2619, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.8497355414896385, |
|
"grad_norm": 1.7385855689942564, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3367, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.8504292031561606, |
|
"grad_norm": 1.7893857799911939, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3231, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.8511228648226827, |
|
"grad_norm": 1.7904188745955463, |
|
"learning_rate": 1e-05, |
|
"loss": 1.273, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.8518165264892049, |
|
"grad_norm": 1.7311471203311328, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3036, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.852510188155727, |
|
"grad_norm": 1.8405281776102504, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3266, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.8532038498222492, |
|
"grad_norm": 1.7910111933959387, |
|
"learning_rate": 1e-05, |
|
"loss": 1.332, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8538975114887714, |
|
"grad_norm": 1.692055737327334, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2594, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.8545911731552935, |
|
"grad_norm": 1.715888470034707, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2974, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.8552848348218156, |
|
"grad_norm": 1.6920575665727629, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3081, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.8559784964883378, |
|
"grad_norm": 1.8820212499017275, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2579, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.85667215815486, |
|
"grad_norm": 1.7590796982626669, |
|
"learning_rate": 1e-05, |
|
"loss": 1.346, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.8573658198213822, |
|
"grad_norm": 1.7175897993182907, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2952, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.8580594814879042, |
|
"grad_norm": 1.8872407973916834, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2678, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.8587531431544264, |
|
"grad_norm": 1.820685981576064, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3161, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.8594468048209486, |
|
"grad_norm": 1.7106563905509156, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2805, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.8601404664874708, |
|
"grad_norm": 1.835121012618051, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3411, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8608341281539929, |
|
"grad_norm": 1.759648204282831, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3169, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.861527789820515, |
|
"grad_norm": 1.8108439058590786, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3365, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.8622214514870372, |
|
"grad_norm": 1.8264838456429158, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3122, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.8629151131535594, |
|
"grad_norm": 1.8561243343760983, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3126, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.8636087748200815, |
|
"grad_norm": 1.7744496574339597, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3419, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.8643024364866037, |
|
"grad_norm": 1.7102771712690799, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3913, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.8649960981531258, |
|
"grad_norm": 1.637011563504676, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3294, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.865689759819648, |
|
"grad_norm": 1.7378852441605857, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3604, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.8663834214861701, |
|
"grad_norm": 1.892145137680064, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3839, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.8670770831526923, |
|
"grad_norm": 1.7114640198993971, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3164, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.8677707448192145, |
|
"grad_norm": 1.857472545380348, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3409, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 0.8684644064857365, |
|
"grad_norm": 1.6636749531242045, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3103, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.8691580681522587, |
|
"grad_norm": 1.853753890533686, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3372, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 0.8698517298187809, |
|
"grad_norm": 1.7274626500933439, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3198, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 0.8705453914853031, |
|
"grad_norm": 1.7681696772117461, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3451, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.8712390531518251, |
|
"grad_norm": 1.7074991789276823, |
|
"learning_rate": 1e-05, |
|
"loss": 1.306, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.8719327148183473, |
|
"grad_norm": 1.8022859354848255, |
|
"learning_rate": 1e-05, |
|
"loss": 1.307, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 0.8726263764848695, |
|
"grad_norm": 1.722826958334196, |
|
"learning_rate": 1e-05, |
|
"loss": 1.348, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 0.8733200381513917, |
|
"grad_norm": 1.6278820667115972, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3156, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 0.8740136998179138, |
|
"grad_norm": 1.8615627558866032, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3642, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.874707361484436, |
|
"grad_norm": 1.6977360433618884, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3661, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 0.8754010231509581, |
|
"grad_norm": 1.7119935578912018, |
|
"learning_rate": 1e-05, |
|
"loss": 1.343, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 0.8760946848174803, |
|
"grad_norm": 1.8828273441618772, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3286, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 0.8767883464840024, |
|
"grad_norm": 1.7991745556104566, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2877, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.8774820081505246, |
|
"grad_norm": 1.7692125953469466, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3132, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.8781756698170468, |
|
"grad_norm": 1.763539004235656, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2516, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 0.8788693314835689, |
|
"grad_norm": 1.8097604071898914, |
|
"learning_rate": 1e-05, |
|
"loss": 1.322, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 0.879562993150091, |
|
"grad_norm": 1.8504952339835505, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3257, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.8802566548166132, |
|
"grad_norm": 1.8228681067260823, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3207, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 0.8809503164831354, |
|
"grad_norm": 1.8569141776272553, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3415, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.8816439781496576, |
|
"grad_norm": 1.8653037577154865, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2882, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 0.8823376398161796, |
|
"grad_norm": 1.7842091027351248, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3011, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.8830313014827018, |
|
"grad_norm": 1.865231093038103, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3421, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 0.883724963149224, |
|
"grad_norm": 1.8445484018508556, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2994, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 0.8844186248157461, |
|
"grad_norm": 2.0112180595207585, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3491, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.8851122864822683, |
|
"grad_norm": 1.9313320335876165, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2972, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.8858059481487904, |
|
"grad_norm": 1.766314463801755, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3551, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 0.8864996098153126, |
|
"grad_norm": 1.6939200547787165, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3346, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 0.8871932714818347, |
|
"grad_norm": 1.6553779025228499, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3331, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 0.8878869331483569, |
|
"grad_norm": 1.672522216377223, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3143, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.888580594814879, |
|
"grad_norm": 1.6477930929287763, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2655, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 0.8892742564814012, |
|
"grad_norm": 1.9082634726577539, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3101, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 0.8899679181479233, |
|
"grad_norm": 1.6619545725705775, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3134, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 0.8906615798144455, |
|
"grad_norm": 1.8186223504046093, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3356, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.8913552414809677, |
|
"grad_norm": 1.6755642327768199, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3005, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.8920489031474899, |
|
"grad_norm": 1.7174416264813488, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3347, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 0.8927425648140119, |
|
"grad_norm": 1.7186710255881854, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2908, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 0.8934362264805341, |
|
"grad_norm": 1.6346319252027368, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2927, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.8941298881470563, |
|
"grad_norm": 1.8474495055475482, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3255, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 0.8948235498135785, |
|
"grad_norm": 1.8788390378390694, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3385, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.8955172114801006, |
|
"grad_norm": 1.629566356590922, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3482, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 0.8962108731466227, |
|
"grad_norm": 1.7412289424540435, |
|
"learning_rate": 1e-05, |
|
"loss": 1.267, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.8969045348131449, |
|
"grad_norm": 1.5904673607353297, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3107, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 0.897598196479667, |
|
"grad_norm": 1.7668320875825854, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2968, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 0.8982918581461892, |
|
"grad_norm": 1.6434271863603802, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3531, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.8989855198127114, |
|
"grad_norm": 1.6446049917777663, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3067, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.8996791814792335, |
|
"grad_norm": 1.7114850493139724, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2526, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 0.9003728431457556, |
|
"grad_norm": 1.7703812278053084, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3284, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 0.9010665048122778, |
|
"grad_norm": 1.783598858952647, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3449, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 0.9017601664788, |
|
"grad_norm": 1.7594643222023651, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2865, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9024538281453222, |
|
"grad_norm": 1.7979500686841217, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2826, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 0.9031474898118442, |
|
"grad_norm": 1.7054221381249888, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3525, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.9038411514783664, |
|
"grad_norm": 1.7423889164937596, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3044, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 0.9045348131448886, |
|
"grad_norm": 1.8080520927021586, |
|
"learning_rate": 1e-05, |
|
"loss": 1.331, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.9052284748114108, |
|
"grad_norm": 1.9903383637954946, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3213, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.9059221364779328, |
|
"grad_norm": 1.622554431537696, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3307, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 0.906615798144455, |
|
"grad_norm": 1.891392187663753, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3484, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 0.9073094598109772, |
|
"grad_norm": 1.5686215128270367, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3101, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.9080031214774994, |
|
"grad_norm": 1.7879215692958745, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3499, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.9086967831440215, |
|
"grad_norm": 1.7583743340224038, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2757, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9093904448105437, |
|
"grad_norm": 1.7396925828139032, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2925, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 0.9100841064770658, |
|
"grad_norm": 1.7958710900831494, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3058, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.910777768143588, |
|
"grad_norm": 1.764507973805378, |
|
"learning_rate": 1e-05, |
|
"loss": 1.297, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 0.9114714298101101, |
|
"grad_norm": 1.8550793576750044, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3146, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 0.9121650914766323, |
|
"grad_norm": 1.6803054339754033, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2966, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.9128587531431545, |
|
"grad_norm": 1.885929787117982, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3201, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.9135524148096765, |
|
"grad_norm": 1.6131928750808537, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2899, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 0.9142460764761987, |
|
"grad_norm": 1.8283426626966652, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3149, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 0.9149397381427209, |
|
"grad_norm": 1.7895417190819627, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3135, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 0.9156333998092431, |
|
"grad_norm": 1.6655898144804122, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3506, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9163270614757651, |
|
"grad_norm": 1.789395903422411, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2944, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 0.9170207231422873, |
|
"grad_norm": 1.5554671892032146, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3035, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 0.9177143848088095, |
|
"grad_norm": 1.7462183316659554, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2871, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 0.9184080464753317, |
|
"grad_norm": 1.6407955316695355, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2842, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.9191017081418538, |
|
"grad_norm": 1.8054786932821483, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2801, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.919795369808376, |
|
"grad_norm": 1.7774882584688303, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3111, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 0.9204890314748981, |
|
"grad_norm": 1.7487308168423374, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3086, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 0.9211826931414203, |
|
"grad_norm": 1.830348412513128, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2398, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.9218763548079424, |
|
"grad_norm": 1.8745249129153505, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3174, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 0.9225700164744646, |
|
"grad_norm": 1.6997745347291242, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2795, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.9232636781409868, |
|
"grad_norm": 1.6461004692498915, |
|
"learning_rate": 1e-05, |
|
"loss": 1.369, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 0.9239573398075089, |
|
"grad_norm": 1.867846237084916, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3492, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.924651001474031, |
|
"grad_norm": 1.7338725633875975, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3425, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.9253446631405532, |
|
"grad_norm": 1.7058365907688622, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3289, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 0.9260383248070754, |
|
"grad_norm": 1.6812689204725293, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3166, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.9267319864735976, |
|
"grad_norm": 1.7854594207936143, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3357, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.9274256481401196, |
|
"grad_norm": 1.8632837097505413, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2385, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 0.9281193098066418, |
|
"grad_norm": 1.838541328017032, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2703, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 0.928812971473164, |
|
"grad_norm": 1.8476109239446323, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2934, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 0.9295066331396861, |
|
"grad_norm": 1.735630365745877, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2978, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9302002948062082, |
|
"grad_norm": 1.6182767450065856, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2999, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 0.9308939564727304, |
|
"grad_norm": 1.7314432313872, |
|
"learning_rate": 1e-05, |
|
"loss": 1.291, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 0.9315876181392526, |
|
"grad_norm": 1.9298812546673758, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3352, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 0.9322812798057747, |
|
"grad_norm": 1.890305436960625, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3603, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.9329749414722969, |
|
"grad_norm": 1.7012847869156689, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2705, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.933668603138819, |
|
"grad_norm": 1.7624548099246455, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3229, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 0.9343622648053412, |
|
"grad_norm": 1.8174643785961295, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2613, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 0.9350559264718633, |
|
"grad_norm": 1.766018762161775, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3907, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 0.9357495881383855, |
|
"grad_norm": 1.8138067691928326, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2878, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 0.9364432498049077, |
|
"grad_norm": 1.8378030129613956, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3192, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9371369114714299, |
|
"grad_norm": 2.012247327683597, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3096, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 0.9378305731379519, |
|
"grad_norm": 1.704402821997915, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3304, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 0.9385242348044741, |
|
"grad_norm": 1.7885693471410014, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3065, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 0.9392178964709963, |
|
"grad_norm": 1.6661434949017349, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3307, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 0.9399115581375185, |
|
"grad_norm": 1.7962358503874996, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3129, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.9406052198040405, |
|
"grad_norm": 1.718189556083174, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2994, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 0.9412988814705627, |
|
"grad_norm": 1.9092876605935403, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3859, |
|
"step": 1357 |
|
}, |
|
{ |
|
"epoch": 0.9419925431370849, |
|
"grad_norm": 1.8331634920945954, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2668, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 0.942686204803607, |
|
"grad_norm": 1.7795600411027266, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3158, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 0.9433798664701292, |
|
"grad_norm": 1.7718647937619816, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2766, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9440735281366514, |
|
"grad_norm": 1.7833385647199957, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3188, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 0.9447671898031735, |
|
"grad_norm": 1.7094537156535095, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2916, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 0.9454608514696956, |
|
"grad_norm": 1.7218575319930665, |
|
"learning_rate": 1e-05, |
|
"loss": 1.363, |
|
"step": 1363 |
|
}, |
|
{ |
|
"epoch": 0.9461545131362178, |
|
"grad_norm": 1.8291690131158997, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3067, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.94684817480274, |
|
"grad_norm": 1.5981127580277035, |
|
"learning_rate": 1e-05, |
|
"loss": 1.316, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.9475418364692622, |
|
"grad_norm": 1.7097133890389782, |
|
"learning_rate": 1e-05, |
|
"loss": 1.31, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 0.9482354981357842, |
|
"grad_norm": 1.7177869042959357, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3261, |
|
"step": 1367 |
|
}, |
|
{ |
|
"epoch": 0.9489291598023064, |
|
"grad_norm": 1.7391018887085676, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3476, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 0.9496228214688286, |
|
"grad_norm": 1.835174110490517, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3198, |
|
"step": 1369 |
|
}, |
|
{ |
|
"epoch": 0.9503164831353508, |
|
"grad_norm": 1.8268669014129535, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3338, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9510101448018728, |
|
"grad_norm": 1.6581115729537674, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2378, |
|
"step": 1371 |
|
}, |
|
{ |
|
"epoch": 0.951703806468395, |
|
"grad_norm": 1.809305155351, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3432, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 0.9523974681349172, |
|
"grad_norm": 1.7869170912103947, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3173, |
|
"step": 1373 |
|
}, |
|
{ |
|
"epoch": 0.9530911298014394, |
|
"grad_norm": 1.5946853342076313, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3202, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 0.9537847914679615, |
|
"grad_norm": 1.7449681923450435, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3368, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.9544784531344837, |
|
"grad_norm": 1.7867785161300096, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2523, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.9551721148010058, |
|
"grad_norm": 1.7493793708109926, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3143, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 0.955865776467528, |
|
"grad_norm": 1.6708212386781847, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3197, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 0.9565594381340501, |
|
"grad_norm": 1.7624923123146528, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2976, |
|
"step": 1379 |
|
}, |
|
{ |
|
"epoch": 0.9572530998005723, |
|
"grad_norm": 1.8444740226606935, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3251, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.9579467614670945, |
|
"grad_norm": 1.5953334218837774, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2664, |
|
"step": 1381 |
|
}, |
|
{ |
|
"epoch": 0.9586404231336165, |
|
"grad_norm": 1.9455556499425986, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2883, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 0.9593340848001387, |
|
"grad_norm": 1.7447825684583413, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3101, |
|
"step": 1383 |
|
}, |
|
{ |
|
"epoch": 0.9600277464666609, |
|
"grad_norm": 1.7815515049059591, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2931, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 0.9607214081331831, |
|
"grad_norm": 1.8473821407663016, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2916, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.9614150697997051, |
|
"grad_norm": 1.8287625516543624, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3302, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 0.9621087314662273, |
|
"grad_norm": 1.9131178436318876, |
|
"learning_rate": 1e-05, |
|
"loss": 1.292, |
|
"step": 1387 |
|
}, |
|
{ |
|
"epoch": 0.9628023931327495, |
|
"grad_norm": 1.6731211620160438, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2662, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 0.9634960547992717, |
|
"grad_norm": 1.8100000986534872, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3462, |
|
"step": 1389 |
|
}, |
|
{ |
|
"epoch": 0.9641897164657938, |
|
"grad_norm": 1.915036526264764, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2819, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.964883378132316, |
|
"grad_norm": 1.9069098292199362, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3002, |
|
"step": 1391 |
|
}, |
|
{ |
|
"epoch": 0.9655770397988381, |
|
"grad_norm": 1.7930073332304715, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2816, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 0.9662707014653603, |
|
"grad_norm": 1.8530639734903125, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3521, |
|
"step": 1393 |
|
}, |
|
{ |
|
"epoch": 0.9669643631318824, |
|
"grad_norm": 1.7315815210793186, |
|
"learning_rate": 1e-05, |
|
"loss": 1.299, |
|
"step": 1394 |
|
}, |
|
{ |
|
"epoch": 0.9676580247984046, |
|
"grad_norm": 1.8034499383080111, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2712, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.9683516864649268, |
|
"grad_norm": 1.8019607332463998, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3431, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 0.9690453481314489, |
|
"grad_norm": 1.7026349253270283, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3306, |
|
"step": 1397 |
|
}, |
|
{ |
|
"epoch": 0.969739009797971, |
|
"grad_norm": 1.8029112479233105, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2791, |
|
"step": 1398 |
|
}, |
|
{ |
|
"epoch": 0.9704326714644932, |
|
"grad_norm": 1.6640138276094931, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3551, |
|
"step": 1399 |
|
}, |
|
{ |
|
"epoch": 0.9711263331310154, |
|
"grad_norm": 1.8773693641133087, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3214, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9718199947975376, |
|
"grad_norm": 1.6189625859175014, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3045, |
|
"step": 1401 |
|
}, |
|
{ |
|
"epoch": 0.9725136564640596, |
|
"grad_norm": 1.8655984733394448, |
|
"learning_rate": 1e-05, |
|
"loss": 1.295, |
|
"step": 1402 |
|
}, |
|
{ |
|
"epoch": 0.9732073181305818, |
|
"grad_norm": 1.6495459791796045, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3209, |
|
"step": 1403 |
|
}, |
|
{ |
|
"epoch": 0.973900979797104, |
|
"grad_norm": 1.7262446380853802, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3179, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 0.9745946414636261, |
|
"grad_norm": 1.8782015233926619, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2991, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.9752883031301482, |
|
"grad_norm": 1.6294972087965263, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3022, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 0.9759819647966704, |
|
"grad_norm": 1.79967261492696, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3019, |
|
"step": 1407 |
|
}, |
|
{ |
|
"epoch": 0.9766756264631926, |
|
"grad_norm": 1.8301257668132722, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2952, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.9773692881297147, |
|
"grad_norm": 1.7858886287008595, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3455, |
|
"step": 1409 |
|
}, |
|
{ |
|
"epoch": 0.9780629497962369, |
|
"grad_norm": 1.645627337737642, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2788, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.978756611462759, |
|
"grad_norm": 1.8493218045117754, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2946, |
|
"step": 1411 |
|
}, |
|
{ |
|
"epoch": 0.9794502731292812, |
|
"grad_norm": 1.9503637295771707, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2623, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 0.9801439347958033, |
|
"grad_norm": 1.854587002167059, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2966, |
|
"step": 1413 |
|
}, |
|
{ |
|
"epoch": 0.9808375964623255, |
|
"grad_norm": 1.7795692734633988, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3538, |
|
"step": 1414 |
|
}, |
|
{ |
|
"epoch": 0.9815312581288477, |
|
"grad_norm": 1.8026744879009458, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3534, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.9822249197953699, |
|
"grad_norm": 1.7902484770921805, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3155, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 0.9829185814618919, |
|
"grad_norm": 1.7212800748245163, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2941, |
|
"step": 1417 |
|
}, |
|
{ |
|
"epoch": 0.9836122431284141, |
|
"grad_norm": 1.801323328800465, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3823, |
|
"step": 1418 |
|
}, |
|
{ |
|
"epoch": 0.9843059047949363, |
|
"grad_norm": 1.7021881473197502, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3371, |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 0.9849995664614585, |
|
"grad_norm": 1.8046118812459044, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3433, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.9856932281279805, |
|
"grad_norm": 1.6685271100241381, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2897, |
|
"step": 1421 |
|
}, |
|
{ |
|
"epoch": 0.9863868897945027, |
|
"grad_norm": 1.6454889789915157, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3029, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 0.9870805514610249, |
|
"grad_norm": 1.8167751934930396, |
|
"learning_rate": 1e-05, |
|
"loss": 1.278, |
|
"step": 1423 |
|
}, |
|
{ |
|
"epoch": 0.987774213127547, |
|
"grad_norm": 1.7280323726187787, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3137, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 0.9884678747940692, |
|
"grad_norm": 1.670222942611059, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3248, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.9891615364605914, |
|
"grad_norm": 1.799342131947682, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3215, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 0.9898551981271135, |
|
"grad_norm": 1.8123264473720575, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3548, |
|
"step": 1427 |
|
}, |
|
{ |
|
"epoch": 0.9905488597936356, |
|
"grad_norm": 1.7747475128663022, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2765, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 0.9912425214601578, |
|
"grad_norm": 1.8041680654218195, |
|
"learning_rate": 1e-05, |
|
"loss": 1.31, |
|
"step": 1429 |
|
}, |
|
{ |
|
"epoch": 0.99193618312668, |
|
"grad_norm": 1.7105917151107914, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2958, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.9926298447932022, |
|
"grad_norm": 1.6626340057182631, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3484, |
|
"step": 1431 |
|
}, |
|
{ |
|
"epoch": 0.9933235064597242, |
|
"grad_norm": 1.6610746706958375, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3256, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 0.9940171681262464, |
|
"grad_norm": 1.7615692816320323, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3303, |
|
"step": 1433 |
|
}, |
|
{ |
|
"epoch": 0.9947108297927686, |
|
"grad_norm": 1.7533916584851055, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2955, |
|
"step": 1434 |
|
}, |
|
{ |
|
"epoch": 0.9954044914592908, |
|
"grad_norm": 1.7232538432339657, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3177, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.9960981531258128, |
|
"grad_norm": 1.7441612385283174, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3013, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 0.996791814792335, |
|
"grad_norm": 1.8597160448645227, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2923, |
|
"step": 1437 |
|
}, |
|
{ |
|
"epoch": 0.9974854764588572, |
|
"grad_norm": 1.742932722373845, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3469, |
|
"step": 1438 |
|
}, |
|
{ |
|
"epoch": 0.9981791381253794, |
|
"grad_norm": 1.8961285251801105, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3101, |
|
"step": 1439 |
|
}, |
|
{ |
|
"epoch": 0.9988727997919015, |
|
"grad_norm": 1.6967076327288442, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3157, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.9995664614584236, |
|
"grad_norm": 1.669534255209671, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3203, |
|
"step": 1441 |
|
}, |
|
{ |
|
"epoch": 0.9995664614584236, |
|
"step": 1441, |
|
"total_flos": 2332396250726400.0, |
|
"train_loss": 1.4015557253881927, |
|
"train_runtime": 204310.3582, |
|
"train_samples_per_second": 0.903, |
|
"train_steps_per_second": 0.007 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1441, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2332396250726400.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|