|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9979522184300342, |
|
"eval_steps": 500, |
|
"global_step": 1098, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0027303754266211604, |
|
"grad_norm": 1.3254656791687012, |
|
"learning_rate": 1.818181818181818e-06, |
|
"loss": 1.2897, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005460750853242321, |
|
"grad_norm": 1.3267881870269775, |
|
"learning_rate": 3.636363636363636e-06, |
|
"loss": 1.2823, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.008191126279863481, |
|
"grad_norm": 1.2982481718063354, |
|
"learning_rate": 5.4545454545454545e-06, |
|
"loss": 1.2595, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.010921501706484642, |
|
"grad_norm": 1.2894413471221924, |
|
"learning_rate": 7.272727272727272e-06, |
|
"loss": 1.2653, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.013651877133105802, |
|
"grad_norm": 1.2869772911071777, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 1.2545, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.016382252559726963, |
|
"grad_norm": 1.2373387813568115, |
|
"learning_rate": 1.0909090909090909e-05, |
|
"loss": 1.2194, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01911262798634812, |
|
"grad_norm": 1.20195734500885, |
|
"learning_rate": 1.2727272727272727e-05, |
|
"loss": 1.2139, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.021843003412969283, |
|
"grad_norm": 1.1426103115081787, |
|
"learning_rate": 1.4545454545454545e-05, |
|
"loss": 1.2315, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.024573378839590442, |
|
"grad_norm": 1.0495123863220215, |
|
"learning_rate": 1.6363636363636366e-05, |
|
"loss": 1.1944, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.027303754266211604, |
|
"grad_norm": 0.8776500821113586, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 1.183, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.030034129692832763, |
|
"grad_norm": 0.8687052130699158, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1592, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.032764505119453925, |
|
"grad_norm": 0.7476271390914917, |
|
"learning_rate": 2.1818181818181818e-05, |
|
"loss": 1.1483, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03549488054607509, |
|
"grad_norm": 0.6418495774269104, |
|
"learning_rate": 2.3636363636363637e-05, |
|
"loss": 1.0762, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03822525597269624, |
|
"grad_norm": 0.600390613079071, |
|
"learning_rate": 2.5454545454545454e-05, |
|
"loss": 1.0492, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.040955631399317405, |
|
"grad_norm": 0.5653348565101624, |
|
"learning_rate": 2.7272727272727273e-05, |
|
"loss": 1.0548, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04368600682593857, |
|
"grad_norm": 0.5357097387313843, |
|
"learning_rate": 2.909090909090909e-05, |
|
"loss": 1.0273, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04641638225255973, |
|
"grad_norm": 0.4480445683002472, |
|
"learning_rate": 3.090909090909091e-05, |
|
"loss": 1.0065, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.049146757679180884, |
|
"grad_norm": 0.40983352065086365, |
|
"learning_rate": 3.272727272727273e-05, |
|
"loss": 0.9908, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05187713310580205, |
|
"grad_norm": 0.42159780859947205, |
|
"learning_rate": 3.454545454545455e-05, |
|
"loss": 0.9888, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05460750853242321, |
|
"grad_norm": 0.41620174050331116, |
|
"learning_rate": 3.6363636363636364e-05, |
|
"loss": 0.9575, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05733788395904437, |
|
"grad_norm": 0.3804452419281006, |
|
"learning_rate": 3.818181818181819e-05, |
|
"loss": 0.9413, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.060068259385665526, |
|
"grad_norm": 0.37021100521087646, |
|
"learning_rate": 4e-05, |
|
"loss": 0.9223, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06279863481228669, |
|
"grad_norm": 0.34090206027030945, |
|
"learning_rate": 4.181818181818182e-05, |
|
"loss": 0.8878, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06552901023890785, |
|
"grad_norm": 0.32232972979545593, |
|
"learning_rate": 4.3636363636363636e-05, |
|
"loss": 0.8986, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06825938566552901, |
|
"grad_norm": 0.2941684424877167, |
|
"learning_rate": 4.545454545454546e-05, |
|
"loss": 0.8857, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.07098976109215017, |
|
"grad_norm": 0.27072674036026, |
|
"learning_rate": 4.7272727272727275e-05, |
|
"loss": 0.8736, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.07372013651877134, |
|
"grad_norm": 0.2696637511253357, |
|
"learning_rate": 4.909090909090909e-05, |
|
"loss": 0.8698, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07645051194539249, |
|
"grad_norm": 0.2565267086029053, |
|
"learning_rate": 5.090909090909091e-05, |
|
"loss": 0.8324, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07918088737201365, |
|
"grad_norm": 0.2474038451910019, |
|
"learning_rate": 5.272727272727272e-05, |
|
"loss": 0.841, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.08191126279863481, |
|
"grad_norm": 0.22430865466594696, |
|
"learning_rate": 5.4545454545454546e-05, |
|
"loss": 0.8219, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08464163822525597, |
|
"grad_norm": 0.21238166093826294, |
|
"learning_rate": 5.636363636363636e-05, |
|
"loss": 0.8328, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.08737201365187713, |
|
"grad_norm": 0.2210783213376999, |
|
"learning_rate": 5.818181818181818e-05, |
|
"loss": 0.8187, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0901023890784983, |
|
"grad_norm": 0.24119816720485687, |
|
"learning_rate": 6e-05, |
|
"loss": 0.8078, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.09283276450511946, |
|
"grad_norm": 0.21313577890396118, |
|
"learning_rate": 6.181818181818182e-05, |
|
"loss": 0.8051, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.09556313993174062, |
|
"grad_norm": 0.17824789881706238, |
|
"learning_rate": 6.363636363636364e-05, |
|
"loss": 0.7841, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.09829351535836177, |
|
"grad_norm": 0.18413369357585907, |
|
"learning_rate": 6.545454545454546e-05, |
|
"loss": 0.7851, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.10102389078498293, |
|
"grad_norm": 0.2003067582845688, |
|
"learning_rate": 6.727272727272727e-05, |
|
"loss": 0.8064, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.1037542662116041, |
|
"grad_norm": 0.1989540457725525, |
|
"learning_rate": 6.90909090909091e-05, |
|
"loss": 0.7841, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.10648464163822526, |
|
"grad_norm": 0.14544272422790527, |
|
"learning_rate": 7.090909090909092e-05, |
|
"loss": 0.7745, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.10921501706484642, |
|
"grad_norm": 0.1559988260269165, |
|
"learning_rate": 7.272727272727273e-05, |
|
"loss": 0.7845, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11194539249146758, |
|
"grad_norm": 0.1705523580312729, |
|
"learning_rate": 7.454545454545455e-05, |
|
"loss": 0.7876, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.11467576791808874, |
|
"grad_norm": 0.1464846283197403, |
|
"learning_rate": 7.636363636363637e-05, |
|
"loss": 0.7846, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.1174061433447099, |
|
"grad_norm": 0.1304199993610382, |
|
"learning_rate": 7.818181818181818e-05, |
|
"loss": 0.7734, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.12013651877133105, |
|
"grad_norm": 0.1516261249780655, |
|
"learning_rate": 8e-05, |
|
"loss": 0.7836, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.12286689419795221, |
|
"grad_norm": 0.1361905336380005, |
|
"learning_rate": 8.181818181818183e-05, |
|
"loss": 0.7751, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.12559726962457338, |
|
"grad_norm": 0.14435631036758423, |
|
"learning_rate": 8.363636363636364e-05, |
|
"loss": 0.7655, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.12832764505119454, |
|
"grad_norm": 0.13407501578330994, |
|
"learning_rate": 8.545454545454545e-05, |
|
"loss": 0.7729, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.1310580204778157, |
|
"grad_norm": 0.13555769622325897, |
|
"learning_rate": 8.727272727272727e-05, |
|
"loss": 0.7693, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.13378839590443686, |
|
"grad_norm": 0.1439952850341797, |
|
"learning_rate": 8.90909090909091e-05, |
|
"loss": 0.7787, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.13651877133105803, |
|
"grad_norm": 0.14037510752677917, |
|
"learning_rate": 9.090909090909092e-05, |
|
"loss": 0.7757, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1392491467576792, |
|
"grad_norm": 0.11772281676530838, |
|
"learning_rate": 9.272727272727273e-05, |
|
"loss": 0.7408, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.14197952218430035, |
|
"grad_norm": 0.1545950025320053, |
|
"learning_rate": 9.454545454545455e-05, |
|
"loss": 0.7381, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.1447098976109215, |
|
"grad_norm": 0.12565699219703674, |
|
"learning_rate": 9.636363636363637e-05, |
|
"loss": 0.7769, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.14744027303754267, |
|
"grad_norm": 0.15412947535514832, |
|
"learning_rate": 9.818181818181818e-05, |
|
"loss": 0.7585, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.15017064846416384, |
|
"grad_norm": 0.11638892441987991, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7661, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.15290102389078497, |
|
"grad_norm": 0.16432470083236694, |
|
"learning_rate": 0.00010181818181818181, |
|
"loss": 0.7546, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.15563139931740613, |
|
"grad_norm": 0.11529026180505753, |
|
"learning_rate": 0.00010363636363636364, |
|
"loss": 0.7535, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.1583617747440273, |
|
"grad_norm": 0.23582805693149567, |
|
"learning_rate": 0.00010545454545454545, |
|
"loss": 0.7683, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.16109215017064846, |
|
"grad_norm": 0.12393908202648163, |
|
"learning_rate": 0.00010727272727272728, |
|
"loss": 0.7526, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.16382252559726962, |
|
"grad_norm": 0.20981422066688538, |
|
"learning_rate": 0.00010909090909090909, |
|
"loss": 0.7397, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16655290102389078, |
|
"grad_norm": 0.1519405096769333, |
|
"learning_rate": 0.00011090909090909092, |
|
"loss": 0.7472, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.16928327645051194, |
|
"grad_norm": 0.15360122919082642, |
|
"learning_rate": 0.00011272727272727272, |
|
"loss": 0.7452, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1720136518771331, |
|
"grad_norm": 0.14730164408683777, |
|
"learning_rate": 0.00011454545454545456, |
|
"loss": 0.7068, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.17474402730375427, |
|
"grad_norm": 0.166826993227005, |
|
"learning_rate": 0.00011636363636363636, |
|
"loss": 0.7476, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.17747440273037543, |
|
"grad_norm": 0.17365337908267975, |
|
"learning_rate": 0.0001181818181818182, |
|
"loss": 0.7083, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.1802047781569966, |
|
"grad_norm": 0.2101927250623703, |
|
"learning_rate": 0.00012, |
|
"loss": 0.755, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.18293515358361775, |
|
"grad_norm": 0.1802549660205841, |
|
"learning_rate": 0.00012181818181818183, |
|
"loss": 0.7388, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.18566552901023892, |
|
"grad_norm": 0.17372193932533264, |
|
"learning_rate": 0.00012363636363636364, |
|
"loss": 0.7067, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.18839590443686008, |
|
"grad_norm": 0.15312258899211884, |
|
"learning_rate": 0.00012545454545454546, |
|
"loss": 0.7424, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.19112627986348124, |
|
"grad_norm": 0.24253840744495392, |
|
"learning_rate": 0.00012727272727272728, |
|
"loss": 0.7418, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.19385665529010238, |
|
"grad_norm": 0.2043231725692749, |
|
"learning_rate": 0.0001290909090909091, |
|
"loss": 0.7362, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.19658703071672354, |
|
"grad_norm": 0.19859246909618378, |
|
"learning_rate": 0.00013090909090909093, |
|
"loss": 0.7071, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1993174061433447, |
|
"grad_norm": 0.20175667107105255, |
|
"learning_rate": 0.00013272727272727275, |
|
"loss": 0.7202, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.20204778156996586, |
|
"grad_norm": 0.1653033196926117, |
|
"learning_rate": 0.00013454545454545455, |
|
"loss": 0.7229, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.20477815699658702, |
|
"grad_norm": 0.17003491520881653, |
|
"learning_rate": 0.00013636363636363637, |
|
"loss": 0.7353, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.2075085324232082, |
|
"grad_norm": 0.18356764316558838, |
|
"learning_rate": 0.0001381818181818182, |
|
"loss": 0.7138, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.21023890784982935, |
|
"grad_norm": 0.2215511053800583, |
|
"learning_rate": 0.00014, |
|
"loss": 0.7238, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.2129692832764505, |
|
"grad_norm": 0.34184327721595764, |
|
"learning_rate": 0.00014181818181818184, |
|
"loss": 0.7297, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.21569965870307167, |
|
"grad_norm": 0.25543472170829773, |
|
"learning_rate": 0.00014363636363636363, |
|
"loss": 0.742, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.21843003412969283, |
|
"grad_norm": 0.2220849245786667, |
|
"learning_rate": 0.00014545454545454546, |
|
"loss": 0.6897, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.221160409556314, |
|
"grad_norm": 0.25552013516426086, |
|
"learning_rate": 0.00014727272727272728, |
|
"loss": 0.744, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.22389078498293516, |
|
"grad_norm": 0.2621108889579773, |
|
"learning_rate": 0.0001490909090909091, |
|
"loss": 0.7193, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.22662116040955632, |
|
"grad_norm": 0.1840047836303711, |
|
"learning_rate": 0.0001509090909090909, |
|
"loss": 0.6991, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.22935153583617748, |
|
"grad_norm": 0.21538959443569183, |
|
"learning_rate": 0.00015272727272727275, |
|
"loss": 0.7058, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.23208191126279865, |
|
"grad_norm": 0.34613293409347534, |
|
"learning_rate": 0.00015454545454545454, |
|
"loss": 0.7011, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.2348122866894198, |
|
"grad_norm": 0.26010966300964355, |
|
"learning_rate": 0.00015636363636363637, |
|
"loss": 0.7009, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.23754266211604094, |
|
"grad_norm": 0.18031255900859833, |
|
"learning_rate": 0.0001581818181818182, |
|
"loss": 0.7036, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.2402730375426621, |
|
"grad_norm": 0.24567286670207977, |
|
"learning_rate": 0.00016, |
|
"loss": 0.6921, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.24300341296928327, |
|
"grad_norm": 0.19522973895072937, |
|
"learning_rate": 0.00016181818181818184, |
|
"loss": 0.7109, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.24573378839590443, |
|
"grad_norm": 0.2405068725347519, |
|
"learning_rate": 0.00016363636363636366, |
|
"loss": 0.7134, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2484641638225256, |
|
"grad_norm": 0.15669392049312592, |
|
"learning_rate": 0.00016545454545454545, |
|
"loss": 0.6966, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.25119453924914675, |
|
"grad_norm": 0.23415732383728027, |
|
"learning_rate": 0.00016727272727272728, |
|
"loss": 0.6771, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.25392491467576794, |
|
"grad_norm": 0.1842266321182251, |
|
"learning_rate": 0.0001690909090909091, |
|
"loss": 0.6895, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.2566552901023891, |
|
"grad_norm": 0.21642841398715973, |
|
"learning_rate": 0.0001709090909090909, |
|
"loss": 0.6913, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.2593856655290102, |
|
"grad_norm": 0.26327016949653625, |
|
"learning_rate": 0.00017272727272727275, |
|
"loss": 0.6856, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2621160409556314, |
|
"grad_norm": 0.20735357701778412, |
|
"learning_rate": 0.00017454545454545454, |
|
"loss": 0.6769, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.26484641638225254, |
|
"grad_norm": 0.3127861022949219, |
|
"learning_rate": 0.00017636363636363637, |
|
"loss": 0.6518, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.2675767918088737, |
|
"grad_norm": 0.34650346636772156, |
|
"learning_rate": 0.0001781818181818182, |
|
"loss": 0.6937, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.27030716723549486, |
|
"grad_norm": 0.252059668302536, |
|
"learning_rate": 0.00018, |
|
"loss": 0.6589, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.27303754266211605, |
|
"grad_norm": 0.1896669715642929, |
|
"learning_rate": 0.00018181818181818183, |
|
"loss": 0.6717, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2757679180887372, |
|
"grad_norm": 0.2724236249923706, |
|
"learning_rate": 0.00018363636363636366, |
|
"loss": 0.6671, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.2784982935153584, |
|
"grad_norm": 0.1814826875925064, |
|
"learning_rate": 0.00018545454545454545, |
|
"loss": 0.6564, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.2812286689419795, |
|
"grad_norm": 0.24140000343322754, |
|
"learning_rate": 0.00018727272727272728, |
|
"loss": 0.6708, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.2839590443686007, |
|
"grad_norm": 0.19333204627037048, |
|
"learning_rate": 0.0001890909090909091, |
|
"loss": 0.6628, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.28668941979522183, |
|
"grad_norm": 0.21080803871154785, |
|
"learning_rate": 0.00019090909090909092, |
|
"loss": 0.6457, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.289419795221843, |
|
"grad_norm": 0.20848962664604187, |
|
"learning_rate": 0.00019272727272727274, |
|
"loss": 0.6589, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.29215017064846416, |
|
"grad_norm": 0.2381501942873001, |
|
"learning_rate": 0.00019454545454545457, |
|
"loss": 0.6846, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.29488054607508535, |
|
"grad_norm": 0.1734190434217453, |
|
"learning_rate": 0.00019636363636363636, |
|
"loss": 0.6691, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2976109215017065, |
|
"grad_norm": 0.2187424749135971, |
|
"learning_rate": 0.00019818181818181821, |
|
"loss": 0.6806, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.3003412969283277, |
|
"grad_norm": 0.21607345342636108, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6588, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3030716723549488, |
|
"grad_norm": 0.1873304545879364, |
|
"learning_rate": 0.00019999949446003433, |
|
"loss": 0.6513, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.30580204778156994, |
|
"grad_norm": 0.21443282067775726, |
|
"learning_rate": 0.00019999797784524866, |
|
"loss": 0.6704, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.30853242320819113, |
|
"grad_norm": 0.18743731081485748, |
|
"learning_rate": 0.00019999545017097728, |
|
"loss": 0.6346, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.31126279863481227, |
|
"grad_norm": 0.18916335701942444, |
|
"learning_rate": 0.0001999919114627769, |
|
"loss": 0.68, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.31399317406143346, |
|
"grad_norm": 0.1925644427537918, |
|
"learning_rate": 0.00019998736175642673, |
|
"loss": 0.6408, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.3167235494880546, |
|
"grad_norm": 0.17722898721694946, |
|
"learning_rate": 0.0001999818010979279, |
|
"loss": 0.6787, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.3194539249146758, |
|
"grad_norm": 0.19374825060367584, |
|
"learning_rate": 0.0001999752295435032, |
|
"loss": 0.669, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.3221843003412969, |
|
"grad_norm": 0.20013949275016785, |
|
"learning_rate": 0.00019996764715959618, |
|
"loss": 0.653, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.3249146757679181, |
|
"grad_norm": 0.18780681490898132, |
|
"learning_rate": 0.00019995905402287094, |
|
"loss": 0.6557, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.32764505119453924, |
|
"grad_norm": 0.1718084216117859, |
|
"learning_rate": 0.00019994945022021082, |
|
"loss": 0.6272, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.33037542662116043, |
|
"grad_norm": 0.1613592952489853, |
|
"learning_rate": 0.00019993883584871808, |
|
"loss": 0.6515, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.33310580204778156, |
|
"grad_norm": 0.1737043410539627, |
|
"learning_rate": 0.00019992721101571236, |
|
"loss": 0.6134, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.33583617747440275, |
|
"grad_norm": 0.16362418234348297, |
|
"learning_rate": 0.0001999145758387301, |
|
"loss": 0.6448, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.3385665529010239, |
|
"grad_norm": 0.19181552529335022, |
|
"learning_rate": 0.00019990093044552304, |
|
"loss": 0.6497, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.3412969283276451, |
|
"grad_norm": 0.15803317725658417, |
|
"learning_rate": 0.00019988627497405696, |
|
"loss": 0.6116, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3440273037542662, |
|
"grad_norm": 0.2208717167377472, |
|
"learning_rate": 0.00019987060957251047, |
|
"loss": 0.6459, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.34675767918088735, |
|
"grad_norm": 0.20142869651317596, |
|
"learning_rate": 0.00019985393439927323, |
|
"loss": 0.6589, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.34948805460750854, |
|
"grad_norm": 0.17945925891399384, |
|
"learning_rate": 0.00019983624962294458, |
|
"loss": 0.6252, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.35221843003412967, |
|
"grad_norm": 0.22226247191429138, |
|
"learning_rate": 0.00019981755542233177, |
|
"loss": 0.6379, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.35494880546075086, |
|
"grad_norm": 0.1588139832019806, |
|
"learning_rate": 0.00019979785198644806, |
|
"loss": 0.6408, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.357679180887372, |
|
"grad_norm": 0.22639498114585876, |
|
"learning_rate": 0.00019977713951451102, |
|
"loss": 0.6434, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.3604095563139932, |
|
"grad_norm": 0.16015386581420898, |
|
"learning_rate": 0.00019975541821594026, |
|
"loss": 0.6151, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.3631399317406143, |
|
"grad_norm": 0.21671050786972046, |
|
"learning_rate": 0.00019973268831035545, |
|
"loss": 0.6357, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.3658703071672355, |
|
"grad_norm": 0.1871589720249176, |
|
"learning_rate": 0.00019970895002757413, |
|
"loss": 0.6436, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.36860068259385664, |
|
"grad_norm": 0.19527480006217957, |
|
"learning_rate": 0.00019968420360760926, |
|
"loss": 0.6308, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.37133105802047783, |
|
"grad_norm": 0.20158074796199799, |
|
"learning_rate": 0.000199658449300667, |
|
"loss": 0.6227, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.37406143344709897, |
|
"grad_norm": 0.15605966746807098, |
|
"learning_rate": 0.00019963168736714392, |
|
"loss": 0.615, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.37679180887372016, |
|
"grad_norm": 0.22042252123355865, |
|
"learning_rate": 0.00019960391807762463, |
|
"loss": 0.6263, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.3795221843003413, |
|
"grad_norm": 0.16206978261470795, |
|
"learning_rate": 0.00019957514171287875, |
|
"loss": 0.6182, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.3822525597269625, |
|
"grad_norm": 0.2251751869916916, |
|
"learning_rate": 0.00019954535856385837, |
|
"loss": 0.6376, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3849829351535836, |
|
"grad_norm": 0.16586551070213318, |
|
"learning_rate": 0.00019951456893169497, |
|
"loss": 0.6285, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.38771331058020475, |
|
"grad_norm": 0.27427414059638977, |
|
"learning_rate": 0.0001994827731276963, |
|
"loss": 0.6397, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.39044368600682594, |
|
"grad_norm": 0.21177491545677185, |
|
"learning_rate": 0.00019944997147334337, |
|
"loss": 0.6034, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.3931740614334471, |
|
"grad_norm": 0.25477880239486694, |
|
"learning_rate": 0.0001994161643002871, |
|
"loss": 0.6199, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.39590443686006827, |
|
"grad_norm": 0.23290970921516418, |
|
"learning_rate": 0.00019938135195034508, |
|
"loss": 0.6201, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.3986348122866894, |
|
"grad_norm": 0.19198672473430634, |
|
"learning_rate": 0.00019934553477549794, |
|
"loss": 0.6213, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.4013651877133106, |
|
"grad_norm": 0.1911400705575943, |
|
"learning_rate": 0.000199308713137886, |
|
"loss": 0.6146, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.4040955631399317, |
|
"grad_norm": 0.18605491518974304, |
|
"learning_rate": 0.0001992708874098054, |
|
"loss": 0.6123, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.4068259385665529, |
|
"grad_norm": 0.18028293550014496, |
|
"learning_rate": 0.0001992320579737045, |
|
"loss": 0.6061, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.40955631399317405, |
|
"grad_norm": 0.1961037963628769, |
|
"learning_rate": 0.00019919222522217996, |
|
"loss": 0.622, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.41228668941979524, |
|
"grad_norm": 0.17400594055652618, |
|
"learning_rate": 0.00019915138955797272, |
|
"loss": 0.6138, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.4150170648464164, |
|
"grad_norm": 0.17892149090766907, |
|
"learning_rate": 0.00019910955139396396, |
|
"loss": 0.6242, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.41774744027303756, |
|
"grad_norm": 0.21851663291454315, |
|
"learning_rate": 0.000199066711153171, |
|
"loss": 0.5913, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.4204778156996587, |
|
"grad_norm": 0.1468774825334549, |
|
"learning_rate": 0.0001990228692687429, |
|
"loss": 0.6025, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.4232081911262799, |
|
"grad_norm": 0.1920468658208847, |
|
"learning_rate": 0.00019897802618395614, |
|
"loss": 0.6127, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.425938566552901, |
|
"grad_norm": 0.17375442385673523, |
|
"learning_rate": 0.00019893218235221015, |
|
"loss": 0.6211, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.4286689419795222, |
|
"grad_norm": 0.15414904057979584, |
|
"learning_rate": 0.00019888533823702277, |
|
"loss": 0.6183, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.43139931740614335, |
|
"grad_norm": 0.2245103418827057, |
|
"learning_rate": 0.0001988374943120254, |
|
"loss": 0.6248, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.4341296928327645, |
|
"grad_norm": 0.17193332314491272, |
|
"learning_rate": 0.00019878865106095835, |
|
"loss": 0.5969, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.43686006825938567, |
|
"grad_norm": 0.13767646253108978, |
|
"learning_rate": 0.00019873880897766598, |
|
"loss": 0.5943, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.4395904436860068, |
|
"grad_norm": 0.1449906826019287, |
|
"learning_rate": 0.00019868796856609152, |
|
"loss": 0.573, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.442320819112628, |
|
"grad_norm": 0.1392473578453064, |
|
"learning_rate": 0.00019863613034027224, |
|
"loss": 0.5926, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.44505119453924913, |
|
"grad_norm": 0.1772463023662567, |
|
"learning_rate": 0.00019858329482433403, |
|
"loss": 0.6007, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.4477815699658703, |
|
"grad_norm": 0.13768768310546875, |
|
"learning_rate": 0.0001985294625524861, |
|
"loss": 0.5901, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.45051194539249145, |
|
"grad_norm": 0.1631435751914978, |
|
"learning_rate": 0.00019847463406901588, |
|
"loss": 0.5907, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.45324232081911264, |
|
"grad_norm": 0.14781758189201355, |
|
"learning_rate": 0.00019841880992828306, |
|
"loss": 0.5903, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.4559726962457338, |
|
"grad_norm": 0.13440802693367004, |
|
"learning_rate": 0.00019836199069471437, |
|
"loss": 0.5884, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.45870307167235497, |
|
"grad_norm": 0.1414463371038437, |
|
"learning_rate": 0.00019830417694279766, |
|
"loss": 0.598, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.4614334470989761, |
|
"grad_norm": 0.13185666501522064, |
|
"learning_rate": 0.0001982453692570762, |
|
"loss": 0.621, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.4641638225255973, |
|
"grad_norm": 0.14422471821308136, |
|
"learning_rate": 0.00019818556823214268, |
|
"loss": 0.6065, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4668941979522184, |
|
"grad_norm": 0.13765788078308105, |
|
"learning_rate": 0.00019812477447263326, |
|
"loss": 0.6073, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.4696245733788396, |
|
"grad_norm": 0.16028070449829102, |
|
"learning_rate": 0.0001980629885932214, |
|
"loss": 0.5767, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.47235494880546075, |
|
"grad_norm": 0.14638394117355347, |
|
"learning_rate": 0.00019800021121861182, |
|
"loss": 0.5971, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.4750853242320819, |
|
"grad_norm": 0.14843404293060303, |
|
"learning_rate": 0.0001979364429835339, |
|
"loss": 0.5894, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.4778156996587031, |
|
"grad_norm": 0.13411492109298706, |
|
"learning_rate": 0.00019787168453273544, |
|
"loss": 0.5757, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.4805460750853242, |
|
"grad_norm": 0.14304684102535248, |
|
"learning_rate": 0.0001978059365209762, |
|
"loss": 0.5846, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.4832764505119454, |
|
"grad_norm": 0.13569754362106323, |
|
"learning_rate": 0.00019773919961302113, |
|
"loss": 0.5872, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.48600682593856653, |
|
"grad_norm": 0.14318887889385223, |
|
"learning_rate": 0.00019767147448363366, |
|
"loss": 0.5804, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.4887372013651877, |
|
"grad_norm": 0.1457952857017517, |
|
"learning_rate": 0.00019760276181756903, |
|
"loss": 0.5973, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.49146757679180886, |
|
"grad_norm": 0.13820476830005646, |
|
"learning_rate": 0.00019753306230956718, |
|
"loss": 0.569, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49419795221843005, |
|
"grad_norm": 0.19338561594486237, |
|
"learning_rate": 0.00019746237666434587, |
|
"loss": 0.5723, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.4969283276450512, |
|
"grad_norm": 0.17352697253227234, |
|
"learning_rate": 0.00019739070559659347, |
|
"loss": 0.578, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.49965870307167237, |
|
"grad_norm": 0.15502339601516724, |
|
"learning_rate": 0.00019731804983096177, |
|
"loss": 0.5953, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.5023890784982935, |
|
"grad_norm": 0.18948784470558167, |
|
"learning_rate": 0.00019724441010205863, |
|
"loss": 0.5883, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.5051194539249146, |
|
"grad_norm": 0.17587606608867645, |
|
"learning_rate": 0.00019716978715444056, |
|
"loss": 0.5723, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.5078498293515359, |
|
"grad_norm": 0.1599951833486557, |
|
"learning_rate": 0.0001970941817426052, |
|
"loss": 0.5799, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.510580204778157, |
|
"grad_norm": 0.1717846840620041, |
|
"learning_rate": 0.00019701759463098374, |
|
"loss": 0.5543, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.5133105802047782, |
|
"grad_norm": 0.14032602310180664, |
|
"learning_rate": 0.00019694002659393305, |
|
"loss": 0.5845, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.5160409556313993, |
|
"grad_norm": 0.17668449878692627, |
|
"learning_rate": 0.000196861478415728, |
|
"loss": 0.6026, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.5187713310580204, |
|
"grad_norm": 0.17806965112686157, |
|
"learning_rate": 0.00019678195089055346, |
|
"loss": 0.5681, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5215017064846417, |
|
"grad_norm": 0.13321803510189056, |
|
"learning_rate": 0.00019670144482249627, |
|
"loss": 0.5586, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.5242320819112628, |
|
"grad_norm": 0.14684653282165527, |
|
"learning_rate": 0.00019661996102553718, |
|
"loss": 0.5589, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.5269624573378839, |
|
"grad_norm": 0.1308140754699707, |
|
"learning_rate": 0.0001965375003235424, |
|
"loss": 0.568, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.5296928327645051, |
|
"grad_norm": 0.17461615800857544, |
|
"learning_rate": 0.00019645406355025565, |
|
"loss": 0.5757, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.5324232081911263, |
|
"grad_norm": 0.15591022372245789, |
|
"learning_rate": 0.0001963696515492893, |
|
"loss": 0.5946, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.5351535836177475, |
|
"grad_norm": 0.14174342155456543, |
|
"learning_rate": 0.00019628426517411625, |
|
"loss": 0.5839, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.5378839590443686, |
|
"grad_norm": 0.15242989361286163, |
|
"learning_rate": 0.0001961979052880609, |
|
"loss": 0.5567, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.5406143344709897, |
|
"grad_norm": 0.16651766002178192, |
|
"learning_rate": 0.00019611057276429085, |
|
"loss": 0.5593, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.543344709897611, |
|
"grad_norm": 0.14858382940292358, |
|
"learning_rate": 0.00019602226848580763, |
|
"loss": 0.5848, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.5460750853242321, |
|
"grad_norm": 0.14774656295776367, |
|
"learning_rate": 0.00019593299334543808, |
|
"loss": 0.563, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5488054607508532, |
|
"grad_norm": 0.13993892073631287, |
|
"learning_rate": 0.0001958427482458253, |
|
"loss": 0.5742, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.5515358361774744, |
|
"grad_norm": 0.15201717615127563, |
|
"learning_rate": 0.0001957515340994193, |
|
"loss": 0.5726, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.5542662116040956, |
|
"grad_norm": 0.1567879170179367, |
|
"learning_rate": 0.00019565935182846802, |
|
"loss": 0.5707, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.5569965870307167, |
|
"grad_norm": 0.13955365121364594, |
|
"learning_rate": 0.00019556620236500793, |
|
"loss": 0.5339, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.5597269624573379, |
|
"grad_norm": 0.1425381898880005, |
|
"learning_rate": 0.00019547208665085457, |
|
"loss": 0.5698, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.562457337883959, |
|
"grad_norm": 0.14695167541503906, |
|
"learning_rate": 0.00019537700563759304, |
|
"loss": 0.578, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.5651877133105802, |
|
"grad_norm": 0.15581448376178741, |
|
"learning_rate": 0.00019528096028656832, |
|
"loss": 0.5552, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.5679180887372014, |
|
"grad_norm": 0.13141174614429474, |
|
"learning_rate": 0.00019518395156887576, |
|
"loss": 0.5598, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.5706484641638225, |
|
"grad_norm": 0.16678418219089508, |
|
"learning_rate": 0.00019508598046535095, |
|
"loss": 0.5485, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.5733788395904437, |
|
"grad_norm": 0.1717272400856018, |
|
"learning_rate": 0.00019498704796656018, |
|
"loss": 0.5849, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5761092150170648, |
|
"grad_norm": 0.14453086256980896, |
|
"learning_rate": 0.00019488715507278998, |
|
"loss": 0.5757, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.578839590443686, |
|
"grad_norm": 0.16329538822174072, |
|
"learning_rate": 0.0001947863027940374, |
|
"loss": 0.5632, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.5815699658703072, |
|
"grad_norm": 0.15865112841129303, |
|
"learning_rate": 0.00019468449214999955, |
|
"loss": 0.5728, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.5843003412969283, |
|
"grad_norm": 0.15316785871982574, |
|
"learning_rate": 0.00019458172417006347, |
|
"loss": 0.5556, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.5870307167235495, |
|
"grad_norm": 0.16666734218597412, |
|
"learning_rate": 0.00019447799989329555, |
|
"loss": 0.5759, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.5897610921501707, |
|
"grad_norm": 0.16525249183177948, |
|
"learning_rate": 0.00019437332036843118, |
|
"loss": 0.5667, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.5924914675767918, |
|
"grad_norm": 0.14022761583328247, |
|
"learning_rate": 0.00019426768665386398, |
|
"loss": 0.5611, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.595221843003413, |
|
"grad_norm": 0.15930500626564026, |
|
"learning_rate": 0.00019416109981763526, |
|
"loss": 0.5414, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.5979522184300341, |
|
"grad_norm": 0.141464963555336, |
|
"learning_rate": 0.00019405356093742313, |
|
"loss": 0.5363, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.6006825938566553, |
|
"grad_norm": 0.1541200429201126, |
|
"learning_rate": 0.0001939450711005316, |
|
"loss": 0.5487, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6034129692832765, |
|
"grad_norm": 0.13717712461948395, |
|
"learning_rate": 0.00019383563140387965, |
|
"loss": 0.5564, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.6061433447098976, |
|
"grad_norm": 0.14139863848686218, |
|
"learning_rate": 0.00019372524295399013, |
|
"loss": 0.5592, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.6088737201365187, |
|
"grad_norm": 0.13494791090488434, |
|
"learning_rate": 0.00019361390686697846, |
|
"loss": 0.5452, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.6116040955631399, |
|
"grad_norm": 0.1512797623872757, |
|
"learning_rate": 0.0001935016242685415, |
|
"loss": 0.5595, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.6143344709897611, |
|
"grad_norm": 0.1422545164823532, |
|
"learning_rate": 0.00019338839629394605, |
|
"loss": 0.5602, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.6170648464163823, |
|
"grad_norm": 0.14444862306118011, |
|
"learning_rate": 0.00019327422408801744, |
|
"loss": 0.5574, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.6197952218430034, |
|
"grad_norm": 0.15434536337852478, |
|
"learning_rate": 0.0001931591088051279, |
|
"loss": 0.5467, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.6225255972696245, |
|
"grad_norm": 0.1420368254184723, |
|
"learning_rate": 0.000193043051609185, |
|
"loss": 0.5519, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.6252559726962458, |
|
"grad_norm": 0.12724490463733673, |
|
"learning_rate": 0.00019292605367361978, |
|
"loss": 0.5779, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.6279863481228669, |
|
"grad_norm": 0.13820625841617584, |
|
"learning_rate": 0.00019280811618137484, |
|
"loss": 0.5468, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.630716723549488, |
|
"grad_norm": 0.1629246473312378, |
|
"learning_rate": 0.00019268924032489248, |
|
"loss": 0.5721, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.6334470989761092, |
|
"grad_norm": 0.193836510181427, |
|
"learning_rate": 0.00019256942730610268, |
|
"loss": 0.5392, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.6361774744027304, |
|
"grad_norm": 0.18511579930782318, |
|
"learning_rate": 0.0001924486783364108, |
|
"loss": 0.5718, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.6389078498293516, |
|
"grad_norm": 0.14278100430965424, |
|
"learning_rate": 0.00019232699463668542, |
|
"loss": 0.5589, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.6416382252559727, |
|
"grad_norm": 0.14693580567836761, |
|
"learning_rate": 0.00019220437743724605, |
|
"loss": 0.515, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.6443686006825938, |
|
"grad_norm": 0.20018483698368073, |
|
"learning_rate": 0.00019208082797785055, |
|
"loss": 0.5484, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.647098976109215, |
|
"grad_norm": 0.1597984880208969, |
|
"learning_rate": 0.00019195634750768275, |
|
"loss": 0.539, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.6498293515358362, |
|
"grad_norm": 0.14270828664302826, |
|
"learning_rate": 0.00019183093728533966, |
|
"loss": 0.5496, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.6525597269624573, |
|
"grad_norm": 0.1788954734802246, |
|
"learning_rate": 0.0001917045985788189, |
|
"loss": 0.5629, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.6552901023890785, |
|
"grad_norm": 0.15234531462192535, |
|
"learning_rate": 0.00019157733266550575, |
|
"loss": 0.5454, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6580204778156996, |
|
"grad_norm": 0.17805363237857819, |
|
"learning_rate": 0.00019144914083216034, |
|
"loss": 0.5599, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.6607508532423209, |
|
"grad_norm": 0.18313759565353394, |
|
"learning_rate": 0.00019132002437490458, |
|
"loss": 0.5544, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.663481228668942, |
|
"grad_norm": 0.16873271763324738, |
|
"learning_rate": 0.00019118998459920902, |
|
"loss": 0.5445, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.6662116040955631, |
|
"grad_norm": 0.23297423124313354, |
|
"learning_rate": 0.00019105902281987976, |
|
"loss": 0.5623, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.6689419795221843, |
|
"grad_norm": 0.13279114663600922, |
|
"learning_rate": 0.00019092714036104508, |
|
"loss": 0.5354, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.6716723549488055, |
|
"grad_norm": 0.18942788243293762, |
|
"learning_rate": 0.00019079433855614201, |
|
"loss": 0.5574, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.6744027303754266, |
|
"grad_norm": 0.15293289721012115, |
|
"learning_rate": 0.00019066061874790303, |
|
"loss": 0.5473, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.6771331058020478, |
|
"grad_norm": 0.1669221818447113, |
|
"learning_rate": 0.00019052598228834217, |
|
"loss": 0.5272, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.6798634812286689, |
|
"grad_norm": 0.16368111968040466, |
|
"learning_rate": 0.00019039043053874175, |
|
"loss": 0.5387, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.6825938566552902, |
|
"grad_norm": 0.15945561230182648, |
|
"learning_rate": 0.00019025396486963827, |
|
"loss": 0.548, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6853242320819113, |
|
"grad_norm": 0.1538572758436203, |
|
"learning_rate": 0.00019011658666080874, |
|
"loss": 0.5419, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.6880546075085324, |
|
"grad_norm": 0.13356320559978485, |
|
"learning_rate": 0.00018997829730125663, |
|
"loss": 0.5271, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.6907849829351536, |
|
"grad_norm": 0.1389850527048111, |
|
"learning_rate": 0.0001898390981891979, |
|
"loss": 0.5489, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.6935153583617747, |
|
"grad_norm": 0.14726200699806213, |
|
"learning_rate": 0.00018969899073204686, |
|
"loss": 0.554, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.6962457337883959, |
|
"grad_norm": 0.13865283131599426, |
|
"learning_rate": 0.0001895579763464019, |
|
"loss": 0.5486, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.6989761092150171, |
|
"grad_norm": 0.15216147899627686, |
|
"learning_rate": 0.00018941605645803115, |
|
"loss": 0.5156, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.7017064846416382, |
|
"grad_norm": 0.13091793656349182, |
|
"learning_rate": 0.00018927323250185815, |
|
"loss": 0.5359, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.7044368600682593, |
|
"grad_norm": 0.14097946882247925, |
|
"learning_rate": 0.0001891295059219472, |
|
"loss": 0.5367, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.7071672354948806, |
|
"grad_norm": 0.1434548795223236, |
|
"learning_rate": 0.00018898487817148898, |
|
"loss": 0.5395, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.7098976109215017, |
|
"grad_norm": 0.15296564996242523, |
|
"learning_rate": 0.0001888393507127856, |
|
"loss": 0.5552, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7126279863481229, |
|
"grad_norm": 0.15522688627243042, |
|
"learning_rate": 0.000188692925017236, |
|
"loss": 0.5433, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.715358361774744, |
|
"grad_norm": 0.1461726576089859, |
|
"learning_rate": 0.000188545602565321, |
|
"loss": 0.5291, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.7180887372013652, |
|
"grad_norm": 0.14327403903007507, |
|
"learning_rate": 0.00018839738484658836, |
|
"loss": 0.5502, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.7208191126279864, |
|
"grad_norm": 0.1613275706768036, |
|
"learning_rate": 0.00018824827335963765, |
|
"loss": 0.5565, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.7235494880546075, |
|
"grad_norm": 0.2040315866470337, |
|
"learning_rate": 0.00018809826961210525, |
|
"loss": 0.5324, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.7262798634812286, |
|
"grad_norm": 0.13186219334602356, |
|
"learning_rate": 0.0001879473751206489, |
|
"loss": 0.5395, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.7290102389078499, |
|
"grad_norm": 0.15462934970855713, |
|
"learning_rate": 0.00018779559141093258, |
|
"loss": 0.527, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.731740614334471, |
|
"grad_norm": 0.155660942196846, |
|
"learning_rate": 0.0001876429200176108, |
|
"loss": 0.5429, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.7344709897610922, |
|
"grad_norm": 0.15280231833457947, |
|
"learning_rate": 0.00018748936248431353, |
|
"loss": 0.5571, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.7372013651877133, |
|
"grad_norm": 0.19389967620372772, |
|
"learning_rate": 0.00018733492036363005, |
|
"loss": 0.5325, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7399317406143344, |
|
"grad_norm": 0.14486828446388245, |
|
"learning_rate": 0.0001871795952170937, |
|
"loss": 0.5229, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.7426621160409557, |
|
"grad_norm": 0.18223008513450623, |
|
"learning_rate": 0.00018702338861516587, |
|
"loss": 0.538, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.7453924914675768, |
|
"grad_norm": 0.1434670090675354, |
|
"learning_rate": 0.00018686630213722016, |
|
"loss": 0.5353, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.7481228668941979, |
|
"grad_norm": 0.1796412169933319, |
|
"learning_rate": 0.0001867083373715264, |
|
"loss": 0.5221, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.7508532423208191, |
|
"grad_norm": 0.14365307986736298, |
|
"learning_rate": 0.00018654949591523465, |
|
"loss": 0.5211, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.7535836177474403, |
|
"grad_norm": 0.15224212408065796, |
|
"learning_rate": 0.000186389779374359, |
|
"loss": 0.5353, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.7563139931740614, |
|
"grad_norm": 0.1557937115430832, |
|
"learning_rate": 0.00018622918936376132, |
|
"loss": 0.5359, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.7590443686006826, |
|
"grad_norm": 0.13806480169296265, |
|
"learning_rate": 0.00018606772750713504, |
|
"loss": 0.5404, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.7617747440273037, |
|
"grad_norm": 0.14786981046199799, |
|
"learning_rate": 0.00018590539543698854, |
|
"loss": 0.5377, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.764505119453925, |
|
"grad_norm": 0.14674220979213715, |
|
"learning_rate": 0.00018574219479462878, |
|
"loss": 0.531, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7672354948805461, |
|
"grad_norm": 0.146319180727005, |
|
"learning_rate": 0.00018557812723014476, |
|
"loss": 0.5244, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.7699658703071672, |
|
"grad_norm": 0.15404731035232544, |
|
"learning_rate": 0.00018541319440239066, |
|
"loss": 0.5419, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.7726962457337884, |
|
"grad_norm": 0.13534784317016602, |
|
"learning_rate": 0.00018524739797896923, |
|
"loss": 0.5162, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.7754266211604095, |
|
"grad_norm": 0.14462386071681976, |
|
"learning_rate": 0.0001850807396362148, |
|
"loss": 0.5338, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.7781569965870307, |
|
"grad_norm": 0.126676544547081, |
|
"learning_rate": 0.00018491322105917645, |
|
"loss": 0.5291, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.7808873720136519, |
|
"grad_norm": 0.1487646847963333, |
|
"learning_rate": 0.0001847448439416009, |
|
"loss": 0.5235, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.783617747440273, |
|
"grad_norm": 0.14197687804698944, |
|
"learning_rate": 0.00018457560998591538, |
|
"loss": 0.518, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.7863481228668942, |
|
"grad_norm": 0.13646024465560913, |
|
"learning_rate": 0.00018440552090321047, |
|
"loss": 0.5425, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.7890784982935154, |
|
"grad_norm": 0.15339186787605286, |
|
"learning_rate": 0.00018423457841322273, |
|
"loss": 0.5203, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.7918088737201365, |
|
"grad_norm": 0.14754898846149445, |
|
"learning_rate": 0.00018406278424431736, |
|
"loss": 0.5259, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.7945392491467577, |
|
"grad_norm": 0.1315944641828537, |
|
"learning_rate": 0.00018389014013347078, |
|
"loss": 0.5248, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.7972696245733788, |
|
"grad_norm": 0.14218468964099884, |
|
"learning_rate": 0.00018371664782625287, |
|
"loss": 0.5328, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.1456083059310913, |
|
"learning_rate": 0.00018354230907680958, |
|
"loss": 0.5352, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.8027303754266212, |
|
"grad_norm": 0.14852645993232727, |
|
"learning_rate": 0.00018336712564784503, |
|
"loss": 0.5379, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.8054607508532423, |
|
"grad_norm": 0.17426247894763947, |
|
"learning_rate": 0.0001831910993106037, |
|
"loss": 0.5358, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.8081911262798634, |
|
"grad_norm": 0.14848141372203827, |
|
"learning_rate": 0.0001830142318448525, |
|
"loss": 0.5377, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.8109215017064847, |
|
"grad_norm": 0.14610137045383453, |
|
"learning_rate": 0.000182836525038863, |
|
"loss": 0.5331, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.8136518771331058, |
|
"grad_norm": 0.1565829962491989, |
|
"learning_rate": 0.00018265798068939294, |
|
"loss": 0.5162, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.816382252559727, |
|
"grad_norm": 0.1328669935464859, |
|
"learning_rate": 0.0001824786006016685, |
|
"loss": 0.5045, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.8191126279863481, |
|
"grad_norm": 0.14329080283641815, |
|
"learning_rate": 0.00018229838658936564, |
|
"loss": 0.5083, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8218430034129692, |
|
"grad_norm": 0.13927890360355377, |
|
"learning_rate": 0.00018211734047459216, |
|
"loss": 0.5221, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.8245733788395905, |
|
"grad_norm": 0.14164070785045624, |
|
"learning_rate": 0.00018193546408786898, |
|
"loss": 0.5243, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.8273037542662116, |
|
"grad_norm": 0.14735499024391174, |
|
"learning_rate": 0.00018175275926811174, |
|
"loss": 0.5136, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.8300341296928327, |
|
"grad_norm": 0.1411520093679428, |
|
"learning_rate": 0.00018156922786261216, |
|
"loss": 0.5164, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.8327645051194539, |
|
"grad_norm": 0.14603470265865326, |
|
"learning_rate": 0.0001813848717270195, |
|
"loss": 0.5306, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.8354948805460751, |
|
"grad_norm": 0.13602910935878754, |
|
"learning_rate": 0.00018119969272532166, |
|
"loss": 0.5014, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.8382252559726963, |
|
"grad_norm": 0.15259377658367157, |
|
"learning_rate": 0.00018101369272982632, |
|
"loss": 0.5309, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.8409556313993174, |
|
"grad_norm": 0.13250286877155304, |
|
"learning_rate": 0.00018082687362114212, |
|
"loss": 0.5034, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.8436860068259385, |
|
"grad_norm": 0.15984928607940674, |
|
"learning_rate": 0.00018063923728815957, |
|
"loss": 0.5167, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.8464163822525598, |
|
"grad_norm": 0.14640702307224274, |
|
"learning_rate": 0.00018045078562803203, |
|
"loss": 0.5361, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.8491467576791809, |
|
"grad_norm": 0.1568063646554947, |
|
"learning_rate": 0.00018026152054615634, |
|
"loss": 0.5221, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.851877133105802, |
|
"grad_norm": 0.14193738996982574, |
|
"learning_rate": 0.0001800714439561538, |
|
"loss": 0.5151, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.8546075085324232, |
|
"grad_norm": 0.15847285091876984, |
|
"learning_rate": 0.00017988055777985067, |
|
"loss": 0.4923, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.8573378839590444, |
|
"grad_norm": 0.16404108703136444, |
|
"learning_rate": 0.00017968886394725874, |
|
"loss": 0.5114, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.8600682593856656, |
|
"grad_norm": 0.22749370336532593, |
|
"learning_rate": 0.00017949636439655592, |
|
"loss": 0.5225, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.8627986348122867, |
|
"grad_norm": 0.17752817273139954, |
|
"learning_rate": 0.00017930306107406653, |
|
"loss": 0.5138, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.8655290102389078, |
|
"grad_norm": 0.16554813086986542, |
|
"learning_rate": 0.00017910895593424163, |
|
"loss": 0.5087, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.868259385665529, |
|
"grad_norm": 0.1888076812028885, |
|
"learning_rate": 0.00017891405093963938, |
|
"loss": 0.5163, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.8709897610921502, |
|
"grad_norm": 0.15817560255527496, |
|
"learning_rate": 0.00017871834806090501, |
|
"loss": 0.4918, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.8737201365187713, |
|
"grad_norm": 0.15167172253131866, |
|
"learning_rate": 0.00017852184927675112, |
|
"loss": 0.5248, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.8764505119453925, |
|
"grad_norm": 0.15847422182559967, |
|
"learning_rate": 0.00017832455657393746, |
|
"loss": 0.5205, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.8791808873720136, |
|
"grad_norm": 0.1436643898487091, |
|
"learning_rate": 0.00017812647194725094, |
|
"loss": 0.4869, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.8819112627986349, |
|
"grad_norm": 0.1533607393503189, |
|
"learning_rate": 0.00017792759739948546, |
|
"loss": 0.5141, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.884641638225256, |
|
"grad_norm": 0.1520746648311615, |
|
"learning_rate": 0.00017772793494142167, |
|
"loss": 0.5158, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.8873720136518771, |
|
"grad_norm": 0.1398312896490097, |
|
"learning_rate": 0.0001775274865918066, |
|
"loss": 0.5126, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.8901023890784983, |
|
"grad_norm": 0.14314454793930054, |
|
"learning_rate": 0.00017732625437733335, |
|
"loss": 0.5187, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.8928327645051195, |
|
"grad_norm": 0.1434595286846161, |
|
"learning_rate": 0.00017712424033262042, |
|
"loss": 0.5018, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.8955631399317406, |
|
"grad_norm": 0.13712714612483978, |
|
"learning_rate": 0.00017692144650019125, |
|
"loss": 0.5044, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.8982935153583618, |
|
"grad_norm": 0.13560262322425842, |
|
"learning_rate": 0.00017671787493045356, |
|
"loss": 0.5153, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.9010238907849829, |
|
"grad_norm": 0.15035240352153778, |
|
"learning_rate": 0.0001765135276816787, |
|
"loss": 0.5023, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.903754266211604, |
|
"grad_norm": 0.14878690242767334, |
|
"learning_rate": 0.00017630840681998066, |
|
"loss": 0.4916, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.9064846416382253, |
|
"grad_norm": 0.14930297434329987, |
|
"learning_rate": 0.00017610251441929533, |
|
"loss": 0.5092, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.9092150170648464, |
|
"grad_norm": 0.14058218896389008, |
|
"learning_rate": 0.0001758958525613594, |
|
"loss": 0.5091, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.9119453924914676, |
|
"grad_norm": 0.18505944311618805, |
|
"learning_rate": 0.00017568842333568952, |
|
"loss": 0.5047, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.9146757679180887, |
|
"grad_norm": 0.16390664875507355, |
|
"learning_rate": 0.0001754802288395609, |
|
"loss": 0.5115, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.9174061433447099, |
|
"grad_norm": 0.16126009821891785, |
|
"learning_rate": 0.00017527127117798635, |
|
"loss": 0.5017, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.9201365187713311, |
|
"grad_norm": 0.16674400866031647, |
|
"learning_rate": 0.0001750615524636948, |
|
"loss": 0.5193, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.9228668941979522, |
|
"grad_norm": 0.13656651973724365, |
|
"learning_rate": 0.00017485107481711012, |
|
"loss": 0.4909, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.9255972696245733, |
|
"grad_norm": 0.15961140394210815, |
|
"learning_rate": 0.00017463984036632954, |
|
"loss": 0.5145, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.9283276450511946, |
|
"grad_norm": 0.13122031092643738, |
|
"learning_rate": 0.00017442785124710227, |
|
"loss": 0.4854, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9310580204778157, |
|
"grad_norm": 0.14256154000759125, |
|
"learning_rate": 0.0001742151096028076, |
|
"loss": 0.4852, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.9337883959044369, |
|
"grad_norm": 0.14853668212890625, |
|
"learning_rate": 0.00017400161758443375, |
|
"loss": 0.5202, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.936518771331058, |
|
"grad_norm": 0.16885364055633545, |
|
"learning_rate": 0.00017378737735055562, |
|
"loss": 0.5136, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.9392491467576792, |
|
"grad_norm": 0.1398458182811737, |
|
"learning_rate": 0.00017357239106731317, |
|
"loss": 0.4908, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.9419795221843004, |
|
"grad_norm": 0.1871606856584549, |
|
"learning_rate": 0.00017335666090838965, |
|
"loss": 0.5076, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.9447098976109215, |
|
"grad_norm": 0.17375800013542175, |
|
"learning_rate": 0.00017314018905498931, |
|
"loss": 0.5032, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.9474402730375426, |
|
"grad_norm": 0.14859388768672943, |
|
"learning_rate": 0.00017292297769581571, |
|
"loss": 0.5119, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.9501706484641638, |
|
"grad_norm": 0.18141716718673706, |
|
"learning_rate": 0.00017270502902704926, |
|
"loss": 0.4982, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.952901023890785, |
|
"grad_norm": 0.17015349864959717, |
|
"learning_rate": 0.00017248634525232523, |
|
"loss": 0.4835, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.9556313993174061, |
|
"grad_norm": 0.14770372211933136, |
|
"learning_rate": 0.00017226692858271134, |
|
"loss": 0.4851, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9583617747440273, |
|
"grad_norm": 0.19386035203933716, |
|
"learning_rate": 0.00017204678123668556, |
|
"loss": 0.5172, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.9610921501706484, |
|
"grad_norm": 0.15225833654403687, |
|
"learning_rate": 0.00017182590544011347, |
|
"loss": 0.4972, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.9638225255972697, |
|
"grad_norm": 0.1522843837738037, |
|
"learning_rate": 0.00017160430342622596, |
|
"loss": 0.493, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.9665529010238908, |
|
"grad_norm": 0.13483871519565582, |
|
"learning_rate": 0.00017138197743559654, |
|
"loss": 0.4882, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.9692832764505119, |
|
"grad_norm": 0.14176194369792938, |
|
"learning_rate": 0.00017115892971611863, |
|
"loss": 0.4921, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.9720136518771331, |
|
"grad_norm": 0.15088023245334625, |
|
"learning_rate": 0.00017093516252298296, |
|
"loss": 0.5073, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.9747440273037543, |
|
"grad_norm": 0.14758853614330292, |
|
"learning_rate": 0.00017071067811865476, |
|
"loss": 0.5106, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.9774744027303754, |
|
"grad_norm": 0.14779959619045258, |
|
"learning_rate": 0.00017048547877285077, |
|
"loss": 0.5066, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.9802047781569966, |
|
"grad_norm": 0.14550894498825073, |
|
"learning_rate": 0.00017025956676251636, |
|
"loss": 0.484, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.9829351535836177, |
|
"grad_norm": 0.14111794531345367, |
|
"learning_rate": 0.00017003294437180255, |
|
"loss": 0.4719, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.985665529010239, |
|
"grad_norm": 0.15354299545288086, |
|
"learning_rate": 0.00016980561389204284, |
|
"loss": 0.4874, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.9883959044368601, |
|
"grad_norm": 0.14522601664066315, |
|
"learning_rate": 0.0001695775776217301, |
|
"loss": 0.4722, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.9911262798634812, |
|
"grad_norm": 0.143354594707489, |
|
"learning_rate": 0.00016934883786649333, |
|
"loss": 0.5038, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.9938566552901024, |
|
"grad_norm": 0.14298783242702484, |
|
"learning_rate": 0.0001691193969390742, |
|
"loss": 0.5007, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.9965870307167235, |
|
"grad_norm": 0.14418889582157135, |
|
"learning_rate": 0.00016888925715930394, |
|
"loss": 0.5054, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.9993174061433447, |
|
"grad_norm": 0.16697633266448975, |
|
"learning_rate": 0.0001686584208540797, |
|
"loss": 0.4948, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.0020477815699658, |
|
"grad_norm": 0.4087926745414734, |
|
"learning_rate": 0.000168426890357341, |
|
"loss": 0.8762, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.004778156996587, |
|
"grad_norm": 0.17126424610614777, |
|
"learning_rate": 0.00016819466801004621, |
|
"loss": 0.4964, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.0075085324232083, |
|
"grad_norm": 0.18887566030025482, |
|
"learning_rate": 0.00016796175616014893, |
|
"loss": 0.4842, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.0102389078498293, |
|
"grad_norm": 0.16404148936271667, |
|
"learning_rate": 0.00016772815716257412, |
|
"loss": 0.4761, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.0129692832764505, |
|
"grad_norm": 0.2001708298921585, |
|
"learning_rate": 0.00016749387337919433, |
|
"loss": 0.5005, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.0156996587030718, |
|
"grad_norm": 0.16301579773426056, |
|
"learning_rate": 0.0001672589071788059, |
|
"loss": 0.497, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.0184300341296928, |
|
"grad_norm": 0.15777342021465302, |
|
"learning_rate": 0.0001670232609371049, |
|
"loss": 0.4771, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.021160409556314, |
|
"grad_norm": 0.14179396629333496, |
|
"learning_rate": 0.00016678693703666325, |
|
"loss": 0.4491, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.023890784982935, |
|
"grad_norm": 0.16155582666397095, |
|
"learning_rate": 0.00016654993786690444, |
|
"loss": 0.4846, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.0266211604095563, |
|
"grad_norm": 0.15554013848304749, |
|
"learning_rate": 0.00016631226582407952, |
|
"loss": 0.4663, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.0293515358361776, |
|
"grad_norm": 0.15946047008037567, |
|
"learning_rate": 0.00016607392331124282, |
|
"loss": 0.4978, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.0320819112627986, |
|
"grad_norm": 0.14758449792861938, |
|
"learning_rate": 0.00016583491273822765, |
|
"loss": 0.4868, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.0348122866894198, |
|
"grad_norm": 0.16146820783615112, |
|
"learning_rate": 0.0001655952365216219, |
|
"loss": 0.5056, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.0375426621160408, |
|
"grad_norm": 0.14648115634918213, |
|
"learning_rate": 0.0001653548970847438, |
|
"loss": 0.5028, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.040273037542662, |
|
"grad_norm": 0.14611080288887024, |
|
"learning_rate": 0.00016511389685761708, |
|
"loss": 0.4975, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.0430034129692833, |
|
"grad_norm": 0.13934309780597687, |
|
"learning_rate": 0.00016487223827694672, |
|
"loss": 0.4881, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.0457337883959044, |
|
"grad_norm": 0.1680765300989151, |
|
"learning_rate": 0.00016462992378609407, |
|
"loss": 0.4947, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.0484641638225256, |
|
"grad_norm": 0.16694432497024536, |
|
"learning_rate": 0.00016438695583505242, |
|
"loss": 0.5, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.0511945392491469, |
|
"grad_norm": 0.13027995824813843, |
|
"learning_rate": 0.00016414333688042186, |
|
"loss": 0.4783, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.0539249146757679, |
|
"grad_norm": 0.15040864050388336, |
|
"learning_rate": 0.0001638990693853848, |
|
"loss": 0.4743, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.0566552901023891, |
|
"grad_norm": 0.17924848198890686, |
|
"learning_rate": 0.00016365415581968084, |
|
"loss": 0.493, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.0593856655290101, |
|
"grad_norm": 0.14788490533828735, |
|
"learning_rate": 0.0001634085986595819, |
|
"loss": 0.4932, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.0621160409556314, |
|
"grad_norm": 0.1745985448360443, |
|
"learning_rate": 0.00016316240038786718, |
|
"loss": 0.5192, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.0648464163822526, |
|
"grad_norm": 0.1356726437807083, |
|
"learning_rate": 0.00016291556349379795, |
|
"loss": 0.4829, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0675767918088737, |
|
"grad_norm": 0.1533443182706833, |
|
"learning_rate": 0.00016266809047309251, |
|
"loss": 0.4881, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.070307167235495, |
|
"grad_norm": 0.16772933304309845, |
|
"learning_rate": 0.00016241998382790095, |
|
"loss": 0.4967, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.073037542662116, |
|
"grad_norm": 0.1441749781370163, |
|
"learning_rate": 0.0001621712460667797, |
|
"loss": 0.4867, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.0757679180887372, |
|
"grad_norm": 0.15063367784023285, |
|
"learning_rate": 0.00016192187970466644, |
|
"loss": 0.4916, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.0784982935153584, |
|
"grad_norm": 0.14568567276000977, |
|
"learning_rate": 0.00016167188726285434, |
|
"loss": 0.488, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.0812286689419794, |
|
"grad_norm": 0.14870832860469818, |
|
"learning_rate": 0.0001614212712689668, |
|
"loss": 0.4867, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.0839590443686007, |
|
"grad_norm": 0.15368396043777466, |
|
"learning_rate": 0.00016117003425693188, |
|
"loss": 0.4766, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.086689419795222, |
|
"grad_norm": 0.163841113448143, |
|
"learning_rate": 0.00016091817876695655, |
|
"loss": 0.4803, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.089419795221843, |
|
"grad_norm": 0.17485839128494263, |
|
"learning_rate": 0.0001606657073455012, |
|
"loss": 0.4875, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.0921501706484642, |
|
"grad_norm": 0.14765095710754395, |
|
"learning_rate": 0.00016041262254525362, |
|
"loss": 0.5058, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0948805460750852, |
|
"grad_norm": 0.1618352234363556, |
|
"learning_rate": 0.0001601589269251035, |
|
"loss": 0.4903, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.0976109215017065, |
|
"grad_norm": 0.1456744223833084, |
|
"learning_rate": 0.0001599046230501163, |
|
"loss": 0.4822, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.1003412969283277, |
|
"grad_norm": 0.15738914906978607, |
|
"learning_rate": 0.00015964971349150746, |
|
"loss": 0.4944, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.1030716723549487, |
|
"grad_norm": 0.1475927084684372, |
|
"learning_rate": 0.0001593942008266164, |
|
"loss": 0.4971, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.10580204778157, |
|
"grad_norm": 0.14080214500427246, |
|
"learning_rate": 0.00015913808763888039, |
|
"loss": 0.4864, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.108532423208191, |
|
"grad_norm": 0.16853410005569458, |
|
"learning_rate": 0.00015888137651780845, |
|
"loss": 0.4891, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.1112627986348123, |
|
"grad_norm": 0.13720810413360596, |
|
"learning_rate": 0.00015862407005895522, |
|
"loss": 0.4822, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.1139931740614335, |
|
"grad_norm": 0.16895629465579987, |
|
"learning_rate": 0.00015836617086389468, |
|
"loss": 0.4598, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.1167235494880545, |
|
"grad_norm": 0.15242214500904083, |
|
"learning_rate": 0.00015810768154019385, |
|
"loss": 0.4761, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.1194539249146758, |
|
"grad_norm": 0.1633676141500473, |
|
"learning_rate": 0.00015784860470138633, |
|
"loss": 0.4912, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.122184300341297, |
|
"grad_norm": 0.16565294563770294, |
|
"learning_rate": 0.00015758894296694615, |
|
"loss": 0.4836, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.124914675767918, |
|
"grad_norm": 0.160204216837883, |
|
"learning_rate": 0.00015732869896226094, |
|
"loss": 0.4694, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.1276450511945393, |
|
"grad_norm": 0.19067788124084473, |
|
"learning_rate": 0.00015706787531860557, |
|
"loss": 0.4853, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.1303754266211605, |
|
"grad_norm": 0.14547327160835266, |
|
"learning_rate": 0.00015680647467311557, |
|
"loss": 0.4709, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.1331058020477816, |
|
"grad_norm": 0.18047383427619934, |
|
"learning_rate": 0.0001565444996687605, |
|
"loss": 0.4738, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.1358361774744028, |
|
"grad_norm": 0.16099834442138672, |
|
"learning_rate": 0.00015628195295431697, |
|
"loss": 0.4805, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.1385665529010238, |
|
"grad_norm": 0.1426517814397812, |
|
"learning_rate": 0.00015601883718434207, |
|
"loss": 0.4573, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.141296928327645, |
|
"grad_norm": 0.14879460632801056, |
|
"learning_rate": 0.00015575515501914668, |
|
"loss": 0.4778, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.144027303754266, |
|
"grad_norm": 0.1393750160932541, |
|
"learning_rate": 0.0001554909091247682, |
|
"loss": 0.447, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.1467576791808873, |
|
"grad_norm": 0.17599152028560638, |
|
"learning_rate": 0.00015522610217294375, |
|
"loss": 0.4969, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.1494880546075086, |
|
"grad_norm": 0.17399606108665466, |
|
"learning_rate": 0.0001549607368410834, |
|
"loss": 0.4933, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.1522184300341296, |
|
"grad_norm": 0.16555064916610718, |
|
"learning_rate": 0.00015469481581224272, |
|
"loss": 0.475, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.1549488054607508, |
|
"grad_norm": 0.20848453044891357, |
|
"learning_rate": 0.00015442834177509582, |
|
"loss": 0.4872, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.157679180887372, |
|
"grad_norm": 0.15129271149635315, |
|
"learning_rate": 0.00015416131742390827, |
|
"loss": 0.4963, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.1604095563139931, |
|
"grad_norm": 0.1703735888004303, |
|
"learning_rate": 0.00015389374545850973, |
|
"loss": 0.479, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.1631399317406144, |
|
"grad_norm": 0.152608722448349, |
|
"learning_rate": 0.00015362562858426654, |
|
"loss": 0.4831, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.1658703071672356, |
|
"grad_norm": 0.14749537408351898, |
|
"learning_rate": 0.0001533569695120547, |
|
"loss": 0.4784, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.1686006825938566, |
|
"grad_norm": 0.15642118453979492, |
|
"learning_rate": 0.0001530877709582321, |
|
"loss": 0.4679, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.1713310580204779, |
|
"grad_norm": 0.13505250215530396, |
|
"learning_rate": 0.00015281803564461135, |
|
"loss": 0.4779, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.174061433447099, |
|
"grad_norm": 0.14518040418624878, |
|
"learning_rate": 0.00015254776629843205, |
|
"loss": 0.448, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.1767918088737201, |
|
"grad_norm": 0.12947289645671844, |
|
"learning_rate": 0.0001522769656523333, |
|
"loss": 0.4735, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.1795221843003414, |
|
"grad_norm": 0.15066657960414886, |
|
"learning_rate": 0.00015200563644432612, |
|
"loss": 0.4791, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.1822525597269624, |
|
"grad_norm": 0.13076473772525787, |
|
"learning_rate": 0.00015173378141776568, |
|
"loss": 0.4582, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.1849829351535837, |
|
"grad_norm": 0.15804897248744965, |
|
"learning_rate": 0.00015146140332132358, |
|
"loss": 0.482, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.1877133105802047, |
|
"grad_norm": 0.1330975741147995, |
|
"learning_rate": 0.00015118850490896012, |
|
"loss": 0.4736, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.190443686006826, |
|
"grad_norm": 0.16358092427253723, |
|
"learning_rate": 0.00015091508893989633, |
|
"loss": 0.4992, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.1931740614334472, |
|
"grad_norm": 0.14177009463310242, |
|
"learning_rate": 0.00015064115817858622, |
|
"loss": 0.4646, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.1959044368600682, |
|
"grad_norm": 0.1569090485572815, |
|
"learning_rate": 0.00015036671539468878, |
|
"loss": 0.4683, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.1986348122866894, |
|
"grad_norm": 0.15897628664970398, |
|
"learning_rate": 0.00015009176336303986, |
|
"loss": 0.4829, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.2013651877133107, |
|
"grad_norm": 0.1482827216386795, |
|
"learning_rate": 0.00014981630486362435, |
|
"loss": 0.4552, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.2040955631399317, |
|
"grad_norm": 0.1546843945980072, |
|
"learning_rate": 0.00014954034268154778, |
|
"loss": 0.4702, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.206825938566553, |
|
"grad_norm": 0.1493782103061676, |
|
"learning_rate": 0.00014926387960700842, |
|
"loss": 0.4708, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.209556313993174, |
|
"grad_norm": 0.15456125140190125, |
|
"learning_rate": 0.00014898691843526899, |
|
"loss": 0.4738, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.2122866894197952, |
|
"grad_norm": 0.17920009791851044, |
|
"learning_rate": 0.00014870946196662822, |
|
"loss": 0.4616, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.2150170648464165, |
|
"grad_norm": 0.15904481709003448, |
|
"learning_rate": 0.00014843151300639282, |
|
"loss": 0.4791, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.2177474402730375, |
|
"grad_norm": 0.18129463493824005, |
|
"learning_rate": 0.00014815307436484898, |
|
"loss": 0.4789, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.2204778156996587, |
|
"grad_norm": 0.14939218759536743, |
|
"learning_rate": 0.00014787414885723385, |
|
"loss": 0.4774, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.2232081911262798, |
|
"grad_norm": 0.17625784873962402, |
|
"learning_rate": 0.00014759473930370736, |
|
"loss": 0.4785, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.225938566552901, |
|
"grad_norm": 0.158066987991333, |
|
"learning_rate": 0.0001473148485293234, |
|
"loss": 0.4588, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.2286689419795223, |
|
"grad_norm": 0.16107094287872314, |
|
"learning_rate": 0.00014703447936400134, |
|
"loss": 0.4734, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.2313993174061433, |
|
"grad_norm": 0.1387171894311905, |
|
"learning_rate": 0.00014675363464249763, |
|
"loss": 0.4733, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.2341296928327645, |
|
"grad_norm": 0.1561625450849533, |
|
"learning_rate": 0.00014647231720437686, |
|
"loss": 0.4502, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.2368600682593858, |
|
"grad_norm": 0.1419583112001419, |
|
"learning_rate": 0.00014619052989398322, |
|
"loss": 0.4674, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.2395904436860068, |
|
"grad_norm": 0.1578853279352188, |
|
"learning_rate": 0.00014590827556041158, |
|
"loss": 0.4635, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.242320819112628, |
|
"grad_norm": 0.1325417309999466, |
|
"learning_rate": 0.00014562555705747892, |
|
"loss": 0.4557, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.245051194539249, |
|
"grad_norm": 0.17075014114379883, |
|
"learning_rate": 0.00014534237724369534, |
|
"loss": 0.4928, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.2477815699658703, |
|
"grad_norm": 0.136972576379776, |
|
"learning_rate": 0.00014505873898223496, |
|
"loss": 0.4699, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.2505119453924913, |
|
"grad_norm": 0.16119800508022308, |
|
"learning_rate": 0.00014477464514090743, |
|
"loss": 0.4807, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.2532423208191126, |
|
"grad_norm": 0.13321495056152344, |
|
"learning_rate": 0.00014449009859212857, |
|
"loss": 0.4806, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.2559726962457338, |
|
"grad_norm": 0.14326980710029602, |
|
"learning_rate": 0.00014420510221289137, |
|
"loss": 0.4491, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.2587030716723548, |
|
"grad_norm": 0.13098248839378357, |
|
"learning_rate": 0.00014391965888473703, |
|
"loss": 0.4653, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.261433447098976, |
|
"grad_norm": 0.14561250805854797, |
|
"learning_rate": 0.00014363377149372584, |
|
"loss": 0.479, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.2641638225255973, |
|
"grad_norm": 0.1377183198928833, |
|
"learning_rate": 0.0001433474429304077, |
|
"loss": 0.4607, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.2668941979522184, |
|
"grad_norm": 0.14758490025997162, |
|
"learning_rate": 0.0001430606760897934, |
|
"loss": 0.472, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.2696245733788396, |
|
"grad_norm": 0.15359081327915192, |
|
"learning_rate": 0.0001427734738713248, |
|
"loss": 0.469, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.2723549488054609, |
|
"grad_norm": 0.16317234933376312, |
|
"learning_rate": 0.00014248583917884594, |
|
"loss": 0.4749, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.2750853242320819, |
|
"grad_norm": 0.1310819685459137, |
|
"learning_rate": 0.00014219777492057348, |
|
"loss": 0.4722, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.2778156996587031, |
|
"grad_norm": 0.14236976206302643, |
|
"learning_rate": 0.0001419092840090673, |
|
"loss": 0.4704, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.2805460750853244, |
|
"grad_norm": 0.1274275928735733, |
|
"learning_rate": 0.00014162036936120115, |
|
"loss": 0.4687, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.2832764505119454, |
|
"grad_norm": 0.13622865080833435, |
|
"learning_rate": 0.00014133103389813302, |
|
"loss": 0.4649, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.2860068259385666, |
|
"grad_norm": 0.14750082790851593, |
|
"learning_rate": 0.0001410412805452757, |
|
"loss": 0.4654, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.2887372013651877, |
|
"grad_norm": 0.14838138222694397, |
|
"learning_rate": 0.0001407511122322672, |
|
"loss": 0.4626, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.291467576791809, |
|
"grad_norm": 0.1439974308013916, |
|
"learning_rate": 0.00014046053189294112, |
|
"loss": 0.4499, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.29419795221843, |
|
"grad_norm": 0.14686186611652374, |
|
"learning_rate": 0.00014016954246529696, |
|
"loss": 0.4676, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.2969283276450512, |
|
"grad_norm": 0.15870115160942078, |
|
"learning_rate": 0.00013987814689147041, |
|
"loss": 0.4448, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.2996587030716724, |
|
"grad_norm": 0.16382190585136414, |
|
"learning_rate": 0.0001395863481177036, |
|
"loss": 0.4476, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.3023890784982934, |
|
"grad_norm": 0.21564428508281708, |
|
"learning_rate": 0.00013929414909431544, |
|
"loss": 0.4751, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.3051194539249147, |
|
"grad_norm": 0.15450774133205414, |
|
"learning_rate": 0.00013900155277567157, |
|
"loss": 0.4776, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.307849829351536, |
|
"grad_norm": 0.15922100841999054, |
|
"learning_rate": 0.00013870856212015468, |
|
"loss": 0.4693, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.310580204778157, |
|
"grad_norm": 0.13661661744117737, |
|
"learning_rate": 0.00013841518009013445, |
|
"loss": 0.454, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.3133105802047782, |
|
"grad_norm": 0.1507856249809265, |
|
"learning_rate": 0.00013812140965193773, |
|
"loss": 0.4701, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.3160409556313994, |
|
"grad_norm": 0.14306284487247467, |
|
"learning_rate": 0.00013782725377581848, |
|
"loss": 0.4842, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.3187713310580205, |
|
"grad_norm": 0.15984225273132324, |
|
"learning_rate": 0.00013753271543592773, |
|
"loss": 0.4628, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.3215017064846417, |
|
"grad_norm": 0.15242429077625275, |
|
"learning_rate": 0.00013723779761028347, |
|
"loss": 0.4708, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.3242320819112627, |
|
"grad_norm": 0.14520719647407532, |
|
"learning_rate": 0.0001369425032807407, |
|
"loss": 0.4608, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.326962457337884, |
|
"grad_norm": 0.16152748465538025, |
|
"learning_rate": 0.00013664683543296112, |
|
"loss": 0.4729, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.329692832764505, |
|
"grad_norm": 0.15082891285419464, |
|
"learning_rate": 0.00013635079705638298, |
|
"loss": 0.4593, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.3324232081911263, |
|
"grad_norm": 0.16038447618484497, |
|
"learning_rate": 0.00013605439114419094, |
|
"loss": 0.4718, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.3351535836177475, |
|
"grad_norm": 0.1532922238111496, |
|
"learning_rate": 0.00013575762069328566, |
|
"loss": 0.4758, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.3378839590443685, |
|
"grad_norm": 0.12969861924648285, |
|
"learning_rate": 0.00013546048870425356, |
|
"loss": 0.4582, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.3406143344709898, |
|
"grad_norm": 0.13830237090587616, |
|
"learning_rate": 0.00013516299818133664, |
|
"loss": 0.4585, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.343344709897611, |
|
"grad_norm": 0.148755744099617, |
|
"learning_rate": 0.00013486515213240188, |
|
"loss": 0.4803, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.346075085324232, |
|
"grad_norm": 0.13623669743537903, |
|
"learning_rate": 0.0001345669535689108, |
|
"loss": 0.4562, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.3488054607508533, |
|
"grad_norm": 0.14996616542339325, |
|
"learning_rate": 0.00013426840550588933, |
|
"loss": 0.4546, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.3515358361774745, |
|
"grad_norm": 0.1468917429447174, |
|
"learning_rate": 0.000133969510961897, |
|
"loss": 0.4712, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.3542662116040955, |
|
"grad_norm": 0.14525148272514343, |
|
"learning_rate": 0.0001336702729589965, |
|
"loss": 0.4614, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.3569965870307168, |
|
"grad_norm": 0.13571806252002716, |
|
"learning_rate": 0.00013337069452272333, |
|
"loss": 0.4601, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.3597269624573378, |
|
"grad_norm": 0.15235814452171326, |
|
"learning_rate": 0.00013307077868205487, |
|
"loss": 0.4785, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.362457337883959, |
|
"grad_norm": 0.14220909774303436, |
|
"learning_rate": 0.00013277052846937996, |
|
"loss": 0.4561, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.36518771331058, |
|
"grad_norm": 0.14699751138687134, |
|
"learning_rate": 0.00013246994692046836, |
|
"loss": 0.446, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3679180887372013, |
|
"grad_norm": 0.1636335849761963, |
|
"learning_rate": 0.00013216903707443967, |
|
"loss": 0.4614, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.3706484641638226, |
|
"grad_norm": 0.1447010040283203, |
|
"learning_rate": 0.00013186780197373306, |
|
"loss": 0.4573, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.3733788395904436, |
|
"grad_norm": 0.17758530378341675, |
|
"learning_rate": 0.0001315662446640761, |
|
"loss": 0.4701, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.3761092150170648, |
|
"grad_norm": 0.1493985950946808, |
|
"learning_rate": 0.00013126436819445422, |
|
"loss": 0.4671, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.378839590443686, |
|
"grad_norm": 0.16412951052188873, |
|
"learning_rate": 0.0001309621756170799, |
|
"loss": 0.4705, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.3815699658703071, |
|
"grad_norm": 0.14819127321243286, |
|
"learning_rate": 0.00013065966998736155, |
|
"loss": 0.4579, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.3843003412969284, |
|
"grad_norm": 0.1500328630208969, |
|
"learning_rate": 0.00013035685436387298, |
|
"loss": 0.4484, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.3870307167235496, |
|
"grad_norm": 0.14388103783130646, |
|
"learning_rate": 0.0001300537318083221, |
|
"loss": 0.4325, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.3897610921501706, |
|
"grad_norm": 0.17138421535491943, |
|
"learning_rate": 0.00012975030538552032, |
|
"loss": 0.4717, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.3924914675767919, |
|
"grad_norm": 0.15119241178035736, |
|
"learning_rate": 0.00012944657816335123, |
|
"loss": 0.4279, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.395221843003413, |
|
"grad_norm": 0.15165849030017853, |
|
"learning_rate": 0.00012914255321273986, |
|
"loss": 0.4716, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.3979522184300341, |
|
"grad_norm": 0.14222781360149384, |
|
"learning_rate": 0.0001288382336076215, |
|
"loss": 0.4393, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.4006825938566552, |
|
"grad_norm": 0.1435043066740036, |
|
"learning_rate": 0.00012853362242491053, |
|
"loss": 0.4589, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.4034129692832764, |
|
"grad_norm": 0.15017302334308624, |
|
"learning_rate": 0.00012822872274446958, |
|
"loss": 0.4492, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.4061433447098977, |
|
"grad_norm": 0.14196786284446716, |
|
"learning_rate": 0.00012792353764907804, |
|
"loss": 0.4588, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.4088737201365187, |
|
"grad_norm": 0.144223153591156, |
|
"learning_rate": 0.0001276180702244012, |
|
"loss": 0.4678, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.41160409556314, |
|
"grad_norm": 0.12913116812705994, |
|
"learning_rate": 0.0001273123235589589, |
|
"loss": 0.4515, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.4143344709897612, |
|
"grad_norm": 0.1370343565940857, |
|
"learning_rate": 0.00012700630074409427, |
|
"loss": 0.444, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.4170648464163822, |
|
"grad_norm": 0.15820138156414032, |
|
"learning_rate": 0.00012670000487394266, |
|
"loss": 0.4693, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.4197952218430034, |
|
"grad_norm": 0.13780242204666138, |
|
"learning_rate": 0.0001263934390454001, |
|
"loss": 0.4621, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.4225255972696247, |
|
"grad_norm": 0.13959269225597382, |
|
"learning_rate": 0.00012608660635809207, |
|
"loss": 0.448, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.4252559726962457, |
|
"grad_norm": 0.14923076331615448, |
|
"learning_rate": 0.00012577950991434248, |
|
"loss": 0.4694, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.427986348122867, |
|
"grad_norm": 0.14546504616737366, |
|
"learning_rate": 0.00012547215281914168, |
|
"loss": 0.4679, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.430716723549488, |
|
"grad_norm": 0.13985708355903625, |
|
"learning_rate": 0.00012516453818011566, |
|
"loss": 0.469, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.4334470989761092, |
|
"grad_norm": 0.15217062830924988, |
|
"learning_rate": 0.00012485666910749428, |
|
"loss": 0.4698, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.4361774744027302, |
|
"grad_norm": 0.1293148696422577, |
|
"learning_rate": 0.00012454854871407994, |
|
"loss": 0.4448, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.4389078498293515, |
|
"grad_norm": 0.16416317224502563, |
|
"learning_rate": 0.0001242401801152161, |
|
"loss": 0.4589, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.4416382252559727, |
|
"grad_norm": 0.14671452343463898, |
|
"learning_rate": 0.0001239315664287558, |
|
"loss": 0.4498, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.4443686006825938, |
|
"grad_norm": 0.17271259427070618, |
|
"learning_rate": 0.00012362271077503008, |
|
"loss": 0.4492, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.447098976109215, |
|
"grad_norm": 0.15541909635066986, |
|
"learning_rate": 0.00012331361627681645, |
|
"loss": 0.4281, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.4498293515358363, |
|
"grad_norm": 0.14913444221019745, |
|
"learning_rate": 0.00012300428605930736, |
|
"loss": 0.4334, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.4525597269624573, |
|
"grad_norm": 0.167875275015831, |
|
"learning_rate": 0.00012269472325007858, |
|
"loss": 0.4736, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.4552901023890785, |
|
"grad_norm": 0.15073426067829132, |
|
"learning_rate": 0.00012238493097905756, |
|
"loss": 0.4744, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.4580204778156998, |
|
"grad_norm": 0.16593150794506073, |
|
"learning_rate": 0.00012207491237849172, |
|
"loss": 0.455, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.4607508532423208, |
|
"grad_norm": 0.13389617204666138, |
|
"learning_rate": 0.00012176467058291699, |
|
"loss": 0.4394, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.463481228668942, |
|
"grad_norm": 0.16609057784080505, |
|
"learning_rate": 0.00012145420872912585, |
|
"loss": 0.4562, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.466211604095563, |
|
"grad_norm": 0.1394118070602417, |
|
"learning_rate": 0.00012114352995613582, |
|
"loss": 0.4451, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.4689419795221843, |
|
"grad_norm": 0.17529746890068054, |
|
"learning_rate": 0.00012083263740515765, |
|
"loss": 0.4522, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.4716723549488053, |
|
"grad_norm": 0.15724115073680878, |
|
"learning_rate": 0.00012052153421956342, |
|
"loss": 0.4556, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.4744027303754266, |
|
"grad_norm": 0.17186792194843292, |
|
"learning_rate": 0.00012021022354485514, |
|
"loss": 0.4546, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.4771331058020478, |
|
"grad_norm": 0.1301499456167221, |
|
"learning_rate": 0.00011989870852863254, |
|
"loss": 0.4431, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.4798634812286688, |
|
"grad_norm": 0.1480223387479782, |
|
"learning_rate": 0.00011958699232056134, |
|
"loss": 0.4491, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.48259385665529, |
|
"grad_norm": 0.13150086998939514, |
|
"learning_rate": 0.00011927507807234168, |
|
"loss": 0.4568, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.4853242320819113, |
|
"grad_norm": 0.15769197046756744, |
|
"learning_rate": 0.00011896296893767587, |
|
"loss": 0.4501, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.4880546075085324, |
|
"grad_norm": 0.13996848464012146, |
|
"learning_rate": 0.0001186506680722367, |
|
"loss": 0.4673, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.4907849829351536, |
|
"grad_norm": 0.16406555473804474, |
|
"learning_rate": 0.00011833817863363564, |
|
"loss": 0.4634, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.4935153583617748, |
|
"grad_norm": 0.14007951319217682, |
|
"learning_rate": 0.0001180255037813906, |
|
"loss": 0.4466, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.4962457337883959, |
|
"grad_norm": 0.15525664389133453, |
|
"learning_rate": 0.00011771264667689427, |
|
"loss": 0.4323, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.4989761092150171, |
|
"grad_norm": 0.143234983086586, |
|
"learning_rate": 0.00011739961048338213, |
|
"loss": 0.4395, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.5017064846416384, |
|
"grad_norm": 0.15597446262836456, |
|
"learning_rate": 0.00011708639836590023, |
|
"loss": 0.4546, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.5044368600682594, |
|
"grad_norm": 0.13816912472248077, |
|
"learning_rate": 0.00011677301349127348, |
|
"loss": 0.4586, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.5071672354948804, |
|
"grad_norm": 0.16299140453338623, |
|
"learning_rate": 0.00011645945902807341, |
|
"loss": 0.4465, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.5098976109215017, |
|
"grad_norm": 0.15032370388507843, |
|
"learning_rate": 0.00011614573814658629, |
|
"loss": 0.4579, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.512627986348123, |
|
"grad_norm": 0.158245250582695, |
|
"learning_rate": 0.00011583185401878101, |
|
"loss": 0.4462, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.515358361774744, |
|
"grad_norm": 0.16943717002868652, |
|
"learning_rate": 0.00011551780981827698, |
|
"loss": 0.4572, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.5180887372013652, |
|
"grad_norm": 0.14559145271778107, |
|
"learning_rate": 0.00011520360872031209, |
|
"loss": 0.4693, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.5208191126279864, |
|
"grad_norm": 0.16138285398483276, |
|
"learning_rate": 0.00011488925390171059, |
|
"loss": 0.4623, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.5235494880546074, |
|
"grad_norm": 0.14859908819198608, |
|
"learning_rate": 0.00011457474854085096, |
|
"loss": 0.4684, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.5262798634812287, |
|
"grad_norm": 0.15695518255233765, |
|
"learning_rate": 0.00011426009581763377, |
|
"loss": 0.4619, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.52901023890785, |
|
"grad_norm": 0.14711041748523712, |
|
"learning_rate": 0.00011394529891344958, |
|
"loss": 0.4556, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.531740614334471, |
|
"grad_norm": 0.15271785855293274, |
|
"learning_rate": 0.0001136303610111467, |
|
"loss": 0.4557, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.5344709897610922, |
|
"grad_norm": 0.1541603058576584, |
|
"learning_rate": 0.00011331528529499909, |
|
"loss": 0.4644, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.5372013651877134, |
|
"grad_norm": 0.1686154007911682, |
|
"learning_rate": 0.00011300007495067401, |
|
"loss": 0.4446, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.5399317406143345, |
|
"grad_norm": 0.14042454957962036, |
|
"learning_rate": 0.00011268473316520007, |
|
"loss": 0.4275, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.5426621160409555, |
|
"grad_norm": 0.14575007557868958, |
|
"learning_rate": 0.00011236926312693479, |
|
"loss": 0.4579, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.545392491467577, |
|
"grad_norm": 0.16124123334884644, |
|
"learning_rate": 0.0001120536680255323, |
|
"loss": 0.441, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.548122866894198, |
|
"grad_norm": 0.14471302926540375, |
|
"learning_rate": 0.00011173795105191145, |
|
"loss": 0.4543, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.550853242320819, |
|
"grad_norm": 0.162650004029274, |
|
"learning_rate": 0.00011142211539822318, |
|
"loss": 0.4483, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.5535836177474402, |
|
"grad_norm": 0.14518044888973236, |
|
"learning_rate": 0.00011110616425781833, |
|
"loss": 0.4386, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.5563139931740615, |
|
"grad_norm": 0.1545732617378235, |
|
"learning_rate": 0.00011079010082521557, |
|
"loss": 0.4505, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.5590443686006825, |
|
"grad_norm": 0.12941716611385345, |
|
"learning_rate": 0.00011047392829606876, |
|
"loss": 0.4288, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.5617747440273038, |
|
"grad_norm": 0.15107029676437378, |
|
"learning_rate": 0.0001101576498671349, |
|
"loss": 0.4275, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.564505119453925, |
|
"grad_norm": 0.13776972889900208, |
|
"learning_rate": 0.00010984126873624179, |
|
"loss": 0.432, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.567235494880546, |
|
"grad_norm": 0.15370745956897736, |
|
"learning_rate": 0.00010952478810225548, |
|
"loss": 0.4523, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.5699658703071673, |
|
"grad_norm": 0.14505314826965332, |
|
"learning_rate": 0.00010920821116504816, |
|
"loss": 0.4444, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.5726962457337885, |
|
"grad_norm": 0.13053090870380402, |
|
"learning_rate": 0.0001088915411254657, |
|
"loss": 0.4492, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.5754266211604095, |
|
"grad_norm": 0.15613074600696564, |
|
"learning_rate": 0.00010857478118529533, |
|
"loss": 0.465, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.5781569965870306, |
|
"grad_norm": 0.164808988571167, |
|
"learning_rate": 0.00010825793454723325, |
|
"loss": 0.4498, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.580887372013652, |
|
"grad_norm": 0.14633600413799286, |
|
"learning_rate": 0.0001079410044148522, |
|
"loss": 0.4416, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.583617747440273, |
|
"grad_norm": 0.15268942713737488, |
|
"learning_rate": 0.00010762399399256917, |
|
"loss": 0.4431, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.586348122866894, |
|
"grad_norm": 0.16290055215358734, |
|
"learning_rate": 0.00010730690648561292, |
|
"loss": 0.465, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.5890784982935153, |
|
"grad_norm": 0.13567085564136505, |
|
"learning_rate": 0.00010698974509999158, |
|
"loss": 0.4397, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.5918088737201366, |
|
"grad_norm": 0.14896200597286224, |
|
"learning_rate": 0.00010667251304246029, |
|
"loss": 0.4458, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.5945392491467576, |
|
"grad_norm": 0.14076146483421326, |
|
"learning_rate": 0.00010635521352048872, |
|
"loss": 0.4476, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.5972696245733788, |
|
"grad_norm": 0.13334687054157257, |
|
"learning_rate": 0.00010603784974222861, |
|
"loss": 0.4283, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.16997142136096954, |
|
"learning_rate": 0.00010572042491648149, |
|
"loss": 0.4472, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.6027303754266211, |
|
"grad_norm": 0.15508471429347992, |
|
"learning_rate": 0.00010540294225266607, |
|
"loss": 0.4518, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.6054607508532424, |
|
"grad_norm": 0.1571933478116989, |
|
"learning_rate": 0.0001050854049607858, |
|
"loss": 0.4493, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.6081911262798636, |
|
"grad_norm": 0.15027360618114471, |
|
"learning_rate": 0.00010476781625139656, |
|
"loss": 0.4562, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.6109215017064846, |
|
"grad_norm": 0.14502452313899994, |
|
"learning_rate": 0.00010445017933557404, |
|
"loss": 0.4476, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.6136518771331056, |
|
"grad_norm": 0.1403171718120575, |
|
"learning_rate": 0.00010413249742488131, |
|
"loss": 0.4154, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.6163822525597271, |
|
"grad_norm": 0.14815428853034973, |
|
"learning_rate": 0.00010381477373133652, |
|
"loss": 0.4501, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.6191126279863481, |
|
"grad_norm": 0.16107513010501862, |
|
"learning_rate": 0.00010349701146738007, |
|
"loss": 0.4507, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.6218430034129692, |
|
"grad_norm": 0.14111128449440002, |
|
"learning_rate": 0.00010317921384584244, |
|
"loss": 0.4483, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.6245733788395904, |
|
"grad_norm": 0.1680098921060562, |
|
"learning_rate": 0.0001028613840799117, |
|
"loss": 0.4584, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.6273037542662117, |
|
"grad_norm": 0.13612088561058044, |
|
"learning_rate": 0.00010254352538310075, |
|
"loss": 0.4389, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.6300341296928327, |
|
"grad_norm": 0.16298632323741913, |
|
"learning_rate": 0.00010222564096921505, |
|
"loss": 0.452, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.632764505119454, |
|
"grad_norm": 0.14744146168231964, |
|
"learning_rate": 0.00010190773405232024, |
|
"loss": 0.4588, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.6354948805460752, |
|
"grad_norm": 0.1594633013010025, |
|
"learning_rate": 0.00010158980784670927, |
|
"loss": 0.4403, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.6382252559726962, |
|
"grad_norm": 0.15123943984508514, |
|
"learning_rate": 0.00010127186556687019, |
|
"loss": 0.4395, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.6409556313993174, |
|
"grad_norm": 0.1518649309873581, |
|
"learning_rate": 0.00010095391042745361, |
|
"loss": 0.4265, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.6436860068259387, |
|
"grad_norm": 0.1611323356628418, |
|
"learning_rate": 0.00010063594564324012, |
|
"loss": 0.444, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.6464163822525597, |
|
"grad_norm": 0.14762264490127563, |
|
"learning_rate": 0.00010031797442910789, |
|
"loss": 0.4409, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.6491467576791807, |
|
"grad_norm": 0.1546734869480133, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4472, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.6518771331058022, |
|
"grad_norm": 0.1580485701560974, |
|
"learning_rate": 9.968202557089212e-05, |
|
"loss": 0.4497, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.6546075085324232, |
|
"grad_norm": 0.151153564453125, |
|
"learning_rate": 9.93640543567599e-05, |
|
"loss": 0.4495, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.6573378839590442, |
|
"grad_norm": 0.14238281548023224, |
|
"learning_rate": 9.904608957254642e-05, |
|
"loss": 0.4481, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.6600682593856655, |
|
"grad_norm": 0.13984693586826324, |
|
"learning_rate": 9.872813443312984e-05, |
|
"loss": 0.4262, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.6627986348122867, |
|
"grad_norm": 0.14657770097255707, |
|
"learning_rate": 9.84101921532908e-05, |
|
"loss": 0.4554, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.6655290102389078, |
|
"grad_norm": 0.15702606737613678, |
|
"learning_rate": 9.809226594767978e-05, |
|
"loss": 0.4553, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.668259385665529, |
|
"grad_norm": 0.15596133470535278, |
|
"learning_rate": 9.777435903078494e-05, |
|
"loss": 0.4353, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.6709897610921502, |
|
"grad_norm": 0.16193975508213043, |
|
"learning_rate": 9.745647461689931e-05, |
|
"loss": 0.4313, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.6737201365187713, |
|
"grad_norm": 0.15690681338310242, |
|
"learning_rate": 9.713861592008833e-05, |
|
"loss": 0.4498, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.6764505119453925, |
|
"grad_norm": 0.15277935564517975, |
|
"learning_rate": 9.682078615415754e-05, |
|
"loss": 0.4358, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.6791808873720138, |
|
"grad_norm": 0.1447397917509079, |
|
"learning_rate": 9.650298853261997e-05, |
|
"loss": 0.4264, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.6819112627986348, |
|
"grad_norm": 0.14292937517166138, |
|
"learning_rate": 9.61852262686635e-05, |
|
"loss": 0.4521, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.6846416382252558, |
|
"grad_norm": 0.15041732788085938, |
|
"learning_rate": 9.586750257511867e-05, |
|
"loss": 0.445, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.6873720136518773, |
|
"grad_norm": 0.14610610902309418, |
|
"learning_rate": 9.5549820664426e-05, |
|
"loss": 0.4493, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.6901023890784983, |
|
"grad_norm": 0.1494503617286682, |
|
"learning_rate": 9.523218374860348e-05, |
|
"loss": 0.4473, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.6928327645051193, |
|
"grad_norm": 0.14970283210277557, |
|
"learning_rate": 9.491459503921421e-05, |
|
"loss": 0.453, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.6955631399317406, |
|
"grad_norm": 0.15402431786060333, |
|
"learning_rate": 9.459705774733396e-05, |
|
"loss": 0.4435, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.6982935153583618, |
|
"grad_norm": 0.1583550125360489, |
|
"learning_rate": 9.427957508351852e-05, |
|
"loss": 0.4379, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.7010238907849828, |
|
"grad_norm": 0.15820656716823578, |
|
"learning_rate": 9.396215025777139e-05, |
|
"loss": 0.4416, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.703754266211604, |
|
"grad_norm": 0.14503423869609833, |
|
"learning_rate": 9.364478647951133e-05, |
|
"loss": 0.434, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.7064846416382253, |
|
"grad_norm": 0.1509648561477661, |
|
"learning_rate": 9.332748695753973e-05, |
|
"loss": 0.4516, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.7092150170648464, |
|
"grad_norm": 0.15958918631076813, |
|
"learning_rate": 9.301025490000841e-05, |
|
"loss": 0.4504, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.7119453924914676, |
|
"grad_norm": 0.1522430032491684, |
|
"learning_rate": 9.269309351438711e-05, |
|
"loss": 0.4339, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.7146757679180888, |
|
"grad_norm": 0.14348183572292328, |
|
"learning_rate": 9.237600600743085e-05, |
|
"loss": 0.4236, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.7174061433447099, |
|
"grad_norm": 0.1558932512998581, |
|
"learning_rate": 9.20589955851478e-05, |
|
"loss": 0.4514, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.7201365187713311, |
|
"grad_norm": 0.13524165749549866, |
|
"learning_rate": 9.174206545276677e-05, |
|
"loss": 0.4426, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.7228668941979524, |
|
"grad_norm": 0.1464926302433014, |
|
"learning_rate": 9.142521881470469e-05, |
|
"loss": 0.4397, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.7255972696245734, |
|
"grad_norm": 0.14485323429107666, |
|
"learning_rate": 9.11084588745343e-05, |
|
"loss": 0.4497, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.7283276450511944, |
|
"grad_norm": 0.139760822057724, |
|
"learning_rate": 9.07917888349519e-05, |
|
"loss": 0.4465, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.7310580204778157, |
|
"grad_norm": 0.16193385422229767, |
|
"learning_rate": 9.047521189774455e-05, |
|
"loss": 0.4377, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.733788395904437, |
|
"grad_norm": 0.17076647281646729, |
|
"learning_rate": 9.015873126375822e-05, |
|
"loss": 0.4632, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.736518771331058, |
|
"grad_norm": 0.14881980419158936, |
|
"learning_rate": 8.984235013286511e-05, |
|
"loss": 0.4401, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.7392491467576792, |
|
"grad_norm": 0.15158087015151978, |
|
"learning_rate": 8.952607170393125e-05, |
|
"loss": 0.4292, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.7419795221843004, |
|
"grad_norm": 0.14353424310684204, |
|
"learning_rate": 8.920989917478447e-05, |
|
"loss": 0.4439, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.7447098976109214, |
|
"grad_norm": 0.13718026876449585, |
|
"learning_rate": 8.88938357421817e-05, |
|
"loss": 0.4267, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.7474402730375427, |
|
"grad_norm": 0.15788930654525757, |
|
"learning_rate": 8.857788460177686e-05, |
|
"loss": 0.4241, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.750170648464164, |
|
"grad_norm": 0.14324265718460083, |
|
"learning_rate": 8.826204894808855e-05, |
|
"loss": 0.4472, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.752901023890785, |
|
"grad_norm": 0.15020030736923218, |
|
"learning_rate": 8.79463319744677e-05, |
|
"loss": 0.4339, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.7556313993174062, |
|
"grad_norm": 0.13715054094791412, |
|
"learning_rate": 8.763073687306524e-05, |
|
"loss": 0.4429, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.7583617747440274, |
|
"grad_norm": 0.16862636804580688, |
|
"learning_rate": 8.731526683479992e-05, |
|
"loss": 0.4509, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.7610921501706485, |
|
"grad_norm": 0.14781633019447327, |
|
"learning_rate": 8.6999925049326e-05, |
|
"loss": 0.4319, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.7638225255972695, |
|
"grad_norm": 0.15873977541923523, |
|
"learning_rate": 8.668471470500095e-05, |
|
"loss": 0.4486, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.7665529010238907, |
|
"grad_norm": 0.1477411389350891, |
|
"learning_rate": 8.63696389888533e-05, |
|
"loss": 0.4461, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.769283276450512, |
|
"grad_norm": 0.1804722100496292, |
|
"learning_rate": 8.605470108655045e-05, |
|
"loss": 0.4402, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.772013651877133, |
|
"grad_norm": 0.15354932844638824, |
|
"learning_rate": 8.573990418236625e-05, |
|
"loss": 0.4564, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.7747440273037542, |
|
"grad_norm": 0.1494126319885254, |
|
"learning_rate": 8.542525145914905e-05, |
|
"loss": 0.4377, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.7774744027303755, |
|
"grad_norm": 0.14122453331947327, |
|
"learning_rate": 8.511074609828944e-05, |
|
"loss": 0.4361, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.7802047781569965, |
|
"grad_norm": 0.16938751935958862, |
|
"learning_rate": 8.479639127968792e-05, |
|
"loss": 0.4593, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.7829351535836178, |
|
"grad_norm": 0.1502314805984497, |
|
"learning_rate": 8.448219018172303e-05, |
|
"loss": 0.4336, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.785665529010239, |
|
"grad_norm": 0.180609330534935, |
|
"learning_rate": 8.4168145981219e-05, |
|
"loss": 0.45, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.78839590443686, |
|
"grad_norm": 0.1395808309316635, |
|
"learning_rate": 8.385426185341374e-05, |
|
"loss": 0.4188, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.7911262798634813, |
|
"grad_norm": 0.1596853882074356, |
|
"learning_rate": 8.35405409719266e-05, |
|
"loss": 0.452, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.7938566552901025, |
|
"grad_norm": 0.15330305695533752, |
|
"learning_rate": 8.322698650872656e-05, |
|
"loss": 0.4355, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.7965870307167235, |
|
"grad_norm": 0.1545482873916626, |
|
"learning_rate": 8.291360163409978e-05, |
|
"loss": 0.4366, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.7993174061433446, |
|
"grad_norm": 0.13950030505657196, |
|
"learning_rate": 8.260038951661787e-05, |
|
"loss": 0.4169, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.802047781569966, |
|
"grad_norm": 0.14191307127475739, |
|
"learning_rate": 8.228735332310575e-05, |
|
"loss": 0.4471, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.804778156996587, |
|
"grad_norm": 0.14557993412017822, |
|
"learning_rate": 8.197449621860943e-05, |
|
"loss": 0.4028, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.807508532423208, |
|
"grad_norm": 0.13985979557037354, |
|
"learning_rate": 8.16618213663644e-05, |
|
"loss": 0.4293, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.8102389078498293, |
|
"grad_norm": 0.1420183628797531, |
|
"learning_rate": 8.134933192776333e-05, |
|
"loss": 0.4313, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.8129692832764506, |
|
"grad_norm": 0.1498919129371643, |
|
"learning_rate": 8.103703106232416e-05, |
|
"loss": 0.4315, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.8156996587030716, |
|
"grad_norm": 0.1570868194103241, |
|
"learning_rate": 8.072492192765833e-05, |
|
"loss": 0.4348, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.8184300341296928, |
|
"grad_norm": 0.15423277020454407, |
|
"learning_rate": 8.041300767943867e-05, |
|
"loss": 0.44, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.821160409556314, |
|
"grad_norm": 0.13805197179317474, |
|
"learning_rate": 8.010129147136749e-05, |
|
"loss": 0.4317, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.823890784982935, |
|
"grad_norm": 0.14628642797470093, |
|
"learning_rate": 7.978977645514487e-05, |
|
"loss": 0.4379, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.8266211604095564, |
|
"grad_norm": 0.15710268914699554, |
|
"learning_rate": 7.947846578043659e-05, |
|
"loss": 0.4557, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.8293515358361776, |
|
"grad_norm": 0.15317128598690033, |
|
"learning_rate": 7.916736259484239e-05, |
|
"loss": 0.4506, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.8320819112627986, |
|
"grad_norm": 0.15919502079486847, |
|
"learning_rate": 7.88564700438642e-05, |
|
"loss": 0.435, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.8348122866894196, |
|
"grad_norm": 0.1551041305065155, |
|
"learning_rate": 7.854579127087417e-05, |
|
"loss": 0.4283, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.8375426621160411, |
|
"grad_norm": 0.16782739758491516, |
|
"learning_rate": 7.823532941708303e-05, |
|
"loss": 0.4507, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.8402730375426621, |
|
"grad_norm": 0.15251149237155914, |
|
"learning_rate": 7.792508762150833e-05, |
|
"loss": 0.4255, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.8430034129692832, |
|
"grad_norm": 0.17279517650604248, |
|
"learning_rate": 7.761506902094248e-05, |
|
"loss": 0.4255, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.8457337883959044, |
|
"grad_norm": 0.13385094702243805, |
|
"learning_rate": 7.730527674992143e-05, |
|
"loss": 0.4229, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.8484641638225257, |
|
"grad_norm": 0.15888231992721558, |
|
"learning_rate": 7.699571394069269e-05, |
|
"loss": 0.4335, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.8511945392491467, |
|
"grad_norm": 0.1549587994813919, |
|
"learning_rate": 7.668638372318359e-05, |
|
"loss": 0.4083, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.853924914675768, |
|
"grad_norm": 0.16143332421779633, |
|
"learning_rate": 7.637728922496996e-05, |
|
"loss": 0.4367, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.8566552901023892, |
|
"grad_norm": 0.13735996186733246, |
|
"learning_rate": 7.606843357124426e-05, |
|
"loss": 0.4296, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.8593856655290102, |
|
"grad_norm": 0.14317500591278076, |
|
"learning_rate": 7.575981988478392e-05, |
|
"loss": 0.4419, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.8621160409556314, |
|
"grad_norm": 0.14451129734516144, |
|
"learning_rate": 7.54514512859201e-05, |
|
"loss": 0.4389, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.8648464163822527, |
|
"grad_norm": 0.14233650267124176, |
|
"learning_rate": 7.514333089250577e-05, |
|
"loss": 0.4258, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.8675767918088737, |
|
"grad_norm": 0.14275044202804565, |
|
"learning_rate": 7.483546181988436e-05, |
|
"loss": 0.4456, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.8703071672354947, |
|
"grad_norm": 0.15278606116771698, |
|
"learning_rate": 7.452784718085833e-05, |
|
"loss": 0.4344, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.8730375426621162, |
|
"grad_norm": 0.13797658681869507, |
|
"learning_rate": 7.422049008565757e-05, |
|
"loss": 0.4392, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.8757679180887372, |
|
"grad_norm": 0.1518598347902298, |
|
"learning_rate": 7.391339364190794e-05, |
|
"loss": 0.431, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.8784982935153582, |
|
"grad_norm": 0.1477964073419571, |
|
"learning_rate": 7.360656095459995e-05, |
|
"loss": 0.4449, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.8812286689419795, |
|
"grad_norm": 0.14295299351215363, |
|
"learning_rate": 7.329999512605738e-05, |
|
"loss": 0.4384, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.8839590443686007, |
|
"grad_norm": 0.1487056165933609, |
|
"learning_rate": 7.299369925590574e-05, |
|
"loss": 0.4311, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.8866894197952218, |
|
"grad_norm": 0.14108945429325104, |
|
"learning_rate": 7.268767644104112e-05, |
|
"loss": 0.4239, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.889419795221843, |
|
"grad_norm": 0.15047363936901093, |
|
"learning_rate": 7.238192977559884e-05, |
|
"loss": 0.4427, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.8921501706484642, |
|
"grad_norm": 0.14634403586387634, |
|
"learning_rate": 7.2076462350922e-05, |
|
"loss": 0.4416, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.8948805460750853, |
|
"grad_norm": 0.15272392332553864, |
|
"learning_rate": 7.177127725553045e-05, |
|
"loss": 0.43, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.8976109215017065, |
|
"grad_norm": 0.15047992765903473, |
|
"learning_rate": 7.146637757508949e-05, |
|
"loss": 0.4436, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.9003412969283278, |
|
"grad_norm": 0.14413118362426758, |
|
"learning_rate": 7.116176639237852e-05, |
|
"loss": 0.4169, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.9030716723549488, |
|
"grad_norm": 0.1437167525291443, |
|
"learning_rate": 7.085744678726013e-05, |
|
"loss": 0.4389, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.9058020477815698, |
|
"grad_norm": 0.14662359654903412, |
|
"learning_rate": 7.05534218366488e-05, |
|
"loss": 0.435, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.9085324232081913, |
|
"grad_norm": 0.14521794021129608, |
|
"learning_rate": 7.024969461447972e-05, |
|
"loss": 0.4505, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.9112627986348123, |
|
"grad_norm": 0.14155706763267517, |
|
"learning_rate": 6.994626819167789e-05, |
|
"loss": 0.4301, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.9139931740614333, |
|
"grad_norm": 0.14663158357143402, |
|
"learning_rate": 6.964314563612708e-05, |
|
"loss": 0.4262, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.9167235494880546, |
|
"grad_norm": 0.15034128725528717, |
|
"learning_rate": 6.934033001263847e-05, |
|
"loss": 0.4424, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.9194539249146758, |
|
"grad_norm": 0.15093255043029785, |
|
"learning_rate": 6.903782438292015e-05, |
|
"loss": 0.4425, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.9221843003412968, |
|
"grad_norm": 0.1556250900030136, |
|
"learning_rate": 6.873563180554583e-05, |
|
"loss": 0.4225, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.924914675767918, |
|
"grad_norm": 0.16173475980758667, |
|
"learning_rate": 6.843375533592395e-05, |
|
"loss": 0.4228, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.9276450511945393, |
|
"grad_norm": 0.1532420516014099, |
|
"learning_rate": 6.813219802626698e-05, |
|
"loss": 0.432, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.9303754266211604, |
|
"grad_norm": 0.1596469283103943, |
|
"learning_rate": 6.783096292556035e-05, |
|
"loss": 0.4232, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.9331058020477816, |
|
"grad_norm": 0.16538076102733612, |
|
"learning_rate": 6.753005307953167e-05, |
|
"loss": 0.45, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.9358361774744028, |
|
"grad_norm": 0.15187640488147736, |
|
"learning_rate": 6.722947153062003e-05, |
|
"loss": 0.4442, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.9385665529010239, |
|
"grad_norm": 0.14827731251716614, |
|
"learning_rate": 6.692922131794517e-05, |
|
"loss": 0.414, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.9412969283276449, |
|
"grad_norm": 0.16438645124435425, |
|
"learning_rate": 6.662930547727668e-05, |
|
"loss": 0.419, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.9440273037542664, |
|
"grad_norm": 0.15135832130908966, |
|
"learning_rate": 6.632972704100349e-05, |
|
"loss": 0.4155, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.9467576791808874, |
|
"grad_norm": 0.15094083547592163, |
|
"learning_rate": 6.603048903810305e-05, |
|
"loss": 0.4258, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.9494880546075084, |
|
"grad_norm": 0.14968033134937286, |
|
"learning_rate": 6.57315944941107e-05, |
|
"loss": 0.4395, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.9522184300341296, |
|
"grad_norm": 0.16519851982593536, |
|
"learning_rate": 6.54330464310892e-05, |
|
"loss": 0.4406, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.954948805460751, |
|
"grad_norm": 0.13855180144309998, |
|
"learning_rate": 6.513484786759818e-05, |
|
"loss": 0.43, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.957679180887372, |
|
"grad_norm": 0.1570328176021576, |
|
"learning_rate": 6.483700181866337e-05, |
|
"loss": 0.4288, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.9604095563139932, |
|
"grad_norm": 0.14928270876407623, |
|
"learning_rate": 6.453951129574644e-05, |
|
"loss": 0.4224, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.9631399317406144, |
|
"grad_norm": 0.16348999738693237, |
|
"learning_rate": 6.42423793067144e-05, |
|
"loss": 0.4379, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.9658703071672354, |
|
"grad_norm": 0.14947615563869476, |
|
"learning_rate": 6.39456088558091e-05, |
|
"loss": 0.445, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.9686006825938567, |
|
"grad_norm": 0.14767783880233765, |
|
"learning_rate": 6.3649202943617e-05, |
|
"loss": 0.4388, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.971331058020478, |
|
"grad_norm": 0.14223739504814148, |
|
"learning_rate": 6.33531645670389e-05, |
|
"loss": 0.433, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.974061433447099, |
|
"grad_norm": 0.1595824956893921, |
|
"learning_rate": 6.305749671925931e-05, |
|
"loss": 0.4325, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.9767918088737202, |
|
"grad_norm": 0.14971914887428284, |
|
"learning_rate": 6.276220238971652e-05, |
|
"loss": 0.4336, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.9795221843003414, |
|
"grad_norm": 0.15426860749721527, |
|
"learning_rate": 6.24672845640723e-05, |
|
"loss": 0.433, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.9822525597269625, |
|
"grad_norm": 0.1470557302236557, |
|
"learning_rate": 6.217274622418153e-05, |
|
"loss": 0.435, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.9849829351535835, |
|
"grad_norm": 0.1564924120903015, |
|
"learning_rate": 6.187859034806224e-05, |
|
"loss": 0.4371, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.9877133105802047, |
|
"grad_norm": 0.16420651972293854, |
|
"learning_rate": 6.158481990986557e-05, |
|
"loss": 0.4478, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.990443686006826, |
|
"grad_norm": 0.15158484876155853, |
|
"learning_rate": 6.129143787984533e-05, |
|
"loss": 0.4267, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.993174061433447, |
|
"grad_norm": 0.15700684487819672, |
|
"learning_rate": 6.099844722432843e-05, |
|
"loss": 0.4293, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.9959044368600682, |
|
"grad_norm": 0.14848262071609497, |
|
"learning_rate": 6.070585090568459e-05, |
|
"loss": 0.4339, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.9986348122866895, |
|
"grad_norm": 0.15596534311771393, |
|
"learning_rate": 6.0413651882296406e-05, |
|
"loss": 0.4245, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 2.0013651877133105, |
|
"grad_norm": 0.3338797092437744, |
|
"learning_rate": 6.012185310852962e-05, |
|
"loss": 0.7156, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 2.0040955631399315, |
|
"grad_norm": 0.1896699219942093, |
|
"learning_rate": 5.983045753470308e-05, |
|
"loss": 0.4228, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 2.006825938566553, |
|
"grad_norm": 0.20619529485702515, |
|
"learning_rate": 5.953946810705888e-05, |
|
"loss": 0.4244, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 2.009556313993174, |
|
"grad_norm": 0.17412033677101135, |
|
"learning_rate": 5.924888776773281e-05, |
|
"loss": 0.4186, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 2.012286689419795, |
|
"grad_norm": 0.1896408498287201, |
|
"learning_rate": 5.8958719454724346e-05, |
|
"loss": 0.4259, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 2.0150170648464165, |
|
"grad_norm": 0.16549214720726013, |
|
"learning_rate": 5.8668966101867005e-05, |
|
"loss": 0.3967, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 2.0177474402730375, |
|
"grad_norm": 0.16784432530403137, |
|
"learning_rate": 5.837963063879884e-05, |
|
"loss": 0.4347, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 2.0204778156996586, |
|
"grad_norm": 0.18551002442836761, |
|
"learning_rate": 5.809071599093272e-05, |
|
"loss": 0.4407, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.02320819112628, |
|
"grad_norm": 0.17308658361434937, |
|
"learning_rate": 5.780222507942654e-05, |
|
"loss": 0.4343, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 2.025938566552901, |
|
"grad_norm": 0.17182452976703644, |
|
"learning_rate": 5.751416082115408e-05, |
|
"loss": 0.422, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 2.028668941979522, |
|
"grad_norm": 0.17356833815574646, |
|
"learning_rate": 5.722652612867523e-05, |
|
"loss": 0.4168, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 2.0313993174061435, |
|
"grad_norm": 0.17314977943897247, |
|
"learning_rate": 5.6939323910206645e-05, |
|
"loss": 0.4285, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 2.0341296928327646, |
|
"grad_norm": 0.1868155151605606, |
|
"learning_rate": 5.6652557069592304e-05, |
|
"loss": 0.4211, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 2.0368600682593856, |
|
"grad_norm": 0.1524539738893509, |
|
"learning_rate": 5.63662285062742e-05, |
|
"loss": 0.4233, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 2.0395904436860066, |
|
"grad_norm": 0.17929619550704956, |
|
"learning_rate": 5.608034111526298e-05, |
|
"loss": 0.4305, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 2.042320819112628, |
|
"grad_norm": 0.1604132503271103, |
|
"learning_rate": 5.579489778710867e-05, |
|
"loss": 0.4214, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 2.045051194539249, |
|
"grad_norm": 0.16710160672664642, |
|
"learning_rate": 5.550990140787147e-05, |
|
"loss": 0.4369, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 2.04778156996587, |
|
"grad_norm": 0.16114738583564758, |
|
"learning_rate": 5.522535485909257e-05, |
|
"loss": 0.4216, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.0505119453924916, |
|
"grad_norm": 0.16698378324508667, |
|
"learning_rate": 5.494126101776505e-05, |
|
"loss": 0.4323, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 2.0532423208191126, |
|
"grad_norm": 0.174140065908432, |
|
"learning_rate": 5.4657622756304704e-05, |
|
"loss": 0.4135, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 2.0559726962457336, |
|
"grad_norm": 0.1496962457895279, |
|
"learning_rate": 5.437444294252107e-05, |
|
"loss": 0.438, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 2.058703071672355, |
|
"grad_norm": 0.1660911738872528, |
|
"learning_rate": 5.409172443958843e-05, |
|
"loss": 0.4262, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 2.061433447098976, |
|
"grad_norm": 0.18081265687942505, |
|
"learning_rate": 5.380947010601681e-05, |
|
"loss": 0.4172, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 2.064163822525597, |
|
"grad_norm": 0.14415475726127625, |
|
"learning_rate": 5.3527682795623146e-05, |
|
"loss": 0.4181, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 2.0668941979522186, |
|
"grad_norm": 0.20684713125228882, |
|
"learning_rate": 5.324636535750238e-05, |
|
"loss": 0.4291, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 2.0696245733788396, |
|
"grad_norm": 0.17085103690624237, |
|
"learning_rate": 5.296552063599868e-05, |
|
"loss": 0.4372, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 2.0723549488054607, |
|
"grad_norm": 0.17061397433280945, |
|
"learning_rate": 5.2685151470676653e-05, |
|
"loss": 0.4247, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 2.0750853242320817, |
|
"grad_norm": 0.17692053318023682, |
|
"learning_rate": 5.240526069629265e-05, |
|
"loss": 0.4261, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.077815699658703, |
|
"grad_norm": 0.15353117883205414, |
|
"learning_rate": 5.212585114276614e-05, |
|
"loss": 0.4272, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 2.080546075085324, |
|
"grad_norm": 0.17618128657341003, |
|
"learning_rate": 5.1846925635151045e-05, |
|
"loss": 0.4206, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 2.083276450511945, |
|
"grad_norm": 0.154897078871727, |
|
"learning_rate": 5.156848699360719e-05, |
|
"loss": 0.4086, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 2.0860068259385667, |
|
"grad_norm": 0.15779747068881989, |
|
"learning_rate": 5.129053803337181e-05, |
|
"loss": 0.4073, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 2.0887372013651877, |
|
"grad_norm": 0.186599463224411, |
|
"learning_rate": 5.101308156473104e-05, |
|
"loss": 0.4204, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 2.0914675767918087, |
|
"grad_norm": 0.15039357542991638, |
|
"learning_rate": 5.073612039299157e-05, |
|
"loss": 0.4277, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 2.09419795221843, |
|
"grad_norm": 0.16809961199760437, |
|
"learning_rate": 5.0459657318452224e-05, |
|
"loss": 0.4312, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 2.096928327645051, |
|
"grad_norm": 0.18085776269435883, |
|
"learning_rate": 5.0183695136375664e-05, |
|
"loss": 0.4233, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 2.0996587030716722, |
|
"grad_norm": 0.15629561245441437, |
|
"learning_rate": 4.9908236636960126e-05, |
|
"loss": 0.4252, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 2.1023890784982937, |
|
"grad_norm": 0.18560980260372162, |
|
"learning_rate": 4.963328460531127e-05, |
|
"loss": 0.4112, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.1051194539249147, |
|
"grad_norm": 0.16012516617774963, |
|
"learning_rate": 4.935884182141377e-05, |
|
"loss": 0.4108, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 2.1078498293515358, |
|
"grad_norm": 0.1524171382188797, |
|
"learning_rate": 4.908491106010368e-05, |
|
"loss": 0.4183, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 2.1105802047781568, |
|
"grad_norm": 0.173212930560112, |
|
"learning_rate": 4.8811495091039926e-05, |
|
"loss": 0.4058, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 2.1133105802047782, |
|
"grad_norm": 0.1540430635213852, |
|
"learning_rate": 4.8538596678676406e-05, |
|
"loss": 0.4016, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 2.1160409556313993, |
|
"grad_norm": 0.1477975845336914, |
|
"learning_rate": 4.826621858223431e-05, |
|
"loss": 0.3975, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.1187713310580203, |
|
"grad_norm": 0.17888864874839783, |
|
"learning_rate": 4.79943635556739e-05, |
|
"loss": 0.4208, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 2.1215017064846418, |
|
"grad_norm": 0.15199348330497742, |
|
"learning_rate": 4.7723034347666696e-05, |
|
"loss": 0.4304, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 2.124232081911263, |
|
"grad_norm": 0.15406261384487152, |
|
"learning_rate": 4.745223370156797e-05, |
|
"loss": 0.439, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 2.126962457337884, |
|
"grad_norm": 0.1821894645690918, |
|
"learning_rate": 4.71819643553887e-05, |
|
"loss": 0.4298, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 2.1296928327645053, |
|
"grad_norm": 0.15180355310440063, |
|
"learning_rate": 4.691222904176791e-05, |
|
"loss": 0.4136, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.1324232081911263, |
|
"grad_norm": 0.17140239477157593, |
|
"learning_rate": 4.6643030487945326e-05, |
|
"loss": 0.4292, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 2.1351535836177473, |
|
"grad_norm": 0.15253609418869019, |
|
"learning_rate": 4.6374371415733496e-05, |
|
"loss": 0.4393, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 2.137883959044369, |
|
"grad_norm": 0.15631216764450073, |
|
"learning_rate": 4.6106254541490325e-05, |
|
"loss": 0.4162, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 2.14061433447099, |
|
"grad_norm": 0.18031221628189087, |
|
"learning_rate": 4.583868257609171e-05, |
|
"loss": 0.415, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 2.143344709897611, |
|
"grad_norm": 0.15720027685165405, |
|
"learning_rate": 4.55716582249042e-05, |
|
"loss": 0.4288, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 2.146075085324232, |
|
"grad_norm": 0.15618009865283966, |
|
"learning_rate": 4.530518418775733e-05, |
|
"loss": 0.4238, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 2.1488054607508533, |
|
"grad_norm": 0.16991287469863892, |
|
"learning_rate": 4.50392631589166e-05, |
|
"loss": 0.4321, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 2.1515358361774743, |
|
"grad_norm": 0.16138002276420593, |
|
"learning_rate": 4.477389782705628e-05, |
|
"loss": 0.4172, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 2.1542662116040954, |
|
"grad_norm": 0.15910767018795013, |
|
"learning_rate": 4.450909087523186e-05, |
|
"loss": 0.4149, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 2.156996587030717, |
|
"grad_norm": 0.15495507419109344, |
|
"learning_rate": 4.424484498085335e-05, |
|
"loss": 0.4351, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.159726962457338, |
|
"grad_norm": 0.15659591555595398, |
|
"learning_rate": 4.398116281565794e-05, |
|
"loss": 0.4254, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 2.162457337883959, |
|
"grad_norm": 0.16147974133491516, |
|
"learning_rate": 4.371804704568309e-05, |
|
"loss": 0.4265, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 2.1651877133105804, |
|
"grad_norm": 0.14304347336292267, |
|
"learning_rate": 4.345550033123954e-05, |
|
"loss": 0.4211, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 2.1679180887372014, |
|
"grad_norm": 0.15479592978954315, |
|
"learning_rate": 4.3193525326884435e-05, |
|
"loss": 0.4002, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 2.1706484641638224, |
|
"grad_norm": 0.1610931009054184, |
|
"learning_rate": 4.293212468139447e-05, |
|
"loss": 0.4156, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 2.173378839590444, |
|
"grad_norm": 0.16558706760406494, |
|
"learning_rate": 4.267130103773911e-05, |
|
"loss": 0.4285, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 2.176109215017065, |
|
"grad_norm": 0.16455373167991638, |
|
"learning_rate": 4.241105703305388e-05, |
|
"loss": 0.4058, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 2.178839590443686, |
|
"grad_norm": 0.15886934101581573, |
|
"learning_rate": 4.215139529861367e-05, |
|
"loss": 0.4311, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 2.181569965870307, |
|
"grad_norm": 0.15277110040187836, |
|
"learning_rate": 4.189231845980618e-05, |
|
"loss": 0.4176, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 2.1843003412969284, |
|
"grad_norm": 0.15656784176826477, |
|
"learning_rate": 4.163382913610533e-05, |
|
"loss": 0.4205, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.1870307167235494, |
|
"grad_norm": 0.1564100980758667, |
|
"learning_rate": 4.1375929941044786e-05, |
|
"loss": 0.4313, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 2.1897610921501705, |
|
"grad_norm": 0.14734816551208496, |
|
"learning_rate": 4.111862348219158e-05, |
|
"loss": 0.4258, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 2.192491467576792, |
|
"grad_norm": 0.15179724991321564, |
|
"learning_rate": 4.086191236111964e-05, |
|
"loss": 0.4372, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 2.195221843003413, |
|
"grad_norm": 0.16015468537807465, |
|
"learning_rate": 4.060579917338362e-05, |
|
"loss": 0.4104, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 2.197952218430034, |
|
"grad_norm": 0.1574854701757431, |
|
"learning_rate": 4.0350286508492554e-05, |
|
"loss": 0.4068, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 2.2006825938566554, |
|
"grad_norm": 0.14426739513874054, |
|
"learning_rate": 4.009537694988372e-05, |
|
"loss": 0.3904, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 2.2034129692832765, |
|
"grad_norm": 0.14949829876422882, |
|
"learning_rate": 3.9841073074896517e-05, |
|
"loss": 0.3934, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 2.2061433447098975, |
|
"grad_norm": 0.1610165387392044, |
|
"learning_rate": 3.958737745474638e-05, |
|
"loss": 0.4207, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 2.208873720136519, |
|
"grad_norm": 0.15804022550582886, |
|
"learning_rate": 3.933429265449882e-05, |
|
"loss": 0.3968, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 2.21160409556314, |
|
"grad_norm": 0.15507763624191284, |
|
"learning_rate": 3.9081821233043436e-05, |
|
"loss": 0.4322, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.214334470989761, |
|
"grad_norm": 0.16331470012664795, |
|
"learning_rate": 3.8829965743068174e-05, |
|
"loss": 0.4317, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 2.217064846416382, |
|
"grad_norm": 0.14913159608840942, |
|
"learning_rate": 3.857872873103322e-05, |
|
"loss": 0.4098, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 2.2197952218430035, |
|
"grad_norm": 0.15193897485733032, |
|
"learning_rate": 3.832811273714569e-05, |
|
"loss": 0.4319, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 2.2225255972696245, |
|
"grad_norm": 0.15656188130378723, |
|
"learning_rate": 3.807812029533362e-05, |
|
"loss": 0.3962, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 2.2252559726962455, |
|
"grad_norm": 0.1457897126674652, |
|
"learning_rate": 3.7828753933220295e-05, |
|
"loss": 0.4044, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 2.227986348122867, |
|
"grad_norm": 0.15471549332141876, |
|
"learning_rate": 3.758001617209906e-05, |
|
"loss": 0.4251, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 2.230716723549488, |
|
"grad_norm": 0.16151392459869385, |
|
"learning_rate": 3.733190952690753e-05, |
|
"loss": 0.4278, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 2.233447098976109, |
|
"grad_norm": 0.15231560170650482, |
|
"learning_rate": 3.708443650620206e-05, |
|
"loss": 0.4286, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 2.2361774744027305, |
|
"grad_norm": 0.13981011509895325, |
|
"learning_rate": 3.683759961213282e-05, |
|
"loss": 0.4127, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 2.2389078498293515, |
|
"grad_norm": 0.16484162211418152, |
|
"learning_rate": 3.6591401340418116e-05, |
|
"loss": 0.4399, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.2416382252559726, |
|
"grad_norm": 0.15228329598903656, |
|
"learning_rate": 3.634584418031915e-05, |
|
"loss": 0.4247, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 2.244368600682594, |
|
"grad_norm": 0.14780929684638977, |
|
"learning_rate": 3.6100930614615205e-05, |
|
"loss": 0.4324, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 2.247098976109215, |
|
"grad_norm": 0.1611994057893753, |
|
"learning_rate": 3.585666311957817e-05, |
|
"loss": 0.4263, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 2.249829351535836, |
|
"grad_norm": 0.16382110118865967, |
|
"learning_rate": 3.561304416494762e-05, |
|
"loss": 0.4332, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 2.252559726962457, |
|
"grad_norm": 0.15646643936634064, |
|
"learning_rate": 3.53700762139059e-05, |
|
"loss": 0.4132, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.2552901023890786, |
|
"grad_norm": 0.16182062029838562, |
|
"learning_rate": 3.512776172305331e-05, |
|
"loss": 0.4199, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 2.2580204778156996, |
|
"grad_norm": 0.15407824516296387, |
|
"learning_rate": 3.4886103142382945e-05, |
|
"loss": 0.4087, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 2.260750853242321, |
|
"grad_norm": 0.1593010425567627, |
|
"learning_rate": 3.46451029152562e-05, |
|
"loss": 0.416, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 2.263481228668942, |
|
"grad_norm": 0.15628038346767426, |
|
"learning_rate": 3.440476347837811e-05, |
|
"loss": 0.4296, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 2.266211604095563, |
|
"grad_norm": 0.1571022868156433, |
|
"learning_rate": 3.41650872617724e-05, |
|
"loss": 0.419, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.268941979522184, |
|
"grad_norm": 0.15472716093063354, |
|
"learning_rate": 3.392607668875718e-05, |
|
"loss": 0.4151, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 2.2716723549488056, |
|
"grad_norm": 0.1419110745191574, |
|
"learning_rate": 3.36877341759205e-05, |
|
"loss": 0.3991, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 2.2744027303754266, |
|
"grad_norm": 0.15272633731365204, |
|
"learning_rate": 3.345006213309557e-05, |
|
"loss": 0.3965, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 2.2771331058020476, |
|
"grad_norm": 0.15313783288002014, |
|
"learning_rate": 3.321306296333673e-05, |
|
"loss": 0.4022, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 2.279863481228669, |
|
"grad_norm": 0.14547322690486908, |
|
"learning_rate": 3.29767390628951e-05, |
|
"loss": 0.4127, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 2.28259385665529, |
|
"grad_norm": 0.15116067230701447, |
|
"learning_rate": 3.274109282119413e-05, |
|
"loss": 0.4086, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 2.285324232081911, |
|
"grad_norm": 0.1611739993095398, |
|
"learning_rate": 3.250612662080567e-05, |
|
"loss": 0.4261, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 2.288054607508532, |
|
"grad_norm": 0.1671733260154724, |
|
"learning_rate": 3.227184283742591e-05, |
|
"loss": 0.4244, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 2.2907849829351536, |
|
"grad_norm": 0.16232764720916748, |
|
"learning_rate": 3.2038243839851075e-05, |
|
"loss": 0.4118, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 2.2935153583617747, |
|
"grad_norm": 0.1595815271139145, |
|
"learning_rate": 3.180533198995379e-05, |
|
"loss": 0.4333, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.296245733788396, |
|
"grad_norm": 0.14766521751880646, |
|
"learning_rate": 3.1573109642659024e-05, |
|
"loss": 0.4066, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 2.298976109215017, |
|
"grad_norm": 0.16081617772579193, |
|
"learning_rate": 3.134157914592032e-05, |
|
"loss": 0.4032, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 2.301706484641638, |
|
"grad_norm": 0.15681862831115723, |
|
"learning_rate": 3.111074284069606e-05, |
|
"loss": 0.4256, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 2.304436860068259, |
|
"grad_norm": 0.15217512845993042, |
|
"learning_rate": 3.088060306092582e-05, |
|
"loss": 0.4142, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 2.3071672354948807, |
|
"grad_norm": 0.15541419386863708, |
|
"learning_rate": 3.065116213350671e-05, |
|
"loss": 0.4246, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 2.3098976109215017, |
|
"grad_norm": 0.16863110661506653, |
|
"learning_rate": 3.042242237826991e-05, |
|
"loss": 0.4167, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 2.3126279863481227, |
|
"grad_norm": 0.15719062089920044, |
|
"learning_rate": 3.0194386107957173e-05, |
|
"loss": 0.4272, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 2.315358361774744, |
|
"grad_norm": 0.14599116146564484, |
|
"learning_rate": 2.9967055628197472e-05, |
|
"loss": 0.3826, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 2.318088737201365, |
|
"grad_norm": 0.16562673449516296, |
|
"learning_rate": 2.974043323748367e-05, |
|
"loss": 0.4246, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 2.3208191126279862, |
|
"grad_norm": 0.16425776481628418, |
|
"learning_rate": 2.951452122714926e-05, |
|
"loss": 0.4182, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.3235494880546073, |
|
"grad_norm": 0.15654055774211884, |
|
"learning_rate": 2.9289321881345254e-05, |
|
"loss": 0.4349, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 2.3262798634812287, |
|
"grad_norm": 0.15405891835689545, |
|
"learning_rate": 2.9064837477017048e-05, |
|
"loss": 0.4045, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 2.3290102389078498, |
|
"grad_norm": 0.16419056057929993, |
|
"learning_rate": 2.88410702838814e-05, |
|
"loss": 0.4174, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 2.331740614334471, |
|
"grad_norm": 0.16045036911964417, |
|
"learning_rate": 2.861802256440348e-05, |
|
"loss": 0.4185, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 2.3344709897610922, |
|
"grad_norm": 0.15399502217769623, |
|
"learning_rate": 2.8395696573774032e-05, |
|
"loss": 0.4228, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 2.3372013651877133, |
|
"grad_norm": 0.1503557562828064, |
|
"learning_rate": 2.8174094559886534e-05, |
|
"loss": 0.4146, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 2.3399317406143343, |
|
"grad_norm": 0.16562052071094513, |
|
"learning_rate": 2.7953218763314458e-05, |
|
"loss": 0.4127, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 2.3426621160409558, |
|
"grad_norm": 0.15860068798065186, |
|
"learning_rate": 2.773307141728867e-05, |
|
"loss": 0.4221, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 2.345392491467577, |
|
"grad_norm": 0.16427016258239746, |
|
"learning_rate": 2.7513654747674788e-05, |
|
"loss": 0.4202, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 2.348122866894198, |
|
"grad_norm": 0.15462426841259003, |
|
"learning_rate": 2.729497097295075e-05, |
|
"loss": 0.3986, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.3508532423208193, |
|
"grad_norm": 0.150539368391037, |
|
"learning_rate": 2.7077022304184295e-05, |
|
"loss": 0.4112, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 2.3535836177474403, |
|
"grad_norm": 0.14942197501659393, |
|
"learning_rate": 2.685981094501069e-05, |
|
"loss": 0.4173, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 2.3563139931740613, |
|
"grad_norm": 0.15172016620635986, |
|
"learning_rate": 2.6643339091610377e-05, |
|
"loss": 0.4273, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 2.359044368600683, |
|
"grad_norm": 0.15123523771762848, |
|
"learning_rate": 2.6427608932686843e-05, |
|
"loss": 0.4104, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 2.361774744027304, |
|
"grad_norm": 0.15563499927520752, |
|
"learning_rate": 2.621262264944444e-05, |
|
"loss": 0.4215, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 2.364505119453925, |
|
"grad_norm": 0.15889500081539154, |
|
"learning_rate": 2.599838241556626e-05, |
|
"loss": 0.4058, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 2.3672354948805463, |
|
"grad_norm": 0.1514395922422409, |
|
"learning_rate": 2.5784890397192398e-05, |
|
"loss": 0.4058, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 2.3699658703071673, |
|
"grad_norm": 0.15626998245716095, |
|
"learning_rate": 2.5572148752897795e-05, |
|
"loss": 0.4092, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 2.3726962457337883, |
|
"grad_norm": 0.15478669106960297, |
|
"learning_rate": 2.5360159633670457e-05, |
|
"loss": 0.424, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 2.3754266211604094, |
|
"grad_norm": 0.14791764318943024, |
|
"learning_rate": 2.514892518288988e-05, |
|
"loss": 0.4294, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.378156996587031, |
|
"grad_norm": 0.15729525685310364, |
|
"learning_rate": 2.4938447536305243e-05, |
|
"loss": 0.4204, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 2.380887372013652, |
|
"grad_norm": 0.1549883335828781, |
|
"learning_rate": 2.472872882201368e-05, |
|
"loss": 0.4302, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 2.383617747440273, |
|
"grad_norm": 0.15608322620391846, |
|
"learning_rate": 2.451977116043911e-05, |
|
"loss": 0.4208, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 2.3863481228668944, |
|
"grad_norm": 0.15836332738399506, |
|
"learning_rate": 2.431157666431052e-05, |
|
"loss": 0.4141, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 2.3890784982935154, |
|
"grad_norm": 0.15775950253009796, |
|
"learning_rate": 2.410414743864059e-05, |
|
"loss": 0.4142, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.3918088737201364, |
|
"grad_norm": 0.1490509808063507, |
|
"learning_rate": 2.3897485580704682e-05, |
|
"loss": 0.4079, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 2.394539249146758, |
|
"grad_norm": 0.16001944243907928, |
|
"learning_rate": 2.3691593180019366e-05, |
|
"loss": 0.4276, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 2.397269624573379, |
|
"grad_norm": 0.14967067539691925, |
|
"learning_rate": 2.3486472318321307e-05, |
|
"loss": 0.4045, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.14551271498203278, |
|
"learning_rate": 2.3282125069546433e-05, |
|
"loss": 0.4169, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 2.4027303754266214, |
|
"grad_norm": 0.15170224010944366, |
|
"learning_rate": 2.3078553499808797e-05, |
|
"loss": 0.4166, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.4054607508532424, |
|
"grad_norm": 0.15381450951099396, |
|
"learning_rate": 2.2875759667379614e-05, |
|
"loss": 0.4046, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 2.4081911262798634, |
|
"grad_norm": 0.15133820474147797, |
|
"learning_rate": 2.267374562266662e-05, |
|
"loss": 0.4053, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 2.4109215017064844, |
|
"grad_norm": 0.15135996043682098, |
|
"learning_rate": 2.2472513408193384e-05, |
|
"loss": 0.4145, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 2.413651877133106, |
|
"grad_norm": 0.1551310420036316, |
|
"learning_rate": 2.227206505857834e-05, |
|
"loss": 0.4107, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 2.416382252559727, |
|
"grad_norm": 0.15202271938323975, |
|
"learning_rate": 2.207240260051453e-05, |
|
"loss": 0.409, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 2.419112627986348, |
|
"grad_norm": 0.15912394225597382, |
|
"learning_rate": 2.1873528052749092e-05, |
|
"loss": 0.4293, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 2.4218430034129694, |
|
"grad_norm": 0.164555624127388, |
|
"learning_rate": 2.167544342606256e-05, |
|
"loss": 0.4153, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 2.4245733788395905, |
|
"grad_norm": 0.1536960005760193, |
|
"learning_rate": 2.1478150723248857e-05, |
|
"loss": 0.4161, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 2.4273037542662115, |
|
"grad_norm": 0.1572561115026474, |
|
"learning_rate": 2.1281651939094992e-05, |
|
"loss": 0.3975, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 2.430034129692833, |
|
"grad_norm": 0.14872194826602936, |
|
"learning_rate": 2.1085949060360654e-05, |
|
"loss": 0.4028, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.432764505119454, |
|
"grad_norm": 0.15553632378578186, |
|
"learning_rate": 2.089104406575837e-05, |
|
"loss": 0.4113, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 2.435494880546075, |
|
"grad_norm": 0.15172426402568817, |
|
"learning_rate": 2.0696938925933506e-05, |
|
"loss": 0.407, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 2.4382252559726965, |
|
"grad_norm": 0.1533356010913849, |
|
"learning_rate": 2.0503635603444094e-05, |
|
"loss": 0.4225, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 2.4409556313993175, |
|
"grad_norm": 0.15870912373065948, |
|
"learning_rate": 2.0311136052741277e-05, |
|
"loss": 0.4176, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 2.4436860068259385, |
|
"grad_norm": 0.15616737306118011, |
|
"learning_rate": 2.0119442220149353e-05, |
|
"loss": 0.4158, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 2.4464163822525595, |
|
"grad_norm": 0.15833789110183716, |
|
"learning_rate": 1.9928556043846214e-05, |
|
"loss": 0.403, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 2.449146757679181, |
|
"grad_norm": 0.1554342359304428, |
|
"learning_rate": 1.9738479453843682e-05, |
|
"loss": 0.396, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 2.451877133105802, |
|
"grad_norm": 0.17515774071216583, |
|
"learning_rate": 1.9549214371968004e-05, |
|
"loss": 0.4096, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 2.454607508532423, |
|
"grad_norm": 0.15816594660282135, |
|
"learning_rate": 1.936076271184044e-05, |
|
"loss": 0.4232, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 2.4573378839590445, |
|
"grad_norm": 0.1528582125902176, |
|
"learning_rate": 1.9173126378857907e-05, |
|
"loss": 0.4145, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.4600682593856655, |
|
"grad_norm": 0.16006483137607574, |
|
"learning_rate": 1.898630727017371e-05, |
|
"loss": 0.4201, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 2.4627986348122866, |
|
"grad_norm": 0.15996922552585602, |
|
"learning_rate": 1.8800307274678364e-05, |
|
"loss": 0.4056, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 2.465529010238908, |
|
"grad_norm": 0.1555391103029251, |
|
"learning_rate": 1.861512827298051e-05, |
|
"loss": 0.4315, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 2.468259385665529, |
|
"grad_norm": 0.15501669049263, |
|
"learning_rate": 1.8430772137387853e-05, |
|
"loss": 0.4159, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 2.47098976109215, |
|
"grad_norm": 0.1523975431919098, |
|
"learning_rate": 1.8247240731888294e-05, |
|
"loss": 0.4004, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 2.4737201365187715, |
|
"grad_norm": 0.1715194135904312, |
|
"learning_rate": 1.806453591213103e-05, |
|
"loss": 0.4124, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 2.4764505119453926, |
|
"grad_norm": 0.15396980941295624, |
|
"learning_rate": 1.788265952540784e-05, |
|
"loss": 0.4094, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 2.4791808873720136, |
|
"grad_norm": 0.1634356677532196, |
|
"learning_rate": 1.7701613410634365e-05, |
|
"loss": 0.4257, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 2.4819112627986346, |
|
"grad_norm": 0.1548430621623993, |
|
"learning_rate": 1.752139939833154e-05, |
|
"loss": 0.3942, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 2.484641638225256, |
|
"grad_norm": 0.15945452451705933, |
|
"learning_rate": 1.734201931060706e-05, |
|
"loss": 0.3979, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.487372013651877, |
|
"grad_norm": 0.16635702550411224, |
|
"learning_rate": 1.7163474961137028e-05, |
|
"loss": 0.422, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 2.490102389078498, |
|
"grad_norm": 0.16245630383491516, |
|
"learning_rate": 1.6985768155147496e-05, |
|
"loss": 0.4126, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 2.4928327645051196, |
|
"grad_norm": 0.14662671089172363, |
|
"learning_rate": 1.6808900689396336e-05, |
|
"loss": 0.4062, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 2.4955631399317406, |
|
"grad_norm": 0.1555013507604599, |
|
"learning_rate": 1.663287435215498e-05, |
|
"loss": 0.4101, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 2.4982935153583616, |
|
"grad_norm": 0.14830157160758972, |
|
"learning_rate": 1.645769092319045e-05, |
|
"loss": 0.3843, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 2.5010238907849827, |
|
"grad_norm": 0.16135641932487488, |
|
"learning_rate": 1.6283352173747145e-05, |
|
"loss": 0.4229, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 2.503754266211604, |
|
"grad_norm": 0.15229038894176483, |
|
"learning_rate": 1.6109859866529255e-05, |
|
"loss": 0.4209, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 2.506484641638225, |
|
"grad_norm": 0.15836934745311737, |
|
"learning_rate": 1.5937215755682665e-05, |
|
"loss": 0.422, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 2.5092150170648466, |
|
"grad_norm": 0.1639019101858139, |
|
"learning_rate": 1.5765421586777284e-05, |
|
"loss": 0.4206, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 2.5119453924914676, |
|
"grad_norm": 0.15451960265636444, |
|
"learning_rate": 1.5594479096789537e-05, |
|
"loss": 0.411, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.5146757679180887, |
|
"grad_norm": 0.16513267159461975, |
|
"learning_rate": 1.5424390014084644e-05, |
|
"loss": 0.4324, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 2.5174061433447097, |
|
"grad_norm": 0.15432654321193695, |
|
"learning_rate": 1.5255156058399122e-05, |
|
"loss": 0.4074, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 2.520136518771331, |
|
"grad_norm": 0.16064870357513428, |
|
"learning_rate": 1.5086778940823543e-05, |
|
"loss": 0.417, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 2.522866894197952, |
|
"grad_norm": 0.16009055078029633, |
|
"learning_rate": 1.4919260363785215e-05, |
|
"loss": 0.4128, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 2.5255972696245736, |
|
"grad_norm": 0.1598517745733261, |
|
"learning_rate": 1.4752602021030792e-05, |
|
"loss": 0.4191, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.5283276450511947, |
|
"grad_norm": 0.15252196788787842, |
|
"learning_rate": 1.4586805597609331e-05, |
|
"loss": 0.4124, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 2.5310580204778157, |
|
"grad_norm": 0.1643335521221161, |
|
"learning_rate": 1.442187276985526e-05, |
|
"loss": 0.4207, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 2.5337883959044367, |
|
"grad_norm": 0.15445098280906677, |
|
"learning_rate": 1.4257805205371234e-05, |
|
"loss": 0.3993, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 2.536518771331058, |
|
"grad_norm": 0.15789660811424255, |
|
"learning_rate": 1.4094604563011472e-05, |
|
"loss": 0.4103, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 2.539249146757679, |
|
"grad_norm": 0.15002034604549408, |
|
"learning_rate": 1.3932272492864984e-05, |
|
"loss": 0.4042, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.5419795221843002, |
|
"grad_norm": 0.16790151596069336, |
|
"learning_rate": 1.3770810636238684e-05, |
|
"loss": 0.4245, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 2.5447098976109217, |
|
"grad_norm": 0.1551153063774109, |
|
"learning_rate": 1.3610220625641002e-05, |
|
"loss": 0.4145, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 2.5474402730375427, |
|
"grad_norm": 0.15363937616348267, |
|
"learning_rate": 1.3450504084765381e-05, |
|
"loss": 0.384, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 2.5501706484641637, |
|
"grad_norm": 0.15322524309158325, |
|
"learning_rate": 1.3291662628473633e-05, |
|
"loss": 0.4042, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 2.5529010238907848, |
|
"grad_norm": 0.1649988293647766, |
|
"learning_rate": 1.313369786277987e-05, |
|
"loss": 0.4236, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 2.5556313993174062, |
|
"grad_norm": 0.150667205452919, |
|
"learning_rate": 1.2976611384834148e-05, |
|
"loss": 0.3908, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 2.5583617747440273, |
|
"grad_norm": 0.16129009425640106, |
|
"learning_rate": 1.2820404782906315e-05, |
|
"loss": 0.4167, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 2.5610921501706487, |
|
"grad_norm": 0.16965742409229279, |
|
"learning_rate": 1.2665079636369969e-05, |
|
"loss": 0.4145, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 2.5638225255972698, |
|
"grad_norm": 0.14878158271312714, |
|
"learning_rate": 1.2510637515686496e-05, |
|
"loss": 0.4236, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 2.5665529010238908, |
|
"grad_norm": 0.14919213950634003, |
|
"learning_rate": 1.2357079982389197e-05, |
|
"loss": 0.396, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.569283276450512, |
|
"grad_norm": 0.1563798040151596, |
|
"learning_rate": 1.2204408589067462e-05, |
|
"loss": 0.3992, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 2.5720136518771333, |
|
"grad_norm": 0.15581347048282623, |
|
"learning_rate": 1.2052624879351104e-05, |
|
"loss": 0.4261, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 2.5747440273037543, |
|
"grad_norm": 0.15438248217105865, |
|
"learning_rate": 1.190173038789476e-05, |
|
"loss": 0.4013, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 2.5774744027303753, |
|
"grad_norm": 0.15199199318885803, |
|
"learning_rate": 1.1751726640362349e-05, |
|
"loss": 0.4089, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 2.580204778156997, |
|
"grad_norm": 0.1516939401626587, |
|
"learning_rate": 1.1602615153411667e-05, |
|
"loss": 0.4008, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 2.582935153583618, |
|
"grad_norm": 0.15474575757980347, |
|
"learning_rate": 1.1454397434679021e-05, |
|
"loss": 0.4115, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 2.585665529010239, |
|
"grad_norm": 0.15419447422027588, |
|
"learning_rate": 1.1307074982764022e-05, |
|
"loss": 0.4187, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 2.58839590443686, |
|
"grad_norm": 0.15413175523281097, |
|
"learning_rate": 1.116064928721442e-05, |
|
"loss": 0.4191, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 2.5911262798634813, |
|
"grad_norm": 0.1559099704027176, |
|
"learning_rate": 1.1015121828511032e-05, |
|
"loss": 0.4136, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 2.5938566552901023, |
|
"grad_norm": 0.16210560500621796, |
|
"learning_rate": 1.0870494078052796e-05, |
|
"loss": 0.4204, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.596587030716724, |
|
"grad_norm": 0.15410131216049194, |
|
"learning_rate": 1.0726767498141877e-05, |
|
"loss": 0.4098, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 2.599317406143345, |
|
"grad_norm": 0.14466793835163116, |
|
"learning_rate": 1.0583943541968856e-05, |
|
"loss": 0.3832, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 2.602047781569966, |
|
"grad_norm": 0.1512717753648758, |
|
"learning_rate": 1.044202365359811e-05, |
|
"loss": 0.4132, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 2.604778156996587, |
|
"grad_norm": 0.1530720293521881, |
|
"learning_rate": 1.0301009267953143e-05, |
|
"loss": 0.4165, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 2.6075085324232083, |
|
"grad_norm": 0.16781674325466156, |
|
"learning_rate": 1.0160901810802115e-05, |
|
"loss": 0.4203, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 2.6102389078498294, |
|
"grad_norm": 0.14876051247119904, |
|
"learning_rate": 1.0021702698743407e-05, |
|
"loss": 0.4168, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 2.6129692832764504, |
|
"grad_norm": 0.15021638572216034, |
|
"learning_rate": 9.883413339191294e-06, |
|
"loss": 0.4173, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 2.615699658703072, |
|
"grad_norm": 0.159826397895813, |
|
"learning_rate": 9.746035130361742e-06, |
|
"loss": 0.4279, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 2.618430034129693, |
|
"grad_norm": 0.156574085354805, |
|
"learning_rate": 9.609569461258262e-06, |
|
"loss": 0.4277, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 2.621160409556314, |
|
"grad_norm": 0.157151460647583, |
|
"learning_rate": 9.474017711657834e-06, |
|
"loss": 0.412, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.623890784982935, |
|
"grad_norm": 0.14887213706970215, |
|
"learning_rate": 9.339381252097e-06, |
|
"loss": 0.4012, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 2.6266211604095564, |
|
"grad_norm": 0.15450581908226013, |
|
"learning_rate": 9.205661443857994e-06, |
|
"loss": 0.4077, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 2.6293515358361774, |
|
"grad_norm": 0.14593878388404846, |
|
"learning_rate": 9.072859638954955e-06, |
|
"loss": 0.4064, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 2.632081911262799, |
|
"grad_norm": 0.156602144241333, |
|
"learning_rate": 8.940977180120247e-06, |
|
"loss": 0.4267, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 2.63481228668942, |
|
"grad_norm": 0.16739366948604584, |
|
"learning_rate": 8.810015400790994e-06, |
|
"loss": 0.4176, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 2.637542662116041, |
|
"grad_norm": 0.15772853791713715, |
|
"learning_rate": 8.67997562509546e-06, |
|
"loss": 0.424, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 2.640273037542662, |
|
"grad_norm": 0.15860068798065186, |
|
"learning_rate": 8.550859167839664e-06, |
|
"loss": 0.4167, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 2.6430034129692834, |
|
"grad_norm": 0.15044620633125305, |
|
"learning_rate": 8.422667334494249e-06, |
|
"loss": 0.3916, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 2.6457337883959045, |
|
"grad_norm": 0.150175541639328, |
|
"learning_rate": 8.295401421181125e-06, |
|
"loss": 0.3953, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 2.6484641638225255, |
|
"grad_norm": 0.14696063101291656, |
|
"learning_rate": 8.169062714660346e-06, |
|
"loss": 0.4115, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.651194539249147, |
|
"grad_norm": 0.1482568085193634, |
|
"learning_rate": 8.043652492317256e-06, |
|
"loss": 0.4017, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 2.653924914675768, |
|
"grad_norm": 0.15316608548164368, |
|
"learning_rate": 7.919172022149456e-06, |
|
"loss": 0.4176, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 2.656655290102389, |
|
"grad_norm": 0.15325787663459778, |
|
"learning_rate": 7.795622562753957e-06, |
|
"loss": 0.413, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 2.65938566552901, |
|
"grad_norm": 0.16979162395000458, |
|
"learning_rate": 7.673005363314579e-06, |
|
"loss": 0.4244, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 2.6621160409556315, |
|
"grad_norm": 0.16061224043369293, |
|
"learning_rate": 7.551321663589228e-06, |
|
"loss": 0.4082, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.6648464163822525, |
|
"grad_norm": 0.1511377990245819, |
|
"learning_rate": 7.430572693897342e-06, |
|
"loss": 0.4047, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 2.667576791808874, |
|
"grad_norm": 0.1549064815044403, |
|
"learning_rate": 7.310759675107515e-06, |
|
"loss": 0.4181, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 2.670307167235495, |
|
"grad_norm": 0.15855662524700165, |
|
"learning_rate": 7.191883818625189e-06, |
|
"loss": 0.4242, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 2.673037542662116, |
|
"grad_norm": 0.16046655178070068, |
|
"learning_rate": 7.073946326380243e-06, |
|
"loss": 0.4077, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 2.675767918088737, |
|
"grad_norm": 0.1561538577079773, |
|
"learning_rate": 6.956948390814977e-06, |
|
"loss": 0.4117, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.6784982935153585, |
|
"grad_norm": 0.16078175604343414, |
|
"learning_rate": 6.840891194872112e-06, |
|
"loss": 0.4342, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 2.6812286689419795, |
|
"grad_norm": 0.15352275967597961, |
|
"learning_rate": 6.725775911982601e-06, |
|
"loss": 0.402, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 2.6839590443686006, |
|
"grad_norm": 0.15391647815704346, |
|
"learning_rate": 6.6116037060539704e-06, |
|
"loss": 0.4095, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 2.686689419795222, |
|
"grad_norm": 0.15556836128234863, |
|
"learning_rate": 6.498375731458528e-06, |
|
"loss": 0.4063, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 2.689419795221843, |
|
"grad_norm": 0.15222905576229095, |
|
"learning_rate": 6.386093133021554e-06, |
|
"loss": 0.4139, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 2.692150170648464, |
|
"grad_norm": 0.15189246833324432, |
|
"learning_rate": 6.274757046009871e-06, |
|
"loss": 0.4195, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 2.694880546075085, |
|
"grad_norm": 0.15635477006435394, |
|
"learning_rate": 6.164368596120351e-06, |
|
"loss": 0.4137, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 2.6976109215017066, |
|
"grad_norm": 0.15748678147792816, |
|
"learning_rate": 6.054928899468426e-06, |
|
"loss": 0.396, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 2.7003412969283276, |
|
"grad_norm": 0.1585109978914261, |
|
"learning_rate": 5.946439062576903e-06, |
|
"loss": 0.4111, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 2.703071672354949, |
|
"grad_norm": 0.15509194135665894, |
|
"learning_rate": 5.83890018236476e-06, |
|
"loss": 0.4259, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.70580204778157, |
|
"grad_norm": 0.15443935990333557, |
|
"learning_rate": 5.732313346136031e-06, |
|
"loss": 0.4138, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 2.708532423208191, |
|
"grad_norm": 0.1619240939617157, |
|
"learning_rate": 5.626679631568832e-06, |
|
"loss": 0.4091, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 2.711262798634812, |
|
"grad_norm": 0.1597377061843872, |
|
"learning_rate": 5.522000106704439e-06, |
|
"loss": 0.4193, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 2.7139931740614336, |
|
"grad_norm": 0.15020039677619934, |
|
"learning_rate": 5.418275829936537e-06, |
|
"loss": 0.4216, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 2.7167235494880546, |
|
"grad_norm": 0.15186108648777008, |
|
"learning_rate": 5.315507850000456e-06, |
|
"loss": 0.4057, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 2.7194539249146756, |
|
"grad_norm": 0.1630185842514038, |
|
"learning_rate": 5.2136972059626314e-06, |
|
"loss": 0.4141, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 2.722184300341297, |
|
"grad_norm": 0.1610775589942932, |
|
"learning_rate": 5.112844927210048e-06, |
|
"loss": 0.4025, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 2.724914675767918, |
|
"grad_norm": 0.15820352733135223, |
|
"learning_rate": 5.012952033439844e-06, |
|
"loss": 0.4197, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 2.727645051194539, |
|
"grad_norm": 0.1567496657371521, |
|
"learning_rate": 4.914019534649039e-06, |
|
"loss": 0.4215, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 2.73037542662116, |
|
"grad_norm": 0.1540801227092743, |
|
"learning_rate": 4.816048431124265e-06, |
|
"loss": 0.4237, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.7331058020477816, |
|
"grad_norm": 0.15339985489845276, |
|
"learning_rate": 4.719039713431694e-06, |
|
"loss": 0.4127, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 2.7358361774744027, |
|
"grad_norm": 0.1545177400112152, |
|
"learning_rate": 4.622994362406996e-06, |
|
"loss": 0.424, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 2.738566552901024, |
|
"grad_norm": 0.15001171827316284, |
|
"learning_rate": 4.527913349145441e-06, |
|
"loss": 0.4199, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 2.741296928327645, |
|
"grad_norm": 0.15352268517017365, |
|
"learning_rate": 4.433797634992077e-06, |
|
"loss": 0.3991, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 2.744027303754266, |
|
"grad_norm": 0.1552933305501938, |
|
"learning_rate": 4.340648171531992e-06, |
|
"loss": 0.4173, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 2.746757679180887, |
|
"grad_norm": 0.1541508436203003, |
|
"learning_rate": 4.248465900580734e-06, |
|
"loss": 0.4231, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 2.7494880546075087, |
|
"grad_norm": 0.15616647899150848, |
|
"learning_rate": 4.1572517541747294e-06, |
|
"loss": 0.4295, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 2.7522184300341297, |
|
"grad_norm": 0.14823675155639648, |
|
"learning_rate": 4.0670066545619225e-06, |
|
"loss": 0.403, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 2.7549488054607507, |
|
"grad_norm": 0.15372464060783386, |
|
"learning_rate": 3.977731514192385e-06, |
|
"loss": 0.4184, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 2.757679180887372, |
|
"grad_norm": 0.15206997096538544, |
|
"learning_rate": 3.889427235709153e-06, |
|
"loss": 0.4119, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.760409556313993, |
|
"grad_norm": 0.15120883285999298, |
|
"learning_rate": 3.802094711939075e-06, |
|
"loss": 0.413, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 2.7631399317406142, |
|
"grad_norm": 0.16259510815143585, |
|
"learning_rate": 3.7157348258837652e-06, |
|
"loss": 0.4253, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 2.7658703071672353, |
|
"grad_norm": 0.15381862223148346, |
|
"learning_rate": 3.6303484507106966e-06, |
|
"loss": 0.4162, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 2.7686006825938567, |
|
"grad_norm": 0.15234719216823578, |
|
"learning_rate": 3.5459364497443694e-06, |
|
"loss": 0.3893, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 2.7713310580204777, |
|
"grad_norm": 0.1562722623348236, |
|
"learning_rate": 3.4624996764575977e-06, |
|
"loss": 0.3979, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 2.774061433447099, |
|
"grad_norm": 0.15057340264320374, |
|
"learning_rate": 3.3800389744628404e-06, |
|
"loss": 0.3861, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 2.7767918088737202, |
|
"grad_norm": 0.1512756198644638, |
|
"learning_rate": 3.298555177503726e-06, |
|
"loss": 0.4135, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 2.7795221843003413, |
|
"grad_norm": 0.1467510461807251, |
|
"learning_rate": 3.2180491094465415e-06, |
|
"loss": 0.4133, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 2.7822525597269623, |
|
"grad_norm": 0.1518513709306717, |
|
"learning_rate": 3.1385215842720027e-06, |
|
"loss": 0.4032, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 2.7849829351535837, |
|
"grad_norm": 0.15052153170108795, |
|
"learning_rate": 3.059973406066963e-06, |
|
"loss": 0.4157, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.7877133105802048, |
|
"grad_norm": 0.15297654271125793, |
|
"learning_rate": 2.9824053690162723e-06, |
|
"loss": 0.4153, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 2.790443686006826, |
|
"grad_norm": 0.15268519520759583, |
|
"learning_rate": 2.905818257394799e-06, |
|
"loss": 0.4083, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 2.7931740614334473, |
|
"grad_norm": 0.1531766653060913, |
|
"learning_rate": 2.8302128455594656e-06, |
|
"loss": 0.3943, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 2.7959044368600683, |
|
"grad_norm": 0.1546778380870819, |
|
"learning_rate": 2.7555898979413797e-06, |
|
"loss": 0.4091, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 2.7986348122866893, |
|
"grad_norm": 0.15699811279773712, |
|
"learning_rate": 2.6819501690382277e-06, |
|
"loss": 0.4249, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.8013651877133103, |
|
"grad_norm": 0.15149515867233276, |
|
"learning_rate": 2.609294403406537e-06, |
|
"loss": 0.4064, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 2.804095563139932, |
|
"grad_norm": 0.15525740385055542, |
|
"learning_rate": 2.537623335654127e-06, |
|
"loss": 0.4086, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 2.806825938566553, |
|
"grad_norm": 0.15066871047019958, |
|
"learning_rate": 2.4669376904328247e-06, |
|
"loss": 0.4046, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 2.8095563139931743, |
|
"grad_norm": 0.1503557562828064, |
|
"learning_rate": 2.397238182430994e-06, |
|
"loss": 0.4007, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 2.8122866894197953, |
|
"grad_norm": 0.15836714208126068, |
|
"learning_rate": 2.3285255163663532e-06, |
|
"loss": 0.4297, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.8150170648464163, |
|
"grad_norm": 0.1532403528690338, |
|
"learning_rate": 2.2608003869788786e-06, |
|
"loss": 0.4096, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 2.8177474402730374, |
|
"grad_norm": 0.1478443145751953, |
|
"learning_rate": 2.1940634790238e-06, |
|
"loss": 0.3819, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 2.820477815699659, |
|
"grad_norm": 0.1495964378118515, |
|
"learning_rate": 2.128315467264552e-06, |
|
"loss": 0.4086, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 2.82320819112628, |
|
"grad_norm": 0.15715493261814117, |
|
"learning_rate": 2.063557016466111e-06, |
|
"loss": 0.403, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 2.825938566552901, |
|
"grad_norm": 0.14644889533519745, |
|
"learning_rate": 1.999788781388201e-06, |
|
"loss": 0.405, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 2.8286689419795223, |
|
"grad_norm": 0.15347984433174133, |
|
"learning_rate": 1.9370114067785994e-06, |
|
"loss": 0.4198, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 2.8313993174061434, |
|
"grad_norm": 0.14934033155441284, |
|
"learning_rate": 1.8752255273667752e-06, |
|
"loss": 0.4078, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 2.8341296928327644, |
|
"grad_norm": 0.1513030081987381, |
|
"learning_rate": 1.8144317678573497e-06, |
|
"loss": 0.4165, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 2.8368600682593854, |
|
"grad_norm": 0.15821826457977295, |
|
"learning_rate": 1.754630742923813e-06, |
|
"loss": 0.4213, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 2.839590443686007, |
|
"grad_norm": 0.1506132036447525, |
|
"learning_rate": 1.6958230572023503e-06, |
|
"loss": 0.4058, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.842320819112628, |
|
"grad_norm": 0.15292277932167053, |
|
"learning_rate": 1.6380093052856483e-06, |
|
"loss": 0.4219, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 2.8450511945392494, |
|
"grad_norm": 0.15926344692707062, |
|
"learning_rate": 1.5811900717169538e-06, |
|
"loss": 0.4144, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 2.8477815699658704, |
|
"grad_norm": 0.15735220909118652, |
|
"learning_rate": 1.525365930984146e-06, |
|
"loss": 0.3986, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 2.8505119453924914, |
|
"grad_norm": 0.15544278919696808, |
|
"learning_rate": 1.4705374475138978e-06, |
|
"loss": 0.4151, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 2.8532423208191124, |
|
"grad_norm": 0.14867156744003296, |
|
"learning_rate": 1.416705175666e-06, |
|
"loss": 0.4039, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 2.855972696245734, |
|
"grad_norm": 0.15105663239955902, |
|
"learning_rate": 1.3638696597277679e-06, |
|
"loss": 0.4022, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 2.858703071672355, |
|
"grad_norm": 0.15061454474925995, |
|
"learning_rate": 1.3120314339084783e-06, |
|
"loss": 0.3928, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 2.861433447098976, |
|
"grad_norm": 0.15541335940361023, |
|
"learning_rate": 1.2611910223340407e-06, |
|
"loss": 0.4108, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 2.8641638225255974, |
|
"grad_norm": 0.15430454909801483, |
|
"learning_rate": 1.2113489390416566e-06, |
|
"loss": 0.4142, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 2.8668941979522184, |
|
"grad_norm": 0.1592140942811966, |
|
"learning_rate": 1.1625056879746133e-06, |
|
"loss": 0.4123, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.8696245733788395, |
|
"grad_norm": 0.15730910003185272, |
|
"learning_rate": 1.1146617629772315e-06, |
|
"loss": 0.4047, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 2.8723549488054605, |
|
"grad_norm": 0.15714263916015625, |
|
"learning_rate": 1.0678176477898372e-06, |
|
"loss": 0.4258, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 2.875085324232082, |
|
"grad_norm": 0.15376383066177368, |
|
"learning_rate": 1.0219738160438753e-06, |
|
"loss": 0.4098, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 2.877815699658703, |
|
"grad_norm": 0.15481893718242645, |
|
"learning_rate": 9.771307312571254e-07, |
|
"loss": 0.408, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 2.8805460750853245, |
|
"grad_norm": 0.15664780139923096, |
|
"learning_rate": 9.332888468290169e-07, |
|
"loss": 0.3983, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 2.8832764505119455, |
|
"grad_norm": 0.15640319883823395, |
|
"learning_rate": 8.90448606036054e-07, |
|
"loss": 0.4215, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.8860068259385665, |
|
"grad_norm": 0.15267756581306458, |
|
"learning_rate": 8.486104420272977e-07, |
|
"loss": 0.401, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 2.8887372013651875, |
|
"grad_norm": 0.15257929265499115, |
|
"learning_rate": 8.077747778200473e-07, |
|
"loss": 0.4145, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 2.891467576791809, |
|
"grad_norm": 0.1439686268568039, |
|
"learning_rate": 7.679420262954984e-07, |
|
"loss": 0.3806, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 2.89419795221843, |
|
"grad_norm": 0.15835194289684296, |
|
"learning_rate": 7.291125901946027e-07, |
|
"loss": 0.4156, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.896928327645051, |
|
"grad_norm": 0.15433841943740845, |
|
"learning_rate": 6.912868621140045e-07, |
|
"loss": 0.4198, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 2.8996587030716725, |
|
"grad_norm": 0.15369294583797455, |
|
"learning_rate": 6.544652245020433e-07, |
|
"loss": 0.4086, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 2.9023890784982935, |
|
"grad_norm": 0.16046328842639923, |
|
"learning_rate": 6.18648049654913e-07, |
|
"loss": 0.4046, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 2.9051194539249146, |
|
"grad_norm": 0.15750819444656372, |
|
"learning_rate": 5.838356997128869e-07, |
|
"loss": 0.4245, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 2.9078498293515356, |
|
"grad_norm": 0.16013328731060028, |
|
"learning_rate": 5.500285266566319e-07, |
|
"loss": 0.4128, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 2.910580204778157, |
|
"grad_norm": 0.15908414125442505, |
|
"learning_rate": 5.172268723036999e-07, |
|
"loss": 0.4256, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 2.913310580204778, |
|
"grad_norm": 0.15130603313446045, |
|
"learning_rate": 4.854310683050312e-07, |
|
"loss": 0.4253, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 2.9160409556313995, |
|
"grad_norm": 0.1521066129207611, |
|
"learning_rate": 4.546414361416229e-07, |
|
"loss": 0.4131, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 2.9187713310580206, |
|
"grad_norm": 0.15544620156288147, |
|
"learning_rate": 4.2485828712126583e-07, |
|
"loss": 0.4088, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 2.9215017064846416, |
|
"grad_norm": 0.1541679948568344, |
|
"learning_rate": 3.96081922375402e-07, |
|
"loss": 0.4083, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.9242320819112626, |
|
"grad_norm": 0.15230417251586914, |
|
"learning_rate": 3.6831263285608266e-07, |
|
"loss": 0.4067, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 2.926962457337884, |
|
"grad_norm": 0.15117131173610687, |
|
"learning_rate": 3.415506993330153e-07, |
|
"loss": 0.4138, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 2.929692832764505, |
|
"grad_norm": 0.1522316336631775, |
|
"learning_rate": 3.1579639239074365e-07, |
|
"loss": 0.4052, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 2.932423208191126, |
|
"grad_norm": 0.1582721471786499, |
|
"learning_rate": 2.9104997242590527e-07, |
|
"loss": 0.4056, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 2.9351535836177476, |
|
"grad_norm": 0.1515754610300064, |
|
"learning_rate": 2.673116896445671e-07, |
|
"loss": 0.4054, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.9378839590443686, |
|
"grad_norm": 0.15097399055957794, |
|
"learning_rate": 2.4458178405974975e-07, |
|
"loss": 0.4036, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 2.9406143344709896, |
|
"grad_norm": 0.1505846381187439, |
|
"learning_rate": 2.2286048548897376e-07, |
|
"loss": 0.4269, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 2.9433447098976107, |
|
"grad_norm": 0.14722640812397003, |
|
"learning_rate": 2.0214801355192824e-07, |
|
"loss": 0.3938, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 2.946075085324232, |
|
"grad_norm": 0.15851718187332153, |
|
"learning_rate": 1.824445776682504e-07, |
|
"loss": 0.4163, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 2.948805460750853, |
|
"grad_norm": 0.1539052575826645, |
|
"learning_rate": 1.6375037705543826e-07, |
|
"loss": 0.4119, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.9515358361774746, |
|
"grad_norm": 0.15709060430526733, |
|
"learning_rate": 1.4606560072679687e-07, |
|
"loss": 0.4125, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 2.9542662116040956, |
|
"grad_norm": 0.14519765973091125, |
|
"learning_rate": 1.2939042748955077e-07, |
|
"loss": 0.399, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 2.9569965870307167, |
|
"grad_norm": 0.1516365110874176, |
|
"learning_rate": 1.1372502594303446e-07, |
|
"loss": 0.4181, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 2.9597269624573377, |
|
"grad_norm": 0.15326349437236786, |
|
"learning_rate": 9.906955447697153e-08, |
|
"loss": 0.4174, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 2.962457337883959, |
|
"grad_norm": 0.15291821956634521, |
|
"learning_rate": 8.542416126989805e-08, |
|
"loss": 0.4166, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 2.96518771331058, |
|
"grad_norm": 0.14680640399456024, |
|
"learning_rate": 7.27889842876417e-08, |
|
"loss": 0.4111, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 2.967918088737201, |
|
"grad_norm": 0.15194863080978394, |
|
"learning_rate": 6.116415128194497e-08, |
|
"loss": 0.4054, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 2.9706484641638227, |
|
"grad_norm": 0.160339817404747, |
|
"learning_rate": 5.054977978916631e-08, |
|
"loss": 0.4133, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 2.9733788395904437, |
|
"grad_norm": 0.15164430439472198, |
|
"learning_rate": 4.094597712908099e-08, |
|
"loss": 0.4134, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 2.9761092150170647, |
|
"grad_norm": 0.14412933588027954, |
|
"learning_rate": 3.2352840403804264e-08, |
|
"loss": 0.3914, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.9788395904436857, |
|
"grad_norm": 0.15993493795394897, |
|
"learning_rate": 2.477045649681431e-08, |
|
"loss": 0.4335, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 2.981569965870307, |
|
"grad_norm": 0.1526769995689392, |
|
"learning_rate": 1.81989020720974e-08, |
|
"loss": 0.4129, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 2.9843003412969282, |
|
"grad_norm": 0.14546047151088715, |
|
"learning_rate": 1.2638243573293018e-08, |
|
"loss": 0.3924, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 2.9870307167235497, |
|
"grad_norm": 0.150767520070076, |
|
"learning_rate": 8.088537223116532e-09, |
|
"loss": 0.3982, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 2.9897610921501707, |
|
"grad_norm": 0.15718306601047516, |
|
"learning_rate": 4.549829022748586e-09, |
|
"loss": 0.418, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 2.9924914675767917, |
|
"grad_norm": 0.14539772272109985, |
|
"learning_rate": 2.0221547513243897e-09, |
|
"loss": 0.4034, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 2.9952218430034128, |
|
"grad_norm": 0.15158307552337646, |
|
"learning_rate": 5.055399656894721e-10, |
|
"loss": 0.4038, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 2.9979522184300342, |
|
"grad_norm": 0.148399218916893, |
|
"learning_rate": 0.0, |
|
"loss": 0.4121, |
|
"step": 1098 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1098, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.529684799263867e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|