|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"global_step": 468, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2e-06, |
|
"loss": 2.4992, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4e-06, |
|
"loss": 2.5133, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6e-06, |
|
"loss": 1.9138, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8e-06, |
|
"loss": 1.3926, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.999999999999999e-06, |
|
"loss": 1.04, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.839, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.4e-05, |
|
"loss": 0.7951, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.6e-05, |
|
"loss": 0.776, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.7137, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9999999999999998e-05, |
|
"loss": 0.6901, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.2e-05, |
|
"loss": 0.6603, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.6495, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.6096, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.8e-05, |
|
"loss": 0.6309, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5983, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.999963928611156e-05, |
|
"loss": 0.6011, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9998557161794857e-05, |
|
"loss": 0.5982, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9996753679094844e-05, |
|
"loss": 0.5863, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9994228924750363e-05, |
|
"loss": 0.5939, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9990983020189943e-05, |
|
"loss": 0.5674, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9987016121525965e-05, |
|
"loss": 0.574, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9982328419547154e-05, |
|
"loss": 0.5602, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.99769201397094e-05, |
|
"loss": 0.5723, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9970791542124925e-05, |
|
"loss": 0.5662, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9963942921549767e-05, |
|
"loss": 0.5683, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.99563746073696e-05, |
|
"loss": 0.5539, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9948086963583895e-05, |
|
"loss": 0.5468, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9939080388788412e-05, |
|
"loss": 0.555, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9929355316156036e-05, |
|
"loss": 0.5614, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9918912213415933e-05, |
|
"loss": 0.5401, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9907751582831066e-05, |
|
"loss": 0.5571, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9895873961174027e-05, |
|
"loss": 0.5562, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9883279919701227e-05, |
|
"loss": 0.5495, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9869970064125424e-05, |
|
"loss": 0.5486, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9855945034586584e-05, |
|
"loss": 0.5411, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9841205505621104e-05, |
|
"loss": 0.5376, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9825752186129355e-05, |
|
"loss": 0.5393, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.98095858193416e-05, |
|
"loss": 0.532, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.979270718278224e-05, |
|
"loss": 0.5242, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.977511708823242e-05, |
|
"loss": 0.5452, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9756816381691003e-05, |
|
"loss": 0.5531, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9737805943333857e-05, |
|
"loss": 0.537, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.971808668747153e-05, |
|
"loss": 0.5144, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9697659562505286e-05, |
|
"loss": 0.5443, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9676525550881482e-05, |
|
"loss": 0.5299, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9654685669044313e-05, |
|
"loss": 0.5374, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.963214096738695e-05, |
|
"loss": 0.5354, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.960889253020099e-05, |
|
"loss": 0.5374, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9584941475624315e-05, |
|
"loss": 0.5297, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9560288955587334e-05, |
|
"loss": 0.5401, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9534936155757568e-05, |
|
"loss": 0.5427, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9508884295482606e-05, |
|
"loss": 0.5443, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.94821346277315e-05, |
|
"loss": 0.5414, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9454688439034475e-05, |
|
"loss": 0.5168, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9426547049421047e-05, |
|
"loss": 0.5337, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9397711812356565e-05, |
|
"loss": 0.5168, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.936818411467709e-05, |
|
"loss": 0.5147, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9337965376522717e-05, |
|
"loss": 0.5343, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.9307057051269242e-05, |
|
"loss": 0.5167, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.9275460625458298e-05, |
|
"loss": 0.521, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.9243177618725826e-05, |
|
"loss": 0.5258, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.921020958372902e-05, |
|
"loss": 0.5284, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.9176558106071615e-05, |
|
"loss": 0.5291, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.914222480422767e-05, |
|
"loss": 0.5145, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.9107211329463685e-05, |
|
"loss": 0.5349, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.9071519365759214e-05, |
|
"loss": 0.5243, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.9035150629725858e-05, |
|
"loss": 0.5288, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8998106870524714e-05, |
|
"loss": 0.5376, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.896038986978223e-05, |
|
"loss": 0.5104, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8922001441504544e-05, |
|
"loss": 0.5206, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.888294343199022e-05, |
|
"loss": 0.5061, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8843217719741458e-05, |
|
"loss": 0.5431, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8802826215373742e-05, |
|
"loss": 0.5365, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.876177086152395e-05, |
|
"loss": 0.5281, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8720053632756923e-05, |
|
"loss": 0.5243, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.867767653547051e-05, |
|
"loss": 0.5075, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8634641607799046e-05, |
|
"loss": 0.5178, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.859095091951534e-05, |
|
"loss": 0.5498, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8546606571931142e-05, |
|
"loss": 0.5254, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8501610697796044e-05, |
|
"loss": 0.5181, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8455965461194963e-05, |
|
"loss": 0.5138, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8409673057443985e-05, |
|
"loss": 0.5192, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8362735712984848e-05, |
|
"loss": 0.5205, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8315155685277816e-05, |
|
"loss": 0.5093, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8266935262693138e-05, |
|
"loss": 0.5084, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.821807676440096e-05, |
|
"loss": 0.5074, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.8168582540259803e-05, |
|
"loss": 0.5122, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.811845497070354e-05, |
|
"loss": 0.5265, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.806769646662691e-05, |
|
"loss": 0.523, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.801630946926956e-05, |
|
"loss": 0.5117, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.7964296450098646e-05, |
|
"loss": 0.5034, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.7911659910689947e-05, |
|
"loss": 0.5263, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.7858402382607577e-05, |
|
"loss": 0.5161, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.78045264272822e-05, |
|
"loss": 0.484, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.7750034635887874e-05, |
|
"loss": 0.4986, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.769492962921738e-05, |
|
"loss": 0.5019, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.7639214057556227e-05, |
|
"loss": 0.5266, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.758289060055514e-05, |
|
"loss": 0.5144, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.7525961967101216e-05, |
|
"loss": 0.5084, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.7468430895187616e-05, |
|
"loss": 0.5138, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.741030015178189e-05, |
|
"loss": 0.5159, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.7351572532692916e-05, |
|
"loss": 0.5136, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.7292250862436397e-05, |
|
"loss": 0.5048, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.7232337994099044e-05, |
|
"loss": 0.5153, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.7171836809201357e-05, |
|
"loss": 0.5041, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.711075021755902e-05, |
|
"loss": 0.5017, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.704908115714297e-05, |
|
"loss": 0.5295, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.6986832593938088e-05, |
|
"loss": 0.5036, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.6924007521800533e-05, |
|
"loss": 0.5113, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.686060896231379e-05, |
|
"loss": 0.5024, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.6796639964643306e-05, |
|
"loss": 0.5115, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.6732103605389876e-05, |
|
"loss": 0.5231, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.6667002988441638e-05, |
|
"loss": 0.5252, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.660134124482482e-05, |
|
"loss": 0.5157, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.6535121532553135e-05, |
|
"loss": 0.5136, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.6468347036475902e-05, |
|
"loss": 0.5108, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.6401020968124874e-05, |
|
"loss": 0.5173, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.6333146565559775e-05, |
|
"loss": 0.5163, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.6264727093212554e-05, |
|
"loss": 0.5105, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6195765841730404e-05, |
|
"loss": 0.5158, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6126266127817483e-05, |
|
"loss": 0.5165, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6056231294075393e-05, |
|
"loss": 0.5177, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.5985664708842438e-05, |
|
"loss": 0.493, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.5914569766031586e-05, |
|
"loss": 0.5119, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.584294988496728e-05, |
|
"loss": 0.5003, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.5770808510220957e-05, |
|
"loss": 0.5101, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.569814911144539e-05, |
|
"loss": 0.5188, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.5624975183207813e-05, |
|
"loss": 0.5102, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.5551290244821856e-05, |
|
"loss": 0.5066, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.547709784017826e-05, |
|
"loss": 0.4941, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.5402401537574476e-05, |
|
"loss": 0.4988, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.5327204929543e-05, |
|
"loss": 0.5019, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.5251511632678613e-05, |
|
"loss": 0.513, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.5175325287464444e-05, |
|
"loss": 0.5045, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.5098649558096864e-05, |
|
"loss": 0.5137, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.5021488132309282e-05, |
|
"loss": 0.5149, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.4943844721194745e-05, |
|
"loss": 0.5009, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.486572305902749e-05, |
|
"loss": 0.508, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.4787126903083323e-05, |
|
"loss": 0.5011, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.4708060033458908e-05, |
|
"loss": 0.4966, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.4628526252889985e-05, |
|
"loss": 0.498, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.454852938656845e-05, |
|
"loss": 0.5132, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.4468073281958393e-05, |
|
"loss": 0.4918, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.438716180861106e-05, |
|
"loss": 0.5032, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.4305798857978756e-05, |
|
"loss": 0.5182, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.4223988343227638e-05, |
|
"loss": 0.5097, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.4141734199049564e-05, |
|
"loss": 0.5133, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.405904038147282e-05, |
|
"loss": 0.4896, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.397591086767188e-05, |
|
"loss": 0.5178, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.3892349655776095e-05, |
|
"loss": 0.495, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.3808360764677416e-05, |
|
"loss": 0.4903, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.3723948233837116e-05, |
|
"loss": 0.5125, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.363911612309149e-05, |
|
"loss": 0.4956, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.3553868512456604e-05, |
|
"loss": 0.5071, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.346820950193208e-05, |
|
"loss": 0.5151, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.3382143211303894e-05, |
|
"loss": 0.5136, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.3295673779946207e-05, |
|
"loss": 0.5037, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.3208805366622342e-05, |
|
"loss": 0.5074, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.3121542149284712e-05, |
|
"loss": 0.5107, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.303388832487391e-05, |
|
"loss": 0.4903, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.294584810911686e-05, |
|
"loss": 0.4975, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.2857425736324024e-05, |
|
"loss": 0.4947, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.276862545918579e-05, |
|
"loss": 0.4999, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.267945154856793e-05, |
|
"loss": 0.5, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.258990829330619e-05, |
|
"loss": 0.4934, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.511, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.2409730992805378e-05, |
|
"loss": 0.5291, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.2319105613226925e-05, |
|
"loss": 0.5124, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.2228128219909057e-05, |
|
"loss": 0.5061, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.2136803188426344e-05, |
|
"loss": 0.509, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.204513491107309e-05, |
|
"loss": 0.4965, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.1953127796652057e-05, |
|
"loss": 0.4923, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.1860786270262444e-05, |
|
"loss": 0.4864, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.1768114773087063e-05, |
|
"loss": 0.496, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.167511776217872e-05, |
|
"loss": 0.5027, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.158179971024588e-05, |
|
"loss": 0.4988, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.1488165105437516e-05, |
|
"loss": 0.4905, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.139421845112729e-05, |
|
"loss": 0.512, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.1299964265696923e-05, |
|
"loss": 0.4978, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.1205407082318925e-05, |
|
"loss": 0.4977, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.111055144873852e-05, |
|
"loss": 0.4985, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.1015401927054977e-05, |
|
"loss": 0.5058, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.0919963093502146e-05, |
|
"loss": 0.4959, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.0824239538228404e-05, |
|
"loss": 0.5018, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.0728235865075865e-05, |
|
"loss": 0.4961, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.0631956691358952e-05, |
|
"loss": 0.4984, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.053540664764235e-05, |
|
"loss": 0.4878, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.0438590377518292e-05, |
|
"loss": 0.5144, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.0341512537383202e-05, |
|
"loss": 0.4977, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.024417779621379e-05, |
|
"loss": 0.4948, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.0146590835342436e-05, |
|
"loss": 0.4942, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.0048756348232097e-05, |
|
"loss": 0.4892, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9950679040250536e-05, |
|
"loss": 0.4957, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9852363628444042e-05, |
|
"loss": 0.5053, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9753814841310544e-05, |
|
"loss": 0.4974, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9655037418572202e-05, |
|
"loss": 0.4973, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.955603611094745e-05, |
|
"loss": 0.4996, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9456815679922512e-05, |
|
"loss": 0.5048, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9357380897522376e-05, |
|
"loss": 0.5041, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.925773654608132e-05, |
|
"loss": 0.5117, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.915788741801286e-05, |
|
"loss": 0.4922, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.9057838315579305e-05, |
|
"loss": 0.4847, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8957594050660752e-05, |
|
"loss": 0.4898, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.8857159444523688e-05, |
|
"loss": 0.4801, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.8756539327589087e-05, |
|
"loss": 0.4944, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.8655738539200092e-05, |
|
"loss": 0.4923, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.8554761927389283e-05, |
|
"loss": 0.4856, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.8453614348645504e-05, |
|
"loss": 0.5041, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.8352300667680277e-05, |
|
"loss": 0.4845, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.8250825757193848e-05, |
|
"loss": 0.4947, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.8149194497640817e-05, |
|
"loss": 0.5005, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.8047411776995424e-05, |
|
"loss": 0.4906, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.7945482490516465e-05, |
|
"loss": 0.5018, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.784341154051184e-05, |
|
"loss": 0.4992, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.7741203836102794e-05, |
|
"loss": 0.4932, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.763886429298778e-05, |
|
"loss": 0.4859, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.7536397833206082e-05, |
|
"loss": 0.4872, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.743380938490104e-05, |
|
"loss": 0.4988, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.7331103882083075e-05, |
|
"loss": 0.4847, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.7228286264392357e-05, |
|
"loss": 0.4849, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.7125361476861237e-05, |
|
"loss": 0.4913, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.7022334469676433e-05, |
|
"loss": 0.4875, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.691921019794093e-05, |
|
"loss": 0.4859, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.6815993621435656e-05, |
|
"loss": 0.4922, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.6712689704380978e-05, |
|
"loss": 0.4849, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.6609303415197904e-05, |
|
"loss": 0.4892, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.6505839726269153e-05, |
|
"loss": 0.4749, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.64023036137e-05, |
|
"loss": 0.4793, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.6298700057078945e-05, |
|
"loss": 0.4966, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.619503403923823e-05, |
|
"loss": 0.5009, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.6091310546014162e-05, |
|
"loss": 0.4898, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.598753456600735e-05, |
|
"loss": 0.4885, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.5883711090342767e-05, |
|
"loss": 0.4881, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.5779845112429706e-05, |
|
"loss": 0.5056, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.567594162772159e-05, |
|
"loss": 0.4927, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.5572005633475758e-05, |
|
"loss": 0.4958, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.5468042128513085e-05, |
|
"loss": 0.5028, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.53640561129776e-05, |
|
"loss": 0.5009, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.5260052588095966e-05, |
|
"loss": 0.4787, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.5156036555936963e-05, |
|
"loss": 0.4865, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.5052013019170917e-05, |
|
"loss": 0.4838, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.4947986980829084e-05, |
|
"loss": 0.4934, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.484396344406304e-05, |
|
"loss": 0.4955, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.4739947411904036e-05, |
|
"loss": 0.4953, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.4635943887022402e-05, |
|
"loss": 0.4914, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.453195787148691e-05, |
|
"loss": 0.4856, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.4427994366524248e-05, |
|
"loss": 0.498, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.4324058372278415e-05, |
|
"loss": 0.4895, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.4220154887570298e-05, |
|
"loss": 0.4873, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.4116288909657232e-05, |
|
"loss": 0.4877, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.4012465433992651e-05, |
|
"loss": 0.4962, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.3908689453985844e-05, |
|
"loss": 0.5092, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.3804965960761774e-05, |
|
"loss": 0.4829, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.3701299942921052e-05, |
|
"loss": 0.4868, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.3597696386299997e-05, |
|
"loss": 0.4912, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.3494160273730844e-05, |
|
"loss": 0.4825, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.33906965848021e-05, |
|
"loss": 0.5019, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.3287310295619027e-05, |
|
"loss": 0.4891, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.3184006378564348e-05, |
|
"loss": 0.4858, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.3080789802059076e-05, |
|
"loss": 0.4808, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.2977665530323568e-05, |
|
"loss": 0.4965, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.2874638523138764e-05, |
|
"loss": 0.4884, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.2771713735607647e-05, |
|
"loss": 0.4935, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.2668896117916928e-05, |
|
"loss": 0.4744, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.256619061509896e-05, |
|
"loss": 0.5041, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.2463602166793919e-05, |
|
"loss": 0.4933, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.2361135707012223e-05, |
|
"loss": 0.4844, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.2258796163897212e-05, |
|
"loss": 0.4839, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.215658845948816e-05, |
|
"loss": 0.5059, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.2054517509483537e-05, |
|
"loss": 0.4756, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.195258822300458e-05, |
|
"loss": 0.4766, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.1850805502359189e-05, |
|
"loss": 0.5047, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.1749174242806153e-05, |
|
"loss": 0.4672, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.164769933231972e-05, |
|
"loss": 0.4784, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.1546385651354495e-05, |
|
"loss": 0.5188, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.144523807261072e-05, |
|
"loss": 0.4915, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.1344261460799914e-05, |
|
"loss": 0.4741, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.1243460672410919e-05, |
|
"loss": 0.5056, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.1142840555476313e-05, |
|
"loss": 0.4824, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.1042405949339247e-05, |
|
"loss": 0.4941, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.0942161684420697e-05, |
|
"loss": 0.4867, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.0842112581987143e-05, |
|
"loss": 0.4774, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.0742263453918684e-05, |
|
"loss": 0.4695, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.0642619102477623e-05, |
|
"loss": 0.4899, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.054318432007749e-05, |
|
"loss": 0.4964, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.0443963889052553e-05, |
|
"loss": 0.4747, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.0344962581427802e-05, |
|
"loss": 0.4979, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.024618515868946e-05, |
|
"loss": 0.487, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.0147636371555964e-05, |
|
"loss": 0.4785, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.0049320959749467e-05, |
|
"loss": 0.4875, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.951243651767909e-06, |
|
"loss": 0.5079, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.853409164657566e-06, |
|
"loss": 0.4956, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.755822203786213e-06, |
|
"loss": 0.4936, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.658487462616795e-06, |
|
"loss": 0.4805, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.56140962248171e-06, |
|
"loss": 0.4746, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.464593352357654e-06, |
|
"loss": 0.4797, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.368043308641054e-06, |
|
"loss": 0.4793, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.27176413492414e-06, |
|
"loss": 0.4775, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.175760461771597e-06, |
|
"loss": 0.4923, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.080036906497855e-06, |
|
"loss": 0.4719, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 8.984598072945029e-06, |
|
"loss": 0.478, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 8.889448551261481e-06, |
|
"loss": 0.4788, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.794592917681079e-06, |
|
"loss": 0.4825, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.700035734303074e-06, |
|
"loss": 0.491, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.605781548872711e-06, |
|
"loss": 0.5009, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.511834894562488e-06, |
|
"loss": 0.4684, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.418200289754128e-06, |
|
"loss": 0.4667, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.324882237821284e-06, |
|
"loss": 0.4856, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.231885226912944e-06, |
|
"loss": 0.4908, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.139213729737562e-06, |
|
"loss": 0.4947, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.046872203347943e-06, |
|
"loss": 0.4788, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.95486508892691e-06, |
|
"loss": 0.4785, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.863196811573651e-06, |
|
"loss": 0.4669, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.771871780090942e-06, |
|
"loss": 0.4988, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.680894386773074e-06, |
|
"loss": 0.4925, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.59026900719463e-06, |
|
"loss": 0.4828, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.500000000000004e-06, |
|
"loss": 0.4891, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.410091706693814e-06, |
|
"loss": 0.4688, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.320548451432072e-06, |
|
"loss": 0.4708, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.231374540814215e-06, |
|
"loss": 0.4664, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.1425742636759835e-06, |
|
"loss": 0.4917, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.054151890883147e-06, |
|
"loss": 0.485, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.96611167512609e-06, |
|
"loss": 0.4848, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.878457850715293e-06, |
|
"loss": 0.4797, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.791194633377658e-06, |
|
"loss": 0.4794, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.704326220053796e-06, |
|
"loss": 0.4867, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.617856788696111e-06, |
|
"loss": 0.5003, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.5317904980679176e-06, |
|
"loss": 0.4704, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.446131487543397e-06, |
|
"loss": 0.4849, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.360883876908513e-06, |
|
"loss": 0.4706, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 6.276051766162887e-06, |
|
"loss": 0.4791, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 6.191639235322586e-06, |
|
"loss": 0.485, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 6.107650344223909e-06, |
|
"loss": 0.5052, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 6.024089132328122e-06, |
|
"loss": 0.4835, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.940959618527182e-06, |
|
"loss": 0.4718, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.8582658009504395e-06, |
|
"loss": 0.4741, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.776011656772364e-06, |
|
"loss": 0.4605, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.694201142021245e-06, |
|
"loss": 0.4758, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.612838191388936e-06, |
|
"loss": 0.494, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.531926718041609e-06, |
|
"loss": 0.4833, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.451470613431554e-06, |
|
"loss": 0.4759, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.371473747110016e-06, |
|
"loss": 0.4713, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.291939966541091e-06, |
|
"loss": 0.4675, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.212873096916677e-06, |
|
"loss": 0.4797, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.134276940972514e-06, |
|
"loss": 0.4722, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.05615527880526e-06, |
|
"loss": 0.4626, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.978511867690725e-06, |
|
"loss": 0.5055, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.901350441903135e-06, |
|
"loss": 0.4758, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.824674712535559e-06, |
|
"loss": 0.4899, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.748488367321388e-06, |
|
"loss": 0.4975, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.672795070457e-06, |
|
"loss": 0.4901, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.597598462425523e-06, |
|
"loss": 0.4809, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.522902159821737e-06, |
|
"loss": 0.4712, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.4487097551781465e-06, |
|
"loss": 0.4807, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.3750248167921925e-06, |
|
"loss": 0.4743, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.301850888554617e-06, |
|
"loss": 0.4886, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.229191489779048e-06, |
|
"loss": 0.4991, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.157050115032724e-06, |
|
"loss": 0.4873, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.085430233968418e-06, |
|
"loss": 0.4837, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.014335291157566e-06, |
|
"loss": 0.4655, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.9437687059246065e-06, |
|
"loss": 0.4809, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.873733872182515e-06, |
|
"loss": 0.4875, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.804234158269593e-06, |
|
"loss": 0.4705, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.7352729067874436e-06, |
|
"loss": 0.4761, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.6668534344402287e-06, |
|
"loss": 0.495, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.598979031875127e-06, |
|
"loss": 0.5018, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.5316529635240997e-06, |
|
"loss": 0.4753, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.4648784674468687e-06, |
|
"loss": 0.5023, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.398658755175183e-06, |
|
"loss": 0.4863, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.3329970115583637e-06, |
|
"loss": 0.4678, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.2678963946101275e-06, |
|
"loss": 0.4765, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.203360035356695e-06, |
|
"loss": 0.4708, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.139391037686214e-06, |
|
"loss": 0.4732, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.0759924781994702e-06, |
|
"loss": 0.4845, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.013167406061916e-06, |
|
"loss": 0.4746, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.9509188428570287e-06, |
|
"loss": 0.458, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.889249782440979e-06, |
|
"loss": 0.4893, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.8281631907986445e-06, |
|
"loss": 0.4691, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.767662005900958e-06, |
|
"loss": 0.4837, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.7077491375636074e-06, |
|
"loss": 0.4632, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.6484274673070864e-06, |
|
"loss": 0.4821, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.5896998482181104e-06, |
|
"loss": 0.4624, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.5315691048123886e-06, |
|
"loss": 0.4631, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.474038032898786e-06, |
|
"loss": 0.4778, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.4171093994448634e-06, |
|
"loss": 0.4773, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.3607859424437745e-06, |
|
"loss": 0.4731, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.3050703707826187e-06, |
|
"loss": 0.494, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.2499653641121275e-06, |
|
"loss": 0.4681, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.195473572717797e-06, |
|
"loss": 0.4674, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.141597617392425e-06, |
|
"loss": 0.4534, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.0883400893100535e-06, |
|
"loss": 0.4915, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.0357035499013548e-06, |
|
"loss": 0.4764, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.983690530730439e-06, |
|
"loss": 0.4859, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.9323035333730898e-06, |
|
"loss": 0.4738, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.8815450292964625e-06, |
|
"loss": 0.4834, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.8314174597401995e-06, |
|
"loss": 0.4655, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.7819232355990428e-06, |
|
"loss": 0.473, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.7330647373068642e-06, |
|
"loss": 0.4844, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.6848443147221832e-06, |
|
"loss": 0.4715, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.6372642870151538e-06, |
|
"loss": 0.4625, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.5903269425560146e-06, |
|
"loss": 0.4679, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.5440345388050393e-06, |
|
"loss": 0.4685, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4983893022039518e-06, |
|
"loss": 0.4678, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4533934280688632e-06, |
|
"loss": 0.4801, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4090490804846612e-06, |
|
"loss": 0.4823, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.3653583922009576e-06, |
|
"loss": 0.4874, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.3223234645294907e-06, |
|
"loss": 0.4695, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.2799463672430766e-06, |
|
"loss": 0.4806, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.2382291384760536e-06, |
|
"loss": 0.4865, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.197173784626261e-06, |
|
"loss": 0.4874, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.1567822802585436e-06, |
|
"loss": 0.4961, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.11705656800978e-06, |
|
"loss": 0.4695, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.0779985584954556e-06, |
|
"loss": 0.4672, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.0396101302177725e-06, |
|
"loss": 0.4753, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.0018931294752897e-06, |
|
"loss": 0.4681, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 9.648493702741429e-07, |
|
"loss": 0.4865, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 9.284806342407881e-07, |
|
"loss": 0.4697, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 8.927886705363181e-07, |
|
"loss": 0.488, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 8.577751957723339e-07, |
|
"loss": 0.4819, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 8.234418939283866e-07, |
|
"loss": 0.4722, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.897904162709846e-07, |
|
"loss": 0.4689, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.568223812741743e-07, |
|
"loss": 0.4722, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.245393745417039e-07, |
|
"loss": 0.4823, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.929429487307581e-07, |
|
"loss": 0.4659, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.620346234772839e-07, |
|
"loss": 0.4677, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.318158853229095e-07, |
|
"loss": 0.4634, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.022881876434377e-07, |
|
"loss": 0.4893, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.73452950578956e-07, |
|
"loss": 0.4806, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5.453115609655285e-07, |
|
"loss": 0.4919, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5.178653722684984e-07, |
|
"loss": 0.4676, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.911157045173931e-07, |
|
"loss": 0.4819, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.650638442424349e-07, |
|
"loss": 0.479, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.3971104441266565e-07, |
|
"loss": 0.4836, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.1505852437568823e-07, |
|
"loss": 0.4667, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.911074697990136e-07, |
|
"loss": 0.4864, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.678590326130493e-07, |
|
"loss": 0.4743, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.4531433095568445e-07, |
|
"loss": 0.4719, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.234744491185204e-07, |
|
"loss": 0.4767, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.023404374947147e-07, |
|
"loss": 0.4714, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.8191331252847117e-07, |
|
"loss": 0.4781, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.62194056666144e-07, |
|
"loss": 0.4685, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.431836183089975e-07, |
|
"loss": 0.4696, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.2488291176758047e-07, |
|
"loss": 0.4513, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.0729281721776682e-07, |
|
"loss": 0.4784, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.904141806584042e-07, |
|
"loss": 0.4916, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.7424781387064658e-07, |
|
"loss": 0.4572, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5879449437889547e-07, |
|
"loss": 0.4676, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.440549654134149e-07, |
|
"loss": 0.4801, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.3002993587457656e-07, |
|
"loss": 0.4615, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.167200802987739e-07, |
|
"loss": 0.4805, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.041260388259746e-07, |
|
"loss": 0.4831, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.224841716893362e-08, |
|
"loss": 0.4841, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.108778658406646e-08, |
|
"loss": 0.468, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 7.06446838439645e-08, |
|
"loss": 0.4702, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.091961121158896e-08, |
|
"loss": 0.4761, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5.1913036416106895e-08, |
|
"loss": 0.4567, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.3625392630400884e-08, |
|
"loss": 0.4556, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.605707845023232e-08, |
|
"loss": 0.4701, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.920845787507509e-08, |
|
"loss": 0.47, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.3079860290601272e-08, |
|
"loss": 0.4785, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.7671580452848777e-08, |
|
"loss": 0.4803, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2983878474034372e-08, |
|
"loss": 0.4768, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.016979810055337e-09, |
|
"loss": 0.4613, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.771075249634827e-09, |
|
"loss": 0.4798, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.246320905155864e-09, |
|
"loss": 0.4797, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.44283820514568e-09, |
|
"loss": 0.4639, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.6071388843705867e-10, |
|
"loss": 0.4692, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.4776, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 468, |
|
"total_flos": 1.7416145444232233e+18, |
|
"train_loss": 0.5158002531299224, |
|
"train_runtime": 41353.3131, |
|
"train_samples_per_second": 1.451, |
|
"train_steps_per_second": 0.011 |
|
} |
|
], |
|
"max_steps": 468, |
|
"num_train_epochs": 1, |
|
"total_flos": 1.7416145444232233e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|