{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.998479858120091, |
|
"eval_steps": 500, |
|
"global_step": 986, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3333333333333334e-07, |
|
"loss": 2.0093, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.6666666666666667e-07, |
|
"loss": 2.1267, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4e-07, |
|
"loss": 2.0596, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.333333333333333e-07, |
|
"loss": 2.0195, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.666666666666666e-07, |
|
"loss": 1.9924, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8e-07, |
|
"loss": 2.0199, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.333333333333333e-07, |
|
"loss": 2.1074, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.0666666666666667e-06, |
|
"loss": 2.0768, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.2e-06, |
|
"loss": 2.0471, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.3333333333333332e-06, |
|
"loss": 2.0815, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.4666666666666665e-06, |
|
"loss": 2.0491, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.6e-06, |
|
"loss": 1.9666, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.7333333333333334e-06, |
|
"loss": 2.0742, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8666666666666667e-06, |
|
"loss": 2.0388, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2e-06, |
|
"loss": 2.0142, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9999784020568754e-06, |
|
"loss": 2.0564, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.999913609160443e-06, |
|
"loss": 2.0268, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.999805624109491e-06, |
|
"loss": 2.0147, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.999654451568528e-06, |
|
"loss": 2.0643, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.999460098067586e-06, |
|
"loss": 2.1189, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.999222572001937e-06, |
|
"loss": 2.1375, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9989418836317303e-06, |
|
"loss": 2.1305, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9986180450815485e-06, |
|
"loss": 1.9724, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9982510703398844e-06, |
|
"loss": 1.9929, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9978409752585376e-06, |
|
"loss": 2.0284, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9973877775519285e-06, |
|
"loss": 2.0599, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9968914967963333e-06, |
|
"loss": 2.0289, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.99635215442904e-06, |
|
"loss": 1.971, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9957697737474197e-06, |
|
"loss": 2.1077, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9951443799079214e-06, |
|
"loss": 1.9899, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.994475999924987e-06, |
|
"loss": 2.0886, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.993764662669882e-06, |
|
"loss": 2.0682, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9930103988694497e-06, |
|
"loss": 2.0396, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9922132411047833e-06, |
|
"loss": 2.0508, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.991373223809819e-06, |
|
"loss": 2.0468, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.990490383269848e-06, |
|
"loss": 2.1025, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9895647576199505e-06, |
|
"loss": 2.1965, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.988596386843346e-06, |
|
"loss": 2.0172, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.987585312769669e-06, |
|
"loss": 2.0095, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9865315790731598e-06, |
|
"loss": 1.9972, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9854352312707796e-06, |
|
"loss": 2.115, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.984296316720243e-06, |
|
"loss": 2.0605, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.983114884617974e-06, |
|
"loss": 1.9956, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.9818909859969784e-06, |
|
"loss": 2.0327, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.980624673724643e-06, |
|
"loss": 2.0218, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.9793160025004475e-06, |
|
"loss": 1.9757, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.9779650288536056e-06, |
|
"loss": 2.0011, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.9765718111406216e-06, |
|
"loss": 2.0784, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.975136409542769e-06, |
|
"loss": 2.0673, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9736588860634923e-06, |
|
"loss": 2.0109, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.9721393045257275e-06, |
|
"loss": 2.0554, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.9705777305691457e-06, |
|
"loss": 1.9808, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.9689742316473177e-06, |
|
"loss": 1.9741, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.967328877024801e-06, |
|
"loss": 2.0922, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9656417377741466e-06, |
|
"loss": 2.0362, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.96391288677283e-06, |
|
"loss": 2.0535, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.962142398700101e-06, |
|
"loss": 2.0159, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9603303500337626e-06, |
|
"loss": 1.9948, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9584768190468624e-06, |
|
"loss": 2.0756, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9565818858043136e-06, |
|
"loss": 1.9603, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.9546456321594373e-06, |
|
"loss": 2.0958, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.9526681417504258e-06, |
|
"loss": 2.0459, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.9506494999967296e-06, |
|
"loss": 2.0352, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.9485897940953686e-06, |
|
"loss": 2.0477, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.9464891130171644e-06, |
|
"loss": 2.0226, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.9443475475028982e-06, |
|
"loss": 2.0586, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.9421651900593897e-06, |
|
"loss": 2.0146, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.9399421349555036e-06, |
|
"loss": 2.0293, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.9376784782180743e-06, |
|
"loss": 2.0364, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.935374317627762e-06, |
|
"loss": 2.0551, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.9330297527148245e-06, |
|
"loss": 2.035, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.9306448847548215e-06, |
|
"loss": 2.063, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.928219816764238e-06, |
|
"loss": 2.0036, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.9257546534960348e-06, |
|
"loss": 2.0141, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.9232495014351246e-06, |
|
"loss": 2.0572, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.92070446879377e-06, |
|
"loss": 1.9757, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.9181196655069123e-06, |
|
"loss": 2.054, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.9154952032274205e-06, |
|
"loss": 2.0053, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.9128311953212676e-06, |
|
"loss": 2.0291, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.910127756862637e-06, |
|
"loss": 2.0518, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.907385004628948e-06, |
|
"loss": 2.014, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.904603057095815e-06, |
|
"loss": 1.9771, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.9017820344319269e-06, |
|
"loss": 2.0273, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.8989220584938572e-06, |
|
"loss": 1.9516, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.896023252820802e-06, |
|
"loss": 1.9544, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.893085742629241e-06, |
|
"loss": 2.0229, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.8901096548075303e-06, |
|
"loss": 2.0656, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.8870951179104212e-06, |
|
"loss": 2.0522, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.8840422621535064e-06, |
|
"loss": 1.9912, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.8809512194075957e-06, |
|
"loss": 1.9998, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.8778221231930203e-06, |
|
"loss": 2.0262, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.8746551086738639e-06, |
|
"loss": 1.9977, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.871450312652126e-06, |
|
"loss": 2.0313, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.8682078735618107e-06, |
|
"loss": 2.0137, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.8649279314629483e-06, |
|
"loss": 2.1067, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.8616106280355442e-06, |
|
"loss": 2.0302, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.85825610657346e-06, |
|
"loss": 2.0871, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.8548645119782237e-06, |
|
"loss": 2.0279, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.851435990752769e-06, |
|
"loss": 2.098, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.8479706909951095e-06, |
|
"loss": 1.9829, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.8444687623919383e-06, |
|
"loss": 2.0382, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.8409303562121661e-06, |
|
"loss": 2.083, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.837355625300383e-06, |
|
"loss": 1.9887, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.8337447240702592e-06, |
|
"loss": 1.9671, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8300978084978734e-06, |
|
"loss": 1.9845, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8264150361149757e-06, |
|
"loss": 2.0818, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8226965660021834e-06, |
|
"loss": 1.932, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.818942558782108e-06, |
|
"loss": 2.0247, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.8151531766124184e-06, |
|
"loss": 1.9921, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.8113285831788363e-06, |
|
"loss": 2.1025, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.807468943688064e-06, |
|
"loss": 2.0222, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.8035744248606509e-06, |
|
"loss": 2.066, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.7996451949237876e-06, |
|
"loss": 2.0426, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.7956814236040449e-06, |
|
"loss": 2.0251, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.7916832821200373e-06, |
|
"loss": 1.9788, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.78765094317503e-06, |
|
"loss": 2.0021, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.7835845809494766e-06, |
|
"loss": 2.0346, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.7794843710934978e-06, |
|
"loss": 2.1158, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.7753504907192923e-06, |
|
"loss": 2.0334, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.7711831183934858e-06, |
|
"loss": 2.0871, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.7669824341294199e-06, |
|
"loss": 2.0769, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.7627486193793741e-06, |
|
"loss": 2.0154, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.7584818570267283e-06, |
|
"loss": 2.0399, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.7541823313780645e-06, |
|
"loss": 2.0746, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.749850228155203e-06, |
|
"loss": 2.0178, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.745485734487182e-06, |
|
"loss": 2.0285, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.7410890389021735e-06, |
|
"loss": 1.9656, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.7366603313193397e-06, |
|
"loss": 2.0397, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.73219980304063e-06, |
|
"loss": 2.0359, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.727707646742516e-06, |
|
"loss": 2.0021, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.7231840564676707e-06, |
|
"loss": 2.0783, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.7186292276165847e-06, |
|
"loss": 2.0932, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.7140433569391272e-06, |
|
"loss": 2.0216, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.709426642526046e-06, |
|
"loss": 1.9481, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.7047792838004117e-06, |
|
"loss": 1.9999, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.7001014815090037e-06, |
|
"loss": 2.0074, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.6953934377136375e-06, |
|
"loss": 2.0013, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.690655355782437e-06, |
|
"loss": 2.0775, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.6858874403810505e-06, |
|
"loss": 2.0042, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.6810898974638094e-06, |
|
"loss": 1.997, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.6762629342648318e-06, |
|
"loss": 1.9919, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.671406759289071e-06, |
|
"loss": 2.0189, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.666521582303309e-06, |
|
"loss": 1.9571, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.6616076143270947e-06, |
|
"loss": 1.989, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.6566650676236305e-06, |
|
"loss": 2.0321, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.6516941556906009e-06, |
|
"loss": 2.0197, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.646695093250953e-06, |
|
"loss": 2.0096, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.6416680962436187e-06, |
|
"loss": 1.9316, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.6366133818141891e-06, |
|
"loss": 2.0131, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.631531168305534e-06, |
|
"loss": 2.0016, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.6264216752483695e-06, |
|
"loss": 2.0862, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.621285123351777e-06, |
|
"loss": 1.9881, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.6161217344936677e-06, |
|
"loss": 2.0715, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.6109317317111995e-06, |
|
"loss": 2.0205, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.605715339191142e-06, |
|
"loss": 2.0036, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.6004727822601932e-06, |
|
"loss": 2.0367, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.595204287375246e-06, |
|
"loss": 1.9935, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.5899100821136061e-06, |
|
"loss": 2.0275, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.584590395163162e-06, |
|
"loss": 1.947, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.5792454563125058e-06, |
|
"loss": 2.009, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.573875496441008e-06, |
|
"loss": 2.0075, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5684807475088451e-06, |
|
"loss": 1.9395, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5630614425469775e-06, |
|
"loss": 2.0678, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.557617815647086e-06, |
|
"loss": 2.0026, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5521501019514595e-06, |
|
"loss": 2.0108, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5466585376428362e-06, |
|
"loss": 2.0776, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.5411433599342037e-06, |
|
"loss": 1.9468, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.535604807058551e-06, |
|
"loss": 1.9065, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5300431182585777e-06, |
|
"loss": 2.0186, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.524458533776361e-06, |
|
"loss": 2.0968, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5188512948429764e-06, |
|
"loss": 2.0716, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5132216436680797e-06, |
|
"loss": 2.0505, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5075698234294422e-06, |
|
"loss": 2.035, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5018960782624484e-06, |
|
"loss": 1.999, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4962006532495487e-06, |
|
"loss": 1.975, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4904837944096743e-06, |
|
"loss": 2.0454, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4847457486876096e-06, |
|
"loss": 2.0372, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4789867639433246e-06, |
|
"loss": 1.9876, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4732070889412692e-06, |
|
"loss": 2.018, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4674069733396275e-06, |
|
"loss": 2.0072, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4615866676795332e-06, |
|
"loss": 2.0242, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4557464233742476e-06, |
|
"loss": 1.992, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4498864926982995e-06, |
|
"loss": 2.01, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4440071287765874e-06, |
|
"loss": 1.9977, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4381085855734468e-06, |
|
"loss": 2.0828, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4321911178816788e-06, |
|
"loss": 1.9816, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4262549813115447e-06, |
|
"loss": 2.0288, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4203004322797249e-06, |
|
"loss": 2.075, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.4143277279982413e-06, |
|
"loss": 2.0101, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.4083371264633494e-06, |
|
"loss": 1.9774, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.4023288864443915e-06, |
|
"loss": 2.0611, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.3963032674726196e-06, |
|
"loss": 2.0358, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.390260529829986e-06, |
|
"loss": 1.9747, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.3842009345378975e-06, |
|
"loss": 2.0558, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.3781247433459446e-06, |
|
"loss": 2.0173, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.3720322187205895e-06, |
|
"loss": 2.0066, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.3659236238338336e-06, |
|
"loss": 2.0284, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.3597992225518464e-06, |
|
"loss": 2.0022, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.3536592794235695e-06, |
|
"loss": 2.0356, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.3475040596692876e-06, |
|
"loss": 2.0387, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.3413338291691726e-06, |
|
"loss": 2.0006, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.3351488544518e-06, |
|
"loss": 2.0543, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.3289494026826335e-06, |
|
"loss": 2.0021, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.3227357416524875e-06, |
|
"loss": 2.0066, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.316508139765956e-06, |
|
"loss": 1.9943, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.3102668660298226e-06, |
|
"loss": 1.9659, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.304012190041437e-06, |
|
"loss": 2.0319, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.2977443819770714e-06, |
|
"loss": 2.0505, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.2914637125802511e-06, |
|
"loss": 1.9475, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.2851704531500562e-06, |
|
"loss": 2.0061, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.2788648755294054e-06, |
|
"loss": 1.9459, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.272547252093312e-06, |
|
"loss": 2.003, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.2662178557371197e-06, |
|
"loss": 2.0195, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.2598769598647134e-06, |
|
"loss": 2.0393, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.2535248383767101e-06, |
|
"loss": 2.0154, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.2471617656586268e-06, |
|
"loss": 2.0338, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.2407880165690287e-06, |
|
"loss": 1.8944, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.2344038664276568e-06, |
|
"loss": 2.0415, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.2280095910035341e-06, |
|
"loss": 1.9654, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.221605466503055e-06, |
|
"loss": 2.0289, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.2151917695580523e-06, |
|
"loss": 2.0046, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.2087687772138499e-06, |
|
"loss": 2.0994, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.2023367669172946e-06, |
|
"loss": 2.0389, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1958960165047715e-06, |
|
"loss": 2.0108, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1894468041902029e-06, |
|
"loss": 2.1058, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.1829894085530296e-06, |
|
"loss": 2.0575, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.1765241085261801e-06, |
|
"loss": 2.0362, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.1700511833840186e-06, |
|
"loss": 1.9836, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.163570912730283e-06, |
|
"loss": 2.0067, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.157083576486007e-06, |
|
"loss": 2.099, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.1505894548774293e-06, |
|
"loss": 2.0218, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.1440888284238889e-06, |
|
"loss": 2.0159, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.1375819779257057e-06, |
|
"loss": 2.0044, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.1310691844520542e-06, |
|
"loss": 2.098, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.1245507293288204e-06, |
|
"loss": 2.0625, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.1180268941264498e-06, |
|
"loss": 2.0096, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.1114979606477865e-06, |
|
"loss": 1.999, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.104964210915898e-06, |
|
"loss": 2.0256, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0984259271618945e-06, |
|
"loss": 2.0035, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0918833918127375e-06, |
|
"loss": 2.0277, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0853368874790392e-06, |
|
"loss": 2.0701, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0787866969428567e-06, |
|
"loss": 1.9819, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0722331031454747e-06, |
|
"loss": 2.0453, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0656763891751863e-06, |
|
"loss": 2.059, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0591168382550616e-06, |
|
"loss": 2.0781, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.052554733730716e-06, |
|
"loss": 2.0133, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0459903590580706e-06, |
|
"loss": 1.9779, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.0394239977911068e-06, |
|
"loss": 1.9705, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.0328559335696187e-06, |
|
"loss": 1.9889, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.0262864501069616e-06, |
|
"loss": 2.0143, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.0197158311777955e-06, |
|
"loss": 1.9436, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.013144360605829e-06, |
|
"loss": 2.0313, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.0065723222515565e-06, |
|
"loss": 1.9854, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1e-06, |
|
"loss": 1.9626, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.934276777484434e-07, |
|
"loss": 2.0107, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.868556393941712e-07, |
|
"loss": 1.9761, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.802841688222042e-07, |
|
"loss": 1.9393, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.737135498930383e-07, |
|
"loss": 2.0417, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.671440664303812e-07, |
|
"loss": 1.9319, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.605760022088933e-07, |
|
"loss": 1.8757, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.540096409419296e-07, |
|
"loss": 1.9148, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.474452662692837e-07, |
|
"loss": 1.9374, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.408831617449384e-07, |
|
"loss": 1.9556, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.343236108248138e-07, |
|
"loss": 1.9933, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.277668968545253e-07, |
|
"loss": 2.0201, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.212133030571436e-07, |
|
"loss": 1.9669, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.146631125209607e-07, |
|
"loss": 1.9788, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.081166081872625e-07, |
|
"loss": 1.9948, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.015740728381054e-07, |
|
"loss": 1.9575, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 8.950357890841019e-07, |
|
"loss": 2.0099, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.885020393522135e-07, |
|
"loss": 1.9485, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.8197310587355e-07, |
|
"loss": 1.9656, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.754492706711797e-07, |
|
"loss": 1.9441, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.689308155479459e-07, |
|
"loss": 1.9749, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.624180220742945e-07, |
|
"loss": 1.9914, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.559111715761114e-07, |
|
"loss": 1.9579, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.494105451225703e-07, |
|
"loss": 1.9853, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.42916423513993e-07, |
|
"loss": 2.002, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.364290872697173e-07, |
|
"loss": 1.9817, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.299488166159816e-07, |
|
"loss": 1.946, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.234758914738198e-07, |
|
"loss": 1.9846, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.1701059144697e-07, |
|
"loss": 2.0105, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.105531958097972e-07, |
|
"loss": 2.0266, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.041039834952286e-07, |
|
"loss": 2.0291, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 7.976632330827054e-07, |
|
"loss": 2.0173, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 7.912312227861502e-07, |
|
"loss": 1.8973, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 7.848082304419477e-07, |
|
"loss": 1.9388, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 7.78394533496945e-07, |
|
"loss": 1.9652, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 7.719904089964658e-07, |
|
"loss": 1.974, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 7.655961335723432e-07, |
|
"loss": 1.9029, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 7.592119834309713e-07, |
|
"loss": 1.9993, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 7.528382343413732e-07, |
|
"loss": 1.9763, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 7.4647516162329e-07, |
|
"loss": 1.9842, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 7.401230401352866e-07, |
|
"loss": 1.9444, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.337821442628805e-07, |
|
"loss": 2.0126, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.274527479066882e-07, |
|
"loss": 1.9759, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.211351244705946e-07, |
|
"loss": 2.0648, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 7.148295468499437e-07, |
|
"loss": 1.9903, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 7.085362874197489e-07, |
|
"loss": 1.9218, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.022556180229284e-07, |
|
"loss": 2.0351, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 6.959878099585633e-07, |
|
"loss": 1.9695, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 6.897331339701775e-07, |
|
"loss": 1.9638, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 6.834918602340438e-07, |
|
"loss": 1.9558, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 6.772642583475125e-07, |
|
"loss": 1.9801, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 6.710505973173664e-07, |
|
"loss": 1.938, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 6.648511455482003e-07, |
|
"loss": 2.0556, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 6.586661708308272e-07, |
|
"loss": 1.984, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 6.524959403307125e-07, |
|
"loss": 1.9922, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 6.463407205764305e-07, |
|
"loss": 1.9744, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 6.402007774481535e-07, |
|
"loss": 1.9974, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 6.340763761661665e-07, |
|
"loss": 1.9169, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 6.279677812794102e-07, |
|
"loss": 1.9615, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 6.218752566540554e-07, |
|
"loss": 2.0275, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 6.157990654621023e-07, |
|
"loss": 1.9945, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 6.097394701700144e-07, |
|
"loss": 1.9583, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 6.036967325273806e-07, |
|
"loss": 1.919, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5.976711135556086e-07, |
|
"loss": 2.0058, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5.916628735366504e-07, |
|
"loss": 1.8788, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5.856722720017586e-07, |
|
"loss": 2.0375, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5.796995677202753e-07, |
|
"loss": 2.0462, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5.737450186884554e-07, |
|
"loss": 1.979, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5.678088821183211e-07, |
|
"loss": 1.9989, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5.618914144265531e-07, |
|
"loss": 1.913, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5.559928712234126e-07, |
|
"loss": 1.9717, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5.501135073017007e-07, |
|
"loss": 1.9494, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5.442535766257524e-07, |
|
"loss": 1.9811, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5.384133323204665e-07, |
|
"loss": 2.0013, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5.325930266603723e-07, |
|
"loss": 1.993, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5.267929110587307e-07, |
|
"loss": 2.0253, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5.210132360566754e-07, |
|
"loss": 1.9134, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5.152542513123905e-07, |
|
"loss": 1.9898, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.095162055903257e-07, |
|
"loss": 2.0297, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.037993467504514e-07, |
|
"loss": 1.9936, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.981039217375519e-07, |
|
"loss": 1.9283, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.924301765705579e-07, |
|
"loss": 2.0013, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.867783563319205e-07, |
|
"loss": 2.0661, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.811487051570234e-07, |
|
"loss": 1.9584, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.755414662236391e-07, |
|
"loss": 1.9999, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.6995688174142236e-07, |
|
"loss": 1.9383, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.6439519294144925e-07, |
|
"loss": 2.0078, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.5885664006579636e-07, |
|
"loss": 1.9662, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.5334146235716364e-07, |
|
"loss": 2.0411, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.4784989804854047e-07, |
|
"loss": 1.9178, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.4238218435291385e-07, |
|
"loss": 1.971, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.3693855745302265e-07, |
|
"loss": 1.9665, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.3151925249115505e-07, |
|
"loss": 1.9915, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.2612450355899165e-07, |
|
"loss": 2.0694, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.207545436874941e-07, |
|
"loss": 1.9952, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.1540960483683793e-07, |
|
"loss": 1.9204, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.100899178863938e-07, |
|
"loss": 2.0088, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.047957126247541e-07, |
|
"loss": 1.9492, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.9952721773980694e-07, |
|
"loss": 1.9639, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.9428466080885825e-07, |
|
"loss": 2.045, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.890682682888008e-07, |
|
"loss": 1.9398, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.838782655063325e-07, |
|
"loss": 1.9746, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.7871487664822324e-07, |
|
"loss": 1.9819, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.735783247516304e-07, |
|
"loss": 1.9442, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.684688316944662e-07, |
|
"loss": 2.0245, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.6338661818581096e-07, |
|
"loss": 1.9864, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.5833190375638155e-07, |
|
"loss": 1.9861, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.5330490674904736e-07, |
|
"loss": 1.9732, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.483058443093989e-07, |
|
"loss": 2.0337, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.4333493237636955e-07, |
|
"loss": 1.9916, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.383923856729052e-07, |
|
"loss": 1.9695, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.334784176966912e-07, |
|
"loss": 1.9782, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.28593240710929e-07, |
|
"loss": 1.9212, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.237370657351679e-07, |
|
"loss": 1.9699, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.189101025361904e-07, |
|
"loss": 2.0059, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.1411255961894933e-07, |
|
"loss": 2.01, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.09344644217563e-07, |
|
"loss": 2.0103, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.046065622863625e-07, |
|
"loss": 2.0671, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.998985184909959e-07, |
|
"loss": 1.9484, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.952207161995879e-07, |
|
"loss": 1.9604, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.9057335747395415e-07, |
|
"loss": 2.0014, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.859566430608731e-07, |
|
"loss": 2.0554, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.813707723834152e-07, |
|
"loss": 2.0169, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.768159435323293e-07, |
|
"loss": 1.9582, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.722923532574839e-07, |
|
"loss": 2.0139, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.678001969593701e-07, |
|
"loss": 1.9763, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.6333966868066036e-07, |
|
"loss": 1.9817, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.589109610978264e-07, |
|
"loss": 1.9206, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.5451426551281797e-07, |
|
"loss": 2.0391, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.501497718447969e-07, |
|
"loss": 1.9825, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.4581766862193555e-07, |
|
"loss": 1.9787, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.4151814297327155e-07, |
|
"loss": 1.9006, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.3725138062062577e-07, |
|
"loss": 1.9489, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3301756587057987e-07, |
|
"loss": 2.0418, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.2881688160651402e-07, |
|
"loss": 1.9554, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.246495092807077e-07, |
|
"loss": 1.9084, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.20515628906502e-07, |
|
"loss": 2.0168, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.1641541905052306e-07, |
|
"loss": 2.0983, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.1234905682496984e-07, |
|
"loss": 1.9285, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.083167178799623e-07, |
|
"loss": 2.0109, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.0431857639595486e-07, |
|
"loss": 1.9614, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.0035480507621216e-07, |
|
"loss": 1.9873, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.9642557513934932e-07, |
|
"loss": 1.9627, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.9253105631193577e-07, |
|
"loss": 2.0673, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.8867141682116372e-07, |
|
"loss": 2.0468, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.848468233875815e-07, |
|
"loss": 1.9262, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.8105744121789223e-07, |
|
"loss": 1.9323, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.7730343399781666e-07, |
|
"loss": 2.0252, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.735849638850242e-07, |
|
"loss": 1.9096, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.699021915021266e-07, |
|
"loss": 1.991, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.6625527592974076e-07, |
|
"loss": 1.9263, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.62644374699617e-07, |
|
"loss": 1.9394, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.5906964378783372e-07, |
|
"loss": 2.0109, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.5553123760806143e-07, |
|
"loss": 1.9834, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.5202930900489053e-07, |
|
"loss": 1.9854, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.4856400924723077e-07, |
|
"loss": 1.9498, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.4513548802177632e-07, |
|
"loss": 1.9421, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.417438934265397e-07, |
|
"loss": 1.9429, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.3838937196445578e-07, |
|
"loss": 1.9836, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.3507206853705178e-07, |
|
"loss": 1.9655, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.3179212643818926e-07, |
|
"loss": 1.9888, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.2854968734787397e-07, |
|
"loss": 1.9259, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.2534489132613602e-07, |
|
"loss": 1.981, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.2217787680697988e-07, |
|
"loss": 1.9655, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.1904878059240442e-07, |
|
"loss": 1.9276, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.1595773784649387e-07, |
|
"loss": 1.9787, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.1290488208957894e-07, |
|
"loss": 2.0179, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.0989034519246953e-07, |
|
"loss": 2.1181, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.0691425737075899e-07, |
|
"loss": 2.0391, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.0397674717919803e-07, |
|
"loss": 1.9924, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.0107794150614279e-07, |
|
"loss": 2.0327, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 9.821796556807338e-08, |
|
"loss": 2.0083, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 9.539694290418487e-08, |
|
"loss": 1.9819, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 9.261499537105178e-08, |
|
"loss": 1.9439, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.987224313736308e-08, |
|
"loss": 1.9482, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.716880467873233e-08, |
|
"loss": 2.0, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.45047967725796e-08, |
|
"loss": 1.8927, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.188033449308718e-08, |
|
"loss": 1.9897, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 7.929553120622967e-08, |
|
"loss": 1.9354, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 7.675049856487548e-08, |
|
"loss": 2.0003, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 7.424534650396519e-08, |
|
"loss": 2.0627, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 7.178018323576207e-08, |
|
"loss": 2.0044, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 6.935511524517834e-08, |
|
"loss": 1.8565, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 6.697024728517531e-08, |
|
"loss": 1.917, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.462568237223786e-08, |
|
"loss": 1.9613, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.232152178192529e-08, |
|
"loss": 1.9487, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.00578650444965e-08, |
|
"loss": 1.9888, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.7834809940610186e-08, |
|
"loss": 1.9773, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.565245249710193e-08, |
|
"loss": 1.9982, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5.351088698283557e-08, |
|
"loss": 1.9104, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5.141020590463141e-08, |
|
"loss": 1.9952, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.9350500003270456e-08, |
|
"loss": 1.9558, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.73318582495742e-08, |
|
"loss": 1.9665, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.535436784056268e-08, |
|
"loss": 1.9709, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.341811419568653e-08, |
|
"loss": 1.9914, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.1523180953137785e-08, |
|
"loss": 2.0053, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.966964996623734e-08, |
|
"loss": 1.9524, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.785760129989868e-08, |
|
"loss": 1.9811, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.608711322717028e-08, |
|
"loss": 2.0287, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.435826222585325e-08, |
|
"loss": 1.9743, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.267112297519881e-08, |
|
"loss": 1.9485, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.1025768352682114e-08, |
|
"loss": 1.9946, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.942226943085424e-08, |
|
"loss": 1.982, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.7860695474272388e-08, |
|
"loss": 1.9575, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.634111393650751e-08, |
|
"loss": 1.9026, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.4863590457230743e-08, |
|
"loss": 1.9581, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.342818885937825e-08, |
|
"loss": 2.0418, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.2034971146394298e-08, |
|
"loss": 2.0184, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.068399749955263e-08, |
|
"loss": 1.9043, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.9375326275357207e-08, |
|
"loss": 1.9647, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.810901400302145e-08, |
|
"loss": 1.9884, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.6885115382026082e-08, |
|
"loss": 1.9841, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.5703683279756795e-08, |
|
"loss": 1.9336, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.456476872922041e-08, |
|
"loss": 1.9851, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.3468420926840196e-08, |
|
"loss": 1.9871, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.2414687230331122e-08, |
|
"loss": 1.9545, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.1403613156654057e-08, |
|
"loss": 1.969, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.0435242380049558e-08, |
|
"loss": 1.9843, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 9.509616730151826e-09, |
|
"loss": 1.927, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.62677619018104e-09, |
|
"loss": 1.976, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.786758895216628e-09, |
|
"loss": 2.03, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.989601130550249e-09, |
|
"loss": 1.9603, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.235337330117829e-09, |
|
"loss": 1.9866, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5.524000075012969e-09, |
|
"loss": 1.9754, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 4.8556200920786276e-09, |
|
"loss": 1.94, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 4.2302262525805155e-09, |
|
"loss": 1.9412, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.647845570959873e-09, |
|
"loss": 2.0471, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.1085032036664015e-09, |
|
"loss": 1.9434, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 2.612222448071577e-09, |
|
"loss": 2.0134, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.1590247414624564e-09, |
|
"loss": 2.006, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.7489296601156388e-09, |
|
"loss": 1.9638, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.3819549184516111e-09, |
|
"loss": 2.0321, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.0581163682695793e-09, |
|
"loss": 1.9233, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 7.774279980626853e-10, |
|
"loss": 2.0179, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5.399019324139331e-10, |
|
"loss": 1.9882, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.455484314721646e-10, |
|
"loss": 1.9149, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.9437589050907978e-10, |
|
"loss": 1.9477, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 8.639083955663818e-11, |
|
"loss": 1.9924, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.1597943124729288e-11, |
|
"loss": 1.9547, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.9569, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 986, |
|
"total_flos": 4.368889634042675e+16, |
|
"train_loss": 2.002830022247036, |
|
"train_runtime": 7223.1483, |
|
"train_samples_per_second": 8.741, |
|
"train_steps_per_second": 0.137 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 986, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 2000, |
|
"total_flos": 4.368889634042675e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |