|
{ |
|
"best_metric": 2.2723255157470703, |
|
"best_model_checkpoint": "output/checkpoint-500", |
|
"epoch": 0.08333333333333333, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 5.1377, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 5.1144, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.4e-05, |
|
"loss": 5.2944, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 5.277, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4e-05, |
|
"loss": 4.7688, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 4.7068, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.6000000000000006e-05, |
|
"loss": 4.905, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.400000000000001e-05, |
|
"loss": 4.6467, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.2e-05, |
|
"loss": 4.4809, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8e-05, |
|
"loss": 4.4408, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8.800000000000001e-05, |
|
"loss": 4.2903, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.6e-05, |
|
"loss": 4.2546, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00010400000000000001, |
|
"loss": 4.1372, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00011200000000000001, |
|
"loss": 4.1673, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00012, |
|
"loss": 3.8483, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00012800000000000002, |
|
"loss": 3.7866, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00013600000000000003, |
|
"loss": 3.7765, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000144, |
|
"loss": 3.61, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000152, |
|
"loss": 3.5922, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00016, |
|
"loss": 3.5748, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000168, |
|
"loss": 3.462, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00017600000000000002, |
|
"loss": 3.5482, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018400000000000003, |
|
"loss": 3.3806, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000192, |
|
"loss": 3.375, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0002, |
|
"loss": 3.4461, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00020800000000000001, |
|
"loss": 3.3302, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00021600000000000002, |
|
"loss": 3.2568, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00022400000000000002, |
|
"loss": 3.1755, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000232, |
|
"loss": 3.1303, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00024, |
|
"loss": 3.1867, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000248, |
|
"loss": 3.086, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00025600000000000004, |
|
"loss": 3.0142, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000264, |
|
"loss": 2.9919, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00027200000000000005, |
|
"loss": 2.9977, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00028, |
|
"loss": 3.0582, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000288, |
|
"loss": 3.0244, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000296, |
|
"loss": 3.0201, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000304, |
|
"loss": 3.0081, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00031200000000000005, |
|
"loss": 3.0072, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00032, |
|
"loss": 3.0163, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000328, |
|
"loss": 2.9476, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000336, |
|
"loss": 2.9289, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000344, |
|
"loss": 2.8889, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00035200000000000005, |
|
"loss": 2.9479, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00036, |
|
"loss": 2.913, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00036800000000000005, |
|
"loss": 2.8848, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000376, |
|
"loss": 2.9424, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000384, |
|
"loss": 2.8967, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000392, |
|
"loss": 2.8882, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004, |
|
"loss": 2.783, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00040800000000000005, |
|
"loss": 2.9005, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00041600000000000003, |
|
"loss": 2.8624, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00042400000000000006, |
|
"loss": 2.8651, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00043200000000000004, |
|
"loss": 2.9019, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00044000000000000007, |
|
"loss": 2.9397, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00044800000000000005, |
|
"loss": 2.9143, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00045599999999999997, |
|
"loss": 2.816, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000464, |
|
"loss": 2.7941, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000472, |
|
"loss": 2.7889, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00048, |
|
"loss": 2.816, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000488, |
|
"loss": 2.7483, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000496, |
|
"loss": 2.8676, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000504, |
|
"loss": 2.7342, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0005120000000000001, |
|
"loss": 2.8743, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0005200000000000001, |
|
"loss": 2.7635, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000528, |
|
"loss": 2.8303, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000536, |
|
"loss": 2.8751, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0005440000000000001, |
|
"loss": 2.7702, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000552, |
|
"loss": 2.7697, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00056, |
|
"loss": 2.7656, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000568, |
|
"loss": 2.6676, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000576, |
|
"loss": 2.7541, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000584, |
|
"loss": 2.7413, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000592, |
|
"loss": 2.655, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0006000000000000001, |
|
"loss": 2.7992, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000608, |
|
"loss": 2.7969, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000616, |
|
"loss": 2.8561, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0006240000000000001, |
|
"loss": 2.7182, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0006320000000000001, |
|
"loss": 2.7121, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00064, |
|
"loss": 2.7505, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000648, |
|
"loss": 2.7664, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000656, |
|
"loss": 2.7699, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000664, |
|
"loss": 2.687, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000672, |
|
"loss": 2.6854, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00068, |
|
"loss": 2.7284, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000688, |
|
"loss": 2.7535, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000696, |
|
"loss": 2.7291, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0007040000000000001, |
|
"loss": 2.6795, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0007120000000000001, |
|
"loss": 2.6847, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00072, |
|
"loss": 2.8037, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000728, |
|
"loss": 2.7104, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007360000000000001, |
|
"loss": 2.7022, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007440000000000001, |
|
"loss": 2.6876, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000752, |
|
"loss": 2.7657, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00076, |
|
"loss": 2.6784, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000768, |
|
"loss": 2.7005, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000776, |
|
"loss": 2.6969, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000784, |
|
"loss": 2.727, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007920000000000001, |
|
"loss": 2.6475, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0008, |
|
"loss": 2.7626, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000799864406779661, |
|
"loss": 2.7642, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007997288135593221, |
|
"loss": 2.6881, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007995932203389831, |
|
"loss": 2.722, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007994576271186441, |
|
"loss": 2.761, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000799322033898305, |
|
"loss": 2.6431, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007991864406779661, |
|
"loss": 2.6869, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007990508474576271, |
|
"loss": 2.6963, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007989152542372882, |
|
"loss": 2.8242, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007987796610169492, |
|
"loss": 2.6418, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007986440677966103, |
|
"loss": 2.7597, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007985084745762711, |
|
"loss": 2.7569, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007983728813559322, |
|
"loss": 2.7329, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007982372881355932, |
|
"loss": 2.6758, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007981016949152543, |
|
"loss": 2.7202, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007979661016949153, |
|
"loss": 2.6658, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007978305084745764, |
|
"loss": 2.6372, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007976949152542374, |
|
"loss": 2.665, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007975593220338983, |
|
"loss": 2.717, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007974237288135593, |
|
"loss": 2.8033, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007972881355932204, |
|
"loss": 2.6871, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007971525423728814, |
|
"loss": 2.7634, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007970169491525424, |
|
"loss": 2.6665, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007968813559322035, |
|
"loss": 2.6286, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007967457627118644, |
|
"loss": 2.7076, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007966101694915254, |
|
"loss": 2.6828, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007964745762711864, |
|
"loss": 2.6324, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007963389830508475, |
|
"loss": 2.6649, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007962033898305085, |
|
"loss": 2.692, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007960677966101696, |
|
"loss": 2.6541, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007959322033898305, |
|
"loss": 2.6768, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007957966101694916, |
|
"loss": 2.7182, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007956610169491526, |
|
"loss": 2.7264, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007955254237288136, |
|
"loss": 2.6101, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007953898305084746, |
|
"loss": 2.7315, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007952542372881357, |
|
"loss": 2.7184, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007951186440677966, |
|
"loss": 2.7155, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007949830508474577, |
|
"loss": 2.6894, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007948474576271187, |
|
"loss": 2.6394, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007947118644067797, |
|
"loss": 2.6448, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007945762711864407, |
|
"loss": 2.6805, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007944406779661018, |
|
"loss": 2.6213, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007943050847457627, |
|
"loss": 2.6665, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007941694915254237, |
|
"loss": 2.5491, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007940338983050848, |
|
"loss": 2.7293, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007938983050847458, |
|
"loss": 2.6627, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007937627118644069, |
|
"loss": 2.6717, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007936271186440678, |
|
"loss": 2.6122, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007934915254237288, |
|
"loss": 2.6305, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0007933559322033898, |
|
"loss": 2.6266, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007932203389830509, |
|
"loss": 2.6781, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007930847457627119, |
|
"loss": 2.6851, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000792949152542373, |
|
"loss": 2.6379, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000792813559322034, |
|
"loss": 2.6714, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007926779661016949, |
|
"loss": 2.7392, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007925423728813559, |
|
"loss": 2.6727, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000792406779661017, |
|
"loss": 2.7512, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000792271186440678, |
|
"loss": 2.731, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007921355932203391, |
|
"loss": 2.7441, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007920000000000001, |
|
"loss": 2.638, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000791864406779661, |
|
"loss": 2.6419, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000791728813559322, |
|
"loss": 2.582, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007915932203389831, |
|
"loss": 2.724, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007914576271186441, |
|
"loss": 2.7238, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007913220338983051, |
|
"loss": 2.7242, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007911864406779662, |
|
"loss": 2.6581, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007910508474576271, |
|
"loss": 2.6638, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007909152542372882, |
|
"loss": 2.6857, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007907796610169492, |
|
"loss": 2.6805, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007906440677966102, |
|
"loss": 2.5892, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007905084745762712, |
|
"loss": 2.6294, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007903728813559323, |
|
"loss": 2.7016, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007902372881355932, |
|
"loss": 2.6009, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007901016949152543, |
|
"loss": 2.6274, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007899661016949153, |
|
"loss": 2.6633, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007898305084745763, |
|
"loss": 2.6486, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007896949152542373, |
|
"loss": 2.5538, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007895593220338984, |
|
"loss": 2.6583, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007894237288135593, |
|
"loss": 2.6365, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007892881355932204, |
|
"loss": 2.6217, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007891525423728814, |
|
"loss": 2.6431, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007890169491525425, |
|
"loss": 2.6859, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007888813559322034, |
|
"loss": 2.5771, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007887457627118645, |
|
"loss": 2.6483, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007886101694915254, |
|
"loss": 2.6999, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007884745762711864, |
|
"loss": 2.7057, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007883389830508475, |
|
"loss": 2.6446, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007882033898305085, |
|
"loss": 2.7161, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007880677966101696, |
|
"loss": 2.6047, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007879322033898306, |
|
"loss": 2.6412, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007877966101694915, |
|
"loss": 2.631, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007876610169491525, |
|
"loss": 2.6807, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007875254237288136, |
|
"loss": 2.6596, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007873898305084746, |
|
"loss": 2.6051, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007872542372881357, |
|
"loss": 2.6938, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007871186440677967, |
|
"loss": 2.7125, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007869830508474576, |
|
"loss": 2.664, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007868474576271186, |
|
"loss": 2.6355, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007867118644067797, |
|
"loss": 2.6469, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007865762711864407, |
|
"loss": 2.6451, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007864406779661018, |
|
"loss": 2.6577, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007863050847457628, |
|
"loss": 2.6677, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007861694915254239, |
|
"loss": 2.6208, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007860338983050848, |
|
"loss": 2.5922, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007858983050847457, |
|
"loss": 2.633, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007857627118644068, |
|
"loss": 2.6587, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007856271186440678, |
|
"loss": 2.6327, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007854915254237289, |
|
"loss": 2.6015, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007853559322033898, |
|
"loss": 2.6667, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0007852203389830509, |
|
"loss": 2.6262, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007850847457627119, |
|
"loss": 2.7435, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007849491525423729, |
|
"loss": 2.4698, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007848135593220339, |
|
"loss": 2.5554, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000784677966101695, |
|
"loss": 2.5718, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007845423728813559, |
|
"loss": 2.5544, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000784406779661017, |
|
"loss": 2.6508, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000784271186440678, |
|
"loss": 2.613, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007841355932203391, |
|
"loss": 2.5898, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000784, |
|
"loss": 2.5876, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007838644067796611, |
|
"loss": 2.5573, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000783728813559322, |
|
"loss": 2.624, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007835932203389831, |
|
"loss": 2.5814, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007834576271186441, |
|
"loss": 2.6656, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007833220338983052, |
|
"loss": 2.6862, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007831864406779662, |
|
"loss": 2.8027, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007830508474576272, |
|
"loss": 2.8105, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007829152542372881, |
|
"loss": 2.7149, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007827796610169491, |
|
"loss": 2.8449, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007826440677966102, |
|
"loss": 2.9684, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007825084745762712, |
|
"loss": 2.9016, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007823728813559323, |
|
"loss": 2.8576, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007822372881355933, |
|
"loss": 2.9106, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007821016949152542, |
|
"loss": 2.826, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007819661016949152, |
|
"loss": 2.9042, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007818305084745763, |
|
"loss": 2.8738, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007816949152542373, |
|
"loss": 2.9855, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007815593220338984, |
|
"loss": 2.9692, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007814237288135594, |
|
"loss": 3.199, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007812881355932205, |
|
"loss": 3.0884, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007811525423728813, |
|
"loss": 3.042, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007810169491525424, |
|
"loss": 3.1018, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007808813559322034, |
|
"loss": 3.0346, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007807457627118645, |
|
"loss": 3.0977, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007806101694915255, |
|
"loss": 3.115, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007804745762711866, |
|
"loss": 3.2372, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007803389830508475, |
|
"loss": 3.2689, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007802033898305085, |
|
"loss": 3.1267, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007800677966101695, |
|
"loss": 3.0774, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007799322033898305, |
|
"loss": 3.0283, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007797966101694916, |
|
"loss": 3.0163, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007796610169491525, |
|
"loss": 2.9164, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007795254237288136, |
|
"loss": 2.8825, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007793898305084746, |
|
"loss": 2.9542, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007792542372881356, |
|
"loss": 2.9018, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007791186440677966, |
|
"loss": 2.9929, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007789830508474577, |
|
"loss": 3.0326, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007788474576271186, |
|
"loss": 2.9234, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007787118644067797, |
|
"loss": 3.0114, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007785762711864407, |
|
"loss": 3.0775, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007784406779661018, |
|
"loss": 2.9171, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007783050847457628, |
|
"loss": 3.0288, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007781694915254238, |
|
"loss": 2.9062, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007780338983050847, |
|
"loss": 3.092, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007778983050847458, |
|
"loss": 2.9368, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007777627118644068, |
|
"loss": 2.9572, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007776271186440679, |
|
"loss": 2.936, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007774915254237289, |
|
"loss": 2.9461, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007773559322033899, |
|
"loss": 3.0886, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007772203389830508, |
|
"loss": 2.8941, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007770847457627118, |
|
"loss": 2.8142, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0007769491525423729, |
|
"loss": 2.777, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007768135593220339, |
|
"loss": 2.8028, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000776677966101695, |
|
"loss": 2.6842, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000776542372881356, |
|
"loss": 2.7688, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007764067796610171, |
|
"loss": 2.6817, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007762711864406779, |
|
"loss": 2.6677, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000776135593220339, |
|
"loss": 2.6759, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000776, |
|
"loss": 2.7113, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007758644067796611, |
|
"loss": 2.5817, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007757288135593221, |
|
"loss": 2.6208, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007755932203389832, |
|
"loss": 2.6325, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007754576271186441, |
|
"loss": 2.7245, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007753220338983051, |
|
"loss": 2.7362, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007751864406779661, |
|
"loss": 2.5645, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007750508474576272, |
|
"loss": 2.6167, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007749152542372882, |
|
"loss": 2.6517, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007747796610169493, |
|
"loss": 2.6042, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007746440677966102, |
|
"loss": 2.652, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007745084745762712, |
|
"loss": 2.619, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007743728813559322, |
|
"loss": 2.5819, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007742372881355932, |
|
"loss": 2.6991, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007741016949152543, |
|
"loss": 2.6854, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007739661016949152, |
|
"loss": 2.6002, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007738305084745763, |
|
"loss": 2.6408, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007736949152542373, |
|
"loss": 2.6261, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007735593220338984, |
|
"loss": 2.5894, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007734237288135594, |
|
"loss": 2.6171, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007732881355932204, |
|
"loss": 2.6116, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007731525423728813, |
|
"loss": 2.6656, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007730169491525424, |
|
"loss": 2.6228, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007728813559322034, |
|
"loss": 2.608, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007727457627118645, |
|
"loss": 2.5309, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007726101694915255, |
|
"loss": 2.6226, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007724745762711865, |
|
"loss": 2.5953, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007723389830508474, |
|
"loss": 2.6025, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007722033898305085, |
|
"loss": 2.5493, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007720677966101695, |
|
"loss": 2.6022, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007719322033898306, |
|
"loss": 2.5399, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007717966101694916, |
|
"loss": 2.5497, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007716610169491526, |
|
"loss": 2.6287, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007715254237288135, |
|
"loss": 2.6215, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007713898305084745, |
|
"loss": 2.6294, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007712542372881356, |
|
"loss": 2.6212, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007711186440677966, |
|
"loss": 2.5755, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007709830508474577, |
|
"loss": 2.5396, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007708474576271187, |
|
"loss": 2.642, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007707118644067798, |
|
"loss": 2.5289, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007705762711864407, |
|
"loss": 2.5519, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007704406779661017, |
|
"loss": 2.586, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007703050847457627, |
|
"loss": 2.594, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007701694915254238, |
|
"loss": 2.5997, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007700338983050848, |
|
"loss": 2.5599, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007698983050847459, |
|
"loss": 2.603, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007697627118644068, |
|
"loss": 2.5505, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007696271186440678, |
|
"loss": 2.5184, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007694915254237288, |
|
"loss": 2.6093, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007693559322033899, |
|
"loss": 2.6121, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007692203389830509, |
|
"loss": 2.587, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000769084745762712, |
|
"loss": 2.555, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0007689491525423729, |
|
"loss": 2.5838, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007688135593220339, |
|
"loss": 2.4801, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000768677966101695, |
|
"loss": 2.6102, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007685423728813559, |
|
"loss": 2.5922, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000768406779661017, |
|
"loss": 2.5824, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000768271186440678, |
|
"loss": 2.6137, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000768135593220339, |
|
"loss": 2.5511, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000768, |
|
"loss": 2.5406, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007678644067796611, |
|
"loss": 2.5357, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007677288135593221, |
|
"loss": 2.4895, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007675932203389831, |
|
"loss": 2.6371, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000767457627118644, |
|
"loss": 2.5476, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007673220338983051, |
|
"loss": 2.5362, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007671864406779661, |
|
"loss": 2.5852, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007670508474576272, |
|
"loss": 2.5826, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007669152542372882, |
|
"loss": 2.4969, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007667796610169493, |
|
"loss": 2.5199, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007666440677966102, |
|
"loss": 2.5435, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007665084745762712, |
|
"loss": 2.5403, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007663728813559322, |
|
"loss": 2.6631, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007662372881355933, |
|
"loss": 2.6509, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007661016949152543, |
|
"loss": 2.5466, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007659661016949153, |
|
"loss": 2.6346, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007658305084745764, |
|
"loss": 2.6195, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007656949152542373, |
|
"loss": 2.5258, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007655593220338983, |
|
"loss": 2.562, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007654237288135593, |
|
"loss": 2.5812, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007652881355932204, |
|
"loss": 2.5627, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007651525423728814, |
|
"loss": 2.5146, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007650169491525425, |
|
"loss": 2.518, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007648813559322034, |
|
"loss": 2.5651, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007647457627118644, |
|
"loss": 2.5486, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007646101694915254, |
|
"loss": 2.5635, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007644745762711865, |
|
"loss": 2.5231, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007643389830508475, |
|
"loss": 2.5416, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007642033898305086, |
|
"loss": 2.4828, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007640677966101695, |
|
"loss": 2.5094, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007639322033898306, |
|
"loss": 2.5963, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007637966101694915, |
|
"loss": 2.5987, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007636610169491526, |
|
"loss": 2.4665, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007635254237288136, |
|
"loss": 2.5982, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007633898305084747, |
|
"loss": 2.4934, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007632542372881356, |
|
"loss": 2.5585, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007631186440677966, |
|
"loss": 2.595, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007629830508474577, |
|
"loss": 2.5229, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007628474576271187, |
|
"loss": 2.5127, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007627118644067797, |
|
"loss": 2.5398, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007625762711864407, |
|
"loss": 2.4711, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007624406779661017, |
|
"loss": 2.5616, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007623050847457627, |
|
"loss": 2.4629, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007621694915254238, |
|
"loss": 2.4795, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007620338983050848, |
|
"loss": 2.4815, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007618983050847458, |
|
"loss": 2.5178, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007617627118644068, |
|
"loss": 2.535, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007616271186440678, |
|
"loss": 2.5254, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007614915254237288, |
|
"loss": 2.543, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007613559322033899, |
|
"loss": 2.5363, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007612203389830509, |
|
"loss": 2.5608, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000761084745762712, |
|
"loss": 2.5059, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000760949152542373, |
|
"loss": 2.4691, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0007608135593220339, |
|
"loss": 2.5462, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007606779661016949, |
|
"loss": 2.5665, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007605423728813559, |
|
"loss": 2.4889, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000760406779661017, |
|
"loss": 2.4813, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000760271186440678, |
|
"loss": 2.5234, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007601355932203391, |
|
"loss": 2.4285, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00076, |
|
"loss": 2.484, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000759864406779661, |
|
"loss": 2.517, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000759728813559322, |
|
"loss": 2.5167, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007595932203389831, |
|
"loss": 2.5136, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007594576271186441, |
|
"loss": 2.4747, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007593220338983052, |
|
"loss": 2.4998, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007591864406779661, |
|
"loss": 2.5182, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007590508474576272, |
|
"loss": 2.4585, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007589152542372881, |
|
"loss": 2.5584, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007587796610169492, |
|
"loss": 2.4856, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007586440677966102, |
|
"loss": 2.4919, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007585084745762713, |
|
"loss": 2.5149, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007583728813559322, |
|
"loss": 2.522, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007582372881355933, |
|
"loss": 2.4943, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007581016949152543, |
|
"loss": 2.5093, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007579661016949153, |
|
"loss": 2.517, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007578305084745763, |
|
"loss": 2.4795, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007576949152542373, |
|
"loss": 2.5078, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007575593220338983, |
|
"loss": 2.5792, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007574237288135593, |
|
"loss": 2.5151, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007572881355932204, |
|
"loss": 2.4612, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007571525423728814, |
|
"loss": 2.4582, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007570169491525424, |
|
"loss": 2.5092, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007568813559322034, |
|
"loss": 2.4834, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007567457627118644, |
|
"loss": 2.5014, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007566101694915254, |
|
"loss": 2.4331, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007564745762711865, |
|
"loss": 2.5328, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007563389830508475, |
|
"loss": 2.4425, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007562033898305086, |
|
"loss": 2.4065, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007560677966101696, |
|
"loss": 2.4847, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007559322033898305, |
|
"loss": 2.5003, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007557966101694915, |
|
"loss": 2.5485, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007556610169491526, |
|
"loss": 2.5025, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007555254237288136, |
|
"loss": 2.4961, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007553898305084747, |
|
"loss": 2.5246, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007552542372881357, |
|
"loss": 2.4668, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007551186440677966, |
|
"loss": 2.5051, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007549830508474576, |
|
"loss": 2.4929, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007548474576271186, |
|
"loss": 2.4197, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007547118644067797, |
|
"loss": 2.5754, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007545762711864407, |
|
"loss": 2.4829, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007544406779661018, |
|
"loss": 2.4313, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007543050847457627, |
|
"loss": 2.4745, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007541694915254237, |
|
"loss": 2.4724, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007540338983050847, |
|
"loss": 2.4648, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007538983050847458, |
|
"loss": 2.4234, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007537627118644068, |
|
"loss": 2.4681, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007536271186440679, |
|
"loss": 2.4547, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007534915254237288, |
|
"loss": 2.4622, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007533559322033899, |
|
"loss": 2.5777, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007532203389830509, |
|
"loss": 2.4368, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007530847457627119, |
|
"loss": 2.5213, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007529491525423729, |
|
"loss": 2.4985, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000752813559322034, |
|
"loss": 2.4489, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007526779661016949, |
|
"loss": 2.5418, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000752542372881356, |
|
"loss": 2.4771, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000752406779661017, |
|
"loss": 2.5463, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000752271186440678, |
|
"loss": 2.4252, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000752135593220339, |
|
"loss": 2.5508, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000752, |
|
"loss": 2.506, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000751864406779661, |
|
"loss": 2.484, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000751728813559322, |
|
"loss": 2.4426, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007515932203389831, |
|
"loss": 2.5205, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007514576271186441, |
|
"loss": 2.4948, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007513220338983052, |
|
"loss": 2.4423, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007511864406779661, |
|
"loss": 2.522, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007510508474576271, |
|
"loss": 2.5116, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007509152542372881, |
|
"loss": 2.4785, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007507796610169492, |
|
"loss": 2.5521, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007506440677966102, |
|
"loss": 2.52, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007505084745762713, |
|
"loss": 2.5449, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007503728813559323, |
|
"loss": 2.4744, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007502372881355932, |
|
"loss": 2.5192, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007501016949152542, |
|
"loss": 2.4494, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007499661016949153, |
|
"loss": 2.5145, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007498305084745763, |
|
"loss": 2.4541, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007496949152542374, |
|
"loss": 2.563, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007495593220338984, |
|
"loss": 2.4924, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007494237288135595, |
|
"loss": 2.5099, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007492881355932203, |
|
"loss": 2.6177, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007491525423728813, |
|
"loss": 2.5639, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007490169491525424, |
|
"loss": 2.4748, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007488813559322034, |
|
"loss": 2.4668, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007487457627118645, |
|
"loss": 2.6352, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007486101694915254, |
|
"loss": 2.5874, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007484745762711865, |
|
"loss": 2.5707, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007483389830508475, |
|
"loss": 2.5545, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007482033898305085, |
|
"loss": 2.5301, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007480677966101695, |
|
"loss": 2.5508, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007479322033898306, |
|
"loss": 2.5526, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007477966101694915, |
|
"loss": 2.4831, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007476610169491526, |
|
"loss": 2.4991, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007475254237288136, |
|
"loss": 2.4024, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007473898305084746, |
|
"loss": 2.5339, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007472542372881356, |
|
"loss": 2.5592, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007471186440677967, |
|
"loss": 2.4575, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007469830508474576, |
|
"loss": 2.4742, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007468474576271187, |
|
"loss": 2.4736, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007467118644067797, |
|
"loss": 2.4658, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007465762711864408, |
|
"loss": 2.5223, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007464406779661017, |
|
"loss": 2.4854, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007463050847457627, |
|
"loss": 2.4874, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007461694915254237, |
|
"loss": 2.5765, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007460338983050847, |
|
"loss": 2.5211, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007458983050847458, |
|
"loss": 2.5138, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007457627118644068, |
|
"loss": 2.4739, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.2723255157470703, |
|
"eval_rouge1": 0.2537, |
|
"eval_rouge2": 0.0882, |
|
"eval_rougeL": 0.2125, |
|
"eval_rougeLsum": 0.2111, |
|
"eval_runtime": 18.6819, |
|
"eval_samples_per_second": 2.676, |
|
"eval_steps_per_second": 0.375, |
|
"step": 500 |
|
} |
|
], |
|
"max_steps": 6000, |
|
"num_train_epochs": 9223372036854775807, |
|
"total_flos": 4.762543607571456e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|