{ |
|
"best_metric": 0.4159727096557617, |
|
"best_model_checkpoint": "vit-msn-small-lateral_flow_ivalidation_train_test_7/checkpoint-3", |
|
"epoch": 92.3076923076923, |
|
"eval_steps": 500, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"eval_accuracy": 0.8791208791208791, |
|
"eval_loss": 0.4159727096557617, |
|
"eval_runtime": 1.1776, |
|
"eval_samples_per_second": 231.83, |
|
"eval_steps_per_second": 4.246, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 1.8461538461538463, |
|
"eval_accuracy": 0.8388278388278388, |
|
"eval_loss": 0.4668194353580475, |
|
"eval_runtime": 1.1816, |
|
"eval_samples_per_second": 231.05, |
|
"eval_steps_per_second": 4.232, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 2.769230769230769, |
|
"eval_accuracy": 0.8021978021978022, |
|
"eval_loss": 0.543300986289978, |
|
"eval_runtime": 1.172, |
|
"eval_samples_per_second": 232.936, |
|
"eval_steps_per_second": 4.266, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"grad_norm": 27.540616989135742, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.3869, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8168498168498168, |
|
"eval_loss": 0.5052347779273987, |
|
"eval_runtime": 1.166, |
|
"eval_samples_per_second": 234.13, |
|
"eval_steps_per_second": 4.288, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 4.923076923076923, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_loss": 0.45910125970840454, |
|
"eval_runtime": 1.1375, |
|
"eval_samples_per_second": 239.995, |
|
"eval_steps_per_second": 4.396, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 5.846153846153846, |
|
"eval_accuracy": 0.8278388278388278, |
|
"eval_loss": 0.4820004999637604, |
|
"eval_runtime": 1.1463, |
|
"eval_samples_per_second": 238.165, |
|
"eval_steps_per_second": 4.362, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 6.153846153846154, |
|
"grad_norm": 4.4243292808532715, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.3658, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 6.769230769230769, |
|
"eval_accuracy": 0.8095238095238095, |
|
"eval_loss": 0.4953090250492096, |
|
"eval_runtime": 1.1575, |
|
"eval_samples_per_second": 235.858, |
|
"eval_steps_per_second": 4.32, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8608058608058609, |
|
"eval_loss": 0.4496568441390991, |
|
"eval_runtime": 1.2191, |
|
"eval_samples_per_second": 223.934, |
|
"eval_steps_per_second": 4.101, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 8.923076923076923, |
|
"eval_accuracy": 0.8315018315018315, |
|
"eval_loss": 0.46859756112098694, |
|
"eval_runtime": 1.1496, |
|
"eval_samples_per_second": 237.484, |
|
"eval_steps_per_second": 4.35, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 9.23076923076923, |
|
"grad_norm": 9.303510665893555, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3439, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 9.846153846153847, |
|
"eval_accuracy": 0.8608058608058609, |
|
"eval_loss": 0.45060521364212036, |
|
"eval_runtime": 1.205, |
|
"eval_samples_per_second": 226.549, |
|
"eval_steps_per_second": 4.149, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 10.76923076923077, |
|
"eval_accuracy": 0.8168498168498168, |
|
"eval_loss": 0.485858678817749, |
|
"eval_runtime": 1.1431, |
|
"eval_samples_per_second": 238.818, |
|
"eval_steps_per_second": 4.374, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8168498168498168, |
|
"eval_loss": 0.49286317825317383, |
|
"eval_runtime": 1.1624, |
|
"eval_samples_per_second": 234.854, |
|
"eval_steps_per_second": 4.301, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 12.307692307692308, |
|
"grad_norm": 5.916250228881836, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.3416, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 12.923076923076923, |
|
"eval_accuracy": 0.8058608058608059, |
|
"eval_loss": 0.49565577507019043, |
|
"eval_runtime": 1.2294, |
|
"eval_samples_per_second": 222.062, |
|
"eval_steps_per_second": 4.067, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 13.846153846153847, |
|
"eval_accuracy": 0.7875457875457875, |
|
"eval_loss": 0.5228819251060486, |
|
"eval_runtime": 1.2179, |
|
"eval_samples_per_second": 224.159, |
|
"eval_steps_per_second": 4.105, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 14.76923076923077, |
|
"eval_accuracy": 0.8534798534798534, |
|
"eval_loss": 0.44727277755737305, |
|
"eval_runtime": 1.1724, |
|
"eval_samples_per_second": 232.862, |
|
"eval_steps_per_second": 4.265, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 15.384615384615385, |
|
"grad_norm": 8.750184059143066, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.324, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8058608058608059, |
|
"eval_loss": 0.5260118842124939, |
|
"eval_runtime": 1.1888, |
|
"eval_samples_per_second": 229.645, |
|
"eval_steps_per_second": 4.206, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 16.923076923076923, |
|
"eval_accuracy": 0.8461538461538461, |
|
"eval_loss": 0.4581928849220276, |
|
"eval_runtime": 1.1456, |
|
"eval_samples_per_second": 238.297, |
|
"eval_steps_per_second": 4.364, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 17.846153846153847, |
|
"eval_accuracy": 0.7838827838827839, |
|
"eval_loss": 0.5298556089401245, |
|
"eval_runtime": 1.1808, |
|
"eval_samples_per_second": 231.203, |
|
"eval_steps_per_second": 4.234, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 18.46153846153846, |
|
"grad_norm": 11.985779762268066, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3273, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 18.76923076923077, |
|
"eval_accuracy": 0.8205128205128205, |
|
"eval_loss": 0.49467265605926514, |
|
"eval_runtime": 1.2593, |
|
"eval_samples_per_second": 216.781, |
|
"eval_steps_per_second": 3.97, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 0.5393444895744324, |
|
"eval_runtime": 1.2277, |
|
"eval_samples_per_second": 222.374, |
|
"eval_steps_per_second": 4.073, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 20.923076923076923, |
|
"eval_accuracy": 0.8278388278388278, |
|
"eval_loss": 0.49159082770347595, |
|
"eval_runtime": 1.1597, |
|
"eval_samples_per_second": 235.406, |
|
"eval_steps_per_second": 4.311, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 21.53846153846154, |
|
"grad_norm": 5.8529887199401855, |
|
"learning_rate": 9.583333333333335e-06, |
|
"loss": 0.3397, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 21.846153846153847, |
|
"eval_accuracy": 0.7802197802197802, |
|
"eval_loss": 0.5359878540039062, |
|
"eval_runtime": 1.1883, |
|
"eval_samples_per_second": 229.735, |
|
"eval_steps_per_second": 4.208, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 22.76923076923077, |
|
"eval_accuracy": 0.7655677655677655, |
|
"eval_loss": 0.5660694241523743, |
|
"eval_runtime": 1.174, |
|
"eval_samples_per_second": 232.531, |
|
"eval_steps_per_second": 4.259, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7216117216117216, |
|
"eval_loss": 0.6353771090507507, |
|
"eval_runtime": 1.231, |
|
"eval_samples_per_second": 221.763, |
|
"eval_steps_per_second": 4.062, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 24.615384615384617, |
|
"grad_norm": 13.490166664123535, |
|
"learning_rate": 9.166666666666666e-06, |
|
"loss": 0.3344, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 24.923076923076923, |
|
"eval_accuracy": 0.7032967032967034, |
|
"eval_loss": 0.6782160401344299, |
|
"eval_runtime": 1.1828, |
|
"eval_samples_per_second": 230.817, |
|
"eval_steps_per_second": 4.227, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 25.846153846153847, |
|
"eval_accuracy": 0.7582417582417582, |
|
"eval_loss": 0.5704138875007629, |
|
"eval_runtime": 1.1635, |
|
"eval_samples_per_second": 234.632, |
|
"eval_steps_per_second": 4.297, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 26.76923076923077, |
|
"eval_accuracy": 0.6776556776556777, |
|
"eval_loss": 0.653683066368103, |
|
"eval_runtime": 1.149, |
|
"eval_samples_per_second": 237.606, |
|
"eval_steps_per_second": 4.352, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 27.692307692307693, |
|
"grad_norm": 11.456792831420898, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 0.3325, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8424908424908425, |
|
"eval_loss": 0.47976481914520264, |
|
"eval_runtime": 1.2235, |
|
"eval_samples_per_second": 223.128, |
|
"eval_steps_per_second": 4.087, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 28.923076923076923, |
|
"eval_accuracy": 0.8058608058608059, |
|
"eval_loss": 0.5158453583717346, |
|
"eval_runtime": 1.1703, |
|
"eval_samples_per_second": 233.277, |
|
"eval_steps_per_second": 4.272, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 29.846153846153847, |
|
"eval_accuracy": 0.7912087912087912, |
|
"eval_loss": 0.5408114790916443, |
|
"eval_runtime": 1.2604, |
|
"eval_samples_per_second": 216.59, |
|
"eval_steps_per_second": 3.967, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 30.76923076923077, |
|
"grad_norm": 13.521626472473145, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.3283, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 30.76923076923077, |
|
"eval_accuracy": 0.73992673992674, |
|
"eval_loss": 0.5963976383209229, |
|
"eval_runtime": 1.2378, |
|
"eval_samples_per_second": 220.561, |
|
"eval_steps_per_second": 4.04, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8205128205128205, |
|
"eval_loss": 0.5069018602371216, |
|
"eval_runtime": 1.2566, |
|
"eval_samples_per_second": 217.258, |
|
"eval_steps_per_second": 3.979, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 32.92307692307692, |
|
"eval_accuracy": 0.7875457875457875, |
|
"eval_loss": 0.5396149754524231, |
|
"eval_runtime": 1.1548, |
|
"eval_samples_per_second": 236.412, |
|
"eval_steps_per_second": 4.33, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 33.84615384615385, |
|
"grad_norm": 7.220104694366455, |
|
"learning_rate": 7.916666666666667e-06, |
|
"loss": 0.3229, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 33.84615384615385, |
|
"eval_accuracy": 0.7985347985347986, |
|
"eval_loss": 0.52034592628479, |
|
"eval_runtime": 1.2979, |
|
"eval_samples_per_second": 210.337, |
|
"eval_steps_per_second": 3.852, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 34.76923076923077, |
|
"eval_accuracy": 0.7875457875457875, |
|
"eval_loss": 0.5464060306549072, |
|
"eval_runtime": 1.1496, |
|
"eval_samples_per_second": 237.467, |
|
"eval_steps_per_second": 4.349, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7509157509157509, |
|
"eval_loss": 0.5890459418296814, |
|
"eval_runtime": 1.1555, |
|
"eval_samples_per_second": 236.272, |
|
"eval_steps_per_second": 4.327, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 36.92307692307692, |
|
"grad_norm": 7.553558826446533, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.3207, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 36.92307692307692, |
|
"eval_accuracy": 0.8131868131868132, |
|
"eval_loss": 0.5079773664474487, |
|
"eval_runtime": 1.1865, |
|
"eval_samples_per_second": 230.094, |
|
"eval_steps_per_second": 4.214, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 37.84615384615385, |
|
"eval_accuracy": 0.8168498168498168, |
|
"eval_loss": 0.49443885684013367, |
|
"eval_runtime": 1.2087, |
|
"eval_samples_per_second": 225.87, |
|
"eval_steps_per_second": 4.137, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 38.76923076923077, |
|
"eval_accuracy": 0.8095238095238095, |
|
"eval_loss": 0.49684473872184753, |
|
"eval_runtime": 1.2281, |
|
"eval_samples_per_second": 222.303, |
|
"eval_steps_per_second": 4.071, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 7.015711307525635, |
|
"learning_rate": 7.083333333333335e-06, |
|
"loss": 0.3286, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.8131868131868132, |
|
"eval_loss": 0.4874400496482849, |
|
"eval_runtime": 1.1583, |
|
"eval_samples_per_second": 235.685, |
|
"eval_steps_per_second": 4.317, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 40.92307692307692, |
|
"eval_accuracy": 0.8058608058608059, |
|
"eval_loss": 0.5012958645820618, |
|
"eval_runtime": 1.1701, |
|
"eval_samples_per_second": 233.312, |
|
"eval_steps_per_second": 4.273, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 41.84615384615385, |
|
"eval_accuracy": 0.7655677655677655, |
|
"eval_loss": 0.5328759551048279, |
|
"eval_runtime": 1.2067, |
|
"eval_samples_per_second": 226.236, |
|
"eval_steps_per_second": 4.144, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 42.76923076923077, |
|
"eval_accuracy": 0.6996336996336996, |
|
"eval_loss": 0.6198887228965759, |
|
"eval_runtime": 1.1849, |
|
"eval_samples_per_second": 230.395, |
|
"eval_steps_per_second": 4.22, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 43.07692307692308, |
|
"grad_norm": 12.217916488647461, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.3154, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.8058608058608059, |
|
"eval_loss": 0.4853915870189667, |
|
"eval_runtime": 1.2003, |
|
"eval_samples_per_second": 227.44, |
|
"eval_steps_per_second": 4.166, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 44.92307692307692, |
|
"eval_accuracy": 0.7509157509157509, |
|
"eval_loss": 0.5544903874397278, |
|
"eval_runtime": 1.2613, |
|
"eval_samples_per_second": 216.445, |
|
"eval_steps_per_second": 3.964, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 45.84615384615385, |
|
"eval_accuracy": 0.7728937728937729, |
|
"eval_loss": 0.5267203450202942, |
|
"eval_runtime": 1.1694, |
|
"eval_samples_per_second": 233.448, |
|
"eval_steps_per_second": 4.276, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 46.15384615384615, |
|
"grad_norm": 5.860217571258545, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.3119, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 46.76923076923077, |
|
"eval_accuracy": 0.7802197802197802, |
|
"eval_loss": 0.5214048027992249, |
|
"eval_runtime": 1.1505, |
|
"eval_samples_per_second": 237.28, |
|
"eval_steps_per_second": 4.346, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7838827838827839, |
|
"eval_loss": 0.5265443325042725, |
|
"eval_runtime": 1.2352, |
|
"eval_samples_per_second": 221.013, |
|
"eval_steps_per_second": 4.048, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 48.92307692307692, |
|
"eval_accuracy": 0.7985347985347986, |
|
"eval_loss": 0.5136817693710327, |
|
"eval_runtime": 1.1863, |
|
"eval_samples_per_second": 230.125, |
|
"eval_steps_per_second": 4.215, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 49.23076923076923, |
|
"grad_norm": 10.198068618774414, |
|
"learning_rate": 5.833333333333334e-06, |
|
"loss": 0.3036, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 49.84615384615385, |
|
"eval_accuracy": 0.7838827838827839, |
|
"eval_loss": 0.5353964567184448, |
|
"eval_runtime": 1.2084, |
|
"eval_samples_per_second": 225.92, |
|
"eval_steps_per_second": 4.138, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 50.76923076923077, |
|
"eval_accuracy": 0.7875457875457875, |
|
"eval_loss": 0.5269169807434082, |
|
"eval_runtime": 1.1557, |
|
"eval_samples_per_second": 236.211, |
|
"eval_steps_per_second": 4.326, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.73992673992674, |
|
"eval_loss": 0.579745888710022, |
|
"eval_runtime": 1.1971, |
|
"eval_samples_per_second": 228.046, |
|
"eval_steps_per_second": 4.177, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 52.30769230769231, |
|
"grad_norm": 4.502013206481934, |
|
"learning_rate": 5.416666666666667e-06, |
|
"loss": 0.2995, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 52.92307692307692, |
|
"eval_accuracy": 0.717948717948718, |
|
"eval_loss": 0.6257714629173279, |
|
"eval_runtime": 1.2141, |
|
"eval_samples_per_second": 224.863, |
|
"eval_steps_per_second": 4.118, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 53.84615384615385, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 0.5511525869369507, |
|
"eval_runtime": 1.2158, |
|
"eval_samples_per_second": 224.549, |
|
"eval_steps_per_second": 4.113, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 54.76923076923077, |
|
"eval_accuracy": 0.7619047619047619, |
|
"eval_loss": 0.5516501069068909, |
|
"eval_runtime": 1.1426, |
|
"eval_samples_per_second": 238.92, |
|
"eval_steps_per_second": 4.376, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 55.38461538461539, |
|
"grad_norm": 7.7625651359558105, |
|
"learning_rate": 5e-06, |
|
"loss": 0.306, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7545787545787546, |
|
"eval_loss": 0.558984637260437, |
|
"eval_runtime": 1.2053, |
|
"eval_samples_per_second": 226.503, |
|
"eval_steps_per_second": 4.148, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 56.92307692307692, |
|
"eval_accuracy": 0.7619047619047619, |
|
"eval_loss": 0.5514388084411621, |
|
"eval_runtime": 1.1913, |
|
"eval_samples_per_second": 229.152, |
|
"eval_steps_per_second": 4.197, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 57.84615384615385, |
|
"eval_accuracy": 0.7509157509157509, |
|
"eval_loss": 0.5596668124198914, |
|
"eval_runtime": 1.2098, |
|
"eval_samples_per_second": 225.659, |
|
"eval_steps_per_second": 4.133, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 58.46153846153846, |
|
"grad_norm": 8.888143539428711, |
|
"learning_rate": 4.583333333333333e-06, |
|
"loss": 0.2989, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 58.76923076923077, |
|
"eval_accuracy": 0.7326007326007326, |
|
"eval_loss": 0.5956733822822571, |
|
"eval_runtime": 1.1927, |
|
"eval_samples_per_second": 228.893, |
|
"eval_steps_per_second": 4.192, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7765567765567766, |
|
"eval_loss": 0.5365986824035645, |
|
"eval_runtime": 1.2153, |
|
"eval_samples_per_second": 224.642, |
|
"eval_steps_per_second": 4.114, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 60.92307692307692, |
|
"eval_accuracy": 0.7728937728937729, |
|
"eval_loss": 0.5464850664138794, |
|
"eval_runtime": 1.23, |
|
"eval_samples_per_second": 221.956, |
|
"eval_steps_per_second": 4.065, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 61.53846153846154, |
|
"grad_norm": 10.457231521606445, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.2931, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 61.84615384615385, |
|
"eval_accuracy": 0.7252747252747253, |
|
"eval_loss": 0.6171274781227112, |
|
"eval_runtime": 1.2391, |
|
"eval_samples_per_second": 220.329, |
|
"eval_steps_per_second": 4.035, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 62.76923076923077, |
|
"eval_accuracy": 0.7509157509157509, |
|
"eval_loss": 0.5768119692802429, |
|
"eval_runtime": 1.1827, |
|
"eval_samples_per_second": 230.828, |
|
"eval_steps_per_second": 4.228, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.7509157509157509, |
|
"eval_loss": 0.5706220865249634, |
|
"eval_runtime": 1.1854, |
|
"eval_samples_per_second": 230.307, |
|
"eval_steps_per_second": 4.218, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 64.61538461538461, |
|
"grad_norm": 6.158288478851318, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.299, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 64.92307692307692, |
|
"eval_accuracy": 0.7362637362637363, |
|
"eval_loss": 0.5962250232696533, |
|
"eval_runtime": 1.1514, |
|
"eval_samples_per_second": 237.094, |
|
"eval_steps_per_second": 4.342, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 65.84615384615384, |
|
"eval_accuracy": 0.7216117216117216, |
|
"eval_loss": 0.6219912767410278, |
|
"eval_runtime": 1.2105, |
|
"eval_samples_per_second": 225.52, |
|
"eval_steps_per_second": 4.13, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 66.76923076923077, |
|
"eval_accuracy": 0.7362637362637363, |
|
"eval_loss": 0.5929316282272339, |
|
"eval_runtime": 1.1491, |
|
"eval_samples_per_second": 237.574, |
|
"eval_steps_per_second": 4.351, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 67.6923076923077, |
|
"grad_norm": 5.717153549194336, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.2969, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.7252747252747253, |
|
"eval_loss": 0.6135749816894531, |
|
"eval_runtime": 1.2314, |
|
"eval_samples_per_second": 221.695, |
|
"eval_steps_per_second": 4.06, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 68.92307692307692, |
|
"eval_accuracy": 0.7289377289377289, |
|
"eval_loss": 0.6092182993888855, |
|
"eval_runtime": 1.165, |
|
"eval_samples_per_second": 234.334, |
|
"eval_steps_per_second": 4.292, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 69.84615384615384, |
|
"eval_accuracy": 0.7252747252747253, |
|
"eval_loss": 0.6028769612312317, |
|
"eval_runtime": 1.1827, |
|
"eval_samples_per_second": 230.837, |
|
"eval_steps_per_second": 4.228, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 70.76923076923077, |
|
"grad_norm": 4.415896415710449, |
|
"learning_rate": 2.916666666666667e-06, |
|
"loss": 0.3015, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 70.76923076923077, |
|
"eval_accuracy": 0.7765567765567766, |
|
"eval_loss": 0.5355977416038513, |
|
"eval_runtime": 1.2214, |
|
"eval_samples_per_second": 223.506, |
|
"eval_steps_per_second": 4.094, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 0.5375617146492004, |
|
"eval_runtime": 1.2823, |
|
"eval_samples_per_second": 212.899, |
|
"eval_steps_per_second": 3.899, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 72.92307692307692, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 0.5885952115058899, |
|
"eval_runtime": 1.158, |
|
"eval_samples_per_second": 235.755, |
|
"eval_steps_per_second": 4.318, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 73.84615384615384, |
|
"grad_norm": 9.932133674621582, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2919, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 73.84615384615384, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 0.5869150161743164, |
|
"eval_runtime": 1.2325, |
|
"eval_samples_per_second": 221.501, |
|
"eval_steps_per_second": 4.057, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 74.76923076923077, |
|
"eval_accuracy": 0.7472527472527473, |
|
"eval_loss": 0.584571897983551, |
|
"eval_runtime": 1.1539, |
|
"eval_samples_per_second": 236.584, |
|
"eval_steps_per_second": 4.333, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.7655677655677655, |
|
"eval_loss": 0.5507005453109741, |
|
"eval_runtime": 1.172, |
|
"eval_samples_per_second": 232.937, |
|
"eval_steps_per_second": 4.266, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"grad_norm": 8.504337310791016, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"loss": 0.288, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"eval_accuracy": 0.7509157509157509, |
|
"eval_loss": 0.5800787806510925, |
|
"eval_runtime": 1.1706, |
|
"eval_samples_per_second": 233.207, |
|
"eval_steps_per_second": 4.271, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 77.84615384615384, |
|
"eval_accuracy": 0.73992673992674, |
|
"eval_loss": 0.6076557040214539, |
|
"eval_runtime": 1.1434, |
|
"eval_samples_per_second": 238.759, |
|
"eval_steps_per_second": 4.373, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 78.76923076923077, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 0.5848133563995361, |
|
"eval_runtime": 1.2128, |
|
"eval_samples_per_second": 225.096, |
|
"eval_steps_per_second": 4.123, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 5.167693614959717, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.2951, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 0.5435333847999573, |
|
"eval_runtime": 1.1913, |
|
"eval_samples_per_second": 229.17, |
|
"eval_steps_per_second": 4.197, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 80.92307692307692, |
|
"eval_accuracy": 0.7655677655677655, |
|
"eval_loss": 0.56382155418396, |
|
"eval_runtime": 1.1629, |
|
"eval_samples_per_second": 234.755, |
|
"eval_steps_per_second": 4.3, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 81.84615384615384, |
|
"eval_accuracy": 0.73992673992674, |
|
"eval_loss": 0.5795217156410217, |
|
"eval_runtime": 1.1592, |
|
"eval_samples_per_second": 235.498, |
|
"eval_steps_per_second": 4.313, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 82.76923076923077, |
|
"eval_accuracy": 0.7509157509157509, |
|
"eval_loss": 0.5773537755012512, |
|
"eval_runtime": 1.1515, |
|
"eval_samples_per_second": 237.079, |
|
"eval_steps_per_second": 4.342, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 83.07692307692308, |
|
"grad_norm": 5.818239688873291, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.2875, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.7509157509157509, |
|
"eval_loss": 0.5702620148658752, |
|
"eval_runtime": 1.1961, |
|
"eval_samples_per_second": 228.243, |
|
"eval_steps_per_second": 4.18, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 84.92307692307692, |
|
"eval_accuracy": 0.7509157509157509, |
|
"eval_loss": 0.5712842345237732, |
|
"eval_runtime": 1.1634, |
|
"eval_samples_per_second": 234.664, |
|
"eval_steps_per_second": 4.298, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 85.84615384615384, |
|
"eval_accuracy": 0.7472527472527473, |
|
"eval_loss": 0.5783692598342896, |
|
"eval_runtime": 1.204, |
|
"eval_samples_per_second": 226.74, |
|
"eval_steps_per_second": 4.153, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 86.15384615384616, |
|
"grad_norm": 6.248855113983154, |
|
"learning_rate": 8.333333333333333e-07, |
|
"loss": 0.2855, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 86.76923076923077, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 0.5903602838516235, |
|
"eval_runtime": 1.1602, |
|
"eval_samples_per_second": 235.3, |
|
"eval_steps_per_second": 4.31, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.7326007326007326, |
|
"eval_loss": 0.5916617512702942, |
|
"eval_runtime": 1.2119, |
|
"eval_samples_per_second": 225.264, |
|
"eval_steps_per_second": 4.126, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 88.92307692307692, |
|
"eval_accuracy": 0.7472527472527473, |
|
"eval_loss": 0.5859794020652771, |
|
"eval_runtime": 1.1576, |
|
"eval_samples_per_second": 235.841, |
|
"eval_steps_per_second": 4.319, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 89.23076923076923, |
|
"grad_norm": 5.813708305358887, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"loss": 0.2964, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 89.84615384615384, |
|
"eval_accuracy": 0.7472527472527473, |
|
"eval_loss": 0.585824728012085, |
|
"eval_runtime": 1.2365, |
|
"eval_samples_per_second": 220.783, |
|
"eval_steps_per_second": 4.044, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 90.76923076923077, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 0.5823063850402832, |
|
"eval_runtime": 1.1583, |
|
"eval_samples_per_second": 235.693, |
|
"eval_steps_per_second": 4.317, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 0.5816611647605896, |
|
"eval_runtime": 1.2737, |
|
"eval_samples_per_second": 214.329, |
|
"eval_steps_per_second": 3.925, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"grad_norm": 5.621781826019287, |
|
"learning_rate": 0.0, |
|
"loss": 0.291, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 0.5815560221672058, |
|
"eval_runtime": 1.2025, |
|
"eval_samples_per_second": 227.027, |
|
"eval_steps_per_second": 4.158, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"step": 300, |
|
"total_flos": 1.4722503891660472e+18, |
|
"train_loss": 0.31525614658991497, |
|
"train_runtime": 772.9292, |
|
"train_samples_per_second": 105.443, |
|
"train_steps_per_second": 0.388 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4722503891660472e+18, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |