|
{ |
|
"best_metric": 0.9047619047619048, |
|
"best_model_checkpoint": "vit-msn-small-corect_dataset_lateral_flow_ivalidation/checkpoint-84", |
|
"epoch": 36.92307692307692, |
|
"eval_steps": 500, |
|
"global_step": 120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"eval_accuracy": 0.6336996336996337, |
|
"eval_loss": 0.6349812746047974, |
|
"eval_runtime": 1.1638, |
|
"eval_samples_per_second": 234.566, |
|
"eval_steps_per_second": 4.296, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 1.8461538461538463, |
|
"eval_accuracy": 0.8021978021978022, |
|
"eval_loss": 0.5046992301940918, |
|
"eval_runtime": 1.119, |
|
"eval_samples_per_second": 243.967, |
|
"eval_steps_per_second": 4.468, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 2.769230769230769, |
|
"eval_accuracy": 0.8791208791208791, |
|
"eval_loss": 0.3700959086418152, |
|
"eval_runtime": 1.119, |
|
"eval_samples_per_second": 243.967, |
|
"eval_steps_per_second": 4.468, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"grad_norm": 46.272850036621094, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.5485, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 0.5379207134246826, |
|
"eval_runtime": 1.1645, |
|
"eval_samples_per_second": 234.43, |
|
"eval_steps_per_second": 4.294, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 4.923076923076923, |
|
"eval_accuracy": 0.8937728937728938, |
|
"eval_loss": 0.27481070160865784, |
|
"eval_runtime": 1.1071, |
|
"eval_samples_per_second": 246.598, |
|
"eval_steps_per_second": 4.516, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 5.846153846153846, |
|
"eval_accuracy": 0.8974358974358975, |
|
"eval_loss": 0.30044275522232056, |
|
"eval_runtime": 1.1757, |
|
"eval_samples_per_second": 232.199, |
|
"eval_steps_per_second": 4.253, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 6.153846153846154, |
|
"grad_norm": 8.343905448913574, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.3335, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 6.769230769230769, |
|
"eval_accuracy": 0.8681318681318682, |
|
"eval_loss": 0.34923306107521057, |
|
"eval_runtime": 1.1527, |
|
"eval_samples_per_second": 236.837, |
|
"eval_steps_per_second": 4.338, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8974358974358975, |
|
"eval_loss": 0.24970969557762146, |
|
"eval_runtime": 1.1752, |
|
"eval_samples_per_second": 232.297, |
|
"eval_steps_per_second": 4.255, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 8.923076923076923, |
|
"eval_accuracy": 0.8315018315018315, |
|
"eval_loss": 0.43036821484565735, |
|
"eval_runtime": 1.1393, |
|
"eval_samples_per_second": 239.625, |
|
"eval_steps_per_second": 4.389, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 9.23076923076923, |
|
"grad_norm": 14.322465896606445, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.3087, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 9.846153846153847, |
|
"eval_accuracy": 0.8791208791208791, |
|
"eval_loss": 0.3478649854660034, |
|
"eval_runtime": 1.2364, |
|
"eval_samples_per_second": 220.797, |
|
"eval_steps_per_second": 4.044, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 10.76923076923077, |
|
"eval_accuracy": 0.8644688644688645, |
|
"eval_loss": 0.37963706254959106, |
|
"eval_runtime": 1.2083, |
|
"eval_samples_per_second": 225.946, |
|
"eval_steps_per_second": 4.138, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8351648351648352, |
|
"eval_loss": 0.4151943325996399, |
|
"eval_runtime": 1.1429, |
|
"eval_samples_per_second": 238.873, |
|
"eval_steps_per_second": 4.375, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 12.307692307692308, |
|
"grad_norm": 14.211008071899414, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.2614, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 12.923076923076923, |
|
"eval_accuracy": 0.9010989010989011, |
|
"eval_loss": 0.31987789273262024, |
|
"eval_runtime": 1.1044, |
|
"eval_samples_per_second": 247.187, |
|
"eval_steps_per_second": 4.527, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 13.846153846153847, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.34341761469841003, |
|
"eval_runtime": 1.1686, |
|
"eval_samples_per_second": 233.619, |
|
"eval_steps_per_second": 4.279, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 14.76923076923077, |
|
"eval_accuracy": 0.8461538461538461, |
|
"eval_loss": 0.400124728679657, |
|
"eval_runtime": 1.2364, |
|
"eval_samples_per_second": 220.801, |
|
"eval_steps_per_second": 4.044, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 15.384615384615385, |
|
"grad_norm": 37.66127395629883, |
|
"learning_rate": 3.240740740740741e-05, |
|
"loss": 0.2471, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8901098901098901, |
|
"eval_loss": 0.322020947933197, |
|
"eval_runtime": 1.1904, |
|
"eval_samples_per_second": 229.338, |
|
"eval_steps_per_second": 4.2, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 16.923076923076923, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.35403817892074585, |
|
"eval_runtime": 1.1771, |
|
"eval_samples_per_second": 231.923, |
|
"eval_steps_per_second": 4.248, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 17.846153846153847, |
|
"eval_accuracy": 0.8534798534798534, |
|
"eval_loss": 0.401885062456131, |
|
"eval_runtime": 1.1593, |
|
"eval_samples_per_second": 235.494, |
|
"eval_steps_per_second": 4.313, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 18.46153846153846, |
|
"grad_norm": 17.38436508178711, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.2817, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 18.76923076923077, |
|
"eval_accuracy": 0.8974358974358975, |
|
"eval_loss": 0.3152279257774353, |
|
"eval_runtime": 1.1761, |
|
"eval_samples_per_second": 232.121, |
|
"eval_steps_per_second": 4.251, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_loss": 0.39776933193206787, |
|
"eval_runtime": 1.1622, |
|
"eval_samples_per_second": 234.895, |
|
"eval_steps_per_second": 4.302, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 20.923076923076923, |
|
"eval_accuracy": 0.8388278388278388, |
|
"eval_loss": 0.42894455790519714, |
|
"eval_runtime": 1.1297, |
|
"eval_samples_per_second": 241.661, |
|
"eval_steps_per_second": 4.426, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 21.53846153846154, |
|
"grad_norm": 5.773376941680908, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 0.2353, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 21.846153846153847, |
|
"eval_accuracy": 0.8974358974358975, |
|
"eval_loss": 0.31458553671836853, |
|
"eval_runtime": 1.1608, |
|
"eval_samples_per_second": 235.18, |
|
"eval_steps_per_second": 4.307, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 22.76923076923077, |
|
"eval_accuracy": 0.8864468864468864, |
|
"eval_loss": 0.3206166625022888, |
|
"eval_runtime": 1.1912, |
|
"eval_samples_per_second": 229.187, |
|
"eval_steps_per_second": 4.198, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8827838827838828, |
|
"eval_loss": 0.3715476095676422, |
|
"eval_runtime": 1.2718, |
|
"eval_samples_per_second": 214.649, |
|
"eval_steps_per_second": 3.931, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 24.615384615384617, |
|
"grad_norm": 7.774621963500977, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.2339, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 24.923076923076923, |
|
"eval_accuracy": 0.8937728937728938, |
|
"eval_loss": 0.34460321068763733, |
|
"eval_runtime": 1.1136, |
|
"eval_samples_per_second": 245.157, |
|
"eval_steps_per_second": 4.49, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 25.846153846153847, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_loss": 0.29301854968070984, |
|
"eval_runtime": 1.1263, |
|
"eval_samples_per_second": 242.377, |
|
"eval_steps_per_second": 4.439, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 26.76923076923077, |
|
"eval_accuracy": 0.8205128205128205, |
|
"eval_loss": 0.43494826555252075, |
|
"eval_runtime": 1.1806, |
|
"eval_samples_per_second": 231.241, |
|
"eval_steps_per_second": 4.235, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 27.692307692307693, |
|
"grad_norm": 14.50500774383545, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.2301, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8681318681318682, |
|
"eval_loss": 0.3630487322807312, |
|
"eval_runtime": 1.192, |
|
"eval_samples_per_second": 229.021, |
|
"eval_steps_per_second": 4.195, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 28.923076923076923, |
|
"eval_accuracy": 0.8644688644688645, |
|
"eval_loss": 0.366910845041275, |
|
"eval_runtime": 1.1667, |
|
"eval_samples_per_second": 234.003, |
|
"eval_steps_per_second": 4.286, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 29.846153846153847, |
|
"eval_accuracy": 0.7912087912087912, |
|
"eval_loss": 0.5037412047386169, |
|
"eval_runtime": 1.1322, |
|
"eval_samples_per_second": 241.114, |
|
"eval_steps_per_second": 4.416, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 30.76923076923077, |
|
"grad_norm": 5.89783239364624, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.2115, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 30.76923076923077, |
|
"eval_accuracy": 0.8827838827838828, |
|
"eval_loss": 0.34486547112464905, |
|
"eval_runtime": 1.1276, |
|
"eval_samples_per_second": 242.098, |
|
"eval_steps_per_second": 4.434, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.9010989010989011, |
|
"eval_loss": 0.328006774187088, |
|
"eval_runtime": 1.1424, |
|
"eval_samples_per_second": 238.979, |
|
"eval_steps_per_second": 4.377, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 32.92307692307692, |
|
"eval_accuracy": 0.8424908424908425, |
|
"eval_loss": 0.40313562750816345, |
|
"eval_runtime": 1.1332, |
|
"eval_samples_per_second": 240.909, |
|
"eval_steps_per_second": 4.412, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 33.84615384615385, |
|
"grad_norm": 13.135037422180176, |
|
"learning_rate": 4.6296296296296296e-06, |
|
"loss": 0.2033, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 33.84615384615385, |
|
"eval_accuracy": 0.8534798534798534, |
|
"eval_loss": 0.3611760437488556, |
|
"eval_runtime": 1.1669, |
|
"eval_samples_per_second": 233.958, |
|
"eval_steps_per_second": 4.285, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 34.76923076923077, |
|
"eval_accuracy": 0.8901098901098901, |
|
"eval_loss": 0.31633052229881287, |
|
"eval_runtime": 1.1155, |
|
"eval_samples_per_second": 244.743, |
|
"eval_steps_per_second": 4.482, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8864468864468864, |
|
"eval_loss": 0.3233657777309418, |
|
"eval_runtime": 1.1244, |
|
"eval_samples_per_second": 242.803, |
|
"eval_steps_per_second": 4.447, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 36.92307692307692, |
|
"grad_norm": 3.5747175216674805, |
|
"learning_rate": 0.0, |
|
"loss": 0.1807, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 36.92307692307692, |
|
"eval_accuracy": 0.8791208791208791, |
|
"eval_loss": 0.33070895075798035, |
|
"eval_runtime": 1.2006, |
|
"eval_samples_per_second": 227.387, |
|
"eval_steps_per_second": 4.165, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 36.92307692307692, |
|
"step": 120, |
|
"total_flos": 5.905752132626842e+17, |
|
"train_loss": 0.27296837071577706, |
|
"train_runtime": 300.1153, |
|
"train_samples_per_second": 108.891, |
|
"train_steps_per_second": 0.4 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 120, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.905752132626842e+17, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|