|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 30.0, |
|
"global_step": 3180, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.3832186408159307e-05, |
|
"loss": 2.8049, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5764944934955999, |
|
"eval_loss": 2.5287580490112305, |
|
"eval_runtime": 22.0895, |
|
"eval_samples_per_second": 80.128, |
|
"eval_steps_per_second": 0.272, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.5888124272106204e-05, |
|
"loss": 2.4814, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5855989573905803, |
|
"eval_loss": 2.429943561553955, |
|
"eval_runtime": 20.8189, |
|
"eval_samples_per_second": 85.019, |
|
"eval_steps_per_second": 0.288, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.7090770826327895e-05, |
|
"loss": 2.3778, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5945957840073934, |
|
"eval_loss": 2.348792314529419, |
|
"eval_runtime": 20.8031, |
|
"eval_samples_per_second": 85.084, |
|
"eval_steps_per_second": 0.288, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.7944062136053104e-05, |
|
"loss": 2.307, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6146158043850404, |
|
"eval_loss": 2.1809699535369873, |
|
"eval_runtime": 20.6813, |
|
"eval_samples_per_second": 85.585, |
|
"eval_steps_per_second": 0.29, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.860592629580032e-05, |
|
"loss": 2.0807, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7058374199856394, |
|
"eval_loss": 1.4772239923477173, |
|
"eval_runtime": 20.7293, |
|
"eval_samples_per_second": 85.386, |
|
"eval_steps_per_second": 0.289, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.9146708690274792e-05, |
|
"loss": 1.6063, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7498181077384244, |
|
"eval_loss": 1.183250069618225, |
|
"eval_runtime": 20.8379, |
|
"eval_samples_per_second": 84.941, |
|
"eval_steps_per_second": 0.288, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.9603933689955228e-05, |
|
"loss": 1.3122, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7694247060500784, |
|
"eval_loss": 1.0655418634414673, |
|
"eval_runtime": 20.7257, |
|
"eval_samples_per_second": 85.401, |
|
"eval_steps_per_second": 0.289, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1911, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7711616016924874, |
|
"eval_loss": 1.059058427810669, |
|
"eval_runtime": 20.8383, |
|
"eval_samples_per_second": 84.94, |
|
"eval_steps_per_second": 0.288, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1273, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7756633909375628, |
|
"eval_loss": 1.020793080329895, |
|
"eval_runtime": 20.7787, |
|
"eval_samples_per_second": 85.183, |
|
"eval_steps_per_second": 0.289, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0954, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7866386428464195, |
|
"eval_loss": 0.9602928161621094, |
|
"eval_runtime": 20.6664, |
|
"eval_samples_per_second": 85.646, |
|
"eval_steps_per_second": 0.29, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0565, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7900944085581504, |
|
"eval_loss": 0.9395522475242615, |
|
"eval_runtime": 20.7654, |
|
"eval_samples_per_second": 85.238, |
|
"eval_steps_per_second": 0.289, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0351, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7892073832790445, |
|
"eval_loss": 0.9436615705490112, |
|
"eval_runtime": 20.1273, |
|
"eval_samples_per_second": 87.94, |
|
"eval_steps_per_second": 0.298, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0136, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7930297660705511, |
|
"eval_loss": 0.9114692211151123, |
|
"eval_runtime": 20.7577, |
|
"eval_samples_per_second": 85.269, |
|
"eval_steps_per_second": 0.289, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.996, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7986084994358782, |
|
"eval_loss": 0.9008192420005798, |
|
"eval_runtime": 20.8068, |
|
"eval_samples_per_second": 85.068, |
|
"eval_steps_per_second": 0.288, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.985, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8006165209970277, |
|
"eval_loss": 0.8874076008796692, |
|
"eval_runtime": 20.7048, |
|
"eval_samples_per_second": 85.487, |
|
"eval_steps_per_second": 0.29, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9654, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8016079609686441, |
|
"eval_loss": 0.8684276938438416, |
|
"eval_runtime": 20.7798, |
|
"eval_samples_per_second": 85.179, |
|
"eval_steps_per_second": 0.289, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9529, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7997229383352594, |
|
"eval_loss": 0.8870733976364136, |
|
"eval_runtime": 20.7541, |
|
"eval_samples_per_second": 85.284, |
|
"eval_steps_per_second": 0.289, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9442, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8027364602986559, |
|
"eval_loss": 0.8661928772926331, |
|
"eval_runtime": 20.7884, |
|
"eval_samples_per_second": 85.144, |
|
"eval_steps_per_second": 0.289, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9327, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8069067033935976, |
|
"eval_loss": 0.8454752564430237, |
|
"eval_runtime": 20.7342, |
|
"eval_samples_per_second": 85.366, |
|
"eval_steps_per_second": 0.289, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9204, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8092386081748497, |
|
"eval_loss": 0.8449112772941589, |
|
"eval_runtime": 20.6952, |
|
"eval_samples_per_second": 85.527, |
|
"eval_steps_per_second": 0.29, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9097, |
|
"step": 2226 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.809981731823164, |
|
"eval_loss": 0.8322621583938599, |
|
"eval_runtime": 20.7947, |
|
"eval_samples_per_second": 85.118, |
|
"eval_steps_per_second": 0.289, |
|
"step": 2226 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9024, |
|
"step": 2332 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.8133925596890617, |
|
"eval_loss": 0.8194364905357361, |
|
"eval_runtime": 20.8718, |
|
"eval_samples_per_second": 84.803, |
|
"eval_steps_per_second": 0.287, |
|
"step": 2332 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.895, |
|
"step": 2438 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.8129690134121053, |
|
"eval_loss": 0.8122667670249939, |
|
"eval_runtime": 20.8197, |
|
"eval_samples_per_second": 85.016, |
|
"eval_steps_per_second": 0.288, |
|
"step": 2438 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.891, |
|
"step": 2544 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8167250838730983, |
|
"eval_loss": 0.7972639799118042, |
|
"eval_runtime": 20.743, |
|
"eval_samples_per_second": 85.33, |
|
"eval_steps_per_second": 0.289, |
|
"step": 2544 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.8784, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.8133820647746538, |
|
"eval_loss": 0.8083846569061279, |
|
"eval_runtime": 20.8823, |
|
"eval_samples_per_second": 84.761, |
|
"eval_steps_per_second": 0.287, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.8771, |
|
"step": 2756 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.8162191337201458, |
|
"eval_loss": 0.8039098381996155, |
|
"eval_runtime": 20.8754, |
|
"eval_samples_per_second": 84.789, |
|
"eval_steps_per_second": 0.287, |
|
"step": 2756 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.8713, |
|
"step": 2862 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.8169723253335294, |
|
"eval_loss": 0.7862613797187805, |
|
"eval_runtime": 20.8372, |
|
"eval_samples_per_second": 84.944, |
|
"eval_steps_per_second": 0.288, |
|
"step": 2862 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.8707, |
|
"step": 2968 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8173417825447652, |
|
"eval_loss": 0.7960302829742432, |
|
"eval_runtime": 20.7882, |
|
"eval_samples_per_second": 85.145, |
|
"eval_steps_per_second": 0.289, |
|
"step": 2968 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.8571, |
|
"step": 3074 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.8181885887496183, |
|
"eval_loss": 0.7875123023986816, |
|
"eval_runtime": 20.7114, |
|
"eval_samples_per_second": 85.46, |
|
"eval_steps_per_second": 0.29, |
|
"step": 3074 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.8593, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.8199450484519026, |
|
"eval_loss": 0.7765340805053711, |
|
"eval_runtime": 20.9094, |
|
"eval_samples_per_second": 84.651, |
|
"eval_steps_per_second": 0.287, |
|
"step": 3180 |
|
} |
|
], |
|
"max_steps": 4240, |
|
"num_train_epochs": 40, |
|
"total_flos": 1006048479543296.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|