|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 37.0, |
|
"global_step": 999, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.2262943855309169e-05, |
|
"loss": 3.3787, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5104135133498819, |
|
"eval_loss": 3.0533134937286377, |
|
"eval_runtime": 7.7526, |
|
"eval_samples_per_second": 57.271, |
|
"eval_steps_per_second": 0.258, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.4841962570206113e-05, |
|
"loss": 3.0423, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5218076561694905, |
|
"eval_loss": 2.9270966053009033, |
|
"eval_runtime": 7.1668, |
|
"eval_samples_per_second": 61.953, |
|
"eval_steps_per_second": 0.279, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.6350591807078892e-05, |
|
"loss": 2.8826, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5349013657056145, |
|
"eval_loss": 2.826730489730835, |
|
"eval_runtime": 7.1613, |
|
"eval_samples_per_second": 62.0, |
|
"eval_steps_per_second": 0.279, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.7420981285103056e-05, |
|
"loss": 2.7528, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5677539663746152, |
|
"eval_loss": 2.5704185962677, |
|
"eval_runtime": 7.1293, |
|
"eval_samples_per_second": 62.279, |
|
"eval_steps_per_second": 0.281, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.825123986666868e-05, |
|
"loss": 2.676, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5691213903273941, |
|
"eval_loss": 2.544525384902954, |
|
"eval_runtime": 7.2153, |
|
"eval_samples_per_second": 61.536, |
|
"eval_steps_per_second": 0.277, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.892961052197583e-05, |
|
"loss": 2.6468, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5640251055842328, |
|
"eval_loss": 2.552178144454956, |
|
"eval_runtime": 7.1679, |
|
"eval_samples_per_second": 61.943, |
|
"eval_steps_per_second": 0.279, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.9503164738653782e-05, |
|
"loss": 2.5425, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.592901878914405, |
|
"eval_loss": 2.344503879547119, |
|
"eval_runtime": 7.1022, |
|
"eval_samples_per_second": 62.516, |
|
"eval_steps_per_second": 0.282, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 1.9999999999999998e-05, |
|
"loss": 2.4507, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6077949332933593, |
|
"eval_loss": 2.190293788909912, |
|
"eval_runtime": 7.1171, |
|
"eval_samples_per_second": 62.385, |
|
"eval_steps_per_second": 0.281, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.3779, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6202360876897133, |
|
"eval_loss": 2.0489487648010254, |
|
"eval_runtime": 7.164, |
|
"eval_samples_per_second": 61.977, |
|
"eval_steps_per_second": 0.279, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.2947, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6456804315147228, |
|
"eval_loss": 1.8817191123962402, |
|
"eval_runtime": 7.1714, |
|
"eval_samples_per_second": 61.912, |
|
"eval_steps_per_second": 0.279, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.1394, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6489209509025051, |
|
"eval_loss": 1.842598795890808, |
|
"eval_runtime": 7.136, |
|
"eval_samples_per_second": 62.22, |
|
"eval_steps_per_second": 0.28, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.0426, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6790733111349398, |
|
"eval_loss": 1.6428455114364624, |
|
"eval_runtime": 7.107, |
|
"eval_samples_per_second": 62.474, |
|
"eval_steps_per_second": 0.281, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.9533, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.690136927023769, |
|
"eval_loss": 1.5633041858673096, |
|
"eval_runtime": 7.193, |
|
"eval_samples_per_second": 61.726, |
|
"eval_steps_per_second": 0.278, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.8598, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7024347351505412, |
|
"eval_loss": 1.4617172479629517, |
|
"eval_runtime": 7.1532, |
|
"eval_samples_per_second": 62.071, |
|
"eval_steps_per_second": 0.28, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7533, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7221761780724604, |
|
"eval_loss": 1.3566689491271973, |
|
"eval_runtime": 7.1795, |
|
"eval_samples_per_second": 61.843, |
|
"eval_steps_per_second": 0.279, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6829, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7226731441436104, |
|
"eval_loss": 1.3594402074813843, |
|
"eval_runtime": 7.1593, |
|
"eval_samples_per_second": 62.017, |
|
"eval_steps_per_second": 0.279, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6363, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7319418805454654, |
|
"eval_loss": 1.3049547672271729, |
|
"eval_runtime": 7.1172, |
|
"eval_samples_per_second": 62.384, |
|
"eval_steps_per_second": 0.281, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5438, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7309124489856713, |
|
"eval_loss": 1.3055365085601807, |
|
"eval_runtime": 7.1524, |
|
"eval_samples_per_second": 62.077, |
|
"eval_steps_per_second": 0.28, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5025, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7245466228759103, |
|
"eval_loss": 1.3259419202804565, |
|
"eval_runtime": 7.228, |
|
"eval_samples_per_second": 61.428, |
|
"eval_steps_per_second": 0.277, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4319, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7438057920631648, |
|
"eval_loss": 1.2239311933517456, |
|
"eval_runtime": 7.1524, |
|
"eval_samples_per_second": 62.077, |
|
"eval_steps_per_second": 0.28, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3768, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7472951634598515, |
|
"eval_loss": 1.1993966102600098, |
|
"eval_runtime": 7.1565, |
|
"eval_samples_per_second": 62.042, |
|
"eval_steps_per_second": 0.279, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3384, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7496693916806925, |
|
"eval_loss": 1.1782174110412598, |
|
"eval_runtime": 7.2171, |
|
"eval_samples_per_second": 61.52, |
|
"eval_steps_per_second": 0.277, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.308, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7524660728164047, |
|
"eval_loss": 1.1727790832519531, |
|
"eval_runtime": 7.1544, |
|
"eval_samples_per_second": 62.06, |
|
"eval_steps_per_second": 0.28, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3139, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7564876470222167, |
|
"eval_loss": 1.1401317119598389, |
|
"eval_runtime": 6.2932, |
|
"eval_samples_per_second": 70.552, |
|
"eval_steps_per_second": 0.318, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2701, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7505940307157346, |
|
"eval_loss": 1.1718164682388306, |
|
"eval_runtime": 7.2194, |
|
"eval_samples_per_second": 61.501, |
|
"eval_steps_per_second": 0.277, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2614, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7606208191526816, |
|
"eval_loss": 1.111540675163269, |
|
"eval_runtime": 7.1423, |
|
"eval_samples_per_second": 62.165, |
|
"eval_steps_per_second": 0.28, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2549, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7517491504126567, |
|
"eval_loss": 1.1640406847000122, |
|
"eval_runtime": 7.113, |
|
"eval_samples_per_second": 62.421, |
|
"eval_steps_per_second": 0.281, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2287, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7552987090963674, |
|
"eval_loss": 1.1474734544754028, |
|
"eval_runtime": 7.171, |
|
"eval_samples_per_second": 61.916, |
|
"eval_steps_per_second": 0.279, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1967, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7647311090144953, |
|
"eval_loss": 1.0948566198349, |
|
"eval_runtime": 7.1733, |
|
"eval_samples_per_second": 61.896, |
|
"eval_steps_per_second": 0.279, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1938, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7628133294013565, |
|
"eval_loss": 1.104235291481018, |
|
"eval_runtime": 7.2106, |
|
"eval_samples_per_second": 61.576, |
|
"eval_steps_per_second": 0.277, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1831, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7568993506493507, |
|
"eval_loss": 1.1557021141052246, |
|
"eval_runtime": 7.147, |
|
"eval_samples_per_second": 62.124, |
|
"eval_steps_per_second": 0.28, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1783, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7663818512012861, |
|
"eval_loss": 1.0878251791000366, |
|
"eval_runtime": 7.2016, |
|
"eval_samples_per_second": 61.653, |
|
"eval_steps_per_second": 0.278, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1571, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7626662971175167, |
|
"eval_loss": 1.1019645929336548, |
|
"eval_runtime": 7.115, |
|
"eval_samples_per_second": 62.403, |
|
"eval_steps_per_second": 0.281, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1511, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7707477642809286, |
|
"eval_loss": 1.0570372343063354, |
|
"eval_runtime": 7.1537, |
|
"eval_samples_per_second": 62.066, |
|
"eval_steps_per_second": 0.28, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1332, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7779759669545625, |
|
"eval_loss": 1.0286684036254883, |
|
"eval_runtime": 7.1266, |
|
"eval_samples_per_second": 62.301, |
|
"eval_steps_per_second": 0.281, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1343, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7709718033554885, |
|
"eval_loss": 1.0590564012527466, |
|
"eval_runtime": 7.1932, |
|
"eval_samples_per_second": 61.725, |
|
"eval_steps_per_second": 0.278, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1164, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7806313208703647, |
|
"eval_loss": 1.0081170797348022, |
|
"eval_runtime": 7.1199, |
|
"eval_samples_per_second": 62.36, |
|
"eval_steps_per_second": 0.281, |
|
"step": 999 |
|
} |
|
], |
|
"max_steps": 1080, |
|
"num_train_epochs": 40, |
|
"total_flos": 118496962805760.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|