bert-base-cased-scsmall / trainer_state.json
ZongqianLi's picture
Upload 130 files
d89b034
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 37.0,
"global_step": 999,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 1.2262943855309169e-05,
"loss": 3.3787,
"step": 27
},
{
"epoch": 1.0,
"eval_accuracy": 0.5104135133498819,
"eval_loss": 3.0533134937286377,
"eval_runtime": 7.7526,
"eval_samples_per_second": 57.271,
"eval_steps_per_second": 0.258,
"step": 27
},
{
"epoch": 2.0,
"learning_rate": 1.4841962570206113e-05,
"loss": 3.0423,
"step": 54
},
{
"epoch": 2.0,
"eval_accuracy": 0.5218076561694905,
"eval_loss": 2.9270966053009033,
"eval_runtime": 7.1668,
"eval_samples_per_second": 61.953,
"eval_steps_per_second": 0.279,
"step": 54
},
{
"epoch": 3.0,
"learning_rate": 1.6350591807078892e-05,
"loss": 2.8826,
"step": 81
},
{
"epoch": 3.0,
"eval_accuracy": 0.5349013657056145,
"eval_loss": 2.826730489730835,
"eval_runtime": 7.1613,
"eval_samples_per_second": 62.0,
"eval_steps_per_second": 0.279,
"step": 81
},
{
"epoch": 4.0,
"learning_rate": 1.7420981285103056e-05,
"loss": 2.7528,
"step": 108
},
{
"epoch": 4.0,
"eval_accuracy": 0.5677539663746152,
"eval_loss": 2.5704185962677,
"eval_runtime": 7.1293,
"eval_samples_per_second": 62.279,
"eval_steps_per_second": 0.281,
"step": 108
},
{
"epoch": 5.0,
"learning_rate": 1.825123986666868e-05,
"loss": 2.676,
"step": 135
},
{
"epoch": 5.0,
"eval_accuracy": 0.5691213903273941,
"eval_loss": 2.544525384902954,
"eval_runtime": 7.2153,
"eval_samples_per_second": 61.536,
"eval_steps_per_second": 0.277,
"step": 135
},
{
"epoch": 6.0,
"learning_rate": 1.892961052197583e-05,
"loss": 2.6468,
"step": 162
},
{
"epoch": 6.0,
"eval_accuracy": 0.5640251055842328,
"eval_loss": 2.552178144454956,
"eval_runtime": 7.1679,
"eval_samples_per_second": 61.943,
"eval_steps_per_second": 0.279,
"step": 162
},
{
"epoch": 7.0,
"learning_rate": 1.9503164738653782e-05,
"loss": 2.5425,
"step": 189
},
{
"epoch": 7.0,
"eval_accuracy": 0.592901878914405,
"eval_loss": 2.344503879547119,
"eval_runtime": 7.1022,
"eval_samples_per_second": 62.516,
"eval_steps_per_second": 0.282,
"step": 189
},
{
"epoch": 8.0,
"learning_rate": 1.9999999999999998e-05,
"loss": 2.4507,
"step": 216
},
{
"epoch": 8.0,
"eval_accuracy": 0.6077949332933593,
"eval_loss": 2.190293788909912,
"eval_runtime": 7.1171,
"eval_samples_per_second": 62.385,
"eval_steps_per_second": 0.281,
"step": 216
},
{
"epoch": 9.0,
"learning_rate": 2e-05,
"loss": 2.3779,
"step": 243
},
{
"epoch": 9.0,
"eval_accuracy": 0.6202360876897133,
"eval_loss": 2.0489487648010254,
"eval_runtime": 7.164,
"eval_samples_per_second": 61.977,
"eval_steps_per_second": 0.279,
"step": 243
},
{
"epoch": 10.0,
"learning_rate": 2e-05,
"loss": 2.2947,
"step": 270
},
{
"epoch": 10.0,
"eval_accuracy": 0.6456804315147228,
"eval_loss": 1.8817191123962402,
"eval_runtime": 7.1714,
"eval_samples_per_second": 61.912,
"eval_steps_per_second": 0.279,
"step": 270
},
{
"epoch": 11.0,
"learning_rate": 2e-05,
"loss": 2.1394,
"step": 297
},
{
"epoch": 11.0,
"eval_accuracy": 0.6489209509025051,
"eval_loss": 1.842598795890808,
"eval_runtime": 7.136,
"eval_samples_per_second": 62.22,
"eval_steps_per_second": 0.28,
"step": 297
},
{
"epoch": 12.0,
"learning_rate": 2e-05,
"loss": 2.0426,
"step": 324
},
{
"epoch": 12.0,
"eval_accuracy": 0.6790733111349398,
"eval_loss": 1.6428455114364624,
"eval_runtime": 7.107,
"eval_samples_per_second": 62.474,
"eval_steps_per_second": 0.281,
"step": 324
},
{
"epoch": 13.0,
"learning_rate": 2e-05,
"loss": 1.9533,
"step": 351
},
{
"epoch": 13.0,
"eval_accuracy": 0.690136927023769,
"eval_loss": 1.5633041858673096,
"eval_runtime": 7.193,
"eval_samples_per_second": 61.726,
"eval_steps_per_second": 0.278,
"step": 351
},
{
"epoch": 14.0,
"learning_rate": 2e-05,
"loss": 1.8598,
"step": 378
},
{
"epoch": 14.0,
"eval_accuracy": 0.7024347351505412,
"eval_loss": 1.4617172479629517,
"eval_runtime": 7.1532,
"eval_samples_per_second": 62.071,
"eval_steps_per_second": 0.28,
"step": 378
},
{
"epoch": 15.0,
"learning_rate": 2e-05,
"loss": 1.7533,
"step": 405
},
{
"epoch": 15.0,
"eval_accuracy": 0.7221761780724604,
"eval_loss": 1.3566689491271973,
"eval_runtime": 7.1795,
"eval_samples_per_second": 61.843,
"eval_steps_per_second": 0.279,
"step": 405
},
{
"epoch": 16.0,
"learning_rate": 2e-05,
"loss": 1.6829,
"step": 432
},
{
"epoch": 16.0,
"eval_accuracy": 0.7226731441436104,
"eval_loss": 1.3594402074813843,
"eval_runtime": 7.1593,
"eval_samples_per_second": 62.017,
"eval_steps_per_second": 0.279,
"step": 432
},
{
"epoch": 17.0,
"learning_rate": 2e-05,
"loss": 1.6363,
"step": 459
},
{
"epoch": 17.0,
"eval_accuracy": 0.7319418805454654,
"eval_loss": 1.3049547672271729,
"eval_runtime": 7.1172,
"eval_samples_per_second": 62.384,
"eval_steps_per_second": 0.281,
"step": 459
},
{
"epoch": 18.0,
"learning_rate": 2e-05,
"loss": 1.5438,
"step": 486
},
{
"epoch": 18.0,
"eval_accuracy": 0.7309124489856713,
"eval_loss": 1.3055365085601807,
"eval_runtime": 7.1524,
"eval_samples_per_second": 62.077,
"eval_steps_per_second": 0.28,
"step": 486
},
{
"epoch": 19.0,
"learning_rate": 2e-05,
"loss": 1.5025,
"step": 513
},
{
"epoch": 19.0,
"eval_accuracy": 0.7245466228759103,
"eval_loss": 1.3259419202804565,
"eval_runtime": 7.228,
"eval_samples_per_second": 61.428,
"eval_steps_per_second": 0.277,
"step": 513
},
{
"epoch": 20.0,
"learning_rate": 2e-05,
"loss": 1.4319,
"step": 540
},
{
"epoch": 20.0,
"eval_accuracy": 0.7438057920631648,
"eval_loss": 1.2239311933517456,
"eval_runtime": 7.1524,
"eval_samples_per_second": 62.077,
"eval_steps_per_second": 0.28,
"step": 540
},
{
"epoch": 21.0,
"learning_rate": 2e-05,
"loss": 1.3768,
"step": 567
},
{
"epoch": 21.0,
"eval_accuracy": 0.7472951634598515,
"eval_loss": 1.1993966102600098,
"eval_runtime": 7.1565,
"eval_samples_per_second": 62.042,
"eval_steps_per_second": 0.279,
"step": 567
},
{
"epoch": 22.0,
"learning_rate": 2e-05,
"loss": 1.3384,
"step": 594
},
{
"epoch": 22.0,
"eval_accuracy": 0.7496693916806925,
"eval_loss": 1.1782174110412598,
"eval_runtime": 7.2171,
"eval_samples_per_second": 61.52,
"eval_steps_per_second": 0.277,
"step": 594
},
{
"epoch": 23.0,
"learning_rate": 2e-05,
"loss": 1.308,
"step": 621
},
{
"epoch": 23.0,
"eval_accuracy": 0.7524660728164047,
"eval_loss": 1.1727790832519531,
"eval_runtime": 7.1544,
"eval_samples_per_second": 62.06,
"eval_steps_per_second": 0.28,
"step": 621
},
{
"epoch": 24.0,
"learning_rate": 2e-05,
"loss": 1.3139,
"step": 648
},
{
"epoch": 24.0,
"eval_accuracy": 0.7564876470222167,
"eval_loss": 1.1401317119598389,
"eval_runtime": 6.2932,
"eval_samples_per_second": 70.552,
"eval_steps_per_second": 0.318,
"step": 648
},
{
"epoch": 25.0,
"learning_rate": 2e-05,
"loss": 1.2701,
"step": 675
},
{
"epoch": 25.0,
"eval_accuracy": 0.7505940307157346,
"eval_loss": 1.1718164682388306,
"eval_runtime": 7.2194,
"eval_samples_per_second": 61.501,
"eval_steps_per_second": 0.277,
"step": 675
},
{
"epoch": 26.0,
"learning_rate": 2e-05,
"loss": 1.2614,
"step": 702
},
{
"epoch": 26.0,
"eval_accuracy": 0.7606208191526816,
"eval_loss": 1.111540675163269,
"eval_runtime": 7.1423,
"eval_samples_per_second": 62.165,
"eval_steps_per_second": 0.28,
"step": 702
},
{
"epoch": 27.0,
"learning_rate": 2e-05,
"loss": 1.2549,
"step": 729
},
{
"epoch": 27.0,
"eval_accuracy": 0.7517491504126567,
"eval_loss": 1.1640406847000122,
"eval_runtime": 7.113,
"eval_samples_per_second": 62.421,
"eval_steps_per_second": 0.281,
"step": 729
},
{
"epoch": 28.0,
"learning_rate": 2e-05,
"loss": 1.2287,
"step": 756
},
{
"epoch": 28.0,
"eval_accuracy": 0.7552987090963674,
"eval_loss": 1.1474734544754028,
"eval_runtime": 7.171,
"eval_samples_per_second": 61.916,
"eval_steps_per_second": 0.279,
"step": 756
},
{
"epoch": 29.0,
"learning_rate": 2e-05,
"loss": 1.1967,
"step": 783
},
{
"epoch": 29.0,
"eval_accuracy": 0.7647311090144953,
"eval_loss": 1.0948566198349,
"eval_runtime": 7.1733,
"eval_samples_per_second": 61.896,
"eval_steps_per_second": 0.279,
"step": 783
},
{
"epoch": 30.0,
"learning_rate": 2e-05,
"loss": 1.1938,
"step": 810
},
{
"epoch": 30.0,
"eval_accuracy": 0.7628133294013565,
"eval_loss": 1.104235291481018,
"eval_runtime": 7.2106,
"eval_samples_per_second": 61.576,
"eval_steps_per_second": 0.277,
"step": 810
},
{
"epoch": 31.0,
"learning_rate": 2e-05,
"loss": 1.1831,
"step": 837
},
{
"epoch": 31.0,
"eval_accuracy": 0.7568993506493507,
"eval_loss": 1.1557021141052246,
"eval_runtime": 7.147,
"eval_samples_per_second": 62.124,
"eval_steps_per_second": 0.28,
"step": 837
},
{
"epoch": 32.0,
"learning_rate": 2e-05,
"loss": 1.1783,
"step": 864
},
{
"epoch": 32.0,
"eval_accuracy": 0.7663818512012861,
"eval_loss": 1.0878251791000366,
"eval_runtime": 7.2016,
"eval_samples_per_second": 61.653,
"eval_steps_per_second": 0.278,
"step": 864
},
{
"epoch": 33.0,
"learning_rate": 2e-05,
"loss": 1.1571,
"step": 891
},
{
"epoch": 33.0,
"eval_accuracy": 0.7626662971175167,
"eval_loss": 1.1019645929336548,
"eval_runtime": 7.115,
"eval_samples_per_second": 62.403,
"eval_steps_per_second": 0.281,
"step": 891
},
{
"epoch": 34.0,
"learning_rate": 2e-05,
"loss": 1.1511,
"step": 918
},
{
"epoch": 34.0,
"eval_accuracy": 0.7707477642809286,
"eval_loss": 1.0570372343063354,
"eval_runtime": 7.1537,
"eval_samples_per_second": 62.066,
"eval_steps_per_second": 0.28,
"step": 918
},
{
"epoch": 35.0,
"learning_rate": 2e-05,
"loss": 1.1332,
"step": 945
},
{
"epoch": 35.0,
"eval_accuracy": 0.7779759669545625,
"eval_loss": 1.0286684036254883,
"eval_runtime": 7.1266,
"eval_samples_per_second": 62.301,
"eval_steps_per_second": 0.281,
"step": 945
},
{
"epoch": 36.0,
"learning_rate": 2e-05,
"loss": 1.1343,
"step": 972
},
{
"epoch": 36.0,
"eval_accuracy": 0.7709718033554885,
"eval_loss": 1.0590564012527466,
"eval_runtime": 7.1932,
"eval_samples_per_second": 61.725,
"eval_steps_per_second": 0.278,
"step": 972
},
{
"epoch": 37.0,
"learning_rate": 2e-05,
"loss": 1.1164,
"step": 999
},
{
"epoch": 37.0,
"eval_accuracy": 0.7806313208703647,
"eval_loss": 1.0081170797348022,
"eval_runtime": 7.1199,
"eval_samples_per_second": 62.36,
"eval_steps_per_second": 0.281,
"step": 999
}
],
"max_steps": 1080,
"num_train_epochs": 40,
"total_flos": 118496962805760.0,
"trial_name": null,
"trial_params": null
}