bert-large-uncased-sclarge / trainer_state.json
ZongqianLi's picture
Upload 130 files
45c9723
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"global_step": 19200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 1.535132032339264e-05,
"loss": 1.5294,
"step": 960
},
{
"epoch": 1.0,
"eval_accuracy": 0.6997061800943186,
"eval_loss": 1.505336046218872,
"eval_runtime": 59.2609,
"eval_samples_per_second": 128.314,
"eval_steps_per_second": 0.81,
"step": 960
},
{
"epoch": 2.0,
"learning_rate": 1.6900880215595094e-05,
"loss": 1.3178,
"step": 1920
},
{
"epoch": 2.0,
"eval_accuracy": 0.7119164174032121,
"eval_loss": 1.4108233451843262,
"eval_runtime": 57.8828,
"eval_samples_per_second": 131.369,
"eval_steps_per_second": 0.829,
"step": 1920
},
{
"epoch": 3.0,
"learning_rate": 1.7807314645155048e-05,
"loss": 1.2464,
"step": 2880
},
{
"epoch": 3.0,
"eval_accuracy": 0.716069757803329,
"eval_loss": 1.3826147317886353,
"eval_runtime": 57.5,
"eval_samples_per_second": 132.244,
"eval_steps_per_second": 0.835,
"step": 2880
},
{
"epoch": 4.0,
"learning_rate": 1.8450440107797548e-05,
"loss": 1.2004,
"step": 3840
},
{
"epoch": 4.0,
"eval_accuracy": 0.7212539141360841,
"eval_loss": 1.3423837423324585,
"eval_runtime": 57.7225,
"eval_samples_per_second": 131.734,
"eval_steps_per_second": 0.832,
"step": 3840
},
{
"epoch": 5.0,
"learning_rate": 1.894928697180815e-05,
"loss": 1.1671,
"step": 4800
},
{
"epoch": 5.0,
"eval_accuracy": 0.723119868068725,
"eval_loss": 1.3292474746704102,
"eval_runtime": 57.4277,
"eval_samples_per_second": 132.41,
"eval_steps_per_second": 0.836,
"step": 4800
},
{
"epoch": 6.0,
"learning_rate": 1.93568745373575e-05,
"loss": 1.1405,
"step": 5760
},
{
"epoch": 6.0,
"eval_accuracy": 0.7279360245602308,
"eval_loss": 1.3002619743347168,
"eval_runtime": 57.5343,
"eval_samples_per_second": 132.165,
"eval_steps_per_second": 0.834,
"step": 5760
},
{
"epoch": 7.0,
"learning_rate": 1.9701484913790247e-05,
"loss": 1.1195,
"step": 6720
},
{
"epoch": 7.0,
"eval_accuracy": 0.7286793766001343,
"eval_loss": 1.2911103963851929,
"eval_runtime": 57.3971,
"eval_samples_per_second": 132.481,
"eval_steps_per_second": 0.836,
"step": 6720
},
{
"epoch": 8.0,
"learning_rate": 2e-05,
"loss": 1.0984,
"step": 7680
},
{
"epoch": 8.0,
"eval_accuracy": 0.731524743026423,
"eval_loss": 1.2744665145874023,
"eval_runtime": 58.0408,
"eval_samples_per_second": 131.011,
"eval_steps_per_second": 0.827,
"step": 7680
},
{
"epoch": 9.0,
"learning_rate": 2e-05,
"loss": 1.0802,
"step": 8640
},
{
"epoch": 9.0,
"eval_accuracy": 0.7321408265476137,
"eval_loss": 1.2718857526779175,
"eval_runtime": 57.237,
"eval_samples_per_second": 132.851,
"eval_steps_per_second": 0.839,
"step": 8640
},
{
"epoch": 10.0,
"learning_rate": 2e-05,
"loss": 1.0678,
"step": 9600
},
{
"epoch": 10.0,
"eval_accuracy": 0.7324240391543068,
"eval_loss": 1.261126160621643,
"eval_runtime": 57.4079,
"eval_samples_per_second": 132.456,
"eval_steps_per_second": 0.836,
"step": 9600
},
{
"epoch": 11.0,
"learning_rate": 2e-05,
"loss": 1.055,
"step": 10560
},
{
"epoch": 11.0,
"eval_accuracy": 0.7351396479197737,
"eval_loss": 1.2426605224609375,
"eval_runtime": 57.4037,
"eval_samples_per_second": 132.465,
"eval_steps_per_second": 0.836,
"step": 10560
},
{
"epoch": 12.0,
"learning_rate": 2e-05,
"loss": 1.0426,
"step": 11520
},
{
"epoch": 12.0,
"eval_accuracy": 0.738889192995377,
"eval_loss": 1.226466417312622,
"eval_runtime": 57.6438,
"eval_samples_per_second": 131.914,
"eval_steps_per_second": 0.833,
"step": 11520
},
{
"epoch": 13.0,
"learning_rate": 2e-05,
"loss": 1.0329,
"step": 12480
},
{
"epoch": 13.0,
"eval_accuracy": 0.7372212557134691,
"eval_loss": 1.234350323677063,
"eval_runtime": 57.4326,
"eval_samples_per_second": 132.399,
"eval_steps_per_second": 0.836,
"step": 12480
},
{
"epoch": 14.0,
"learning_rate": 2e-05,
"loss": 1.0211,
"step": 13440
},
{
"epoch": 14.0,
"eval_accuracy": 0.7388404872505433,
"eval_loss": 1.2182461023330688,
"eval_runtime": 57.2924,
"eval_samples_per_second": 132.723,
"eval_steps_per_second": 0.838,
"step": 13440
},
{
"epoch": 15.0,
"learning_rate": 2e-05,
"loss": 1.0118,
"step": 14400
},
{
"epoch": 15.0,
"eval_accuracy": 0.7408094590258935,
"eval_loss": 1.214383602142334,
"eval_runtime": 62.5016,
"eval_samples_per_second": 121.661,
"eval_steps_per_second": 0.768,
"step": 14400
},
{
"epoch": 16.0,
"learning_rate": 2e-05,
"loss": 1.0038,
"step": 15360
},
{
"epoch": 16.0,
"eval_accuracy": 0.740329327948473,
"eval_loss": 1.2094308137893677,
"eval_runtime": 57.7614,
"eval_samples_per_second": 131.645,
"eval_steps_per_second": 0.831,
"step": 15360
},
{
"epoch": 17.0,
"learning_rate": 2e-05,
"loss": 0.9952,
"step": 16320
},
{
"epoch": 17.0,
"eval_accuracy": 0.7427147172532104,
"eval_loss": 1.1963080167770386,
"eval_runtime": 57.4115,
"eval_samples_per_second": 132.447,
"eval_steps_per_second": 0.836,
"step": 16320
},
{
"epoch": 18.0,
"learning_rate": 2e-05,
"loss": 0.988,
"step": 17280
},
{
"epoch": 18.0,
"eval_accuracy": 0.7429841884959819,
"eval_loss": 1.1982266902923584,
"eval_runtime": 57.4423,
"eval_samples_per_second": 132.376,
"eval_steps_per_second": 0.836,
"step": 17280
},
{
"epoch": 19.0,
"learning_rate": 2e-05,
"loss": 0.9808,
"step": 18240
},
{
"epoch": 19.0,
"eval_accuracy": 0.7437883910645381,
"eval_loss": 1.1920512914657593,
"eval_runtime": 57.5877,
"eval_samples_per_second": 132.042,
"eval_steps_per_second": 0.834,
"step": 18240
},
{
"epoch": 20.0,
"learning_rate": 2e-05,
"loss": 0.9747,
"step": 19200
},
{
"epoch": 20.0,
"eval_accuracy": 0.7435962484051245,
"eval_loss": 1.1889218091964722,
"eval_runtime": 57.672,
"eval_samples_per_second": 131.849,
"eval_steps_per_second": 0.832,
"step": 19200
}
],
"max_steps": 38400,
"num_train_epochs": 40,
"total_flos": 3062931751174144.0,
"trial_name": null,
"trial_params": null
}