bert-base-cased-sclarge / trainer_state.json
ZongqianLi's picture
Upload 130 files
7d40fe7
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 35.0,
"global_step": 33600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 1.535132032339264e-05,
"loss": 2.3734,
"step": 960
},
{
"epoch": 1.0,
"eval_accuracy": 0.6711076941043658,
"eval_loss": 1.6856393814086914,
"eval_runtime": 42.9945,
"eval_samples_per_second": 176.86,
"eval_steps_per_second": 1.116,
"step": 960
},
{
"epoch": 2.0,
"learning_rate": 1.6900880215595094e-05,
"loss": 1.5002,
"step": 1920
},
{
"epoch": 2.0,
"eval_accuracy": 0.6930295365255809,
"eval_loss": 1.5316802263259888,
"eval_runtime": 40.2605,
"eval_samples_per_second": 188.87,
"eval_steps_per_second": 1.192,
"step": 1920
},
{
"epoch": 3.0,
"learning_rate": 1.7807314645155048e-05,
"loss": 1.3682,
"step": 2880
},
{
"epoch": 3.0,
"eval_accuracy": 0.7001060409279067,
"eval_loss": 1.4794470071792603,
"eval_runtime": 40.3426,
"eval_samples_per_second": 188.486,
"eval_steps_per_second": 1.19,
"step": 2880
},
{
"epoch": 4.0,
"learning_rate": 1.8450440107797548e-05,
"loss": 1.3057,
"step": 3840
},
{
"epoch": 4.0,
"eval_accuracy": 0.7058089394925496,
"eval_loss": 1.445176362991333,
"eval_runtime": 40.2825,
"eval_samples_per_second": 188.767,
"eval_steps_per_second": 1.192,
"step": 3840
},
{
"epoch": 5.0,
"learning_rate": 1.894928697180815e-05,
"loss": 1.2652,
"step": 4800
},
{
"epoch": 5.0,
"eval_accuracy": 0.7078205742901283,
"eval_loss": 1.4240751266479492,
"eval_runtime": 44.133,
"eval_samples_per_second": 172.297,
"eval_steps_per_second": 1.088,
"step": 4800
},
{
"epoch": 6.0,
"learning_rate": 1.93568745373575e-05,
"loss": 1.2347,
"step": 5760
},
{
"epoch": 6.0,
"eval_accuracy": 0.712788831055115,
"eval_loss": 1.393662452697754,
"eval_runtime": 40.438,
"eval_samples_per_second": 188.041,
"eval_steps_per_second": 1.187,
"step": 5760
},
{
"epoch": 7.0,
"learning_rate": 1.9701484913790247e-05,
"loss": 1.2117,
"step": 6720
},
{
"epoch": 7.0,
"eval_accuracy": 0.7158340831339961,
"eval_loss": 1.3783458471298218,
"eval_runtime": 47.0316,
"eval_samples_per_second": 161.679,
"eval_steps_per_second": 1.021,
"step": 6720
},
{
"epoch": 8.0,
"learning_rate": 2e-05,
"loss": 1.1863,
"step": 7680
},
{
"epoch": 8.0,
"eval_accuracy": 0.7177737277852768,
"eval_loss": 1.356780767440796,
"eval_runtime": 40.2772,
"eval_samples_per_second": 188.791,
"eval_steps_per_second": 1.192,
"step": 7680
},
{
"epoch": 9.0,
"learning_rate": 2e-05,
"loss": 1.167,
"step": 8640
},
{
"epoch": 9.0,
"eval_accuracy": 0.7172315754804711,
"eval_loss": 1.362362265586853,
"eval_runtime": 40.3662,
"eval_samples_per_second": 188.375,
"eval_steps_per_second": 1.189,
"step": 8640
},
{
"epoch": 10.0,
"learning_rate": 2e-05,
"loss": 1.1528,
"step": 9600
},
{
"epoch": 10.0,
"eval_accuracy": 0.7208890541082557,
"eval_loss": 1.3375591039657593,
"eval_runtime": 40.4301,
"eval_samples_per_second": 188.077,
"eval_steps_per_second": 1.187,
"step": 9600
},
{
"epoch": 11.0,
"learning_rate": 2e-05,
"loss": 1.1403,
"step": 10560
},
{
"epoch": 11.0,
"eval_accuracy": 0.722637380785269,
"eval_loss": 1.3316693305969238,
"eval_runtime": 40.2778,
"eval_samples_per_second": 188.789,
"eval_steps_per_second": 1.192,
"step": 10560
},
{
"epoch": 12.0,
"learning_rate": 2e-05,
"loss": 1.1276,
"step": 11520
},
{
"epoch": 12.0,
"eval_accuracy": 0.7243298395325108,
"eval_loss": 1.3127739429473877,
"eval_runtime": 40.1949,
"eval_samples_per_second": 189.178,
"eval_steps_per_second": 1.194,
"step": 11520
},
{
"epoch": 13.0,
"learning_rate": 2e-05,
"loss": 1.1176,
"step": 12480
},
{
"epoch": 13.0,
"eval_accuracy": 0.7242034818873964,
"eval_loss": 1.3149378299713135,
"eval_runtime": 40.5071,
"eval_samples_per_second": 187.72,
"eval_steps_per_second": 1.185,
"step": 12480
},
{
"epoch": 14.0,
"learning_rate": 2e-05,
"loss": 1.1061,
"step": 13440
},
{
"epoch": 14.0,
"eval_accuracy": 0.7251281499452183,
"eval_loss": 1.3011534214019775,
"eval_runtime": 41.5767,
"eval_samples_per_second": 182.891,
"eval_steps_per_second": 1.154,
"step": 13440
},
{
"epoch": 15.0,
"learning_rate": 2e-05,
"loss": 1.0953,
"step": 14400
},
{
"epoch": 15.0,
"eval_accuracy": 0.7274233697774307,
"eval_loss": 1.2953981161117554,
"eval_runtime": 40.2998,
"eval_samples_per_second": 188.686,
"eval_steps_per_second": 1.191,
"step": 14400
},
{
"epoch": 16.0,
"learning_rate": 2e-05,
"loss": 1.0872,
"step": 15360
},
{
"epoch": 16.0,
"eval_accuracy": 0.729807233922918,
"eval_loss": 1.2836934328079224,
"eval_runtime": 40.3358,
"eval_samples_per_second": 188.517,
"eval_steps_per_second": 1.19,
"step": 15360
},
{
"epoch": 17.0,
"learning_rate": 2e-05,
"loss": 1.0778,
"step": 16320
},
{
"epoch": 17.0,
"eval_accuracy": 0.728892083446406,
"eval_loss": 1.2819887399673462,
"eval_runtime": 40.2817,
"eval_samples_per_second": 188.77,
"eval_steps_per_second": 1.192,
"step": 16320
},
{
"epoch": 18.0,
"learning_rate": 2e-05,
"loss": 1.0709,
"step": 17280
},
{
"epoch": 18.0,
"eval_accuracy": 0.7314453291503982,
"eval_loss": 1.2700670957565308,
"eval_runtime": 40.2932,
"eval_samples_per_second": 188.717,
"eval_steps_per_second": 1.191,
"step": 17280
},
{
"epoch": 19.0,
"learning_rate": 2e-05,
"loss": 1.0629,
"step": 18240
},
{
"epoch": 19.0,
"eval_accuracy": 0.7313673197000715,
"eval_loss": 1.2694642543792725,
"eval_runtime": 40.2951,
"eval_samples_per_second": 188.708,
"eval_steps_per_second": 1.191,
"step": 18240
},
{
"epoch": 20.0,
"learning_rate": 2e-05,
"loss": 1.0575,
"step": 19200
},
{
"epoch": 20.0,
"eval_accuracy": 0.7321188555482103,
"eval_loss": 1.269392490386963,
"eval_runtime": 40.9309,
"eval_samples_per_second": 185.776,
"eval_steps_per_second": 1.173,
"step": 19200
},
{
"epoch": 21.0,
"learning_rate": 2e-05,
"loss": 1.0494,
"step": 20160
},
{
"epoch": 21.0,
"eval_accuracy": 0.7336233174474587,
"eval_loss": 1.2491707801818848,
"eval_runtime": 41.3244,
"eval_samples_per_second": 184.007,
"eval_steps_per_second": 1.162,
"step": 20160
},
{
"epoch": 22.0,
"learning_rate": 2e-05,
"loss": 1.0443,
"step": 21120
},
{
"epoch": 22.0,
"eval_accuracy": 0.7333931208601605,
"eval_loss": 1.2573738098144531,
"eval_runtime": 40.4041,
"eval_samples_per_second": 188.199,
"eval_steps_per_second": 1.188,
"step": 21120
},
{
"epoch": 23.0,
"learning_rate": 2e-05,
"loss": 1.0375,
"step": 22080
},
{
"epoch": 23.0,
"eval_accuracy": 0.7354173520930503,
"eval_loss": 1.2430847883224487,
"eval_runtime": 40.4071,
"eval_samples_per_second": 188.185,
"eval_steps_per_second": 1.188,
"step": 22080
},
{
"epoch": 24.0,
"learning_rate": 2e-05,
"loss": 1.0332,
"step": 23040
},
{
"epoch": 24.0,
"eval_accuracy": 0.7351443165552266,
"eval_loss": 1.240692377090454,
"eval_runtime": 40.2279,
"eval_samples_per_second": 189.023,
"eval_steps_per_second": 1.193,
"step": 23040
},
{
"epoch": 25.0,
"learning_rate": 2e-05,
"loss": 1.0279,
"step": 24000
},
{
"epoch": 25.0,
"eval_accuracy": 0.7350845404729578,
"eval_loss": 1.2445788383483887,
"eval_runtime": 40.3432,
"eval_samples_per_second": 188.483,
"eval_steps_per_second": 1.19,
"step": 24000
},
{
"epoch": 26.0,
"learning_rate": 2e-05,
"loss": 1.0233,
"step": 24960
},
{
"epoch": 26.0,
"eval_accuracy": 0.7361292168770468,
"eval_loss": 1.2367281913757324,
"eval_runtime": 40.3061,
"eval_samples_per_second": 188.657,
"eval_steps_per_second": 1.191,
"step": 24960
},
{
"epoch": 27.0,
"learning_rate": 2e-05,
"loss": 1.018,
"step": 25920
},
{
"epoch": 27.0,
"eval_accuracy": 0.7351869592986123,
"eval_loss": 1.2435057163238525,
"eval_runtime": 41.4116,
"eval_samples_per_second": 183.62,
"eval_steps_per_second": 1.159,
"step": 25920
},
{
"epoch": 28.0,
"learning_rate": 2e-05,
"loss": 1.0128,
"step": 26880
},
{
"epoch": 28.0,
"eval_accuracy": 0.7379444038039502,
"eval_loss": 1.2293747663497925,
"eval_runtime": 42.7481,
"eval_samples_per_second": 177.879,
"eval_steps_per_second": 1.123,
"step": 26880
},
{
"epoch": 29.0,
"learning_rate": 2e-05,
"loss": 1.008,
"step": 27840
},
{
"epoch": 29.0,
"eval_accuracy": 0.7381460506414618,
"eval_loss": 1.224423885345459,
"eval_runtime": 40.3086,
"eval_samples_per_second": 188.645,
"eval_steps_per_second": 1.191,
"step": 27840
},
{
"epoch": 30.0,
"learning_rate": 2e-05,
"loss": 1.0036,
"step": 28800
},
{
"epoch": 30.0,
"eval_accuracy": 0.7393030501323384,
"eval_loss": 1.2178620100021362,
"eval_runtime": 40.2497,
"eval_samples_per_second": 188.921,
"eval_steps_per_second": 1.193,
"step": 28800
},
{
"epoch": 31.0,
"learning_rate": 2e-05,
"loss": 0.9997,
"step": 29760
},
{
"epoch": 31.0,
"eval_accuracy": 0.7388722666381714,
"eval_loss": 1.2249476909637451,
"eval_runtime": 40.2623,
"eval_samples_per_second": 188.862,
"eval_steps_per_second": 1.192,
"step": 29760
},
{
"epoch": 32.0,
"learning_rate": 2e-05,
"loss": 0.9969,
"step": 30720
},
{
"epoch": 32.0,
"eval_accuracy": 0.7389634850823794,
"eval_loss": 1.2235573530197144,
"eval_runtime": 40.3447,
"eval_samples_per_second": 188.476,
"eval_steps_per_second": 1.19,
"step": 30720
},
{
"epoch": 33.0,
"learning_rate": 2e-05,
"loss": 0.992,
"step": 31680
},
{
"epoch": 33.0,
"eval_accuracy": 0.7388708814628272,
"eval_loss": 1.217455506324768,
"eval_runtime": 42.7026,
"eval_samples_per_second": 178.069,
"eval_steps_per_second": 1.124,
"step": 31680
},
{
"epoch": 34.0,
"learning_rate": 2e-05,
"loss": 0.988,
"step": 32640
},
{
"epoch": 34.0,
"eval_accuracy": 0.7401730933519727,
"eval_loss": 1.2093894481658936,
"eval_runtime": 40.7008,
"eval_samples_per_second": 186.827,
"eval_steps_per_second": 1.179,
"step": 32640
},
{
"epoch": 35.0,
"learning_rate": 2e-05,
"loss": 0.9836,
"step": 33600
},
{
"epoch": 35.0,
"eval_accuracy": 0.7400202510644008,
"eval_loss": 1.208998203277588,
"eval_runtime": 40.6904,
"eval_samples_per_second": 186.874,
"eval_steps_per_second": 1.18,
"step": 33600
}
],
"max_steps": 38400,
"num_train_epochs": 40,
"total_flos": 2041828249436160.0,
"trial_name": null,
"trial_params": null
}