{
  "best_metric": 0.7818181818181819,
  "best_model_checkpoint": "resnet-50-finetuned\\checkpoint-1190",
  "epoch": 100.0,
  "global_step": 1400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.71,
      "learning_rate": 3.5714285714285714e-06,
      "loss": 1.7972,
      "step": 10
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.11363636363636363,
      "eval_loss": 1.800904393196106,
      "eval_runtime": 5.5218,
      "eval_samples_per_second": 79.685,
      "eval_steps_per_second": 1.268,
      "step": 14
    },
    {
      "epoch": 1.43,
      "learning_rate": 7.142857142857143e-06,
      "loss": 1.7991,
      "step": 20
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.16136363636363638,
      "eval_loss": 1.7934340238571167,
      "eval_runtime": 3.2902,
      "eval_samples_per_second": 133.73,
      "eval_steps_per_second": 2.128,
      "step": 28
    },
    {
      "epoch": 2.14,
      "learning_rate": 1.0714285714285714e-05,
      "loss": 1.7925,
      "step": 30
    },
    {
      "epoch": 2.86,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 1.7871,
      "step": 40
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.2,
      "eval_loss": 1.78143310546875,
      "eval_runtime": 3.2024,
      "eval_samples_per_second": 137.395,
      "eval_steps_per_second": 2.186,
      "step": 42
    },
    {
      "epoch": 3.57,
      "learning_rate": 1.785714285714286e-05,
      "loss": 1.7813,
      "step": 50
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.20909090909090908,
      "eval_loss": 1.7663880586624146,
      "eval_runtime": 3.3082,
      "eval_samples_per_second": 133.005,
      "eval_steps_per_second": 2.116,
      "step": 56
    },
    {
      "epoch": 4.29,
      "learning_rate": 2.1428571428571428e-05,
      "loss": 1.7673,
      "step": 60
    },
    {
      "epoch": 5.0,
      "learning_rate": 2.5e-05,
      "loss": 1.7602,
      "step": 70
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.2772727272727273,
      "eval_loss": 1.7464295625686646,
      "eval_runtime": 3.3301,
      "eval_samples_per_second": 132.128,
      "eval_steps_per_second": 2.102,
      "step": 70
    },
    {
      "epoch": 5.71,
      "learning_rate": 2.857142857142857e-05,
      "loss": 1.7455,
      "step": 80
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.2840909090909091,
      "eval_loss": 1.726538896560669,
      "eval_runtime": 3.2579,
      "eval_samples_per_second": 135.057,
      "eval_steps_per_second": 2.149,
      "step": 84
    },
    {
      "epoch": 6.43,
      "learning_rate": 3.2142857142857144e-05,
      "loss": 1.7321,
      "step": 90
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.2727272727272727,
      "eval_loss": 1.7056454420089722,
      "eval_runtime": 3.2693,
      "eval_samples_per_second": 134.587,
      "eval_steps_per_second": 2.141,
      "step": 98
    },
    {
      "epoch": 7.14,
      "learning_rate": 3.571428571428572e-05,
      "loss": 1.7215,
      "step": 100
    },
    {
      "epoch": 7.86,
      "learning_rate": 3.928571428571429e-05,
      "loss": 1.706,
      "step": 110
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.3068181818181818,
      "eval_loss": 1.6843111515045166,
      "eval_runtime": 3.2827,
      "eval_samples_per_second": 134.038,
      "eval_steps_per_second": 2.132,
      "step": 112
    },
    {
      "epoch": 8.57,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 1.6891,
      "step": 120
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.325,
      "eval_loss": 1.6573911905288696,
      "eval_runtime": 3.1775,
      "eval_samples_per_second": 138.473,
      "eval_steps_per_second": 2.203,
      "step": 126
    },
    {
      "epoch": 9.29,
      "learning_rate": 4.642857142857143e-05,
      "loss": 1.6751,
      "step": 130
    },
    {
      "epoch": 10.0,
      "learning_rate": 5e-05,
      "loss": 1.6524,
      "step": 140
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.3090909090909091,
      "eval_loss": 1.6302080154418945,
      "eval_runtime": 3.3459,
      "eval_samples_per_second": 131.503,
      "eval_steps_per_second": 2.092,
      "step": 140
    },
    {
      "epoch": 10.71,
      "learning_rate": 4.960317460317461e-05,
      "loss": 1.6348,
      "step": 150
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.3613636363636364,
      "eval_loss": 1.5982272624969482,
      "eval_runtime": 3.2269,
      "eval_samples_per_second": 136.356,
      "eval_steps_per_second": 2.169,
      "step": 154
    },
    {
      "epoch": 11.43,
      "learning_rate": 4.9206349206349204e-05,
      "loss": 1.611,
      "step": 160
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.39545454545454545,
      "eval_loss": 1.556486964225769,
      "eval_runtime": 3.2229,
      "eval_samples_per_second": 136.521,
      "eval_steps_per_second": 2.172,
      "step": 168
    },
    {
      "epoch": 12.14,
      "learning_rate": 4.880952380952381e-05,
      "loss": 1.5891,
      "step": 170
    },
    {
      "epoch": 12.86,
      "learning_rate": 4.841269841269841e-05,
      "loss": 1.5673,
      "step": 180
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.41363636363636364,
      "eval_loss": 1.516886591911316,
      "eval_runtime": 3.2733,
      "eval_samples_per_second": 134.423,
      "eval_steps_per_second": 2.139,
      "step": 182
    },
    {
      "epoch": 13.57,
      "learning_rate": 4.801587301587302e-05,
      "loss": 1.5239,
      "step": 190
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.509090909090909,
      "eval_loss": 1.4703104496002197,
      "eval_runtime": 3.2812,
      "eval_samples_per_second": 134.096,
      "eval_steps_per_second": 2.133,
      "step": 196
    },
    {
      "epoch": 14.29,
      "learning_rate": 4.761904761904762e-05,
      "loss": 1.5131,
      "step": 200
    },
    {
      "epoch": 15.0,
      "learning_rate": 4.722222222222222e-05,
      "loss": 1.4698,
      "step": 210
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.5363636363636364,
      "eval_loss": 1.4071372747421265,
      "eval_runtime": 3.3595,
      "eval_samples_per_second": 130.973,
      "eval_steps_per_second": 2.084,
      "step": 210
    },
    {
      "epoch": 15.71,
      "learning_rate": 4.682539682539683e-05,
      "loss": 1.4277,
      "step": 220
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.5886363636363636,
      "eval_loss": 1.3305206298828125,
      "eval_runtime": 3.3151,
      "eval_samples_per_second": 132.725,
      "eval_steps_per_second": 2.112,
      "step": 224
    },
    {
      "epoch": 16.43,
      "learning_rate": 4.642857142857143e-05,
      "loss": 1.3798,
      "step": 230
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.6068181818181818,
      "eval_loss": 1.250695824623108,
      "eval_runtime": 3.348,
      "eval_samples_per_second": 131.42,
      "eval_steps_per_second": 2.091,
      "step": 238
    },
    {
      "epoch": 17.14,
      "learning_rate": 4.603174603174603e-05,
      "loss": 1.355,
      "step": 240
    },
    {
      "epoch": 17.86,
      "learning_rate": 4.563492063492064e-05,
      "loss": 1.2858,
      "step": 250
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.6409090909090909,
      "eval_loss": 1.1516846418380737,
      "eval_runtime": 3.35,
      "eval_samples_per_second": 131.342,
      "eval_steps_per_second": 2.09,
      "step": 252
    },
    {
      "epoch": 18.57,
      "learning_rate": 4.523809523809524e-05,
      "loss": 1.2543,
      "step": 260
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.6659090909090909,
      "eval_loss": 1.0930873155593872,
      "eval_runtime": 3.4597,
      "eval_samples_per_second": 127.177,
      "eval_steps_per_second": 2.023,
      "step": 266
    },
    {
      "epoch": 19.29,
      "learning_rate": 4.4841269841269846e-05,
      "loss": 1.2006,
      "step": 270
    },
    {
      "epoch": 20.0,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 1.1696,
      "step": 280
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.675,
      "eval_loss": 1.012363314628601,
      "eval_runtime": 3.3077,
      "eval_samples_per_second": 133.023,
      "eval_steps_per_second": 2.116,
      "step": 280
    },
    {
      "epoch": 20.71,
      "learning_rate": 4.404761904761905e-05,
      "loss": 1.1124,
      "step": 290
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.6931818181818182,
      "eval_loss": 0.954047679901123,
      "eval_runtime": 3.3411,
      "eval_samples_per_second": 131.693,
      "eval_steps_per_second": 2.095,
      "step": 294
    },
    {
      "epoch": 21.43,
      "learning_rate": 4.3650793650793655e-05,
      "loss": 1.059,
      "step": 300
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.7022727272727273,
      "eval_loss": 0.9010851979255676,
      "eval_runtime": 3.6911,
      "eval_samples_per_second": 119.205,
      "eval_steps_per_second": 1.896,
      "step": 308
    },
    {
      "epoch": 22.14,
      "learning_rate": 4.3253968253968256e-05,
      "loss": 1.0318,
      "step": 310
    },
    {
      "epoch": 22.86,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 1.0006,
      "step": 320
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.7090909090909091,
      "eval_loss": 0.8692610859870911,
      "eval_runtime": 3.4254,
      "eval_samples_per_second": 128.451,
      "eval_steps_per_second": 2.044,
      "step": 322
    },
    {
      "epoch": 23.57,
      "learning_rate": 4.2460317460317464e-05,
      "loss": 0.9606,
      "step": 330
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.6977272727272728,
      "eval_loss": 0.8458234667778015,
      "eval_runtime": 3.7789,
      "eval_samples_per_second": 116.436,
      "eval_steps_per_second": 1.852,
      "step": 336
    },
    {
      "epoch": 24.29,
      "learning_rate": 4.2063492063492065e-05,
      "loss": 0.9541,
      "step": 340
    },
    {
      "epoch": 25.0,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.9146,
      "step": 350
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.7227272727272728,
      "eval_loss": 0.8226051926612854,
      "eval_runtime": 3.375,
      "eval_samples_per_second": 130.371,
      "eval_steps_per_second": 2.074,
      "step": 350
    },
    {
      "epoch": 25.71,
      "learning_rate": 4.126984126984127e-05,
      "loss": 0.8864,
      "step": 360
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.7159090909090909,
      "eval_loss": 0.8187159895896912,
      "eval_runtime": 3.4251,
      "eval_samples_per_second": 128.462,
      "eval_steps_per_second": 2.044,
      "step": 364
    },
    {
      "epoch": 26.43,
      "learning_rate": 4.0873015873015874e-05,
      "loss": 0.8864,
      "step": 370
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.7227272727272728,
      "eval_loss": 0.7862333655357361,
      "eval_runtime": 3.3921,
      "eval_samples_per_second": 129.714,
      "eval_steps_per_second": 2.064,
      "step": 378
    },
    {
      "epoch": 27.14,
      "learning_rate": 4.047619047619048e-05,
      "loss": 0.8447,
      "step": 380
    },
    {
      "epoch": 27.86,
      "learning_rate": 4.007936507936508e-05,
      "loss": 0.8458,
      "step": 390
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.7295454545454545,
      "eval_loss": 0.7686573266983032,
      "eval_runtime": 3.4126,
      "eval_samples_per_second": 128.935,
      "eval_steps_per_second": 2.051,
      "step": 392
    },
    {
      "epoch": 28.57,
      "learning_rate": 3.968253968253968e-05,
      "loss": 0.8337,
      "step": 400
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.7272727272727273,
      "eval_loss": 0.747409999370575,
      "eval_runtime": 3.4047,
      "eval_samples_per_second": 129.233,
      "eval_steps_per_second": 2.056,
      "step": 406
    },
    {
      "epoch": 29.29,
      "learning_rate": 3.928571428571429e-05,
      "loss": 0.8107,
      "step": 410
    },
    {
      "epoch": 30.0,
      "learning_rate": 3.888888888888889e-05,
      "loss": 0.7943,
      "step": 420
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.7454545454545455,
      "eval_loss": 0.7368342876434326,
      "eval_runtime": 3.4657,
      "eval_samples_per_second": 126.957,
      "eval_steps_per_second": 2.02,
      "step": 420
    },
    {
      "epoch": 30.71,
      "learning_rate": 3.84920634920635e-05,
      "loss": 0.7884,
      "step": 430
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 0.7454545454545455,
      "eval_loss": 0.7331739068031311,
      "eval_runtime": 3.4308,
      "eval_samples_per_second": 128.249,
      "eval_steps_per_second": 2.04,
      "step": 434
    },
    {
      "epoch": 31.43,
      "learning_rate": 3.809523809523809e-05,
      "loss": 0.7495,
      "step": 440
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.725,
      "eval_loss": 0.7354092597961426,
      "eval_runtime": 3.3899,
      "eval_samples_per_second": 129.796,
      "eval_steps_per_second": 2.065,
      "step": 448
    },
    {
      "epoch": 32.14,
      "learning_rate": 3.76984126984127e-05,
      "loss": 0.78,
      "step": 450
    },
    {
      "epoch": 32.86,
      "learning_rate": 3.730158730158731e-05,
      "loss": 0.7503,
      "step": 460
    },
    {
      "epoch": 33.0,
      "eval_accuracy": 0.7477272727272727,
      "eval_loss": 0.7180919051170349,
      "eval_runtime": 3.3351,
      "eval_samples_per_second": 131.931,
      "eval_steps_per_second": 2.099,
      "step": 462
    },
    {
      "epoch": 33.57,
      "learning_rate": 3.690476190476191e-05,
      "loss": 0.7358,
      "step": 470
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 0.7477272727272727,
      "eval_loss": 0.7007948160171509,
      "eval_runtime": 3.4029,
      "eval_samples_per_second": 129.301,
      "eval_steps_per_second": 2.057,
      "step": 476
    },
    {
      "epoch": 34.29,
      "learning_rate": 3.650793650793651e-05,
      "loss": 0.7605,
      "step": 480
    },
    {
      "epoch": 35.0,
      "learning_rate": 3.611111111111111e-05,
      "loss": 0.7229,
      "step": 490
    },
    {
      "epoch": 35.0,
      "eval_accuracy": 0.740909090909091,
      "eval_loss": 0.6959581971168518,
      "eval_runtime": 3.6574,
      "eval_samples_per_second": 120.304,
      "eval_steps_per_second": 1.914,
      "step": 490
    },
    {
      "epoch": 35.71,
      "learning_rate": 3.571428571428572e-05,
      "loss": 0.7114,
      "step": 500
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.7431818181818182,
      "eval_loss": 0.6837091445922852,
      "eval_runtime": 3.6041,
      "eval_samples_per_second": 122.083,
      "eval_steps_per_second": 1.942,
      "step": 504
    },
    {
      "epoch": 36.43,
      "learning_rate": 3.5317460317460324e-05,
      "loss": 0.7075,
      "step": 510
    },
    {
      "epoch": 37.0,
      "eval_accuracy": 0.740909090909091,
      "eval_loss": 0.6922788023948669,
      "eval_runtime": 3.3849,
      "eval_samples_per_second": 129.987,
      "eval_steps_per_second": 2.068,
      "step": 518
    },
    {
      "epoch": 37.14,
      "learning_rate": 3.492063492063492e-05,
      "loss": 0.6915,
      "step": 520
    },
    {
      "epoch": 37.86,
      "learning_rate": 3.4523809523809526e-05,
      "loss": 0.682,
      "step": 530
    },
    {
      "epoch": 38.0,
      "eval_accuracy": 0.7431818181818182,
      "eval_loss": 0.6930937767028809,
      "eval_runtime": 3.4683,
      "eval_samples_per_second": 126.864,
      "eval_steps_per_second": 2.018,
      "step": 532
    },
    {
      "epoch": 38.57,
      "learning_rate": 3.412698412698413e-05,
      "loss": 0.6626,
      "step": 540
    },
    {
      "epoch": 39.0,
      "eval_accuracy": 0.740909090909091,
      "eval_loss": 0.6872156262397766,
      "eval_runtime": 3.3949,
      "eval_samples_per_second": 129.605,
      "eval_steps_per_second": 2.062,
      "step": 546
    },
    {
      "epoch": 39.29,
      "learning_rate": 3.3730158730158734e-05,
      "loss": 0.6708,
      "step": 550
    },
    {
      "epoch": 40.0,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.6688,
      "step": 560
    },
    {
      "epoch": 40.0,
      "eval_accuracy": 0.75,
      "eval_loss": 0.6758830547332764,
      "eval_runtime": 3.2304,
      "eval_samples_per_second": 136.208,
      "eval_steps_per_second": 2.167,
      "step": 560
    },
    {
      "epoch": 40.71,
      "learning_rate": 3.2936507936507936e-05,
      "loss": 0.655,
      "step": 570
    },
    {
      "epoch": 41.0,
      "eval_accuracy": 0.7340909090909091,
      "eval_loss": 0.7006358504295349,
      "eval_runtime": 3.3002,
      "eval_samples_per_second": 133.326,
      "eval_steps_per_second": 2.121,
      "step": 574
    },
    {
      "epoch": 41.43,
      "learning_rate": 3.253968253968254e-05,
      "loss": 0.6533,
      "step": 580
    },
    {
      "epoch": 42.0,
      "eval_accuracy": 0.75,
      "eval_loss": 0.6774057149887085,
      "eval_runtime": 3.351,
      "eval_samples_per_second": 131.303,
      "eval_steps_per_second": 2.089,
      "step": 588
    },
    {
      "epoch": 42.14,
      "learning_rate": 3.2142857142857144e-05,
      "loss": 0.6488,
      "step": 590
    },
    {
      "epoch": 42.86,
      "learning_rate": 3.1746031746031745e-05,
      "loss": 0.6215,
      "step": 600
    },
    {
      "epoch": 43.0,
      "eval_accuracy": 0.7454545454545455,
      "eval_loss": 0.6663152575492859,
      "eval_runtime": 3.4833,
      "eval_samples_per_second": 126.317,
      "eval_steps_per_second": 2.01,
      "step": 602
    },
    {
      "epoch": 43.57,
      "learning_rate": 3.134920634920635e-05,
      "loss": 0.6267,
      "step": 610
    },
    {
      "epoch": 44.0,
      "eval_accuracy": 0.7545454545454545,
      "eval_loss": 0.6608263850212097,
      "eval_runtime": 3.375,
      "eval_samples_per_second": 130.371,
      "eval_steps_per_second": 2.074,
      "step": 616
    },
    {
      "epoch": 44.29,
      "learning_rate": 3.095238095238095e-05,
      "loss": 0.6383,
      "step": 620
    },
    {
      "epoch": 45.0,
      "learning_rate": 3.055555555555556e-05,
      "loss": 0.6122,
      "step": 630
    },
    {
      "epoch": 45.0,
      "eval_accuracy": 0.7477272727272727,
      "eval_loss": 0.6523904800415039,
      "eval_runtime": 3.3102,
      "eval_samples_per_second": 132.923,
      "eval_steps_per_second": 2.115,
      "step": 630
    },
    {
      "epoch": 45.71,
      "learning_rate": 3.0158730158730158e-05,
      "loss": 0.6047,
      "step": 640
    },
    {
      "epoch": 46.0,
      "eval_accuracy": 0.7477272727272727,
      "eval_loss": 0.6757220029830933,
      "eval_runtime": 3.3012,
      "eval_samples_per_second": 133.286,
      "eval_steps_per_second": 2.12,
      "step": 644
    },
    {
      "epoch": 46.43,
      "learning_rate": 2.9761904761904762e-05,
      "loss": 0.6135,
      "step": 650
    },
    {
      "epoch": 47.0,
      "eval_accuracy": 0.7431818181818182,
      "eval_loss": 0.6492685675621033,
      "eval_runtime": 3.4379,
      "eval_samples_per_second": 127.986,
      "eval_steps_per_second": 2.036,
      "step": 658
    },
    {
      "epoch": 47.14,
      "learning_rate": 2.9365079365079366e-05,
      "loss": 0.6145,
      "step": 660
    },
    {
      "epoch": 47.86,
      "learning_rate": 2.8968253968253974e-05,
      "loss": 0.5805,
      "step": 670
    },
    {
      "epoch": 48.0,
      "eval_accuracy": 0.75,
      "eval_loss": 0.6503338813781738,
      "eval_runtime": 3.383,
      "eval_samples_per_second": 130.064,
      "eval_steps_per_second": 2.069,
      "step": 672
    },
    {
      "epoch": 48.57,
      "learning_rate": 2.857142857142857e-05,
      "loss": 0.6124,
      "step": 680
    },
    {
      "epoch": 49.0,
      "eval_accuracy": 0.7454545454545455,
      "eval_loss": 0.6626009941101074,
      "eval_runtime": 3.5236,
      "eval_samples_per_second": 124.873,
      "eval_steps_per_second": 1.987,
      "step": 686
    },
    {
      "epoch": 49.29,
      "learning_rate": 2.8174603174603175e-05,
      "loss": 0.6079,
      "step": 690
    },
    {
      "epoch": 50.0,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.5826,
      "step": 700
    },
    {
      "epoch": 50.0,
      "eval_accuracy": 0.7522727272727273,
      "eval_loss": 0.6511955857276917,
      "eval_runtime": 3.2723,
      "eval_samples_per_second": 134.464,
      "eval_steps_per_second": 2.139,
      "step": 700
    },
    {
      "epoch": 50.71,
      "learning_rate": 2.7380952380952383e-05,
      "loss": 0.5698,
      "step": 710
    },
    {
      "epoch": 51.0,
      "eval_accuracy": 0.7477272727272727,
      "eval_loss": 0.6589775085449219,
      "eval_runtime": 3.4253,
      "eval_samples_per_second": 128.455,
      "eval_steps_per_second": 2.044,
      "step": 714
    },
    {
      "epoch": 51.43,
      "learning_rate": 2.6984126984126984e-05,
      "loss": 0.5721,
      "step": 720
    },
    {
      "epoch": 52.0,
      "eval_accuracy": 0.7454545454545455,
      "eval_loss": 0.6558127403259277,
      "eval_runtime": 3.2646,
      "eval_samples_per_second": 134.777,
      "eval_steps_per_second": 2.144,
      "step": 728
    },
    {
      "epoch": 52.14,
      "learning_rate": 2.6587301587301588e-05,
      "loss": 0.5912,
      "step": 730
    },
    {
      "epoch": 52.86,
      "learning_rate": 2.6190476190476192e-05,
      "loss": 0.5569,
      "step": 740
    },
    {
      "epoch": 53.0,
      "eval_accuracy": 0.7454545454545455,
      "eval_loss": 0.6654322743415833,
      "eval_runtime": 3.2932,
      "eval_samples_per_second": 133.609,
      "eval_steps_per_second": 2.126,
      "step": 742
    },
    {
      "epoch": 53.57,
      "learning_rate": 2.5793650793650796e-05,
      "loss": 0.573,
      "step": 750
    },
    {
      "epoch": 54.0,
      "eval_accuracy": 0.7545454545454545,
      "eval_loss": 0.636202335357666,
      "eval_runtime": 3.3271,
      "eval_samples_per_second": 132.247,
      "eval_steps_per_second": 2.104,
      "step": 756
    },
    {
      "epoch": 54.29,
      "learning_rate": 2.5396825396825397e-05,
      "loss": 0.5816,
      "step": 760
    },
    {
      "epoch": 55.0,
      "learning_rate": 2.5e-05,
      "loss": 0.543,
      "step": 770
    },
    {
      "epoch": 55.0,
      "eval_accuracy": 0.7636363636363637,
      "eval_loss": 0.6368376016616821,
      "eval_runtime": 3.2054,
      "eval_samples_per_second": 137.267,
      "eval_steps_per_second": 2.184,
      "step": 770
    },
    {
      "epoch": 55.71,
      "learning_rate": 2.4603174603174602e-05,
      "loss": 0.5494,
      "step": 780
    },
    {
      "epoch": 56.0,
      "eval_accuracy": 0.759090909090909,
      "eval_loss": 0.6263792514801025,
      "eval_runtime": 3.2663,
      "eval_samples_per_second": 134.71,
      "eval_steps_per_second": 2.143,
      "step": 784
    },
    {
      "epoch": 56.43,
      "learning_rate": 2.4206349206349206e-05,
      "loss": 0.5598,
      "step": 790
    },
    {
      "epoch": 57.0,
      "eval_accuracy": 0.7681818181818182,
      "eval_loss": 0.627074658870697,
      "eval_runtime": 3.4193,
      "eval_samples_per_second": 128.683,
      "eval_steps_per_second": 2.047,
      "step": 798
    },
    {
      "epoch": 57.14,
      "learning_rate": 2.380952380952381e-05,
      "loss": 0.5655,
      "step": 800
    },
    {
      "epoch": 57.86,
      "learning_rate": 2.3412698412698414e-05,
      "loss": 0.5412,
      "step": 810
    },
    {
      "epoch": 58.0,
      "eval_accuracy": 0.7522727272727273,
      "eval_loss": 0.6506673693656921,
      "eval_runtime": 3.2684,
      "eval_samples_per_second": 134.623,
      "eval_steps_per_second": 2.142,
      "step": 812
    },
    {
      "epoch": 58.57,
      "learning_rate": 2.3015873015873015e-05,
      "loss": 0.5512,
      "step": 820
    },
    {
      "epoch": 59.0,
      "eval_accuracy": 0.7568181818181818,
      "eval_loss": 0.6446354985237122,
      "eval_runtime": 3.3303,
      "eval_samples_per_second": 132.119,
      "eval_steps_per_second": 2.102,
      "step": 826
    },
    {
      "epoch": 59.29,
      "learning_rate": 2.261904761904762e-05,
      "loss": 0.5217,
      "step": 830
    },
    {
      "epoch": 60.0,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.5504,
      "step": 840
    },
    {
      "epoch": 60.0,
      "eval_accuracy": 0.7727272727272727,
      "eval_loss": 0.6208813190460205,
      "eval_runtime": 3.2503,
      "eval_samples_per_second": 135.372,
      "eval_steps_per_second": 2.154,
      "step": 840
    },
    {
      "epoch": 60.71,
      "learning_rate": 2.1825396825396827e-05,
      "loss": 0.5304,
      "step": 850
    },
    {
      "epoch": 61.0,
      "eval_accuracy": 0.7568181818181818,
      "eval_loss": 0.6428102850914001,
      "eval_runtime": 3.4103,
      "eval_samples_per_second": 129.021,
      "eval_steps_per_second": 2.053,
      "step": 854
    },
    {
      "epoch": 61.43,
      "learning_rate": 2.1428571428571428e-05,
      "loss": 0.5238,
      "step": 860
    },
    {
      "epoch": 62.0,
      "eval_accuracy": 0.7613636363636364,
      "eval_loss": 0.6323724985122681,
      "eval_runtime": 3.2991,
      "eval_samples_per_second": 133.368,
      "eval_steps_per_second": 2.122,
      "step": 868
    },
    {
      "epoch": 62.14,
      "learning_rate": 2.1031746031746032e-05,
      "loss": 0.5344,
      "step": 870
    },
    {
      "epoch": 62.86,
      "learning_rate": 2.0634920634920636e-05,
      "loss": 0.5298,
      "step": 880
    },
    {
      "epoch": 63.0,
      "eval_accuracy": 0.7568181818181818,
      "eval_loss": 0.6347512006759644,
      "eval_runtime": 3.2796,
      "eval_samples_per_second": 134.162,
      "eval_steps_per_second": 2.134,
      "step": 882
    },
    {
      "epoch": 63.57,
      "learning_rate": 2.023809523809524e-05,
      "loss": 0.4965,
      "step": 890
    },
    {
      "epoch": 64.0,
      "eval_accuracy": 0.7545454545454545,
      "eval_loss": 0.6351690292358398,
      "eval_runtime": 3.394,
      "eval_samples_per_second": 129.639,
      "eval_steps_per_second": 2.062,
      "step": 896
    },
    {
      "epoch": 64.29,
      "learning_rate": 1.984126984126984e-05,
      "loss": 0.5338,
      "step": 900
    },
    {
      "epoch": 65.0,
      "learning_rate": 1.9444444444444445e-05,
      "loss": 0.5188,
      "step": 910
    },
    {
      "epoch": 65.0,
      "eval_accuracy": 0.7704545454545455,
      "eval_loss": 0.6352004408836365,
      "eval_runtime": 3.3712,
      "eval_samples_per_second": 130.519,
      "eval_steps_per_second": 2.076,
      "step": 910
    },
    {
      "epoch": 65.71,
      "learning_rate": 1.9047619047619046e-05,
      "loss": 0.5081,
      "step": 920
    },
    {
      "epoch": 66.0,
      "eval_accuracy": 0.7681818181818182,
      "eval_loss": 0.6369162201881409,
      "eval_runtime": 3.3987,
      "eval_samples_per_second": 129.462,
      "eval_steps_per_second": 2.06,
      "step": 924
    },
    {
      "epoch": 66.43,
      "learning_rate": 1.8650793650793654e-05,
      "loss": 0.4725,
      "step": 930
    },
    {
      "epoch": 67.0,
      "eval_accuracy": 0.775,
      "eval_loss": 0.6235827207565308,
      "eval_runtime": 3.2862,
      "eval_samples_per_second": 133.893,
      "eval_steps_per_second": 2.13,
      "step": 938
    },
    {
      "epoch": 67.14,
      "learning_rate": 1.8253968253968254e-05,
      "loss": 0.52,
      "step": 940
    },
    {
      "epoch": 67.86,
      "learning_rate": 1.785714285714286e-05,
      "loss": 0.4977,
      "step": 950
    },
    {
      "epoch": 68.0,
      "eval_accuracy": 0.7681818181818182,
      "eval_loss": 0.621067225933075,
      "eval_runtime": 3.3453,
      "eval_samples_per_second": 131.527,
      "eval_steps_per_second": 2.092,
      "step": 952
    },
    {
      "epoch": 68.57,
      "learning_rate": 1.746031746031746e-05,
      "loss": 0.5118,
      "step": 960
    },
    {
      "epoch": 69.0,
      "eval_accuracy": 0.7568181818181818,
      "eval_loss": 0.6289528012275696,
      "eval_runtime": 3.3375,
      "eval_samples_per_second": 131.834,
      "eval_steps_per_second": 2.097,
      "step": 966
    },
    {
      "epoch": 69.29,
      "learning_rate": 1.7063492063492063e-05,
      "loss": 0.5001,
      "step": 970
    },
    {
      "epoch": 70.0,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.4876,
      "step": 980
    },
    {
      "epoch": 70.0,
      "eval_accuracy": 0.7636363636363637,
      "eval_loss": 0.6366029381752014,
      "eval_runtime": 3.2398,
      "eval_samples_per_second": 135.809,
      "eval_steps_per_second": 2.161,
      "step": 980
    },
    {
      "epoch": 70.71,
      "learning_rate": 1.626984126984127e-05,
      "loss": 0.49,
      "step": 990
    },
    {
      "epoch": 71.0,
      "eval_accuracy": 0.7727272727272727,
      "eval_loss": 0.6185997128486633,
      "eval_runtime": 3.2603,
      "eval_samples_per_second": 134.958,
      "eval_steps_per_second": 2.147,
      "step": 994
    },
    {
      "epoch": 71.43,
      "learning_rate": 1.5873015873015872e-05,
      "loss": 0.4912,
      "step": 1000
    },
    {
      "epoch": 72.0,
      "eval_accuracy": 0.7727272727272727,
      "eval_loss": 0.6244505643844604,
      "eval_runtime": 3.3698,
      "eval_samples_per_second": 130.572,
      "eval_steps_per_second": 2.077,
      "step": 1008
    },
    {
      "epoch": 72.14,
      "learning_rate": 1.5476190476190476e-05,
      "loss": 0.4971,
      "step": 1010
    },
    {
      "epoch": 72.86,
      "learning_rate": 1.5079365079365079e-05,
      "loss": 0.4774,
      "step": 1020
    },
    {
      "epoch": 73.0,
      "eval_accuracy": 0.7727272727272727,
      "eval_loss": 0.6223783493041992,
      "eval_runtime": 3.4233,
      "eval_samples_per_second": 128.533,
      "eval_steps_per_second": 2.045,
      "step": 1022
    },
    {
      "epoch": 73.57,
      "learning_rate": 1.4682539682539683e-05,
      "loss": 0.4972,
      "step": 1030
    },
    {
      "epoch": 74.0,
      "eval_accuracy": 0.7454545454545455,
      "eval_loss": 0.6407915353775024,
      "eval_runtime": 3.2294,
      "eval_samples_per_second": 136.25,
      "eval_steps_per_second": 2.168,
      "step": 1036
    },
    {
      "epoch": 74.29,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 0.4663,
      "step": 1040
    },
    {
      "epoch": 75.0,
      "learning_rate": 1.388888888888889e-05,
      "loss": 0.4973,
      "step": 1050
    },
    {
      "epoch": 75.0,
      "eval_accuracy": 0.7795454545454545,
      "eval_loss": 0.6241269707679749,
      "eval_runtime": 3.4141,
      "eval_samples_per_second": 128.879,
      "eval_steps_per_second": 2.05,
      "step": 1050
    },
    {
      "epoch": 75.71,
      "learning_rate": 1.3492063492063492e-05,
      "loss": 0.4807,
      "step": 1060
    },
    {
      "epoch": 76.0,
      "eval_accuracy": 0.7727272727272727,
      "eval_loss": 0.6188752055168152,
      "eval_runtime": 3.372,
      "eval_samples_per_second": 130.487,
      "eval_steps_per_second": 2.076,
      "step": 1064
    },
    {
      "epoch": 76.43,
      "learning_rate": 1.3095238095238096e-05,
      "loss": 0.4845,
      "step": 1070
    },
    {
      "epoch": 77.0,
      "eval_accuracy": 0.775,
      "eval_loss": 0.6290163397789001,
      "eval_runtime": 3.37,
      "eval_samples_per_second": 130.564,
      "eval_steps_per_second": 2.077,
      "step": 1078
    },
    {
      "epoch": 77.14,
      "learning_rate": 1.2698412698412699e-05,
      "loss": 0.4699,
      "step": 1080
    },
    {
      "epoch": 77.86,
      "learning_rate": 1.2301587301587301e-05,
      "loss": 0.4874,
      "step": 1090
    },
    {
      "epoch": 78.0,
      "eval_accuracy": 0.7772727272727272,
      "eval_loss": 0.6192808747291565,
      "eval_runtime": 3.365,
      "eval_samples_per_second": 130.758,
      "eval_steps_per_second": 2.08,
      "step": 1092
    },
    {
      "epoch": 78.57,
      "learning_rate": 1.1904761904761905e-05,
      "loss": 0.4898,
      "step": 1100
    },
    {
      "epoch": 79.0,
      "eval_accuracy": 0.775,
      "eval_loss": 0.643506646156311,
      "eval_runtime": 3.4199,
      "eval_samples_per_second": 128.66,
      "eval_steps_per_second": 2.047,
      "step": 1106
    },
    {
      "epoch": 79.29,
      "learning_rate": 1.1507936507936508e-05,
      "loss": 0.4844,
      "step": 1110
    },
    {
      "epoch": 80.0,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 0.4904,
      "step": 1120
    },
    {
      "epoch": 80.0,
      "eval_accuracy": 0.7613636363636364,
      "eval_loss": 0.6483902931213379,
      "eval_runtime": 3.373,
      "eval_samples_per_second": 130.448,
      "eval_steps_per_second": 2.075,
      "step": 1120
    },
    {
      "epoch": 80.71,
      "learning_rate": 1.0714285714285714e-05,
      "loss": 0.4702,
      "step": 1130
    },
    {
      "epoch": 81.0,
      "eval_accuracy": 0.7727272727272727,
      "eval_loss": 0.6238275170326233,
      "eval_runtime": 3.5076,
      "eval_samples_per_second": 125.441,
      "eval_steps_per_second": 1.996,
      "step": 1134
    },
    {
      "epoch": 81.43,
      "learning_rate": 1.0317460317460318e-05,
      "loss": 0.4802,
      "step": 1140
    },
    {
      "epoch": 82.0,
      "eval_accuracy": 0.7727272727272727,
      "eval_loss": 0.6386602520942688,
      "eval_runtime": 3.3979,
      "eval_samples_per_second": 129.491,
      "eval_steps_per_second": 2.06,
      "step": 1148
    },
    {
      "epoch": 82.14,
      "learning_rate": 9.92063492063492e-06,
      "loss": 0.4529,
      "step": 1150
    },
    {
      "epoch": 82.86,
      "learning_rate": 9.523809523809523e-06,
      "loss": 0.4635,
      "step": 1160
    },
    {
      "epoch": 83.0,
      "eval_accuracy": 0.7704545454545455,
      "eval_loss": 0.6206506490707397,
      "eval_runtime": 3.4119,
      "eval_samples_per_second": 128.961,
      "eval_steps_per_second": 2.052,
      "step": 1162
    },
    {
      "epoch": 83.57,
      "learning_rate": 9.126984126984127e-06,
      "loss": 0.4457,
      "step": 1170
    },
    {
      "epoch": 84.0,
      "eval_accuracy": 0.7659090909090909,
      "eval_loss": 0.6249757409095764,
      "eval_runtime": 3.4328,
      "eval_samples_per_second": 128.174,
      "eval_steps_per_second": 2.039,
      "step": 1176
    },
    {
      "epoch": 84.29,
      "learning_rate": 8.73015873015873e-06,
      "loss": 0.4665,
      "step": 1180
    },
    {
      "epoch": 85.0,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.4697,
      "step": 1190
    },
    {
      "epoch": 85.0,
      "eval_accuracy": 0.7818181818181819,
      "eval_loss": 0.611347496509552,
      "eval_runtime": 3.4099,
      "eval_samples_per_second": 129.037,
      "eval_steps_per_second": 2.053,
      "step": 1190
    },
    {
      "epoch": 85.71,
      "learning_rate": 7.936507936507936e-06,
      "loss": 0.4359,
      "step": 1200
    },
    {
      "epoch": 86.0,
      "eval_accuracy": 0.7659090909090909,
      "eval_loss": 0.637219250202179,
      "eval_runtime": 3.369,
      "eval_samples_per_second": 130.603,
      "eval_steps_per_second": 2.078,
      "step": 1204
    },
    {
      "epoch": 86.43,
      "learning_rate": 7.5396825396825394e-06,
      "loss": 0.4876,
      "step": 1210
    },
    {
      "epoch": 87.0,
      "eval_accuracy": 0.759090909090909,
      "eval_loss": 0.6338838338851929,
      "eval_runtime": 3.4298,
      "eval_samples_per_second": 128.286,
      "eval_steps_per_second": 2.041,
      "step": 1218
    },
    {
      "epoch": 87.14,
      "learning_rate": 7.142857142857143e-06,
      "loss": 0.4816,
      "step": 1220
    },
    {
      "epoch": 87.86,
      "learning_rate": 6.746031746031746e-06,
      "loss": 0.4574,
      "step": 1230
    },
    {
      "epoch": 88.0,
      "eval_accuracy": 0.7545454545454545,
      "eval_loss": 0.6318604946136475,
      "eval_runtime": 3.5206,
      "eval_samples_per_second": 124.979,
      "eval_steps_per_second": 1.988,
      "step": 1232
    },
    {
      "epoch": 88.57,
      "learning_rate": 6.349206349206349e-06,
      "loss": 0.4633,
      "step": 1240
    },
    {
      "epoch": 89.0,
      "eval_accuracy": 0.7727272727272727,
      "eval_loss": 0.6072612404823303,
      "eval_runtime": 3.6356,
      "eval_samples_per_second": 121.025,
      "eval_steps_per_second": 1.925,
      "step": 1246
    },
    {
      "epoch": 89.29,
      "learning_rate": 5.9523809523809525e-06,
      "loss": 0.4459,
      "step": 1250
    },
    {
      "epoch": 90.0,
      "learning_rate": 5.555555555555556e-06,
      "loss": 0.4548,
      "step": 1260
    },
    {
      "epoch": 90.0,
      "eval_accuracy": 0.7704545454545455,
      "eval_loss": 0.6304746866226196,
      "eval_runtime": 3.4518,
      "eval_samples_per_second": 127.471,
      "eval_steps_per_second": 2.028,
      "step": 1260
    },
    {
      "epoch": 90.71,
      "learning_rate": 5.158730158730159e-06,
      "loss": 0.4527,
      "step": 1270
    },
    {
      "epoch": 91.0,
      "eval_accuracy": 0.7681818181818182,
      "eval_loss": 0.6324154734611511,
      "eval_runtime": 3.5884,
      "eval_samples_per_second": 122.617,
      "eval_steps_per_second": 1.951,
      "step": 1274
    },
    {
      "epoch": 91.43,
      "learning_rate": 4.7619047619047615e-06,
      "loss": 0.4311,
      "step": 1280
    },
    {
      "epoch": 92.0,
      "eval_accuracy": 0.7795454545454545,
      "eval_loss": 0.6228455901145935,
      "eval_runtime": 3.543,
      "eval_samples_per_second": 124.187,
      "eval_steps_per_second": 1.976,
      "step": 1288
    },
    {
      "epoch": 92.14,
      "learning_rate": 4.365079365079365e-06,
      "loss": 0.4641,
      "step": 1290
    },
    {
      "epoch": 92.86,
      "learning_rate": 3.968253968253968e-06,
      "loss": 0.4499,
      "step": 1300
    },
    {
      "epoch": 93.0,
      "eval_accuracy": 0.7636363636363637,
      "eval_loss": 0.6188679337501526,
      "eval_runtime": 3.6596,
      "eval_samples_per_second": 120.231,
      "eval_steps_per_second": 1.913,
      "step": 1302
    },
    {
      "epoch": 93.57,
      "learning_rate": 3.5714285714285714e-06,
      "loss": 0.4333,
      "step": 1310
    },
    {
      "epoch": 94.0,
      "eval_accuracy": 0.7727272727272727,
      "eval_loss": 0.6302651166915894,
      "eval_runtime": 3.5367,
      "eval_samples_per_second": 124.41,
      "eval_steps_per_second": 1.979,
      "step": 1316
    },
    {
      "epoch": 94.29,
      "learning_rate": 3.1746031746031746e-06,
      "loss": 0.4647,
      "step": 1320
    },
    {
      "epoch": 95.0,
      "learning_rate": 2.777777777777778e-06,
      "loss": 0.4526,
      "step": 1330
    },
    {
      "epoch": 95.0,
      "eval_accuracy": 0.7704545454545455,
      "eval_loss": 0.6163169145584106,
      "eval_runtime": 3.5271,
      "eval_samples_per_second": 124.747,
      "eval_steps_per_second": 1.985,
      "step": 1330
    },
    {
      "epoch": 95.71,
      "learning_rate": 2.3809523809523808e-06,
      "loss": 0.4709,
      "step": 1340
    },
    {
      "epoch": 96.0,
      "eval_accuracy": 0.7659090909090909,
      "eval_loss": 0.618194580078125,
      "eval_runtime": 3.4856,
      "eval_samples_per_second": 126.234,
      "eval_steps_per_second": 2.008,
      "step": 1344
    },
    {
      "epoch": 96.43,
      "learning_rate": 1.984126984126984e-06,
      "loss": 0.4451,
      "step": 1350
    },
    {
      "epoch": 97.0,
      "eval_accuracy": 0.7818181818181819,
      "eval_loss": 0.6048444509506226,
      "eval_runtime": 3.4925,
      "eval_samples_per_second": 125.984,
      "eval_steps_per_second": 2.004,
      "step": 1358
    },
    {
      "epoch": 97.14,
      "learning_rate": 1.5873015873015873e-06,
      "loss": 0.4617,
      "step": 1360
    },
    {
      "epoch": 97.86,
      "learning_rate": 1.1904761904761904e-06,
      "loss": 0.4174,
      "step": 1370
    },
    {
      "epoch": 98.0,
      "eval_accuracy": 0.7659090909090909,
      "eval_loss": 0.6300343871116638,
      "eval_runtime": 3.4255,
      "eval_samples_per_second": 128.45,
      "eval_steps_per_second": 2.044,
      "step": 1372
    },
    {
      "epoch": 98.57,
      "learning_rate": 7.936507936507937e-07,
      "loss": 0.4832,
      "step": 1380
    },
    {
      "epoch": 99.0,
      "eval_accuracy": 0.7704545454545455,
      "eval_loss": 0.6194254159927368,
      "eval_runtime": 3.5142,
      "eval_samples_per_second": 125.206,
      "eval_steps_per_second": 1.992,
      "step": 1386
    },
    {
      "epoch": 99.29,
      "learning_rate": 3.9682539682539683e-07,
      "loss": 0.4501,
      "step": 1390
    },
    {
      "epoch": 100.0,
      "learning_rate": 0.0,
      "loss": 0.4508,
      "step": 1400
    },
    {
      "epoch": 100.0,
      "eval_accuracy": 0.7795454545454545,
      "eval_loss": 0.6144731640815735,
      "eval_runtime": 3.5121,
      "eval_samples_per_second": 125.28,
      "eval_steps_per_second": 1.993,
      "step": 1400
    }
  ],
  "max_steps": 1400,
  "num_train_epochs": 100,
  "total_flos": 7.496586164267827e+18,
  "trial_name": null,
  "trial_params": null
}