inteceleri-shapes / trainer_state.json
LPO-UFPA's picture
Upload 8 files
e5cc41d verified
{
"best_metric": 0.7818181818181819,
"best_model_checkpoint": "resnet-50-finetuned\\checkpoint-1190",
"epoch": 100.0,
"global_step": 1400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.71,
"learning_rate": 3.5714285714285714e-06,
"loss": 1.7972,
"step": 10
},
{
"epoch": 1.0,
"eval_accuracy": 0.11363636363636363,
"eval_loss": 1.800904393196106,
"eval_runtime": 5.5218,
"eval_samples_per_second": 79.685,
"eval_steps_per_second": 1.268,
"step": 14
},
{
"epoch": 1.43,
"learning_rate": 7.142857142857143e-06,
"loss": 1.7991,
"step": 20
},
{
"epoch": 2.0,
"eval_accuracy": 0.16136363636363638,
"eval_loss": 1.7934340238571167,
"eval_runtime": 3.2902,
"eval_samples_per_second": 133.73,
"eval_steps_per_second": 2.128,
"step": 28
},
{
"epoch": 2.14,
"learning_rate": 1.0714285714285714e-05,
"loss": 1.7925,
"step": 30
},
{
"epoch": 2.86,
"learning_rate": 1.4285714285714285e-05,
"loss": 1.7871,
"step": 40
},
{
"epoch": 3.0,
"eval_accuracy": 0.2,
"eval_loss": 1.78143310546875,
"eval_runtime": 3.2024,
"eval_samples_per_second": 137.395,
"eval_steps_per_second": 2.186,
"step": 42
},
{
"epoch": 3.57,
"learning_rate": 1.785714285714286e-05,
"loss": 1.7813,
"step": 50
},
{
"epoch": 4.0,
"eval_accuracy": 0.20909090909090908,
"eval_loss": 1.7663880586624146,
"eval_runtime": 3.3082,
"eval_samples_per_second": 133.005,
"eval_steps_per_second": 2.116,
"step": 56
},
{
"epoch": 4.29,
"learning_rate": 2.1428571428571428e-05,
"loss": 1.7673,
"step": 60
},
{
"epoch": 5.0,
"learning_rate": 2.5e-05,
"loss": 1.7602,
"step": 70
},
{
"epoch": 5.0,
"eval_accuracy": 0.2772727272727273,
"eval_loss": 1.7464295625686646,
"eval_runtime": 3.3301,
"eval_samples_per_second": 132.128,
"eval_steps_per_second": 2.102,
"step": 70
},
{
"epoch": 5.71,
"learning_rate": 2.857142857142857e-05,
"loss": 1.7455,
"step": 80
},
{
"epoch": 6.0,
"eval_accuracy": 0.2840909090909091,
"eval_loss": 1.726538896560669,
"eval_runtime": 3.2579,
"eval_samples_per_second": 135.057,
"eval_steps_per_second": 2.149,
"step": 84
},
{
"epoch": 6.43,
"learning_rate": 3.2142857142857144e-05,
"loss": 1.7321,
"step": 90
},
{
"epoch": 7.0,
"eval_accuracy": 0.2727272727272727,
"eval_loss": 1.7056454420089722,
"eval_runtime": 3.2693,
"eval_samples_per_second": 134.587,
"eval_steps_per_second": 2.141,
"step": 98
},
{
"epoch": 7.14,
"learning_rate": 3.571428571428572e-05,
"loss": 1.7215,
"step": 100
},
{
"epoch": 7.86,
"learning_rate": 3.928571428571429e-05,
"loss": 1.706,
"step": 110
},
{
"epoch": 8.0,
"eval_accuracy": 0.3068181818181818,
"eval_loss": 1.6843111515045166,
"eval_runtime": 3.2827,
"eval_samples_per_second": 134.038,
"eval_steps_per_second": 2.132,
"step": 112
},
{
"epoch": 8.57,
"learning_rate": 4.2857142857142856e-05,
"loss": 1.6891,
"step": 120
},
{
"epoch": 9.0,
"eval_accuracy": 0.325,
"eval_loss": 1.6573911905288696,
"eval_runtime": 3.1775,
"eval_samples_per_second": 138.473,
"eval_steps_per_second": 2.203,
"step": 126
},
{
"epoch": 9.29,
"learning_rate": 4.642857142857143e-05,
"loss": 1.6751,
"step": 130
},
{
"epoch": 10.0,
"learning_rate": 5e-05,
"loss": 1.6524,
"step": 140
},
{
"epoch": 10.0,
"eval_accuracy": 0.3090909090909091,
"eval_loss": 1.6302080154418945,
"eval_runtime": 3.3459,
"eval_samples_per_second": 131.503,
"eval_steps_per_second": 2.092,
"step": 140
},
{
"epoch": 10.71,
"learning_rate": 4.960317460317461e-05,
"loss": 1.6348,
"step": 150
},
{
"epoch": 11.0,
"eval_accuracy": 0.3613636363636364,
"eval_loss": 1.5982272624969482,
"eval_runtime": 3.2269,
"eval_samples_per_second": 136.356,
"eval_steps_per_second": 2.169,
"step": 154
},
{
"epoch": 11.43,
"learning_rate": 4.9206349206349204e-05,
"loss": 1.611,
"step": 160
},
{
"epoch": 12.0,
"eval_accuracy": 0.39545454545454545,
"eval_loss": 1.556486964225769,
"eval_runtime": 3.2229,
"eval_samples_per_second": 136.521,
"eval_steps_per_second": 2.172,
"step": 168
},
{
"epoch": 12.14,
"learning_rate": 4.880952380952381e-05,
"loss": 1.5891,
"step": 170
},
{
"epoch": 12.86,
"learning_rate": 4.841269841269841e-05,
"loss": 1.5673,
"step": 180
},
{
"epoch": 13.0,
"eval_accuracy": 0.41363636363636364,
"eval_loss": 1.516886591911316,
"eval_runtime": 3.2733,
"eval_samples_per_second": 134.423,
"eval_steps_per_second": 2.139,
"step": 182
},
{
"epoch": 13.57,
"learning_rate": 4.801587301587302e-05,
"loss": 1.5239,
"step": 190
},
{
"epoch": 14.0,
"eval_accuracy": 0.509090909090909,
"eval_loss": 1.4703104496002197,
"eval_runtime": 3.2812,
"eval_samples_per_second": 134.096,
"eval_steps_per_second": 2.133,
"step": 196
},
{
"epoch": 14.29,
"learning_rate": 4.761904761904762e-05,
"loss": 1.5131,
"step": 200
},
{
"epoch": 15.0,
"learning_rate": 4.722222222222222e-05,
"loss": 1.4698,
"step": 210
},
{
"epoch": 15.0,
"eval_accuracy": 0.5363636363636364,
"eval_loss": 1.4071372747421265,
"eval_runtime": 3.3595,
"eval_samples_per_second": 130.973,
"eval_steps_per_second": 2.084,
"step": 210
},
{
"epoch": 15.71,
"learning_rate": 4.682539682539683e-05,
"loss": 1.4277,
"step": 220
},
{
"epoch": 16.0,
"eval_accuracy": 0.5886363636363636,
"eval_loss": 1.3305206298828125,
"eval_runtime": 3.3151,
"eval_samples_per_second": 132.725,
"eval_steps_per_second": 2.112,
"step": 224
},
{
"epoch": 16.43,
"learning_rate": 4.642857142857143e-05,
"loss": 1.3798,
"step": 230
},
{
"epoch": 17.0,
"eval_accuracy": 0.6068181818181818,
"eval_loss": 1.250695824623108,
"eval_runtime": 3.348,
"eval_samples_per_second": 131.42,
"eval_steps_per_second": 2.091,
"step": 238
},
{
"epoch": 17.14,
"learning_rate": 4.603174603174603e-05,
"loss": 1.355,
"step": 240
},
{
"epoch": 17.86,
"learning_rate": 4.563492063492064e-05,
"loss": 1.2858,
"step": 250
},
{
"epoch": 18.0,
"eval_accuracy": 0.6409090909090909,
"eval_loss": 1.1516846418380737,
"eval_runtime": 3.35,
"eval_samples_per_second": 131.342,
"eval_steps_per_second": 2.09,
"step": 252
},
{
"epoch": 18.57,
"learning_rate": 4.523809523809524e-05,
"loss": 1.2543,
"step": 260
},
{
"epoch": 19.0,
"eval_accuracy": 0.6659090909090909,
"eval_loss": 1.0930873155593872,
"eval_runtime": 3.4597,
"eval_samples_per_second": 127.177,
"eval_steps_per_second": 2.023,
"step": 266
},
{
"epoch": 19.29,
"learning_rate": 4.4841269841269846e-05,
"loss": 1.2006,
"step": 270
},
{
"epoch": 20.0,
"learning_rate": 4.4444444444444447e-05,
"loss": 1.1696,
"step": 280
},
{
"epoch": 20.0,
"eval_accuracy": 0.675,
"eval_loss": 1.012363314628601,
"eval_runtime": 3.3077,
"eval_samples_per_second": 133.023,
"eval_steps_per_second": 2.116,
"step": 280
},
{
"epoch": 20.71,
"learning_rate": 4.404761904761905e-05,
"loss": 1.1124,
"step": 290
},
{
"epoch": 21.0,
"eval_accuracy": 0.6931818181818182,
"eval_loss": 0.954047679901123,
"eval_runtime": 3.3411,
"eval_samples_per_second": 131.693,
"eval_steps_per_second": 2.095,
"step": 294
},
{
"epoch": 21.43,
"learning_rate": 4.3650793650793655e-05,
"loss": 1.059,
"step": 300
},
{
"epoch": 22.0,
"eval_accuracy": 0.7022727272727273,
"eval_loss": 0.9010851979255676,
"eval_runtime": 3.6911,
"eval_samples_per_second": 119.205,
"eval_steps_per_second": 1.896,
"step": 308
},
{
"epoch": 22.14,
"learning_rate": 4.3253968253968256e-05,
"loss": 1.0318,
"step": 310
},
{
"epoch": 22.86,
"learning_rate": 4.2857142857142856e-05,
"loss": 1.0006,
"step": 320
},
{
"epoch": 23.0,
"eval_accuracy": 0.7090909090909091,
"eval_loss": 0.8692610859870911,
"eval_runtime": 3.4254,
"eval_samples_per_second": 128.451,
"eval_steps_per_second": 2.044,
"step": 322
},
{
"epoch": 23.57,
"learning_rate": 4.2460317460317464e-05,
"loss": 0.9606,
"step": 330
},
{
"epoch": 24.0,
"eval_accuracy": 0.6977272727272728,
"eval_loss": 0.8458234667778015,
"eval_runtime": 3.7789,
"eval_samples_per_second": 116.436,
"eval_steps_per_second": 1.852,
"step": 336
},
{
"epoch": 24.29,
"learning_rate": 4.2063492063492065e-05,
"loss": 0.9541,
"step": 340
},
{
"epoch": 25.0,
"learning_rate": 4.166666666666667e-05,
"loss": 0.9146,
"step": 350
},
{
"epoch": 25.0,
"eval_accuracy": 0.7227272727272728,
"eval_loss": 0.8226051926612854,
"eval_runtime": 3.375,
"eval_samples_per_second": 130.371,
"eval_steps_per_second": 2.074,
"step": 350
},
{
"epoch": 25.71,
"learning_rate": 4.126984126984127e-05,
"loss": 0.8864,
"step": 360
},
{
"epoch": 26.0,
"eval_accuracy": 0.7159090909090909,
"eval_loss": 0.8187159895896912,
"eval_runtime": 3.4251,
"eval_samples_per_second": 128.462,
"eval_steps_per_second": 2.044,
"step": 364
},
{
"epoch": 26.43,
"learning_rate": 4.0873015873015874e-05,
"loss": 0.8864,
"step": 370
},
{
"epoch": 27.0,
"eval_accuracy": 0.7227272727272728,
"eval_loss": 0.7862333655357361,
"eval_runtime": 3.3921,
"eval_samples_per_second": 129.714,
"eval_steps_per_second": 2.064,
"step": 378
},
{
"epoch": 27.14,
"learning_rate": 4.047619047619048e-05,
"loss": 0.8447,
"step": 380
},
{
"epoch": 27.86,
"learning_rate": 4.007936507936508e-05,
"loss": 0.8458,
"step": 390
},
{
"epoch": 28.0,
"eval_accuracy": 0.7295454545454545,
"eval_loss": 0.7686573266983032,
"eval_runtime": 3.4126,
"eval_samples_per_second": 128.935,
"eval_steps_per_second": 2.051,
"step": 392
},
{
"epoch": 28.57,
"learning_rate": 3.968253968253968e-05,
"loss": 0.8337,
"step": 400
},
{
"epoch": 29.0,
"eval_accuracy": 0.7272727272727273,
"eval_loss": 0.747409999370575,
"eval_runtime": 3.4047,
"eval_samples_per_second": 129.233,
"eval_steps_per_second": 2.056,
"step": 406
},
{
"epoch": 29.29,
"learning_rate": 3.928571428571429e-05,
"loss": 0.8107,
"step": 410
},
{
"epoch": 30.0,
"learning_rate": 3.888888888888889e-05,
"loss": 0.7943,
"step": 420
},
{
"epoch": 30.0,
"eval_accuracy": 0.7454545454545455,
"eval_loss": 0.7368342876434326,
"eval_runtime": 3.4657,
"eval_samples_per_second": 126.957,
"eval_steps_per_second": 2.02,
"step": 420
},
{
"epoch": 30.71,
"learning_rate": 3.84920634920635e-05,
"loss": 0.7884,
"step": 430
},
{
"epoch": 31.0,
"eval_accuracy": 0.7454545454545455,
"eval_loss": 0.7331739068031311,
"eval_runtime": 3.4308,
"eval_samples_per_second": 128.249,
"eval_steps_per_second": 2.04,
"step": 434
},
{
"epoch": 31.43,
"learning_rate": 3.809523809523809e-05,
"loss": 0.7495,
"step": 440
},
{
"epoch": 32.0,
"eval_accuracy": 0.725,
"eval_loss": 0.7354092597961426,
"eval_runtime": 3.3899,
"eval_samples_per_second": 129.796,
"eval_steps_per_second": 2.065,
"step": 448
},
{
"epoch": 32.14,
"learning_rate": 3.76984126984127e-05,
"loss": 0.78,
"step": 450
},
{
"epoch": 32.86,
"learning_rate": 3.730158730158731e-05,
"loss": 0.7503,
"step": 460
},
{
"epoch": 33.0,
"eval_accuracy": 0.7477272727272727,
"eval_loss": 0.7180919051170349,
"eval_runtime": 3.3351,
"eval_samples_per_second": 131.931,
"eval_steps_per_second": 2.099,
"step": 462
},
{
"epoch": 33.57,
"learning_rate": 3.690476190476191e-05,
"loss": 0.7358,
"step": 470
},
{
"epoch": 34.0,
"eval_accuracy": 0.7477272727272727,
"eval_loss": 0.7007948160171509,
"eval_runtime": 3.4029,
"eval_samples_per_second": 129.301,
"eval_steps_per_second": 2.057,
"step": 476
},
{
"epoch": 34.29,
"learning_rate": 3.650793650793651e-05,
"loss": 0.7605,
"step": 480
},
{
"epoch": 35.0,
"learning_rate": 3.611111111111111e-05,
"loss": 0.7229,
"step": 490
},
{
"epoch": 35.0,
"eval_accuracy": 0.740909090909091,
"eval_loss": 0.6959581971168518,
"eval_runtime": 3.6574,
"eval_samples_per_second": 120.304,
"eval_steps_per_second": 1.914,
"step": 490
},
{
"epoch": 35.71,
"learning_rate": 3.571428571428572e-05,
"loss": 0.7114,
"step": 500
},
{
"epoch": 36.0,
"eval_accuracy": 0.7431818181818182,
"eval_loss": 0.6837091445922852,
"eval_runtime": 3.6041,
"eval_samples_per_second": 122.083,
"eval_steps_per_second": 1.942,
"step": 504
},
{
"epoch": 36.43,
"learning_rate": 3.5317460317460324e-05,
"loss": 0.7075,
"step": 510
},
{
"epoch": 37.0,
"eval_accuracy": 0.740909090909091,
"eval_loss": 0.6922788023948669,
"eval_runtime": 3.3849,
"eval_samples_per_second": 129.987,
"eval_steps_per_second": 2.068,
"step": 518
},
{
"epoch": 37.14,
"learning_rate": 3.492063492063492e-05,
"loss": 0.6915,
"step": 520
},
{
"epoch": 37.86,
"learning_rate": 3.4523809523809526e-05,
"loss": 0.682,
"step": 530
},
{
"epoch": 38.0,
"eval_accuracy": 0.7431818181818182,
"eval_loss": 0.6930937767028809,
"eval_runtime": 3.4683,
"eval_samples_per_second": 126.864,
"eval_steps_per_second": 2.018,
"step": 532
},
{
"epoch": 38.57,
"learning_rate": 3.412698412698413e-05,
"loss": 0.6626,
"step": 540
},
{
"epoch": 39.0,
"eval_accuracy": 0.740909090909091,
"eval_loss": 0.6872156262397766,
"eval_runtime": 3.3949,
"eval_samples_per_second": 129.605,
"eval_steps_per_second": 2.062,
"step": 546
},
{
"epoch": 39.29,
"learning_rate": 3.3730158730158734e-05,
"loss": 0.6708,
"step": 550
},
{
"epoch": 40.0,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.6688,
"step": 560
},
{
"epoch": 40.0,
"eval_accuracy": 0.75,
"eval_loss": 0.6758830547332764,
"eval_runtime": 3.2304,
"eval_samples_per_second": 136.208,
"eval_steps_per_second": 2.167,
"step": 560
},
{
"epoch": 40.71,
"learning_rate": 3.2936507936507936e-05,
"loss": 0.655,
"step": 570
},
{
"epoch": 41.0,
"eval_accuracy": 0.7340909090909091,
"eval_loss": 0.7006358504295349,
"eval_runtime": 3.3002,
"eval_samples_per_second": 133.326,
"eval_steps_per_second": 2.121,
"step": 574
},
{
"epoch": 41.43,
"learning_rate": 3.253968253968254e-05,
"loss": 0.6533,
"step": 580
},
{
"epoch": 42.0,
"eval_accuracy": 0.75,
"eval_loss": 0.6774057149887085,
"eval_runtime": 3.351,
"eval_samples_per_second": 131.303,
"eval_steps_per_second": 2.089,
"step": 588
},
{
"epoch": 42.14,
"learning_rate": 3.2142857142857144e-05,
"loss": 0.6488,
"step": 590
},
{
"epoch": 42.86,
"learning_rate": 3.1746031746031745e-05,
"loss": 0.6215,
"step": 600
},
{
"epoch": 43.0,
"eval_accuracy": 0.7454545454545455,
"eval_loss": 0.6663152575492859,
"eval_runtime": 3.4833,
"eval_samples_per_second": 126.317,
"eval_steps_per_second": 2.01,
"step": 602
},
{
"epoch": 43.57,
"learning_rate": 3.134920634920635e-05,
"loss": 0.6267,
"step": 610
},
{
"epoch": 44.0,
"eval_accuracy": 0.7545454545454545,
"eval_loss": 0.6608263850212097,
"eval_runtime": 3.375,
"eval_samples_per_second": 130.371,
"eval_steps_per_second": 2.074,
"step": 616
},
{
"epoch": 44.29,
"learning_rate": 3.095238095238095e-05,
"loss": 0.6383,
"step": 620
},
{
"epoch": 45.0,
"learning_rate": 3.055555555555556e-05,
"loss": 0.6122,
"step": 630
},
{
"epoch": 45.0,
"eval_accuracy": 0.7477272727272727,
"eval_loss": 0.6523904800415039,
"eval_runtime": 3.3102,
"eval_samples_per_second": 132.923,
"eval_steps_per_second": 2.115,
"step": 630
},
{
"epoch": 45.71,
"learning_rate": 3.0158730158730158e-05,
"loss": 0.6047,
"step": 640
},
{
"epoch": 46.0,
"eval_accuracy": 0.7477272727272727,
"eval_loss": 0.6757220029830933,
"eval_runtime": 3.3012,
"eval_samples_per_second": 133.286,
"eval_steps_per_second": 2.12,
"step": 644
},
{
"epoch": 46.43,
"learning_rate": 2.9761904761904762e-05,
"loss": 0.6135,
"step": 650
},
{
"epoch": 47.0,
"eval_accuracy": 0.7431818181818182,
"eval_loss": 0.6492685675621033,
"eval_runtime": 3.4379,
"eval_samples_per_second": 127.986,
"eval_steps_per_second": 2.036,
"step": 658
},
{
"epoch": 47.14,
"learning_rate": 2.9365079365079366e-05,
"loss": 0.6145,
"step": 660
},
{
"epoch": 47.86,
"learning_rate": 2.8968253968253974e-05,
"loss": 0.5805,
"step": 670
},
{
"epoch": 48.0,
"eval_accuracy": 0.75,
"eval_loss": 0.6503338813781738,
"eval_runtime": 3.383,
"eval_samples_per_second": 130.064,
"eval_steps_per_second": 2.069,
"step": 672
},
{
"epoch": 48.57,
"learning_rate": 2.857142857142857e-05,
"loss": 0.6124,
"step": 680
},
{
"epoch": 49.0,
"eval_accuracy": 0.7454545454545455,
"eval_loss": 0.6626009941101074,
"eval_runtime": 3.5236,
"eval_samples_per_second": 124.873,
"eval_steps_per_second": 1.987,
"step": 686
},
{
"epoch": 49.29,
"learning_rate": 2.8174603174603175e-05,
"loss": 0.6079,
"step": 690
},
{
"epoch": 50.0,
"learning_rate": 2.777777777777778e-05,
"loss": 0.5826,
"step": 700
},
{
"epoch": 50.0,
"eval_accuracy": 0.7522727272727273,
"eval_loss": 0.6511955857276917,
"eval_runtime": 3.2723,
"eval_samples_per_second": 134.464,
"eval_steps_per_second": 2.139,
"step": 700
},
{
"epoch": 50.71,
"learning_rate": 2.7380952380952383e-05,
"loss": 0.5698,
"step": 710
},
{
"epoch": 51.0,
"eval_accuracy": 0.7477272727272727,
"eval_loss": 0.6589775085449219,
"eval_runtime": 3.4253,
"eval_samples_per_second": 128.455,
"eval_steps_per_second": 2.044,
"step": 714
},
{
"epoch": 51.43,
"learning_rate": 2.6984126984126984e-05,
"loss": 0.5721,
"step": 720
},
{
"epoch": 52.0,
"eval_accuracy": 0.7454545454545455,
"eval_loss": 0.6558127403259277,
"eval_runtime": 3.2646,
"eval_samples_per_second": 134.777,
"eval_steps_per_second": 2.144,
"step": 728
},
{
"epoch": 52.14,
"learning_rate": 2.6587301587301588e-05,
"loss": 0.5912,
"step": 730
},
{
"epoch": 52.86,
"learning_rate": 2.6190476190476192e-05,
"loss": 0.5569,
"step": 740
},
{
"epoch": 53.0,
"eval_accuracy": 0.7454545454545455,
"eval_loss": 0.6654322743415833,
"eval_runtime": 3.2932,
"eval_samples_per_second": 133.609,
"eval_steps_per_second": 2.126,
"step": 742
},
{
"epoch": 53.57,
"learning_rate": 2.5793650793650796e-05,
"loss": 0.573,
"step": 750
},
{
"epoch": 54.0,
"eval_accuracy": 0.7545454545454545,
"eval_loss": 0.636202335357666,
"eval_runtime": 3.3271,
"eval_samples_per_second": 132.247,
"eval_steps_per_second": 2.104,
"step": 756
},
{
"epoch": 54.29,
"learning_rate": 2.5396825396825397e-05,
"loss": 0.5816,
"step": 760
},
{
"epoch": 55.0,
"learning_rate": 2.5e-05,
"loss": 0.543,
"step": 770
},
{
"epoch": 55.0,
"eval_accuracy": 0.7636363636363637,
"eval_loss": 0.6368376016616821,
"eval_runtime": 3.2054,
"eval_samples_per_second": 137.267,
"eval_steps_per_second": 2.184,
"step": 770
},
{
"epoch": 55.71,
"learning_rate": 2.4603174603174602e-05,
"loss": 0.5494,
"step": 780
},
{
"epoch": 56.0,
"eval_accuracy": 0.759090909090909,
"eval_loss": 0.6263792514801025,
"eval_runtime": 3.2663,
"eval_samples_per_second": 134.71,
"eval_steps_per_second": 2.143,
"step": 784
},
{
"epoch": 56.43,
"learning_rate": 2.4206349206349206e-05,
"loss": 0.5598,
"step": 790
},
{
"epoch": 57.0,
"eval_accuracy": 0.7681818181818182,
"eval_loss": 0.627074658870697,
"eval_runtime": 3.4193,
"eval_samples_per_second": 128.683,
"eval_steps_per_second": 2.047,
"step": 798
},
{
"epoch": 57.14,
"learning_rate": 2.380952380952381e-05,
"loss": 0.5655,
"step": 800
},
{
"epoch": 57.86,
"learning_rate": 2.3412698412698414e-05,
"loss": 0.5412,
"step": 810
},
{
"epoch": 58.0,
"eval_accuracy": 0.7522727272727273,
"eval_loss": 0.6506673693656921,
"eval_runtime": 3.2684,
"eval_samples_per_second": 134.623,
"eval_steps_per_second": 2.142,
"step": 812
},
{
"epoch": 58.57,
"learning_rate": 2.3015873015873015e-05,
"loss": 0.5512,
"step": 820
},
{
"epoch": 59.0,
"eval_accuracy": 0.7568181818181818,
"eval_loss": 0.6446354985237122,
"eval_runtime": 3.3303,
"eval_samples_per_second": 132.119,
"eval_steps_per_second": 2.102,
"step": 826
},
{
"epoch": 59.29,
"learning_rate": 2.261904761904762e-05,
"loss": 0.5217,
"step": 830
},
{
"epoch": 60.0,
"learning_rate": 2.2222222222222223e-05,
"loss": 0.5504,
"step": 840
},
{
"epoch": 60.0,
"eval_accuracy": 0.7727272727272727,
"eval_loss": 0.6208813190460205,
"eval_runtime": 3.2503,
"eval_samples_per_second": 135.372,
"eval_steps_per_second": 2.154,
"step": 840
},
{
"epoch": 60.71,
"learning_rate": 2.1825396825396827e-05,
"loss": 0.5304,
"step": 850
},
{
"epoch": 61.0,
"eval_accuracy": 0.7568181818181818,
"eval_loss": 0.6428102850914001,
"eval_runtime": 3.4103,
"eval_samples_per_second": 129.021,
"eval_steps_per_second": 2.053,
"step": 854
},
{
"epoch": 61.43,
"learning_rate": 2.1428571428571428e-05,
"loss": 0.5238,
"step": 860
},
{
"epoch": 62.0,
"eval_accuracy": 0.7613636363636364,
"eval_loss": 0.6323724985122681,
"eval_runtime": 3.2991,
"eval_samples_per_second": 133.368,
"eval_steps_per_second": 2.122,
"step": 868
},
{
"epoch": 62.14,
"learning_rate": 2.1031746031746032e-05,
"loss": 0.5344,
"step": 870
},
{
"epoch": 62.86,
"learning_rate": 2.0634920634920636e-05,
"loss": 0.5298,
"step": 880
},
{
"epoch": 63.0,
"eval_accuracy": 0.7568181818181818,
"eval_loss": 0.6347512006759644,
"eval_runtime": 3.2796,
"eval_samples_per_second": 134.162,
"eval_steps_per_second": 2.134,
"step": 882
},
{
"epoch": 63.57,
"learning_rate": 2.023809523809524e-05,
"loss": 0.4965,
"step": 890
},
{
"epoch": 64.0,
"eval_accuracy": 0.7545454545454545,
"eval_loss": 0.6351690292358398,
"eval_runtime": 3.394,
"eval_samples_per_second": 129.639,
"eval_steps_per_second": 2.062,
"step": 896
},
{
"epoch": 64.29,
"learning_rate": 1.984126984126984e-05,
"loss": 0.5338,
"step": 900
},
{
"epoch": 65.0,
"learning_rate": 1.9444444444444445e-05,
"loss": 0.5188,
"step": 910
},
{
"epoch": 65.0,
"eval_accuracy": 0.7704545454545455,
"eval_loss": 0.6352004408836365,
"eval_runtime": 3.3712,
"eval_samples_per_second": 130.519,
"eval_steps_per_second": 2.076,
"step": 910
},
{
"epoch": 65.71,
"learning_rate": 1.9047619047619046e-05,
"loss": 0.5081,
"step": 920
},
{
"epoch": 66.0,
"eval_accuracy": 0.7681818181818182,
"eval_loss": 0.6369162201881409,
"eval_runtime": 3.3987,
"eval_samples_per_second": 129.462,
"eval_steps_per_second": 2.06,
"step": 924
},
{
"epoch": 66.43,
"learning_rate": 1.8650793650793654e-05,
"loss": 0.4725,
"step": 930
},
{
"epoch": 67.0,
"eval_accuracy": 0.775,
"eval_loss": 0.6235827207565308,
"eval_runtime": 3.2862,
"eval_samples_per_second": 133.893,
"eval_steps_per_second": 2.13,
"step": 938
},
{
"epoch": 67.14,
"learning_rate": 1.8253968253968254e-05,
"loss": 0.52,
"step": 940
},
{
"epoch": 67.86,
"learning_rate": 1.785714285714286e-05,
"loss": 0.4977,
"step": 950
},
{
"epoch": 68.0,
"eval_accuracy": 0.7681818181818182,
"eval_loss": 0.621067225933075,
"eval_runtime": 3.3453,
"eval_samples_per_second": 131.527,
"eval_steps_per_second": 2.092,
"step": 952
},
{
"epoch": 68.57,
"learning_rate": 1.746031746031746e-05,
"loss": 0.5118,
"step": 960
},
{
"epoch": 69.0,
"eval_accuracy": 0.7568181818181818,
"eval_loss": 0.6289528012275696,
"eval_runtime": 3.3375,
"eval_samples_per_second": 131.834,
"eval_steps_per_second": 2.097,
"step": 966
},
{
"epoch": 69.29,
"learning_rate": 1.7063492063492063e-05,
"loss": 0.5001,
"step": 970
},
{
"epoch": 70.0,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.4876,
"step": 980
},
{
"epoch": 70.0,
"eval_accuracy": 0.7636363636363637,
"eval_loss": 0.6366029381752014,
"eval_runtime": 3.2398,
"eval_samples_per_second": 135.809,
"eval_steps_per_second": 2.161,
"step": 980
},
{
"epoch": 70.71,
"learning_rate": 1.626984126984127e-05,
"loss": 0.49,
"step": 990
},
{
"epoch": 71.0,
"eval_accuracy": 0.7727272727272727,
"eval_loss": 0.6185997128486633,
"eval_runtime": 3.2603,
"eval_samples_per_second": 134.958,
"eval_steps_per_second": 2.147,
"step": 994
},
{
"epoch": 71.43,
"learning_rate": 1.5873015873015872e-05,
"loss": 0.4912,
"step": 1000
},
{
"epoch": 72.0,
"eval_accuracy": 0.7727272727272727,
"eval_loss": 0.6244505643844604,
"eval_runtime": 3.3698,
"eval_samples_per_second": 130.572,
"eval_steps_per_second": 2.077,
"step": 1008
},
{
"epoch": 72.14,
"learning_rate": 1.5476190476190476e-05,
"loss": 0.4971,
"step": 1010
},
{
"epoch": 72.86,
"learning_rate": 1.5079365079365079e-05,
"loss": 0.4774,
"step": 1020
},
{
"epoch": 73.0,
"eval_accuracy": 0.7727272727272727,
"eval_loss": 0.6223783493041992,
"eval_runtime": 3.4233,
"eval_samples_per_second": 128.533,
"eval_steps_per_second": 2.045,
"step": 1022
},
{
"epoch": 73.57,
"learning_rate": 1.4682539682539683e-05,
"loss": 0.4972,
"step": 1030
},
{
"epoch": 74.0,
"eval_accuracy": 0.7454545454545455,
"eval_loss": 0.6407915353775024,
"eval_runtime": 3.2294,
"eval_samples_per_second": 136.25,
"eval_steps_per_second": 2.168,
"step": 1036
},
{
"epoch": 74.29,
"learning_rate": 1.4285714285714285e-05,
"loss": 0.4663,
"step": 1040
},
{
"epoch": 75.0,
"learning_rate": 1.388888888888889e-05,
"loss": 0.4973,
"step": 1050
},
{
"epoch": 75.0,
"eval_accuracy": 0.7795454545454545,
"eval_loss": 0.6241269707679749,
"eval_runtime": 3.4141,
"eval_samples_per_second": 128.879,
"eval_steps_per_second": 2.05,
"step": 1050
},
{
"epoch": 75.71,
"learning_rate": 1.3492063492063492e-05,
"loss": 0.4807,
"step": 1060
},
{
"epoch": 76.0,
"eval_accuracy": 0.7727272727272727,
"eval_loss": 0.6188752055168152,
"eval_runtime": 3.372,
"eval_samples_per_second": 130.487,
"eval_steps_per_second": 2.076,
"step": 1064
},
{
"epoch": 76.43,
"learning_rate": 1.3095238095238096e-05,
"loss": 0.4845,
"step": 1070
},
{
"epoch": 77.0,
"eval_accuracy": 0.775,
"eval_loss": 0.6290163397789001,
"eval_runtime": 3.37,
"eval_samples_per_second": 130.564,
"eval_steps_per_second": 2.077,
"step": 1078
},
{
"epoch": 77.14,
"learning_rate": 1.2698412698412699e-05,
"loss": 0.4699,
"step": 1080
},
{
"epoch": 77.86,
"learning_rate": 1.2301587301587301e-05,
"loss": 0.4874,
"step": 1090
},
{
"epoch": 78.0,
"eval_accuracy": 0.7772727272727272,
"eval_loss": 0.6192808747291565,
"eval_runtime": 3.365,
"eval_samples_per_second": 130.758,
"eval_steps_per_second": 2.08,
"step": 1092
},
{
"epoch": 78.57,
"learning_rate": 1.1904761904761905e-05,
"loss": 0.4898,
"step": 1100
},
{
"epoch": 79.0,
"eval_accuracy": 0.775,
"eval_loss": 0.643506646156311,
"eval_runtime": 3.4199,
"eval_samples_per_second": 128.66,
"eval_steps_per_second": 2.047,
"step": 1106
},
{
"epoch": 79.29,
"learning_rate": 1.1507936507936508e-05,
"loss": 0.4844,
"step": 1110
},
{
"epoch": 80.0,
"learning_rate": 1.1111111111111112e-05,
"loss": 0.4904,
"step": 1120
},
{
"epoch": 80.0,
"eval_accuracy": 0.7613636363636364,
"eval_loss": 0.6483902931213379,
"eval_runtime": 3.373,
"eval_samples_per_second": 130.448,
"eval_steps_per_second": 2.075,
"step": 1120
},
{
"epoch": 80.71,
"learning_rate": 1.0714285714285714e-05,
"loss": 0.4702,
"step": 1130
},
{
"epoch": 81.0,
"eval_accuracy": 0.7727272727272727,
"eval_loss": 0.6238275170326233,
"eval_runtime": 3.5076,
"eval_samples_per_second": 125.441,
"eval_steps_per_second": 1.996,
"step": 1134
},
{
"epoch": 81.43,
"learning_rate": 1.0317460317460318e-05,
"loss": 0.4802,
"step": 1140
},
{
"epoch": 82.0,
"eval_accuracy": 0.7727272727272727,
"eval_loss": 0.6386602520942688,
"eval_runtime": 3.3979,
"eval_samples_per_second": 129.491,
"eval_steps_per_second": 2.06,
"step": 1148
},
{
"epoch": 82.14,
"learning_rate": 9.92063492063492e-06,
"loss": 0.4529,
"step": 1150
},
{
"epoch": 82.86,
"learning_rate": 9.523809523809523e-06,
"loss": 0.4635,
"step": 1160
},
{
"epoch": 83.0,
"eval_accuracy": 0.7704545454545455,
"eval_loss": 0.6206506490707397,
"eval_runtime": 3.4119,
"eval_samples_per_second": 128.961,
"eval_steps_per_second": 2.052,
"step": 1162
},
{
"epoch": 83.57,
"learning_rate": 9.126984126984127e-06,
"loss": 0.4457,
"step": 1170
},
{
"epoch": 84.0,
"eval_accuracy": 0.7659090909090909,
"eval_loss": 0.6249757409095764,
"eval_runtime": 3.4328,
"eval_samples_per_second": 128.174,
"eval_steps_per_second": 2.039,
"step": 1176
},
{
"epoch": 84.29,
"learning_rate": 8.73015873015873e-06,
"loss": 0.4665,
"step": 1180
},
{
"epoch": 85.0,
"learning_rate": 8.333333333333334e-06,
"loss": 0.4697,
"step": 1190
},
{
"epoch": 85.0,
"eval_accuracy": 0.7818181818181819,
"eval_loss": 0.611347496509552,
"eval_runtime": 3.4099,
"eval_samples_per_second": 129.037,
"eval_steps_per_second": 2.053,
"step": 1190
},
{
"epoch": 85.71,
"learning_rate": 7.936507936507936e-06,
"loss": 0.4359,
"step": 1200
},
{
"epoch": 86.0,
"eval_accuracy": 0.7659090909090909,
"eval_loss": 0.637219250202179,
"eval_runtime": 3.369,
"eval_samples_per_second": 130.603,
"eval_steps_per_second": 2.078,
"step": 1204
},
{
"epoch": 86.43,
"learning_rate": 7.5396825396825394e-06,
"loss": 0.4876,
"step": 1210
},
{
"epoch": 87.0,
"eval_accuracy": 0.759090909090909,
"eval_loss": 0.6338838338851929,
"eval_runtime": 3.4298,
"eval_samples_per_second": 128.286,
"eval_steps_per_second": 2.041,
"step": 1218
},
{
"epoch": 87.14,
"learning_rate": 7.142857142857143e-06,
"loss": 0.4816,
"step": 1220
},
{
"epoch": 87.86,
"learning_rate": 6.746031746031746e-06,
"loss": 0.4574,
"step": 1230
},
{
"epoch": 88.0,
"eval_accuracy": 0.7545454545454545,
"eval_loss": 0.6318604946136475,
"eval_runtime": 3.5206,
"eval_samples_per_second": 124.979,
"eval_steps_per_second": 1.988,
"step": 1232
},
{
"epoch": 88.57,
"learning_rate": 6.349206349206349e-06,
"loss": 0.4633,
"step": 1240
},
{
"epoch": 89.0,
"eval_accuracy": 0.7727272727272727,
"eval_loss": 0.6072612404823303,
"eval_runtime": 3.6356,
"eval_samples_per_second": 121.025,
"eval_steps_per_second": 1.925,
"step": 1246
},
{
"epoch": 89.29,
"learning_rate": 5.9523809523809525e-06,
"loss": 0.4459,
"step": 1250
},
{
"epoch": 90.0,
"learning_rate": 5.555555555555556e-06,
"loss": 0.4548,
"step": 1260
},
{
"epoch": 90.0,
"eval_accuracy": 0.7704545454545455,
"eval_loss": 0.6304746866226196,
"eval_runtime": 3.4518,
"eval_samples_per_second": 127.471,
"eval_steps_per_second": 2.028,
"step": 1260
},
{
"epoch": 90.71,
"learning_rate": 5.158730158730159e-06,
"loss": 0.4527,
"step": 1270
},
{
"epoch": 91.0,
"eval_accuracy": 0.7681818181818182,
"eval_loss": 0.6324154734611511,
"eval_runtime": 3.5884,
"eval_samples_per_second": 122.617,
"eval_steps_per_second": 1.951,
"step": 1274
},
{
"epoch": 91.43,
"learning_rate": 4.7619047619047615e-06,
"loss": 0.4311,
"step": 1280
},
{
"epoch": 92.0,
"eval_accuracy": 0.7795454545454545,
"eval_loss": 0.6228455901145935,
"eval_runtime": 3.543,
"eval_samples_per_second": 124.187,
"eval_steps_per_second": 1.976,
"step": 1288
},
{
"epoch": 92.14,
"learning_rate": 4.365079365079365e-06,
"loss": 0.4641,
"step": 1290
},
{
"epoch": 92.86,
"learning_rate": 3.968253968253968e-06,
"loss": 0.4499,
"step": 1300
},
{
"epoch": 93.0,
"eval_accuracy": 0.7636363636363637,
"eval_loss": 0.6188679337501526,
"eval_runtime": 3.6596,
"eval_samples_per_second": 120.231,
"eval_steps_per_second": 1.913,
"step": 1302
},
{
"epoch": 93.57,
"learning_rate": 3.5714285714285714e-06,
"loss": 0.4333,
"step": 1310
},
{
"epoch": 94.0,
"eval_accuracy": 0.7727272727272727,
"eval_loss": 0.6302651166915894,
"eval_runtime": 3.5367,
"eval_samples_per_second": 124.41,
"eval_steps_per_second": 1.979,
"step": 1316
},
{
"epoch": 94.29,
"learning_rate": 3.1746031746031746e-06,
"loss": 0.4647,
"step": 1320
},
{
"epoch": 95.0,
"learning_rate": 2.777777777777778e-06,
"loss": 0.4526,
"step": 1330
},
{
"epoch": 95.0,
"eval_accuracy": 0.7704545454545455,
"eval_loss": 0.6163169145584106,
"eval_runtime": 3.5271,
"eval_samples_per_second": 124.747,
"eval_steps_per_second": 1.985,
"step": 1330
},
{
"epoch": 95.71,
"learning_rate": 2.3809523809523808e-06,
"loss": 0.4709,
"step": 1340
},
{
"epoch": 96.0,
"eval_accuracy": 0.7659090909090909,
"eval_loss": 0.618194580078125,
"eval_runtime": 3.4856,
"eval_samples_per_second": 126.234,
"eval_steps_per_second": 2.008,
"step": 1344
},
{
"epoch": 96.43,
"learning_rate": 1.984126984126984e-06,
"loss": 0.4451,
"step": 1350
},
{
"epoch": 97.0,
"eval_accuracy": 0.7818181818181819,
"eval_loss": 0.6048444509506226,
"eval_runtime": 3.4925,
"eval_samples_per_second": 125.984,
"eval_steps_per_second": 2.004,
"step": 1358
},
{
"epoch": 97.14,
"learning_rate": 1.5873015873015873e-06,
"loss": 0.4617,
"step": 1360
},
{
"epoch": 97.86,
"learning_rate": 1.1904761904761904e-06,
"loss": 0.4174,
"step": 1370
},
{
"epoch": 98.0,
"eval_accuracy": 0.7659090909090909,
"eval_loss": 0.6300343871116638,
"eval_runtime": 3.4255,
"eval_samples_per_second": 128.45,
"eval_steps_per_second": 2.044,
"step": 1372
},
{
"epoch": 98.57,
"learning_rate": 7.936507936507937e-07,
"loss": 0.4832,
"step": 1380
},
{
"epoch": 99.0,
"eval_accuracy": 0.7704545454545455,
"eval_loss": 0.6194254159927368,
"eval_runtime": 3.5142,
"eval_samples_per_second": 125.206,
"eval_steps_per_second": 1.992,
"step": 1386
},
{
"epoch": 99.29,
"learning_rate": 3.9682539682539683e-07,
"loss": 0.4501,
"step": 1390
},
{
"epoch": 100.0,
"learning_rate": 0.0,
"loss": 0.4508,
"step": 1400
},
{
"epoch": 100.0,
"eval_accuracy": 0.7795454545454545,
"eval_loss": 0.6144731640815735,
"eval_runtime": 3.5121,
"eval_samples_per_second": 125.28,
"eval_steps_per_second": 1.993,
"step": 1400
}
],
"max_steps": 1400,
"num_train_epochs": 100,
"total_flos": 7.496586164267827e+18,
"trial_name": null,
"trial_params": null
}