clf-canine-r / trainer_state.json
potamides's picture
upload model files
8aceabe
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"global_step": 176630,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.8307761988337205e-09,
"loss": 0.7015,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 7.0769404970843e-07,
"loss": 0.6985,
"step": 250
},
{
"epoch": 0.03,
"learning_rate": 1.41538809941686e-06,
"loss": 0.6961,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 2.12308214912529e-06,
"loss": 0.6965,
"step": 750
},
{
"epoch": 0.06,
"learning_rate": 2.83077619883372e-06,
"loss": 0.6961,
"step": 1000
},
{
"epoch": 0.07,
"learning_rate": 3.538470248542151e-06,
"loss": 0.6945,
"step": 1250
},
{
"epoch": 0.08,
"learning_rate": 4.24616429825058e-06,
"loss": 0.6941,
"step": 1500
},
{
"epoch": 0.1,
"learning_rate": 4.953858347959011e-06,
"loss": 0.6953,
"step": 1750
},
{
"epoch": 0.11,
"learning_rate": 5.66155239766744e-06,
"loss": 0.6938,
"step": 2000
},
{
"epoch": 0.13,
"learning_rate": 6.36924644737587e-06,
"loss": 0.6939,
"step": 2250
},
{
"epoch": 0.14,
"learning_rate": 7.076940497084302e-06,
"loss": 0.6943,
"step": 2500
},
{
"epoch": 0.16,
"learning_rate": 7.78463454679273e-06,
"loss": 0.694,
"step": 2750
},
{
"epoch": 0.17,
"learning_rate": 8.49232859650116e-06,
"loss": 0.695,
"step": 3000
},
{
"epoch": 0.18,
"learning_rate": 9.200022646209591e-06,
"loss": 0.6937,
"step": 3250
},
{
"epoch": 0.2,
"learning_rate": 9.907716695918021e-06,
"loss": 0.6944,
"step": 3500
},
{
"epoch": 0.21,
"learning_rate": 1.0615410745626452e-05,
"loss": 0.6954,
"step": 3750
},
{
"epoch": 0.23,
"learning_rate": 1.132310479533488e-05,
"loss": 0.6955,
"step": 4000
},
{
"epoch": 0.24,
"learning_rate": 1.2030798845043312e-05,
"loss": 0.6959,
"step": 4250
},
{
"epoch": 0.25,
"learning_rate": 1.273849289475174e-05,
"loss": 0.6926,
"step": 4500
},
{
"epoch": 0.27,
"learning_rate": 1.3446186944460171e-05,
"loss": 0.6946,
"step": 4750
},
{
"epoch": 0.28,
"learning_rate": 1.4153880994168603e-05,
"loss": 0.6934,
"step": 5000
},
{
"epoch": 0.3,
"learning_rate": 1.4861575043877032e-05,
"loss": 0.6956,
"step": 5250
},
{
"epoch": 0.31,
"learning_rate": 1.556926909358546e-05,
"loss": 0.6936,
"step": 5500
},
{
"epoch": 0.33,
"learning_rate": 1.627696314329389e-05,
"loss": 0.6944,
"step": 5750
},
{
"epoch": 0.34,
"learning_rate": 1.698465719300232e-05,
"loss": 0.6927,
"step": 6000
},
{
"epoch": 0.35,
"learning_rate": 1.769235124271075e-05,
"loss": 0.6959,
"step": 6250
},
{
"epoch": 0.37,
"learning_rate": 1.8400045292419182e-05,
"loss": 0.6954,
"step": 6500
},
{
"epoch": 0.38,
"learning_rate": 1.9107739342127612e-05,
"loss": 0.6931,
"step": 6750
},
{
"epoch": 0.4,
"learning_rate": 1.9815433391836042e-05,
"loss": 0.6948,
"step": 7000
},
{
"epoch": 0.41,
"learning_rate": 2.0523127441544473e-05,
"loss": 0.694,
"step": 7250
},
{
"epoch": 0.42,
"learning_rate": 2.1230821491252903e-05,
"loss": 0.6955,
"step": 7500
},
{
"epoch": 0.44,
"learning_rate": 2.1938515540961333e-05,
"loss": 0.6939,
"step": 7750
},
{
"epoch": 0.45,
"learning_rate": 2.264620959066976e-05,
"loss": 0.6944,
"step": 8000
},
{
"epoch": 0.47,
"learning_rate": 2.3353903640378194e-05,
"loss": 0.695,
"step": 8250
},
{
"epoch": 0.48,
"learning_rate": 2.4061597690086624e-05,
"loss": 0.6968,
"step": 8500
},
{
"epoch": 0.5,
"learning_rate": 2.476929173979505e-05,
"loss": 0.6958,
"step": 8750
},
{
"epoch": 0.51,
"learning_rate": 2.547698578950348e-05,
"loss": 0.6948,
"step": 9000
},
{
"epoch": 0.52,
"learning_rate": 2.6184679839211912e-05,
"loss": 0.6948,
"step": 9250
},
{
"epoch": 0.54,
"learning_rate": 2.6892373888920342e-05,
"loss": 0.6965,
"step": 9500
},
{
"epoch": 0.55,
"learning_rate": 2.760006793862877e-05,
"loss": 0.6935,
"step": 9750
},
{
"epoch": 0.57,
"learning_rate": 2.8307761988337206e-05,
"loss": 0.6949,
"step": 10000
},
{
"epoch": 0.58,
"learning_rate": 2.9015456038045637e-05,
"loss": 0.6949,
"step": 10250
},
{
"epoch": 0.59,
"learning_rate": 2.9723150087754064e-05,
"loss": 0.6946,
"step": 10500
},
{
"epoch": 0.61,
"learning_rate": 3.0430844137462494e-05,
"loss": 0.6944,
"step": 10750
},
{
"epoch": 0.62,
"learning_rate": 3.113853818717092e-05,
"loss": 0.694,
"step": 11000
},
{
"epoch": 0.64,
"learning_rate": 3.184623223687935e-05,
"loss": 0.6934,
"step": 11250
},
{
"epoch": 0.65,
"learning_rate": 3.255109551038895e-05,
"loss": 0.6955,
"step": 11500
},
{
"epoch": 0.67,
"learning_rate": 3.325878956009738e-05,
"loss": 0.6953,
"step": 11750
},
{
"epoch": 0.68,
"learning_rate": 3.396648360980581e-05,
"loss": 0.6957,
"step": 12000
},
{
"epoch": 0.69,
"learning_rate": 3.4674177659514237e-05,
"loss": 0.6939,
"step": 12250
},
{
"epoch": 0.71,
"learning_rate": 3.538187170922267e-05,
"loss": 0.6956,
"step": 12500
},
{
"epoch": 0.72,
"learning_rate": 3.60895657589311e-05,
"loss": 0.6948,
"step": 12750
},
{
"epoch": 0.74,
"learning_rate": 3.679725980863953e-05,
"loss": 0.6936,
"step": 13000
},
{
"epoch": 0.75,
"learning_rate": 3.7504953858347965e-05,
"loss": 0.6949,
"step": 13250
},
{
"epoch": 0.76,
"learning_rate": 3.820981713185756e-05,
"loss": 0.6941,
"step": 13500
},
{
"epoch": 0.78,
"learning_rate": 3.891751118156598e-05,
"loss": 0.6941,
"step": 13750
},
{
"epoch": 0.79,
"learning_rate": 3.962520523127441e-05,
"loss": 0.695,
"step": 14000
},
{
"epoch": 0.81,
"learning_rate": 4.033289928098285e-05,
"loss": 0.6957,
"step": 14250
},
{
"epoch": 0.82,
"learning_rate": 4.104059333069128e-05,
"loss": 0.6949,
"step": 14500
},
{
"epoch": 0.84,
"learning_rate": 4.174828738039971e-05,
"loss": 0.6945,
"step": 14750
},
{
"epoch": 0.85,
"learning_rate": 4.245598143010814e-05,
"loss": 0.6943,
"step": 15000
},
{
"epoch": 0.86,
"learning_rate": 4.3163675479816565e-05,
"loss": 0.6952,
"step": 15250
},
{
"epoch": 0.88,
"learning_rate": 4.3871369529524995e-05,
"loss": 0.6939,
"step": 15500
},
{
"epoch": 0.89,
"learning_rate": 4.4579063579233425e-05,
"loss": 0.6952,
"step": 15750
},
{
"epoch": 0.91,
"learning_rate": 4.5283926852743026e-05,
"loss": 0.6946,
"step": 16000
},
{
"epoch": 0.92,
"learning_rate": 4.599162090245146e-05,
"loss": 0.6939,
"step": 16250
},
{
"epoch": 0.93,
"learning_rate": 4.669931495215989e-05,
"loss": 0.695,
"step": 16500
},
{
"epoch": 0.95,
"learning_rate": 4.740700900186831e-05,
"loss": 0.6945,
"step": 16750
},
{
"epoch": 0.96,
"learning_rate": 4.811470305157674e-05,
"loss": 0.6938,
"step": 17000
},
{
"epoch": 0.98,
"learning_rate": 4.882239710128517e-05,
"loss": 0.6946,
"step": 17250
},
{
"epoch": 0.99,
"learning_rate": 4.953009115099361e-05,
"loss": 0.6943,
"step": 17500
},
{
"epoch": 1.0,
"eval_accuracy": 0.5002547770700637,
"eval_f1": 0.33344654835696697,
"eval_loss": 0.6931106448173523,
"eval_precision": 0.25012738853503186,
"eval_recall": 0.5,
"eval_runtime": 12.7276,
"eval_samples_per_second": 616.771,
"eval_steps_per_second": 77.155,
"step": 17663
},
{
"epoch": 1.0,
"learning_rate": 4.999996555277244e-05,
"loss": 0.6939,
"step": 17750
},
{
"epoch": 1.02,
"learning_rate": 4.999945538806009e-05,
"loss": 0.6931,
"step": 18000
},
{
"epoch": 1.03,
"learning_rate": 4.999834068779779e-05,
"loss": 0.6939,
"step": 18250
},
{
"epoch": 1.05,
"learning_rate": 4.9996612525797716e-05,
"loss": 0.6938,
"step": 18500
},
{
"epoch": 1.06,
"learning_rate": 4.999427420049964e-05,
"loss": 0.6948,
"step": 18750
},
{
"epoch": 1.08,
"learning_rate": 4.999132576898172e-05,
"loss": 0.6935,
"step": 19000
},
{
"epoch": 1.09,
"learning_rate": 4.998776730321471e-05,
"loss": 0.6945,
"step": 19250
},
{
"epoch": 1.1,
"learning_rate": 4.9983598890060156e-05,
"loss": 0.6949,
"step": 19500
},
{
"epoch": 1.12,
"learning_rate": 4.997882063126838e-05,
"loss": 0.6947,
"step": 19750
},
{
"epoch": 1.13,
"learning_rate": 4.997343264347589e-05,
"loss": 0.6935,
"step": 20000
},
{
"epoch": 1.15,
"learning_rate": 4.996743505820262e-05,
"loss": 0.6941,
"step": 20250
},
{
"epoch": 1.16,
"learning_rate": 4.996082802184866e-05,
"loss": 0.6947,
"step": 20500
},
{
"epoch": 1.17,
"learning_rate": 4.9953641774472115e-05,
"loss": 0.6937,
"step": 20750
},
{
"epoch": 1.19,
"learning_rate": 4.9945818770743544e-05,
"loss": 0.6958,
"step": 21000
},
{
"epoch": 1.2,
"learning_rate": 4.99373868435844e-05,
"loss": 0.6946,
"step": 21250
},
{
"epoch": 1.22,
"learning_rate": 4.992834619881665e-05,
"loss": 0.6937,
"step": 21500
},
{
"epoch": 1.23,
"learning_rate": 4.991869705712099e-05,
"loss": 0.6952,
"step": 21750
},
{
"epoch": 1.25,
"learning_rate": 4.990843965403141e-05,
"loss": 0.6938,
"step": 22000
},
{
"epoch": 1.26,
"learning_rate": 4.989757423992949e-05,
"loss": 0.6927,
"step": 22250
},
{
"epoch": 1.27,
"learning_rate": 4.9886101080038236e-05,
"loss": 0.6954,
"step": 22500
},
{
"epoch": 1.29,
"learning_rate": 4.987402045441564e-05,
"loss": 0.6933,
"step": 22750
},
{
"epoch": 1.3,
"learning_rate": 4.986138461821248e-05,
"loss": 0.6934,
"step": 23000
},
{
"epoch": 1.32,
"learning_rate": 4.9848092387414506e-05,
"loss": 0.6949,
"step": 23250
},
{
"epoch": 1.33,
"learning_rate": 4.983419361867128e-05,
"loss": 0.6947,
"step": 23500
},
{
"epoch": 1.34,
"learning_rate": 4.9819688651249495e-05,
"loss": 0.6951,
"step": 23750
},
{
"epoch": 1.36,
"learning_rate": 4.9804577839213096e-05,
"loss": 0.694,
"step": 24000
},
{
"epoch": 1.37,
"learning_rate": 4.978886155141456e-05,
"loss": 0.6941,
"step": 24250
},
{
"epoch": 1.39,
"learning_rate": 4.977254017148597e-05,
"loss": 0.6942,
"step": 24500
},
{
"epoch": 1.4,
"learning_rate": 4.9755614097829575e-05,
"loss": 0.6931,
"step": 24750
},
{
"epoch": 1.42,
"learning_rate": 4.9738083743608114e-05,
"loss": 0.6939,
"step": 25000
},
{
"epoch": 1.43,
"learning_rate": 4.9719949536734725e-05,
"loss": 0.6946,
"step": 25250
},
{
"epoch": 1.44,
"learning_rate": 4.97012880717214e-05,
"loss": 0.695,
"step": 25500
},
{
"epoch": 1.46,
"learning_rate": 4.968194991311238e-05,
"loss": 0.6935,
"step": 25750
},
{
"epoch": 1.47,
"learning_rate": 4.9662009272069223e-05,
"loss": 0.6946,
"step": 26000
},
{
"epoch": 1.49,
"learning_rate": 4.964146663533976e-05,
"loss": 0.6943,
"step": 26250
},
{
"epoch": 1.5,
"learning_rate": 4.962032250436647e-05,
"loss": 0.6942,
"step": 26500
},
{
"epoch": 1.51,
"learning_rate": 4.959857739527419e-05,
"loss": 0.6938,
"step": 26750
},
{
"epoch": 1.53,
"learning_rate": 4.957623183885755e-05,
"loss": 0.6942,
"step": 27000
},
{
"epoch": 1.54,
"learning_rate": 4.9553286380567973e-05,
"loss": 0.6947,
"step": 27250
},
{
"epoch": 1.56,
"learning_rate": 4.952983695283269e-05,
"loss": 0.6946,
"step": 27500
},
{
"epoch": 1.57,
"learning_rate": 4.9505695779615944e-05,
"loss": 0.6945,
"step": 27750
},
{
"epoch": 1.59,
"learning_rate": 4.9480956426300165e-05,
"loss": 0.6937,
"step": 28000
},
{
"epoch": 1.6,
"learning_rate": 4.945561949676898e-05,
"loss": 0.6935,
"step": 28250
},
{
"epoch": 1.61,
"learning_rate": 4.9429685609492773e-05,
"loss": 0.6943,
"step": 28500
},
{
"epoch": 1.63,
"learning_rate": 4.940315539751357e-05,
"loss": 0.6942,
"step": 28750
},
{
"epoch": 1.64,
"learning_rate": 4.9376029508429584e-05,
"loss": 0.6938,
"step": 29000
},
{
"epoch": 1.66,
"learning_rate": 4.934830860437941e-05,
"loss": 0.6941,
"step": 29250
},
{
"epoch": 1.67,
"learning_rate": 4.932010780600598e-05,
"loss": 0.6943,
"step": 29500
},
{
"epoch": 1.68,
"learning_rate": 4.92912012897121e-05,
"loss": 0.694,
"step": 29750
},
{
"epoch": 1.7,
"learning_rate": 4.926170182909523e-05,
"loss": 0.6943,
"step": 30000
},
{
"epoch": 1.71,
"learning_rate": 4.923161014423248e-05,
"loss": 0.6941,
"step": 30250
},
{
"epoch": 1.73,
"learning_rate": 4.920092696965702e-05,
"loss": 0.6947,
"step": 30500
},
{
"epoch": 1.74,
"learning_rate": 4.916965305434024e-05,
"loss": 0.6942,
"step": 30750
},
{
"epoch": 1.76,
"learning_rate": 4.913778916167339e-05,
"loss": 0.6941,
"step": 31000
},
{
"epoch": 1.77,
"learning_rate": 4.910533606944895e-05,
"loss": 0.6942,
"step": 31250
},
{
"epoch": 1.78,
"learning_rate": 4.907242790688348e-05,
"loss": 0.6942,
"step": 31500
},
{
"epoch": 1.8,
"learning_rate": 4.903880115520887e-05,
"loss": 0.6939,
"step": 31750
},
{
"epoch": 1.81,
"learning_rate": 4.900458762025803e-05,
"loss": 0.6933,
"step": 32000
},
{
"epoch": 1.83,
"learning_rate": 4.8969788137177854e-05,
"loss": 0.6943,
"step": 32250
},
{
"epoch": 1.84,
"learning_rate": 4.893440355541813e-05,
"loss": 0.6938,
"step": 32500
},
{
"epoch": 1.85,
"learning_rate": 4.889843473871082e-05,
"loss": 0.6947,
"step": 32750
},
{
"epoch": 1.87,
"learning_rate": 4.8861882565048975e-05,
"loss": 0.6939,
"step": 33000
},
{
"epoch": 1.88,
"learning_rate": 4.882474792666527e-05,
"loss": 0.694,
"step": 33250
},
{
"epoch": 1.9,
"learning_rate": 4.878703173001027e-05,
"loss": 0.693,
"step": 33500
},
{
"epoch": 1.91,
"learning_rate": 4.87488892384633e-05,
"loss": 0.6939,
"step": 33750
},
{
"epoch": 1.92,
"learning_rate": 4.871001501830819e-05,
"loss": 0.6947,
"step": 34000
},
{
"epoch": 1.94,
"learning_rate": 4.867056204049366e-05,
"loss": 0.6938,
"step": 34250
},
{
"epoch": 1.95,
"learning_rate": 4.863053126806055e-05,
"loss": 0.6932,
"step": 34500
},
{
"epoch": 1.97,
"learning_rate": 4.85899236781536e-05,
"loss": 0.6932,
"step": 34750
},
{
"epoch": 1.98,
"learning_rate": 4.854874026199756e-05,
"loss": 0.694,
"step": 35000
},
{
"epoch": 2.0,
"learning_rate": 4.8506982024873006e-05,
"loss": 0.6941,
"step": 35250
},
{
"epoch": 2.0,
"eval_accuracy": 0.4997452229299363,
"eval_f1": 0.3332200798437102,
"eval_loss": 0.6948533058166504,
"eval_precision": 0.24987261146496814,
"eval_recall": 0.5,
"eval_runtime": 12.6064,
"eval_samples_per_second": 622.697,
"eval_steps_per_second": 77.897,
"step": 35326
},
{
"epoch": 2.01,
"learning_rate": 4.846464998609178e-05,
"loss": 0.6946,
"step": 35500
},
{
"epoch": 2.02,
"learning_rate": 4.842174517897218e-05,
"loss": 0.6937,
"step": 35750
},
{
"epoch": 2.04,
"learning_rate": 4.8378443694392686e-05,
"loss": 0.6938,
"step": 36000
},
{
"epoch": 2.05,
"learning_rate": 4.833439878695692e-05,
"loss": 0.6936,
"step": 36250
},
{
"epoch": 2.07,
"learning_rate": 4.8289784290593636e-05,
"loss": 0.6934,
"step": 36500
},
{
"epoch": 2.08,
"learning_rate": 4.824460129433551e-05,
"loss": 0.6939,
"step": 36750
},
{
"epoch": 2.09,
"learning_rate": 4.819885090109222e-05,
"loss": 0.6936,
"step": 37000
},
{
"epoch": 2.11,
"learning_rate": 4.815253422762353e-05,
"loss": 0.6931,
"step": 37250
},
{
"epoch": 2.12,
"learning_rate": 4.8105652404512013e-05,
"loss": 0.6941,
"step": 37500
},
{
"epoch": 2.14,
"learning_rate": 4.8058206576135415e-05,
"loss": 0.6935,
"step": 37750
},
{
"epoch": 2.15,
"learning_rate": 4.801039105498368e-05,
"loss": 0.694,
"step": 38000
},
{
"epoch": 2.17,
"learning_rate": 4.796182294859949e-05,
"loss": 0.6936,
"step": 38250
},
{
"epoch": 2.18,
"learning_rate": 4.791269434780415e-05,
"loss": 0.6933,
"step": 38500
},
{
"epoch": 2.19,
"learning_rate": 4.7863006451818936e-05,
"loss": 0.6936,
"step": 38750
},
{
"epoch": 2.21,
"learning_rate": 4.781276047351739e-05,
"loss": 0.6932,
"step": 39000
},
{
"epoch": 2.22,
"learning_rate": 4.7761957639395794e-05,
"loss": 0.6943,
"step": 39250
},
{
"epoch": 2.24,
"learning_rate": 4.771059918954316e-05,
"loss": 0.6933,
"step": 39500
},
{
"epoch": 2.25,
"learning_rate": 4.7658686377610994e-05,
"loss": 0.6943,
"step": 39750
},
{
"epoch": 2.26,
"learning_rate": 4.7606220470782714e-05,
"loss": 0.6942,
"step": 40000
},
{
"epoch": 2.28,
"learning_rate": 4.755341591813002e-05,
"loss": 0.6933,
"step": 40250
},
{
"epoch": 2.29,
"learning_rate": 4.749984987651673e-05,
"loss": 0.6943,
"step": 40500
},
{
"epoch": 2.31,
"learning_rate": 4.744573461718082e-05,
"loss": 0.6934,
"step": 40750
},
{
"epoch": 2.32,
"learning_rate": 4.739107146106705e-05,
"loss": 0.6933,
"step": 41000
},
{
"epoch": 2.34,
"learning_rate": 4.733586174249431e-05,
"loss": 0.6938,
"step": 41250
},
{
"epoch": 2.35,
"learning_rate": 4.728010680912296e-05,
"loss": 0.6937,
"step": 41500
},
{
"epoch": 2.36,
"learning_rate": 4.722380802192197e-05,
"loss": 0.6935,
"step": 41750
},
{
"epoch": 2.38,
"learning_rate": 4.716696675513569e-05,
"loss": 0.694,
"step": 42000
},
{
"epoch": 2.39,
"learning_rate": 4.710958439625032e-05,
"loss": 0.6945,
"step": 42250
},
{
"epoch": 2.41,
"learning_rate": 4.7051895107356976e-05,
"loss": 0.694,
"step": 42500
},
{
"epoch": 2.42,
"learning_rate": 4.6993436929806e-05,
"loss": 0.694,
"step": 42750
},
{
"epoch": 2.43,
"learning_rate": 4.69344418959911e-05,
"loss": 0.6934,
"step": 43000
},
{
"epoch": 2.45,
"learning_rate": 4.687491144597158e-05,
"loss": 0.6942,
"step": 43250
},
{
"epoch": 2.46,
"learning_rate": 4.6814847032876174e-05,
"loss": 0.6937,
"step": 43500
},
{
"epoch": 2.48,
"learning_rate": 4.675425012286756e-05,
"loss": 0.6932,
"step": 43750
},
{
"epoch": 2.49,
"learning_rate": 4.6693122195106574e-05,
"loss": 0.6942,
"step": 44000
},
{
"epoch": 2.51,
"learning_rate": 4.663146474171613e-05,
"loss": 0.6933,
"step": 44250
},
{
"epoch": 2.52,
"learning_rate": 4.656952905945011e-05,
"loss": 0.694,
"step": 44500
},
{
"epoch": 2.53,
"learning_rate": 4.650681918580503e-05,
"loss": 0.6939,
"step": 44750
},
{
"epoch": 2.55,
"learning_rate": 4.644358433415702e-05,
"loss": 0.6935,
"step": 45000
},
{
"epoch": 2.56,
"learning_rate": 4.6379826048058654e-05,
"loss": 0.6942,
"step": 45250
},
{
"epoch": 2.58,
"learning_rate": 4.6315545883839474e-05,
"loss": 0.6939,
"step": 45500
},
{
"epoch": 2.59,
"learning_rate": 4.625074541056795e-05,
"loss": 0.693,
"step": 45750
},
{
"epoch": 2.6,
"learning_rate": 4.618542621001324e-05,
"loss": 0.6941,
"step": 46000
},
{
"epoch": 2.62,
"learning_rate": 4.611958987660653e-05,
"loss": 0.6938,
"step": 46250
},
{
"epoch": 2.63,
"learning_rate": 4.6053238017402155e-05,
"loss": 0.6932,
"step": 46500
},
{
"epoch": 2.65,
"learning_rate": 4.598664073664183e-05,
"loss": 0.6938,
"step": 46750
},
{
"epoch": 2.66,
"learning_rate": 4.591926474312919e-05,
"loss": 0.694,
"step": 47000
},
{
"epoch": 2.68,
"learning_rate": 4.585137811372323e-05,
"loss": 0.6938,
"step": 47250
},
{
"epoch": 2.69,
"learning_rate": 4.5782982505525664e-05,
"loss": 0.6936,
"step": 47500
},
{
"epoch": 2.7,
"learning_rate": 4.5714079588062306e-05,
"loss": 0.6938,
"step": 47750
},
{
"epoch": 2.72,
"learning_rate": 4.5644671043242314e-05,
"loss": 0.6938,
"step": 48000
},
{
"epoch": 2.73,
"learning_rate": 4.557475856531712e-05,
"loss": 0.6943,
"step": 48250
},
{
"epoch": 2.75,
"learning_rate": 4.550434386083909e-05,
"loss": 0.693,
"step": 48500
},
{
"epoch": 2.76,
"learning_rate": 4.543371330418998e-05,
"loss": 0.6936,
"step": 48750
},
{
"epoch": 2.77,
"learning_rate": 4.5362301306900956e-05,
"loss": 0.6924,
"step": 49000
},
{
"epoch": 2.79,
"learning_rate": 4.5290392269106644e-05,
"loss": 0.6936,
"step": 49250
},
{
"epoch": 2.8,
"learning_rate": 4.521798794609512e-05,
"loss": 0.6941,
"step": 49500
},
{
"epoch": 2.82,
"learning_rate": 4.514509010524428e-05,
"loss": 0.693,
"step": 49750
},
{
"epoch": 2.83,
"learning_rate": 4.507170052597872e-05,
"loss": 0.6943,
"step": 50000
},
{
"epoch": 2.84,
"learning_rate": 4.499782099972628e-05,
"loss": 0.6936,
"step": 50250
},
{
"epoch": 2.86,
"learning_rate": 4.4923453329874334e-05,
"loss": 0.6938,
"step": 50500
},
{
"epoch": 2.87,
"learning_rate": 4.484859933172575e-05,
"loss": 0.694,
"step": 50750
},
{
"epoch": 2.89,
"learning_rate": 4.477356314914456e-05,
"loss": 0.6938,
"step": 51000
},
{
"epoch": 2.9,
"learning_rate": 4.469774391472113e-05,
"loss": 0.6933,
"step": 51250
},
{
"epoch": 2.92,
"learning_rate": 4.4621443861531634e-05,
"loss": 0.6936,
"step": 51500
},
{
"epoch": 2.93,
"learning_rate": 4.4544664852048143e-05,
"loss": 0.6932,
"step": 51750
},
{
"epoch": 2.94,
"learning_rate": 4.446740876043395e-05,
"loss": 0.693,
"step": 52000
},
{
"epoch": 2.96,
"learning_rate": 4.438967747249785e-05,
"loss": 0.6944,
"step": 52250
},
{
"epoch": 2.97,
"learning_rate": 4.4311472885648076e-05,
"loss": 0.694,
"step": 52500
},
{
"epoch": 2.99,
"learning_rate": 4.423279690884606e-05,
"loss": 0.6931,
"step": 52750
},
{
"epoch": 3.0,
"eval_accuracy": 0.4997452229299363,
"eval_f1": 0.3332200798437102,
"eval_loss": 0.6931188702583313,
"eval_precision": 0.24987261146496814,
"eval_recall": 0.5,
"eval_runtime": 12.4472,
"eval_samples_per_second": 630.665,
"eval_steps_per_second": 78.893,
"step": 52989
},
{
"epoch": 3.0,
"learning_rate": 4.4153651462559756e-05,
"loss": 0.6939,
"step": 53000
},
{
"epoch": 3.01,
"learning_rate": 4.407403847871679e-05,
"loss": 0.6935,
"step": 53250
},
{
"epoch": 3.03,
"learning_rate": 4.399395990065732e-05,
"loss": 0.693,
"step": 53500
},
{
"epoch": 3.04,
"learning_rate": 4.3913740772924885e-05,
"loss": 0.6932,
"step": 53750
},
{
"epoch": 3.06,
"learning_rate": 4.383273872462815e-05,
"loss": 0.6931,
"step": 54000
},
{
"epoch": 3.07,
"learning_rate": 4.3751276972203183e-05,
"loss": 0.6941,
"step": 54250
},
{
"epoch": 3.09,
"learning_rate": 4.366935750411825e-05,
"loss": 0.6938,
"step": 54500
},
{
"epoch": 3.1,
"learning_rate": 4.3586982320014426e-05,
"loss": 0.6942,
"step": 54750
},
{
"epoch": 3.11,
"learning_rate": 4.3504153430656725e-05,
"loss": 0.694,
"step": 55000
},
{
"epoch": 3.13,
"learning_rate": 4.3420872857885045e-05,
"loss": 0.693,
"step": 55250
},
{
"epoch": 3.14,
"learning_rate": 4.333714263456478e-05,
"loss": 0.6935,
"step": 55500
},
{
"epoch": 3.16,
"learning_rate": 4.325330240477026e-05,
"loss": 0.693,
"step": 55750
},
{
"epoch": 3.17,
"learning_rate": 4.3168680800902995e-05,
"loss": 0.6936,
"step": 56000
},
{
"epoch": 3.18,
"learning_rate": 4.308361570245477e-05,
"loss": 0.6926,
"step": 56250
},
{
"epoch": 3.2,
"learning_rate": 4.299810918585097e-05,
"loss": 0.6945,
"step": 56500
},
{
"epoch": 3.21,
"learning_rate": 4.2912163338291946e-05,
"loss": 0.6929,
"step": 56750
},
{
"epoch": 3.23,
"learning_rate": 4.2825780257702033e-05,
"loss": 0.6947,
"step": 57000
},
{
"epoch": 3.24,
"learning_rate": 4.2738962052678356e-05,
"loss": 0.6936,
"step": 57250
},
{
"epoch": 3.26,
"learning_rate": 4.265171084243936e-05,
"loss": 0.6934,
"step": 57500
},
{
"epoch": 3.27,
"learning_rate": 4.256402875677308e-05,
"loss": 0.694,
"step": 57750
},
{
"epoch": 3.28,
"learning_rate": 4.247591793598513e-05,
"loss": 0.6935,
"step": 58000
},
{
"epoch": 3.3,
"learning_rate": 4.238773552736224e-05,
"loss": 0.6933,
"step": 58250
},
{
"epoch": 3.31,
"learning_rate": 4.2298775392430656e-05,
"loss": 0.6934,
"step": 58500
},
{
"epoch": 3.33,
"learning_rate": 4.220939299716943e-05,
"loss": 0.6939,
"step": 58750
},
{
"epoch": 3.34,
"learning_rate": 4.211959052338851e-05,
"loss": 0.6933,
"step": 59000
},
{
"epoch": 3.35,
"learning_rate": 4.202937016315183e-05,
"loss": 0.6934,
"step": 59250
},
{
"epoch": 3.37,
"learning_rate": 4.1938734118723924e-05,
"loss": 0.6935,
"step": 59500
},
{
"epoch": 3.38,
"learning_rate": 4.184768460251607e-05,
"loss": 0.6932,
"step": 59750
},
{
"epoch": 3.4,
"learning_rate": 4.1756223837032336e-05,
"loss": 0.6939,
"step": 60000
},
{
"epoch": 3.41,
"learning_rate": 4.166435405481533e-05,
"loss": 0.6935,
"step": 60250
},
{
"epoch": 3.43,
"learning_rate": 4.1572077498391684e-05,
"loss": 0.6938,
"step": 60500
},
{
"epoch": 3.44,
"learning_rate": 4.147976794734224e-05,
"loss": 0.6936,
"step": 60750
},
{
"epoch": 3.45,
"learning_rate": 4.138668621426548e-05,
"loss": 0.6937,
"step": 61000
},
{
"epoch": 3.47,
"learning_rate": 4.129320448480947e-05,
"loss": 0.6935,
"step": 61250
},
{
"epoch": 3.48,
"learning_rate": 4.1199325040848224e-05,
"loss": 0.6932,
"step": 61500
},
{
"epoch": 3.5,
"learning_rate": 4.11050501739639e-05,
"loss": 0.6934,
"step": 61750
},
{
"epoch": 3.51,
"learning_rate": 4.101038218539085e-05,
"loss": 0.6934,
"step": 62000
},
{
"epoch": 3.52,
"learning_rate": 4.091532338595949e-05,
"loss": 0.6933,
"step": 62250
},
{
"epoch": 3.54,
"learning_rate": 4.081987609603982e-05,
"loss": 0.6933,
"step": 62500
},
{
"epoch": 3.55,
"learning_rate": 4.072404264548489e-05,
"loss": 0.694,
"step": 62750
},
{
"epoch": 3.57,
"learning_rate": 4.0628211004123955e-05,
"loss": 0.6937,
"step": 63000
},
{
"epoch": 3.58,
"learning_rate": 4.053161378070489e-05,
"loss": 0.6933,
"step": 63250
},
{
"epoch": 3.6,
"learning_rate": 4.043463743308738e-05,
"loss": 0.6936,
"step": 63500
},
{
"epoch": 3.61,
"learning_rate": 4.03372843284485e-05,
"loss": 0.6938,
"step": 63750
},
{
"epoch": 3.62,
"learning_rate": 4.023955684316192e-05,
"loss": 0.6936,
"step": 64000
},
{
"epoch": 3.64,
"learning_rate": 4.014145736273984e-05,
"loss": 0.6935,
"step": 64250
},
{
"epoch": 3.65,
"learning_rate": 4.004298828177483e-05,
"loss": 0.6931,
"step": 64500
},
{
"epoch": 3.67,
"learning_rate": 3.9944152003881354e-05,
"loss": 0.6921,
"step": 64750
},
{
"epoch": 3.68,
"learning_rate": 3.984534846933014e-05,
"loss": 0.6939,
"step": 65000
},
{
"epoch": 3.69,
"learning_rate": 3.974578648883321e-05,
"loss": 0.6935,
"step": 65250
},
{
"epoch": 3.71,
"learning_rate": 3.9645864566055856e-05,
"loss": 0.6934,
"step": 65500
},
{
"epoch": 3.72,
"learning_rate": 3.954558514007616e-05,
"loss": 0.6936,
"step": 65750
},
{
"epoch": 3.74,
"learning_rate": 3.944495065869881e-05,
"loss": 0.6933,
"step": 66000
},
{
"epoch": 3.75,
"learning_rate": 3.934396357839535e-05,
"loss": 0.6937,
"step": 66250
},
{
"epoch": 3.76,
"learning_rate": 3.9242626364244185e-05,
"loss": 0.6936,
"step": 66500
},
{
"epoch": 3.78,
"learning_rate": 3.914094148987043e-05,
"loss": 0.6931,
"step": 66750
},
{
"epoch": 3.79,
"learning_rate": 3.9039320241892266e-05,
"loss": 0.6938,
"step": 67000
},
{
"epoch": 3.81,
"learning_rate": 3.89369488676115e-05,
"loss": 0.6936,
"step": 67250
},
{
"epoch": 3.82,
"learning_rate": 3.883423729464678e-05,
"loss": 0.6926,
"step": 67500
},
{
"epoch": 3.84,
"learning_rate": 3.8731188030171076e-05,
"loss": 0.6938,
"step": 67750
},
{
"epoch": 3.85,
"learning_rate": 3.862780358960041e-05,
"loss": 0.6929,
"step": 68000
},
{
"epoch": 3.86,
"learning_rate": 3.8524086496532375e-05,
"loss": 0.6937,
"step": 68250
},
{
"epoch": 3.88,
"learning_rate": 3.842003928268455e-05,
"loss": 0.6935,
"step": 68500
},
{
"epoch": 3.89,
"learning_rate": 3.831566448783271e-05,
"loss": 0.6928,
"step": 68750
},
{
"epoch": 3.91,
"learning_rate": 3.8210964659748866e-05,
"loss": 0.694,
"step": 69000
},
{
"epoch": 3.92,
"learning_rate": 3.810636308234164e-05,
"loss": 0.6934,
"step": 69250
},
{
"epoch": 3.93,
"learning_rate": 3.800102213732213e-05,
"loss": 0.6937,
"step": 69500
},
{
"epoch": 3.95,
"learning_rate": 3.789536383943985e-05,
"loss": 0.6936,
"step": 69750
},
{
"epoch": 3.96,
"learning_rate": 3.778939076779688e-05,
"loss": 0.6934,
"step": 70000
},
{
"epoch": 3.98,
"learning_rate": 3.768310550917889e-05,
"loss": 0.6936,
"step": 70250
},
{
"epoch": 3.99,
"learning_rate": 3.757651065799198e-05,
"loss": 0.6936,
"step": 70500
},
{
"epoch": 4.0,
"eval_accuracy": 0.5002547770700637,
"eval_f1": 0.33344654835696697,
"eval_loss": 0.6933583617210388,
"eval_precision": 0.25012738853503186,
"eval_recall": 0.5,
"eval_runtime": 12.4824,
"eval_samples_per_second": 628.887,
"eval_steps_per_second": 78.671,
"step": 70652
},
{
"epoch": 4.01,
"learning_rate": 3.7469608816199355e-05,
"loss": 0.6932,
"step": 70750
},
{
"epoch": 4.02,
"learning_rate": 3.7362402593257796e-05,
"loss": 0.6939,
"step": 71000
},
{
"epoch": 4.03,
"learning_rate": 3.725489460605399e-05,
"loss": 0.6935,
"step": 71250
},
{
"epoch": 4.05,
"learning_rate": 3.7147087478840654e-05,
"loss": 0.693,
"step": 71500
},
{
"epoch": 4.06,
"learning_rate": 3.7039416844865045e-05,
"loss": 0.694,
"step": 71750
},
{
"epoch": 4.08,
"learning_rate": 3.693102050974699e-05,
"loss": 0.6933,
"step": 72000
},
{
"epoch": 4.09,
"learning_rate": 3.682233294033409e-05,
"loss": 0.6933,
"step": 72250
},
{
"epoch": 4.1,
"learning_rate": 3.671335678967246e-05,
"loss": 0.6933,
"step": 72500
},
{
"epoch": 4.12,
"learning_rate": 3.6604094717852435e-05,
"loss": 0.6935,
"step": 72750
},
{
"epoch": 4.13,
"learning_rate": 3.649454939194364e-05,
"loss": 0.6932,
"step": 73000
},
{
"epoch": 4.15,
"learning_rate": 3.638472348592989e-05,
"loss": 0.6934,
"step": 73250
},
{
"epoch": 4.16,
"learning_rate": 3.627461968064393e-05,
"loss": 0.6935,
"step": 73500
},
{
"epoch": 4.18,
"learning_rate": 3.616424066370194e-05,
"loss": 0.6929,
"step": 73750
},
{
"epoch": 4.19,
"learning_rate": 3.605358912943803e-05,
"loss": 0.6929,
"step": 74000
},
{
"epoch": 4.2,
"learning_rate": 3.594311199812862e-05,
"loss": 0.6937,
"step": 74250
},
{
"epoch": 4.22,
"learning_rate": 3.5831924601798475e-05,
"loss": 0.6934,
"step": 74500
},
{
"epoch": 4.23,
"learning_rate": 3.5720472799928105e-05,
"loss": 0.6937,
"step": 74750
},
{
"epoch": 4.25,
"learning_rate": 3.560875931303811e-05,
"loss": 0.6934,
"step": 75000
},
{
"epoch": 4.26,
"learning_rate": 3.549678686803674e-05,
"loss": 0.6936,
"step": 75250
},
{
"epoch": 4.27,
"learning_rate": 3.5384558198153416e-05,
"loss": 0.694,
"step": 75500
},
{
"epoch": 4.29,
"learning_rate": 3.527207604287196e-05,
"loss": 0.6934,
"step": 75750
},
{
"epoch": 4.3,
"learning_rate": 3.51593431478637e-05,
"loss": 0.693,
"step": 76000
},
{
"epoch": 4.32,
"learning_rate": 3.504681467879116e-05,
"loss": 0.6931,
"step": 76250
},
{
"epoch": 4.33,
"learning_rate": 3.4933589541177024e-05,
"loss": 0.6936,
"step": 76500
},
{
"epoch": 4.35,
"learning_rate": 3.4820121926237256e-05,
"loss": 0.6934,
"step": 76750
},
{
"epoch": 4.36,
"learning_rate": 3.47064146036981e-05,
"loss": 0.6937,
"step": 77000
},
{
"epoch": 4.37,
"learning_rate": 3.4592470349137056e-05,
"loss": 0.6934,
"step": 77250
},
{
"epoch": 4.39,
"learning_rate": 3.447829194391507e-05,
"loss": 0.6932,
"step": 77500
},
{
"epoch": 4.4,
"learning_rate": 3.436388217510869e-05,
"loss": 0.6937,
"step": 77750
},
{
"epoch": 4.42,
"learning_rate": 3.4249243835441994e-05,
"loss": 0.6928,
"step": 78000
},
{
"epoch": 4.43,
"learning_rate": 3.413437972321844e-05,
"loss": 0.692,
"step": 78250
},
{
"epoch": 4.44,
"learning_rate": 3.401975343100874e-05,
"loss": 0.694,
"step": 78500
},
{
"epoch": 4.46,
"learning_rate": 3.390444706559248e-05,
"loss": 0.693,
"step": 78750
},
{
"epoch": 4.47,
"learning_rate": 3.3788923344053156e-05,
"loss": 0.6936,
"step": 79000
},
{
"epoch": 4.49,
"learning_rate": 3.367318508630627e-05,
"loss": 0.6932,
"step": 79250
},
{
"epoch": 4.5,
"learning_rate": 3.355723511750409e-05,
"loss": 0.6933,
"step": 79500
},
{
"epoch": 4.52,
"learning_rate": 3.3441076267966755e-05,
"loss": 0.6935,
"step": 79750
},
{
"epoch": 4.53,
"learning_rate": 3.3324711373113114e-05,
"loss": 0.6932,
"step": 80000
},
{
"epoch": 4.54,
"learning_rate": 3.320814327339158e-05,
"loss": 0.693,
"step": 80250
},
{
"epoch": 4.56,
"learning_rate": 3.30918422833875e-05,
"loss": 0.6941,
"step": 80500
},
{
"epoch": 4.57,
"learning_rate": 3.2974877099399125e-05,
"loss": 0.6934,
"step": 80750
},
{
"epoch": 4.59,
"learning_rate": 3.28577172499413e-05,
"loss": 0.6932,
"step": 81000
},
{
"epoch": 4.6,
"learning_rate": 3.274036559486714e-05,
"loss": 0.6935,
"step": 81250
},
{
"epoch": 4.61,
"learning_rate": 3.26228249987117e-05,
"loss": 0.6931,
"step": 81500
},
{
"epoch": 4.63,
"learning_rate": 3.250509833062205e-05,
"loss": 0.6929,
"step": 81750
},
{
"epoch": 4.64,
"learning_rate": 3.238718846428726e-05,
"loss": 0.6932,
"step": 82000
},
{
"epoch": 4.66,
"learning_rate": 3.226909827786824e-05,
"loss": 0.6934,
"step": 82250
},
{
"epoch": 4.67,
"learning_rate": 3.2150830653927466e-05,
"loss": 0.6928,
"step": 82500
},
{
"epoch": 4.68,
"learning_rate": 3.203238847935866e-05,
"loss": 0.6936,
"step": 82750
},
{
"epoch": 4.7,
"learning_rate": 3.191424943876217e-05,
"loss": 0.6935,
"step": 83000
},
{
"epoch": 4.71,
"learning_rate": 3.179546750987436e-05,
"loss": 0.6934,
"step": 83250
},
{
"epoch": 4.73,
"learning_rate": 3.1676519704715806e-05,
"loss": 0.6934,
"step": 83500
},
{
"epoch": 4.74,
"learning_rate": 3.155740892678332e-05,
"loss": 0.6934,
"step": 83750
},
{
"epoch": 4.76,
"learning_rate": 3.143813808355187e-05,
"loss": 0.6932,
"step": 84000
},
{
"epoch": 4.77,
"learning_rate": 3.1318710086403584e-05,
"loss": 0.6935,
"step": 84250
},
{
"epoch": 4.78,
"learning_rate": 3.1199127850556704e-05,
"loss": 0.6935,
"step": 84500
},
{
"epoch": 4.8,
"learning_rate": 3.10793942949944e-05,
"loss": 0.6936,
"step": 84750
},
{
"epoch": 4.81,
"learning_rate": 3.095951234239353e-05,
"loss": 0.6932,
"step": 85000
},
{
"epoch": 4.83,
"learning_rate": 3.083948491905332e-05,
"loss": 0.6933,
"step": 85250
},
{
"epoch": 4.84,
"learning_rate": 3.0719314954823894e-05,
"loss": 0.693,
"step": 85500
},
{
"epoch": 4.85,
"learning_rate": 3.059900538303479e-05,
"loss": 0.6931,
"step": 85750
},
{
"epoch": 4.87,
"learning_rate": 3.047855914042333e-05,
"loss": 0.6938,
"step": 86000
},
{
"epoch": 4.88,
"learning_rate": 3.0357979167062957e-05,
"loss": 0.6933,
"step": 86250
},
{
"epoch": 4.9,
"learning_rate": 3.0237751505958946e-05,
"loss": 0.6935,
"step": 86500
},
{
"epoch": 4.91,
"learning_rate": 3.0116913409796476e-05,
"loss": 0.6926,
"step": 86750
},
{
"epoch": 4.93,
"learning_rate": 2.999595041059932e-05,
"loss": 0.6938,
"step": 87000
},
{
"epoch": 4.94,
"learning_rate": 2.9874865461054845e-05,
"loss": 0.6934,
"step": 87250
},
{
"epoch": 4.95,
"learning_rate": 2.9753661516827242e-05,
"loss": 0.6927,
"step": 87500
},
{
"epoch": 4.97,
"learning_rate": 2.9632341536485315e-05,
"loss": 0.6938,
"step": 87750
},
{
"epoch": 4.98,
"learning_rate": 2.9510908481430306e-05,
"loss": 0.6931,
"step": 88000
},
{
"epoch": 5.0,
"learning_rate": 2.938936531582359e-05,
"loss": 0.6934,
"step": 88250
},
{
"epoch": 5.0,
"eval_accuracy": 0.4997452229299363,
"eval_f1": 0.3332200798437102,
"eval_loss": 0.6931193470954895,
"eval_precision": 0.24987261146496814,
"eval_recall": 0.5,
"eval_runtime": 12.6829,
"eval_samples_per_second": 618.944,
"eval_steps_per_second": 77.427,
"step": 88315
},
{
"epoch": 5.01,
"learning_rate": 2.926820181724712e-05,
"loss": 0.6931,
"step": 88500
},
{
"epoch": 5.02,
"learning_rate": 2.914644774447821e-05,
"loss": 0.6929,
"step": 88750
},
{
"epoch": 5.04,
"learning_rate": 2.9024592457585668e-05,
"loss": 0.6936,
"step": 89000
},
{
"epoch": 5.05,
"learning_rate": 2.8902638931037468e-05,
"loss": 0.6931,
"step": 89250
},
{
"epoch": 5.07,
"learning_rate": 2.878059014169962e-05,
"loss": 0.6933,
"step": 89500
},
{
"epoch": 5.08,
"learning_rate": 2.8658449068763467e-05,
"loss": 0.6934,
"step": 89750
},
{
"epoch": 5.1,
"learning_rate": 2.8536218693672996e-05,
"loss": 0.6934,
"step": 90000
},
{
"epoch": 5.11,
"learning_rate": 2.841390200005203e-05,
"loss": 0.6932,
"step": 90250
},
{
"epoch": 5.12,
"learning_rate": 2.8291501973631418e-05,
"loss": 0.6935,
"step": 90500
},
{
"epoch": 5.14,
"learning_rate": 2.8169511679747517e-05,
"loss": 0.6933,
"step": 90750
},
{
"epoch": 5.15,
"learning_rate": 2.8046954256446972e-05,
"loss": 0.6932,
"step": 91000
},
{
"epoch": 5.17,
"learning_rate": 2.7924322457482288e-05,
"loss": 0.6935,
"step": 91250
},
{
"epoch": 5.18,
"learning_rate": 2.780161927627598e-05,
"loss": 0.6932,
"step": 91500
},
{
"epoch": 5.19,
"learning_rate": 2.7678847707993006e-05,
"loss": 0.6935,
"step": 91750
},
{
"epoch": 5.21,
"learning_rate": 2.7556010749467626e-05,
"loss": 0.693,
"step": 92000
},
{
"epoch": 5.22,
"learning_rate": 2.7433111399130273e-05,
"loss": 0.6933,
"step": 92250
},
{
"epoch": 5.24,
"learning_rate": 2.7310152656934363e-05,
"loss": 0.6931,
"step": 92500
},
{
"epoch": 5.25,
"learning_rate": 2.7187629693164157e-05,
"loss": 0.6934,
"step": 92750
},
{
"epoch": 5.27,
"learning_rate": 2.7064561380404474e-05,
"loss": 0.6933,
"step": 93000
},
{
"epoch": 5.28,
"learning_rate": 2.6941442672033014e-05,
"loss": 0.6934,
"step": 93250
},
{
"epoch": 5.29,
"learning_rate": 2.6818276573357664e-05,
"loss": 0.6932,
"step": 93500
},
{
"epoch": 5.31,
"learning_rate": 2.6695066090843123e-05,
"loss": 0.6933,
"step": 93750
},
{
"epoch": 5.32,
"learning_rate": 2.657181423203749e-05,
"loss": 0.6934,
"step": 94000
},
{
"epoch": 5.34,
"learning_rate": 2.6448524005498838e-05,
"loss": 0.6933,
"step": 94250
},
{
"epoch": 5.35,
"learning_rate": 2.6325198420721808e-05,
"loss": 0.6933,
"step": 94500
},
{
"epoch": 5.36,
"learning_rate": 2.6201840488064118e-05,
"loss": 0.6934,
"step": 94750
},
{
"epoch": 5.38,
"learning_rate": 2.6078453218673098e-05,
"loss": 0.6932,
"step": 95000
},
{
"epoch": 5.39,
"learning_rate": 2.595553332723595e-05,
"loss": 0.6936,
"step": 95250
},
{
"epoch": 5.41,
"learning_rate": 2.5832096507858917e-05,
"loss": 0.6928,
"step": 95500
},
{
"epoch": 5.42,
"learning_rate": 2.5708639377139736e-05,
"loss": 0.6933,
"step": 95750
},
{
"epoch": 5.44,
"learning_rate": 2.558516494864715e-05,
"loss": 0.6932,
"step": 96000
},
{
"epoch": 5.45,
"learning_rate": 2.546167623637212e-05,
"loss": 0.6933,
"step": 96250
},
{
"epoch": 5.46,
"learning_rate": 2.5338176254654305e-05,
"loss": 0.6934,
"step": 96500
},
{
"epoch": 5.48,
"learning_rate": 2.5214668018108413e-05,
"loss": 0.6933,
"step": 96750
},
{
"epoch": 5.49,
"learning_rate": 2.509115454155066e-05,
"loss": 0.6934,
"step": 97000
},
{
"epoch": 5.51,
"learning_rate": 2.4968132903168124e-05,
"loss": 0.6929,
"step": 97250
},
{
"epoch": 5.52,
"learning_rate": 2.4844617982307774e-05,
"loss": 0.6936,
"step": 97500
},
{
"epoch": 5.53,
"learning_rate": 2.4721106854297538e-05,
"loss": 0.6932,
"step": 97750
},
{
"epoch": 5.55,
"learning_rate": 2.4597602534024195e-05,
"loss": 0.6935,
"step": 98000
},
{
"epoch": 5.56,
"learning_rate": 2.447410803620838e-05,
"loss": 0.6931,
"step": 98250
},
{
"epoch": 5.58,
"learning_rate": 2.4350626375330963e-05,
"loss": 0.6931,
"step": 98500
},
{
"epoch": 5.59,
"learning_rate": 2.4227160565559448e-05,
"loss": 0.6933,
"step": 98750
},
{
"epoch": 5.6,
"learning_rate": 2.410371362067444e-05,
"loss": 0.6933,
"step": 99000
},
{
"epoch": 5.62,
"learning_rate": 2.398078220668819e-05,
"loss": 0.6934,
"step": 99250
},
{
"epoch": 5.63,
"learning_rate": 2.385738192543777e-05,
"loss": 0.6934,
"step": 99500
},
{
"epoch": 5.65,
"learning_rate": 2.3734009535311082e-05,
"loss": 0.693,
"step": 99750
},
{
"epoch": 5.66,
"learning_rate": 2.3610668047808355e-05,
"loss": 0.6936,
"step": 100000
},
{
"epoch": 5.68,
"learning_rate": 2.3487360473675493e-05,
"loss": 0.693,
"step": 100250
},
{
"epoch": 5.69,
"learning_rate": 2.3364089822830585e-05,
"loss": 0.6934,
"step": 100500
},
{
"epoch": 5.7,
"learning_rate": 2.3240859104290418e-05,
"loss": 0.6932,
"step": 100750
},
{
"epoch": 5.72,
"learning_rate": 2.311767132609704e-05,
"loss": 0.6932,
"step": 101000
},
{
"epoch": 5.73,
"learning_rate": 2.2994529495244343e-05,
"loss": 0.6934,
"step": 101250
},
{
"epoch": 5.75,
"learning_rate": 2.287192888761777e-05,
"loss": 0.6934,
"step": 101500
},
{
"epoch": 5.76,
"learning_rate": 2.2748887754053225e-05,
"loss": 0.6932,
"step": 101750
},
{
"epoch": 5.77,
"learning_rate": 2.2625901569777145e-05,
"loss": 0.693,
"step": 102000
},
{
"epoch": 5.79,
"learning_rate": 2.2502973336862524e-05,
"loss": 0.6933,
"step": 102250
},
{
"epoch": 5.8,
"learning_rate": 2.23801060559678e-05,
"loss": 0.6935,
"step": 102500
},
{
"epoch": 5.82,
"learning_rate": 2.2257302726263566e-05,
"loss": 0.6931,
"step": 102750
},
{
"epoch": 5.83,
"learning_rate": 2.213456634535938e-05,
"loss": 0.6933,
"step": 103000
},
{
"epoch": 5.85,
"learning_rate": 2.201189990923061e-05,
"loss": 0.6936,
"step": 103250
},
{
"epoch": 5.86,
"learning_rate": 2.1889796636872324e-05,
"loss": 0.6934,
"step": 103500
},
{
"epoch": 5.87,
"learning_rate": 2.176727876163321e-05,
"loss": 0.6929,
"step": 103750
},
{
"epoch": 5.89,
"learning_rate": 2.1644839796600443e-05,
"loss": 0.6935,
"step": 104000
},
{
"epoch": 5.9,
"learning_rate": 2.152248273048949e-05,
"loss": 0.6932,
"step": 104250
},
{
"epoch": 5.92,
"learning_rate": 2.1400210550016697e-05,
"loss": 0.6931,
"step": 104500
},
{
"epoch": 5.93,
"learning_rate": 2.127802623982636e-05,
"loss": 0.6932,
"step": 104750
},
{
"epoch": 5.94,
"learning_rate": 2.1155932782417855e-05,
"loss": 0.6933,
"step": 105000
},
{
"epoch": 5.96,
"learning_rate": 2.1033933158072878e-05,
"loss": 0.6928,
"step": 105250
},
{
"epoch": 5.97,
"learning_rate": 2.0912030344782672e-05,
"loss": 0.6935,
"step": 105500
},
{
"epoch": 5.99,
"learning_rate": 2.0790714327565363e-05,
"loss": 0.6932,
"step": 105750
},
{
"epoch": 6.0,
"eval_accuracy": 0.5002547770700637,
"eval_f1": 0.33344654835696697,
"eval_loss": 0.6931106448173523,
"eval_precision": 0.25012738853503186,
"eval_recall": 0.5,
"eval_runtime": 12.652,
"eval_samples_per_second": 620.456,
"eval_steps_per_second": 77.616,
"step": 105978
},
{
"epoch": 6.0,
"learning_rate": 2.066901364387442e-05,
"loss": 0.6934,
"step": 106000
},
{
"epoch": 6.02,
"learning_rate": 2.0547418678865e-05,
"loss": 0.693,
"step": 106250
},
{
"epoch": 6.03,
"learning_rate": 2.0425932400650658e-05,
"loss": 0.6935,
"step": 106500
},
{
"epoch": 6.04,
"learning_rate": 2.0304557774691947e-05,
"loss": 0.6935,
"step": 106750
},
{
"epoch": 6.06,
"learning_rate": 2.0183297763724e-05,
"loss": 0.693,
"step": 107000
},
{
"epoch": 6.07,
"learning_rate": 2.006215532768421e-05,
"loss": 0.6932,
"step": 107250
},
{
"epoch": 6.09,
"learning_rate": 1.9941133423640003e-05,
"loss": 0.6933,
"step": 107500
},
{
"epoch": 6.1,
"learning_rate": 1.9820235005716594e-05,
"loss": 0.6924,
"step": 107750
},
{
"epoch": 6.11,
"learning_rate": 1.9699945857176588e-05,
"loss": 0.6935,
"step": 108000
},
{
"epoch": 6.13,
"learning_rate": 1.9579302738332077e-05,
"loss": 0.6935,
"step": 108250
},
{
"epoch": 6.14,
"learning_rate": 1.9458791937837308e-05,
"loss": 0.6932,
"step": 108500
},
{
"epoch": 6.16,
"learning_rate": 1.9338416397341575e-05,
"loss": 0.6932,
"step": 108750
},
{
"epoch": 6.17,
"learning_rate": 1.92181790551925e-05,
"loss": 0.6933,
"step": 109000
},
{
"epoch": 6.19,
"learning_rate": 1.9098082846364272e-05,
"loss": 0.6932,
"step": 109250
},
{
"epoch": 6.2,
"learning_rate": 1.8978130702386082e-05,
"loss": 0.6932,
"step": 109500
},
{
"epoch": 6.21,
"learning_rate": 1.8858325551270503e-05,
"loss": 0.6933,
"step": 109750
},
{
"epoch": 6.23,
"learning_rate": 1.8739148635869862e-05,
"loss": 0.6933,
"step": 110000
},
{
"epoch": 6.24,
"learning_rate": 1.8619645622928097e-05,
"loss": 0.6932,
"step": 110250
},
{
"epoch": 6.26,
"learning_rate": 1.8500298353412176e-05,
"loss": 0.6933,
"step": 110500
},
{
"epoch": 6.27,
"learning_rate": 1.838110974056977e-05,
"loss": 0.6933,
"step": 110750
},
{
"epoch": 6.28,
"learning_rate": 1.826208269377578e-05,
"loss": 0.6933,
"step": 111000
},
{
"epoch": 6.3,
"learning_rate": 1.8143220118461316e-05,
"loss": 0.6932,
"step": 111250
},
{
"epoch": 6.31,
"learning_rate": 1.802452491604275e-05,
"loss": 0.6931,
"step": 111500
},
{
"epoch": 6.33,
"learning_rate": 1.790599998385092e-05,
"loss": 0.6935,
"step": 111750
},
{
"epoch": 6.34,
"learning_rate": 1.778764821506038e-05,
"loss": 0.6934,
"step": 112000
},
{
"epoch": 6.36,
"learning_rate": 1.7669944846963703e-05,
"loss": 0.6933,
"step": 112250
},
{
"epoch": 6.37,
"learning_rate": 1.75519473460329e-05,
"loss": 0.6933,
"step": 112500
},
{
"epoch": 6.38,
"learning_rate": 1.7434131650871434e-05,
"loss": 0.6933,
"step": 112750
},
{
"epoch": 6.4,
"learning_rate": 1.7316500637341497e-05,
"loss": 0.6934,
"step": 113000
},
{
"epoch": 6.41,
"learning_rate": 1.7199057176797255e-05,
"loss": 0.693,
"step": 113250
},
{
"epoch": 6.43,
"learning_rate": 1.7081804136014705e-05,
"loss": 0.6934,
"step": 113500
},
{
"epoch": 6.44,
"learning_rate": 1.696474437712175e-05,
"loss": 0.6932,
"step": 113750
},
{
"epoch": 6.45,
"learning_rate": 1.684788075752831e-05,
"loss": 0.6933,
"step": 114000
},
{
"epoch": 6.47,
"learning_rate": 1.6731216129856575e-05,
"loss": 0.6928,
"step": 114250
},
{
"epoch": 6.48,
"learning_rate": 1.6614753341871385e-05,
"loss": 0.6933,
"step": 114500
},
{
"epoch": 6.5,
"learning_rate": 1.6498959857342135e-05,
"loss": 0.6934,
"step": 114750
},
{
"epoch": 6.51,
"learning_rate": 1.6382908436519833e-05,
"loss": 0.6933,
"step": 115000
},
{
"epoch": 6.52,
"learning_rate": 1.6267067357518934e-05,
"loss": 0.6932,
"step": 115250
},
{
"epoch": 6.54,
"learning_rate": 1.615143944800157e-05,
"loss": 0.6932,
"step": 115500
},
{
"epoch": 6.55,
"learning_rate": 1.6036027530426446e-05,
"loss": 0.6932,
"step": 115750
},
{
"epoch": 6.57,
"learning_rate": 1.592083442197995e-05,
"loss": 0.693,
"step": 116000
},
{
"epoch": 6.58,
"learning_rate": 1.5805862934507337e-05,
"loss": 0.6932,
"step": 116250
},
{
"epoch": 6.6,
"learning_rate": 1.5691115874444174e-05,
"loss": 0.6931,
"step": 116500
},
{
"epoch": 6.61,
"learning_rate": 1.5576596042747766e-05,
"loss": 0.6928,
"step": 116750
},
{
"epoch": 6.62,
"learning_rate": 1.546230623482881e-05,
"loss": 0.6937,
"step": 117000
},
{
"epoch": 6.64,
"learning_rate": 1.534870500100411e-05,
"loss": 0.6932,
"step": 117250
},
{
"epoch": 6.65,
"learning_rate": 1.5234882656415627e-05,
"loss": 0.6931,
"step": 117500
},
{
"epoch": 6.67,
"learning_rate": 1.5121298676773482e-05,
"loss": 0.6931,
"step": 117750
},
{
"epoch": 6.68,
"learning_rate": 1.5007955834644389e-05,
"loss": 0.6932,
"step": 118000
},
{
"epoch": 6.69,
"learning_rate": 1.4894856896708926e-05,
"loss": 0.6933,
"step": 118250
},
{
"epoch": 6.71,
"learning_rate": 1.478200462369401e-05,
"loss": 0.6935,
"step": 118500
},
{
"epoch": 6.72,
"learning_rate": 1.4669401770305513e-05,
"loss": 0.6933,
"step": 118750
},
{
"epoch": 6.74,
"learning_rate": 1.4557051085160978e-05,
"loss": 0.6932,
"step": 119000
},
{
"epoch": 6.75,
"learning_rate": 1.4445403182409694e-05,
"loss": 0.6931,
"step": 119250
},
{
"epoch": 6.77,
"learning_rate": 1.4333564018887296e-05,
"loss": 0.6931,
"step": 119500
},
{
"epoch": 6.78,
"learning_rate": 1.422198522135445e-05,
"loss": 0.6932,
"step": 119750
},
{
"epoch": 6.79,
"learning_rate": 1.4110669513431698e-05,
"loss": 0.6931,
"step": 120000
},
{
"epoch": 6.81,
"learning_rate": 1.39996196123176e-05,
"loss": 0.6929,
"step": 120250
},
{
"epoch": 6.82,
"learning_rate": 1.3888838228722412e-05,
"loss": 0.6934,
"step": 120500
},
{
"epoch": 6.84,
"learning_rate": 1.377832806680191e-05,
"loss": 0.6932,
"step": 120750
},
{
"epoch": 6.85,
"learning_rate": 1.3668091824091411e-05,
"loss": 0.6932,
"step": 121000
},
{
"epoch": 6.86,
"learning_rate": 1.3558132191439892e-05,
"loss": 0.6931,
"step": 121250
},
{
"epoch": 6.88,
"learning_rate": 1.3448890014393772e-05,
"loss": 0.6933,
"step": 121500
},
{
"epoch": 6.89,
"learning_rate": 1.3339490514123848e-05,
"loss": 0.6934,
"step": 121750
},
{
"epoch": 6.91,
"learning_rate": 1.3230375645018111e-05,
"loss": 0.6934,
"step": 122000
},
{
"epoch": 6.92,
"learning_rate": 1.3121548070553007e-05,
"loss": 0.6926,
"step": 122250
},
{
"epoch": 6.94,
"learning_rate": 1.3013010447192154e-05,
"loss": 0.6932,
"step": 122500
},
{
"epoch": 6.95,
"learning_rate": 1.2904765424321502e-05,
"loss": 0.6934,
"step": 122750
},
{
"epoch": 6.96,
"learning_rate": 1.2796815644184693e-05,
"loss": 0.6933,
"step": 123000
},
{
"epoch": 6.98,
"learning_rate": 1.2689163741818505e-05,
"loss": 0.6932,
"step": 123250
},
{
"epoch": 6.99,
"learning_rate": 1.2581812344988603e-05,
"loss": 0.6932,
"step": 123500
},
{
"epoch": 7.0,
"eval_accuracy": 0.5002547770700637,
"eval_f1": 0.33344654835696697,
"eval_loss": 0.6931138634681702,
"eval_precision": 0.25012738853503186,
"eval_recall": 0.5,
"eval_runtime": 12.5558,
"eval_samples_per_second": 625.208,
"eval_steps_per_second": 78.211,
"step": 123641
},
{
"epoch": 7.01,
"learning_rate": 1.2475191659916228e-05,
"loss": 0.6933,
"step": 123750
},
{
"epoch": 7.02,
"learning_rate": 1.2368447899899433e-05,
"loss": 0.6933,
"step": 124000
},
{
"epoch": 7.03,
"learning_rate": 1.2262012474041122e-05,
"loss": 0.6932,
"step": 124250
},
{
"epoch": 7.05,
"learning_rate": 1.2155887980412958e-05,
"loss": 0.693,
"step": 124500
},
{
"epoch": 7.06,
"learning_rate": 1.2050077009496776e-05,
"loss": 0.6934,
"step": 124750
},
{
"epoch": 7.08,
"learning_rate": 1.1944582144121394e-05,
"loss": 0.6933,
"step": 125000
},
{
"epoch": 7.09,
"learning_rate": 1.1839405959399536e-05,
"loss": 0.6931,
"step": 125250
},
{
"epoch": 7.11,
"learning_rate": 1.1734551022664981e-05,
"loss": 0.693,
"step": 125500
},
{
"epoch": 7.12,
"learning_rate": 1.1630437369518046e-05,
"loss": 0.6935,
"step": 125750
},
{
"epoch": 7.13,
"learning_rate": 1.152623128882203e-05,
"loss": 0.6933,
"step": 126000
},
{
"epoch": 7.15,
"learning_rate": 1.1422354100656728e-05,
"loss": 0.6931,
"step": 126250
},
{
"epoch": 7.16,
"learning_rate": 1.1318808340647638e-05,
"loss": 0.6927,
"step": 126500
},
{
"epoch": 7.18,
"learning_rate": 1.1215596536330131e-05,
"loss": 0.6935,
"step": 126750
},
{
"epoch": 7.19,
"learning_rate": 1.1112721207087779e-05,
"loss": 0.6933,
"step": 127000
},
{
"epoch": 7.2,
"learning_rate": 1.1010184864090856e-05,
"loss": 0.6933,
"step": 127250
},
{
"epoch": 7.22,
"learning_rate": 1.0907990010235016e-05,
"loss": 0.6933,
"step": 127500
},
{
"epoch": 7.23,
"learning_rate": 1.080613914008024e-05,
"loss": 0.6932,
"step": 127750
},
{
"epoch": 7.25,
"learning_rate": 1.0704634739789915e-05,
"loss": 0.6931,
"step": 128000
},
{
"epoch": 7.26,
"learning_rate": 1.06038832105021e-05,
"loss": 0.6933,
"step": 128250
},
{
"epoch": 7.28,
"learning_rate": 1.0503077763966945e-05,
"loss": 0.6933,
"step": 128500
},
{
"epoch": 7.29,
"learning_rate": 1.0402626184975775e-05,
"loss": 0.6933,
"step": 128750
},
{
"epoch": 7.3,
"learning_rate": 1.0302530925535489e-05,
"loss": 0.6933,
"step": 129000
},
{
"epoch": 7.32,
"learning_rate": 1.0202794428955301e-05,
"loss": 0.6932,
"step": 129250
},
{
"epoch": 7.33,
"learning_rate": 1.0103419129787083e-05,
"loss": 0.6932,
"step": 129500
},
{
"epoch": 7.35,
"learning_rate": 1.0004407453765927e-05,
"loss": 0.693,
"step": 129750
},
{
"epoch": 7.36,
"learning_rate": 9.905761817750958e-06,
"loss": 0.6934,
"step": 130000
},
{
"epoch": 7.37,
"learning_rate": 9.807484629666289e-06,
"loss": 0.6929,
"step": 130250
},
{
"epoch": 7.39,
"learning_rate": 9.709969171909833e-06,
"loss": 0.6931,
"step": 130500
},
{
"epoch": 7.4,
"learning_rate": 9.612434569729081e-06,
"loss": 0.6935,
"step": 130750
},
{
"epoch": 7.42,
"learning_rate": 9.515275575549665e-06,
"loss": 0.6932,
"step": 131000
},
{
"epoch": 7.43,
"learning_rate": 9.418494561007033e-06,
"loss": 0.6932,
"step": 131250
},
{
"epoch": 7.44,
"learning_rate": 9.322093888510217e-06,
"loss": 0.6933,
"step": 131500
},
{
"epoch": 7.46,
"learning_rate": 9.226075911184137e-06,
"loss": 0.6932,
"step": 131750
},
{
"epoch": 7.47,
"learning_rate": 9.130442972812207e-06,
"loss": 0.6933,
"step": 132000
},
{
"epoch": 7.49,
"learning_rate": 9.0351974077791e-06,
"loss": 0.6932,
"step": 132250
},
{
"epoch": 7.5,
"learning_rate": 8.940720185129978e-06,
"loss": 0.6932,
"step": 132500
},
{
"epoch": 7.52,
"learning_rate": 8.846254759394507e-06,
"loss": 0.6931,
"step": 132750
},
{
"epoch": 7.53,
"learning_rate": 8.752183643986523e-06,
"loss": 0.6932,
"step": 133000
},
{
"epoch": 7.54,
"learning_rate": 8.658509135166829e-06,
"loss": 0.6933,
"step": 133250
},
{
"epoch": 7.56,
"learning_rate": 8.565233519515157e-06,
"loss": 0.6932,
"step": 133500
},
{
"epoch": 7.57,
"learning_rate": 8.472359073874304e-06,
"loss": 0.693,
"step": 133750
},
{
"epoch": 7.59,
"learning_rate": 8.379888065294575e-06,
"loss": 0.6934,
"step": 134000
},
{
"epoch": 7.6,
"learning_rate": 8.287822750978453e-06,
"loss": 0.6932,
"step": 134250
},
{
"epoch": 7.61,
"learning_rate": 8.196531192128304e-06,
"loss": 0.6932,
"step": 134500
},
{
"epoch": 7.63,
"learning_rate": 8.105282353121044e-06,
"loss": 0.6931,
"step": 134750
},
{
"epoch": 7.64,
"learning_rate": 8.014445911458718e-06,
"loss": 0.6934,
"step": 135000
},
{
"epoch": 7.66,
"learning_rate": 7.924024084444284e-06,
"loss": 0.6932,
"step": 135250
},
{
"epoch": 7.67,
"learning_rate": 7.834019079260019e-06,
"loss": 0.6931,
"step": 135500
},
{
"epoch": 7.69,
"learning_rate": 7.744433092913655e-06,
"loss": 0.6931,
"step": 135750
},
{
"epoch": 7.7,
"learning_rate": 7.655268312184721e-06,
"loss": 0.693,
"step": 136000
},
{
"epoch": 7.71,
"learning_rate": 7.5665269135712034e-06,
"loss": 0.6934,
"step": 136250
},
{
"epoch": 7.73,
"learning_rate": 7.478563476084949e-06,
"loss": 0.6932,
"step": 136500
},
{
"epoch": 7.74,
"learning_rate": 7.390673614707866e-06,
"loss": 0.6931,
"step": 136750
},
{
"epoch": 7.76,
"learning_rate": 7.3032135941602745e-06,
"loss": 0.6932,
"step": 137000
},
{
"epoch": 7.77,
"learning_rate": 7.216185549327245e-06,
"loss": 0.6931,
"step": 137250
},
{
"epoch": 7.78,
"learning_rate": 7.129591604549363e-06,
"loss": 0.6931,
"step": 137500
},
{
"epoch": 7.8,
"learning_rate": 7.043433873570918e-06,
"loss": 0.6932,
"step": 137750
},
{
"epoch": 7.81,
"learning_rate": 6.957714459488293e-06,
"loss": 0.6934,
"step": 138000
},
{
"epoch": 7.83,
"learning_rate": 6.87243545469862e-06,
"loss": 0.693,
"step": 138250
},
{
"epoch": 7.84,
"learning_rate": 6.787937402713973e-06,
"loss": 0.6934,
"step": 138500
},
{
"epoch": 7.86,
"learning_rate": 6.703543668291617e-06,
"loss": 0.6932,
"step": 138750
},
{
"epoch": 7.87,
"learning_rate": 6.619596547430409e-06,
"loss": 0.693,
"step": 139000
},
{
"epoch": 7.88,
"learning_rate": 6.536098089266093e-06,
"loss": 0.6933,
"step": 139250
},
{
"epoch": 7.9,
"learning_rate": 6.453050331982624e-06,
"loss": 0.6933,
"step": 139500
},
{
"epoch": 7.91,
"learning_rate": 6.370455302762429e-06,
"loss": 0.6931,
"step": 139750
},
{
"epoch": 7.93,
"learning_rate": 6.2883150177369095e-06,
"loss": 0.6933,
"step": 140000
},
{
"epoch": 7.94,
"learning_rate": 6.206631481937219e-06,
"loss": 0.6933,
"step": 140250
},
{
"epoch": 7.95,
"learning_rate": 6.125406689245361e-06,
"loss": 0.693,
"step": 140500
},
{
"epoch": 7.97,
"learning_rate": 6.04496475823072e-06,
"loss": 0.6931,
"step": 140750
},
{
"epoch": 7.98,
"learning_rate": 5.964661533859653e-06,
"loss": 0.6934,
"step": 141000
},
{
"epoch": 8.0,
"learning_rate": 5.884822959043998e-06,
"loss": 0.6933,
"step": 141250
},
{
"epoch": 8.0,
"eval_accuracy": 0.4997452229299363,
"eval_f1": 0.3332200798437102,
"eval_loss": 0.6931153535842896,
"eval_precision": 0.24987261146496814,
"eval_recall": 0.5,
"eval_runtime": 12.5013,
"eval_samples_per_second": 627.937,
"eval_steps_per_second": 78.552,
"step": 141304
},
{
"epoch": 8.01,
"learning_rate": 5.805450982630542e-06,
"loss": 0.6931,
"step": 141500
},
{
"epoch": 8.03,
"learning_rate": 5.726547542076485e-06,
"loss": 0.6932,
"step": 141750
},
{
"epoch": 8.04,
"learning_rate": 5.6481145634021515e-06,
"loss": 0.6931,
"step": 142000
},
{
"epoch": 8.05,
"learning_rate": 5.570153961143942e-06,
"loss": 0.6932,
"step": 142250
},
{
"epoch": 8.07,
"learning_rate": 5.492667638307647e-06,
"loss": 0.6932,
"step": 142500
},
{
"epoch": 8.08,
"learning_rate": 5.415964575902305e-06,
"loss": 0.6933,
"step": 142750
},
{
"epoch": 8.1,
"learning_rate": 5.339430558640929e-06,
"loss": 0.693,
"step": 143000
},
{
"epoch": 8.11,
"learning_rate": 5.263376452722673e-06,
"loss": 0.6931,
"step": 143250
},
{
"epoch": 8.12,
"learning_rate": 5.187804114616051e-06,
"loss": 0.6933,
"step": 143500
},
{
"epoch": 8.14,
"learning_rate": 5.112715389029707e-06,
"loss": 0.6932,
"step": 143750
},
{
"epoch": 8.15,
"learning_rate": 5.038112108867363e-06,
"loss": 0.6931,
"step": 144000
},
{
"epoch": 8.17,
"learning_rate": 4.963996095183115e-06,
"loss": 0.6932,
"step": 144250
},
{
"epoch": 8.18,
"learning_rate": 4.890369157136956e-06,
"loss": 0.6931,
"step": 144500
},
{
"epoch": 8.2,
"learning_rate": 4.817233091950621e-06,
"loss": 0.6932,
"step": 144750
},
{
"epoch": 8.21,
"learning_rate": 4.744879274763003e-06,
"loss": 0.6934,
"step": 145000
},
{
"epoch": 8.22,
"learning_rate": 4.672728317747921e-06,
"loss": 0.6934,
"step": 145250
},
{
"epoch": 8.24,
"learning_rate": 4.601073546170611e-06,
"loss": 0.6932,
"step": 145500
},
{
"epoch": 8.25,
"learning_rate": 4.529916709112531e-06,
"loss": 0.693,
"step": 145750
},
{
"epoch": 8.27,
"learning_rate": 4.459259543500649e-06,
"loss": 0.6932,
"step": 146000
},
{
"epoch": 8.28,
"learning_rate": 4.38910377406504e-06,
"loss": 0.6932,
"step": 146250
},
{
"epoch": 8.29,
"learning_rate": 4.319451113296763e-06,
"loss": 0.6931,
"step": 146500
},
{
"epoch": 8.31,
"learning_rate": 4.250303261406091e-06,
"loss": 0.6933,
"step": 146750
},
{
"epoch": 8.32,
"learning_rate": 4.1819354605351554e-06,
"loss": 0.6934,
"step": 147000
},
{
"epoch": 8.34,
"learning_rate": 4.113800241689689e-06,
"loss": 0.6931,
"step": 147250
},
{
"epoch": 8.35,
"learning_rate": 4.046174851626572e-06,
"loss": 0.6932,
"step": 147500
},
{
"epoch": 8.36,
"learning_rate": 3.979060941070722e-06,
"loss": 0.6932,
"step": 147750
},
{
"epoch": 8.38,
"learning_rate": 3.9124601482619205e-06,
"loss": 0.6932,
"step": 148000
},
{
"epoch": 8.39,
"learning_rate": 3.846374098914826e-06,
"loss": 0.693,
"step": 148250
},
{
"epoch": 8.41,
"learning_rate": 3.780804406179295e-06,
"loss": 0.6931,
"step": 148500
},
{
"epoch": 8.42,
"learning_rate": 3.715752670600986e-06,
"loss": 0.6934,
"step": 148750
},
{
"epoch": 8.44,
"learning_rate": 3.651220480082326e-06,
"loss": 0.6932,
"step": 149000
},
{
"epoch": 8.45,
"learning_rate": 3.587209409843728e-06,
"loss": 0.6931,
"step": 149250
},
{
"epoch": 8.46,
"learning_rate": 3.523721022385132e-06,
"loss": 0.6932,
"step": 149500
},
{
"epoch": 8.48,
"learning_rate": 3.4607568674478897e-06,
"loss": 0.6933,
"step": 149750
},
{
"epoch": 8.49,
"learning_rate": 3.3983184819769e-06,
"loss": 0.6932,
"step": 150000
},
{
"epoch": 8.51,
"learning_rate": 3.3366539820747027e-06,
"loss": 0.6932,
"step": 150250
},
{
"epoch": 8.52,
"learning_rate": 3.2752695767854113e-06,
"loss": 0.6931,
"step": 150500
},
{
"epoch": 8.53,
"learning_rate": 3.2144154686773565e-06,
"loss": 0.6933,
"step": 150750
},
{
"epoch": 8.55,
"learning_rate": 3.1540931431895514e-06,
"loss": 0.6932,
"step": 151000
},
{
"epoch": 8.56,
"learning_rate": 3.0943040727802797e-06,
"loss": 0.6932,
"step": 151250
},
{
"epoch": 8.58,
"learning_rate": 3.0350497168911334e-06,
"loss": 0.6931,
"step": 151500
},
{
"epoch": 8.59,
"learning_rate": 2.976331521911438e-06,
"loss": 0.6931,
"step": 151750
},
{
"epoch": 8.61,
"learning_rate": 2.918150921142901e-06,
"loss": 0.6933,
"step": 152000
},
{
"epoch": 8.62,
"learning_rate": 2.860509334764647e-06,
"loss": 0.6932,
"step": 152250
},
{
"epoch": 8.63,
"learning_rate": 2.8034081697985437e-06,
"loss": 0.6931,
"step": 152500
},
{
"epoch": 8.65,
"learning_rate": 2.746848820074854e-06,
"loss": 0.6933,
"step": 152750
},
{
"epoch": 8.66,
"learning_rate": 2.6910556469509853e-06,
"loss": 0.6934,
"step": 153000
},
{
"epoch": 8.68,
"learning_rate": 2.6355818753078243e-06,
"loss": 0.6932,
"step": 153250
},
{
"epoch": 8.69,
"learning_rate": 2.5806540155199815e-06,
"loss": 0.6932,
"step": 153500
},
{
"epoch": 8.7,
"learning_rate": 2.5262734083676936e-06,
"loss": 0.6933,
"step": 153750
},
{
"epoch": 8.72,
"learning_rate": 2.4724413812728525e-06,
"loss": 0.6932,
"step": 154000
},
{
"epoch": 8.73,
"learning_rate": 2.4191592482665893e-06,
"loss": 0.693,
"step": 154250
},
{
"epoch": 8.75,
"learning_rate": 2.3664283099572192e-06,
"loss": 0.6933,
"step": 154500
},
{
"epoch": 8.76,
"learning_rate": 2.3142498534984735e-06,
"loss": 0.693,
"step": 154750
},
{
"epoch": 8.78,
"learning_rate": 2.262625152558104e-06,
"loss": 0.6932,
"step": 155000
},
{
"epoch": 8.79,
"learning_rate": 2.211758638779948e-06,
"loss": 0.6932,
"step": 155250
},
{
"epoch": 8.8,
"learning_rate": 2.161242988266132e-06,
"loss": 0.6932,
"step": 155500
},
{
"epoch": 8.82,
"learning_rate": 2.111284828143695e-06,
"loss": 0.6932,
"step": 155750
},
{
"epoch": 8.83,
"learning_rate": 2.0618853778833075e-06,
"loss": 0.6933,
"step": 156000
},
{
"epoch": 8.85,
"learning_rate": 2.013045843317621e-06,
"loss": 0.6933,
"step": 156250
},
{
"epoch": 8.86,
"learning_rate": 1.964767416611829e-06,
"loss": 0.6933,
"step": 156500
},
{
"epoch": 8.87,
"learning_rate": 1.9170512762345825e-06,
"loss": 0.6932,
"step": 156750
},
{
"epoch": 8.89,
"learning_rate": 1.8698985869291973e-06,
"loss": 0.6932,
"step": 157000
},
{
"epoch": 8.9,
"learning_rate": 1.8234957258351364e-06,
"loss": 0.6931,
"step": 157250
},
{
"epoch": 8.92,
"learning_rate": 1.7774711126562565e-06,
"loss": 0.6933,
"step": 157500
},
{
"epoch": 8.93,
"learning_rate": 1.732013357678619e-06,
"loss": 0.6933,
"step": 157750
},
{
"epoch": 8.95,
"learning_rate": 1.6871235705187283e-06,
"loss": 0.6931,
"step": 158000
},
{
"epoch": 8.96,
"learning_rate": 1.6428028469290813e-06,
"loss": 0.693,
"step": 158250
},
{
"epoch": 8.97,
"learning_rate": 1.5990522687714172e-06,
"loss": 0.693,
"step": 158500
},
{
"epoch": 8.99,
"learning_rate": 1.5558729039903296e-06,
"loss": 0.6933,
"step": 158750
},
{
"epoch": 9.0,
"eval_accuracy": 0.4997452229299363,
"eval_f1": 0.3332200798437102,
"eval_loss": 0.6933611035346985,
"eval_precision": 0.24987261146496814,
"eval_recall": 0.5,
"eval_runtime": 12.7446,
"eval_samples_per_second": 615.949,
"eval_steps_per_second": 77.052,
"step": 158967
},
{
"epoch": 9.0,
"learning_rate": 1.5132658065871747e-06,
"loss": 0.6932,
"step": 159000
},
{
"epoch": 9.02,
"learning_rate": 1.471232016594365e-06,
"loss": 0.6932,
"step": 159250
},
{
"epoch": 9.03,
"learning_rate": 1.4299372524579662e-06,
"loss": 0.6932,
"step": 159500
},
{
"epoch": 9.04,
"learning_rate": 1.3890508380014433e-06,
"loss": 0.6931,
"step": 159750
},
{
"epoch": 9.06,
"learning_rate": 1.348740763022696e-06,
"loss": 0.6931,
"step": 160000
},
{
"epoch": 9.07,
"learning_rate": 1.3090080114841863e-06,
"loss": 0.6934,
"step": 160250
},
{
"epoch": 9.09,
"learning_rate": 1.269853553255998e-06,
"loss": 0.6932,
"step": 160500
},
{
"epoch": 9.1,
"learning_rate": 1.231278344092171e-06,
"loss": 0.6933,
"step": 160750
},
{
"epoch": 9.12,
"learning_rate": 1.1932833256073695e-06,
"loss": 0.6931,
"step": 161000
},
{
"epoch": 9.13,
"learning_rate": 1.155869425253886e-06,
"loss": 0.6931,
"step": 161250
},
{
"epoch": 9.14,
"learning_rate": 1.1191837231720042e-06,
"loss": 0.6931,
"step": 161500
},
{
"epoch": 9.16,
"learning_rate": 1.0829324511818617e-06,
"loss": 0.6932,
"step": 161750
},
{
"epoch": 9.17,
"learning_rate": 1.0472649909701676e-06,
"loss": 0.6931,
"step": 162000
},
{
"epoch": 9.19,
"learning_rate": 1.0121822131738956e-06,
"loss": 0.6933,
"step": 162250
},
{
"epoch": 9.2,
"learning_rate": 9.776849741580247e-07,
"loss": 0.6932,
"step": 162500
},
{
"epoch": 9.21,
"learning_rate": 9.437741159946139e-07,
"loss": 0.6933,
"step": 162750
},
{
"epoch": 9.23,
"learning_rate": 9.104504664422669e-07,
"loss": 0.6932,
"step": 163000
},
{
"epoch": 9.24,
"learning_rate": 8.777148389259194e-07,
"loss": 0.6931,
"step": 163250
},
{
"epoch": 9.26,
"learning_rate": 8.455680325169802e-07,
"loss": 0.6933,
"step": 163500
},
{
"epoch": 9.27,
"learning_rate": 8.140108319138362e-07,
"loss": 0.6932,
"step": 163750
},
{
"epoch": 9.28,
"learning_rate": 7.831666976848146e-07,
"loss": 0.6932,
"step": 164000
},
{
"epoch": 9.3,
"learning_rate": 7.527886391862193e-07,
"loss": 0.6932,
"step": 164250
},
{
"epoch": 9.31,
"learning_rate": 7.230024512235062e-07,
"loss": 0.6932,
"step": 164500
},
{
"epoch": 9.33,
"learning_rate": 6.938088608727439e-07,
"loss": 0.6933,
"step": 164750
},
{
"epoch": 9.34,
"learning_rate": 6.652085807447861e-07,
"loss": 0.693,
"step": 165000
},
{
"epoch": 9.36,
"learning_rate": 6.372023089678797e-07,
"loss": 0.6932,
"step": 165250
},
{
"epoch": 9.37,
"learning_rate": 6.097907291706201e-07,
"loss": 0.693,
"step": 165500
},
{
"epoch": 9.38,
"learning_rate": 5.829745104652673e-07,
"loss": 0.6932,
"step": 165750
},
{
"epoch": 9.4,
"learning_rate": 5.568580001272361e-07,
"loss": 0.6932,
"step": 166000
},
{
"epoch": 9.41,
"learning_rate": 5.31232064917378e-07,
"loss": 0.6931,
"step": 166250
},
{
"epoch": 9.43,
"learning_rate": 5.062034084038104e-07,
"loss": 0.6932,
"step": 166500
},
{
"epoch": 9.44,
"learning_rate": 4.817726415320162e-07,
"loss": 0.6932,
"step": 166750
},
{
"epoch": 9.45,
"learning_rate": 4.5794036065309466e-07,
"loss": 0.6932,
"step": 167000
},
{
"epoch": 9.47,
"learning_rate": 4.347071475091918e-07,
"loss": 0.6932,
"step": 167250
},
{
"epoch": 9.48,
"learning_rate": 4.120735692193151e-07,
"loss": 0.6931,
"step": 167500
},
{
"epoch": 9.5,
"learning_rate": 3.900401782654717e-07,
"loss": 0.6932,
"step": 167750
},
{
"epoch": 9.51,
"learning_rate": 3.686075124792021e-07,
"loss": 0.6932,
"step": 168000
},
{
"epoch": 9.53,
"learning_rate": 3.477760950284292e-07,
"loss": 0.6932,
"step": 168250
},
{
"epoch": 9.54,
"learning_rate": 3.2762615368826244e-07,
"loss": 0.6932,
"step": 168500
},
{
"epoch": 9.55,
"learning_rate": 3.0799633372767536e-07,
"loss": 0.6931,
"step": 168750
},
{
"epoch": 9.57,
"learning_rate": 2.889692416117279e-07,
"loss": 0.6931,
"step": 169000
},
{
"epoch": 9.58,
"learning_rate": 2.7054534178868607e-07,
"loss": 0.6932,
"step": 169250
},
{
"epoch": 9.6,
"learning_rate": 2.527250839829881e-07,
"loss": 0.6932,
"step": 169500
},
{
"epoch": 9.61,
"learning_rate": 2.3550890318425888e-07,
"loss": 0.6932,
"step": 169750
},
{
"epoch": 9.62,
"learning_rate": 2.1889721963671284e-07,
"loss": 0.6931,
"step": 170000
},
{
"epoch": 9.64,
"learning_rate": 2.0289043882887604e-07,
"loss": 0.6932,
"step": 170250
},
{
"epoch": 9.65,
"learning_rate": 1.8748895148369973e-07,
"loss": 0.6933,
"step": 170500
},
{
"epoch": 9.67,
"learning_rate": 1.727511098436996e-07,
"loss": 0.6931,
"step": 170750
},
{
"epoch": 9.68,
"learning_rate": 1.5855889766091536e-07,
"loss": 0.6932,
"step": 171000
},
{
"epoch": 9.7,
"learning_rate": 1.4497306106658893e-07,
"loss": 0.693,
"step": 171250
},
{
"epoch": 9.71,
"learning_rate": 1.3199393168881468e-07,
"loss": 0.6932,
"step": 171500
},
{
"epoch": 9.72,
"learning_rate": 1.1962182634605302e-07,
"loss": 0.6932,
"step": 171750
},
{
"epoch": 9.74,
"learning_rate": 1.0785704703941135e-07,
"loss": 0.6932,
"step": 172000
},
{
"epoch": 9.75,
"learning_rate": 9.669988094526128e-08,
"loss": 0.6933,
"step": 172250
},
{
"epoch": 9.77,
"learning_rate": 8.615060040823852e-08,
"loss": 0.6931,
"step": 172500
},
{
"epoch": 9.78,
"learning_rate": 7.620946293458442e-08,
"loss": 0.6932,
"step": 172750
},
{
"epoch": 9.79,
"learning_rate": 6.691282998163018e-08,
"loss": 0.6931,
"step": 173000
},
{
"epoch": 9.81,
"learning_rate": 5.8186256880568555e-08,
"loss": 0.6933,
"step": 173250
},
{
"epoch": 9.82,
"learning_rate": 5.006850944801467e-08,
"loss": 0.6932,
"step": 173500
},
{
"epoch": 9.84,
"learning_rate": 4.25597858368737e-08,
"loss": 0.6934,
"step": 173750
},
{
"epoch": 9.85,
"learning_rate": 3.5660269333887e-08,
"loss": 0.6931,
"step": 174000
},
{
"epoch": 9.87,
"learning_rate": 2.9370128355143966e-08,
"loss": 0.6932,
"step": 174250
},
{
"epoch": 9.88,
"learning_rate": 2.3689516441977034e-08,
"loss": 0.6931,
"step": 174500
},
{
"epoch": 9.89,
"learning_rate": 1.8618572257209087e-08,
"loss": 0.6932,
"step": 174750
},
{
"epoch": 9.91,
"learning_rate": 1.4174049338511186e-08,
"loss": 0.6932,
"step": 175000
},
{
"epoch": 9.92,
"learning_rate": 1.0320357269619218e-08,
"loss": 0.6932,
"step": 175250
},
{
"epoch": 9.94,
"learning_rate": 7.076659268151487e-09,
"loss": 0.6932,
"step": 175500
},
{
"epoch": 9.95,
"learning_rate": 4.443034512263689e-09,
"loss": 0.6931,
"step": 175750
},
{
"epoch": 9.96,
"learning_rate": 2.4195472883042914e-09,
"loss": 0.6931,
"step": 176000
},
{
"epoch": 9.98,
"learning_rate": 1.0062469892796466e-09,
"loss": 0.6932,
"step": 176250
},
{
"epoch": 9.99,
"learning_rate": 2.031681136188679e-10,
"loss": 0.6932,
"step": 176500
},
{
"epoch": 10.0,
"eval_accuracy": 0.4997452229299363,
"eval_f1": 0.3332200798437102,
"eval_loss": 0.693359375,
"eval_precision": 0.24987261146496814,
"eval_recall": 0.5,
"eval_runtime": 12.7636,
"eval_samples_per_second": 615.031,
"eval_steps_per_second": 76.938,
"step": 176630
},
{
"epoch": 10.0,
"step": 176630,
"total_flos": 8.989707126226944e+16,
"train_loss": 0.6935468676189722,
"train_runtime": 13050.2295,
"train_samples_per_second": 108.274,
"train_steps_per_second": 13.535
}
],
"max_steps": 176630,
"num_train_epochs": 10,
"total_flos": 8.989707126226944e+16,
"trial_name": null,
"trial_params": {
"learning_rate": 1e-05
}
}