JLB-JLB's picture
VIT_SEIZURE_231108
3309150
{
"best_metric": 0.47342389822006226,
"best_model_checkpoint": "seizure_vit/seizure_vit_jlb_231108_iir_adjusted/checkpoint-1000",
"epoch": 5.0,
"eval_steps": 1000,
"global_step": 14685,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 9.986380660537965e-06,
"loss": 0.6425,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 9.972761321075928e-06,
"loss": 0.5846,
"step": 40
},
{
"epoch": 0.02,
"learning_rate": 9.959141981613892e-06,
"loss": 0.549,
"step": 60
},
{
"epoch": 0.03,
"learning_rate": 9.945522642151857e-06,
"loss": 0.5503,
"step": 80
},
{
"epoch": 0.03,
"learning_rate": 9.931903302689821e-06,
"loss": 0.5304,
"step": 100
},
{
"epoch": 0.04,
"learning_rate": 9.918283963227783e-06,
"loss": 0.5064,
"step": 120
},
{
"epoch": 0.05,
"learning_rate": 9.904664623765749e-06,
"loss": 0.4953,
"step": 140
},
{
"epoch": 0.05,
"learning_rate": 9.891045284303712e-06,
"loss": 0.5028,
"step": 160
},
{
"epoch": 0.06,
"learning_rate": 9.877425944841676e-06,
"loss": 0.4742,
"step": 180
},
{
"epoch": 0.07,
"learning_rate": 9.86380660537964e-06,
"loss": 0.4921,
"step": 200
},
{
"epoch": 0.07,
"learning_rate": 9.850187265917604e-06,
"loss": 0.4961,
"step": 220
},
{
"epoch": 0.08,
"learning_rate": 9.836567926455567e-06,
"loss": 0.438,
"step": 240
},
{
"epoch": 0.09,
"learning_rate": 9.822948586993531e-06,
"loss": 0.4273,
"step": 260
},
{
"epoch": 0.1,
"learning_rate": 9.809329247531496e-06,
"loss": 0.4606,
"step": 280
},
{
"epoch": 0.1,
"learning_rate": 9.79570990806946e-06,
"loss": 0.4513,
"step": 300
},
{
"epoch": 0.11,
"learning_rate": 9.782090568607424e-06,
"loss": 0.4375,
"step": 320
},
{
"epoch": 0.12,
"learning_rate": 9.768471229145388e-06,
"loss": 0.4441,
"step": 340
},
{
"epoch": 0.12,
"learning_rate": 9.754851889683351e-06,
"loss": 0.4187,
"step": 360
},
{
"epoch": 0.13,
"learning_rate": 9.741232550221315e-06,
"loss": 0.4336,
"step": 380
},
{
"epoch": 0.14,
"learning_rate": 9.727613210759279e-06,
"loss": 0.4258,
"step": 400
},
{
"epoch": 0.14,
"learning_rate": 9.713993871297242e-06,
"loss": 0.4225,
"step": 420
},
{
"epoch": 0.15,
"learning_rate": 9.700374531835208e-06,
"loss": 0.4119,
"step": 440
},
{
"epoch": 0.16,
"learning_rate": 9.68675519237317e-06,
"loss": 0.4016,
"step": 460
},
{
"epoch": 0.16,
"learning_rate": 9.673135852911134e-06,
"loss": 0.43,
"step": 480
},
{
"epoch": 0.17,
"learning_rate": 9.659516513449099e-06,
"loss": 0.4397,
"step": 500
},
{
"epoch": 0.18,
"learning_rate": 9.645897173987063e-06,
"loss": 0.376,
"step": 520
},
{
"epoch": 0.18,
"learning_rate": 9.632277834525027e-06,
"loss": 0.4556,
"step": 540
},
{
"epoch": 0.19,
"learning_rate": 9.61865849506299e-06,
"loss": 0.4067,
"step": 560
},
{
"epoch": 0.2,
"learning_rate": 9.605039155600954e-06,
"loss": 0.4005,
"step": 580
},
{
"epoch": 0.2,
"learning_rate": 9.591419816138918e-06,
"loss": 0.3618,
"step": 600
},
{
"epoch": 0.21,
"learning_rate": 9.577800476676881e-06,
"loss": 0.4302,
"step": 620
},
{
"epoch": 0.22,
"learning_rate": 9.564181137214847e-06,
"loss": 0.4234,
"step": 640
},
{
"epoch": 0.22,
"learning_rate": 9.55056179775281e-06,
"loss": 0.4304,
"step": 660
},
{
"epoch": 0.23,
"learning_rate": 9.536942458290773e-06,
"loss": 0.3656,
"step": 680
},
{
"epoch": 0.24,
"learning_rate": 9.523323118828738e-06,
"loss": 0.3883,
"step": 700
},
{
"epoch": 0.25,
"learning_rate": 9.509703779366702e-06,
"loss": 0.3971,
"step": 720
},
{
"epoch": 0.25,
"learning_rate": 9.496084439904665e-06,
"loss": 0.4155,
"step": 740
},
{
"epoch": 0.26,
"learning_rate": 9.48246510044263e-06,
"loss": 0.3724,
"step": 760
},
{
"epoch": 0.27,
"learning_rate": 9.468845760980593e-06,
"loss": 0.4199,
"step": 780
},
{
"epoch": 0.27,
"learning_rate": 9.455226421518557e-06,
"loss": 0.3955,
"step": 800
},
{
"epoch": 0.28,
"learning_rate": 9.44160708205652e-06,
"loss": 0.3703,
"step": 820
},
{
"epoch": 0.29,
"learning_rate": 9.427987742594486e-06,
"loss": 0.3956,
"step": 840
},
{
"epoch": 0.29,
"learning_rate": 9.41436840313245e-06,
"loss": 0.362,
"step": 860
},
{
"epoch": 0.3,
"learning_rate": 9.400749063670412e-06,
"loss": 0.3908,
"step": 880
},
{
"epoch": 0.31,
"learning_rate": 9.387129724208377e-06,
"loss": 0.3674,
"step": 900
},
{
"epoch": 0.31,
"learning_rate": 9.37351038474634e-06,
"loss": 0.3814,
"step": 920
},
{
"epoch": 0.32,
"learning_rate": 9.359891045284304e-06,
"loss": 0.4162,
"step": 940
},
{
"epoch": 0.33,
"learning_rate": 9.346271705822268e-06,
"loss": 0.4027,
"step": 960
},
{
"epoch": 0.33,
"learning_rate": 9.332652366360232e-06,
"loss": 0.3573,
"step": 980
},
{
"epoch": 0.34,
"learning_rate": 9.319033026898197e-06,
"loss": 0.3803,
"step": 1000
},
{
"epoch": 0.34,
"eval_loss": 0.47342389822006226,
"eval_roc_auc": 0.7746478873239437,
"eval_runtime": 223.2127,
"eval_samples_per_second": 66.797,
"eval_steps_per_second": 8.351,
"step": 1000
},
{
"epoch": 0.35,
"learning_rate": 9.30541368743616e-06,
"loss": 0.3728,
"step": 1020
},
{
"epoch": 0.35,
"learning_rate": 9.291794347974123e-06,
"loss": 0.3511,
"step": 1040
},
{
"epoch": 0.36,
"learning_rate": 9.278175008512089e-06,
"loss": 0.3722,
"step": 1060
},
{
"epoch": 0.37,
"learning_rate": 9.264555669050052e-06,
"loss": 0.336,
"step": 1080
},
{
"epoch": 0.37,
"learning_rate": 9.250936329588016e-06,
"loss": 0.4234,
"step": 1100
},
{
"epoch": 0.38,
"learning_rate": 9.23731699012598e-06,
"loss": 0.3634,
"step": 1120
},
{
"epoch": 0.39,
"learning_rate": 9.223697650663943e-06,
"loss": 0.3378,
"step": 1140
},
{
"epoch": 0.39,
"learning_rate": 9.210078311201907e-06,
"loss": 0.3802,
"step": 1160
},
{
"epoch": 0.4,
"learning_rate": 9.196458971739871e-06,
"loss": 0.3998,
"step": 1180
},
{
"epoch": 0.41,
"learning_rate": 9.182839632277836e-06,
"loss": 0.3611,
"step": 1200
},
{
"epoch": 0.42,
"learning_rate": 9.169220292815798e-06,
"loss": 0.3884,
"step": 1220
},
{
"epoch": 0.42,
"learning_rate": 9.155600953353762e-06,
"loss": 0.3412,
"step": 1240
},
{
"epoch": 0.43,
"learning_rate": 9.141981613891727e-06,
"loss": 0.4026,
"step": 1260
},
{
"epoch": 0.44,
"learning_rate": 9.128362274429691e-06,
"loss": 0.3619,
"step": 1280
},
{
"epoch": 0.44,
"learning_rate": 9.114742934967655e-06,
"loss": 0.3291,
"step": 1300
},
{
"epoch": 0.45,
"learning_rate": 9.101123595505619e-06,
"loss": 0.3514,
"step": 1320
},
{
"epoch": 0.46,
"learning_rate": 9.087504256043582e-06,
"loss": 0.3353,
"step": 1340
},
{
"epoch": 0.46,
"learning_rate": 9.073884916581546e-06,
"loss": 0.3713,
"step": 1360
},
{
"epoch": 0.47,
"learning_rate": 9.06026557711951e-06,
"loss": 0.3571,
"step": 1380
},
{
"epoch": 0.48,
"learning_rate": 9.046646237657475e-06,
"loss": 0.3406,
"step": 1400
},
{
"epoch": 0.48,
"learning_rate": 9.033026898195439e-06,
"loss": 0.3785,
"step": 1420
},
{
"epoch": 0.49,
"learning_rate": 9.019407558733401e-06,
"loss": 0.3618,
"step": 1440
},
{
"epoch": 0.5,
"learning_rate": 9.005788219271366e-06,
"loss": 0.323,
"step": 1460
},
{
"epoch": 0.5,
"learning_rate": 8.99216887980933e-06,
"loss": 0.2908,
"step": 1480
},
{
"epoch": 0.51,
"learning_rate": 8.978549540347294e-06,
"loss": 0.372,
"step": 1500
},
{
"epoch": 0.52,
"learning_rate": 8.964930200885258e-06,
"loss": 0.3317,
"step": 1520
},
{
"epoch": 0.52,
"learning_rate": 8.951310861423221e-06,
"loss": 0.3953,
"step": 1540
},
{
"epoch": 0.53,
"learning_rate": 8.937691521961187e-06,
"loss": 0.3151,
"step": 1560
},
{
"epoch": 0.54,
"learning_rate": 8.924072182499149e-06,
"loss": 0.3391,
"step": 1580
},
{
"epoch": 0.54,
"learning_rate": 8.910452843037113e-06,
"loss": 0.3281,
"step": 1600
},
{
"epoch": 0.55,
"learning_rate": 8.896833503575078e-06,
"loss": 0.3613,
"step": 1620
},
{
"epoch": 0.56,
"learning_rate": 8.883214164113042e-06,
"loss": 0.3444,
"step": 1640
},
{
"epoch": 0.57,
"learning_rate": 8.869594824651005e-06,
"loss": 0.3122,
"step": 1660
},
{
"epoch": 0.57,
"learning_rate": 8.855975485188969e-06,
"loss": 0.2896,
"step": 1680
},
{
"epoch": 0.58,
"learning_rate": 8.842356145726933e-06,
"loss": 0.3603,
"step": 1700
},
{
"epoch": 0.59,
"learning_rate": 8.828736806264897e-06,
"loss": 0.3173,
"step": 1720
},
{
"epoch": 0.59,
"learning_rate": 8.81511746680286e-06,
"loss": 0.3016,
"step": 1740
},
{
"epoch": 0.6,
"learning_rate": 8.801498127340826e-06,
"loss": 0.3522,
"step": 1760
},
{
"epoch": 0.61,
"learning_rate": 8.787878787878788e-06,
"loss": 0.3482,
"step": 1780
},
{
"epoch": 0.61,
"learning_rate": 8.774259448416752e-06,
"loss": 0.2981,
"step": 1800
},
{
"epoch": 0.62,
"learning_rate": 8.760640108954717e-06,
"loss": 0.3467,
"step": 1820
},
{
"epoch": 0.63,
"learning_rate": 8.74702076949268e-06,
"loss": 0.373,
"step": 1840
},
{
"epoch": 0.63,
"learning_rate": 8.733401430030644e-06,
"loss": 0.3449,
"step": 1860
},
{
"epoch": 0.64,
"learning_rate": 8.719782090568608e-06,
"loss": 0.3541,
"step": 1880
},
{
"epoch": 0.65,
"learning_rate": 8.706162751106572e-06,
"loss": 0.3123,
"step": 1900
},
{
"epoch": 0.65,
"learning_rate": 8.692543411644536e-06,
"loss": 0.3103,
"step": 1920
},
{
"epoch": 0.66,
"learning_rate": 8.6789240721825e-06,
"loss": 0.3029,
"step": 1940
},
{
"epoch": 0.67,
"learning_rate": 8.665304732720465e-06,
"loss": 0.3077,
"step": 1960
},
{
"epoch": 0.67,
"learning_rate": 8.651685393258428e-06,
"loss": 0.3612,
"step": 1980
},
{
"epoch": 0.68,
"learning_rate": 8.63806605379639e-06,
"loss": 0.3456,
"step": 2000
},
{
"epoch": 0.68,
"eval_loss": 0.4863223433494568,
"eval_roc_auc": 0.7782025486250839,
"eval_runtime": 226.9387,
"eval_samples_per_second": 65.701,
"eval_steps_per_second": 8.214,
"step": 2000
},
{
"epoch": 0.69,
"learning_rate": 8.624446714334356e-06,
"loss": 0.308,
"step": 2020
},
{
"epoch": 0.69,
"learning_rate": 8.61082737487232e-06,
"loss": 0.314,
"step": 2040
},
{
"epoch": 0.7,
"learning_rate": 8.597208035410283e-06,
"loss": 0.3102,
"step": 2060
},
{
"epoch": 0.71,
"learning_rate": 8.583588695948247e-06,
"loss": 0.3176,
"step": 2080
},
{
"epoch": 0.72,
"learning_rate": 8.56996935648621e-06,
"loss": 0.3541,
"step": 2100
},
{
"epoch": 0.72,
"learning_rate": 8.556350017024175e-06,
"loss": 0.3057,
"step": 2120
},
{
"epoch": 0.73,
"learning_rate": 8.542730677562138e-06,
"loss": 0.2985,
"step": 2140
},
{
"epoch": 0.74,
"learning_rate": 8.529111338100104e-06,
"loss": 0.2799,
"step": 2160
},
{
"epoch": 0.74,
"learning_rate": 8.515491998638067e-06,
"loss": 0.3107,
"step": 2180
},
{
"epoch": 0.75,
"learning_rate": 8.501872659176031e-06,
"loss": 0.3512,
"step": 2200
},
{
"epoch": 0.76,
"learning_rate": 8.488253319713995e-06,
"loss": 0.311,
"step": 2220
},
{
"epoch": 0.76,
"learning_rate": 8.474633980251959e-06,
"loss": 0.3146,
"step": 2240
},
{
"epoch": 0.77,
"learning_rate": 8.461014640789922e-06,
"loss": 0.3202,
"step": 2260
},
{
"epoch": 0.78,
"learning_rate": 8.447395301327886e-06,
"loss": 0.3073,
"step": 2280
},
{
"epoch": 0.78,
"learning_rate": 8.43377596186585e-06,
"loss": 0.3144,
"step": 2300
},
{
"epoch": 0.79,
"learning_rate": 8.420156622403815e-06,
"loss": 0.3132,
"step": 2320
},
{
"epoch": 0.8,
"learning_rate": 8.406537282941777e-06,
"loss": 0.3162,
"step": 2340
},
{
"epoch": 0.8,
"learning_rate": 8.392917943479741e-06,
"loss": 0.3047,
"step": 2360
},
{
"epoch": 0.81,
"learning_rate": 8.379298604017706e-06,
"loss": 0.3362,
"step": 2380
},
{
"epoch": 0.82,
"learning_rate": 8.36567926455567e-06,
"loss": 0.3045,
"step": 2400
},
{
"epoch": 0.82,
"learning_rate": 8.352059925093634e-06,
"loss": 0.301,
"step": 2420
},
{
"epoch": 0.83,
"learning_rate": 8.338440585631598e-06,
"loss": 0.3196,
"step": 2440
},
{
"epoch": 0.84,
"learning_rate": 8.324821246169561e-06,
"loss": 0.3542,
"step": 2460
},
{
"epoch": 0.84,
"learning_rate": 8.311201906707525e-06,
"loss": 0.3279,
"step": 2480
},
{
"epoch": 0.85,
"learning_rate": 8.297582567245489e-06,
"loss": 0.3157,
"step": 2500
},
{
"epoch": 0.86,
"learning_rate": 8.283963227783454e-06,
"loss": 0.2519,
"step": 2520
},
{
"epoch": 0.86,
"learning_rate": 8.270343888321418e-06,
"loss": 0.3058,
"step": 2540
},
{
"epoch": 0.87,
"learning_rate": 8.25672454885938e-06,
"loss": 0.2764,
"step": 2560
},
{
"epoch": 0.88,
"learning_rate": 8.243105209397345e-06,
"loss": 0.2877,
"step": 2580
},
{
"epoch": 0.89,
"learning_rate": 8.229485869935309e-06,
"loss": 0.3564,
"step": 2600
},
{
"epoch": 0.89,
"learning_rate": 8.215866530473273e-06,
"loss": 0.2914,
"step": 2620
},
{
"epoch": 0.9,
"learning_rate": 8.202247191011237e-06,
"loss": 0.2847,
"step": 2640
},
{
"epoch": 0.91,
"learning_rate": 8.1886278515492e-06,
"loss": 0.3405,
"step": 2660
},
{
"epoch": 0.91,
"learning_rate": 8.175008512087164e-06,
"loss": 0.3281,
"step": 2680
},
{
"epoch": 0.92,
"learning_rate": 8.161389172625128e-06,
"loss": 0.263,
"step": 2700
},
{
"epoch": 0.93,
"learning_rate": 8.147769833163093e-06,
"loss": 0.2724,
"step": 2720
},
{
"epoch": 0.93,
"learning_rate": 8.134150493701057e-06,
"loss": 0.342,
"step": 2740
},
{
"epoch": 0.94,
"learning_rate": 8.120531154239019e-06,
"loss": 0.3156,
"step": 2760
},
{
"epoch": 0.95,
"learning_rate": 8.106911814776984e-06,
"loss": 0.3101,
"step": 2780
},
{
"epoch": 0.95,
"learning_rate": 8.093292475314948e-06,
"loss": 0.3083,
"step": 2800
},
{
"epoch": 0.96,
"learning_rate": 8.079673135852912e-06,
"loss": 0.304,
"step": 2820
},
{
"epoch": 0.97,
"learning_rate": 8.066053796390875e-06,
"loss": 0.3087,
"step": 2840
},
{
"epoch": 0.97,
"learning_rate": 8.05243445692884e-06,
"loss": 0.3681,
"step": 2860
},
{
"epoch": 0.98,
"learning_rate": 8.038815117466805e-06,
"loss": 0.2919,
"step": 2880
},
{
"epoch": 0.99,
"learning_rate": 8.025195778004767e-06,
"loss": 0.3218,
"step": 2900
},
{
"epoch": 0.99,
"learning_rate": 8.01157643854273e-06,
"loss": 0.3012,
"step": 2920
},
{
"epoch": 1.0,
"learning_rate": 7.997957099080696e-06,
"loss": 0.2952,
"step": 2940
},
{
"epoch": 1.01,
"learning_rate": 7.98433775961866e-06,
"loss": 0.2597,
"step": 2960
},
{
"epoch": 1.01,
"learning_rate": 7.970718420156623e-06,
"loss": 0.2836,
"step": 2980
},
{
"epoch": 1.02,
"learning_rate": 7.957099080694587e-06,
"loss": 0.2831,
"step": 3000
},
{
"epoch": 1.02,
"eval_loss": 0.48173409700393677,
"eval_roc_auc": 0.7896713615023474,
"eval_runtime": 226.5392,
"eval_samples_per_second": 65.816,
"eval_steps_per_second": 8.228,
"step": 3000
},
{
"epoch": 1.03,
"learning_rate": 7.94347974123255e-06,
"loss": 0.2553,
"step": 3020
},
{
"epoch": 1.04,
"learning_rate": 7.929860401770514e-06,
"loss": 0.2786,
"step": 3040
},
{
"epoch": 1.04,
"learning_rate": 7.916241062308478e-06,
"loss": 0.2636,
"step": 3060
},
{
"epoch": 1.05,
"learning_rate": 7.902621722846444e-06,
"loss": 0.2512,
"step": 3080
},
{
"epoch": 1.06,
"learning_rate": 7.889002383384407e-06,
"loss": 0.2755,
"step": 3100
},
{
"epoch": 1.06,
"learning_rate": 7.87538304392237e-06,
"loss": 0.2738,
"step": 3120
},
{
"epoch": 1.07,
"learning_rate": 7.861763704460335e-06,
"loss": 0.2653,
"step": 3140
},
{
"epoch": 1.08,
"learning_rate": 7.848144364998298e-06,
"loss": 0.2944,
"step": 3160
},
{
"epoch": 1.08,
"learning_rate": 7.834525025536262e-06,
"loss": 0.2624,
"step": 3180
},
{
"epoch": 1.09,
"learning_rate": 7.820905686074226e-06,
"loss": 0.2323,
"step": 3200
},
{
"epoch": 1.1,
"learning_rate": 7.80728634661219e-06,
"loss": 0.2742,
"step": 3220
},
{
"epoch": 1.1,
"learning_rate": 7.793667007150153e-06,
"loss": 0.2747,
"step": 3240
},
{
"epoch": 1.11,
"learning_rate": 7.780047667688117e-06,
"loss": 0.2377,
"step": 3260
},
{
"epoch": 1.12,
"learning_rate": 7.766428328226083e-06,
"loss": 0.2602,
"step": 3280
},
{
"epoch": 1.12,
"learning_rate": 7.752808988764046e-06,
"loss": 0.2795,
"step": 3300
},
{
"epoch": 1.13,
"learning_rate": 7.739189649302008e-06,
"loss": 0.2344,
"step": 3320
},
{
"epoch": 1.14,
"learning_rate": 7.725570309839974e-06,
"loss": 0.2519,
"step": 3340
},
{
"epoch": 1.14,
"learning_rate": 7.711950970377937e-06,
"loss": 0.2342,
"step": 3360
},
{
"epoch": 1.15,
"learning_rate": 7.698331630915901e-06,
"loss": 0.2442,
"step": 3380
},
{
"epoch": 1.16,
"learning_rate": 7.684712291453865e-06,
"loss": 0.2626,
"step": 3400
},
{
"epoch": 1.16,
"learning_rate": 7.671092951991829e-06,
"loss": 0.2381,
"step": 3420
},
{
"epoch": 1.17,
"learning_rate": 7.657473612529794e-06,
"loss": 0.2341,
"step": 3440
},
{
"epoch": 1.18,
"learning_rate": 7.643854273067756e-06,
"loss": 0.2994,
"step": 3460
},
{
"epoch": 1.18,
"learning_rate": 7.63023493360572e-06,
"loss": 0.2759,
"step": 3480
},
{
"epoch": 1.19,
"learning_rate": 7.616615594143684e-06,
"loss": 0.2927,
"step": 3500
},
{
"epoch": 1.2,
"learning_rate": 7.602996254681648e-06,
"loss": 0.2327,
"step": 3520
},
{
"epoch": 1.21,
"learning_rate": 7.589376915219613e-06,
"loss": 0.3248,
"step": 3540
},
{
"epoch": 1.21,
"learning_rate": 7.5757575757575764e-06,
"loss": 0.2836,
"step": 3560
},
{
"epoch": 1.22,
"learning_rate": 7.56213823629554e-06,
"loss": 0.2545,
"step": 3580
},
{
"epoch": 1.23,
"learning_rate": 7.548518896833505e-06,
"loss": 0.2596,
"step": 3600
},
{
"epoch": 1.23,
"learning_rate": 7.534899557371468e-06,
"loss": 0.2308,
"step": 3620
},
{
"epoch": 1.24,
"learning_rate": 7.521280217909432e-06,
"loss": 0.2462,
"step": 3640
},
{
"epoch": 1.25,
"learning_rate": 7.507660878447396e-06,
"loss": 0.2763,
"step": 3660
},
{
"epoch": 1.25,
"learning_rate": 7.49404153898536e-06,
"loss": 0.271,
"step": 3680
},
{
"epoch": 1.26,
"learning_rate": 7.480422199523324e-06,
"loss": 0.2717,
"step": 3700
},
{
"epoch": 1.27,
"learning_rate": 7.466802860061287e-06,
"loss": 0.2995,
"step": 3720
},
{
"epoch": 1.27,
"learning_rate": 7.453183520599252e-06,
"loss": 0.2812,
"step": 3740
},
{
"epoch": 1.28,
"learning_rate": 7.439564181137215e-06,
"loss": 0.2863,
"step": 3760
},
{
"epoch": 1.29,
"learning_rate": 7.425944841675179e-06,
"loss": 0.2468,
"step": 3780
},
{
"epoch": 1.29,
"learning_rate": 7.412325502213144e-06,
"loss": 0.2542,
"step": 3800
},
{
"epoch": 1.3,
"learning_rate": 7.3987061627511066e-06,
"loss": 0.2439,
"step": 3820
},
{
"epoch": 1.31,
"learning_rate": 7.385086823289071e-06,
"loss": 0.2923,
"step": 3840
},
{
"epoch": 1.31,
"learning_rate": 7.371467483827035e-06,
"loss": 0.2809,
"step": 3860
},
{
"epoch": 1.32,
"learning_rate": 7.357848144364999e-06,
"loss": 0.2601,
"step": 3880
},
{
"epoch": 1.33,
"learning_rate": 7.344228804902963e-06,
"loss": 0.2481,
"step": 3900
},
{
"epoch": 1.33,
"learning_rate": 7.330609465440927e-06,
"loss": 0.2886,
"step": 3920
},
{
"epoch": 1.34,
"learning_rate": 7.3169901259788915e-06,
"loss": 0.2546,
"step": 3940
},
{
"epoch": 1.35,
"learning_rate": 7.303370786516854e-06,
"loss": 0.2393,
"step": 3960
},
{
"epoch": 1.36,
"learning_rate": 7.289751447054818e-06,
"loss": 0.2839,
"step": 3980
},
{
"epoch": 1.36,
"learning_rate": 7.276132107592783e-06,
"loss": 0.2781,
"step": 4000
},
{
"epoch": 1.36,
"eval_loss": 0.5417753458023071,
"eval_roc_auc": 0.7655935613682091,
"eval_runtime": 226.2172,
"eval_samples_per_second": 65.91,
"eval_steps_per_second": 8.24,
"step": 4000
},
{
"epoch": 1.37,
"learning_rate": 7.262512768130746e-06,
"loss": 0.2696,
"step": 4020
},
{
"epoch": 1.38,
"learning_rate": 7.248893428668711e-06,
"loss": 0.2458,
"step": 4040
},
{
"epoch": 1.38,
"learning_rate": 7.235274089206674e-06,
"loss": 0.269,
"step": 4060
},
{
"epoch": 1.39,
"learning_rate": 7.2216547497446376e-06,
"loss": 0.2798,
"step": 4080
},
{
"epoch": 1.4,
"learning_rate": 7.208035410282602e-06,
"loss": 0.2501,
"step": 4100
},
{
"epoch": 1.4,
"learning_rate": 7.194416070820566e-06,
"loss": 0.2655,
"step": 4120
},
{
"epoch": 1.41,
"learning_rate": 7.180796731358529e-06,
"loss": 0.2207,
"step": 4140
},
{
"epoch": 1.42,
"learning_rate": 7.167177391896494e-06,
"loss": 0.2381,
"step": 4160
},
{
"epoch": 1.42,
"learning_rate": 7.153558052434457e-06,
"loss": 0.2929,
"step": 4180
},
{
"epoch": 1.43,
"learning_rate": 7.139938712972422e-06,
"loss": 0.2455,
"step": 4200
},
{
"epoch": 1.44,
"learning_rate": 7.126319373510385e-06,
"loss": 0.2443,
"step": 4220
},
{
"epoch": 1.44,
"learning_rate": 7.112700034048349e-06,
"loss": 0.2439,
"step": 4240
},
{
"epoch": 1.45,
"learning_rate": 7.099080694586314e-06,
"loss": 0.2799,
"step": 4260
},
{
"epoch": 1.46,
"learning_rate": 7.0854613551242765e-06,
"loss": 0.2492,
"step": 4280
},
{
"epoch": 1.46,
"learning_rate": 7.071842015662241e-06,
"loss": 0.281,
"step": 4300
},
{
"epoch": 1.47,
"learning_rate": 7.058222676200205e-06,
"loss": 0.2539,
"step": 4320
},
{
"epoch": 1.48,
"learning_rate": 7.0446033367381686e-06,
"loss": 0.2464,
"step": 4340
},
{
"epoch": 1.48,
"learning_rate": 7.030983997276133e-06,
"loss": 0.2929,
"step": 4360
},
{
"epoch": 1.49,
"learning_rate": 7.017364657814096e-06,
"loss": 0.2679,
"step": 4380
},
{
"epoch": 1.5,
"learning_rate": 7.003745318352061e-06,
"loss": 0.2155,
"step": 4400
},
{
"epoch": 1.5,
"learning_rate": 6.990125978890024e-06,
"loss": 0.247,
"step": 4420
},
{
"epoch": 1.51,
"learning_rate": 6.976506639427988e-06,
"loss": 0.2069,
"step": 4440
},
{
"epoch": 1.52,
"learning_rate": 6.962887299965953e-06,
"loss": 0.2544,
"step": 4460
},
{
"epoch": 1.53,
"learning_rate": 6.949267960503916e-06,
"loss": 0.2162,
"step": 4480
},
{
"epoch": 1.53,
"learning_rate": 6.935648621041881e-06,
"loss": 0.252,
"step": 4500
},
{
"epoch": 1.54,
"learning_rate": 6.922029281579844e-06,
"loss": 0.2548,
"step": 4520
},
{
"epoch": 1.55,
"learning_rate": 6.9084099421178075e-06,
"loss": 0.27,
"step": 4540
},
{
"epoch": 1.55,
"learning_rate": 6.894790602655772e-06,
"loss": 0.2314,
"step": 4560
},
{
"epoch": 1.56,
"learning_rate": 6.881171263193736e-06,
"loss": 0.2381,
"step": 4580
},
{
"epoch": 1.57,
"learning_rate": 6.8675519237317e-06,
"loss": 0.2739,
"step": 4600
},
{
"epoch": 1.57,
"learning_rate": 6.853932584269663e-06,
"loss": 0.2185,
"step": 4620
},
{
"epoch": 1.58,
"learning_rate": 6.840313244807627e-06,
"loss": 0.2458,
"step": 4640
},
{
"epoch": 1.59,
"learning_rate": 6.826693905345592e-06,
"loss": 0.3119,
"step": 4660
},
{
"epoch": 1.59,
"learning_rate": 6.813074565883555e-06,
"loss": 0.2082,
"step": 4680
},
{
"epoch": 1.6,
"learning_rate": 6.799455226421518e-06,
"loss": 0.2772,
"step": 4700
},
{
"epoch": 1.61,
"learning_rate": 6.785835886959483e-06,
"loss": 0.238,
"step": 4720
},
{
"epoch": 1.61,
"learning_rate": 6.7722165474974465e-06,
"loss": 0.2403,
"step": 4740
},
{
"epoch": 1.62,
"learning_rate": 6.758597208035411e-06,
"loss": 0.2496,
"step": 4760
},
{
"epoch": 1.63,
"learning_rate": 6.744977868573375e-06,
"loss": 0.258,
"step": 4780
},
{
"epoch": 1.63,
"learning_rate": 6.7313585291113385e-06,
"loss": 0.2462,
"step": 4800
},
{
"epoch": 1.64,
"learning_rate": 6.717739189649303e-06,
"loss": 0.3012,
"step": 4820
},
{
"epoch": 1.65,
"learning_rate": 6.704119850187266e-06,
"loss": 0.2331,
"step": 4840
},
{
"epoch": 1.65,
"learning_rate": 6.6905005107252305e-06,
"loss": 0.2477,
"step": 4860
},
{
"epoch": 1.66,
"learning_rate": 6.676881171263194e-06,
"loss": 0.234,
"step": 4880
},
{
"epoch": 1.67,
"learning_rate": 6.663261831801158e-06,
"loss": 0.2434,
"step": 4900
},
{
"epoch": 1.68,
"learning_rate": 6.649642492339123e-06,
"loss": 0.3011,
"step": 4920
},
{
"epoch": 1.68,
"learning_rate": 6.6360231528770855e-06,
"loss": 0.2587,
"step": 4940
},
{
"epoch": 1.69,
"learning_rate": 6.62240381341505e-06,
"loss": 0.2252,
"step": 4960
},
{
"epoch": 1.7,
"learning_rate": 6.608784473953014e-06,
"loss": 0.2443,
"step": 4980
},
{
"epoch": 1.7,
"learning_rate": 6.5951651344909775e-06,
"loss": 0.2355,
"step": 5000
},
{
"epoch": 1.7,
"eval_loss": 0.5398261547088623,
"eval_roc_auc": 0.7786049631120053,
"eval_runtime": 224.1313,
"eval_samples_per_second": 66.524,
"eval_steps_per_second": 8.317,
"step": 5000
},
{
"epoch": 1.71,
"learning_rate": 6.581545795028942e-06,
"loss": 0.2645,
"step": 5020
},
{
"epoch": 1.72,
"learning_rate": 6.567926455566905e-06,
"loss": 0.2447,
"step": 5040
},
{
"epoch": 1.72,
"learning_rate": 6.5543071161048695e-06,
"loss": 0.234,
"step": 5060
},
{
"epoch": 1.73,
"learning_rate": 6.540687776642833e-06,
"loss": 0.2343,
"step": 5080
},
{
"epoch": 1.74,
"learning_rate": 6.527068437180797e-06,
"loss": 0.2592,
"step": 5100
},
{
"epoch": 1.74,
"learning_rate": 6.5134490977187615e-06,
"loss": 0.2689,
"step": 5120
},
{
"epoch": 1.75,
"learning_rate": 6.499829758256725e-06,
"loss": 0.2535,
"step": 5140
},
{
"epoch": 1.76,
"learning_rate": 6.48621041879469e-06,
"loss": 0.2059,
"step": 5160
},
{
"epoch": 1.76,
"learning_rate": 6.472591079332653e-06,
"loss": 0.2646,
"step": 5180
},
{
"epoch": 1.77,
"learning_rate": 6.4589717398706165e-06,
"loss": 0.2203,
"step": 5200
},
{
"epoch": 1.78,
"learning_rate": 6.445352400408581e-06,
"loss": 0.2413,
"step": 5220
},
{
"epoch": 1.78,
"learning_rate": 6.431733060946545e-06,
"loss": 0.2399,
"step": 5240
},
{
"epoch": 1.79,
"learning_rate": 6.418113721484509e-06,
"loss": 0.2313,
"step": 5260
},
{
"epoch": 1.8,
"learning_rate": 6.404494382022472e-06,
"loss": 0.2082,
"step": 5280
},
{
"epoch": 1.8,
"learning_rate": 6.390875042560436e-06,
"loss": 0.2555,
"step": 5300
},
{
"epoch": 1.81,
"learning_rate": 6.3772557030984005e-06,
"loss": 0.2664,
"step": 5320
},
{
"epoch": 1.82,
"learning_rate": 6.363636363636364e-06,
"loss": 0.2367,
"step": 5340
},
{
"epoch": 1.82,
"learning_rate": 6.350017024174327e-06,
"loss": 0.2643,
"step": 5360
},
{
"epoch": 1.83,
"learning_rate": 6.336397684712292e-06,
"loss": 0.1894,
"step": 5380
},
{
"epoch": 1.84,
"learning_rate": 6.322778345250255e-06,
"loss": 0.2024,
"step": 5400
},
{
"epoch": 1.85,
"learning_rate": 6.30915900578822e-06,
"loss": 0.2293,
"step": 5420
},
{
"epoch": 1.85,
"learning_rate": 6.295539666326184e-06,
"loss": 0.2299,
"step": 5440
},
{
"epoch": 1.86,
"learning_rate": 6.2819203268641474e-06,
"loss": 0.2658,
"step": 5460
},
{
"epoch": 1.87,
"learning_rate": 6.268300987402112e-06,
"loss": 0.2577,
"step": 5480
},
{
"epoch": 1.87,
"learning_rate": 6.254681647940075e-06,
"loss": 0.2487,
"step": 5500
},
{
"epoch": 1.88,
"learning_rate": 6.2410623084780395e-06,
"loss": 0.2413,
"step": 5520
},
{
"epoch": 1.89,
"learning_rate": 6.227442969016003e-06,
"loss": 0.2544,
"step": 5540
},
{
"epoch": 1.89,
"learning_rate": 6.213823629553967e-06,
"loss": 0.2531,
"step": 5560
},
{
"epoch": 1.9,
"learning_rate": 6.2002042900919315e-06,
"loss": 0.293,
"step": 5580
},
{
"epoch": 1.91,
"learning_rate": 6.186584950629894e-06,
"loss": 0.2563,
"step": 5600
},
{
"epoch": 1.91,
"learning_rate": 6.172965611167859e-06,
"loss": 0.2549,
"step": 5620
},
{
"epoch": 1.92,
"learning_rate": 6.159346271705823e-06,
"loss": 0.2166,
"step": 5640
},
{
"epoch": 1.93,
"learning_rate": 6.145726932243786e-06,
"loss": 0.2084,
"step": 5660
},
{
"epoch": 1.93,
"learning_rate": 6.132107592781751e-06,
"loss": 0.2599,
"step": 5680
},
{
"epoch": 1.94,
"learning_rate": 6.118488253319715e-06,
"loss": 0.2266,
"step": 5700
},
{
"epoch": 1.95,
"learning_rate": 6.104868913857679e-06,
"loss": 0.2363,
"step": 5720
},
{
"epoch": 1.95,
"learning_rate": 6.091249574395642e-06,
"loss": 0.2586,
"step": 5740
},
{
"epoch": 1.96,
"learning_rate": 6.077630234933606e-06,
"loss": 0.2428,
"step": 5760
},
{
"epoch": 1.97,
"learning_rate": 6.0640108954715705e-06,
"loss": 0.269,
"step": 5780
},
{
"epoch": 1.97,
"learning_rate": 6.050391556009534e-06,
"loss": 0.2265,
"step": 5800
},
{
"epoch": 1.98,
"learning_rate": 6.036772216547499e-06,
"loss": 0.2594,
"step": 5820
},
{
"epoch": 1.99,
"learning_rate": 6.023152877085462e-06,
"loss": 0.2552,
"step": 5840
},
{
"epoch": 2.0,
"learning_rate": 6.009533537623425e-06,
"loss": 0.2279,
"step": 5860
},
{
"epoch": 2.0,
"learning_rate": 5.99591419816139e-06,
"loss": 0.2381,
"step": 5880
},
{
"epoch": 2.01,
"learning_rate": 5.982294858699354e-06,
"loss": 0.2271,
"step": 5900
},
{
"epoch": 2.02,
"learning_rate": 5.968675519237318e-06,
"loss": 0.1785,
"step": 5920
},
{
"epoch": 2.02,
"learning_rate": 5.955056179775281e-06,
"loss": 0.2066,
"step": 5940
},
{
"epoch": 2.03,
"learning_rate": 5.941436840313245e-06,
"loss": 0.219,
"step": 5960
},
{
"epoch": 2.04,
"learning_rate": 5.9278175008512094e-06,
"loss": 0.1896,
"step": 5980
},
{
"epoch": 2.04,
"learning_rate": 5.914198161389173e-06,
"loss": 0.1978,
"step": 6000
},
{
"epoch": 2.04,
"eval_loss": 0.6120939254760742,
"eval_roc_auc": 0.7648558014755198,
"eval_runtime": 225.5238,
"eval_samples_per_second": 66.113,
"eval_steps_per_second": 8.265,
"step": 6000
},
{
"epoch": 2.05,
"learning_rate": 5.900578821927137e-06,
"loss": 0.1811,
"step": 6020
},
{
"epoch": 2.06,
"learning_rate": 5.8869594824651015e-06,
"loss": 0.2201,
"step": 6040
},
{
"epoch": 2.06,
"learning_rate": 5.873340143003064e-06,
"loss": 0.2118,
"step": 6060
},
{
"epoch": 2.07,
"learning_rate": 5.859720803541029e-06,
"loss": 0.1765,
"step": 6080
},
{
"epoch": 2.08,
"learning_rate": 5.846101464078993e-06,
"loss": 0.2063,
"step": 6100
},
{
"epoch": 2.08,
"learning_rate": 5.832482124616956e-06,
"loss": 0.2055,
"step": 6120
},
{
"epoch": 2.09,
"learning_rate": 5.818862785154921e-06,
"loss": 0.1919,
"step": 6140
},
{
"epoch": 2.1,
"learning_rate": 5.805243445692884e-06,
"loss": 0.1999,
"step": 6160
},
{
"epoch": 2.1,
"learning_rate": 5.791624106230848e-06,
"loss": 0.1761,
"step": 6180
},
{
"epoch": 2.11,
"learning_rate": 5.778004766768812e-06,
"loss": 0.1917,
"step": 6200
},
{
"epoch": 2.12,
"learning_rate": 5.764385427306776e-06,
"loss": 0.2086,
"step": 6220
},
{
"epoch": 2.12,
"learning_rate": 5.7507660878447404e-06,
"loss": 0.195,
"step": 6240
},
{
"epoch": 2.13,
"learning_rate": 5.737146748382703e-06,
"loss": 0.2244,
"step": 6260
},
{
"epoch": 2.14,
"learning_rate": 5.723527408920668e-06,
"loss": 0.2411,
"step": 6280
},
{
"epoch": 2.15,
"learning_rate": 5.709908069458632e-06,
"loss": 0.1984,
"step": 6300
},
{
"epoch": 2.15,
"learning_rate": 5.696288729996595e-06,
"loss": 0.1899,
"step": 6320
},
{
"epoch": 2.16,
"learning_rate": 5.68266939053456e-06,
"loss": 0.2241,
"step": 6340
},
{
"epoch": 2.17,
"learning_rate": 5.669050051072524e-06,
"loss": 0.2296,
"step": 6360
},
{
"epoch": 2.17,
"learning_rate": 5.655430711610488e-06,
"loss": 0.2082,
"step": 6380
},
{
"epoch": 2.18,
"learning_rate": 5.641811372148451e-06,
"loss": 0.1981,
"step": 6400
},
{
"epoch": 2.19,
"learning_rate": 5.628192032686415e-06,
"loss": 0.2306,
"step": 6420
},
{
"epoch": 2.19,
"learning_rate": 5.614572693224379e-06,
"loss": 0.1962,
"step": 6440
},
{
"epoch": 2.2,
"learning_rate": 5.600953353762343e-06,
"loss": 0.1773,
"step": 6460
},
{
"epoch": 2.21,
"learning_rate": 5.587334014300308e-06,
"loss": 0.2058,
"step": 6480
},
{
"epoch": 2.21,
"learning_rate": 5.5737146748382706e-06,
"loss": 0.1875,
"step": 6500
},
{
"epoch": 2.22,
"learning_rate": 5.560095335376234e-06,
"loss": 0.208,
"step": 6520
},
{
"epoch": 2.23,
"learning_rate": 5.546475995914199e-06,
"loss": 0.2189,
"step": 6540
},
{
"epoch": 2.23,
"learning_rate": 5.532856656452163e-06,
"loss": 0.1869,
"step": 6560
},
{
"epoch": 2.24,
"learning_rate": 5.5192373169901255e-06,
"loss": 0.1737,
"step": 6580
},
{
"epoch": 2.25,
"learning_rate": 5.50561797752809e-06,
"loss": 0.1807,
"step": 6600
},
{
"epoch": 2.25,
"learning_rate": 5.491998638066054e-06,
"loss": 0.2236,
"step": 6620
},
{
"epoch": 2.26,
"learning_rate": 5.478379298604018e-06,
"loss": 0.2033,
"step": 6640
},
{
"epoch": 2.27,
"learning_rate": 5.464759959141982e-06,
"loss": 0.177,
"step": 6660
},
{
"epoch": 2.27,
"learning_rate": 5.451140619679946e-06,
"loss": 0.1955,
"step": 6680
},
{
"epoch": 2.28,
"learning_rate": 5.43752128021791e-06,
"loss": 0.1657,
"step": 6700
},
{
"epoch": 2.29,
"learning_rate": 5.423901940755873e-06,
"loss": 0.2116,
"step": 6720
},
{
"epoch": 2.29,
"learning_rate": 5.410282601293838e-06,
"loss": 0.1991,
"step": 6740
},
{
"epoch": 2.3,
"learning_rate": 5.3966632618318016e-06,
"loss": 0.2085,
"step": 6760
},
{
"epoch": 2.31,
"learning_rate": 5.383043922369765e-06,
"loss": 0.2189,
"step": 6780
},
{
"epoch": 2.32,
"learning_rate": 5.36942458290773e-06,
"loss": 0.1781,
"step": 6800
},
{
"epoch": 2.32,
"learning_rate": 5.355805243445693e-06,
"loss": 0.192,
"step": 6820
},
{
"epoch": 2.33,
"learning_rate": 5.342185903983657e-06,
"loss": 0.1436,
"step": 6840
},
{
"epoch": 2.34,
"learning_rate": 5.328566564521621e-06,
"loss": 0.2369,
"step": 6860
},
{
"epoch": 2.34,
"learning_rate": 5.314947225059585e-06,
"loss": 0.1851,
"step": 6880
},
{
"epoch": 2.35,
"learning_rate": 5.301327885597549e-06,
"loss": 0.1813,
"step": 6900
},
{
"epoch": 2.36,
"learning_rate": 5.287708546135513e-06,
"loss": 0.1995,
"step": 6920
},
{
"epoch": 2.36,
"learning_rate": 5.274089206673478e-06,
"loss": 0.1771,
"step": 6940
},
{
"epoch": 2.37,
"learning_rate": 5.2604698672114405e-06,
"loss": 0.2012,
"step": 6960
},
{
"epoch": 2.38,
"learning_rate": 5.246850527749404e-06,
"loss": 0.2236,
"step": 6980
},
{
"epoch": 2.38,
"learning_rate": 5.233231188287369e-06,
"loss": 0.149,
"step": 7000
},
{
"epoch": 2.38,
"eval_loss": 0.6401586532592773,
"eval_roc_auc": 0.7706237424547283,
"eval_runtime": 224.5054,
"eval_samples_per_second": 66.413,
"eval_steps_per_second": 8.303,
"step": 7000
},
{
"epoch": 2.39,
"learning_rate": 5.2196118488253326e-06,
"loss": 0.2141,
"step": 7020
},
{
"epoch": 2.4,
"learning_rate": 5.205992509363297e-06,
"loss": 0.1899,
"step": 7040
},
{
"epoch": 2.4,
"learning_rate": 5.19237316990126e-06,
"loss": 0.1737,
"step": 7060
},
{
"epoch": 2.41,
"learning_rate": 5.178753830439224e-06,
"loss": 0.182,
"step": 7080
},
{
"epoch": 2.42,
"learning_rate": 5.165134490977188e-06,
"loss": 0.1781,
"step": 7100
},
{
"epoch": 2.42,
"learning_rate": 5.151515151515152e-06,
"loss": 0.1758,
"step": 7120
},
{
"epoch": 2.43,
"learning_rate": 5.137895812053117e-06,
"loss": 0.2001,
"step": 7140
},
{
"epoch": 2.44,
"learning_rate": 5.1242764725910795e-06,
"loss": 0.1888,
"step": 7160
},
{
"epoch": 2.44,
"learning_rate": 5.110657133129043e-06,
"loss": 0.1871,
"step": 7180
},
{
"epoch": 2.45,
"learning_rate": 5.097037793667008e-06,
"loss": 0.2059,
"step": 7200
},
{
"epoch": 2.46,
"learning_rate": 5.0834184542049715e-06,
"loss": 0.2047,
"step": 7220
},
{
"epoch": 2.47,
"learning_rate": 5.069799114742935e-06,
"loss": 0.1985,
"step": 7240
},
{
"epoch": 2.47,
"learning_rate": 5.0561797752809e-06,
"loss": 0.2316,
"step": 7260
},
{
"epoch": 2.48,
"learning_rate": 5.042560435818863e-06,
"loss": 0.2107,
"step": 7280
},
{
"epoch": 2.49,
"learning_rate": 5.028941096356827e-06,
"loss": 0.1963,
"step": 7300
},
{
"epoch": 2.49,
"learning_rate": 5.015321756894791e-06,
"loss": 0.209,
"step": 7320
},
{
"epoch": 2.5,
"learning_rate": 5.001702417432755e-06,
"loss": 0.2221,
"step": 7340
},
{
"epoch": 2.51,
"learning_rate": 4.988083077970719e-06,
"loss": 0.1636,
"step": 7360
},
{
"epoch": 2.51,
"learning_rate": 4.974463738508682e-06,
"loss": 0.2369,
"step": 7380
},
{
"epoch": 2.52,
"learning_rate": 4.960844399046647e-06,
"loss": 0.2145,
"step": 7400
},
{
"epoch": 2.53,
"learning_rate": 4.9472250595846105e-06,
"loss": 0.1966,
"step": 7420
},
{
"epoch": 2.53,
"learning_rate": 4.933605720122574e-06,
"loss": 0.2195,
"step": 7440
},
{
"epoch": 2.54,
"learning_rate": 4.919986380660539e-06,
"loss": 0.1953,
"step": 7460
},
{
"epoch": 2.55,
"learning_rate": 4.906367041198502e-06,
"loss": 0.1959,
"step": 7480
},
{
"epoch": 2.55,
"learning_rate": 4.892747701736466e-06,
"loss": 0.1888,
"step": 7500
},
{
"epoch": 2.56,
"learning_rate": 4.87912836227443e-06,
"loss": 0.1777,
"step": 7520
},
{
"epoch": 2.57,
"learning_rate": 4.8655090228123945e-06,
"loss": 0.2161,
"step": 7540
},
{
"epoch": 2.57,
"learning_rate": 4.851889683350358e-06,
"loss": 0.2002,
"step": 7560
},
{
"epoch": 2.58,
"learning_rate": 4.838270343888322e-06,
"loss": 0.212,
"step": 7580
},
{
"epoch": 2.59,
"learning_rate": 4.824651004426286e-06,
"loss": 0.2396,
"step": 7600
},
{
"epoch": 2.59,
"learning_rate": 4.8110316649642495e-06,
"loss": 0.161,
"step": 7620
},
{
"epoch": 2.6,
"learning_rate": 4.797412325502214e-06,
"loss": 0.2192,
"step": 7640
},
{
"epoch": 2.61,
"learning_rate": 4.783792986040177e-06,
"loss": 0.1878,
"step": 7660
},
{
"epoch": 2.61,
"learning_rate": 4.7701736465781415e-06,
"loss": 0.2237,
"step": 7680
},
{
"epoch": 2.62,
"learning_rate": 4.756554307116105e-06,
"loss": 0.1654,
"step": 7700
},
{
"epoch": 2.63,
"learning_rate": 4.742934967654069e-06,
"loss": 0.1884,
"step": 7720
},
{
"epoch": 2.64,
"learning_rate": 4.7293156281920335e-06,
"loss": 0.2605,
"step": 7740
},
{
"epoch": 2.64,
"learning_rate": 4.715696288729996e-06,
"loss": 0.1947,
"step": 7760
},
{
"epoch": 2.65,
"learning_rate": 4.702076949267961e-06,
"loss": 0.2241,
"step": 7780
},
{
"epoch": 2.66,
"learning_rate": 4.688457609805925e-06,
"loss": 0.2135,
"step": 7800
},
{
"epoch": 2.66,
"learning_rate": 4.674838270343888e-06,
"loss": 0.1556,
"step": 7820
},
{
"epoch": 2.67,
"learning_rate": 4.661218930881853e-06,
"loss": 0.2006,
"step": 7840
},
{
"epoch": 2.68,
"learning_rate": 4.647599591419817e-06,
"loss": 0.168,
"step": 7860
},
{
"epoch": 2.68,
"learning_rate": 4.6339802519577804e-06,
"loss": 0.1939,
"step": 7880
},
{
"epoch": 2.69,
"learning_rate": 4.620360912495744e-06,
"loss": 0.1877,
"step": 7900
},
{
"epoch": 2.7,
"learning_rate": 4.606741573033709e-06,
"loss": 0.2254,
"step": 7920
},
{
"epoch": 2.7,
"learning_rate": 4.5931222335716725e-06,
"loss": 0.1725,
"step": 7940
},
{
"epoch": 2.71,
"learning_rate": 4.579502894109636e-06,
"loss": 0.2169,
"step": 7960
},
{
"epoch": 2.72,
"learning_rate": 4.5658835546476e-06,
"loss": 0.1552,
"step": 7980
},
{
"epoch": 2.72,
"learning_rate": 4.552264215185564e-06,
"loss": 0.1766,
"step": 8000
},
{
"epoch": 2.72,
"eval_loss": 0.6767948865890503,
"eval_roc_auc": 0.7610328638497652,
"eval_runtime": 224.0971,
"eval_samples_per_second": 66.534,
"eval_steps_per_second": 8.318,
"step": 8000
},
{
"epoch": 2.73,
"learning_rate": 4.538644875723528e-06,
"loss": 0.1792,
"step": 8020
},
{
"epoch": 2.74,
"learning_rate": 4.525025536261491e-06,
"loss": 0.2031,
"step": 8040
},
{
"epoch": 2.74,
"learning_rate": 4.511406196799456e-06,
"loss": 0.1644,
"step": 8060
},
{
"epoch": 2.75,
"learning_rate": 4.497786857337419e-06,
"loss": 0.1895,
"step": 8080
},
{
"epoch": 2.76,
"learning_rate": 4.484167517875383e-06,
"loss": 0.1915,
"step": 8100
},
{
"epoch": 2.76,
"learning_rate": 4.470548178413348e-06,
"loss": 0.1828,
"step": 8120
},
{
"epoch": 2.77,
"learning_rate": 4.456928838951311e-06,
"loss": 0.2172,
"step": 8140
},
{
"epoch": 2.78,
"learning_rate": 4.443309499489275e-06,
"loss": 0.1817,
"step": 8160
},
{
"epoch": 2.79,
"learning_rate": 4.429690160027239e-06,
"loss": 0.1833,
"step": 8180
},
{
"epoch": 2.79,
"learning_rate": 4.4160708205652035e-06,
"loss": 0.1648,
"step": 8200
},
{
"epoch": 2.8,
"learning_rate": 4.402451481103167e-06,
"loss": 0.1905,
"step": 8220
},
{
"epoch": 2.81,
"learning_rate": 4.388832141641131e-06,
"loss": 0.1964,
"step": 8240
},
{
"epoch": 2.81,
"learning_rate": 4.375212802179095e-06,
"loss": 0.2237,
"step": 8260
},
{
"epoch": 2.82,
"learning_rate": 4.361593462717058e-06,
"loss": 0.1929,
"step": 8280
},
{
"epoch": 2.83,
"learning_rate": 4.347974123255023e-06,
"loss": 0.2196,
"step": 8300
},
{
"epoch": 2.83,
"learning_rate": 4.334354783792986e-06,
"loss": 0.1876,
"step": 8320
},
{
"epoch": 2.84,
"learning_rate": 4.32073544433095e-06,
"loss": 0.2213,
"step": 8340
},
{
"epoch": 2.85,
"learning_rate": 4.307116104868914e-06,
"loss": 0.2043,
"step": 8360
},
{
"epoch": 2.85,
"learning_rate": 4.293496765406878e-06,
"loss": 0.1865,
"step": 8380
},
{
"epoch": 2.86,
"learning_rate": 4.2798774259448424e-06,
"loss": 0.2067,
"step": 8400
},
{
"epoch": 2.87,
"learning_rate": 4.266258086482805e-06,
"loss": 0.1983,
"step": 8420
},
{
"epoch": 2.87,
"learning_rate": 4.25263874702077e-06,
"loss": 0.1724,
"step": 8440
},
{
"epoch": 2.88,
"learning_rate": 4.239019407558734e-06,
"loss": 0.2027,
"step": 8460
},
{
"epoch": 2.89,
"learning_rate": 4.225400068096698e-06,
"loss": 0.1854,
"step": 8480
},
{
"epoch": 2.89,
"learning_rate": 4.211780728634662e-06,
"loss": 0.1886,
"step": 8500
},
{
"epoch": 2.9,
"learning_rate": 4.198161389172626e-06,
"loss": 0.1882,
"step": 8520
},
{
"epoch": 2.91,
"learning_rate": 4.184542049710589e-06,
"loss": 0.221,
"step": 8540
},
{
"epoch": 2.91,
"learning_rate": 4.170922710248553e-06,
"loss": 0.1875,
"step": 8560
},
{
"epoch": 2.92,
"learning_rate": 4.157303370786518e-06,
"loss": 0.162,
"step": 8580
},
{
"epoch": 2.93,
"learning_rate": 4.1436840313244805e-06,
"loss": 0.1633,
"step": 8600
},
{
"epoch": 2.93,
"learning_rate": 4.130064691862445e-06,
"loss": 0.2134,
"step": 8620
},
{
"epoch": 2.94,
"learning_rate": 4.116445352400409e-06,
"loss": 0.2259,
"step": 8640
},
{
"epoch": 2.95,
"learning_rate": 4.102826012938373e-06,
"loss": 0.1672,
"step": 8660
},
{
"epoch": 2.96,
"learning_rate": 4.089206673476337e-06,
"loss": 0.1891,
"step": 8680
},
{
"epoch": 2.96,
"learning_rate": 4.0755873340143e-06,
"loss": 0.1633,
"step": 8700
},
{
"epoch": 2.97,
"learning_rate": 4.061967994552265e-06,
"loss": 0.192,
"step": 8720
},
{
"epoch": 2.98,
"learning_rate": 4.048348655090228e-06,
"loss": 0.1875,
"step": 8740
},
{
"epoch": 2.98,
"learning_rate": 4.034729315628192e-06,
"loss": 0.1698,
"step": 8760
},
{
"epoch": 2.99,
"learning_rate": 4.021109976166157e-06,
"loss": 0.1905,
"step": 8780
},
{
"epoch": 3.0,
"learning_rate": 4.00749063670412e-06,
"loss": 0.1735,
"step": 8800
},
{
"epoch": 3.0,
"learning_rate": 3.993871297242084e-06,
"loss": 0.1437,
"step": 8820
},
{
"epoch": 3.01,
"learning_rate": 3.980251957780048e-06,
"loss": 0.1843,
"step": 8840
},
{
"epoch": 3.02,
"learning_rate": 3.966632618318012e-06,
"loss": 0.1405,
"step": 8860
},
{
"epoch": 3.02,
"learning_rate": 3.953013278855976e-06,
"loss": 0.1279,
"step": 8880
},
{
"epoch": 3.03,
"learning_rate": 3.93939393939394e-06,
"loss": 0.1407,
"step": 8900
},
{
"epoch": 3.04,
"learning_rate": 3.9257745999319036e-06,
"loss": 0.1751,
"step": 8920
},
{
"epoch": 3.04,
"learning_rate": 3.912155260469867e-06,
"loss": 0.1507,
"step": 8940
},
{
"epoch": 3.05,
"learning_rate": 3.898535921007832e-06,
"loss": 0.1636,
"step": 8960
},
{
"epoch": 3.06,
"learning_rate": 3.884916581545795e-06,
"loss": 0.1631,
"step": 8980
},
{
"epoch": 3.06,
"learning_rate": 3.871297242083759e-06,
"loss": 0.1496,
"step": 9000
},
{
"epoch": 3.06,
"eval_loss": 0.6239368915557861,
"eval_roc_auc": 0.7733065057008719,
"eval_runtime": 227.7765,
"eval_samples_per_second": 65.459,
"eval_steps_per_second": 8.183,
"step": 9000
},
{
"epoch": 3.07,
"learning_rate": 3.857677902621723e-06,
"loss": 0.1598,
"step": 9020
},
{
"epoch": 3.08,
"learning_rate": 3.844058563159687e-06,
"loss": 0.1247,
"step": 9040
},
{
"epoch": 3.08,
"learning_rate": 3.830439223697651e-06,
"loss": 0.1712,
"step": 9060
},
{
"epoch": 3.09,
"learning_rate": 3.816819884235615e-06,
"loss": 0.1623,
"step": 9080
},
{
"epoch": 3.1,
"learning_rate": 3.803200544773579e-06,
"loss": 0.1359,
"step": 9100
},
{
"epoch": 3.11,
"learning_rate": 3.7895812053115425e-06,
"loss": 0.1476,
"step": 9120
},
{
"epoch": 3.11,
"learning_rate": 3.7759618658495067e-06,
"loss": 0.1505,
"step": 9140
},
{
"epoch": 3.12,
"learning_rate": 3.762342526387471e-06,
"loss": 0.1779,
"step": 9160
},
{
"epoch": 3.13,
"learning_rate": 3.748723186925434e-06,
"loss": 0.1288,
"step": 9180
},
{
"epoch": 3.13,
"learning_rate": 3.7351038474633983e-06,
"loss": 0.1545,
"step": 9200
},
{
"epoch": 3.14,
"learning_rate": 3.7214845080013624e-06,
"loss": 0.1543,
"step": 9220
},
{
"epoch": 3.15,
"learning_rate": 3.707865168539326e-06,
"loss": 0.1486,
"step": 9240
},
{
"epoch": 3.15,
"learning_rate": 3.69424582907729e-06,
"loss": 0.1456,
"step": 9260
},
{
"epoch": 3.16,
"learning_rate": 3.6806264896152536e-06,
"loss": 0.1582,
"step": 9280
},
{
"epoch": 3.17,
"learning_rate": 3.6670071501532178e-06,
"loss": 0.1314,
"step": 9300
},
{
"epoch": 3.17,
"learning_rate": 3.653387810691182e-06,
"loss": 0.1784,
"step": 9320
},
{
"epoch": 3.18,
"learning_rate": 3.639768471229146e-06,
"loss": 0.1799,
"step": 9340
},
{
"epoch": 3.19,
"learning_rate": 3.6261491317671094e-06,
"loss": 0.1634,
"step": 9360
},
{
"epoch": 3.19,
"learning_rate": 3.6125297923050735e-06,
"loss": 0.1668,
"step": 9380
},
{
"epoch": 3.2,
"learning_rate": 3.5989104528430373e-06,
"loss": 0.1717,
"step": 9400
},
{
"epoch": 3.21,
"learning_rate": 3.5852911133810014e-06,
"loss": 0.1076,
"step": 9420
},
{
"epoch": 3.21,
"learning_rate": 3.5716717739189656e-06,
"loss": 0.1087,
"step": 9440
},
{
"epoch": 3.22,
"learning_rate": 3.558052434456929e-06,
"loss": 0.158,
"step": 9460
},
{
"epoch": 3.23,
"learning_rate": 3.544433094994893e-06,
"loss": 0.1229,
"step": 9480
},
{
"epoch": 3.23,
"learning_rate": 3.530813755532857e-06,
"loss": 0.1428,
"step": 9500
},
{
"epoch": 3.24,
"learning_rate": 3.517194416070821e-06,
"loss": 0.1513,
"step": 9520
},
{
"epoch": 3.25,
"learning_rate": 3.5035750766087846e-06,
"loss": 0.1302,
"step": 9540
},
{
"epoch": 3.26,
"learning_rate": 3.4899557371467483e-06,
"loss": 0.1416,
"step": 9560
},
{
"epoch": 3.26,
"learning_rate": 3.4763363976847125e-06,
"loss": 0.1926,
"step": 9580
},
{
"epoch": 3.27,
"learning_rate": 3.4627170582226766e-06,
"loss": 0.1431,
"step": 9600
},
{
"epoch": 3.28,
"learning_rate": 3.449097718760641e-06,
"loss": 0.1235,
"step": 9620
},
{
"epoch": 3.28,
"learning_rate": 3.435478379298604e-06,
"loss": 0.1897,
"step": 9640
},
{
"epoch": 3.29,
"learning_rate": 3.4218590398365683e-06,
"loss": 0.1464,
"step": 9660
},
{
"epoch": 3.3,
"learning_rate": 3.408239700374532e-06,
"loss": 0.1889,
"step": 9680
},
{
"epoch": 3.3,
"learning_rate": 3.394620360912496e-06,
"loss": 0.1379,
"step": 9700
},
{
"epoch": 3.31,
"learning_rate": 3.3810010214504603e-06,
"loss": 0.1677,
"step": 9720
},
{
"epoch": 3.32,
"learning_rate": 3.3673816819884236e-06,
"loss": 0.1461,
"step": 9740
},
{
"epoch": 3.32,
"learning_rate": 3.3537623425263877e-06,
"loss": 0.1661,
"step": 9760
},
{
"epoch": 3.33,
"learning_rate": 3.340143003064352e-06,
"loss": 0.1571,
"step": 9780
},
{
"epoch": 3.34,
"learning_rate": 3.3265236636023156e-06,
"loss": 0.157,
"step": 9800
},
{
"epoch": 3.34,
"learning_rate": 3.3129043241402798e-06,
"loss": 0.1774,
"step": 9820
},
{
"epoch": 3.35,
"learning_rate": 3.299284984678243e-06,
"loss": 0.1463,
"step": 9840
},
{
"epoch": 3.36,
"learning_rate": 3.2856656452162072e-06,
"loss": 0.1568,
"step": 9860
},
{
"epoch": 3.36,
"learning_rate": 3.2720463057541714e-06,
"loss": 0.1165,
"step": 9880
},
{
"epoch": 3.37,
"learning_rate": 3.258426966292135e-06,
"loss": 0.1414,
"step": 9900
},
{
"epoch": 3.38,
"learning_rate": 3.244807626830099e-06,
"loss": 0.133,
"step": 9920
},
{
"epoch": 3.38,
"learning_rate": 3.231188287368063e-06,
"loss": 0.1348,
"step": 9940
},
{
"epoch": 3.39,
"learning_rate": 3.2175689479060267e-06,
"loss": 0.1444,
"step": 9960
},
{
"epoch": 3.4,
"learning_rate": 3.203949608443991e-06,
"loss": 0.1566,
"step": 9980
},
{
"epoch": 3.4,
"learning_rate": 3.190330268981955e-06,
"loss": 0.155,
"step": 10000
},
{
"epoch": 3.4,
"eval_loss": 0.7332798838615417,
"eval_roc_auc": 0.7601609657947686,
"eval_runtime": 222.7036,
"eval_samples_per_second": 66.95,
"eval_steps_per_second": 8.37,
"step": 10000
},
{
"epoch": 3.41,
"learning_rate": 3.1767109295199183e-06,
"loss": 0.1454,
"step": 10020
},
{
"epoch": 3.42,
"learning_rate": 3.1630915900578825e-06,
"loss": 0.1361,
"step": 10040
},
{
"epoch": 3.43,
"learning_rate": 3.149472250595846e-06,
"loss": 0.153,
"step": 10060
},
{
"epoch": 3.43,
"learning_rate": 3.1358529111338103e-06,
"loss": 0.1855,
"step": 10080
},
{
"epoch": 3.44,
"learning_rate": 3.1222335716717745e-06,
"loss": 0.1164,
"step": 10100
},
{
"epoch": 3.45,
"learning_rate": 3.1086142322097378e-06,
"loss": 0.1846,
"step": 10120
},
{
"epoch": 3.45,
"learning_rate": 3.094994892747702e-06,
"loss": 0.1596,
"step": 10140
},
{
"epoch": 3.46,
"learning_rate": 3.081375553285666e-06,
"loss": 0.1693,
"step": 10160
},
{
"epoch": 3.47,
"learning_rate": 3.06775621382363e-06,
"loss": 0.1359,
"step": 10180
},
{
"epoch": 3.47,
"learning_rate": 3.0541368743615935e-06,
"loss": 0.17,
"step": 10200
},
{
"epoch": 3.48,
"learning_rate": 3.0405175348995573e-06,
"loss": 0.1185,
"step": 10220
},
{
"epoch": 3.49,
"learning_rate": 3.0268981954375214e-06,
"loss": 0.111,
"step": 10240
},
{
"epoch": 3.49,
"learning_rate": 3.0132788559754856e-06,
"loss": 0.1632,
"step": 10260
},
{
"epoch": 3.5,
"learning_rate": 2.9996595165134497e-06,
"loss": 0.1217,
"step": 10280
},
{
"epoch": 3.51,
"learning_rate": 2.986040177051413e-06,
"loss": 0.1515,
"step": 10300
},
{
"epoch": 3.51,
"learning_rate": 2.972420837589377e-06,
"loss": 0.1249,
"step": 10320
},
{
"epoch": 3.52,
"learning_rate": 2.958801498127341e-06,
"loss": 0.1566,
"step": 10340
},
{
"epoch": 3.53,
"learning_rate": 2.945182158665305e-06,
"loss": 0.1707,
"step": 10360
},
{
"epoch": 3.53,
"learning_rate": 2.931562819203269e-06,
"loss": 0.1395,
"step": 10380
},
{
"epoch": 3.54,
"learning_rate": 2.9179434797412325e-06,
"loss": 0.1631,
"step": 10400
},
{
"epoch": 3.55,
"learning_rate": 2.9043241402791967e-06,
"loss": 0.1359,
"step": 10420
},
{
"epoch": 3.55,
"learning_rate": 2.890704800817161e-06,
"loss": 0.1413,
"step": 10440
},
{
"epoch": 3.56,
"learning_rate": 2.8770854613551245e-06,
"loss": 0.1415,
"step": 10460
},
{
"epoch": 3.57,
"learning_rate": 2.8634661218930883e-06,
"loss": 0.1439,
"step": 10480
},
{
"epoch": 3.58,
"learning_rate": 2.849846782431052e-06,
"loss": 0.1997,
"step": 10500
},
{
"epoch": 3.58,
"learning_rate": 2.836227442969016e-06,
"loss": 0.1727,
"step": 10520
},
{
"epoch": 3.59,
"learning_rate": 2.8226081035069803e-06,
"loss": 0.1468,
"step": 10540
},
{
"epoch": 3.6,
"learning_rate": 2.8089887640449444e-06,
"loss": 0.1451,
"step": 10560
},
{
"epoch": 3.6,
"learning_rate": 2.7953694245829077e-06,
"loss": 0.1466,
"step": 10580
},
{
"epoch": 3.61,
"learning_rate": 2.781750085120872e-06,
"loss": 0.1434,
"step": 10600
},
{
"epoch": 3.62,
"learning_rate": 2.7681307456588356e-06,
"loss": 0.1302,
"step": 10620
},
{
"epoch": 3.62,
"learning_rate": 2.7545114061967998e-06,
"loss": 0.1318,
"step": 10640
},
{
"epoch": 3.63,
"learning_rate": 2.740892066734764e-06,
"loss": 0.1516,
"step": 10660
},
{
"epoch": 3.64,
"learning_rate": 2.7272727272727272e-06,
"loss": 0.1508,
"step": 10680
},
{
"epoch": 3.64,
"learning_rate": 2.7136533878106914e-06,
"loss": 0.1838,
"step": 10700
},
{
"epoch": 3.65,
"learning_rate": 2.7000340483486555e-06,
"loss": 0.1497,
"step": 10720
},
{
"epoch": 3.66,
"learning_rate": 2.6864147088866193e-06,
"loss": 0.1694,
"step": 10740
},
{
"epoch": 3.66,
"learning_rate": 2.6727953694245834e-06,
"loss": 0.1789,
"step": 10760
},
{
"epoch": 3.67,
"learning_rate": 2.6591760299625467e-06,
"loss": 0.1369,
"step": 10780
},
{
"epoch": 3.68,
"learning_rate": 2.645556690500511e-06,
"loss": 0.1195,
"step": 10800
},
{
"epoch": 3.68,
"learning_rate": 2.631937351038475e-06,
"loss": 0.1486,
"step": 10820
},
{
"epoch": 3.69,
"learning_rate": 2.6183180115764387e-06,
"loss": 0.1374,
"step": 10840
},
{
"epoch": 3.7,
"learning_rate": 2.6046986721144025e-06,
"loss": 0.1454,
"step": 10860
},
{
"epoch": 3.7,
"learning_rate": 2.5910793326523666e-06,
"loss": 0.1969,
"step": 10880
},
{
"epoch": 3.71,
"learning_rate": 2.5774599931903303e-06,
"loss": 0.1388,
"step": 10900
},
{
"epoch": 3.72,
"learning_rate": 2.5638406537282945e-06,
"loss": 0.1714,
"step": 10920
},
{
"epoch": 3.72,
"learning_rate": 2.5502213142662586e-06,
"loss": 0.1339,
"step": 10940
},
{
"epoch": 3.73,
"learning_rate": 2.536601974804222e-06,
"loss": 0.1532,
"step": 10960
},
{
"epoch": 3.74,
"learning_rate": 2.522982635342186e-06,
"loss": 0.1918,
"step": 10980
},
{
"epoch": 3.75,
"learning_rate": 2.5093632958801502e-06,
"loss": 0.1238,
"step": 11000
},
{
"epoch": 3.75,
"eval_loss": 0.6513485312461853,
"eval_roc_auc": 0.7726358148893361,
"eval_runtime": 227.7851,
"eval_samples_per_second": 65.456,
"eval_steps_per_second": 8.183,
"step": 11000
},
{
"epoch": 3.75,
"learning_rate": 2.495743956418114e-06,
"loss": 0.169,
"step": 11020
},
{
"epoch": 3.76,
"learning_rate": 2.4821246169560777e-06,
"loss": 0.1273,
"step": 11040
},
{
"epoch": 3.77,
"learning_rate": 2.468505277494042e-06,
"loss": 0.1087,
"step": 11060
},
{
"epoch": 3.77,
"learning_rate": 2.4548859380320056e-06,
"loss": 0.1648,
"step": 11080
},
{
"epoch": 3.78,
"learning_rate": 2.4412665985699697e-06,
"loss": 0.1748,
"step": 11100
},
{
"epoch": 3.79,
"learning_rate": 2.4276472591079335e-06,
"loss": 0.1964,
"step": 11120
},
{
"epoch": 3.79,
"learning_rate": 2.414027919645897e-06,
"loss": 0.1551,
"step": 11140
},
{
"epoch": 3.8,
"learning_rate": 2.4004085801838613e-06,
"loss": 0.175,
"step": 11160
},
{
"epoch": 3.81,
"learning_rate": 2.386789240721825e-06,
"loss": 0.1273,
"step": 11180
},
{
"epoch": 3.81,
"learning_rate": 2.3731699012597892e-06,
"loss": 0.1746,
"step": 11200
},
{
"epoch": 3.82,
"learning_rate": 2.359550561797753e-06,
"loss": 0.1182,
"step": 11220
},
{
"epoch": 3.83,
"learning_rate": 2.345931222335717e-06,
"loss": 0.1066,
"step": 11240
},
{
"epoch": 3.83,
"learning_rate": 2.332311882873681e-06,
"loss": 0.1269,
"step": 11260
},
{
"epoch": 3.84,
"learning_rate": 2.3186925434116445e-06,
"loss": 0.0901,
"step": 11280
},
{
"epoch": 3.85,
"learning_rate": 2.3050732039496087e-06,
"loss": 0.206,
"step": 11300
},
{
"epoch": 3.85,
"learning_rate": 2.2914538644875724e-06,
"loss": 0.1164,
"step": 11320
},
{
"epoch": 3.86,
"learning_rate": 2.2778345250255366e-06,
"loss": 0.1698,
"step": 11340
},
{
"epoch": 3.87,
"learning_rate": 2.2642151855635003e-06,
"loss": 0.1279,
"step": 11360
},
{
"epoch": 3.87,
"learning_rate": 2.2505958461014645e-06,
"loss": 0.1477,
"step": 11380
},
{
"epoch": 3.88,
"learning_rate": 2.236976506639428e-06,
"loss": 0.136,
"step": 11400
},
{
"epoch": 3.89,
"learning_rate": 2.223357167177392e-06,
"loss": 0.122,
"step": 11420
},
{
"epoch": 3.9,
"learning_rate": 2.209737827715356e-06,
"loss": 0.138,
"step": 11440
},
{
"epoch": 3.9,
"learning_rate": 2.1961184882533198e-06,
"loss": 0.161,
"step": 11460
},
{
"epoch": 3.91,
"learning_rate": 2.182499148791284e-06,
"loss": 0.1685,
"step": 11480
},
{
"epoch": 3.92,
"learning_rate": 2.1688798093292477e-06,
"loss": 0.1503,
"step": 11500
},
{
"epoch": 3.92,
"learning_rate": 2.155260469867212e-06,
"loss": 0.1933,
"step": 11520
},
{
"epoch": 3.93,
"learning_rate": 2.1416411304051755e-06,
"loss": 0.199,
"step": 11540
},
{
"epoch": 3.94,
"learning_rate": 2.1280217909431393e-06,
"loss": 0.1233,
"step": 11560
},
{
"epoch": 3.94,
"learning_rate": 2.1144024514811034e-06,
"loss": 0.1485,
"step": 11580
},
{
"epoch": 3.95,
"learning_rate": 2.100783112019067e-06,
"loss": 0.0955,
"step": 11600
},
{
"epoch": 3.96,
"learning_rate": 2.0871637725570313e-06,
"loss": 0.1369,
"step": 11620
},
{
"epoch": 3.96,
"learning_rate": 2.073544433094995e-06,
"loss": 0.1463,
"step": 11640
},
{
"epoch": 3.97,
"learning_rate": 2.059925093632959e-06,
"loss": 0.1499,
"step": 11660
},
{
"epoch": 3.98,
"learning_rate": 2.046305754170923e-06,
"loss": 0.1376,
"step": 11680
},
{
"epoch": 3.98,
"learning_rate": 2.0326864147088866e-06,
"loss": 0.1385,
"step": 11700
},
{
"epoch": 3.99,
"learning_rate": 2.0190670752468508e-06,
"loss": 0.1194,
"step": 11720
},
{
"epoch": 4.0,
"learning_rate": 2.0054477357848145e-06,
"loss": 0.1677,
"step": 11740
},
{
"epoch": 4.0,
"learning_rate": 1.9918283963227787e-06,
"loss": 0.1102,
"step": 11760
},
{
"epoch": 4.01,
"learning_rate": 1.9782090568607424e-06,
"loss": 0.114,
"step": 11780
},
{
"epoch": 4.02,
"learning_rate": 1.9645897173987065e-06,
"loss": 0.1493,
"step": 11800
},
{
"epoch": 4.02,
"learning_rate": 1.9509703779366703e-06,
"loss": 0.1083,
"step": 11820
},
{
"epoch": 4.03,
"learning_rate": 1.937351038474634e-06,
"loss": 0.0971,
"step": 11840
},
{
"epoch": 4.04,
"learning_rate": 1.923731699012598e-06,
"loss": 0.1186,
"step": 11860
},
{
"epoch": 4.04,
"learning_rate": 1.910112359550562e-06,
"loss": 0.0883,
"step": 11880
},
{
"epoch": 4.05,
"learning_rate": 1.896493020088526e-06,
"loss": 0.1268,
"step": 11900
},
{
"epoch": 4.06,
"learning_rate": 1.8828736806264897e-06,
"loss": 0.1449,
"step": 11920
},
{
"epoch": 4.07,
"learning_rate": 1.8692543411644537e-06,
"loss": 0.1092,
"step": 11940
},
{
"epoch": 4.07,
"learning_rate": 1.8556350017024174e-06,
"loss": 0.1058,
"step": 11960
},
{
"epoch": 4.08,
"learning_rate": 1.8420156622403816e-06,
"loss": 0.0781,
"step": 11980
},
{
"epoch": 4.09,
"learning_rate": 1.8283963227783455e-06,
"loss": 0.1054,
"step": 12000
},
{
"epoch": 4.09,
"eval_loss": 0.7551047801971436,
"eval_roc_auc": 0.7666666666666667,
"eval_runtime": 225.8268,
"eval_samples_per_second": 66.024,
"eval_steps_per_second": 8.254,
"step": 12000
},
{
"epoch": 4.09,
"learning_rate": 1.8147769833163092e-06,
"loss": 0.1366,
"step": 12020
},
{
"epoch": 4.1,
"learning_rate": 1.8011576438542734e-06,
"loss": 0.1395,
"step": 12040
},
{
"epoch": 4.11,
"learning_rate": 1.787538304392237e-06,
"loss": 0.094,
"step": 12060
},
{
"epoch": 4.11,
"learning_rate": 1.773918964930201e-06,
"loss": 0.1077,
"step": 12080
},
{
"epoch": 4.12,
"learning_rate": 1.760299625468165e-06,
"loss": 0.1134,
"step": 12100
},
{
"epoch": 4.13,
"learning_rate": 1.746680286006129e-06,
"loss": 0.1002,
"step": 12120
},
{
"epoch": 4.13,
"learning_rate": 1.7330609465440929e-06,
"loss": 0.0906,
"step": 12140
},
{
"epoch": 4.14,
"learning_rate": 1.7194416070820566e-06,
"loss": 0.1249,
"step": 12160
},
{
"epoch": 4.15,
"learning_rate": 1.7058222676200205e-06,
"loss": 0.1197,
"step": 12180
},
{
"epoch": 4.15,
"learning_rate": 1.6922029281579845e-06,
"loss": 0.1198,
"step": 12200
},
{
"epoch": 4.16,
"learning_rate": 1.6785835886959484e-06,
"loss": 0.1104,
"step": 12220
},
{
"epoch": 4.17,
"learning_rate": 1.6649642492339123e-06,
"loss": 0.1034,
"step": 12240
},
{
"epoch": 4.17,
"learning_rate": 1.651344909771876e-06,
"loss": 0.0803,
"step": 12260
},
{
"epoch": 4.18,
"learning_rate": 1.6377255703098402e-06,
"loss": 0.1039,
"step": 12280
},
{
"epoch": 4.19,
"learning_rate": 1.624106230847804e-06,
"loss": 0.1119,
"step": 12300
},
{
"epoch": 4.19,
"learning_rate": 1.6104868913857679e-06,
"loss": 0.1463,
"step": 12320
},
{
"epoch": 4.2,
"learning_rate": 1.5968675519237318e-06,
"loss": 0.1201,
"step": 12340
},
{
"epoch": 4.21,
"learning_rate": 1.5832482124616958e-06,
"loss": 0.1104,
"step": 12360
},
{
"epoch": 4.22,
"learning_rate": 1.5696288729996597e-06,
"loss": 0.1164,
"step": 12380
},
{
"epoch": 4.22,
"learning_rate": 1.5560095335376234e-06,
"loss": 0.0739,
"step": 12400
},
{
"epoch": 4.23,
"learning_rate": 1.5423901940755876e-06,
"loss": 0.1345,
"step": 12420
},
{
"epoch": 4.24,
"learning_rate": 1.5287708546135513e-06,
"loss": 0.11,
"step": 12440
},
{
"epoch": 4.24,
"learning_rate": 1.5151515151515152e-06,
"loss": 0.0976,
"step": 12460
},
{
"epoch": 4.25,
"learning_rate": 1.501532175689479e-06,
"loss": 0.1323,
"step": 12480
},
{
"epoch": 4.26,
"learning_rate": 1.4879128362274431e-06,
"loss": 0.1222,
"step": 12500
},
{
"epoch": 4.26,
"learning_rate": 1.474293496765407e-06,
"loss": 0.1165,
"step": 12520
},
{
"epoch": 4.27,
"learning_rate": 1.4606741573033708e-06,
"loss": 0.1214,
"step": 12540
},
{
"epoch": 4.28,
"learning_rate": 1.447054817841335e-06,
"loss": 0.0871,
"step": 12560
},
{
"epoch": 4.28,
"learning_rate": 1.4334354783792987e-06,
"loss": 0.1088,
"step": 12580
},
{
"epoch": 4.29,
"learning_rate": 1.4198161389172626e-06,
"loss": 0.0892,
"step": 12600
},
{
"epoch": 4.3,
"learning_rate": 1.4061967994552263e-06,
"loss": 0.1352,
"step": 12620
},
{
"epoch": 4.3,
"learning_rate": 1.3925774599931905e-06,
"loss": 0.0977,
"step": 12640
},
{
"epoch": 4.31,
"learning_rate": 1.3789581205311544e-06,
"loss": 0.1374,
"step": 12660
},
{
"epoch": 4.32,
"learning_rate": 1.3653387810691181e-06,
"loss": 0.0864,
"step": 12680
},
{
"epoch": 4.32,
"learning_rate": 1.3517194416070823e-06,
"loss": 0.089,
"step": 12700
},
{
"epoch": 4.33,
"learning_rate": 1.338100102145046e-06,
"loss": 0.0961,
"step": 12720
},
{
"epoch": 4.34,
"learning_rate": 1.32448076268301e-06,
"loss": 0.1148,
"step": 12740
},
{
"epoch": 4.34,
"learning_rate": 1.3108614232209737e-06,
"loss": 0.1113,
"step": 12760
},
{
"epoch": 4.35,
"learning_rate": 1.2972420837589378e-06,
"loss": 0.0984,
"step": 12780
},
{
"epoch": 4.36,
"learning_rate": 1.2836227442969018e-06,
"loss": 0.1134,
"step": 12800
},
{
"epoch": 4.36,
"learning_rate": 1.2700034048348655e-06,
"loss": 0.1126,
"step": 12820
},
{
"epoch": 4.37,
"learning_rate": 1.2563840653728297e-06,
"loss": 0.1318,
"step": 12840
},
{
"epoch": 4.38,
"learning_rate": 1.2427647259107934e-06,
"loss": 0.1326,
"step": 12860
},
{
"epoch": 4.39,
"learning_rate": 1.2291453864487573e-06,
"loss": 0.0954,
"step": 12880
},
{
"epoch": 4.39,
"learning_rate": 1.2155260469867213e-06,
"loss": 0.1549,
"step": 12900
},
{
"epoch": 4.4,
"learning_rate": 1.2019067075246852e-06,
"loss": 0.1077,
"step": 12920
},
{
"epoch": 4.41,
"learning_rate": 1.188287368062649e-06,
"loss": 0.0598,
"step": 12940
},
{
"epoch": 4.41,
"learning_rate": 1.1746680286006129e-06,
"loss": 0.1317,
"step": 12960
},
{
"epoch": 4.42,
"learning_rate": 1.161048689138577e-06,
"loss": 0.0877,
"step": 12980
},
{
"epoch": 4.43,
"learning_rate": 1.1474293496765407e-06,
"loss": 0.1076,
"step": 13000
},
{
"epoch": 4.43,
"eval_loss": 0.8132159113883972,
"eval_roc_auc": 0.7627095908786049,
"eval_runtime": 227.818,
"eval_samples_per_second": 65.447,
"eval_steps_per_second": 8.182,
"step": 13000
},
{
"epoch": 4.43,
"learning_rate": 1.1338100102145047e-06,
"loss": 0.1132,
"step": 13020
},
{
"epoch": 4.44,
"learning_rate": 1.1201906707524686e-06,
"loss": 0.0821,
"step": 13040
},
{
"epoch": 4.45,
"learning_rate": 1.1065713312904326e-06,
"loss": 0.0979,
"step": 13060
},
{
"epoch": 4.45,
"learning_rate": 1.0929519918283963e-06,
"loss": 0.1109,
"step": 13080
},
{
"epoch": 4.46,
"learning_rate": 1.0793326523663602e-06,
"loss": 0.1429,
"step": 13100
},
{
"epoch": 4.47,
"learning_rate": 1.0657133129043244e-06,
"loss": 0.1369,
"step": 13120
},
{
"epoch": 4.47,
"learning_rate": 1.052093973442288e-06,
"loss": 0.103,
"step": 13140
},
{
"epoch": 4.48,
"learning_rate": 1.038474633980252e-06,
"loss": 0.1071,
"step": 13160
},
{
"epoch": 4.49,
"learning_rate": 1.024855294518216e-06,
"loss": 0.0864,
"step": 13180
},
{
"epoch": 4.49,
"learning_rate": 1.01123595505618e-06,
"loss": 0.1063,
"step": 13200
},
{
"epoch": 4.5,
"learning_rate": 9.976166155941436e-07,
"loss": 0.1019,
"step": 13220
},
{
"epoch": 4.51,
"learning_rate": 9.839972761321076e-07,
"loss": 0.1222,
"step": 13240
},
{
"epoch": 4.51,
"learning_rate": 9.703779366700715e-07,
"loss": 0.0911,
"step": 13260
},
{
"epoch": 4.52,
"learning_rate": 9.567585972080355e-07,
"loss": 0.1774,
"step": 13280
},
{
"epoch": 4.53,
"learning_rate": 9.431392577459994e-07,
"loss": 0.1098,
"step": 13300
},
{
"epoch": 4.54,
"learning_rate": 9.295199182839632e-07,
"loss": 0.1371,
"step": 13320
},
{
"epoch": 4.54,
"learning_rate": 9.159005788219272e-07,
"loss": 0.1277,
"step": 13340
},
{
"epoch": 4.55,
"learning_rate": 9.022812393598911e-07,
"loss": 0.0962,
"step": 13360
},
{
"epoch": 4.56,
"learning_rate": 8.886618998978551e-07,
"loss": 0.1411,
"step": 13380
},
{
"epoch": 4.56,
"learning_rate": 8.75042560435819e-07,
"loss": 0.1044,
"step": 13400
},
{
"epoch": 4.57,
"learning_rate": 8.614232209737828e-07,
"loss": 0.0907,
"step": 13420
},
{
"epoch": 4.58,
"learning_rate": 8.478038815117468e-07,
"loss": 0.1084,
"step": 13440
},
{
"epoch": 4.58,
"learning_rate": 8.341845420497106e-07,
"loss": 0.1198,
"step": 13460
},
{
"epoch": 4.59,
"learning_rate": 8.205652025876745e-07,
"loss": 0.0946,
"step": 13480
},
{
"epoch": 4.6,
"learning_rate": 8.069458631256384e-07,
"loss": 0.1361,
"step": 13500
},
{
"epoch": 4.6,
"learning_rate": 7.933265236636024e-07,
"loss": 0.0859,
"step": 13520
},
{
"epoch": 4.61,
"learning_rate": 7.797071842015664e-07,
"loss": 0.1314,
"step": 13540
},
{
"epoch": 4.62,
"learning_rate": 7.660878447395302e-07,
"loss": 0.121,
"step": 13560
},
{
"epoch": 4.62,
"learning_rate": 7.524685052774941e-07,
"loss": 0.109,
"step": 13580
},
{
"epoch": 4.63,
"learning_rate": 7.38849165815458e-07,
"loss": 0.0947,
"step": 13600
},
{
"epoch": 4.64,
"learning_rate": 7.252298263534219e-07,
"loss": 0.105,
"step": 13620
},
{
"epoch": 4.64,
"learning_rate": 7.116104868913857e-07,
"loss": 0.1235,
"step": 13640
},
{
"epoch": 4.65,
"learning_rate": 6.979911474293498e-07,
"loss": 0.1332,
"step": 13660
},
{
"epoch": 4.66,
"learning_rate": 6.843718079673137e-07,
"loss": 0.0717,
"step": 13680
},
{
"epoch": 4.66,
"learning_rate": 6.707524685052775e-07,
"loss": 0.1095,
"step": 13700
},
{
"epoch": 4.67,
"learning_rate": 6.571331290432415e-07,
"loss": 0.1468,
"step": 13720
},
{
"epoch": 4.68,
"learning_rate": 6.435137895812053e-07,
"loss": 0.1202,
"step": 13740
},
{
"epoch": 4.69,
"learning_rate": 6.298944501191693e-07,
"loss": 0.0621,
"step": 13760
},
{
"epoch": 4.69,
"learning_rate": 6.162751106571332e-07,
"loss": 0.0852,
"step": 13780
},
{
"epoch": 4.7,
"learning_rate": 6.02655771195097e-07,
"loss": 0.1006,
"step": 13800
},
{
"epoch": 4.71,
"learning_rate": 5.89036431733061e-07,
"loss": 0.1046,
"step": 13820
},
{
"epoch": 4.71,
"learning_rate": 5.754170922710249e-07,
"loss": 0.1105,
"step": 13840
},
{
"epoch": 4.72,
"learning_rate": 5.617977528089888e-07,
"loss": 0.0842,
"step": 13860
},
{
"epoch": 4.73,
"learning_rate": 5.481784133469528e-07,
"loss": 0.0779,
"step": 13880
},
{
"epoch": 4.73,
"learning_rate": 5.345590738849166e-07,
"loss": 0.1007,
"step": 13900
},
{
"epoch": 4.74,
"learning_rate": 5.209397344228806e-07,
"loss": 0.0784,
"step": 13920
},
{
"epoch": 4.75,
"learning_rate": 5.073203949608444e-07,
"loss": 0.121,
"step": 13940
},
{
"epoch": 4.75,
"learning_rate": 4.937010554988083e-07,
"loss": 0.1562,
"step": 13960
},
{
"epoch": 4.76,
"learning_rate": 4.800817160367723e-07,
"loss": 0.1098,
"step": 13980
},
{
"epoch": 4.77,
"learning_rate": 4.6646237657473615e-07,
"loss": 0.1321,
"step": 14000
},
{
"epoch": 4.77,
"eval_loss": 0.815223753452301,
"eval_roc_auc": 0.7586854460093896,
"eval_runtime": 225.105,
"eval_samples_per_second": 66.236,
"eval_steps_per_second": 8.281,
"step": 14000
},
{
"epoch": 4.77,
"learning_rate": 4.528430371127001e-07,
"loss": 0.1244,
"step": 14020
},
{
"epoch": 4.78,
"learning_rate": 4.39223697650664e-07,
"loss": 0.1415,
"step": 14040
},
{
"epoch": 4.79,
"learning_rate": 4.2560435818862786e-07,
"loss": 0.0982,
"step": 14060
},
{
"epoch": 4.79,
"learning_rate": 4.1198501872659175e-07,
"loss": 0.0874,
"step": 14080
},
{
"epoch": 4.8,
"learning_rate": 3.9836567926455574e-07,
"loss": 0.1736,
"step": 14100
},
{
"epoch": 4.81,
"learning_rate": 3.8474633980251963e-07,
"loss": 0.1213,
"step": 14120
},
{
"epoch": 4.81,
"learning_rate": 3.711270003404835e-07,
"loss": 0.1008,
"step": 14140
},
{
"epoch": 4.82,
"learning_rate": 3.5750766087844745e-07,
"loss": 0.1624,
"step": 14160
},
{
"epoch": 4.83,
"learning_rate": 3.4388832141641134e-07,
"loss": 0.1073,
"step": 14180
},
{
"epoch": 4.83,
"learning_rate": 3.302689819543752e-07,
"loss": 0.0801,
"step": 14200
},
{
"epoch": 4.84,
"learning_rate": 3.1664964249233916e-07,
"loss": 0.1252,
"step": 14220
},
{
"epoch": 4.85,
"learning_rate": 3.0303030303030305e-07,
"loss": 0.0988,
"step": 14240
},
{
"epoch": 4.86,
"learning_rate": 2.8941096356826693e-07,
"loss": 0.0818,
"step": 14260
},
{
"epoch": 4.86,
"learning_rate": 2.7579162410623087e-07,
"loss": 0.1005,
"step": 14280
},
{
"epoch": 4.87,
"learning_rate": 2.621722846441948e-07,
"loss": 0.1191,
"step": 14300
},
{
"epoch": 4.88,
"learning_rate": 2.485529451821587e-07,
"loss": 0.1062,
"step": 14320
},
{
"epoch": 4.88,
"learning_rate": 2.349336057201226e-07,
"loss": 0.115,
"step": 14340
},
{
"epoch": 4.89,
"learning_rate": 2.213142662580865e-07,
"loss": 0.1172,
"step": 14360
},
{
"epoch": 4.9,
"learning_rate": 2.076949267960504e-07,
"loss": 0.099,
"step": 14380
},
{
"epoch": 4.9,
"learning_rate": 1.9407558733401432e-07,
"loss": 0.127,
"step": 14400
},
{
"epoch": 4.91,
"learning_rate": 1.804562478719782e-07,
"loss": 0.0879,
"step": 14420
},
{
"epoch": 4.92,
"learning_rate": 1.6683690840994215e-07,
"loss": 0.1284,
"step": 14440
},
{
"epoch": 4.92,
"learning_rate": 1.5321756894790606e-07,
"loss": 0.1249,
"step": 14460
},
{
"epoch": 4.93,
"learning_rate": 1.3959822948586994e-07,
"loss": 0.1314,
"step": 14480
},
{
"epoch": 4.94,
"learning_rate": 1.2597889002383386e-07,
"loss": 0.1307,
"step": 14500
},
{
"epoch": 4.94,
"learning_rate": 1.1235955056179776e-07,
"loss": 0.092,
"step": 14520
},
{
"epoch": 4.95,
"learning_rate": 9.874021109976167e-08,
"loss": 0.084,
"step": 14540
},
{
"epoch": 4.96,
"learning_rate": 8.512087163772558e-08,
"loss": 0.1793,
"step": 14560
},
{
"epoch": 4.96,
"learning_rate": 7.150153217568949e-08,
"loss": 0.1071,
"step": 14580
},
{
"epoch": 4.97,
"learning_rate": 5.788219271365339e-08,
"loss": 0.103,
"step": 14600
},
{
"epoch": 4.98,
"learning_rate": 4.42628532516173e-08,
"loss": 0.1024,
"step": 14620
},
{
"epoch": 4.98,
"learning_rate": 3.0643513789581204e-08,
"loss": 0.0751,
"step": 14640
},
{
"epoch": 4.99,
"learning_rate": 1.7024174327545116e-08,
"loss": 0.0811,
"step": 14660
},
{
"epoch": 5.0,
"learning_rate": 3.4048348655090227e-09,
"loss": 0.0877,
"step": 14680
},
{
"epoch": 5.0,
"step": 14685,
"total_flos": 3.6406511640831468e+19,
"train_loss": 0.21540352483164263,
"train_runtime": 8294.2837,
"train_samples_per_second": 56.643,
"train_steps_per_second": 1.77
}
],
"logging_steps": 20,
"max_steps": 14685,
"num_train_epochs": 5,
"save_steps": 1000,
"total_flos": 3.6406511640831468e+19,
"trial_name": null,
"trial_params": null
}