josecannete's picture
adding model finetuned on mlqa
3bbf164
raw
history blame
14 kB
{
"best_metric": 48.94001149135019,
"best_model_checkpoint": "/home/jcanete/ft-data/all_results/mlqa/albeto_base_2/epochs_4_bs_16_lr_2e-5/checkpoint-12900",
"epoch": 4.0,
"global_step": 20516,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"eval_exact_match": 11.2,
"eval_f1": 26.483192221593768,
"step": 300
},
{
"epoch": 0.1,
"learning_rate": 1.951355039968805e-05,
"loss": 4.0436,
"step": 500
},
{
"epoch": 0.12,
"eval_exact_match": 15.0,
"eval_f1": 33.51854251608216,
"step": 600
},
{
"epoch": 0.18,
"eval_exact_match": 18.6,
"eval_f1": 35.629522354460704,
"step": 900
},
{
"epoch": 0.19,
"learning_rate": 1.902612595047768e-05,
"loss": 3.2766,
"step": 1000
},
{
"epoch": 0.23,
"eval_exact_match": 18.0,
"eval_f1": 36.188007340871735,
"step": 1200
},
{
"epoch": 0.29,
"learning_rate": 1.8538701501267304e-05,
"loss": 3.0936,
"step": 1500
},
{
"epoch": 0.29,
"eval_exact_match": 18.8,
"eval_f1": 37.44597113423442,
"step": 1500
},
{
"epoch": 0.35,
"eval_exact_match": 21.2,
"eval_f1": 39.84498534299273,
"step": 1800
},
{
"epoch": 0.39,
"learning_rate": 1.8051277052056932e-05,
"loss": 2.9305,
"step": 2000
},
{
"epoch": 0.41,
"eval_exact_match": 20.8,
"eval_f1": 39.95193770849471,
"step": 2100
},
{
"epoch": 0.47,
"eval_exact_match": 22.2,
"eval_f1": 42.758832207716736,
"step": 2400
},
{
"epoch": 0.49,
"learning_rate": 1.756482745174498e-05,
"loss": 2.794,
"step": 2500
},
{
"epoch": 0.53,
"eval_exact_match": 22.2,
"eval_f1": 42.601350107858735,
"step": 2700
},
{
"epoch": 0.58,
"learning_rate": 1.707740300253461e-05,
"loss": 2.7894,
"step": 3000
},
{
"epoch": 0.58,
"eval_exact_match": 24.0,
"eval_f1": 43.534851112760435,
"step": 3000
},
{
"epoch": 0.64,
"eval_exact_match": 22.8,
"eval_f1": 43.62106996130737,
"step": 3300
},
{
"epoch": 0.68,
"learning_rate": 1.6590953402222657e-05,
"loss": 2.7014,
"step": 3500
},
{
"epoch": 0.7,
"eval_exact_match": 22.8,
"eval_f1": 44.321198455769256,
"step": 3600
},
{
"epoch": 0.76,
"eval_exact_match": 22.6,
"eval_f1": 44.57581116292914,
"step": 3900
},
{
"epoch": 0.78,
"learning_rate": 1.6103528953012282e-05,
"loss": 2.6815,
"step": 4000
},
{
"epoch": 0.82,
"eval_exact_match": 23.4,
"eval_f1": 46.30028588125735,
"step": 4200
},
{
"epoch": 0.88,
"learning_rate": 1.561610450380191e-05,
"loss": 2.6472,
"step": 4500
},
{
"epoch": 0.88,
"eval_exact_match": 25.2,
"eval_f1": 45.66133144463246,
"step": 4500
},
{
"epoch": 0.94,
"eval_exact_match": 24.4,
"eval_f1": 46.05729908209824,
"step": 4800
},
{
"epoch": 0.97,
"learning_rate": 1.512868005459154e-05,
"loss": 2.6125,
"step": 5000
},
{
"epoch": 0.99,
"eval_exact_match": 24.6,
"eval_f1": 45.667180594212915,
"step": 5100
},
{
"epoch": 1.05,
"eval_exact_match": 23.2,
"eval_f1": 45.63595896581295,
"step": 5400
},
{
"epoch": 1.07,
"learning_rate": 1.4641255605381166e-05,
"loss": 2.4544,
"step": 5500
},
{
"epoch": 1.11,
"eval_exact_match": 23.0,
"eval_f1": 45.57347635092067,
"step": 5700
},
{
"epoch": 1.17,
"learning_rate": 1.4153831156170795e-05,
"loss": 2.4307,
"step": 6000
},
{
"epoch": 1.17,
"eval_exact_match": 22.8,
"eval_f1": 45.4787090109933,
"step": 6000
},
{
"epoch": 1.23,
"eval_exact_match": 24.2,
"eval_f1": 46.144624410019276,
"step": 6300
},
{
"epoch": 1.27,
"learning_rate": 1.3666406706960422e-05,
"loss": 2.4623,
"step": 6500
},
{
"epoch": 1.29,
"eval_exact_match": 23.2,
"eval_f1": 46.0243228203072,
"step": 6600
},
{
"epoch": 1.35,
"eval_exact_match": 24.2,
"eval_f1": 46.67180386550054,
"step": 6900
},
{
"epoch": 1.36,
"learning_rate": 1.3179957106648472e-05,
"loss": 2.4077,
"step": 7000
},
{
"epoch": 1.4,
"eval_exact_match": 25.4,
"eval_f1": 47.08102740483588,
"step": 7200
},
{
"epoch": 1.46,
"learning_rate": 1.2692532657438098e-05,
"loss": 2.3967,
"step": 7500
},
{
"epoch": 1.46,
"eval_exact_match": 25.2,
"eval_f1": 46.94730205752481,
"step": 7500
},
{
"epoch": 1.52,
"eval_exact_match": 22.6,
"eval_f1": 45.74979168773037,
"step": 7800
},
{
"epoch": 1.56,
"learning_rate": 1.2205108208227725e-05,
"loss": 2.3936,
"step": 8000
},
{
"epoch": 1.58,
"eval_exact_match": 22.8,
"eval_f1": 45.53841968646729,
"step": 8100
},
{
"epoch": 1.64,
"eval_exact_match": 23.8,
"eval_f1": 46.600652318202485,
"step": 8400
},
{
"epoch": 1.66,
"learning_rate": 1.1717683759017354e-05,
"loss": 2.3773,
"step": 8500
},
{
"epoch": 1.7,
"eval_exact_match": 22.4,
"eval_f1": 46.012756869173074,
"step": 8700
},
{
"epoch": 1.75,
"learning_rate": 1.123025930980698e-05,
"loss": 2.3893,
"step": 9000
},
{
"epoch": 1.75,
"eval_exact_match": 23.8,
"eval_f1": 45.9605328124833,
"step": 9000
},
{
"epoch": 1.81,
"eval_exact_match": 24.4,
"eval_f1": 47.31331525220247,
"step": 9300
},
{
"epoch": 1.85,
"learning_rate": 1.0742834860596609e-05,
"loss": 2.4066,
"step": 9500
},
{
"epoch": 1.87,
"eval_exact_match": 25.8,
"eval_f1": 48.09576595590226,
"step": 9600
},
{
"epoch": 1.93,
"eval_exact_match": 24.2,
"eval_f1": 46.333298341879264,
"step": 9900
},
{
"epoch": 1.95,
"learning_rate": 1.0256385260284656e-05,
"loss": 2.3616,
"step": 10000
},
{
"epoch": 1.99,
"eval_exact_match": 24.4,
"eval_f1": 47.86325870979021,
"step": 10200
},
{
"epoch": 2.05,
"learning_rate": 9.768960811074284e-06,
"loss": 2.3018,
"step": 10500
},
{
"epoch": 2.05,
"eval_exact_match": 23.4,
"eval_f1": 46.869345750688815,
"step": 10500
},
{
"epoch": 2.11,
"eval_exact_match": 23.2,
"eval_f1": 46.75900565743204,
"step": 10800
},
{
"epoch": 2.14,
"learning_rate": 9.281536361863913e-06,
"loss": 2.2207,
"step": 11000
},
{
"epoch": 2.16,
"eval_exact_match": 23.0,
"eval_f1": 47.11768387241494,
"step": 11100
},
{
"epoch": 2.22,
"eval_exact_match": 24.8,
"eval_f1": 47.824589572947076,
"step": 11400
},
{
"epoch": 2.24,
"learning_rate": 8.79411191265354e-06,
"loss": 2.2133,
"step": 11500
},
{
"epoch": 2.28,
"eval_exact_match": 22.6,
"eval_f1": 46.81847093965424,
"step": 11700
},
{
"epoch": 2.34,
"learning_rate": 8.306687463443166e-06,
"loss": 2.1887,
"step": 12000
},
{
"epoch": 2.34,
"eval_exact_match": 25.4,
"eval_f1": 48.70213586118982,
"step": 12000
},
{
"epoch": 2.4,
"eval_exact_match": 24.0,
"eval_f1": 47.907936732879136,
"step": 12300
},
{
"epoch": 2.44,
"learning_rate": 7.819263014232795e-06,
"loss": 2.2084,
"step": 12500
},
{
"epoch": 2.46,
"eval_exact_match": 24.2,
"eval_f1": 47.50117367525339,
"step": 12600
},
{
"epoch": 2.52,
"eval_exact_match": 25.6,
"eval_f1": 48.94001149135019,
"step": 12900
},
{
"epoch": 2.53,
"learning_rate": 7.3318385650224225e-06,
"loss": 2.205,
"step": 13000
},
{
"epoch": 2.57,
"eval_exact_match": 24.8,
"eval_f1": 48.65674792844071,
"step": 13200
},
{
"epoch": 2.63,
"learning_rate": 6.844414115812049e-06,
"loss": 2.1895,
"step": 13500
},
{
"epoch": 2.63,
"eval_exact_match": 25.6,
"eval_f1": 48.791085379512644,
"step": 13500
},
{
"epoch": 2.69,
"eval_exact_match": 24.4,
"eval_f1": 46.9179679859715,
"step": 13800
},
{
"epoch": 2.73,
"learning_rate": 6.357964515500098e-06,
"loss": 2.2337,
"step": 14000
},
{
"epoch": 2.75,
"eval_exact_match": 23.6,
"eval_f1": 46.28350673077509,
"step": 14100
},
{
"epoch": 2.81,
"eval_exact_match": 23.6,
"eval_f1": 47.41817849560017,
"step": 14400
},
{
"epoch": 2.83,
"learning_rate": 5.871514915188147e-06,
"loss": 2.1879,
"step": 14500
},
{
"epoch": 2.87,
"eval_exact_match": 22.8,
"eval_f1": 46.458213496270524,
"step": 14700
},
{
"epoch": 2.92,
"learning_rate": 5.384090465977774e-06,
"loss": 2.2071,
"step": 15000
},
{
"epoch": 2.92,
"eval_exact_match": 24.2,
"eval_f1": 47.532040364746656,
"step": 15000
},
{
"epoch": 2.98,
"eval_exact_match": 24.0,
"eval_f1": 47.252480052113945,
"step": 15300
},
{
"epoch": 3.02,
"learning_rate": 4.896666016767401e-06,
"loss": 2.1374,
"step": 15500
},
{
"epoch": 3.04,
"eval_exact_match": 23.4,
"eval_f1": 46.915969224210855,
"step": 15600
},
{
"epoch": 3.1,
"eval_exact_match": 23.4,
"eval_f1": 47.627048464769736,
"step": 15900
},
{
"epoch": 3.12,
"learning_rate": 4.409241567557029e-06,
"loss": 2.1177,
"step": 16000
},
{
"epoch": 3.16,
"eval_exact_match": 22.2,
"eval_f1": 46.544258456129775,
"step": 16200
},
{
"epoch": 3.22,
"learning_rate": 3.921817118346657e-06,
"loss": 2.0913,
"step": 16500
},
{
"epoch": 3.22,
"eval_exact_match": 23.0,
"eval_f1": 46.50691177045911,
"step": 16500
},
{
"epoch": 3.28,
"eval_exact_match": 23.2,
"eval_f1": 47.050804068875436,
"step": 16800
},
{
"epoch": 3.31,
"learning_rate": 3.435367518034705e-06,
"loss": 2.0779,
"step": 17000
},
{
"epoch": 3.33,
"eval_exact_match": 23.2,
"eval_f1": 46.33502363014144,
"step": 17100
},
{
"epoch": 3.39,
"eval_exact_match": 23.2,
"eval_f1": 46.765575747268464,
"step": 17400
},
{
"epoch": 3.41,
"learning_rate": 2.9479430688243326e-06,
"loss": 2.0847,
"step": 17500
},
{
"epoch": 3.45,
"eval_exact_match": 24.2,
"eval_f1": 47.26636124409616,
"step": 17700
},
{
"epoch": 3.51,
"learning_rate": 2.46051861961396e-06,
"loss": 2.0733,
"step": 18000
},
{
"epoch": 3.51,
"eval_exact_match": 23.0,
"eval_f1": 46.90041208739893,
"step": 18000
},
{
"epoch": 3.57,
"eval_exact_match": 24.2,
"eval_f1": 47.60954713139801,
"step": 18300
},
{
"epoch": 3.61,
"learning_rate": 1.9730941704035875e-06,
"loss": 2.0735,
"step": 18500
},
{
"epoch": 3.63,
"eval_exact_match": 23.4,
"eval_f1": 47.03174405348153,
"step": 18600
},
{
"epoch": 3.68,
"eval_exact_match": 22.4,
"eval_f1": 46.133959338075734,
"step": 18900
},
{
"epoch": 3.7,
"learning_rate": 1.4856697211932152e-06,
"loss": 2.0766,
"step": 19000
},
{
"epoch": 3.74,
"eval_exact_match": 23.0,
"eval_f1": 46.661523832404924,
"step": 19200
},
{
"epoch": 3.8,
"learning_rate": 9.982452719828426e-07,
"loss": 2.0837,
"step": 19500
},
{
"epoch": 3.8,
"eval_exact_match": 23.2,
"eval_f1": 46.75773872861982,
"step": 19500
},
{
"epoch": 3.86,
"eval_exact_match": 23.0,
"eval_f1": 46.675643262204126,
"step": 19800
},
{
"epoch": 3.9,
"learning_rate": 5.108208227724703e-07,
"loss": 2.0645,
"step": 20000
},
{
"epoch": 3.92,
"eval_exact_match": 22.8,
"eval_f1": 46.09897714824699,
"step": 20100
},
{
"epoch": 3.98,
"eval_exact_match": 22.8,
"eval_f1": 46.31081325560941,
"step": 20400
},
{
"epoch": 4.0,
"learning_rate": 2.437122246051862e-08,
"loss": 2.05,
"step": 20500
},
{
"epoch": 4.0,
"step": 20516,
"total_flos": 5015301739248672.0,
"train_loss": 2.4078880540807246,
"train_runtime": 515.9797,
"train_samples_per_second": 636.079,
"train_steps_per_second": 39.761
}
],
"max_steps": 20516,
"num_train_epochs": 4,
"total_flos": 5015301739248672.0,
"trial_name": null,
"trial_params": null
}