bobox's picture
Training in progress, step 2148, checkpoint
e06690f verified
raw
history blame
77.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.6,
"eval_steps": 179,
"global_step": 2148,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005027932960893855,
"grad_norm": 28.300092697143555,
"learning_rate": 6.331471135940411e-07,
"loss": 1.0198,
"step": 18
},
{
"epoch": 0.01005586592178771,
"grad_norm": 1.059788465499878,
"learning_rate": 1.303538175046555e-06,
"loss": 0.8564,
"step": 36
},
{
"epoch": 0.015083798882681564,
"grad_norm": 9.368417739868164,
"learning_rate": 1.9739292364990693e-06,
"loss": 0.685,
"step": 54
},
{
"epoch": 0.02011173184357542,
"grad_norm": 3.4458816051483154,
"learning_rate": 2.644320297951583e-06,
"loss": 0.7667,
"step": 72
},
{
"epoch": 0.025139664804469275,
"grad_norm": 4.497154712677002,
"learning_rate": 3.314711359404097e-06,
"loss": 0.7907,
"step": 90
},
{
"epoch": 0.030167597765363128,
"grad_norm": 4.394736289978027,
"learning_rate": 3.985102420856611e-06,
"loss": 0.9719,
"step": 108
},
{
"epoch": 0.03519553072625698,
"grad_norm": 10.305765151977539,
"learning_rate": 4.655493482309125e-06,
"loss": 1.1097,
"step": 126
},
{
"epoch": 0.04022346368715084,
"grad_norm": 5.402688980102539,
"learning_rate": 5.325884543761639e-06,
"loss": 1.0463,
"step": 144
},
{
"epoch": 0.045251396648044694,
"grad_norm": 0.2811428904533386,
"learning_rate": 5.996275605214154e-06,
"loss": 0.8796,
"step": 162
},
{
"epoch": 0.05,
"eval_nli-pairs_loss": 0.9358150959014893,
"eval_nli-pairs_runtime": 3.7163,
"eval_nli-pairs_samples_per_second": 40.363,
"eval_nli-pairs_steps_per_second": 0.538,
"eval_sts-test_pearson_cosine": 0.7920907827756308,
"eval_sts-test_pearson_dot": 0.553905804338793,
"eval_sts-test_pearson_euclidean": 0.7477067417028286,
"eval_sts-test_pearson_manhattan": 0.7454637009944656,
"eval_sts-test_pearson_max": 0.7920907827756308,
"eval_sts-test_spearman_cosine": 0.8050902340286232,
"eval_sts-test_spearman_dot": 0.5325926730168974,
"eval_sts-test_spearman_euclidean": 0.73705041377373,
"eval_sts-test_spearman_manhattan": 0.7373939486807765,
"eval_sts-test_spearman_max": 0.8050902340286232,
"step": 179
},
{
"epoch": 0.05,
"eval_vitaminc-pairs_loss": 4.57532262802124,
"eval_vitaminc-pairs_runtime": 0.8386,
"eval_vitaminc-pairs_samples_per_second": 149.066,
"eval_vitaminc-pairs_steps_per_second": 2.385,
"step": 179
},
{
"epoch": 0.05,
"eval_qnli-contrastive_loss": 0.260235071182251,
"eval_qnli-contrastive_runtime": 0.1227,
"eval_qnli-contrastive_samples_per_second": 1222.824,
"eval_qnli-contrastive_steps_per_second": 16.304,
"step": 179
},
{
"epoch": 0.05,
"eval_scitail-pairs-qa_loss": 0.06116430461406708,
"eval_scitail-pairs-qa_runtime": 0.6972,
"eval_scitail-pairs-qa_samples_per_second": 215.142,
"eval_scitail-pairs-qa_steps_per_second": 2.869,
"step": 179
},
{
"epoch": 0.05,
"eval_scitail-pairs-pos_loss": 0.3001463711261749,
"eval_scitail-pairs-pos_runtime": 1.8226,
"eval_scitail-pairs-pos_samples_per_second": 82.299,
"eval_scitail-pairs-pos_steps_per_second": 1.097,
"step": 179
},
{
"epoch": 0.05,
"eval_xsum-pairs_loss": 0.43996042013168335,
"eval_xsum-pairs_runtime": 0.1548,
"eval_xsum-pairs_samples_per_second": 529.555,
"eval_xsum-pairs_steps_per_second": 6.458,
"step": 179
},
{
"epoch": 0.05,
"eval_compression-pairs_loss": 0.10088926553726196,
"eval_compression-pairs_runtime": 0.1051,
"eval_compression-pairs_samples_per_second": 1427.213,
"eval_compression-pairs_steps_per_second": 19.03,
"step": 179
},
{
"epoch": 0.05,
"eval_sciq_pairs_loss": 0.15509434044361115,
"eval_sciq_pairs_runtime": 4.9415,
"eval_sciq_pairs_samples_per_second": 30.355,
"eval_sciq_pairs_steps_per_second": 0.405,
"step": 179
},
{
"epoch": 0.05,
"eval_qasc_pairs_loss": 0.24072103202342987,
"eval_qasc_pairs_runtime": 0.6453,
"eval_qasc_pairs_samples_per_second": 232.455,
"eval_qasc_pairs_steps_per_second": 3.099,
"step": 179
},
{
"epoch": 0.05,
"eval_openbookqa_pairs_loss": 1.5241280794143677,
"eval_openbookqa_pairs_runtime": 0.7059,
"eval_openbookqa_pairs_samples_per_second": 212.48,
"eval_openbookqa_pairs_steps_per_second": 2.833,
"step": 179
},
{
"epoch": 0.05,
"eval_msmarco_pairs_loss": 0.6149919629096985,
"eval_msmarco_pairs_runtime": 1.2013,
"eval_msmarco_pairs_samples_per_second": 124.863,
"eval_msmarco_pairs_steps_per_second": 1.665,
"step": 179
},
{
"epoch": 0.05,
"eval_nq_pairs_loss": 0.591522216796875,
"eval_nq_pairs_runtime": 2.2917,
"eval_nq_pairs_samples_per_second": 65.455,
"eval_nq_pairs_steps_per_second": 0.873,
"step": 179
},
{
"epoch": 0.05,
"eval_trivia_pairs_loss": 1.0899044275283813,
"eval_trivia_pairs_runtime": 3.1583,
"eval_trivia_pairs_samples_per_second": 47.494,
"eval_trivia_pairs_steps_per_second": 0.633,
"step": 179
},
{
"epoch": 0.05,
"eval_quora_pairs_loss": 0.21252885460853577,
"eval_quora_pairs_runtime": 0.3861,
"eval_quora_pairs_samples_per_second": 388.452,
"eval_quora_pairs_steps_per_second": 5.179,
"step": 179
},
{
"epoch": 0.05,
"eval_gooaq_pairs_loss": 0.5593109130859375,
"eval_gooaq_pairs_runtime": 0.7918,
"eval_gooaq_pairs_samples_per_second": 189.449,
"eval_gooaq_pairs_steps_per_second": 2.526,
"step": 179
},
{
"epoch": 0.05,
"eval_mrpc_pairs_loss": 0.055620357394218445,
"eval_mrpc_pairs_runtime": 0.1048,
"eval_mrpc_pairs_samples_per_second": 1431.291,
"eval_mrpc_pairs_steps_per_second": 19.084,
"step": 179
},
{
"epoch": 0.05027932960893855,
"grad_norm": 9.769979476928711,
"learning_rate": 6.666666666666667e-06,
"loss": 0.7777,
"step": 180
},
{
"epoch": 0.0553072625698324,
"grad_norm": 9.294490814208984,
"learning_rate": 7.337057728119181e-06,
"loss": 0.9075,
"step": 198
},
{
"epoch": 0.060335195530726256,
"grad_norm": 0.6179952621459961,
"learning_rate": 8.007448789571696e-06,
"loss": 0.8239,
"step": 216
},
{
"epoch": 0.06536312849162011,
"grad_norm": 6.369142532348633,
"learning_rate": 8.677839851024209e-06,
"loss": 0.949,
"step": 234
},
{
"epoch": 0.07039106145251396,
"grad_norm": 1.7066636085510254,
"learning_rate": 9.348230912476724e-06,
"loss": 1.2046,
"step": 252
},
{
"epoch": 0.07541899441340782,
"grad_norm": 0.33892622590065,
"learning_rate": 1.0018621973929237e-05,
"loss": 0.8526,
"step": 270
},
{
"epoch": 0.08044692737430167,
"grad_norm": 24.46626091003418,
"learning_rate": 1.0689013035381753e-05,
"loss": 0.8629,
"step": 288
},
{
"epoch": 0.08547486033519552,
"grad_norm": 9.228001594543457,
"learning_rate": 1.1359404096834266e-05,
"loss": 1.1474,
"step": 306
},
{
"epoch": 0.09050279329608939,
"grad_norm": 3.441566228866577,
"learning_rate": 1.2029795158286779e-05,
"loss": 0.9141,
"step": 324
},
{
"epoch": 0.09553072625698324,
"grad_norm": 0.3621586859226227,
"learning_rate": 1.2700186219739294e-05,
"loss": 0.9756,
"step": 342
},
{
"epoch": 0.1,
"eval_nli-pairs_loss": 0.9455356001853943,
"eval_nli-pairs_runtime": 3.6115,
"eval_nli-pairs_samples_per_second": 41.533,
"eval_nli-pairs_steps_per_second": 0.554,
"eval_sts-test_pearson_cosine": 0.7944563743777492,
"eval_sts-test_pearson_dot": 0.554185693724103,
"eval_sts-test_pearson_euclidean": 0.7483225064388481,
"eval_sts-test_pearson_manhattan": 0.7454936340776467,
"eval_sts-test_pearson_max": 0.7944563743777492,
"eval_sts-test_spearman_cosine": 0.8064910811776218,
"eval_sts-test_spearman_dot": 0.5333761592947532,
"eval_sts-test_spearman_euclidean": 0.7382698586949932,
"eval_sts-test_spearman_manhattan": 0.7378149008082869,
"eval_sts-test_spearman_max": 0.8064910811776218,
"step": 358
},
{
"epoch": 0.1,
"eval_vitaminc-pairs_loss": 4.517832279205322,
"eval_vitaminc-pairs_runtime": 0.8908,
"eval_vitaminc-pairs_samples_per_second": 140.33,
"eval_vitaminc-pairs_steps_per_second": 2.245,
"step": 358
},
{
"epoch": 0.1,
"eval_qnli-contrastive_loss": 0.2627922296524048,
"eval_qnli-contrastive_runtime": 0.1229,
"eval_qnli-contrastive_samples_per_second": 1220.886,
"eval_qnli-contrastive_steps_per_second": 16.278,
"step": 358
},
{
"epoch": 0.1,
"eval_scitail-pairs-qa_loss": 0.06381849944591522,
"eval_scitail-pairs-qa_runtime": 0.7125,
"eval_scitail-pairs-qa_samples_per_second": 210.516,
"eval_scitail-pairs-qa_steps_per_second": 2.807,
"step": 358
},
{
"epoch": 0.1,
"eval_scitail-pairs-pos_loss": 0.30350035429000854,
"eval_scitail-pairs-pos_runtime": 1.877,
"eval_scitail-pairs-pos_samples_per_second": 79.916,
"eval_scitail-pairs-pos_steps_per_second": 1.066,
"step": 358
},
{
"epoch": 0.1,
"eval_xsum-pairs_loss": 0.44631534814834595,
"eval_xsum-pairs_runtime": 0.1512,
"eval_xsum-pairs_samples_per_second": 542.377,
"eval_xsum-pairs_steps_per_second": 6.614,
"step": 358
},
{
"epoch": 0.1,
"eval_compression-pairs_loss": 0.10376789420843124,
"eval_compression-pairs_runtime": 0.1034,
"eval_compression-pairs_samples_per_second": 1450.105,
"eval_compression-pairs_steps_per_second": 19.335,
"step": 358
},
{
"epoch": 0.1,
"eval_sciq_pairs_loss": 0.15615364909172058,
"eval_sciq_pairs_runtime": 5.1236,
"eval_sciq_pairs_samples_per_second": 29.276,
"eval_sciq_pairs_steps_per_second": 0.39,
"step": 358
},
{
"epoch": 0.1,
"eval_qasc_pairs_loss": 0.24106402695178986,
"eval_qasc_pairs_runtime": 0.6663,
"eval_qasc_pairs_samples_per_second": 225.133,
"eval_qasc_pairs_steps_per_second": 3.002,
"step": 358
},
{
"epoch": 0.1,
"eval_openbookqa_pairs_loss": 1.5405189990997314,
"eval_openbookqa_pairs_runtime": 0.7365,
"eval_openbookqa_pairs_samples_per_second": 203.658,
"eval_openbookqa_pairs_steps_per_second": 2.715,
"step": 358
},
{
"epoch": 0.1,
"eval_msmarco_pairs_loss": 0.6067730188369751,
"eval_msmarco_pairs_runtime": 1.2272,
"eval_msmarco_pairs_samples_per_second": 122.233,
"eval_msmarco_pairs_steps_per_second": 1.63,
"step": 358
},
{
"epoch": 0.1,
"eval_nq_pairs_loss": 0.5683658123016357,
"eval_nq_pairs_runtime": 2.3358,
"eval_nq_pairs_samples_per_second": 64.219,
"eval_nq_pairs_steps_per_second": 0.856,
"step": 358
},
{
"epoch": 0.1,
"eval_trivia_pairs_loss": 1.1103042364120483,
"eval_trivia_pairs_runtime": 3.2191,
"eval_trivia_pairs_samples_per_second": 46.596,
"eval_trivia_pairs_steps_per_second": 0.621,
"step": 358
},
{
"epoch": 0.1,
"eval_quora_pairs_loss": 0.2291153520345688,
"eval_quora_pairs_runtime": 0.39,
"eval_quora_pairs_samples_per_second": 384.605,
"eval_quora_pairs_steps_per_second": 5.128,
"step": 358
},
{
"epoch": 0.1,
"eval_gooaq_pairs_loss": 0.5594914555549622,
"eval_gooaq_pairs_runtime": 0.814,
"eval_gooaq_pairs_samples_per_second": 184.276,
"eval_gooaq_pairs_steps_per_second": 2.457,
"step": 358
},
{
"epoch": 0.1,
"eval_mrpc_pairs_loss": 0.056830935180187225,
"eval_mrpc_pairs_runtime": 0.1099,
"eval_mrpc_pairs_samples_per_second": 1364.673,
"eval_mrpc_pairs_steps_per_second": 18.196,
"step": 358
},
{
"epoch": 0.1005586592178771,
"grad_norm": 0.40709275007247925,
"learning_rate": 1.3370577281191808e-05,
"loss": 0.7904,
"step": 360
},
{
"epoch": 0.10558659217877095,
"grad_norm": 8.056321144104004,
"learning_rate": 1.4040968342644321e-05,
"loss": 1.2543,
"step": 378
},
{
"epoch": 0.1106145251396648,
"grad_norm": 6.643378257751465,
"learning_rate": 1.4711359404096834e-05,
"loss": 0.854,
"step": 396
},
{
"epoch": 0.11564245810055866,
"grad_norm": 7.473087310791016,
"learning_rate": 1.538175046554935e-05,
"loss": 0.4719,
"step": 414
},
{
"epoch": 0.12067039106145251,
"grad_norm": 8.68940258026123,
"learning_rate": 1.6052141527001864e-05,
"loss": 1.4229,
"step": 432
},
{
"epoch": 0.12569832402234637,
"grad_norm": 8.564693450927734,
"learning_rate": 1.6722532588454377e-05,
"loss": 0.7646,
"step": 450
},
{
"epoch": 0.13072625698324022,
"grad_norm": 0.39765194058418274,
"learning_rate": 1.7392923649906893e-05,
"loss": 0.8512,
"step": 468
},
{
"epoch": 0.13575418994413407,
"grad_norm": 7.3941192626953125,
"learning_rate": 1.8063314711359406e-05,
"loss": 1.1333,
"step": 486
},
{
"epoch": 0.14078212290502792,
"grad_norm": 3.183729410171509,
"learning_rate": 1.873370577281192e-05,
"loss": 0.4451,
"step": 504
},
{
"epoch": 0.1458100558659218,
"grad_norm": 1.2496144771575928,
"learning_rate": 1.9404096834264436e-05,
"loss": 0.8859,
"step": 522
},
{
"epoch": 0.15,
"eval_nli-pairs_loss": 0.9459429979324341,
"eval_nli-pairs_runtime": 3.418,
"eval_nli-pairs_samples_per_second": 43.886,
"eval_nli-pairs_steps_per_second": 0.585,
"eval_sts-test_pearson_cosine": 0.7949855875446147,
"eval_sts-test_pearson_dot": 0.5640384325880634,
"eval_sts-test_pearson_euclidean": 0.751389277231376,
"eval_sts-test_pearson_manhattan": 0.7487497192657817,
"eval_sts-test_pearson_max": 0.7949855875446147,
"eval_sts-test_spearman_cosine": 0.8064088099185094,
"eval_sts-test_spearman_dot": 0.5419498621910633,
"eval_sts-test_spearman_euclidean": 0.7411803691316964,
"eval_sts-test_spearman_manhattan": 0.7408925147897409,
"eval_sts-test_spearman_max": 0.8064088099185094,
"step": 537
},
{
"epoch": 0.15,
"eval_vitaminc-pairs_loss": 4.557371139526367,
"eval_vitaminc-pairs_runtime": 0.8813,
"eval_vitaminc-pairs_samples_per_second": 141.843,
"eval_vitaminc-pairs_steps_per_second": 2.269,
"step": 537
},
{
"epoch": 0.15,
"eval_qnli-contrastive_loss": 0.2963089942932129,
"eval_qnli-contrastive_runtime": 0.1173,
"eval_qnli-contrastive_samples_per_second": 1279.24,
"eval_qnli-contrastive_steps_per_second": 17.057,
"step": 537
},
{
"epoch": 0.15,
"eval_scitail-pairs-qa_loss": 0.06569412350654602,
"eval_scitail-pairs-qa_runtime": 0.6754,
"eval_scitail-pairs-qa_samples_per_second": 222.091,
"eval_scitail-pairs-qa_steps_per_second": 2.961,
"step": 537
},
{
"epoch": 0.15,
"eval_scitail-pairs-pos_loss": 0.3104495108127594,
"eval_scitail-pairs-pos_runtime": 1.7852,
"eval_scitail-pairs-pos_samples_per_second": 84.022,
"eval_scitail-pairs-pos_steps_per_second": 1.12,
"step": 537
},
{
"epoch": 0.15,
"eval_xsum-pairs_loss": 0.44305890798568726,
"eval_xsum-pairs_runtime": 0.1668,
"eval_xsum-pairs_samples_per_second": 491.658,
"eval_xsum-pairs_steps_per_second": 5.996,
"step": 537
},
{
"epoch": 0.15,
"eval_compression-pairs_loss": 0.10611961036920547,
"eval_compression-pairs_runtime": 0.1033,
"eval_compression-pairs_samples_per_second": 1451.474,
"eval_compression-pairs_steps_per_second": 19.353,
"step": 537
},
{
"epoch": 0.15,
"eval_sciq_pairs_loss": 0.15942400693893433,
"eval_sciq_pairs_runtime": 4.8457,
"eval_sciq_pairs_samples_per_second": 30.955,
"eval_sciq_pairs_steps_per_second": 0.413,
"step": 537
},
{
"epoch": 0.15,
"eval_qasc_pairs_loss": 0.2380143702030182,
"eval_qasc_pairs_runtime": 0.6528,
"eval_qasc_pairs_samples_per_second": 229.78,
"eval_qasc_pairs_steps_per_second": 3.064,
"step": 537
},
{
"epoch": 0.15,
"eval_openbookqa_pairs_loss": 1.556572437286377,
"eval_openbookqa_pairs_runtime": 0.6905,
"eval_openbookqa_pairs_samples_per_second": 217.246,
"eval_openbookqa_pairs_steps_per_second": 2.897,
"step": 537
},
{
"epoch": 0.15,
"eval_msmarco_pairs_loss": 0.5950079560279846,
"eval_msmarco_pairs_runtime": 1.1541,
"eval_msmarco_pairs_samples_per_second": 129.968,
"eval_msmarco_pairs_steps_per_second": 1.733,
"step": 537
},
{
"epoch": 0.15,
"eval_nq_pairs_loss": 0.5976797938346863,
"eval_nq_pairs_runtime": 2.2683,
"eval_nq_pairs_samples_per_second": 66.128,
"eval_nq_pairs_steps_per_second": 0.882,
"step": 537
},
{
"epoch": 0.15,
"eval_trivia_pairs_loss": 1.1377041339874268,
"eval_trivia_pairs_runtime": 3.1791,
"eval_trivia_pairs_samples_per_second": 47.183,
"eval_trivia_pairs_steps_per_second": 0.629,
"step": 537
},
{
"epoch": 0.15,
"eval_quora_pairs_loss": 0.20836791396141052,
"eval_quora_pairs_runtime": 0.374,
"eval_quora_pairs_samples_per_second": 401.115,
"eval_quora_pairs_steps_per_second": 5.348,
"step": 537
},
{
"epoch": 0.15,
"eval_gooaq_pairs_loss": 0.5545207262039185,
"eval_gooaq_pairs_runtime": 0.7667,
"eval_gooaq_pairs_samples_per_second": 195.642,
"eval_gooaq_pairs_steps_per_second": 2.609,
"step": 537
},
{
"epoch": 0.15,
"eval_mrpc_pairs_loss": 0.060290463268756866,
"eval_mrpc_pairs_runtime": 0.1026,
"eval_mrpc_pairs_samples_per_second": 1462.106,
"eval_mrpc_pairs_steps_per_second": 19.495,
"step": 537
},
{
"epoch": 0.15083798882681565,
"grad_norm": 1.3315377235412598,
"learning_rate": 1.9999898748390674e-05,
"loss": 0.9245,
"step": 540
},
{
"epoch": 0.1558659217877095,
"grad_norm": 8.015869140625,
"learning_rate": 1.9989876718445097e-05,
"loss": 1.0384,
"step": 558
},
{
"epoch": 0.16089385474860335,
"grad_norm": 1.3641886711120605,
"learning_rate": 1.9963472835306562e-05,
"loss": 0.8826,
"step": 576
},
{
"epoch": 0.1659217877094972,
"grad_norm": 7.676197528839111,
"learning_rate": 1.9920735213598995e-05,
"loss": 0.6693,
"step": 594
},
{
"epoch": 0.17094972067039105,
"grad_norm": 8.144271850585938,
"learning_rate": 1.9861741732192866e-05,
"loss": 0.8437,
"step": 612
},
{
"epoch": 0.17597765363128492,
"grad_norm": 11.723029136657715,
"learning_rate": 1.9786599892290035e-05,
"loss": 1.2241,
"step": 630
},
{
"epoch": 0.18100558659217877,
"grad_norm": 12.107389450073242,
"learning_rate": 1.9695446621529053e-05,
"loss": 0.7169,
"step": 648
},
{
"epoch": 0.18603351955307262,
"grad_norm": 0.671999454498291,
"learning_rate": 1.9588448024468015e-05,
"loss": 0.7937,
"step": 666
},
{
"epoch": 0.19106145251396647,
"grad_norm": 2.9247829914093018,
"learning_rate": 1.9465799079899608e-05,
"loss": 0.8666,
"step": 684
},
{
"epoch": 0.19608938547486032,
"grad_norm": 9.634854316711426,
"learning_rate": 1.932772328554989e-05,
"loss": 0.8002,
"step": 702
},
{
"epoch": 0.2,
"eval_nli-pairs_loss": 0.9524073004722595,
"eval_nli-pairs_runtime": 3.3652,
"eval_nli-pairs_samples_per_second": 44.573,
"eval_nli-pairs_steps_per_second": 0.594,
"eval_sts-test_pearson_cosine": 0.7960974627777679,
"eval_sts-test_pearson_dot": 0.5653927619143873,
"eval_sts-test_pearson_euclidean": 0.7510324476182012,
"eval_sts-test_pearson_manhattan": 0.7478581635465753,
"eval_sts-test_pearson_max": 0.7960974627777679,
"eval_sts-test_spearman_cosine": 0.8076217698074869,
"eval_sts-test_spearman_dot": 0.5437275169754562,
"eval_sts-test_spearman_euclidean": 0.7394612354069316,
"eval_sts-test_spearman_manhattan": 0.7383580125266631,
"eval_sts-test_spearman_max": 0.8076217698074869,
"step": 716
},
{
"epoch": 0.2,
"eval_vitaminc-pairs_loss": 4.544824600219727,
"eval_vitaminc-pairs_runtime": 0.8209,
"eval_vitaminc-pairs_samples_per_second": 152.27,
"eval_vitaminc-pairs_steps_per_second": 2.436,
"step": 716
},
{
"epoch": 0.2,
"eval_qnli-contrastive_loss": 0.2854141891002655,
"eval_qnli-contrastive_runtime": 0.116,
"eval_qnli-contrastive_samples_per_second": 1293.335,
"eval_qnli-contrastive_steps_per_second": 17.244,
"step": 716
},
{
"epoch": 0.2,
"eval_scitail-pairs-qa_loss": 0.06479831039905548,
"eval_scitail-pairs-qa_runtime": 0.6668,
"eval_scitail-pairs-qa_samples_per_second": 224.963,
"eval_scitail-pairs-qa_steps_per_second": 3.0,
"step": 716
},
{
"epoch": 0.2,
"eval_scitail-pairs-pos_loss": 0.30356064438819885,
"eval_scitail-pairs-pos_runtime": 1.7895,
"eval_scitail-pairs-pos_samples_per_second": 83.823,
"eval_scitail-pairs-pos_steps_per_second": 1.118,
"step": 716
},
{
"epoch": 0.2,
"eval_xsum-pairs_loss": 0.4510384202003479,
"eval_xsum-pairs_runtime": 0.1513,
"eval_xsum-pairs_samples_per_second": 541.869,
"eval_xsum-pairs_steps_per_second": 6.608,
"step": 716
},
{
"epoch": 0.2,
"eval_compression-pairs_loss": 0.10218393057584763,
"eval_compression-pairs_runtime": 0.1063,
"eval_compression-pairs_samples_per_second": 1410.495,
"eval_compression-pairs_steps_per_second": 18.807,
"step": 716
},
{
"epoch": 0.2,
"eval_sciq_pairs_loss": 0.16117030382156372,
"eval_sciq_pairs_runtime": 4.8842,
"eval_sciq_pairs_samples_per_second": 30.711,
"eval_sciq_pairs_steps_per_second": 0.409,
"step": 716
},
{
"epoch": 0.2,
"eval_qasc_pairs_loss": 0.25312450528144836,
"eval_qasc_pairs_runtime": 0.626,
"eval_qasc_pairs_samples_per_second": 239.619,
"eval_qasc_pairs_steps_per_second": 3.195,
"step": 716
},
{
"epoch": 0.2,
"eval_openbookqa_pairs_loss": 1.560429573059082,
"eval_openbookqa_pairs_runtime": 0.6856,
"eval_openbookqa_pairs_samples_per_second": 218.787,
"eval_openbookqa_pairs_steps_per_second": 2.917,
"step": 716
},
{
"epoch": 0.2,
"eval_msmarco_pairs_loss": 0.6264744997024536,
"eval_msmarco_pairs_runtime": 1.1496,
"eval_msmarco_pairs_samples_per_second": 130.479,
"eval_msmarco_pairs_steps_per_second": 1.74,
"step": 716
},
{
"epoch": 0.2,
"eval_nq_pairs_loss": 0.602968692779541,
"eval_nq_pairs_runtime": 2.2714,
"eval_nq_pairs_samples_per_second": 66.038,
"eval_nq_pairs_steps_per_second": 0.881,
"step": 716
},
{
"epoch": 0.2,
"eval_trivia_pairs_loss": 1.1763343811035156,
"eval_trivia_pairs_runtime": 3.1779,
"eval_trivia_pairs_samples_per_second": 47.201,
"eval_trivia_pairs_steps_per_second": 0.629,
"step": 716
},
{
"epoch": 0.2,
"eval_quora_pairs_loss": 0.25559327006340027,
"eval_quora_pairs_runtime": 0.3926,
"eval_quora_pairs_samples_per_second": 382.056,
"eval_quora_pairs_steps_per_second": 5.094,
"step": 716
},
{
"epoch": 0.2,
"eval_gooaq_pairs_loss": 0.5762031078338623,
"eval_gooaq_pairs_runtime": 0.7962,
"eval_gooaq_pairs_samples_per_second": 188.399,
"eval_gooaq_pairs_steps_per_second": 2.512,
"step": 716
},
{
"epoch": 0.2,
"eval_mrpc_pairs_loss": 0.05901935696601868,
"eval_mrpc_pairs_runtime": 0.1022,
"eval_mrpc_pairs_samples_per_second": 1467.265,
"eval_mrpc_pairs_steps_per_second": 19.564,
"step": 716
},
{
"epoch": 0.2011173184357542,
"grad_norm": 2.0602877140045166,
"learning_rate": 1.9174472250808337e-05,
"loss": 0.9815,
"step": 720
},
{
"epoch": 0.20614525139664805,
"grad_norm": 10.161604881286621,
"learning_rate": 1.90063252382312e-05,
"loss": 0.568,
"step": 738
},
{
"epoch": 0.2111731843575419,
"grad_norm": 18.392881393432617,
"learning_rate": 1.882358865465376e-05,
"loss": 0.8045,
"step": 756
},
{
"epoch": 0.21620111731843575,
"grad_norm": 1.8240203857421875,
"learning_rate": 1.8626595492838756e-05,
"loss": 0.9535,
"step": 774
},
{
"epoch": 0.2212290502793296,
"grad_norm": 0.7226653099060059,
"learning_rate": 1.8415704724678457e-05,
"loss": 0.8132,
"step": 792
},
{
"epoch": 0.22625698324022347,
"grad_norm": 5.971342086791992,
"learning_rate": 1.819130064705612e-05,
"loss": 0.6948,
"step": 810
},
{
"epoch": 0.23128491620111732,
"grad_norm": 8.432775497436523,
"learning_rate": 1.795379218155883e-05,
"loss": 1.2272,
"step": 828
},
{
"epoch": 0.23631284916201117,
"grad_norm": 18.75815200805664,
"learning_rate": 1.7703612129317886e-05,
"loss": 0.9203,
"step": 846
},
{
"epoch": 0.24134078212290502,
"grad_norm": 0.6807297468185425,
"learning_rate": 1.7441216382334472e-05,
"loss": 0.6491,
"step": 864
},
{
"epoch": 0.24636871508379887,
"grad_norm": 0.27311134338378906,
"learning_rate": 1.716708309272793e-05,
"loss": 0.7194,
"step": 882
},
{
"epoch": 0.25,
"eval_nli-pairs_loss": 0.9641673564910889,
"eval_nli-pairs_runtime": 3.4016,
"eval_nli-pairs_samples_per_second": 44.097,
"eval_nli-pairs_steps_per_second": 0.588,
"eval_sts-test_pearson_cosine": 0.7984203994030687,
"eval_sts-test_pearson_dot": 0.5718132073426122,
"eval_sts-test_pearson_euclidean": 0.751738033448039,
"eval_sts-test_pearson_manhattan": 0.7480386170862788,
"eval_sts-test_pearson_max": 0.7984203994030687,
"eval_sts-test_spearman_cosine": 0.8106824393227386,
"eval_sts-test_spearman_dot": 0.5505203809846263,
"eval_sts-test_spearman_euclidean": 0.7424109996455601,
"eval_sts-test_spearman_manhattan": 0.740994183877676,
"eval_sts-test_spearman_max": 0.8106824393227386,
"step": 895
},
{
"epoch": 0.25,
"eval_vitaminc-pairs_loss": 4.445808410644531,
"eval_vitaminc-pairs_runtime": 0.8341,
"eval_vitaminc-pairs_samples_per_second": 149.86,
"eval_vitaminc-pairs_steps_per_second": 2.398,
"step": 895
},
{
"epoch": 0.25,
"eval_qnli-contrastive_loss": 0.26446962356567383,
"eval_qnli-contrastive_runtime": 0.1192,
"eval_qnli-contrastive_samples_per_second": 1258.221,
"eval_qnli-contrastive_steps_per_second": 16.776,
"step": 895
},
{
"epoch": 0.25,
"eval_scitail-pairs-qa_loss": 0.06552065908908844,
"eval_scitail-pairs-qa_runtime": 0.685,
"eval_scitail-pairs-qa_samples_per_second": 218.99,
"eval_scitail-pairs-qa_steps_per_second": 2.92,
"step": 895
},
{
"epoch": 0.25,
"eval_scitail-pairs-pos_loss": 0.2940276265144348,
"eval_scitail-pairs-pos_runtime": 1.7906,
"eval_scitail-pairs-pos_samples_per_second": 83.773,
"eval_scitail-pairs-pos_steps_per_second": 1.117,
"step": 895
},
{
"epoch": 0.25,
"eval_xsum-pairs_loss": 0.46034616231918335,
"eval_xsum-pairs_runtime": 0.1605,
"eval_xsum-pairs_samples_per_second": 510.946,
"eval_xsum-pairs_steps_per_second": 6.231,
"step": 895
},
{
"epoch": 0.25,
"eval_compression-pairs_loss": 0.10285492241382599,
"eval_compression-pairs_runtime": 0.1054,
"eval_compression-pairs_samples_per_second": 1423.451,
"eval_compression-pairs_steps_per_second": 18.979,
"step": 895
},
{
"epoch": 0.25,
"eval_sciq_pairs_loss": 0.16273371875286102,
"eval_sciq_pairs_runtime": 4.8468,
"eval_sciq_pairs_samples_per_second": 30.948,
"eval_sciq_pairs_steps_per_second": 0.413,
"step": 895
},
{
"epoch": 0.25,
"eval_qasc_pairs_loss": 0.24400106072425842,
"eval_qasc_pairs_runtime": 0.6336,
"eval_qasc_pairs_samples_per_second": 236.76,
"eval_qasc_pairs_steps_per_second": 3.157,
"step": 895
},
{
"epoch": 0.25,
"eval_openbookqa_pairs_loss": 1.5389502048492432,
"eval_openbookqa_pairs_runtime": 0.6864,
"eval_openbookqa_pairs_samples_per_second": 218.536,
"eval_openbookqa_pairs_steps_per_second": 2.914,
"step": 895
},
{
"epoch": 0.25,
"eval_msmarco_pairs_loss": 0.6092618703842163,
"eval_msmarco_pairs_runtime": 1.1561,
"eval_msmarco_pairs_samples_per_second": 129.746,
"eval_msmarco_pairs_steps_per_second": 1.73,
"step": 895
},
{
"epoch": 0.25,
"eval_nq_pairs_loss": 0.5972890853881836,
"eval_nq_pairs_runtime": 2.2849,
"eval_nq_pairs_samples_per_second": 65.648,
"eval_nq_pairs_steps_per_second": 0.875,
"step": 895
},
{
"epoch": 0.25,
"eval_trivia_pairs_loss": 1.1646121740341187,
"eval_trivia_pairs_runtime": 3.1713,
"eval_trivia_pairs_samples_per_second": 47.299,
"eval_trivia_pairs_steps_per_second": 0.631,
"step": 895
},
{
"epoch": 0.25,
"eval_quora_pairs_loss": 0.2002115696668625,
"eval_quora_pairs_runtime": 0.3724,
"eval_quora_pairs_samples_per_second": 402.785,
"eval_quora_pairs_steps_per_second": 5.37,
"step": 895
},
{
"epoch": 0.25,
"eval_gooaq_pairs_loss": 0.5770832300186157,
"eval_gooaq_pairs_runtime": 0.7637,
"eval_gooaq_pairs_samples_per_second": 196.419,
"eval_gooaq_pairs_steps_per_second": 2.619,
"step": 895
},
{
"epoch": 0.25,
"eval_mrpc_pairs_loss": 0.060786984860897064,
"eval_mrpc_pairs_runtime": 0.1004,
"eval_mrpc_pairs_samples_per_second": 1493.964,
"eval_mrpc_pairs_steps_per_second": 19.92,
"step": 895
},
{
"epoch": 0.25139664804469275,
"grad_norm": 9.622016906738281,
"learning_rate": 1.6881711801420374e-05,
"loss": 0.9089,
"step": 900
},
{
"epoch": 0.25642458100558657,
"grad_norm": 3.134366035461426,
"learning_rate": 1.658562252784545e-05,
"loss": 0.9682,
"step": 918
},
{
"epoch": 0.26145251396648045,
"grad_norm": 11.332123756408691,
"learning_rate": 1.627935482234001e-05,
"loss": 1.0025,
"step": 936
},
{
"epoch": 0.2664804469273743,
"grad_norm": 10.678857803344727,
"learning_rate": 1.5963466782945497e-05,
"loss": 1.0259,
"step": 954
},
{
"epoch": 0.27150837988826815,
"grad_norm": 0.8130522966384888,
"learning_rate": 1.5638534038410646e-05,
"loss": 0.8946,
"step": 972
},
{
"epoch": 0.276536312849162,
"grad_norm": 25.857967376708984,
"learning_rate": 1.5305148699248803e-05,
"loss": 0.72,
"step": 990
},
{
"epoch": 0.28156424581005585,
"grad_norm": 8.641229629516602,
"learning_rate": 1.4963918278761172e-05,
"loss": 0.8911,
"step": 1008
},
{
"epoch": 0.2865921787709497,
"grad_norm": 5.093626499176025,
"learning_rate": 1.4615464585992371e-05,
"loss": 0.8059,
"step": 1026
},
{
"epoch": 0.2916201117318436,
"grad_norm": 0.18065588176250458,
"learning_rate": 1.4260422592635362e-05,
"loss": 1.0758,
"step": 1044
},
{
"epoch": 0.2966480446927374,
"grad_norm": 10.29207706451416,
"learning_rate": 1.3899439275950759e-05,
"loss": 0.8875,
"step": 1062
},
{
"epoch": 0.3,
"eval_nli-pairs_loss": 0.9130206108093262,
"eval_nli-pairs_runtime": 3.3804,
"eval_nli-pairs_samples_per_second": 44.373,
"eval_nli-pairs_steps_per_second": 0.592,
"eval_sts-test_pearson_cosine": 0.7935564606718174,
"eval_sts-test_pearson_dot": 0.5781608636588449,
"eval_sts-test_pearson_euclidean": 0.7502295476446079,
"eval_sts-test_pearson_manhattan": 0.746737096593109,
"eval_sts-test_pearson_max": 0.7935564606718174,
"eval_sts-test_spearman_cosine": 0.8086930151421495,
"eval_sts-test_spearman_dot": 0.5549031403563933,
"eval_sts-test_spearman_euclidean": 0.739389609130169,
"eval_sts-test_spearman_manhattan": 0.7380312227930571,
"eval_sts-test_spearman_max": 0.8086930151421495,
"step": 1074
},
{
"epoch": 0.3,
"eval_vitaminc-pairs_loss": 4.481905937194824,
"eval_vitaminc-pairs_runtime": 0.8115,
"eval_vitaminc-pairs_samples_per_second": 154.041,
"eval_vitaminc-pairs_steps_per_second": 2.465,
"step": 1074
},
{
"epoch": 0.3,
"eval_qnli-contrastive_loss": 0.271823525428772,
"eval_qnli-contrastive_runtime": 0.1166,
"eval_qnli-contrastive_samples_per_second": 1286.546,
"eval_qnli-contrastive_steps_per_second": 17.154,
"step": 1074
},
{
"epoch": 0.3,
"eval_scitail-pairs-qa_loss": 0.06460323184728622,
"eval_scitail-pairs-qa_runtime": 0.6655,
"eval_scitail-pairs-qa_samples_per_second": 225.379,
"eval_scitail-pairs-qa_steps_per_second": 3.005,
"step": 1074
},
{
"epoch": 0.3,
"eval_scitail-pairs-pos_loss": 0.28618788719177246,
"eval_scitail-pairs-pos_runtime": 1.7829,
"eval_scitail-pairs-pos_samples_per_second": 84.131,
"eval_scitail-pairs-pos_steps_per_second": 1.122,
"step": 1074
},
{
"epoch": 0.3,
"eval_xsum-pairs_loss": 0.46461817622184753,
"eval_xsum-pairs_runtime": 0.1506,
"eval_xsum-pairs_samples_per_second": 544.581,
"eval_xsum-pairs_steps_per_second": 6.641,
"step": 1074
},
{
"epoch": 0.3,
"eval_compression-pairs_loss": 0.0993683859705925,
"eval_compression-pairs_runtime": 0.1041,
"eval_compression-pairs_samples_per_second": 1440.48,
"eval_compression-pairs_steps_per_second": 19.206,
"step": 1074
},
{
"epoch": 0.3,
"eval_sciq_pairs_loss": 0.16428890824317932,
"eval_sciq_pairs_runtime": 4.9141,
"eval_sciq_pairs_samples_per_second": 30.524,
"eval_sciq_pairs_steps_per_second": 0.407,
"step": 1074
},
{
"epoch": 0.3,
"eval_qasc_pairs_loss": 0.2515706419944763,
"eval_qasc_pairs_runtime": 0.6397,
"eval_qasc_pairs_samples_per_second": 234.494,
"eval_qasc_pairs_steps_per_second": 3.127,
"step": 1074
},
{
"epoch": 0.3,
"eval_openbookqa_pairs_loss": 1.5371934175491333,
"eval_openbookqa_pairs_runtime": 0.6887,
"eval_openbookqa_pairs_samples_per_second": 217.793,
"eval_openbookqa_pairs_steps_per_second": 2.904,
"step": 1074
},
{
"epoch": 0.3,
"eval_msmarco_pairs_loss": 0.6019803881645203,
"eval_msmarco_pairs_runtime": 1.1582,
"eval_msmarco_pairs_samples_per_second": 129.515,
"eval_msmarco_pairs_steps_per_second": 1.727,
"step": 1074
},
{
"epoch": 0.3,
"eval_nq_pairs_loss": 0.6118470430374146,
"eval_nq_pairs_runtime": 2.2708,
"eval_nq_pairs_samples_per_second": 66.056,
"eval_nq_pairs_steps_per_second": 0.881,
"step": 1074
},
{
"epoch": 0.3,
"eval_trivia_pairs_loss": 1.12074613571167,
"eval_trivia_pairs_runtime": 3.1611,
"eval_trivia_pairs_samples_per_second": 47.452,
"eval_trivia_pairs_steps_per_second": 0.633,
"step": 1074
},
{
"epoch": 0.3,
"eval_quora_pairs_loss": 0.22596687078475952,
"eval_quora_pairs_runtime": 0.3879,
"eval_quora_pairs_samples_per_second": 386.741,
"eval_quora_pairs_steps_per_second": 5.157,
"step": 1074
},
{
"epoch": 0.3,
"eval_gooaq_pairs_loss": 0.5833558440208435,
"eval_gooaq_pairs_runtime": 0.7697,
"eval_gooaq_pairs_samples_per_second": 194.881,
"eval_gooaq_pairs_steps_per_second": 2.598,
"step": 1074
},
{
"epoch": 0.3,
"eval_mrpc_pairs_loss": 0.058091748505830765,
"eval_mrpc_pairs_runtime": 0.1053,
"eval_mrpc_pairs_samples_per_second": 1424.202,
"eval_mrpc_pairs_steps_per_second": 18.989,
"step": 1074
},
{
"epoch": 0.3016759776536313,
"grad_norm": 3.0486669540405273,
"learning_rate": 1.3533172439808864e-05,
"loss": 1.1998,
"step": 1080
},
{
"epoch": 0.3067039106145251,
"grad_norm": 9.376039505004883,
"learning_rate": 1.3162289516002921e-05,
"loss": 0.7894,
"step": 1098
},
{
"epoch": 0.311731843575419,
"grad_norm": 2.824523448944092,
"learning_rate": 1.2787466348017752e-05,
"loss": 1.1598,
"step": 1116
},
{
"epoch": 0.3167597765363129,
"grad_norm": 13.443314552307129,
"learning_rate": 1.2430464185616653e-05,
"loss": 1.0983,
"step": 1134
},
{
"epoch": 0.3217877094972067,
"grad_norm": 7.764914035797119,
"learning_rate": 1.2049940057022904e-05,
"loss": 0.8006,
"step": 1152
},
{
"epoch": 0.3268156424581006,
"grad_norm": 7.54190731048584,
"learning_rate": 1.1667502669280646e-05,
"loss": 0.6122,
"step": 1170
},
{
"epoch": 0.3318435754189944,
"grad_norm": 9.039295196533203,
"learning_rate": 1.1283848921061318e-05,
"loss": 0.5932,
"step": 1188
},
{
"epoch": 0.3368715083798883,
"grad_norm": 7.766181468963623,
"learning_rate": 1.0899677927556045e-05,
"loss": 0.6522,
"step": 1206
},
{
"epoch": 0.3418994413407821,
"grad_norm": 17.672466278076172,
"learning_rate": 1.0515689746509086e-05,
"loss": 1.0307,
"step": 1224
},
{
"epoch": 0.346927374301676,
"grad_norm": 8.456610679626465,
"learning_rate": 1.0132584102533657e-05,
"loss": 1.0892,
"step": 1242
},
{
"epoch": 0.35,
"eval_nli-pairs_loss": 0.9248345494270325,
"eval_nli-pairs_runtime": 3.3923,
"eval_nli-pairs_samples_per_second": 44.218,
"eval_nli-pairs_steps_per_second": 0.59,
"eval_sts-test_pearson_cosine": 0.7948484764216447,
"eval_sts-test_pearson_dot": 0.5671123561184989,
"eval_sts-test_pearson_euclidean": 0.7505417118786587,
"eval_sts-test_pearson_manhattan": 0.7471600903913781,
"eval_sts-test_pearson_max": 0.7948484764216447,
"eval_sts-test_spearman_cosine": 0.8098066598796416,
"eval_sts-test_spearman_dot": 0.5445125288737649,
"eval_sts-test_spearman_euclidean": 0.7404259515118012,
"eval_sts-test_spearman_manhattan": 0.7397832260792029,
"eval_sts-test_spearman_max": 0.8098066598796416,
"step": 1253
},
{
"epoch": 0.35,
"eval_vitaminc-pairs_loss": 4.364264965057373,
"eval_vitaminc-pairs_runtime": 0.8126,
"eval_vitaminc-pairs_samples_per_second": 153.831,
"eval_vitaminc-pairs_steps_per_second": 2.461,
"step": 1253
},
{
"epoch": 0.35,
"eval_qnli-contrastive_loss": 0.2623118460178375,
"eval_qnli-contrastive_runtime": 0.1187,
"eval_qnli-contrastive_samples_per_second": 1264.134,
"eval_qnli-contrastive_steps_per_second": 16.855,
"step": 1253
},
{
"epoch": 0.35,
"eval_scitail-pairs-qa_loss": 0.0632135197520256,
"eval_scitail-pairs-qa_runtime": 0.6764,
"eval_scitail-pairs-qa_samples_per_second": 221.754,
"eval_scitail-pairs-qa_steps_per_second": 2.957,
"step": 1253
},
{
"epoch": 0.35,
"eval_scitail-pairs-pos_loss": 0.2840258777141571,
"eval_scitail-pairs-pos_runtime": 1.7876,
"eval_scitail-pairs-pos_samples_per_second": 83.913,
"eval_scitail-pairs-pos_steps_per_second": 1.119,
"step": 1253
},
{
"epoch": 0.35,
"eval_xsum-pairs_loss": 0.45192036032676697,
"eval_xsum-pairs_runtime": 0.1525,
"eval_xsum-pairs_samples_per_second": 537.578,
"eval_xsum-pairs_steps_per_second": 6.556,
"step": 1253
},
{
"epoch": 0.35,
"eval_compression-pairs_loss": 0.09640318900346756,
"eval_compression-pairs_runtime": 0.1083,
"eval_compression-pairs_samples_per_second": 1385.341,
"eval_compression-pairs_steps_per_second": 18.471,
"step": 1253
},
{
"epoch": 0.35,
"eval_sciq_pairs_loss": 0.16294126212596893,
"eval_sciq_pairs_runtime": 4.9672,
"eval_sciq_pairs_samples_per_second": 30.198,
"eval_sciq_pairs_steps_per_second": 0.403,
"step": 1253
},
{
"epoch": 0.35,
"eval_qasc_pairs_loss": 0.24691826105117798,
"eval_qasc_pairs_runtime": 0.6342,
"eval_qasc_pairs_samples_per_second": 236.512,
"eval_qasc_pairs_steps_per_second": 3.153,
"step": 1253
},
{
"epoch": 0.35,
"eval_openbookqa_pairs_loss": 1.5197923183441162,
"eval_openbookqa_pairs_runtime": 0.6915,
"eval_openbookqa_pairs_samples_per_second": 216.924,
"eval_openbookqa_pairs_steps_per_second": 2.892,
"step": 1253
},
{
"epoch": 0.35,
"eval_msmarco_pairs_loss": 0.5905637145042419,
"eval_msmarco_pairs_runtime": 1.1595,
"eval_msmarco_pairs_samples_per_second": 129.367,
"eval_msmarco_pairs_steps_per_second": 1.725,
"step": 1253
},
{
"epoch": 0.35,
"eval_nq_pairs_loss": 0.5736532211303711,
"eval_nq_pairs_runtime": 2.2864,
"eval_nq_pairs_samples_per_second": 65.606,
"eval_nq_pairs_steps_per_second": 0.875,
"step": 1253
},
{
"epoch": 0.35,
"eval_trivia_pairs_loss": 1.1131163835525513,
"eval_trivia_pairs_runtime": 3.1686,
"eval_trivia_pairs_samples_per_second": 47.34,
"eval_trivia_pairs_steps_per_second": 0.631,
"step": 1253
},
{
"epoch": 0.35,
"eval_quora_pairs_loss": 0.2874162197113037,
"eval_quora_pairs_runtime": 0.4087,
"eval_quora_pairs_samples_per_second": 366.989,
"eval_quora_pairs_steps_per_second": 4.893,
"step": 1253
},
{
"epoch": 0.35,
"eval_gooaq_pairs_loss": 0.5454840660095215,
"eval_gooaq_pairs_runtime": 0.7785,
"eval_gooaq_pairs_samples_per_second": 192.683,
"eval_gooaq_pairs_steps_per_second": 2.569,
"step": 1253
},
{
"epoch": 0.35,
"eval_mrpc_pairs_loss": 0.05724556744098663,
"eval_mrpc_pairs_runtime": 0.1564,
"eval_mrpc_pairs_samples_per_second": 958.775,
"eval_mrpc_pairs_steps_per_second": 12.784,
"step": 1253
},
{
"epoch": 0.35195530726256985,
"grad_norm": 9.2592191696167,
"learning_rate": 9.75105911203486e-06,
"loss": 1.0845,
"step": 1260
},
{
"epoch": 0.35698324022346367,
"grad_norm": 8.712442398071289,
"learning_rate": 9.371810011063151e-06,
"loss": 0.931,
"step": 1278
},
{
"epoch": 0.36201117318435755,
"grad_norm": 12.440937995910645,
"learning_rate": 8.995527888416549e-06,
"loss": 0.6159,
"step": 1296
},
{
"epoch": 0.36703910614525137,
"grad_norm": 7.335140705108643,
"learning_rate": 8.6228984263002e-06,
"loss": 0.6498,
"step": 1314
},
{
"epoch": 0.37206703910614525,
"grad_norm": 9.586339950561523,
"learning_rate": 8.254600650838152e-06,
"loss": 0.9762,
"step": 1332
},
{
"epoch": 0.3770949720670391,
"grad_norm": 5.655767917633057,
"learning_rate": 7.891305694714182e-06,
"loss": 0.7534,
"step": 1350
},
{
"epoch": 0.38212290502793295,
"grad_norm": 11.050688743591309,
"learning_rate": 7.533675574196519e-06,
"loss": 0.6792,
"step": 1368
},
{
"epoch": 0.3871508379888268,
"grad_norm": 8.111286163330078,
"learning_rate": 7.182361982774984e-06,
"loss": 0.6524,
"step": 1386
},
{
"epoch": 0.39217877094972065,
"grad_norm": 0.9727733135223389,
"learning_rate": 6.8380051036088735e-06,
"loss": 0.8079,
"step": 1404
},
{
"epoch": 0.3972067039106145,
"grad_norm": 7.61720609664917,
"learning_rate": 6.501232442949635e-06,
"loss": 1.2497,
"step": 1422
},
{
"epoch": 0.4,
"eval_nli-pairs_loss": 0.9316923022270203,
"eval_nli-pairs_runtime": 3.6665,
"eval_nli-pairs_samples_per_second": 40.911,
"eval_nli-pairs_steps_per_second": 0.545,
"eval_sts-test_pearson_cosine": 0.7951706194225081,
"eval_sts-test_pearson_dot": 0.5655762874531658,
"eval_sts-test_pearson_euclidean": 0.748417242742893,
"eval_sts-test_pearson_manhattan": 0.745117038641416,
"eval_sts-test_pearson_max": 0.7951706194225081,
"eval_sts-test_spearman_cosine": 0.808265389218048,
"eval_sts-test_spearman_dot": 0.5439374661732892,
"eval_sts-test_spearman_euclidean": 0.737979067648202,
"eval_sts-test_spearman_manhattan": 0.7367152789420606,
"eval_sts-test_spearman_max": 0.808265389218048,
"step": 1432
},
{
"epoch": 0.4,
"eval_vitaminc-pairs_loss": 4.391602993011475,
"eval_vitaminc-pairs_runtime": 0.8561,
"eval_vitaminc-pairs_samples_per_second": 146.012,
"eval_vitaminc-pairs_steps_per_second": 2.336,
"step": 1432
},
{
"epoch": 0.4,
"eval_qnli-contrastive_loss": 0.2666786313056946,
"eval_qnli-contrastive_runtime": 0.1247,
"eval_qnli-contrastive_samples_per_second": 1202.967,
"eval_qnli-contrastive_steps_per_second": 16.04,
"step": 1432
},
{
"epoch": 0.4,
"eval_scitail-pairs-qa_loss": 0.06357826292514801,
"eval_scitail-pairs-qa_runtime": 0.7031,
"eval_scitail-pairs-qa_samples_per_second": 213.329,
"eval_scitail-pairs-qa_steps_per_second": 2.844,
"step": 1432
},
{
"epoch": 0.4,
"eval_scitail-pairs-pos_loss": 0.2862562835216522,
"eval_scitail-pairs-pos_runtime": 1.9248,
"eval_scitail-pairs-pos_samples_per_second": 77.929,
"eval_scitail-pairs-pos_steps_per_second": 1.039,
"step": 1432
},
{
"epoch": 0.4,
"eval_xsum-pairs_loss": 0.4477728009223938,
"eval_xsum-pairs_runtime": 0.1563,
"eval_xsum-pairs_samples_per_second": 524.667,
"eval_xsum-pairs_steps_per_second": 6.398,
"step": 1432
},
{
"epoch": 0.4,
"eval_compression-pairs_loss": 0.09554792195558548,
"eval_compression-pairs_runtime": 0.1113,
"eval_compression-pairs_samples_per_second": 1348.03,
"eval_compression-pairs_steps_per_second": 17.974,
"step": 1432
},
{
"epoch": 0.4,
"eval_sciq_pairs_loss": 0.16296182572841644,
"eval_sciq_pairs_runtime": 4.8695,
"eval_sciq_pairs_samples_per_second": 30.804,
"eval_sciq_pairs_steps_per_second": 0.411,
"step": 1432
},
{
"epoch": 0.4,
"eval_qasc_pairs_loss": 0.2451217621564865,
"eval_qasc_pairs_runtime": 0.6585,
"eval_qasc_pairs_samples_per_second": 227.783,
"eval_qasc_pairs_steps_per_second": 3.037,
"step": 1432
},
{
"epoch": 0.4,
"eval_openbookqa_pairs_loss": 1.5293627977371216,
"eval_openbookqa_pairs_runtime": 0.6922,
"eval_openbookqa_pairs_samples_per_second": 216.714,
"eval_openbookqa_pairs_steps_per_second": 2.89,
"step": 1432
},
{
"epoch": 0.4,
"eval_msmarco_pairs_loss": 0.5807374119758606,
"eval_msmarco_pairs_runtime": 1.1603,
"eval_msmarco_pairs_samples_per_second": 129.282,
"eval_msmarco_pairs_steps_per_second": 1.724,
"step": 1432
},
{
"epoch": 0.4,
"eval_nq_pairs_loss": 0.5674509406089783,
"eval_nq_pairs_runtime": 2.2725,
"eval_nq_pairs_samples_per_second": 66.007,
"eval_nq_pairs_steps_per_second": 0.88,
"step": 1432
},
{
"epoch": 0.4,
"eval_trivia_pairs_loss": 1.0616999864578247,
"eval_trivia_pairs_runtime": 3.1673,
"eval_trivia_pairs_samples_per_second": 47.359,
"eval_trivia_pairs_steps_per_second": 0.631,
"step": 1432
},
{
"epoch": 0.4,
"eval_quora_pairs_loss": 0.18273381888866425,
"eval_quora_pairs_runtime": 0.384,
"eval_quora_pairs_samples_per_second": 390.615,
"eval_quora_pairs_steps_per_second": 5.208,
"step": 1432
},
{
"epoch": 0.4,
"eval_gooaq_pairs_loss": 0.5485998392105103,
"eval_gooaq_pairs_runtime": 0.7712,
"eval_gooaq_pairs_samples_per_second": 194.508,
"eval_gooaq_pairs_steps_per_second": 2.593,
"step": 1432
},
{
"epoch": 0.4,
"eval_mrpc_pairs_loss": 0.05663560330867767,
"eval_mrpc_pairs_runtime": 0.1016,
"eval_mrpc_pairs_samples_per_second": 1475.752,
"eval_mrpc_pairs_steps_per_second": 19.677,
"step": 1432
},
{
"epoch": 0.4022346368715084,
"grad_norm": 9.786046028137207,
"learning_rate": 6.172657686664055e-06,
"loss": 0.8498,
"step": 1440
},
{
"epoch": 0.4072625698324022,
"grad_norm": 0.5503944158554077,
"learning_rate": 5.852879581941775e-06,
"loss": 0.5437,
"step": 1458
},
{
"epoch": 0.4122905027932961,
"grad_norm": 7.578904151916504,
"learning_rate": 5.542480846224858e-06,
"loss": 1.0774,
"step": 1476
},
{
"epoch": 0.4173184357541899,
"grad_norm": 10.365352630615234,
"learning_rate": 5.2420271053476904e-06,
"loss": 0.9975,
"step": 1494
},
{
"epoch": 0.4223463687150838,
"grad_norm": 0.6473840475082397,
"learning_rate": 4.9520658628221e-06,
"loss": 0.9214,
"step": 1512
},
{
"epoch": 0.4273743016759777,
"grad_norm": 10.450447082519531,
"learning_rate": 4.6731255021460435e-06,
"loss": 0.8964,
"step": 1530
},
{
"epoch": 0.4324022346368715,
"grad_norm": 9.311734199523926,
"learning_rate": 4.405714323953814e-06,
"loss": 0.669,
"step": 1548
},
{
"epoch": 0.4374301675977654,
"grad_norm": 36.57959747314453,
"learning_rate": 4.150319619762361e-06,
"loss": 1.1688,
"step": 1566
},
{
"epoch": 0.4424581005586592,
"grad_norm": 15.4668607711792,
"learning_rate": 3.907406784001587e-06,
"loss": 0.5588,
"step": 1584
},
{
"epoch": 0.4474860335195531,
"grad_norm": 11.20753288269043,
"learning_rate": 3.6774184659467597e-06,
"loss": 0.8192,
"step": 1602
},
{
"epoch": 0.45,
"eval_nli-pairs_loss": 0.9224842190742493,
"eval_nli-pairs_runtime": 3.4345,
"eval_nli-pairs_samples_per_second": 43.675,
"eval_nli-pairs_steps_per_second": 0.582,
"eval_sts-test_pearson_cosine": 0.7947548033548021,
"eval_sts-test_pearson_dot": 0.5642970712925082,
"eval_sts-test_pearson_euclidean": 0.7480023824232991,
"eval_sts-test_pearson_manhattan": 0.7449444544911363,
"eval_sts-test_pearson_max": 0.7947548033548021,
"eval_sts-test_spearman_cosine": 0.8077938075661354,
"eval_sts-test_spearman_dot": 0.5418922459663886,
"eval_sts-test_spearman_euclidean": 0.7372467308885063,
"eval_sts-test_spearman_manhattan": 0.736073760123024,
"eval_sts-test_spearman_max": 0.8077938075661354,
"step": 1611
},
{
"epoch": 0.45,
"eval_vitaminc-pairs_loss": 4.448488235473633,
"eval_vitaminc-pairs_runtime": 0.8352,
"eval_vitaminc-pairs_samples_per_second": 149.657,
"eval_vitaminc-pairs_steps_per_second": 2.395,
"step": 1611
},
{
"epoch": 0.45,
"eval_qnli-contrastive_loss": 0.27063658833503723,
"eval_qnli-contrastive_runtime": 0.1172,
"eval_qnli-contrastive_samples_per_second": 1280.373,
"eval_qnli-contrastive_steps_per_second": 17.072,
"step": 1611
},
{
"epoch": 0.45,
"eval_scitail-pairs-qa_loss": 0.06333789229393005,
"eval_scitail-pairs-qa_runtime": 0.6731,
"eval_scitail-pairs-qa_samples_per_second": 222.838,
"eval_scitail-pairs-qa_steps_per_second": 2.971,
"step": 1611
},
{
"epoch": 0.45,
"eval_scitail-pairs-pos_loss": 0.2783344089984894,
"eval_scitail-pairs-pos_runtime": 1.773,
"eval_scitail-pairs-pos_samples_per_second": 84.601,
"eval_scitail-pairs-pos_steps_per_second": 1.128,
"step": 1611
},
{
"epoch": 0.45,
"eval_xsum-pairs_loss": 0.44794514775276184,
"eval_xsum-pairs_runtime": 0.1515,
"eval_xsum-pairs_samples_per_second": 541.356,
"eval_xsum-pairs_steps_per_second": 6.602,
"step": 1611
},
{
"epoch": 0.45,
"eval_compression-pairs_loss": 0.0947626531124115,
"eval_compression-pairs_runtime": 0.1026,
"eval_compression-pairs_samples_per_second": 1461.376,
"eval_compression-pairs_steps_per_second": 19.485,
"step": 1611
},
{
"epoch": 0.45,
"eval_sciq_pairs_loss": 0.16194498538970947,
"eval_sciq_pairs_runtime": 4.8935,
"eval_sciq_pairs_samples_per_second": 30.653,
"eval_sciq_pairs_steps_per_second": 0.409,
"step": 1611
},
{
"epoch": 0.45,
"eval_qasc_pairs_loss": 0.24168159067630768,
"eval_qasc_pairs_runtime": 0.633,
"eval_qasc_pairs_samples_per_second": 236.951,
"eval_qasc_pairs_steps_per_second": 3.159,
"step": 1611
},
{
"epoch": 0.45,
"eval_openbookqa_pairs_loss": 1.5171433687210083,
"eval_openbookqa_pairs_runtime": 0.6889,
"eval_openbookqa_pairs_samples_per_second": 217.741,
"eval_openbookqa_pairs_steps_per_second": 2.903,
"step": 1611
},
{
"epoch": 0.45,
"eval_msmarco_pairs_loss": 0.5660761594772339,
"eval_msmarco_pairs_runtime": 1.1539,
"eval_msmarco_pairs_samples_per_second": 129.998,
"eval_msmarco_pairs_steps_per_second": 1.733,
"step": 1611
},
{
"epoch": 0.45,
"eval_nq_pairs_loss": 0.5651562213897705,
"eval_nq_pairs_runtime": 2.2681,
"eval_nq_pairs_samples_per_second": 66.133,
"eval_nq_pairs_steps_per_second": 0.882,
"step": 1611
},
{
"epoch": 0.45,
"eval_trivia_pairs_loss": 1.0472347736358643,
"eval_trivia_pairs_runtime": 3.1346,
"eval_trivia_pairs_samples_per_second": 47.854,
"eval_trivia_pairs_steps_per_second": 0.638,
"step": 1611
},
{
"epoch": 0.45,
"eval_quora_pairs_loss": 0.2106107771396637,
"eval_quora_pairs_runtime": 0.3754,
"eval_quora_pairs_samples_per_second": 399.527,
"eval_quora_pairs_steps_per_second": 5.327,
"step": 1611
},
{
"epoch": 0.45,
"eval_gooaq_pairs_loss": 0.5418450832366943,
"eval_gooaq_pairs_runtime": 0.7637,
"eval_gooaq_pairs_samples_per_second": 196.414,
"eval_gooaq_pairs_steps_per_second": 2.619,
"step": 1611
},
{
"epoch": 0.45,
"eval_mrpc_pairs_loss": 0.055642638355493546,
"eval_mrpc_pairs_runtime": 0.1033,
"eval_mrpc_pairs_samples_per_second": 1452.724,
"eval_mrpc_pairs_steps_per_second": 19.37,
"step": 1611
},
{
"epoch": 0.45251396648044695,
"grad_norm": 9.676753044128418,
"learning_rate": 3.4607737630983708e-06,
"loss": 0.8948,
"step": 1620
},
{
"epoch": 0.45754189944134077,
"grad_norm": 7.678776264190674,
"learning_rate": 3.2578674574793844e-06,
"loss": 1.1183,
"step": 1638
},
{
"epoch": 0.46256983240223465,
"grad_norm": 23.967897415161133,
"learning_rate": 3.0690692962414793e-06,
"loss": 1.0605,
"step": 1656
},
{
"epoch": 0.46759776536312847,
"grad_norm": 8.221102714538574,
"learning_rate": 2.894723317891229e-06,
"loss": 0.972,
"step": 1674
},
{
"epoch": 0.47262569832402235,
"grad_norm": 27.992626190185547,
"learning_rate": 2.7351472253640043e-06,
"loss": 0.7679,
"step": 1692
},
{
"epoch": 0.4776536312849162,
"grad_norm": 8.492770195007324,
"learning_rate": 2.590631807088036e-06,
"loss": 0.919,
"step": 1710
},
{
"epoch": 0.48268156424581005,
"grad_norm": 3.369513511657715,
"learning_rate": 2.4614404070935527e-06,
"loss": 0.9308,
"step": 1728
},
{
"epoch": 0.4877094972067039,
"grad_norm": 7.596469879150391,
"learning_rate": 2.347808445132657e-06,
"loss": 1.0704,
"step": 1746
},
{
"epoch": 0.49273743016759775,
"grad_norm": 11.658712387084961,
"learning_rate": 2.249942987684365e-06,
"loss": 1.1985,
"step": 1764
},
{
"epoch": 0.4977653631284916,
"grad_norm": 6.525745868682861,
"learning_rate": 2.168022370626553e-06,
"loss": 1.0564,
"step": 1782
},
{
"epoch": 0.5,
"eval_nli-pairs_loss": 0.9110333919525146,
"eval_nli-pairs_runtime": 3.3929,
"eval_nli-pairs_samples_per_second": 44.21,
"eval_nli-pairs_steps_per_second": 0.589,
"eval_sts-test_pearson_cosine": 0.7946860505353394,
"eval_sts-test_pearson_dot": 0.5621595297878732,
"eval_sts-test_pearson_euclidean": 0.7470211624755623,
"eval_sts-test_pearson_manhattan": 0.7437137423401929,
"eval_sts-test_pearson_max": 0.7946860505353394,
"eval_sts-test_spearman_cosine": 0.8076167403072096,
"eval_sts-test_spearman_dot": 0.5400005132562322,
"eval_sts-test_spearman_euclidean": 0.7362746334826669,
"eval_sts-test_spearman_manhattan": 0.7347256866136587,
"eval_sts-test_spearman_max": 0.8076167403072096,
"step": 1790
},
{
"epoch": 0.5,
"eval_vitaminc-pairs_loss": 4.431720733642578,
"eval_vitaminc-pairs_runtime": 0.8235,
"eval_vitaminc-pairs_samples_per_second": 151.785,
"eval_vitaminc-pairs_steps_per_second": 2.429,
"step": 1790
},
{
"epoch": 0.5,
"eval_qnli-contrastive_loss": 0.26112574338912964,
"eval_qnli-contrastive_runtime": 0.1181,
"eval_qnli-contrastive_samples_per_second": 1270.319,
"eval_qnli-contrastive_steps_per_second": 16.938,
"step": 1790
},
{
"epoch": 0.5,
"eval_scitail-pairs-qa_loss": 0.06175601854920387,
"eval_scitail-pairs-qa_runtime": 0.6691,
"eval_scitail-pairs-qa_samples_per_second": 224.193,
"eval_scitail-pairs-qa_steps_per_second": 2.989,
"step": 1790
},
{
"epoch": 0.5,
"eval_scitail-pairs-pos_loss": 0.2788408100605011,
"eval_scitail-pairs-pos_runtime": 1.7858,
"eval_scitail-pairs-pos_samples_per_second": 83.996,
"eval_scitail-pairs-pos_steps_per_second": 1.12,
"step": 1790
},
{
"epoch": 0.5,
"eval_xsum-pairs_loss": 0.4458795487880707,
"eval_xsum-pairs_runtime": 0.1537,
"eval_xsum-pairs_samples_per_second": 533.68,
"eval_xsum-pairs_steps_per_second": 6.508,
"step": 1790
},
{
"epoch": 0.5,
"eval_compression-pairs_loss": 0.09369674324989319,
"eval_compression-pairs_runtime": 0.1042,
"eval_compression-pairs_samples_per_second": 1439.162,
"eval_compression-pairs_steps_per_second": 19.189,
"step": 1790
},
{
"epoch": 0.5,
"eval_sciq_pairs_loss": 0.16267743706703186,
"eval_sciq_pairs_runtime": 4.8926,
"eval_sciq_pairs_samples_per_second": 30.659,
"eval_sciq_pairs_steps_per_second": 0.409,
"step": 1790
},
{
"epoch": 0.5,
"eval_qasc_pairs_loss": 0.24173419177532196,
"eval_qasc_pairs_runtime": 0.6349,
"eval_qasc_pairs_samples_per_second": 236.24,
"eval_qasc_pairs_steps_per_second": 3.15,
"step": 1790
},
{
"epoch": 0.5,
"eval_openbookqa_pairs_loss": 1.512660026550293,
"eval_openbookqa_pairs_runtime": 0.6859,
"eval_openbookqa_pairs_samples_per_second": 218.688,
"eval_openbookqa_pairs_steps_per_second": 2.916,
"step": 1790
},
{
"epoch": 0.5,
"eval_msmarco_pairs_loss": 0.5678529739379883,
"eval_msmarco_pairs_runtime": 1.1574,
"eval_msmarco_pairs_samples_per_second": 129.597,
"eval_msmarco_pairs_steps_per_second": 1.728,
"step": 1790
},
{
"epoch": 0.5,
"eval_nq_pairs_loss": 0.5613307952880859,
"eval_nq_pairs_runtime": 2.2694,
"eval_nq_pairs_samples_per_second": 66.097,
"eval_nq_pairs_steps_per_second": 0.881,
"step": 1790
},
{
"epoch": 0.5,
"eval_trivia_pairs_loss": 1.050702452659607,
"eval_trivia_pairs_runtime": 3.169,
"eval_trivia_pairs_samples_per_second": 47.334,
"eval_trivia_pairs_steps_per_second": 0.631,
"step": 1790
},
{
"epoch": 0.5,
"eval_quora_pairs_loss": 0.20358483493328094,
"eval_quora_pairs_runtime": 0.3794,
"eval_quora_pairs_samples_per_second": 395.355,
"eval_quora_pairs_steps_per_second": 5.271,
"step": 1790
},
{
"epoch": 0.5,
"eval_gooaq_pairs_loss": 0.5364322066307068,
"eval_gooaq_pairs_runtime": 0.7677,
"eval_gooaq_pairs_samples_per_second": 195.377,
"eval_gooaq_pairs_steps_per_second": 2.605,
"step": 1790
},
{
"epoch": 0.5,
"eval_mrpc_pairs_loss": 0.05436507612466812,
"eval_mrpc_pairs_runtime": 0.1022,
"eval_mrpc_pairs_samples_per_second": 1467.535,
"eval_mrpc_pairs_steps_per_second": 19.567,
"step": 1790
},
{
"epoch": 0.5027932960893855,
"grad_norm": 12.61409854888916,
"learning_rate": 2.102195874262405e-06,
"loss": 0.9128,
"step": 1800
},
{
"epoch": 0.5078212290502794,
"grad_norm": 3.934692621231079,
"learning_rate": 2.0525834512935438e-06,
"loss": 0.9643,
"step": 1818
},
{
"epoch": 0.5128491620111731,
"grad_norm": 0.5022761225700378,
"learning_rate": 2.019275508235552e-06,
"loss": 0.8366,
"step": 1836
},
{
"epoch": 0.517877094972067,
"grad_norm": 1.4803472757339478,
"learning_rate": 2.00233274067419e-06,
"loss": 1.045,
"step": 1854
},
{
"epoch": 0.5229050279329609,
"grad_norm": 3.873300552368164,
"learning_rate": 2.0017860226625237e-06,
"loss": 0.7239,
"step": 1872
},
{
"epoch": 0.5279329608938548,
"grad_norm": 1.603557825088501,
"learning_rate": 2.0176363504605122e-06,
"loss": 0.9027,
"step": 1890
},
{
"epoch": 0.5329608938547487,
"grad_norm": 7.767194747924805,
"learning_rate": 2.0498548407195652e-06,
"loss": 1.0256,
"step": 1908
},
{
"epoch": 0.5379888268156424,
"grad_norm": 0.2919946014881134,
"learning_rate": 2.0983827831153845e-06,
"loss": 0.6783,
"step": 1926
},
{
"epoch": 0.5430167597765363,
"grad_norm": 4.737668514251709,
"learning_rate": 2.163131747333189e-06,
"loss": 1.0205,
"step": 1944
},
{
"epoch": 0.5480446927374302,
"grad_norm": 12.112689018249512,
"learning_rate": 2.24398374421034e-06,
"loss": 1.0501,
"step": 1962
},
{
"epoch": 0.55,
"eval_nli-pairs_loss": 0.9151772260665894,
"eval_nli-pairs_runtime": 3.3845,
"eval_nli-pairs_samples_per_second": 44.32,
"eval_nli-pairs_steps_per_second": 0.591,
"eval_sts-test_pearson_cosine": 0.7950386994851146,
"eval_sts-test_pearson_dot": 0.561799407632955,
"eval_sts-test_pearson_euclidean": 0.7471927000576978,
"eval_sts-test_pearson_manhattan": 0.7439585200721782,
"eval_sts-test_pearson_max": 0.7950386994851146,
"eval_sts-test_spearman_cosine": 0.8079323733036744,
"eval_sts-test_spearman_dot": 0.5396753842956418,
"eval_sts-test_spearman_euclidean": 0.7365066331647045,
"eval_sts-test_spearman_manhattan": 0.7349643316784805,
"eval_sts-test_spearman_max": 0.8079323733036744,
"step": 1969
},
{
"epoch": 0.55,
"eval_vitaminc-pairs_loss": 4.449384689331055,
"eval_vitaminc-pairs_runtime": 0.8196,
"eval_vitaminc-pairs_samples_per_second": 152.52,
"eval_vitaminc-pairs_steps_per_second": 2.44,
"step": 1969
},
{
"epoch": 0.55,
"eval_qnli-contrastive_loss": 0.2493373602628708,
"eval_qnli-contrastive_runtime": 0.1168,
"eval_qnli-contrastive_samples_per_second": 1283.727,
"eval_qnli-contrastive_steps_per_second": 17.116,
"step": 1969
},
{
"epoch": 0.55,
"eval_scitail-pairs-qa_loss": 0.06184713542461395,
"eval_scitail-pairs-qa_runtime": 0.665,
"eval_scitail-pairs-qa_samples_per_second": 225.568,
"eval_scitail-pairs-qa_steps_per_second": 3.008,
"step": 1969
},
{
"epoch": 0.55,
"eval_scitail-pairs-pos_loss": 0.2792353332042694,
"eval_scitail-pairs-pos_runtime": 1.8207,
"eval_scitail-pairs-pos_samples_per_second": 82.388,
"eval_scitail-pairs-pos_steps_per_second": 1.099,
"step": 1969
},
{
"epoch": 0.55,
"eval_xsum-pairs_loss": 0.44985902309417725,
"eval_xsum-pairs_runtime": 0.153,
"eval_xsum-pairs_samples_per_second": 535.847,
"eval_xsum-pairs_steps_per_second": 6.535,
"step": 1969
},
{
"epoch": 0.55,
"eval_compression-pairs_loss": 0.09320634603500366,
"eval_compression-pairs_runtime": 0.1054,
"eval_compression-pairs_samples_per_second": 1423.332,
"eval_compression-pairs_steps_per_second": 18.978,
"step": 1969
},
{
"epoch": 0.55,
"eval_sciq_pairs_loss": 0.16240650415420532,
"eval_sciq_pairs_runtime": 4.8632,
"eval_sciq_pairs_samples_per_second": 30.844,
"eval_sciq_pairs_steps_per_second": 0.411,
"step": 1969
},
{
"epoch": 0.55,
"eval_qasc_pairs_loss": 0.23881591856479645,
"eval_qasc_pairs_runtime": 0.6406,
"eval_qasc_pairs_samples_per_second": 234.166,
"eval_qasc_pairs_steps_per_second": 3.122,
"step": 1969
},
{
"epoch": 0.55,
"eval_openbookqa_pairs_loss": 1.5175997018814087,
"eval_openbookqa_pairs_runtime": 0.6872,
"eval_openbookqa_pairs_samples_per_second": 218.27,
"eval_openbookqa_pairs_steps_per_second": 2.91,
"step": 1969
},
{
"epoch": 0.55,
"eval_msmarco_pairs_loss": 0.5688791275024414,
"eval_msmarco_pairs_runtime": 1.1543,
"eval_msmarco_pairs_samples_per_second": 129.949,
"eval_msmarco_pairs_steps_per_second": 1.733,
"step": 1969
},
{
"epoch": 0.55,
"eval_nq_pairs_loss": 0.559186577796936,
"eval_nq_pairs_runtime": 2.2663,
"eval_nq_pairs_samples_per_second": 66.187,
"eval_nq_pairs_steps_per_second": 0.882,
"step": 1969
},
{
"epoch": 0.55,
"eval_trivia_pairs_loss": 1.0515549182891846,
"eval_trivia_pairs_runtime": 3.1623,
"eval_trivia_pairs_samples_per_second": 47.434,
"eval_trivia_pairs_steps_per_second": 0.632,
"step": 1969
},
{
"epoch": 0.55,
"eval_quora_pairs_loss": 0.19930994510650635,
"eval_quora_pairs_runtime": 0.3755,
"eval_quora_pairs_samples_per_second": 399.469,
"eval_quora_pairs_steps_per_second": 5.326,
"step": 1969
},
{
"epoch": 0.55,
"eval_gooaq_pairs_loss": 0.5293665528297424,
"eval_gooaq_pairs_runtime": 0.7647,
"eval_gooaq_pairs_samples_per_second": 196.156,
"eval_gooaq_pairs_steps_per_second": 2.615,
"step": 1969
},
{
"epoch": 0.55,
"eval_mrpc_pairs_loss": 0.05420010909438133,
"eval_mrpc_pairs_runtime": 0.1031,
"eval_mrpc_pairs_samples_per_second": 1455.466,
"eval_mrpc_pairs_steps_per_second": 19.406,
"step": 1969
},
{
"epoch": 0.553072625698324,
"grad_norm": 7.374291896820068,
"learning_rate": 2.340791440742762e-06,
"loss": 1.0035,
"step": 1980
},
{
"epoch": 0.5581005586592179,
"grad_norm": 10.105379104614258,
"learning_rate": 2.453378428563332e-06,
"loss": 0.8237,
"step": 1998
},
{
"epoch": 0.5631284916201117,
"grad_norm": 0.47507917881011963,
"learning_rate": 2.581539545403014e-06,
"loss": 0.7804,
"step": 2016
},
{
"epoch": 0.5681564245810056,
"grad_norm": 8.750273704528809,
"learning_rate": 2.725041248948953e-06,
"loss": 0.9602,
"step": 2034
},
{
"epoch": 0.5731843575418994,
"grad_norm": 6.673968315124512,
"learning_rate": 2.8836220424182614e-06,
"loss": 0.6921,
"step": 2052
},
{
"epoch": 0.5782122905027933,
"grad_norm": 7.76973295211792,
"learning_rate": 3.056992951071994e-06,
"loss": 1.1073,
"step": 2070
},
{
"epoch": 0.5832402234636872,
"grad_norm": 11.25104808807373,
"learning_rate": 3.2448380488009927e-06,
"loss": 1.0952,
"step": 2088
},
{
"epoch": 0.588268156424581,
"grad_norm": 10.827056884765625,
"learning_rate": 3.446815033824003e-06,
"loss": 0.986,
"step": 2106
},
{
"epoch": 0.5932960893854748,
"grad_norm": 2.704712152481079,
"learning_rate": 3.662555852449e-06,
"loss": 0.9821,
"step": 2124
},
{
"epoch": 0.5983240223463687,
"grad_norm": 12.812453269958496,
"learning_rate": 3.891667369761114e-06,
"loss": 1.4121,
"step": 2142
},
{
"epoch": 0.6,
"eval_nli-pairs_loss": 0.9104093909263611,
"eval_nli-pairs_runtime": 3.4531,
"eval_nli-pairs_samples_per_second": 43.439,
"eval_nli-pairs_steps_per_second": 0.579,
"eval_sts-test_pearson_cosine": 0.7935960995769732,
"eval_sts-test_pearson_dot": 0.5580627112302,
"eval_sts-test_pearson_euclidean": 0.7478812796510826,
"eval_sts-test_pearson_manhattan": 0.7447776866041059,
"eval_sts-test_pearson_max": 0.7935960995769732,
"eval_sts-test_spearman_cosine": 0.8077076901857909,
"eval_sts-test_spearman_dot": 0.5352305509553539,
"eval_sts-test_spearman_euclidean": 0.7369380974127236,
"eval_sts-test_spearman_manhattan": 0.7355092851368631,
"eval_sts-test_spearman_max": 0.8077076901857909,
"step": 2148
},
{
"epoch": 0.6,
"eval_vitaminc-pairs_loss": 4.4026408195495605,
"eval_vitaminc-pairs_runtime": 0.8318,
"eval_vitaminc-pairs_samples_per_second": 150.285,
"eval_vitaminc-pairs_steps_per_second": 2.405,
"step": 2148
},
{
"epoch": 0.6,
"eval_qnli-contrastive_loss": 0.25988292694091797,
"eval_qnli-contrastive_runtime": 0.1194,
"eval_qnli-contrastive_samples_per_second": 1255.948,
"eval_qnli-contrastive_steps_per_second": 16.746,
"step": 2148
},
{
"epoch": 0.6,
"eval_scitail-pairs-qa_loss": 0.06044253334403038,
"eval_scitail-pairs-qa_runtime": 0.674,
"eval_scitail-pairs-qa_samples_per_second": 222.537,
"eval_scitail-pairs-qa_steps_per_second": 2.967,
"step": 2148
},
{
"epoch": 0.6,
"eval_scitail-pairs-pos_loss": 0.28237131237983704,
"eval_scitail-pairs-pos_runtime": 1.8129,
"eval_scitail-pairs-pos_samples_per_second": 82.738,
"eval_scitail-pairs-pos_steps_per_second": 1.103,
"step": 2148
},
{
"epoch": 0.6,
"eval_xsum-pairs_loss": 0.4531600773334503,
"eval_xsum-pairs_runtime": 0.1542,
"eval_xsum-pairs_samples_per_second": 531.931,
"eval_xsum-pairs_steps_per_second": 6.487,
"step": 2148
},
{
"epoch": 0.6,
"eval_compression-pairs_loss": 0.0922146737575531,
"eval_compression-pairs_runtime": 0.1039,
"eval_compression-pairs_samples_per_second": 1444.015,
"eval_compression-pairs_steps_per_second": 19.254,
"step": 2148
},
{
"epoch": 0.6,
"eval_sciq_pairs_loss": 0.16167433559894562,
"eval_sciq_pairs_runtime": 4.8709,
"eval_sciq_pairs_samples_per_second": 30.795,
"eval_sciq_pairs_steps_per_second": 0.411,
"step": 2148
},
{
"epoch": 0.6,
"eval_qasc_pairs_loss": 0.2368052899837494,
"eval_qasc_pairs_runtime": 0.6338,
"eval_qasc_pairs_samples_per_second": 236.682,
"eval_qasc_pairs_steps_per_second": 3.156,
"step": 2148
},
{
"epoch": 0.6,
"eval_openbookqa_pairs_loss": 1.515324354171753,
"eval_openbookqa_pairs_runtime": 0.6973,
"eval_openbookqa_pairs_samples_per_second": 215.117,
"eval_openbookqa_pairs_steps_per_second": 2.868,
"step": 2148
},
{
"epoch": 0.6,
"eval_msmarco_pairs_loss": 0.5706735253334045,
"eval_msmarco_pairs_runtime": 1.2143,
"eval_msmarco_pairs_samples_per_second": 123.528,
"eval_msmarco_pairs_steps_per_second": 1.647,
"step": 2148
},
{
"epoch": 0.6,
"eval_nq_pairs_loss": 0.5522983074188232,
"eval_nq_pairs_runtime": 2.288,
"eval_nq_pairs_samples_per_second": 65.56,
"eval_nq_pairs_steps_per_second": 0.874,
"step": 2148
},
{
"epoch": 0.6,
"eval_trivia_pairs_loss": 1.0437382459640503,
"eval_trivia_pairs_runtime": 3.1388,
"eval_trivia_pairs_samples_per_second": 47.789,
"eval_trivia_pairs_steps_per_second": 0.637,
"step": 2148
},
{
"epoch": 0.6,
"eval_quora_pairs_loss": 0.2811141908168793,
"eval_quora_pairs_runtime": 0.3726,
"eval_quora_pairs_samples_per_second": 402.612,
"eval_quora_pairs_steps_per_second": 5.368,
"step": 2148
},
{
"epoch": 0.6,
"eval_gooaq_pairs_loss": 0.5305163860321045,
"eval_gooaq_pairs_runtime": 0.7714,
"eval_gooaq_pairs_samples_per_second": 194.459,
"eval_gooaq_pairs_steps_per_second": 2.593,
"step": 2148
},
{
"epoch": 0.6,
"eval_mrpc_pairs_loss": 0.05323876813054085,
"eval_mrpc_pairs_runtime": 0.1045,
"eval_mrpc_pairs_samples_per_second": 1435.059,
"eval_mrpc_pairs_steps_per_second": 19.134,
"step": 2148
}
],
"logging_steps": 18,
"max_steps": 7160,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 716,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 82,
"trial_name": null,
"trial_params": null
}