diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,11533 @@ +{ + "best_metric": 47.3115, + "best_model_checkpoint": "/output/checkpoint-7500", + "epoch": 2.0, + "global_step": 8748, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 4.1445, + "step": 1 + }, + { + "epoch": 0.0, + "eval_exact_match": 6.759, + "eval_exact_match_for_answerability_classification": 2.6923, + "eval_exact_match_for_cause_effect_classification": 10.5714, + "eval_exact_match_for_coreference_resolution": 8.5, + "eval_exact_match_for_data_to_text": 0.3632, + "eval_exact_match_for_dialogue_act_recognition": 5.1429, + "eval_exact_match_for_grammar_error_correction": 0.5, + "eval_exact_match_for_keyword_tagging": 11.6, + "eval_exact_match_for_overlap_extraction": 4.5, + "eval_exact_match_for_question_rewriting": 0.3636, + "eval_exact_match_for_task020_mctaco_answerability_classification": 7.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 8.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 1.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 9.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 2.0, + "eval_exact_match_for_task102_commongen_data_to_text": 2.0, + "eval_exact_match_for_task1152_bard_word_analogy": 1.0, + "eval_exact_match_for_task1153_bard_word_analogy": 2.0, + "eval_exact_match_for_task1154_bard_word_analogy": 0.0, + "eval_exact_match_for_task1155_bard_word_analogy": 5.0, + "eval_exact_match_for_task1156_bard_word_analogy": 1.0, + "eval_exact_match_for_task1157_bard_word_analogy": 0.0, + "eval_exact_match_for_task1158_bard_word_analogy": 0.0, + "eval_exact_match_for_task1159_bard_word_analogy": 3.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 52.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 1.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 1.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 42.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 0.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 23.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 0.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 0.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 1.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 1.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 1.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 0.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 48.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 1.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 3.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 9.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 29.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 4.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 0.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 10.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 0.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 3.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 10.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 0.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 0.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 0.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 23.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 1.0, + "eval_exact_match_for_task602_wikitext_title_generation": 0.0, + "eval_exact_match_for_task613_liar_keyword_tagging": 4.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 0.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 0.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 37.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 13.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 53.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 4.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 0.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 51.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 0.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 34.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 1.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 0.0, + "eval_exact_match_for_textual_entailment": 18.2083, + "eval_exact_match_for_title_generation": 0.9529, + "eval_exact_match_for_word_analogy": 1.5, + "eval_f1": 20.6961, + "eval_f1_for_answerability_classification": 9.7444, + "eval_f1_for_cause_effect_classification": 21.5946, + "eval_f1_for_coreference_resolution": 15.6673, + "eval_f1_for_data_to_text": 40.8188, + "eval_f1_for_dialogue_act_recognition": 10.2801, + "eval_f1_for_grammar_error_correction": 49.3161, + "eval_f1_for_keyword_tagging": 25.134, + "eval_f1_for_overlap_extraction": 23.0893, + "eval_f1_for_question_rewriting": 42.6687, + "eval_f1_for_task020_mctaco_answerability_classification": 11.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 9.4024, + "eval_f1_for_task034_winogrande_question_rewriting": 34.0828, + "eval_f1_for_task035_winogrande_question_rewriting": 32.4251, + "eval_f1_for_task036_qasc_keyword_tagging": 42.0142, + "eval_f1_for_task039_qasc_overlap_extraction": 22.304, + "eval_f1_for_task050_multirc_answerability_classification": 2.6667, + "eval_f1_for_task102_commongen_data_to_text": 55.056, + "eval_f1_for_task1152_bard_word_analogy": 1.0, + "eval_f1_for_task1153_bard_word_analogy": 9.3333, + "eval_f1_for_task1154_bard_word_analogy": 5.6667, + "eval_f1_for_task1155_bard_word_analogy": 6.3333, + "eval_f1_for_task1156_bard_word_analogy": 1.0, + "eval_f1_for_task1157_bard_word_analogy": 20.6667, + "eval_f1_for_task1158_bard_word_analogy": 1.3333, + "eval_f1_for_task1159_bard_word_analogy": 3.6667, + "eval_f1_for_task1161_coda_19_title_generation": 18.2171, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 67.6471, + "eval_f1_for_task121_zest_question_rewriting": 32.8369, + "eval_f1_for_task133_winowhy_coreference_resolution": 52.025, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 9.1302, + "eval_f1_for_task1344_rte_textual_entailment": 9.4652, + "eval_f1_for_task1345_qqp_question_rewriting": 20.7518, + "eval_f1_for_task1356_xlsum_title_generation": 6.6319, + "eval_f1_for_task1358_xlsum_title_generation": 26.6721, + "eval_f1_for_task1385_anli_textual_entailment": 1.1939, + "eval_f1_for_task1386_anli_textual_entailment": 0.4827, + "eval_f1_for_task1387_anli_textual_entailment": 1.2532, + "eval_f1_for_task1388_cb_textual_entailment": 3.4694, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 45.3333, + "eval_f1_for_task1391_winogrande_coreference_resolution": 0.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 23.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 0.1281, + "eval_f1_for_task1407_dart_data_to_text": 33.2431, + "eval_f1_for_task1409_dart_data_to_text": 43.8477, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.2476, + "eval_f1_for_task1439_doqa_answerability_classification": 0.6602, + "eval_f1_for_task1442_doqa_answerability_classification": 1.3028, + "eval_f1_for_task1516_imppres_textual_entailment": 21.3333, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 6.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 3.6273, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 2.7193, + "eval_f1_for_task1540_peer_read_title_generation": 10.4998, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 68.3846, + "eval_f1_for_task1562_zest_question_rewriting": 44.5006, + "eval_f1_for_task1586_scifact_title_generation": 20.601, + "eval_f1_for_task1598_nyc_data_to_text": 46.1825, + "eval_f1_for_task1612_sick_textual_entailment": 1.9379, + "eval_f1_for_task1615_sick_textual_entailment": 48.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 64.902, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 0.5115, + "eval_f1_for_task1631_open_pi_data_to_text": 67.4547, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 1.2597, + "eval_f1_for_task1659_billsum_title_generation": 19.6264, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 51.8206, + "eval_f1_for_task1728_web_nlg_data_to_text": 33.6871, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 9.6667, + "eval_f1_for_task200_multinli_textual_entailment": 29.0263, + "eval_f1_for_task201_multinli_textual_entailment": 5.9992, + "eval_f1_for_task202_multinli_textual_entailment": 2.4554, + "eval_f1_for_task219_rocstories_title_generation": 16.9115, + "eval_f1_for_task220_rocstories_title_generation": 12.2941, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 8.9032, + "eval_f1_for_task232_iirc_answerability_classification": 2.0023, + "eval_f1_for_task233_iirc_answerability_classification": 1.3034, + "eval_f1_for_task242_tweetqa_answerability_classification": 20.3333, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 17.65, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 23.8746, + "eval_f1_for_task288_gigaword_title_generation": 28.0454, + "eval_f1_for_task290_tellmewhy_answerability_classification": 50.9048, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 1.292, + "eval_f1_for_task329_gap_coreference_resolution": 1.9594, + "eval_f1_for_task330_gap_coreference_resolution": 2.7157, + "eval_f1_for_task349_squad2.0_answerability_classification": 1.6454, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 24.1317, + "eval_f1_for_task391_cod3s_cause_effect_classification": 0.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 1.4, + "eval_f1_for_task393_cod3s_cause_effect_classification": 23.5122, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 1.6208, + "eval_f1_for_task402_grailqa_question_rewriting": 20.7921, + "eval_f1_for_task418_persent_title_generation": 8.6018, + "eval_f1_for_task442_com_qa_question_rewriting": 54.8157, + "eval_f1_for_task500_scruples_title_generation": 7.1504, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 15.9795, + "eval_f1_for_task520_aquamuse_answerability_classification": 24.1835, + "eval_f1_for_task569_recipe_nlg_title_generation": 33.1762, + "eval_f1_for_task602_wikitext_title_generation": 1.9146, + "eval_f1_for_task613_liar_keyword_tagging": 7.3524, + "eval_f1_for_task614_glucose_cause_effect_classification": 27.2498, + "eval_f1_for_task619_ohsumed_title_generation": 12.6935, + "eval_f1_for_task620_ohsumed_keyword_tagging": 2.6743, + "eval_f1_for_task623_ohsumed_keyword_tagging": 1.1318, + "eval_f1_for_task640_e_snli_textual_entailment": 37.0, + "eval_f1_for_task641_e_snli_textual_entailment": 0.0, + "eval_f1_for_task642_e_snli_textual_entailment": 19.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 72.4976, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 30.4875, + "eval_f1_for_task670_ambigqa_question_rewriting": 50.3552, + "eval_f1_for_task671_ambigqa_question_rewriting": 46.2459, + "eval_f1_for_task677_ollie_data_to_text": 26.5577, + "eval_f1_for_task738_perspectrum_textual_entailment": 2.6667, + "eval_f1_for_task743_eurlex_title_generation": 17.9505, + "eval_f1_for_task760_msr_sqa_data_to_text": 4.6424, + "eval_f1_for_task769_qed_title_generation": 8.8084, + "eval_f1_for_task827_copa_cause_effect_classification": 51.0, + "eval_f1_for_task828_copa_cause_effect_classification": 24.6667, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 34.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 1.3542, + "eval_f1_for_task890_gwsd_textual_entailment": 8.2184, + "eval_f1_for_task891_gap_coreference_resolution": 1.9993, + "eval_f1_for_task892_gap_coreference_resolution": 1.366, + "eval_f1_for_task893_gap_coreference_resolution": 1.6708, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_f1_for_task957_e2e_data_to_text": 29.9275, + "eval_f1_for_task970_sherliic_textual_entailment": 0.0, + "eval_f1_for_textual_entailment": 20.632, + "eval_f1_for_title_generation": 15.3923, + "eval_f1_for_word_analogy": 6.125, + "eval_gen_len": 52.8816, + "eval_global_step": 1, + "eval_loss": 6.063381671905518, + "eval_rouge1": 22.1453, + "eval_rouge1_for_answerability_classification": 9.733, + "eval_rouge1_for_cause_effect_classification": 22.5895, + "eval_rouge1_for_coreference_resolution": 15.7743, + "eval_rouge1_for_data_to_text": 43.5979, + "eval_rouge1_for_dialogue_act_recognition": 10.7681, + "eval_rouge1_for_grammar_error_correction": 54.0388, + "eval_rouge1_for_keyword_tagging": 27.8351, + "eval_rouge1_for_overlap_extraction": 26.8568, + "eval_rouge1_for_question_rewriting": 44.4682, + "eval_rouge1_for_task020_mctaco_answerability_classification": 11.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 10.3667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 34.2286, + "eval_rouge1_for_task035_winogrande_question_rewriting": 32.9653, + "eval_rouge1_for_task036_qasc_keyword_tagging": 48.9071, + "eval_rouge1_for_task039_qasc_overlap_extraction": 29.3707, + "eval_rouge1_for_task050_multirc_answerability_classification": 2.6667, + "eval_rouge1_for_task102_commongen_data_to_text": 65.0564, + "eval_rouge1_for_task1152_bard_word_analogy": 1.0, + "eval_rouge1_for_task1153_bard_word_analogy": 9.3333, + "eval_rouge1_for_task1154_bard_word_analogy": 5.6667, + "eval_rouge1_for_task1155_bard_word_analogy": 6.3333, + "eval_rouge1_for_task1156_bard_word_analogy": 1.0, + "eval_rouge1_for_task1157_bard_word_analogy": 20.6667, + "eval_rouge1_for_task1158_bard_word_analogy": 1.3333, + "eval_rouge1_for_task1159_bard_word_analogy": 3.6667, + "eval_rouge1_for_task1161_coda_19_title_generation": 20.2027, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 68.8469, + "eval_rouge1_for_task121_zest_question_rewriting": 34.9537, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 52.025, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 9.6593, + "eval_rouge1_for_task1344_rte_textual_entailment": 9.4017, + "eval_rouge1_for_task1345_qqp_question_rewriting": 24.0023, + "eval_rouge1_for_task1356_xlsum_title_generation": 7.8061, + "eval_rouge1_for_task1358_xlsum_title_generation": 30.7978, + "eval_rouge1_for_task1385_anli_textual_entailment": 1.5382, + "eval_rouge1_for_task1386_anli_textual_entailment": 0.4717, + "eval_rouge1_for_task1387_anli_textual_entailment": 1.2068, + "eval_rouge1_for_task1388_cb_textual_entailment": 3.3834, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 45.3333, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 0.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 23.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.7019, + "eval_rouge1_for_task1407_dart_data_to_text": 33.7284, + "eval_rouge1_for_task1409_dart_data_to_text": 44.9915, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.7449, + "eval_rouge1_for_task1439_doqa_answerability_classification": 0.6385, + "eval_rouge1_for_task1442_doqa_answerability_classification": 1.269, + "eval_rouge1_for_task1516_imppres_textual_entailment": 21.3333, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 6.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 3.5134, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 2.5248, + "eval_rouge1_for_task1540_peer_read_title_generation": 12.1473, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 71.3327, + "eval_rouge1_for_task1562_zest_question_rewriting": 47.6589, + "eval_rouge1_for_task1586_scifact_title_generation": 22.977, + "eval_rouge1_for_task1598_nyc_data_to_text": 46.8487, + "eval_rouge1_for_task1612_sick_textual_entailment": 1.8011, + "eval_rouge1_for_task1615_sick_textual_entailment": 81.8667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 66.5799, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 0.5085, + "eval_rouge1_for_task1631_open_pi_data_to_text": 68.4446, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 1.2597, + "eval_rouge1_for_task1659_billsum_title_generation": 20.8671, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 51.8206, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 36.9664, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 9.6667, + "eval_rouge1_for_task200_multinli_textual_entailment": 31.6894, + "eval_rouge1_for_task201_multinli_textual_entailment": 9.8986, + "eval_rouge1_for_task202_multinli_textual_entailment": 4.9734, + "eval_rouge1_for_task219_rocstories_title_generation": 21.5798, + "eval_rouge1_for_task220_rocstories_title_generation": 12.2914, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 8.8824, + "eval_rouge1_for_task232_iirc_answerability_classification": 1.9617, + "eval_rouge1_for_task233_iirc_answerability_classification": 1.2835, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 20.3333, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 17.8167, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 24.343, + "eval_rouge1_for_task288_gigaword_title_generation": 31.069, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 50.9048, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 1.6112, + "eval_rouge1_for_task329_gap_coreference_resolution": 1.8623, + "eval_rouge1_for_task330_gap_coreference_resolution": 2.6787, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 1.6418, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 24.1144, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 0.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 1.4, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 24.0348, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 1.6542, + "eval_rouge1_for_task402_grailqa_question_rewriting": 21.6476, + "eval_rouge1_for_task418_persent_title_generation": 9.9142, + "eval_rouge1_for_task442_com_qa_question_rewriting": 58.52, + "eval_rouge1_for_task500_scruples_title_generation": 7.5939, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 16.1288, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 24.1786, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 34.7639, + "eval_rouge1_for_task602_wikitext_title_generation": 2.0117, + "eval_rouge1_for_task613_liar_keyword_tagging": 13.1576, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 32.6919, + "eval_rouge1_for_task619_ohsumed_title_generation": 13.5775, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 3.0262, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 1.0867, + "eval_rouge1_for_task640_e_snli_textual_entailment": 37.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 0.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 19.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 72.9976, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 30.6615, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 52.0351, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 47.712, + "eval_rouge1_for_task677_ollie_data_to_text": 29.1239, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 20.6667, + "eval_rouge1_for_task743_eurlex_title_generation": 19.1024, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.9621, + "eval_rouge1_for_task769_qed_title_generation": 9.1146, + "eval_rouge1_for_task827_copa_cause_effect_classification": 51.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 25.6667, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 34.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 2.5223, + "eval_rouge1_for_task890_gwsd_textual_entailment": 8.2184, + "eval_rouge1_for_task891_gap_coreference_resolution": 2.0099, + "eval_rouge1_for_task892_gap_coreference_resolution": 1.3387, + "eval_rouge1_for_task893_gap_coreference_resolution": 1.6612, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_rouge1_for_task957_e2e_data_to_text": 33.6688, + "eval_rouge1_for_task970_sherliic_textual_entailment": 0.0, + "eval_rouge1_for_textual_entailment": 23.1715, + "eval_rouge1_for_title_generation": 16.888, + "eval_rouge1_for_word_analogy": 6.125, + "eval_rougeL": 21.0063, + "eval_rougeL_for_answerability_classification": 9.733, + "eval_rougeL_for_cause_effect_classification": 21.9936, + "eval_rougeL_for_coreference_resolution": 15.4404, + "eval_rougeL_for_data_to_text": 38.2271, + "eval_rougeL_for_dialogue_act_recognition": 10.7648, + "eval_rougeL_for_grammar_error_correction": 53.0946, + "eval_rougeL_for_keyword_tagging": 26.8458, + "eval_rougeL_for_overlap_extraction": 25.7016, + "eval_rougeL_for_question_rewriting": 41.0279, + "eval_rougeL_for_task020_mctaco_answerability_classification": 11.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 10.3667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 33.0134, + "eval_rougeL_for_task035_winogrande_question_rewriting": 30.8442, + "eval_rougeL_for_task036_qasc_keyword_tagging": 46.2051, + "eval_rougeL_for_task039_qasc_overlap_extraction": 29.3707, + "eval_rougeL_for_task050_multirc_answerability_classification": 2.6667, + "eval_rougeL_for_task102_commongen_data_to_text": 58.4385, + "eval_rougeL_for_task1152_bard_word_analogy": 1.0, + "eval_rougeL_for_task1153_bard_word_analogy": 9.3333, + "eval_rougeL_for_task1154_bard_word_analogy": 5.6667, + "eval_rougeL_for_task1155_bard_word_analogy": 6.3333, + "eval_rougeL_for_task1156_bard_word_analogy": 1.0, + "eval_rougeL_for_task1157_bard_word_analogy": 20.6667, + "eval_rougeL_for_task1158_bard_word_analogy": 1.3333, + "eval_rougeL_for_task1159_bard_word_analogy": 3.6667, + "eval_rougeL_for_task1161_coda_19_title_generation": 17.0614, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 66.6323, + "eval_rougeL_for_task121_zest_question_rewriting": 31.5967, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 52.025, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 9.2012, + "eval_rougeL_for_task1344_rte_textual_entailment": 9.4017, + "eval_rougeL_for_task1345_qqp_question_rewriting": 21.0229, + "eval_rougeL_for_task1356_xlsum_title_generation": 6.6727, + "eval_rougeL_for_task1358_xlsum_title_generation": 25.8162, + "eval_rougeL_for_task1385_anli_textual_entailment": 1.5382, + "eval_rougeL_for_task1386_anli_textual_entailment": 0.4717, + "eval_rougeL_for_task1387_anli_textual_entailment": 1.2068, + "eval_rougeL_for_task1388_cb_textual_entailment": 3.3834, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 45.3333, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 0.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 23.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.6786, + "eval_rougeL_for_task1407_dart_data_to_text": 30.1324, + "eval_rougeL_for_task1409_dart_data_to_text": 40.0496, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.7421, + "eval_rougeL_for_task1439_doqa_answerability_classification": 0.6385, + "eval_rougeL_for_task1442_doqa_answerability_classification": 1.269, + "eval_rougeL_for_task1516_imppres_textual_entailment": 21.3333, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 6.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 3.5134, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 2.5248, + "eval_rougeL_for_task1540_peer_read_title_generation": 10.3892, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 70.4471, + "eval_rougeL_for_task1562_zest_question_rewriting": 41.6169, + "eval_rougeL_for_task1586_scifact_title_generation": 18.2123, + "eval_rougeL_for_task1598_nyc_data_to_text": 38.4076, + "eval_rougeL_for_task1612_sick_textual_entailment": 1.8011, + "eval_rougeL_for_task1615_sick_textual_entailment": 81.8667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 64.4067, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 0.5085, + "eval_rougeL_for_task1631_open_pi_data_to_text": 64.847, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 1.2597, + "eval_rougeL_for_task1659_billsum_title_generation": 17.3576, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 47.4722, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 32.1914, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 9.6667, + "eval_rougeL_for_task200_multinli_textual_entailment": 31.6894, + "eval_rougeL_for_task201_multinli_textual_entailment": 9.8986, + "eval_rougeL_for_task202_multinli_textual_entailment": 4.9734, + "eval_rougeL_for_task219_rocstories_title_generation": 20.9298, + "eval_rougeL_for_task220_rocstories_title_generation": 12.2914, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 8.8824, + "eval_rougeL_for_task232_iirc_answerability_classification": 1.9617, + "eval_rougeL_for_task233_iirc_answerability_classification": 1.2835, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 20.3333, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 17.8167, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 22.0325, + "eval_rougeL_for_task288_gigaword_title_generation": 26.6723, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 50.9048, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 1.6112, + "eval_rougeL_for_task329_gap_coreference_resolution": 1.8623, + "eval_rougeL_for_task330_gap_coreference_resolution": 2.6787, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 1.6418, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 24.1144, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 0.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 1.4, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 22.9281, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 1.6542, + "eval_rougeL_for_task402_grailqa_question_rewriting": 17.7913, + "eval_rougeL_for_task418_persent_title_generation": 8.2143, + "eval_rougeL_for_task442_com_qa_question_rewriting": 51.8922, + "eval_rougeL_for_task500_scruples_title_generation": 6.282, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 15.3911, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 24.1786, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 33.3312, + "eval_rougeL_for_task602_wikitext_title_generation": 2.0117, + "eval_rougeL_for_task613_liar_keyword_tagging": 13.1576, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 29.6272, + "eval_rougeL_for_task619_ohsumed_title_generation": 11.0114, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 2.9822, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 1.0867, + "eval_rougeL_for_task640_e_snli_textual_entailment": 37.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 0.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 19.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 70.7976, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 30.3348, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 48.1776, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 44.313, + "eval_rougeL_for_task677_ollie_data_to_text": 24.3271, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 20.6667, + "eval_rougeL_for_task743_eurlex_title_generation": 16.4606, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.0385, + "eval_rougeL_for_task769_qed_title_generation": 8.8578, + "eval_rougeL_for_task827_copa_cause_effect_classification": 51.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 25.6667, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 34.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 2.5223, + "eval_rougeL_for_task890_gwsd_textual_entailment": 8.2184, + "eval_rougeL_for_task891_gap_coreference_resolution": 2.0099, + "eval_rougeL_for_task892_gap_coreference_resolution": 1.3387, + "eval_rougeL_for_task893_gap_coreference_resolution": 1.6612, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_rougeL_for_task957_e2e_data_to_text": 26.3123, + "eval_rougeL_for_task970_sherliic_textual_entailment": 0.0, + "eval_rougeL_for_textual_entailment": 23.1715, + "eval_rougeL_for_title_generation": 14.9015, + "eval_rougeL_for_word_analogy": 6.125, + "eval_runtime": 1806.0079, + "eval_samples_per_second": 6.595, + "eval_steps_per_second": 0.207, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 2.3581, + "step": 50 + }, + { + "epoch": 0.01, + "eval_exact_match": 25.9194, + "eval_exact_match_for_answerability_classification": 49.0769, + "eval_exact_match_for_cause_effect_classification": 35.5714, + "eval_exact_match_for_coreference_resolution": 35.2143, + "eval_exact_match_for_data_to_text": 3.2688, + "eval_exact_match_for_dialogue_act_recognition": 36.1429, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 36.0, + "eval_exact_match_for_overlap_extraction": 10.5, + "eval_exact_match_for_question_rewriting": 1.5455, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 31.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 21.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 21.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 13.0, + "eval_exact_match_for_task1153_bard_word_analogy": 5.0, + "eval_exact_match_for_task1154_bard_word_analogy": 24.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 18.0, + "eval_exact_match_for_task1157_bard_word_analogy": 18.0, + "eval_exact_match_for_task1158_bard_word_analogy": 16.0, + "eval_exact_match_for_task1159_bard_word_analogy": 4.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 8.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 40.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 28.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 30.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 42.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 34.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 23.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 24.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 3.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 9.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 39.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 53.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 55.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 52.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 15.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 7.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 3.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 11.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 82.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 8.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 59.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 42.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 61.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 31.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 31.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 48.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 40.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 38.0, + "eval_exact_match_for_title_generation": 7.5673, + "eval_exact_match_for_word_analogy": 18.5, + "eval_f1": 42.4244, + "eval_f1_for_answerability_classification": 51.641, + "eval_f1_for_cause_effect_classification": 52.5064, + "eval_f1_for_coreference_resolution": 43.5617, + "eval_f1_for_data_to_text": 48.9203, + "eval_f1_for_dialogue_act_recognition": 42.5714, + "eval_f1_for_grammar_error_correction": 55.1737, + "eval_f1_for_keyword_tagging": 49.4682, + "eval_f1_for_overlap_extraction": 38.2936, + "eval_f1_for_question_rewriting": 63.9768, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 32.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 66.9413, + "eval_f1_for_task035_winogrande_question_rewriting": 84.6764, + "eval_f1_for_task036_qasc_keyword_tagging": 60.7338, + "eval_f1_for_task039_qasc_overlap_extraction": 30.7722, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 53.8292, + "eval_f1_for_task1152_bard_word_analogy": 13.0, + "eval_f1_for_task1153_bard_word_analogy": 13.0, + "eval_f1_for_task1154_bard_word_analogy": 24.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 18.0, + "eval_f1_for_task1157_bard_word_analogy": 18.0, + "eval_f1_for_task1158_bard_word_analogy": 16.0, + "eval_f1_for_task1159_bard_word_analogy": 4.0, + "eval_f1_for_task1161_coda_19_title_generation": 24.5389, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 75.3201, + "eval_f1_for_task121_zest_question_rewriting": 45.0136, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 10.2623, + "eval_f1_for_task1344_rte_textual_entailment": 40.0, + "eval_f1_for_task1345_qqp_question_rewriting": 38.958, + "eval_f1_for_task1356_xlsum_title_generation": 8.5423, + "eval_f1_for_task1358_xlsum_title_generation": 27.2269, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 28.0, + "eval_f1_for_task1387_anli_textual_entailment": 30.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_f1_for_task1407_dart_data_to_text": 31.9902, + "eval_f1_for_task1409_dart_data_to_text": 49.845, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.1408, + "eval_f1_for_task1439_doqa_answerability_classification": 42.0, + "eval_f1_for_task1442_doqa_answerability_classification": 48.0, + "eval_f1_for_task1516_imppres_textual_entailment": 0.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 34.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1540_peer_read_title_generation": 24.7422, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 81.2066, + "eval_f1_for_task1562_zest_question_rewriting": 52.8238, + "eval_f1_for_task1586_scifact_title_generation": 29.695, + "eval_f1_for_task1598_nyc_data_to_text": 45.2512, + "eval_f1_for_task1612_sick_textual_entailment": 34.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 72.9778, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 87.5834, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 32.8335, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 68.7151, + "eval_f1_for_task1728_web_nlg_data_to_text": 53.1339, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 33.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 24.2738, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 50.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 48.1619, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 45.8149, + "eval_f1_for_task288_gigaword_title_generation": 26.1811, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 44.3667, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 57.8, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 70.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 85.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 84.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 29.7995, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 17.0, + "eval_f1_for_task402_grailqa_question_rewriting": 76.4232, + "eval_f1_for_task418_persent_title_generation": 20.2928, + "eval_f1_for_task442_com_qa_question_rewriting": 62.0679, + "eval_f1_for_task500_scruples_title_generation": 12.1941, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 37.2018, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 35.0653, + "eval_f1_for_task602_wikitext_title_generation": 7.6846, + "eval_f1_for_task613_liar_keyword_tagging": 17.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 26.7457, + "eval_f1_for_task619_ohsumed_title_generation": 32.8166, + "eval_f1_for_task620_ohsumed_keyword_tagging": 28.9929, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 90.281, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 9.7094, + "eval_f1_for_task670_ambigqa_question_rewriting": 69.6066, + "eval_f1_for_task671_ambigqa_question_rewriting": 58.936, + "eval_f1_for_task677_ollie_data_to_text": 27.4151, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 26.786, + "eval_f1_for_task760_msr_sqa_data_to_text": 2.9375, + "eval_f1_for_task769_qed_title_generation": 66.7952, + "eval_f1_for_task827_copa_cause_effect_classification": 42.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 61.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 31.0, + "eval_f1_for_task890_gwsd_textual_entailment": 31.0, + "eval_f1_for_task891_gap_coreference_resolution": 57.7778, + "eval_f1_for_task892_gap_coreference_resolution": 40.0, + "eval_f1_for_task893_gap_coreference_resolution": 51.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 54.2701, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 38.0, + "eval_f1_for_title_generation": 27.7973, + "eval_f1_for_word_analogy": 19.5, + "eval_gen_len": 10.3451, + "eval_global_step": 50, + "eval_loss": 1.360949993133545, + "eval_rouge1": 44.1153, + "eval_rouge1_for_answerability_classification": 51.641, + "eval_rouge1_for_cause_effect_classification": 53.4022, + "eval_rouge1_for_coreference_resolution": 44.2839, + "eval_rouge1_for_data_to_text": 51.8516, + "eval_rouge1_for_dialogue_act_recognition": 46.3333, + "eval_rouge1_for_grammar_error_correction": 60.0225, + "eval_rouge1_for_keyword_tagging": 52.702, + "eval_rouge1_for_overlap_extraction": 41.6067, + "eval_rouge1_for_question_rewriting": 65.9852, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 35.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 66.967, + "eval_rouge1_for_task035_winogrande_question_rewriting": 85.5121, + "eval_rouge1_for_task036_qasc_keyword_tagging": 63.3362, + "eval_rouge1_for_task039_qasc_overlap_extraction": 36.1722, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 67.3601, + "eval_rouge1_for_task1152_bard_word_analogy": 13.0, + "eval_rouge1_for_task1153_bard_word_analogy": 13.0, + "eval_rouge1_for_task1154_bard_word_analogy": 24.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 18.0, + "eval_rouge1_for_task1157_bard_word_analogy": 18.0, + "eval_rouge1_for_task1158_bard_word_analogy": 16.0, + "eval_rouge1_for_task1159_bard_word_analogy": 4.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 27.8956, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 76.1082, + "eval_rouge1_for_task121_zest_question_rewriting": 47.9556, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 11.1636, + "eval_rouge1_for_task1344_rte_textual_entailment": 40.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.5635, + "eval_rouge1_for_task1356_xlsum_title_generation": 10.1226, + "eval_rouge1_for_task1358_xlsum_title_generation": 32.264, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 28.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 30.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 6.0, + "eval_rouge1_for_task1407_dart_data_to_text": 32.3116, + "eval_rouge1_for_task1409_dart_data_to_text": 50.739, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.428, + "eval_rouge1_for_task1439_doqa_answerability_classification": 42.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 34.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 27.2858, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 84.617, + "eval_rouge1_for_task1562_zest_question_rewriting": 55.7619, + "eval_rouge1_for_task1586_scifact_title_generation": 32.7796, + "eval_rouge1_for_task1598_nyc_data_to_text": 48.1784, + "eval_rouge1_for_task1612_sick_textual_entailment": 34.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 73.7471, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 88.1541, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 33.8919, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 68.7151, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 55.7415, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 30.723, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 49.3405, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 47.0413, + "eval_rouge1_for_task288_gigaword_title_generation": 28.7637, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 45.3, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 57.8, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 70.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 85.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.1273, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 21.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 78.8357, + "eval_rouge1_for_task418_persent_title_generation": 23.2101, + "eval_rouge1_for_task442_com_qa_question_rewriting": 66.5182, + "eval_rouge1_for_task500_scruples_title_generation": 13.5159, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 37.7533, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 35.3546, + "eval_rouge1_for_task602_wikitext_title_generation": 7.8507, + "eval_rouge1_for_task613_liar_keyword_tagging": 28.3333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 32.688, + "eval_rouge1_for_task619_ohsumed_title_generation": 35.0547, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 31.0595, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 90.781, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 10.0857, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 71.3133, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 60.5547, + "eval_rouge1_for_task677_ollie_data_to_text": 30.8066, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 27.7725, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.2234, + "eval_rouge1_for_task769_qed_title_generation": 66.9667, + "eval_rouge1_for_task827_copa_cause_effect_classification": 42.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 61.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 54.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 31.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 57.7333, + "eval_rouge1_for_task892_gap_coreference_resolution": 40.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 51.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 54.1647, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 39.8611, + "eval_rouge1_for_title_generation": 29.7709, + "eval_rouge1_for_word_analogy": 19.5, + "eval_rougeL": 42.805, + "eval_rougeL_for_answerability_classification": 51.641, + "eval_rougeL_for_cause_effect_classification": 53.0372, + "eval_rougeL_for_coreference_resolution": 44.2839, + "eval_rougeL_for_data_to_text": 44.8475, + "eval_rougeL_for_dialogue_act_recognition": 46.3333, + "eval_rougeL_for_grammar_error_correction": 59.1998, + "eval_rougeL_for_keyword_tagging": 51.8553, + "eval_rougeL_for_overlap_extraction": 40.4496, + "eval_rougeL_for_question_rewriting": 61.8144, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 35.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 65.9595, + "eval_rougeL_for_task035_winogrande_question_rewriting": 84.6904, + "eval_rougeL_for_task036_qasc_keyword_tagging": 60.3362, + "eval_rougeL_for_task039_qasc_overlap_extraction": 36.1722, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 58.1486, + "eval_rougeL_for_task1152_bard_word_analogy": 13.0, + "eval_rougeL_for_task1153_bard_word_analogy": 13.0, + "eval_rougeL_for_task1154_bard_word_analogy": 24.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 18.0, + "eval_rougeL_for_task1157_bard_word_analogy": 18.0, + "eval_rougeL_for_task1158_bard_word_analogy": 16.0, + "eval_rougeL_for_task1159_bard_word_analogy": 4.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 22.598, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 74.7264, + "eval_rougeL_for_task121_zest_question_rewriting": 41.7684, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 10.7625, + "eval_rougeL_for_task1344_rte_textual_entailment": 40.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.5164, + "eval_rougeL_for_task1356_xlsum_title_generation": 8.5741, + "eval_rougeL_for_task1358_xlsum_title_generation": 27.9907, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 28.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 30.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 6.0, + "eval_rougeL_for_task1407_dart_data_to_text": 29.3652, + "eval_rougeL_for_task1409_dart_data_to_text": 44.192, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 34.7897, + "eval_rougeL_for_task1439_doqa_answerability_classification": 42.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 34.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 25.9154, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 83.61, + "eval_rougeL_for_task1562_zest_question_rewriting": 47.9731, + "eval_rougeL_for_task1586_scifact_title_generation": 25.9544, + "eval_rougeL_for_task1598_nyc_data_to_text": 35.1319, + "eval_rougeL_for_task1612_sick_textual_entailment": 34.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 71.2728, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 84.0839, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 29.5277, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 68.7151, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 49.8252, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 30.223, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 49.3405, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 44.7269, + "eval_rougeL_for_task288_gigaword_title_generation": 24.6086, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 45.3, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 57.8, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 70.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 85.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.7275, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 21.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 65.9428, + "eval_rougeL_for_task418_persent_title_generation": 20.0925, + "eval_rougeL_for_task442_com_qa_question_rewriting": 60.046, + "eval_rougeL_for_task500_scruples_title_generation": 12.2942, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 37.3191, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 34.9508, + "eval_rougeL_for_task602_wikitext_title_generation": 7.7674, + "eval_rougeL_for_task613_liar_keyword_tagging": 28.3333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 31.5332, + "eval_rougeL_for_task619_ohsumed_title_generation": 31.5362, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 29.8262, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 90.781, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 10.0857, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 69.3529, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 58.7099, + "eval_rougeL_for_task677_ollie_data_to_text": 25.0177, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 23.6915, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.1569, + "eval_rougeL_for_task769_qed_title_generation": 66.9667, + "eval_rougeL_for_task827_copa_cause_effect_classification": 42.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 61.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 54.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 31.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 57.7333, + "eval_rougeL_for_task892_gap_coreference_resolution": 40.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 51.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 43.855, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 39.8611, + "eval_rougeL_for_title_generation": 27.44, + "eval_rougeL_for_word_analogy": 19.5, + "eval_runtime": 705.0451, + "eval_samples_per_second": 16.893, + "eval_steps_per_second": 0.529, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 1.6822, + "step": 100 + }, + { + "epoch": 0.02, + "eval_exact_match": 25.6675, + "eval_exact_match_for_answerability_classification": 49.5385, + "eval_exact_match_for_cause_effect_classification": 35.7143, + "eval_exact_match_for_coreference_resolution": 31.9286, + "eval_exact_match_for_data_to_text": 3.0266, + "eval_exact_match_for_dialogue_act_recognition": 37.2857, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 36.2, + "eval_exact_match_for_overlap_extraction": 8.5, + "eval_exact_match_for_question_rewriting": 1.8182, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 38.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 20.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 16.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 51.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 14.0, + "eval_exact_match_for_task1153_bard_word_analogy": 3.0, + "eval_exact_match_for_task1154_bard_word_analogy": 17.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 21.0, + "eval_exact_match_for_task1157_bard_word_analogy": 17.0, + "eval_exact_match_for_task1158_bard_word_analogy": 9.0, + "eval_exact_match_for_task1159_bard_word_analogy": 2.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 8.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 49.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 51.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 6.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 2.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 45.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 27.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 28.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 19.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 21.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 4.0, + "eval_exact_match_for_task190_snli_textual_entailment": 52.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 25.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task219_rocstories_title_generation": 7.0, + "eval_exact_match_for_task220_rocstories_title_generation": 46.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 33.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 1.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 49.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 49.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 16.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 9.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 3.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 12.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 14.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 45.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 85.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 6.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 48.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 49.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 37.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 38.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 46.0, + "eval_exact_match_for_textual_entailment": 39.5, + "eval_exact_match_for_title_generation": 6.5583, + "eval_exact_match_for_word_analogy": 16.625, + "eval_f1": 41.9404, + "eval_f1_for_answerability_classification": 52.1538, + "eval_f1_for_cause_effect_classification": 53.1004, + "eval_f1_for_coreference_resolution": 37.9777, + "eval_f1_for_data_to_text": 47.7033, + "eval_f1_for_dialogue_act_recognition": 40.8571, + "eval_f1_for_grammar_error_correction": 55.8573, + "eval_f1_for_keyword_tagging": 50.3222, + "eval_f1_for_overlap_extraction": 34.2999, + "eval_f1_for_question_rewriting": 65.6756, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 39.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 70.949, + "eval_f1_for_task035_winogrande_question_rewriting": 84.3797, + "eval_f1_for_task036_qasc_keyword_tagging": 61.0967, + "eval_f1_for_task039_qasc_overlap_extraction": 26.1937, + "eval_f1_for_task050_multirc_answerability_classification": 51.0, + "eval_f1_for_task102_commongen_data_to_text": 51.7845, + "eval_f1_for_task1152_bard_word_analogy": 14.0, + "eval_f1_for_task1153_bard_word_analogy": 13.0, + "eval_f1_for_task1154_bard_word_analogy": 17.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 21.0, + "eval_f1_for_task1157_bard_word_analogy": 17.0, + "eval_f1_for_task1158_bard_word_analogy": 9.0, + "eval_f1_for_task1159_bard_word_analogy": 2.0, + "eval_f1_for_task1161_coda_19_title_generation": 23.8519, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 74.2126, + "eval_f1_for_task121_zest_question_rewriting": 46.5173, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 12.9414, + "eval_f1_for_task1344_rte_textual_entailment": 49.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.198, + "eval_f1_for_task1356_xlsum_title_generation": 8.762, + "eval_f1_for_task1358_xlsum_title_generation": 28.8634, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 51.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 6.0, + "eval_f1_for_task1407_dart_data_to_text": 31.9488, + "eval_f1_for_task1409_dart_data_to_text": 47.1149, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.8359, + "eval_f1_for_task1439_doqa_answerability_classification": 45.0, + "eval_f1_for_task1442_doqa_answerability_classification": 50.0, + "eval_f1_for_task1516_imppres_textual_entailment": 27.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 28.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 27.4022, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 81.8786, + "eval_f1_for_task1562_zest_question_rewriting": 50.4976, + "eval_f1_for_task1586_scifact_title_generation": 28.635, + "eval_f1_for_task1598_nyc_data_to_text": 47.4627, + "eval_f1_for_task1612_sick_textual_entailment": 34.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 77.1436, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_f1_for_task1631_open_pi_data_to_text": 84.683, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 31.2204, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 66.1393, + "eval_f1_for_task1728_web_nlg_data_to_text": 52.9716, + "eval_f1_for_task190_snli_textual_entailment": 52.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 25.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 34.0, + "eval_f1_for_task219_rocstories_title_generation": 22.2937, + "eval_f1_for_task220_rocstories_title_generation": 46.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 50.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 42.4, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 42.4061, + "eval_f1_for_task288_gigaword_title_generation": 25.2269, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 8.2, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 39.2, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.2755, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 20.0, + "eval_f1_for_task402_grailqa_question_rewriting": 79.0335, + "eval_f1_for_task418_persent_title_generation": 18.485, + "eval_f1_for_task442_com_qa_question_rewriting": 65.4439, + "eval_f1_for_task500_scruples_title_generation": 12.0853, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 38.0016, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 34.2044, + "eval_f1_for_task602_wikitext_title_generation": 7.9816, + "eval_f1_for_task613_liar_keyword_tagging": 14.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 23.0942, + "eval_f1_for_task619_ohsumed_title_generation": 33.3165, + "eval_f1_for_task620_ohsumed_keyword_tagging": 33.1048, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 45.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 92.7429, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 8.3047, + "eval_f1_for_task670_ambigqa_question_rewriting": 73.1875, + "eval_f1_for_task671_ambigqa_question_rewriting": 61.8686, + "eval_f1_for_task677_ollie_data_to_text": 27.6673, + "eval_f1_for_task738_perspectrum_textual_entailment": 48.0, + "eval_f1_for_task743_eurlex_title_generation": 26.9411, + "eval_f1_for_task760_msr_sqa_data_to_text": 4.0893, + "eval_f1_for_task769_qed_title_generation": 63.3932, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_f1_for_task890_gwsd_textual_entailment": 37.0, + "eval_f1_for_task891_gap_coreference_resolution": 48.7778, + "eval_f1_for_task892_gap_coreference_resolution": 35.0, + "eval_f1_for_task893_gap_coreference_resolution": 44.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 49.3333, + "eval_f1_for_task970_sherliic_textual_entailment": 46.0, + "eval_f1_for_textual_entailment": 39.5, + "eval_f1_for_title_generation": 27.3727, + "eval_f1_for_word_analogy": 17.875, + "eval_gen_len": 10.8283, + "eval_global_step": 100, + "eval_loss": 1.2748730182647705, + "eval_rouge1": 43.5371, + "eval_rouge1_for_answerability_classification": 52.1538, + "eval_rouge1_for_cause_effect_classification": 54.1068, + "eval_rouge1_for_coreference_resolution": 38.973, + "eval_rouge1_for_data_to_text": 50.4429, + "eval_rouge1_for_dialogue_act_recognition": 43.2238, + "eval_rouge1_for_grammar_error_correction": 60.9109, + "eval_rouge1_for_keyword_tagging": 53.0471, + "eval_rouge1_for_overlap_extraction": 36.4871, + "eval_rouge1_for_question_rewriting": 67.4655, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 43.1667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 70.9299, + "eval_rouge1_for_task035_winogrande_question_rewriting": 85.1603, + "eval_rouge1_for_task036_qasc_keyword_tagging": 63.002, + "eval_rouge1_for_task039_qasc_overlap_extraction": 29.2603, + "eval_rouge1_for_task050_multirc_answerability_classification": 51.0, + "eval_rouge1_for_task102_commongen_data_to_text": 65.2524, + "eval_rouge1_for_task1152_bard_word_analogy": 14.0, + "eval_rouge1_for_task1153_bard_word_analogy": 13.0, + "eval_rouge1_for_task1154_bard_word_analogy": 17.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 21.0, + "eval_rouge1_for_task1157_bard_word_analogy": 17.0, + "eval_rouge1_for_task1158_bard_word_analogy": 9.0, + "eval_rouge1_for_task1159_bard_word_analogy": 2.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 27.3346, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 75.0427, + "eval_rouge1_for_task121_zest_question_rewriting": 49.0313, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 13.6411, + "eval_rouge1_for_task1344_rte_textual_entailment": 49.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.7557, + "eval_rouge1_for_task1356_xlsum_title_generation": 10.499, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.7113, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 51.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 9.2333, + "eval_rouge1_for_task1407_dart_data_to_text": 31.9098, + "eval_rouge1_for_task1409_dart_data_to_text": 47.9659, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.6124, + "eval_rouge1_for_task1439_doqa_answerability_classification": 45.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 27.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 28.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 30.5459, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 85.2094, + "eval_rouge1_for_task1562_zest_question_rewriting": 53.3991, + "eval_rouge1_for_task1586_scifact_title_generation": 31.2372, + "eval_rouge1_for_task1598_nyc_data_to_text": 49.3848, + "eval_rouge1_for_task1612_sick_textual_entailment": 34.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 77.5685, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 85.3877, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 32.5317, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 66.1393, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 55.2182, + "eval_rouge1_for_task190_snli_textual_entailment": 52.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 25.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task219_rocstories_title_generation": 27.5698, + "eval_rouge1_for_task220_rocstories_title_generation": 46.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 43.0667, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 43.7138, + "eval_rouge1_for_task288_gigaword_title_generation": 27.7638, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 8.3333, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 39.4667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.8856, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 29.5, + "eval_rouge1_for_task402_grailqa_question_rewriting": 81.2989, + "eval_rouge1_for_task418_persent_title_generation": 21.3696, + "eval_rouge1_for_task442_com_qa_question_rewriting": 69.5482, + "eval_rouge1_for_task500_scruples_title_generation": 13.0867, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 38.6965, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 35.117, + "eval_rouge1_for_task602_wikitext_title_generation": 8.5433, + "eval_rouge1_for_task613_liar_keyword_tagging": 24.9, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 29.5285, + "eval_rouge1_for_task619_ohsumed_title_generation": 35.3744, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 34.3048, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 45.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 93.0286, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 8.3164, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 74.6195, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 62.7667, + "eval_rouge1_for_task677_ollie_data_to_text": 30.7059, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 28.7876, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.5087, + "eval_rouge1_for_task769_qed_title_generation": 63.4072, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 37.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 48.6333, + "eval_rouge1_for_task892_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 44.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 49.6614, + "eval_rouge1_for_task970_sherliic_textual_entailment": 46.0, + "eval_rouge1_for_textual_entailment": 41.4444, + "eval_rouge1_for_title_generation": 29.3638, + "eval_rouge1_for_word_analogy": 17.875, + "eval_rougeL": 42.2796, + "eval_rougeL_for_answerability_classification": 52.1538, + "eval_rougeL_for_cause_effect_classification": 53.738, + "eval_rougeL_for_coreference_resolution": 38.973, + "eval_rougeL_for_data_to_text": 44.2445, + "eval_rougeL_for_dialogue_act_recognition": 43.2238, + "eval_rougeL_for_grammar_error_correction": 60.1328, + "eval_rougeL_for_keyword_tagging": 51.9668, + "eval_rougeL_for_overlap_extraction": 35.3681, + "eval_rougeL_for_question_rewriting": 63.4404, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 43.1667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 69.7761, + "eval_rougeL_for_task035_winogrande_question_rewriting": 84.1825, + "eval_rougeL_for_task036_qasc_keyword_tagging": 59.7339, + "eval_rougeL_for_task039_qasc_overlap_extraction": 29.2603, + "eval_rougeL_for_task050_multirc_answerability_classification": 51.0, + "eval_rougeL_for_task102_commongen_data_to_text": 55.9583, + "eval_rougeL_for_task1152_bard_word_analogy": 14.0, + "eval_rougeL_for_task1153_bard_word_analogy": 13.0, + "eval_rougeL_for_task1154_bard_word_analogy": 17.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 21.0, + "eval_rougeL_for_task1157_bard_word_analogy": 17.0, + "eval_rougeL_for_task1158_bard_word_analogy": 9.0, + "eval_rougeL_for_task1159_bard_word_analogy": 2.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 21.9574, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 73.7696, + "eval_rougeL_for_task121_zest_question_rewriting": 43.4156, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.8973, + "eval_rougeL_for_task1344_rte_textual_entailment": 49.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.6383, + "eval_rougeL_for_task1356_xlsum_title_generation": 8.8413, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.883, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 51.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 9.2333, + "eval_rougeL_for_task1407_dart_data_to_text": 28.8593, + "eval_rougeL_for_task1409_dart_data_to_text": 43.5462, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.0563, + "eval_rougeL_for_task1439_doqa_answerability_classification": 45.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 27.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 28.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 29.0257, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 84.2092, + "eval_rougeL_for_task1562_zest_question_rewriting": 46.0269, + "eval_rougeL_for_task1586_scifact_title_generation": 25.038, + "eval_rougeL_for_task1598_nyc_data_to_text": 36.5553, + "eval_rougeL_for_task1612_sick_textual_entailment": 34.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 75.635, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 83.6947, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 27.9466, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 66.1393, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 49.6519, + "eval_rougeL_for_task190_snli_textual_entailment": 52.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 25.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task219_rocstories_title_generation": 27.1032, + "eval_rougeL_for_task220_rocstories_title_generation": 46.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 43.0667, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 41.4759, + "eval_rougeL_for_task288_gigaword_title_generation": 23.5837, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 8.3333, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 39.4667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.4727, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 29.5, + "eval_rougeL_for_task402_grailqa_question_rewriting": 67.6651, + "eval_rougeL_for_task418_persent_title_generation": 18.1053, + "eval_rougeL_for_task442_com_qa_question_rewriting": 63.8237, + "eval_rougeL_for_task500_scruples_title_generation": 11.8338, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 38.3289, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 34.2828, + "eval_rougeL_for_task602_wikitext_title_generation": 8.461, + "eval_rougeL_for_task613_liar_keyword_tagging": 24.9, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 28.3601, + "eval_rougeL_for_task619_ohsumed_title_generation": 31.3727, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 32.1714, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 45.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 93.0286, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 8.3164, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 72.8198, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 61.0911, + "eval_rougeL_for_task677_ollie_data_to_text": 25.8154, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 25.6271, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.3102, + "eval_rougeL_for_task769_qed_title_generation": 63.4072, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 37.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 48.6333, + "eval_rougeL_for_task892_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 44.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 40.2578, + "eval_rougeL_for_task970_sherliic_textual_entailment": 46.0, + "eval_rougeL_for_textual_entailment": 41.4444, + "eval_rougeL_for_title_generation": 26.981, + "eval_rougeL_for_word_analogy": 17.875, + "eval_runtime": 754.2612, + "eval_samples_per_second": 15.79, + "eval_steps_per_second": 0.495, + "step": 100 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 1.6078, + "step": 200 + }, + { + "epoch": 0.05, + "eval_exact_match": 26.4736, + "eval_exact_match_for_answerability_classification": 50.0, + "eval_exact_match_for_cause_effect_classification": 36.8571, + "eval_exact_match_for_coreference_resolution": 30.5714, + "eval_exact_match_for_data_to_text": 6.0533, + "eval_exact_match_for_dialogue_act_recognition": 39.7143, + "eval_exact_match_for_grammar_error_correction": 8.0, + "eval_exact_match_for_keyword_tagging": 42.6, + "eval_exact_match_for_overlap_extraction": 13.0, + "eval_exact_match_for_question_rewriting": 1.4545, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 34.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 42.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 25.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 18.0, + "eval_exact_match_for_task1153_bard_word_analogy": 11.0, + "eval_exact_match_for_task1154_bard_word_analogy": 16.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 22.0, + "eval_exact_match_for_task1157_bard_word_analogy": 21.0, + "eval_exact_match_for_task1158_bard_word_analogy": 13.0, + "eval_exact_match_for_task1159_bard_word_analogy": 8.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 7.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 5.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 30.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 35.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 41.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 2.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 57.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 37.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 3.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 16.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 48.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 44.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 15.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 4.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 37.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task219_rocstories_title_generation": 10.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 53.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 14.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 1.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 31.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 45.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 56.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 22.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 7.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 4.0, + "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, + "eval_exact_match_for_task613_liar_keyword_tagging": 18.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 16.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 87.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 3.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 14.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 69.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 57.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 19.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 37.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 47.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 37.6667, + "eval_exact_match_for_title_generation": 8.6883, + "eval_exact_match_for_word_analogy": 19.875, + "eval_f1": 42.8656, + "eval_f1_for_answerability_classification": 52.5855, + "eval_f1_for_cause_effect_classification": 52.5935, + "eval_f1_for_coreference_resolution": 38.9487, + "eval_f1_for_data_to_text": 48.2747, + "eval_f1_for_dialogue_act_recognition": 43.2857, + "eval_f1_for_grammar_error_correction": 56.5501, + "eval_f1_for_keyword_tagging": 54.0313, + "eval_f1_for_overlap_extraction": 43.0464, + "eval_f1_for_question_rewriting": 66.4739, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 35.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 77.7937, + "eval_f1_for_task035_winogrande_question_rewriting": 82.6968, + "eval_f1_for_task036_qasc_keyword_tagging": 73.7564, + "eval_f1_for_task039_qasc_overlap_extraction": 34.3, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 52.3001, + "eval_f1_for_task1152_bard_word_analogy": 18.0, + "eval_f1_for_task1153_bard_word_analogy": 13.0, + "eval_f1_for_task1154_bard_word_analogy": 16.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 22.0, + "eval_f1_for_task1157_bard_word_analogy": 21.0, + "eval_f1_for_task1158_bard_word_analogy": 13.0, + "eval_f1_for_task1159_bard_word_analogy": 8.0, + "eval_f1_for_task1161_coda_19_title_generation": 28.4119, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 78.7153, + "eval_f1_for_task121_zest_question_rewriting": 43.5112, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.7183, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.1817, + "eval_f1_for_task1356_xlsum_title_generation": 9.6419, + "eval_f1_for_task1358_xlsum_title_generation": 29.9752, + "eval_f1_for_task1385_anli_textual_entailment": 32.0, + "eval_f1_for_task1386_anli_textual_entailment": 30.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 35.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 41.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 25.0, + "eval_f1_for_task1407_dart_data_to_text": 31.9919, + "eval_f1_for_task1409_dart_data_to_text": 46.6315, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.789, + "eval_f1_for_task1439_doqa_answerability_classification": 48.2222, + "eval_f1_for_task1442_doqa_answerability_classification": 57.0, + "eval_f1_for_task1516_imppres_textual_entailment": 37.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 27.7877, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.3112, + "eval_f1_for_task1562_zest_question_rewriting": 49.2966, + "eval_f1_for_task1586_scifact_title_generation": 30.2313, + "eval_f1_for_task1598_nyc_data_to_text": 46.7021, + "eval_f1_for_task1612_sick_textual_entailment": 34.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.3403, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 48.0, + "eval_f1_for_task1631_open_pi_data_to_text": 92.1859, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_f1_for_task1659_billsum_title_generation": 34.652, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 66.7371, + "eval_f1_for_task1728_web_nlg_data_to_text": 51.1744, + "eval_f1_for_task190_snli_textual_entailment": 0.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 37.0, + "eval_f1_for_task202_multinli_textual_entailment": 34.0, + "eval_f1_for_task219_rocstories_title_generation": 26.5881, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_f1_for_task232_iirc_answerability_classification": 53.0, + "eval_f1_for_task233_iirc_answerability_classification": 49.0556, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 19.0667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 51.7928, + "eval_f1_for_task288_gigaword_title_generation": 27.4994, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 42.1, + "eval_f1_for_task329_gap_coreference_resolution": 31.0, + "eval_f1_for_task330_gap_coreference_resolution": 56.0667, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 81.6667, + "eval_f1_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.2234, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 27.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 80.4069, + "eval_f1_for_task418_persent_title_generation": 21.3918, + "eval_f1_for_task442_com_qa_question_rewriting": 66.8186, + "eval_f1_for_task500_scruples_title_generation": 13.5998, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 40.2948, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 35.3077, + "eval_f1_for_task602_wikitext_title_generation": 11.1388, + "eval_f1_for_task613_liar_keyword_tagging": 19.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 12.9311, + "eval_f1_for_task619_ohsumed_title_generation": 36.6238, + "eval_f1_for_task620_ohsumed_keyword_tagging": 32.4524, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.6143, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 3.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 71.1369, + "eval_f1_for_task671_ambigqa_question_rewriting": 62.3151, + "eval_f1_for_task677_ollie_data_to_text": 27.8431, + "eval_f1_for_task738_perspectrum_textual_entailment": 14.0, + "eval_f1_for_task743_eurlex_title_generation": 27.8246, + "eval_f1_for_task760_msr_sqa_data_to_text": 2.6247, + "eval_f1_for_task769_qed_title_generation": 78.0286, + "eval_f1_for_task827_copa_cause_effect_classification": 57.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 19.0, + "eval_f1_for_task890_gwsd_textual_entailment": 37.0, + "eval_f1_for_task891_gap_coreference_resolution": 56.4778, + "eval_f1_for_task892_gap_coreference_resolution": 33.0, + "eval_f1_for_task893_gap_coreference_resolution": 34.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 49.2381, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 37.6667, + "eval_f1_for_title_generation": 30.3214, + "eval_f1_for_word_analogy": 20.125, + "eval_gen_len": 10.4497, + "eval_global_step": 200, + "eval_loss": 1.2393280267715454, + "eval_rouge1": 44.7826, + "eval_rouge1_for_answerability_classification": 52.5855, + "eval_rouge1_for_cause_effect_classification": 53.3266, + "eval_rouge1_for_coreference_resolution": 39.8098, + "eval_rouge1_for_data_to_text": 51.054, + "eval_rouge1_for_dialogue_act_recognition": 45.7, + "eval_rouge1_for_grammar_error_correction": 61.596, + "eval_rouge1_for_keyword_tagging": 57.8019, + "eval_rouge1_for_overlap_extraction": 44.615, + "eval_rouge1_for_question_rewriting": 68.3317, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 39.1667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 77.8249, + "eval_rouge1_for_task035_winogrande_question_rewriting": 83.5317, + "eval_rouge1_for_task036_qasc_keyword_tagging": 77.1762, + "eval_rouge1_for_task039_qasc_overlap_extraction": 36.2, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 65.8127, + "eval_rouge1_for_task1152_bard_word_analogy": 18.0, + "eval_rouge1_for_task1153_bard_word_analogy": 13.0, + "eval_rouge1_for_task1154_bard_word_analogy": 16.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 22.0, + "eval_rouge1_for_task1157_bard_word_analogy": 21.0, + "eval_rouge1_for_task1158_bard_word_analogy": 13.0, + "eval_rouge1_for_task1159_bard_word_analogy": 8.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 32.6204, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 78.9904, + "eval_rouge1_for_task121_zest_question_rewriting": 46.1399, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.1901, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.8636, + "eval_rouge1_for_task1356_xlsum_title_generation": 11.9671, + "eval_rouge1_for_task1358_xlsum_title_generation": 35.6339, + "eval_rouge1_for_task1385_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 30.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 35.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 41.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 27.9, + "eval_rouge1_for_task1407_dart_data_to_text": 31.7923, + "eval_rouge1_for_task1409_dart_data_to_text": 47.377, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.5005, + "eval_rouge1_for_task1439_doqa_answerability_classification": 48.2222, + "eval_rouge1_for_task1442_doqa_answerability_classification": 57.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 37.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 30.2558, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.6915, + "eval_rouge1_for_task1562_zest_question_rewriting": 53.0971, + "eval_rouge1_for_task1586_scifact_title_generation": 33.1289, + "eval_rouge1_for_task1598_nyc_data_to_text": 48.0909, + "eval_rouge1_for_task1612_sick_textual_entailment": 34.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.613, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 48.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 92.342, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1659_billsum_title_generation": 36.0287, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 66.7371, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 55.7757, + "eval_rouge1_for_task190_snli_textual_entailment": 0.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 37.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task219_rocstories_title_generation": 31.6762, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 53.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 49.0556, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 19.7333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 53.03, + "eval_rouge1_for_task288_gigaword_title_generation": 29.9465, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 42.4, + "eval_rouge1_for_task329_gap_coreference_resolution": 31.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 55.9, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 81.6667, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.5479, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 35.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 82.785, + "eval_rouge1_for_task418_persent_title_generation": 24.8337, + "eval_rouge1_for_task442_com_qa_question_rewriting": 70.227, + "eval_rouge1_for_task500_scruples_title_generation": 15.2415, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 40.9048, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 35.9558, + "eval_rouge1_for_task602_wikitext_title_generation": 11.5131, + "eval_rouge1_for_task613_liar_keyword_tagging": 33.3333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 17.7386, + "eval_rouge1_for_task619_ohsumed_title_generation": 39.2492, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 33.3857, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.1143, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 3.0, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 73.0561, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.5196, + "eval_rouge1_for_task677_ollie_data_to_text": 30.4857, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 29.651, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 2.8337, + "eval_rouge1_for_task769_qed_title_generation": 78.0603, + "eval_rouge1_for_task827_copa_cause_effect_classification": 57.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 37.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 56.2333, + "eval_rouge1_for_task892_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 34.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 49.2933, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 41.0278, + "eval_rouge1_for_title_generation": 32.456, + "eval_rouge1_for_word_analogy": 20.125, + "eval_rougeL": 43.5956, + "eval_rougeL_for_answerability_classification": 52.5855, + "eval_rougeL_for_cause_effect_classification": 53.0882, + "eval_rougeL_for_coreference_resolution": 39.8098, + "eval_rougeL_for_data_to_text": 45.1732, + "eval_rougeL_for_dialogue_act_recognition": 45.7, + "eval_rougeL_for_grammar_error_correction": 60.959, + "eval_rougeL_for_keyword_tagging": 57.2352, + "eval_rougeL_for_overlap_extraction": 43.6386, + "eval_rougeL_for_question_rewriting": 64.4068, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 39.1667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 77.5618, + "eval_rougeL_for_task035_winogrande_question_rewriting": 82.2814, + "eval_rougeL_for_task036_qasc_keyword_tagging": 76.0762, + "eval_rougeL_for_task039_qasc_overlap_extraction": 36.2, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 56.2666, + "eval_rougeL_for_task1152_bard_word_analogy": 18.0, + "eval_rougeL_for_task1153_bard_word_analogy": 13.0, + "eval_rougeL_for_task1154_bard_word_analogy": 16.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 22.0, + "eval_rougeL_for_task1157_bard_word_analogy": 21.0, + "eval_rougeL_for_task1158_bard_word_analogy": 13.0, + "eval_rougeL_for_task1159_bard_word_analogy": 8.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 27.8231, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 77.8236, + "eval_rougeL_for_task121_zest_question_rewriting": 38.0905, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.7846, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.9909, + "eval_rougeL_for_task1356_xlsum_title_generation": 9.7735, + "eval_rougeL_for_task1358_xlsum_title_generation": 30.6533, + "eval_rougeL_for_task1385_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 30.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 35.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 41.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 27.9, + "eval_rougeL_for_task1407_dart_data_to_text": 29.1314, + "eval_rougeL_for_task1409_dart_data_to_text": 42.1841, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.1767, + "eval_rougeL_for_task1439_doqa_answerability_classification": 48.2222, + "eval_rougeL_for_task1442_doqa_answerability_classification": 57.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 37.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 29.0221, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.7413, + "eval_rougeL_for_task1562_zest_question_rewriting": 45.2143, + "eval_rougeL_for_task1586_scifact_title_generation": 26.9059, + "eval_rougeL_for_task1598_nyc_data_to_text": 36.9498, + "eval_rougeL_for_task1612_sick_textual_entailment": 34.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.1862, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 48.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 91.5688, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1659_billsum_title_generation": 31.3524, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 66.7371, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 49.8692, + "eval_rougeL_for_task190_snli_textual_entailment": 0.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 37.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task219_rocstories_title_generation": 31.3429, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 53.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 49.0556, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 19.7333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 51.0772, + "eval_rougeL_for_task288_gigaword_title_generation": 26.0759, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 42.4, + "eval_rougeL_for_task329_gap_coreference_resolution": 31.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 55.9, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 81.6667, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.3519, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 35.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 70.0681, + "eval_rougeL_for_task418_persent_title_generation": 21.225, + "eval_rougeL_for_task442_com_qa_question_rewriting": 66.1528, + "eval_rougeL_for_task500_scruples_title_generation": 14.3608, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 40.6779, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 34.9224, + "eval_rougeL_for_task602_wikitext_title_generation": 11.2901, + "eval_rougeL_for_task613_liar_keyword_tagging": 33.3333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 17.2652, + "eval_rougeL_for_task619_ohsumed_title_generation": 35.271, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 31.6524, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.1143, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 3.0, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 71.4236, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 61.6818, + "eval_rougeL_for_task677_ollie_data_to_text": 25.4704, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 26.3861, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 2.8337, + "eval_rougeL_for_task769_qed_title_generation": 78.0603, + "eval_rougeL_for_task827_copa_cause_effect_classification": 57.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 37.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 56.2333, + "eval_rougeL_for_task892_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 34.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 40.9538, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 41.0278, + "eval_rougeL_for_title_generation": 30.1077, + "eval_rougeL_for_word_analogy": 20.125, + "eval_runtime": 717.9988, + "eval_samples_per_second": 16.588, + "eval_steps_per_second": 0.519, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 1.5176, + "step": 500 + }, + { + "epoch": 0.11, + "eval_exact_match": 26.9018, + "eval_exact_match_for_answerability_classification": 51.0769, + "eval_exact_match_for_cause_effect_classification": 34.7143, + "eval_exact_match_for_coreference_resolution": 34.6429, + "eval_exact_match_for_data_to_text": 5.2058, + "eval_exact_match_for_dialogue_act_recognition": 41.7143, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 41.0, + "eval_exact_match_for_overlap_extraction": 10.0, + "eval_exact_match_for_question_rewriting": 1.7273, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 35.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 33.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 19.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 13.0, + "eval_exact_match_for_task1153_bard_word_analogy": 11.0, + "eval_exact_match_for_task1154_bard_word_analogy": 10.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 24.0, + "eval_exact_match_for_task1157_bard_word_analogy": 29.0, + "eval_exact_match_for_task1158_bard_word_analogy": 13.0, + "eval_exact_match_for_task1159_bard_word_analogy": 14.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 8.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 4.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 53.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 36.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 31.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 54.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 43.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 38.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 1.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 4.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task219_rocstories_title_generation": 6.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 52.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 38.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 1.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 12.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 31.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 53.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 53.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 24.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 8.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 3.0, + "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, + "eval_exact_match_for_task613_liar_keyword_tagging": 14.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 20.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 38.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 49.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 88.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 5.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 65.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 33.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 42.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 52.0, + "eval_exact_match_for_textual_entailment": 38.125, + "eval_exact_match_for_title_generation": 7.8475, + "eval_exact_match_for_word_analogy": 20.5, + "eval_f1": 43.8851, + "eval_f1_for_answerability_classification": 53.641, + "eval_f1_for_cause_effect_classification": 52.5754, + "eval_f1_for_coreference_resolution": 44.3813, + "eval_f1_for_data_to_text": 49.0625, + "eval_f1_for_dialogue_act_recognition": 45.2857, + "eval_f1_for_grammar_error_correction": 56.8838, + "eval_f1_for_keyword_tagging": 53.6169, + "eval_f1_for_overlap_extraction": 39.1045, + "eval_f1_for_question_rewriting": 66.4007, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 38.0, + "eval_f1_for_task034_winogrande_question_rewriting": 89.7733, + "eval_f1_for_task035_winogrande_question_rewriting": 79.485, + "eval_f1_for_task036_qasc_keyword_tagging": 67.3894, + "eval_f1_for_task039_qasc_overlap_extraction": 26.4556, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 52.2224, + "eval_f1_for_task1152_bard_word_analogy": 13.0, + "eval_f1_for_task1153_bard_word_analogy": 19.0, + "eval_f1_for_task1154_bard_word_analogy": 10.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 24.0, + "eval_f1_for_task1157_bard_word_analogy": 29.0, + "eval_f1_for_task1158_bard_word_analogy": 13.0, + "eval_f1_for_task1159_bard_word_analogy": 14.0, + "eval_f1_for_task1161_coda_19_title_generation": 25.9459, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 78.0743, + "eval_f1_for_task121_zest_question_rewriting": 44.8888, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 15.0402, + "eval_f1_for_task1344_rte_textual_entailment": 53.0, + "eval_f1_for_task1345_qqp_question_rewriting": 37.1685, + "eval_f1_for_task1356_xlsum_title_generation": 12.5608, + "eval_f1_for_task1358_xlsum_title_generation": 31.207, + "eval_f1_for_task1385_anli_textual_entailment": 32.0, + "eval_f1_for_task1386_anli_textual_entailment": 36.0, + "eval_f1_for_task1387_anli_textual_entailment": 31.0, + "eval_f1_for_task1388_cb_textual_entailment": 40.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 54.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 43.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 47.0, + "eval_f1_for_task1407_dart_data_to_text": 32.8892, + "eval_f1_for_task1409_dart_data_to_text": 48.2231, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 31.0353, + "eval_f1_for_task1439_doqa_answerability_classification": 52.0, + "eval_f1_for_task1442_doqa_answerability_classification": 53.0, + "eval_f1_for_task1516_imppres_textual_entailment": 0.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 31.5335, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 82.7322, + "eval_f1_for_task1562_zest_question_rewriting": 45.8426, + "eval_f1_for_task1586_scifact_title_generation": 29.7835, + "eval_f1_for_task1598_nyc_data_to_text": 48.0558, + "eval_f1_for_task1612_sick_textual_entailment": 34.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.6683, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_f1_for_task1631_open_pi_data_to_text": 90.6398, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_f1_for_task1659_billsum_title_generation": 32.7159, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 51.2535, + "eval_f1_for_task1728_web_nlg_data_to_text": 54.2245, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 33.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 34.0, + "eval_f1_for_task219_rocstories_title_generation": 23.3754, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 50.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 52.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 48.0667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 51.7534, + "eval_f1_for_task288_gigaword_title_generation": 26.4482, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 59.9714, + "eval_f1_for_task329_gap_coreference_resolution": 31.0, + "eval_f1_for_task330_gap_coreference_resolution": 60.1802, + "eval_f1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 32.6793, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 27.8333, + "eval_f1_for_task402_grailqa_question_rewriting": 80.5182, + "eval_f1_for_task418_persent_title_generation": 22.4257, + "eval_f1_for_task442_com_qa_question_rewriting": 66.9219, + "eval_f1_for_task500_scruples_title_generation": 14.8365, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 37.2707, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 34.9289, + "eval_f1_for_task602_wikitext_title_generation": 12.4304, + "eval_f1_for_task613_liar_keyword_tagging": 16.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 25.6821, + "eval_f1_for_task619_ohsumed_title_generation": 36.7329, + "eval_f1_for_task620_ohsumed_keyword_tagging": 40.4857, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 38.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 49.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.2095, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 7.8889, + "eval_f1_for_task670_ambigqa_question_rewriting": 72.5503, + "eval_f1_for_task671_ambigqa_question_rewriting": 56.5166, + "eval_f1_for_task677_ollie_data_to_text": 27.3515, + "eval_f1_for_task738_perspectrum_textual_entailment": 0.0, + "eval_f1_for_task743_eurlex_title_generation": 29.6597, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.4862, + "eval_f1_for_task769_qed_title_generation": 75.3881, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 33.0, + "eval_f1_for_task891_gap_coreference_resolution": 56.1444, + "eval_f1_for_task892_gap_coreference_resolution": 42.0, + "eval_f1_for_task893_gap_coreference_resolution": 46.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 50.7433, + "eval_f1_for_task970_sherliic_textual_entailment": 52.0, + "eval_f1_for_textual_entailment": 38.125, + "eval_f1_for_title_generation": 30.2855, + "eval_f1_for_word_analogy": 21.5, + "eval_gen_len": 10.6385, + "eval_global_step": 500, + "eval_loss": 1.2217644453048706, + "eval_rouge1": 45.9388, + "eval_rouge1_for_answerability_classification": 53.641, + "eval_rouge1_for_cause_effect_classification": 53.3728, + "eval_rouge1_for_coreference_resolution": 45.3746, + "eval_rouge1_for_data_to_text": 51.4177, + "eval_rouge1_for_dialogue_act_recognition": 47.9333, + "eval_rouge1_for_grammar_error_correction": 61.0941, + "eval_rouge1_for_keyword_tagging": 57.892, + "eval_rouge1_for_overlap_extraction": 41.0417, + "eval_rouge1_for_question_rewriting": 68.2462, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 39.5, + "eval_rouge1_for_task034_winogrande_question_rewriting": 89.8596, + "eval_rouge1_for_task035_winogrande_question_rewriting": 80.3076, + "eval_rouge1_for_task036_qasc_keyword_tagging": 71.5791, + "eval_rouge1_for_task039_qasc_overlap_extraction": 29.0222, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 64.8234, + "eval_rouge1_for_task1152_bard_word_analogy": 13.0, + "eval_rouge1_for_task1153_bard_word_analogy": 19.0, + "eval_rouge1_for_task1154_bard_word_analogy": 10.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 24.0, + "eval_rouge1_for_task1157_bard_word_analogy": 29.0, + "eval_rouge1_for_task1158_bard_word_analogy": 13.0, + "eval_rouge1_for_task1159_bard_word_analogy": 14.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 29.617, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 78.4398, + "eval_rouge1_for_task121_zest_question_rewriting": 47.5493, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.3528, + "eval_rouge1_for_task1344_rte_textual_entailment": 53.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 40.7873, + "eval_rouge1_for_task1356_xlsum_title_generation": 14.6108, + "eval_rouge1_for_task1358_xlsum_title_generation": 35.737, + "eval_rouge1_for_task1385_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 36.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 31.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 54.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 43.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 52.2, + "eval_rouge1_for_task1407_dart_data_to_text": 32.8134, + "eval_rouge1_for_task1409_dart_data_to_text": 48.9587, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.6339, + "eval_rouge1_for_task1439_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 33.7515, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 85.5544, + "eval_rouge1_for_task1562_zest_question_rewriting": 49.6397, + "eval_rouge1_for_task1586_scifact_title_generation": 32.4694, + "eval_rouge1_for_task1598_nyc_data_to_text": 49.0813, + "eval_rouge1_for_task1612_sick_textual_entailment": 34.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.8902, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 90.8566, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_rouge1_for_task1659_billsum_title_generation": 34.7626, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 51.2177, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 56.3787, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task219_rocstories_title_generation": 28.6652, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 52.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 49.2333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 53.0612, + "eval_rouge1_for_task288_gigaword_title_generation": 29.2566, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 62.1048, + "eval_rouge1_for_task329_gap_coreference_resolution": 31.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 60.1357, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 32.6339, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 36.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 82.5881, + "eval_rouge1_for_task418_persent_title_generation": 25.287, + "eval_rouge1_for_task442_com_qa_question_rewriting": 70.4823, + "eval_rouge1_for_task500_scruples_title_generation": 16.5759, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 37.8476, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 36.9518, + "eval_rouge1_for_task602_wikitext_title_generation": 13.5123, + "eval_rouge1_for_task613_liar_keyword_tagging": 29.8, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 31.3089, + "eval_rouge1_for_task619_ohsumed_title_generation": 39.5756, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 43.5857, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 38.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 49.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.4952, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 8.3667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 74.0878, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 58.0767, + "eval_rouge1_for_task677_ollie_data_to_text": 29.7517, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 31.5747, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.7317, + "eval_rouge1_for_task769_qed_title_generation": 75.3881, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 33.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 55.8524, + "eval_rouge1_for_task892_gap_coreference_resolution": 42.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 46.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 51.0759, + "eval_rouge1_for_task970_sherliic_textual_entailment": 52.0, + "eval_rouge1_for_textual_entailment": 42.0694, + "eval_rouge1_for_title_generation": 32.4425, + "eval_rouge1_for_word_analogy": 21.5, + "eval_rougeL": 44.6467, + "eval_rougeL_for_answerability_classification": 53.641, + "eval_rougeL_for_cause_effect_classification": 52.9068, + "eval_rougeL_for_coreference_resolution": 45.3746, + "eval_rougeL_for_data_to_text": 45.2412, + "eval_rougeL_for_dialogue_act_recognition": 47.9333, + "eval_rougeL_for_grammar_error_correction": 60.3181, + "eval_rougeL_for_keyword_tagging": 57.3089, + "eval_rougeL_for_overlap_extraction": 39.9391, + "eval_rougeL_for_question_rewriting": 64.3159, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 39.5, + "eval_rougeL_for_task034_winogrande_question_rewriting": 89.7129, + "eval_rougeL_for_task035_winogrande_question_rewriting": 79.3736, + "eval_rougeL_for_task036_qasc_keyword_tagging": 70.5471, + "eval_rougeL_for_task039_qasc_overlap_extraction": 29.0222, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 54.6455, + "eval_rougeL_for_task1152_bard_word_analogy": 13.0, + "eval_rougeL_for_task1153_bard_word_analogy": 19.0, + "eval_rougeL_for_task1154_bard_word_analogy": 10.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 24.0, + "eval_rougeL_for_task1157_bard_word_analogy": 29.0, + "eval_rougeL_for_task1158_bard_word_analogy": 13.0, + "eval_rougeL_for_task1159_bard_word_analogy": 14.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 24.1276, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 77.3734, + "eval_rougeL_for_task121_zest_question_rewriting": 41.4981, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.5471, + "eval_rougeL_for_task1344_rte_textual_entailment": 53.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 37.611, + "eval_rougeL_for_task1356_xlsum_title_generation": 12.2655, + "eval_rougeL_for_task1358_xlsum_title_generation": 30.3958, + "eval_rougeL_for_task1385_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 36.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 31.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 54.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 43.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 52.2, + "eval_rougeL_for_task1407_dart_data_to_text": 29.4151, + "eval_rougeL_for_task1409_dart_data_to_text": 42.0739, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.1624, + "eval_rougeL_for_task1439_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 30.677, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 84.4737, + "eval_rougeL_for_task1562_zest_question_rewriting": 43.1824, + "eval_rougeL_for_task1586_scifact_title_generation": 26.0865, + "eval_rougeL_for_task1598_nyc_data_to_text": 39.7798, + "eval_rougeL_for_task1612_sick_textual_entailment": 34.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.278, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 89.7205, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_rougeL_for_task1659_billsum_title_generation": 29.4758, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 51.2177, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 50.1056, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task219_rocstories_title_generation": 28.4152, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 52.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 49.2333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 50.8561, + "eval_rougeL_for_task288_gigaword_title_generation": 24.9081, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 62.1048, + "eval_rougeL_for_task329_gap_coreference_resolution": 31.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 60.1357, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.0663, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 36.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 69.0057, + "eval_rougeL_for_task418_persent_title_generation": 21.205, + "eval_rougeL_for_task442_com_qa_question_rewriting": 65.3038, + "eval_rougeL_for_task500_scruples_title_generation": 15.0637, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 37.4629, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 35.3645, + "eval_rougeL_for_task602_wikitext_title_generation": 13.3149, + "eval_rougeL_for_task613_liar_keyword_tagging": 29.8, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 29.6148, + "eval_rougeL_for_task619_ohsumed_title_generation": 35.5316, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 41.7024, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 38.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 49.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.4952, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 8.3667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 71.8307, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 55.3056, + "eval_rougeL_for_task677_ollie_data_to_text": 24.8844, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 26.9766, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.5917, + "eval_rougeL_for_task769_qed_title_generation": 75.3881, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 33.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 55.8524, + "eval_rougeL_for_task892_gap_coreference_resolution": 42.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 46.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 42.1339, + "eval_rougeL_for_task970_sherliic_textual_entailment": 52.0, + "eval_rougeL_for_textual_entailment": 42.0694, + "eval_rougeL_for_title_generation": 29.6567, + "eval_rougeL_for_word_analogy": 21.5, + "eval_runtime": 774.7723, + "eval_samples_per_second": 15.372, + "eval_steps_per_second": 0.481, + "step": 500 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 1.3905, + "step": 1000 + }, + { + "epoch": 0.23, + "eval_exact_match": 27.5819, + "eval_exact_match_for_answerability_classification": 50.7692, + "eval_exact_match_for_cause_effect_classification": 36.4286, + "eval_exact_match_for_coreference_resolution": 35.6429, + "eval_exact_match_for_data_to_text": 5.8111, + "eval_exact_match_for_dialogue_act_recognition": 42.8571, + "eval_exact_match_for_grammar_error_correction": 8.0, + "eval_exact_match_for_keyword_tagging": 41.4, + "eval_exact_match_for_overlap_extraction": 9.5, + "eval_exact_match_for_question_rewriting": 1.3636, + "eval_exact_match_for_task020_mctaco_answerability_classification": 51.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 39.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 45.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 18.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 49.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 19.0, + "eval_exact_match_for_task1153_bard_word_analogy": 15.0, + "eval_exact_match_for_task1154_bard_word_analogy": 16.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 24.0, + "eval_exact_match_for_task1157_bard_word_analogy": 46.0, + "eval_exact_match_for_task1158_bard_word_analogy": 13.0, + "eval_exact_match_for_task1159_bard_word_analogy": 13.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 21.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 54.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 55.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 51.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 22.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 26.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 16.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 35.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 42.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 14.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 41.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 49.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task219_rocstories_title_generation": 9.0, + "eval_exact_match_for_task220_rocstories_title_generation": 52.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 47.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 37.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 1.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 11.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 52.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 44.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 61.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 26.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 4.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 15.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 12.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 37.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 47.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 85.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 4.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 15.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 71.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 41.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 33.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 42.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 42.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 53.0, + "eval_exact_match_for_textual_entailment": 38.4167, + "eval_exact_match_for_title_generation": 8.296, + "eval_exact_match_for_word_analogy": 24.5, + "eval_f1": 44.9812, + "eval_f1_for_answerability_classification": 53.3333, + "eval_f1_for_cause_effect_classification": 53.6894, + "eval_f1_for_coreference_resolution": 45.2069, + "eval_f1_for_data_to_text": 50.443, + "eval_f1_for_dialogue_act_recognition": 46.3571, + "eval_f1_for_grammar_error_correction": 67.9042, + "eval_f1_for_keyword_tagging": 53.5269, + "eval_f1_for_overlap_extraction": 37.1117, + "eval_f1_for_question_rewriting": 68.7739, + "eval_f1_for_task020_mctaco_answerability_classification": 51.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 42.5, + "eval_f1_for_task034_winogrande_question_rewriting": 91.9166, + "eval_f1_for_task035_winogrande_question_rewriting": 84.1863, + "eval_f1_for_task036_qasc_keyword_tagging": 75.6343, + "eval_f1_for_task039_qasc_overlap_extraction": 29.1889, + "eval_f1_for_task050_multirc_answerability_classification": 49.0, + "eval_f1_for_task102_commongen_data_to_text": 53.4927, + "eval_f1_for_task1152_bard_word_analogy": 19.0, + "eval_f1_for_task1153_bard_word_analogy": 19.0, + "eval_f1_for_task1154_bard_word_analogy": 16.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 24.0, + "eval_f1_for_task1157_bard_word_analogy": 46.0, + "eval_f1_for_task1158_bard_word_analogy": 13.0, + "eval_f1_for_task1159_bard_word_analogy": 13.0, + "eval_f1_for_task1161_coda_19_title_generation": 29.1427, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.6966, + "eval_f1_for_task121_zest_question_rewriting": 44.1624, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.6626, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.5259, + "eval_f1_for_task1356_xlsum_title_generation": 12.4013, + "eval_f1_for_task1358_xlsum_title_generation": 31.2728, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 34.0, + "eval_f1_for_task1388_cb_textual_entailment": 21.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 54.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 55.0, + "eval_f1_for_task1407_dart_data_to_text": 34.4379, + "eval_f1_for_task1409_dart_data_to_text": 50.0703, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 52.111, + "eval_f1_for_task1439_doqa_answerability_classification": 51.0, + "eval_f1_for_task1442_doqa_answerability_classification": 50.0, + "eval_f1_for_task1516_imppres_textual_entailment": 22.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 26.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 30.9726, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.6975, + "eval_f1_for_task1562_zest_question_rewriting": 45.0132, + "eval_f1_for_task1586_scifact_title_generation": 31.3146, + "eval_f1_for_task1598_nyc_data_to_text": 49.5622, + "eval_f1_for_task1612_sick_textual_entailment": 34.0, + "eval_f1_for_task1615_sick_textual_entailment": 35.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.4619, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_f1_for_task1631_open_pi_data_to_text": 89.8906, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_f1_for_task1659_billsum_title_generation": 32.1105, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 63.6174, + "eval_f1_for_task1728_web_nlg_data_to_text": 56.5388, + "eval_f1_for_task190_snli_textual_entailment": 41.0, + "eval_f1_for_task199_multinli_textual_entailment": 49.0, + "eval_f1_for_task200_multinli_textual_entailment": 33.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 34.0, + "eval_f1_for_task219_rocstories_title_generation": 25.8079, + "eval_f1_for_task220_rocstories_title_generation": 52.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 50.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 47.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 47.5667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 45.0345, + "eval_f1_for_task288_gigaword_title_generation": 27.4222, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 57.8548, + "eval_f1_for_task329_gap_coreference_resolution": 35.0, + "eval_f1_for_task330_gap_coreference_resolution": 53.7468, + "eval_f1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 81.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 87.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 30.0065, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 29.8333, + "eval_f1_for_task402_grailqa_question_rewriting": 77.6696, + "eval_f1_for_task418_persent_title_generation": 23.2861, + "eval_f1_for_task442_com_qa_question_rewriting": 70.2923, + "eval_f1_for_task500_scruples_title_generation": 14.2379, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 38.6303, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 37.4986, + "eval_f1_for_task602_wikitext_title_generation": 12.2519, + "eval_f1_for_task613_liar_keyword_tagging": 16.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 27.4862, + "eval_f1_for_task619_ohsumed_title_generation": 39.8754, + "eval_f1_for_task620_ohsumed_keyword_tagging": 33.3381, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 37.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 47.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 92.3286, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 5.1667, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.7818, + "eval_f1_for_task671_ambigqa_question_rewriting": 65.806, + "eval_f1_for_task677_ollie_data_to_text": 25.4279, + "eval_f1_for_task738_perspectrum_textual_entailment": 15.0, + "eval_f1_for_task743_eurlex_title_generation": 28.7079, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.1553, + "eval_f1_for_task769_qed_title_generation": 79.2333, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 41.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_f1_for_task890_gwsd_textual_entailment": 33.0, + "eval_f1_for_task891_gap_coreference_resolution": 51.6111, + "eval_f1_for_task892_gap_coreference_resolution": 42.0, + "eval_f1_for_task893_gap_coreference_resolution": 50.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 56.418, + "eval_f1_for_task970_sherliic_textual_entailment": 53.0, + "eval_f1_for_textual_entailment": 38.4167, + "eval_f1_for_title_generation": 31.2706, + "eval_f1_for_word_analogy": 25.0, + "eval_gen_len": 10.8891, + "eval_global_step": 1000, + "eval_loss": 1.2411549091339111, + "eval_rouge1": 46.8497, + "eval_rouge1_for_answerability_classification": 53.3333, + "eval_rouge1_for_cause_effect_classification": 54.4048, + "eval_rouge1_for_coreference_resolution": 45.8815, + "eval_rouge1_for_data_to_text": 53.3139, + "eval_rouge1_for_dialogue_act_recognition": 48.9619, + "eval_rouge1_for_grammar_error_correction": 70.0065, + "eval_rouge1_for_keyword_tagging": 57.8855, + "eval_rouge1_for_overlap_extraction": 38.985, + "eval_rouge1_for_question_rewriting": 70.4781, + "eval_rouge1_for_task020_mctaco_answerability_classification": 51.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 42.9, + "eval_rouge1_for_task034_winogrande_question_rewriting": 91.9931, + "eval_rouge1_for_task035_winogrande_question_rewriting": 84.9943, + "eval_rouge1_for_task036_qasc_keyword_tagging": 78.1455, + "eval_rouge1_for_task039_qasc_overlap_extraction": 31.7556, + "eval_rouge1_for_task050_multirc_answerability_classification": 49.0, + "eval_rouge1_for_task102_commongen_data_to_text": 67.3224, + "eval_rouge1_for_task1152_bard_word_analogy": 19.0, + "eval_rouge1_for_task1153_bard_word_analogy": 19.0, + "eval_rouge1_for_task1154_bard_word_analogy": 16.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 24.0, + "eval_rouge1_for_task1157_bard_word_analogy": 46.0, + "eval_rouge1_for_task1158_bard_word_analogy": 13.0, + "eval_rouge1_for_task1159_bard_word_analogy": 13.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 33.1847, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.962, + "eval_rouge1_for_task121_zest_question_rewriting": 46.9608, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 13.6427, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.1473, + "eval_rouge1_for_task1356_xlsum_title_generation": 15.1246, + "eval_rouge1_for_task1358_xlsum_title_generation": 36.1937, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 21.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 54.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 59.9, + "eval_rouge1_for_task1407_dart_data_to_text": 34.7902, + "eval_rouge1_for_task1409_dart_data_to_text": 50.705, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 53.1751, + "eval_rouge1_for_task1439_doqa_answerability_classification": 51.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 22.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 26.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 33.8481, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.8378, + "eval_rouge1_for_task1562_zest_question_rewriting": 48.737, + "eval_rouge1_for_task1586_scifact_title_generation": 34.3705, + "eval_rouge1_for_task1598_nyc_data_to_text": 51.2371, + "eval_rouge1_for_task1612_sick_textual_entailment": 34.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.7346, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 90.189, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_rouge1_for_task1659_billsum_title_generation": 33.9567, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 63.6174, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 59.0209, + "eval_rouge1_for_task190_snli_textual_entailment": 41.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 49.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task219_rocstories_title_generation": 29.4119, + "eval_rouge1_for_task220_rocstories_title_generation": 52.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 47.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 48.7333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 46.2145, + "eval_rouge1_for_task288_gigaword_title_generation": 29.743, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 59.5214, + "eval_rouge1_for_task329_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 53.869, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 81.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 87.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.1637, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 35.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 79.7805, + "eval_rouge1_for_task418_persent_title_generation": 26.8838, + "eval_rouge1_for_task442_com_qa_question_rewriting": 73.5249, + "eval_rouge1_for_task500_scruples_title_generation": 15.6582, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 38.9048, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 39.7711, + "eval_rouge1_for_task602_wikitext_title_generation": 13.5901, + "eval_rouge1_for_task613_liar_keyword_tagging": 26.7667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 32.3365, + "eval_rouge1_for_task619_ohsumed_title_generation": 42.7873, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 41.6868, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 37.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 47.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 92.8286, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 5.1667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.6426, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 66.7816, + "eval_rouge1_for_task677_ollie_data_to_text": 28.2565, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 30.6564, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.4881, + "eval_rouge1_for_task769_qed_title_generation": 79.6333, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 41.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 33.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 51.7, + "eval_rouge1_for_task892_gap_coreference_resolution": 42.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 50.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 57.9448, + "eval_rouge1_for_task970_sherliic_textual_entailment": 53.0, + "eval_rouge1_for_textual_entailment": 41.6806, + "eval_rouge1_for_title_generation": 33.4746, + "eval_rouge1_for_word_analogy": 25.0, + "eval_rougeL": 45.4528, + "eval_rougeL_for_answerability_classification": 53.3333, + "eval_rougeL_for_cause_effect_classification": 53.777, + "eval_rougeL_for_coreference_resolution": 45.8815, + "eval_rougeL_for_data_to_text": 46.0794, + "eval_rougeL_for_dialogue_act_recognition": 48.9619, + "eval_rougeL_for_grammar_error_correction": 68.69, + "eval_rougeL_for_keyword_tagging": 57.4014, + "eval_rougeL_for_overlap_extraction": 37.8863, + "eval_rougeL_for_question_rewriting": 66.8011, + "eval_rougeL_for_task020_mctaco_answerability_classification": 51.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 42.9, + "eval_rougeL_for_task034_winogrande_question_rewriting": 91.9931, + "eval_rougeL_for_task035_winogrande_question_rewriting": 83.2617, + "eval_rougeL_for_task036_qasc_keyword_tagging": 77.8455, + "eval_rougeL_for_task039_qasc_overlap_extraction": 31.7556, + "eval_rougeL_for_task050_multirc_answerability_classification": 49.0, + "eval_rougeL_for_task102_commongen_data_to_text": 56.4194, + "eval_rougeL_for_task1152_bard_word_analogy": 19.0, + "eval_rougeL_for_task1153_bard_word_analogy": 19.0, + "eval_rougeL_for_task1154_bard_word_analogy": 16.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 24.0, + "eval_rougeL_for_task1157_bard_word_analogy": 46.0, + "eval_rougeL_for_task1158_bard_word_analogy": 13.0, + "eval_rougeL_for_task1159_bard_word_analogy": 13.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 26.7174, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.7521, + "eval_rougeL_for_task121_zest_question_rewriting": 40.3679, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.8881, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.0717, + "eval_rougeL_for_task1356_xlsum_title_generation": 12.4171, + "eval_rougeL_for_task1358_xlsum_title_generation": 30.1745, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 21.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 54.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 59.9, + "eval_rougeL_for_task1407_dart_data_to_text": 30.4571, + "eval_rougeL_for_task1409_dart_data_to_text": 42.5245, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 51.5331, + "eval_rougeL_for_task1439_doqa_answerability_classification": 51.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 22.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 26.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 30.4265, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.8468, + "eval_rougeL_for_task1562_zest_question_rewriting": 42.6456, + "eval_rougeL_for_task1586_scifact_title_generation": 27.5469, + "eval_rougeL_for_task1598_nyc_data_to_text": 40.6662, + "eval_rougeL_for_task1612_sick_textual_entailment": 34.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.4316, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 88.5907, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_rougeL_for_task1659_billsum_title_generation": 28.9214, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 63.6174, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 52.4842, + "eval_rougeL_for_task190_snli_textual_entailment": 41.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 49.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task219_rocstories_title_generation": 29.1262, + "eval_rougeL_for_task220_rocstories_title_generation": 52.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 47.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 48.7333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 44.017, + "eval_rougeL_for_task288_gigaword_title_generation": 25.6274, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 59.5214, + "eval_rougeL_for_task329_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 53.869, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 81.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 87.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.9962, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 35.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 66.4055, + "eval_rougeL_for_task418_persent_title_generation": 22.7998, + "eval_rougeL_for_task442_com_qa_question_rewriting": 69.1713, + "eval_rougeL_for_task500_scruples_title_generation": 14.0498, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 38.3604, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 37.2889, + "eval_rougeL_for_task602_wikitext_title_generation": 13.4648, + "eval_rougeL_for_task613_liar_keyword_tagging": 26.7667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 29.1093, + "eval_rougeL_for_task619_ohsumed_title_generation": 37.4826, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 39.5663, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 37.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 47.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 92.8286, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 5.1667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.8612, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 64.8509, + "eval_rougeL_for_task677_ollie_data_to_text": 23.6372, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 25.8875, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.4077, + "eval_rougeL_for_task769_qed_title_generation": 79.6333, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 41.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 33.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 51.7, + "eval_rougeL_for_task892_gap_coreference_resolution": 42.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 50.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 44.9501, + "eval_rougeL_for_task970_sherliic_textual_entailment": 53.0, + "eval_rougeL_for_textual_entailment": 41.6806, + "eval_rougeL_for_title_generation": 30.4181, + "eval_rougeL_for_word_analogy": 25.0, + "eval_runtime": 752.7221, + "eval_samples_per_second": 15.823, + "eval_steps_per_second": 0.496, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 1.3134, + "step": 1500 + }, + { + "epoch": 0.34, + "eval_exact_match": 27.7078, + "eval_exact_match_for_answerability_classification": 50.1538, + "eval_exact_match_for_cause_effect_classification": 34.8571, + "eval_exact_match_for_coreference_resolution": 33.8571, + "eval_exact_match_for_data_to_text": 6.4165, + "eval_exact_match_for_dialogue_act_recognition": 43.0, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 41.6, + "eval_exact_match_for_overlap_extraction": 10.5, + "eval_exact_match_for_question_rewriting": 2.4545, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 32.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 3.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 6.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 42.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 21.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 20.0, + "eval_exact_match_for_task1153_bard_word_analogy": 16.0, + "eval_exact_match_for_task1154_bard_word_analogy": 19.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 26.0, + "eval_exact_match_for_task1157_bard_word_analogy": 48.0, + "eval_exact_match_for_task1158_bard_word_analogy": 27.0, + "eval_exact_match_for_task1159_bard_word_analogy": 13.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 6.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 4.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 2.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 13.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 26.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 49.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 44.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 17.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task219_rocstories_title_generation": 6.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 52.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 47.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 8.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 52.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 46.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 48.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 23.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 9.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 3.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 15.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 11.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 47.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 90.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 1.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 31.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 70.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 22.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 33.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 30.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 40.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 39.25, + "eval_exact_match_for_title_generation": 8.0717, + "eval_exact_match_for_word_analogy": 27.375, + "eval_f1": 45.0008, + "eval_f1_for_answerability_classification": 52.7179, + "eval_f1_for_cause_effect_classification": 52.0137, + "eval_f1_for_coreference_resolution": 42.9702, + "eval_f1_for_data_to_text": 50.2462, + "eval_f1_for_dialogue_act_recognition": 46.4286, + "eval_f1_for_grammar_error_correction": 61.4425, + "eval_f1_for_keyword_tagging": 54.3221, + "eval_f1_for_overlap_extraction": 39.5818, + "eval_f1_for_question_rewriting": 68.6847, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 34.8333, + "eval_f1_for_task034_winogrande_question_rewriting": 92.0622, + "eval_f1_for_task035_winogrande_question_rewriting": 83.2173, + "eval_f1_for_task036_qasc_keyword_tagging": 73.3273, + "eval_f1_for_task039_qasc_overlap_extraction": 31.0746, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 50.6293, + "eval_f1_for_task1152_bard_word_analogy": 20.0, + "eval_f1_for_task1153_bard_word_analogy": 25.3333, + "eval_f1_for_task1154_bard_word_analogy": 19.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 26.0, + "eval_f1_for_task1157_bard_word_analogy": 48.0, + "eval_f1_for_task1158_bard_word_analogy": 27.0, + "eval_f1_for_task1159_bard_word_analogy": 13.0, + "eval_f1_for_task1161_coda_19_title_generation": 31.5685, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.4082, + "eval_f1_for_task121_zest_question_rewriting": 44.1609, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.3418, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.7293, + "eval_f1_for_task1356_xlsum_title_generation": 14.5341, + "eval_f1_for_task1358_xlsum_title_generation": 30.6659, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 48.0, + "eval_f1_for_task1407_dart_data_to_text": 37.0887, + "eval_f1_for_task1409_dart_data_to_text": 48.2419, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 38.6607, + "eval_f1_for_task1439_doqa_answerability_classification": 50.0, + "eval_f1_for_task1442_doqa_answerability_classification": 50.0, + "eval_f1_for_task1516_imppres_textual_entailment": 13.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 26.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 53.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 34.7753, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.2242, + "eval_f1_for_task1562_zest_question_rewriting": 47.695, + "eval_f1_for_task1586_scifact_title_generation": 32.9973, + "eval_f1_for_task1598_nyc_data_to_text": 48.9427, + "eval_f1_for_task1612_sick_textual_entailment": 49.0, + "eval_f1_for_task1615_sick_textual_entailment": 34.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.5971, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 91.6647, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 32.4022, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 65.5721, + "eval_f1_for_task1728_web_nlg_data_to_text": 57.9402, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 33.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 34.0, + "eval_f1_for_task219_rocstories_title_generation": 24.1811, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 50.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 52.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 57.9, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 48.089, + "eval_f1_for_task288_gigaword_title_generation": 28.176, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 52.7667, + "eval_f1_for_task329_gap_coreference_resolution": 32.0, + "eval_f1_for_task330_gap_coreference_resolution": 54.3444, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 76.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 82.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 82.6667, + "eval_f1_for_task393_cod3s_cause_effect_classification": 30.7718, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 27.5, + "eval_f1_for_task402_grailqa_question_rewriting": 78.7363, + "eval_f1_for_task418_persent_title_generation": 23.4478, + "eval_f1_for_task442_com_qa_question_rewriting": 68.2992, + "eval_f1_for_task500_scruples_title_generation": 14.9628, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 38.9105, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 36.7379, + "eval_f1_for_task602_wikitext_title_generation": 13.907, + "eval_f1_for_task613_liar_keyword_tagging": 18.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 18.6571, + "eval_f1_for_task619_ohsumed_title_generation": 42.3363, + "eval_f1_for_task620_ohsumed_keyword_tagging": 34.5262, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 47.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 95.4238, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 1.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 76.9551, + "eval_f1_for_task671_ambigqa_question_rewriting": 65.6708, + "eval_f1_for_task677_ollie_data_to_text": 26.7888, + "eval_f1_for_task738_perspectrum_textual_entailment": 31.0, + "eval_f1_for_task743_eurlex_title_generation": 30.638, + "eval_f1_for_task760_msr_sqa_data_to_text": 4.1656, + "eval_f1_for_task769_qed_title_generation": 76.673, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 22.0, + "eval_f1_for_task890_gwsd_textual_entailment": 33.0, + "eval_f1_for_task891_gap_coreference_resolution": 55.6667, + "eval_f1_for_task892_gap_coreference_resolution": 30.0, + "eval_f1_for_task893_gap_coreference_resolution": 40.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 52.6544, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 39.25, + "eval_f1_for_title_generation": 31.8963, + "eval_f1_for_word_analogy": 28.5417, + "eval_gen_len": 10.3341, + "eval_global_step": 1500, + "eval_loss": 1.230643391609192, + "eval_rouge1": 46.7104, + "eval_rouge1_for_answerability_classification": 52.7179, + "eval_rouge1_for_cause_effect_classification": 52.2789, + "eval_rouge1_for_coreference_resolution": 43.5837, + "eval_rouge1_for_data_to_text": 52.7728, + "eval_rouge1_for_dialogue_act_recognition": 49.0905, + "eval_rouge1_for_grammar_error_correction": 65.0394, + "eval_rouge1_for_keyword_tagging": 57.6952, + "eval_rouge1_for_overlap_extraction": 42.8242, + "eval_rouge1_for_question_rewriting": 70.4236, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 35.2333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.101, + "eval_rouge1_for_task035_winogrande_question_rewriting": 84.0049, + "eval_rouge1_for_task036_qasc_keyword_tagging": 76.1273, + "eval_rouge1_for_task039_qasc_overlap_extraction": 36.4746, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 63.4598, + "eval_rouge1_for_task1152_bard_word_analogy": 20.0, + "eval_rouge1_for_task1153_bard_word_analogy": 25.3333, + "eval_rouge1_for_task1154_bard_word_analogy": 19.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 26.0, + "eval_rouge1_for_task1157_bard_word_analogy": 48.0, + "eval_rouge1_for_task1158_bard_word_analogy": 27.0, + "eval_rouge1_for_task1159_bard_word_analogy": 13.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 35.8717, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.7423, + "eval_rouge1_for_task121_zest_question_rewriting": 46.845, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.6904, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 44.2804, + "eval_rouge1_for_task1356_xlsum_title_generation": 17.4266, + "eval_rouge1_for_task1358_xlsum_title_generation": 35.6985, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 53.3, + "eval_rouge1_for_task1407_dart_data_to_text": 36.8485, + "eval_rouge1_for_task1409_dart_data_to_text": 48.8276, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 42.8462, + "eval_rouge1_for_task1439_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 13.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 26.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 53.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 38.0938, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.2326, + "eval_rouge1_for_task1562_zest_question_rewriting": 51.8424, + "eval_rouge1_for_task1586_scifact_title_generation": 35.8938, + "eval_rouge1_for_task1598_nyc_data_to_text": 49.7143, + "eval_rouge1_for_task1612_sick_textual_entailment": 49.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.8675, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 91.99, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 34.1293, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 65.5721, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 59.9921, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task219_rocstories_title_generation": 29.2074, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 52.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 59.0667, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 49.1738, + "eval_rouge1_for_task288_gigaword_title_generation": 30.7307, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 53.1, + "eval_rouge1_for_task329_gap_coreference_resolution": 32.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 54.6333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 76.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 82.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 82.6667, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.907, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 33.5, + "eval_rouge1_for_task402_grailqa_question_rewriting": 80.7809, + "eval_rouge1_for_task418_persent_title_generation": 26.7941, + "eval_rouge1_for_task442_com_qa_question_rewriting": 72.0632, + "eval_rouge1_for_task500_scruples_title_generation": 16.1188, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 39.3045, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 38.3187, + "eval_rouge1_for_task602_wikitext_title_generation": 14.6212, + "eval_rouge1_for_task613_liar_keyword_tagging": 29.6667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 20.3788, + "eval_rouge1_for_task619_ohsumed_title_generation": 46.1359, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 36.7582, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 47.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.9238, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 1.5, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 77.7967, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 66.3353, + "eval_rouge1_for_task677_ollie_data_to_text": 29.4485, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 32.2488, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.5504, + "eval_rouge1_for_task769_qed_title_generation": 77.073, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 33.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 55.5667, + "eval_rouge1_for_task892_gap_coreference_resolution": 30.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 40.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 54.4395, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 41.875, + "eval_rouge1_for_title_generation": 34.1938, + "eval_rouge1_for_word_analogy": 28.5417, + "eval_rougeL": 45.317, + "eval_rougeL_for_answerability_classification": 52.7179, + "eval_rougeL_for_cause_effect_classification": 51.7426, + "eval_rougeL_for_coreference_resolution": 43.5837, + "eval_rougeL_for_data_to_text": 45.8589, + "eval_rougeL_for_dialogue_act_recognition": 49.0905, + "eval_rougeL_for_grammar_error_correction": 63.8108, + "eval_rougeL_for_keyword_tagging": 57.213, + "eval_rougeL_for_overlap_extraction": 41.7163, + "eval_rougeL_for_question_rewriting": 66.4664, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 35.2333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 91.7862, + "eval_rougeL_for_task035_winogrande_question_rewriting": 82.8753, + "eval_rougeL_for_task036_qasc_keyword_tagging": 75.5035, + "eval_rougeL_for_task039_qasc_overlap_extraction": 36.4746, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 52.4521, + "eval_rougeL_for_task1152_bard_word_analogy": 20.0, + "eval_rougeL_for_task1153_bard_word_analogy": 25.3333, + "eval_rougeL_for_task1154_bard_word_analogy": 19.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 26.0, + "eval_rougeL_for_task1157_bard_word_analogy": 48.0, + "eval_rougeL_for_task1158_bard_word_analogy": 27.0, + "eval_rougeL_for_task1159_bard_word_analogy": 13.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 29.2844, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.585, + "eval_rougeL_for_task121_zest_question_rewriting": 38.9281, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.1491, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 41.5077, + "eval_rougeL_for_task1356_xlsum_title_generation": 14.815, + "eval_rougeL_for_task1358_xlsum_title_generation": 30.7807, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 53.3, + "eval_rougeL_for_task1407_dart_data_to_text": 32.9419, + "eval_rougeL_for_task1409_dart_data_to_text": 41.9174, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 41.2539, + "eval_rougeL_for_task1439_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 13.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 26.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 53.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 34.057, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.3677, + "eval_rougeL_for_task1562_zest_question_rewriting": 44.5805, + "eval_rougeL_for_task1586_scifact_title_generation": 29.5755, + "eval_rougeL_for_task1598_nyc_data_to_text": 39.2954, + "eval_rougeL_for_task1612_sick_textual_entailment": 49.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.5645, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 91.5473, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 29.1738, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 65.5721, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.6806, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task219_rocstories_title_generation": 28.8033, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 52.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 59.0667, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 46.958, + "eval_rougeL_for_task288_gigaword_title_generation": 26.2452, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 53.1, + "eval_rougeL_for_task329_gap_coreference_resolution": 32.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 54.6333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 76.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 82.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 82.6667, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 29.5925, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 33.5, + "eval_rougeL_for_task402_grailqa_question_rewriting": 68.2015, + "eval_rougeL_for_task418_persent_title_generation": 22.8475, + "eval_rougeL_for_task442_com_qa_question_rewriting": 66.9934, + "eval_rougeL_for_task500_scruples_title_generation": 15.1119, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 38.8324, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 35.7072, + "eval_rougeL_for_task602_wikitext_title_generation": 14.4637, + "eval_rougeL_for_task613_liar_keyword_tagging": 29.6667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 17.9393, + "eval_rougeL_for_task619_ohsumed_title_generation": 38.9918, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 34.9711, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 47.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.9238, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 1.5, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 76.1646, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 63.9431, + "eval_rougeL_for_task677_ollie_data_to_text": 24.2726, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 27.9406, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.4087, + "eval_rougeL_for_task769_qed_title_generation": 77.073, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 33.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 55.5667, + "eval_rougeL_for_task892_gap_coreference_resolution": 30.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 40.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.5408, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 41.875, + "eval_rougeL_for_title_generation": 31.14, + "eval_rougeL_for_word_analogy": 28.5417, + "eval_runtime": 715.6382, + "eval_samples_per_second": 16.642, + "eval_steps_per_second": 0.521, + "step": 1500 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 1.303, + "step": 2000 + }, + { + "epoch": 0.46, + "eval_exact_match": 28.1444, + "eval_exact_match_for_answerability_classification": 50.2308, + "eval_exact_match_for_cause_effect_classification": 35.7143, + "eval_exact_match_for_coreference_resolution": 37.0714, + "eval_exact_match_for_data_to_text": 7.5061, + "eval_exact_match_for_dialogue_act_recognition": 43.7143, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 41.0, + "eval_exact_match_for_overlap_extraction": 9.5, + "eval_exact_match_for_question_rewriting": 2.7273, + "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 39.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 6.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 7.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 42.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 18.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 56.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 15.0, + "eval_exact_match_for_task1153_bard_word_analogy": 13.0, + "eval_exact_match_for_task1154_bard_word_analogy": 15.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 29.0, + "eval_exact_match_for_task1157_bard_word_analogy": 43.0, + "eval_exact_match_for_task1158_bard_word_analogy": 26.0, + "eval_exact_match_for_task1159_bard_word_analogy": 14.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 9.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 29.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 37.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 35.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 41.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 45.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 4.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 55.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 54.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 47.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 29.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 31.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 36.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 38.0, + "eval_exact_match_for_task219_rocstories_title_generation": 5.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 48.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 51.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 41.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 1.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 5.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 48.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 42.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 58.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 25.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 0.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 51.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 5.0, + "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, + "eval_exact_match_for_task613_liar_keyword_tagging": 13.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 13.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 37.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 49.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 88.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 7.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 7.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 79.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 36.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 55.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 38.9167, + "eval_exact_match_for_title_generation": 8.6323, + "eval_exact_match_for_word_analogy": 25.625, + "eval_f1": 44.8802, + "eval_f1_for_answerability_classification": 52.7949, + "eval_f1_for_cause_effect_classification": 52.4856, + "eval_f1_for_coreference_resolution": 45.8404, + "eval_f1_for_data_to_text": 50.4627, + "eval_f1_for_dialogue_act_recognition": 47.2857, + "eval_f1_for_grammar_error_correction": 58.8152, + "eval_f1_for_keyword_tagging": 53.6079, + "eval_f1_for_overlap_extraction": 35.339, + "eval_f1_for_question_rewriting": 66.5889, + "eval_f1_for_task020_mctaco_answerability_classification": 53.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 40.8333, + "eval_f1_for_task034_winogrande_question_rewriting": 92.2124, + "eval_f1_for_task035_winogrande_question_rewriting": 84.5396, + "eval_f1_for_task036_qasc_keyword_tagging": 71.835, + "eval_f1_for_task039_qasc_overlap_extraction": 25.8818, + "eval_f1_for_task050_multirc_answerability_classification": 56.0, + "eval_f1_for_task102_commongen_data_to_text": 53.6125, + "eval_f1_for_task1152_bard_word_analogy": 15.0, + "eval_f1_for_task1153_bard_word_analogy": 15.0, + "eval_f1_for_task1154_bard_word_analogy": 15.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 29.0, + "eval_f1_for_task1157_bard_word_analogy": 43.0, + "eval_f1_for_task1158_bard_word_analogy": 26.0, + "eval_f1_for_task1159_bard_word_analogy": 14.0, + "eval_f1_for_task1161_coda_19_title_generation": 31.8823, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.5609, + "eval_f1_for_task121_zest_question_rewriting": 42.8718, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.2648, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.0058, + "eval_f1_for_task1356_xlsum_title_generation": 15.3165, + "eval_f1_for_task1358_xlsum_title_generation": 30.0794, + "eval_f1_for_task1385_anli_textual_entailment": 29.0, + "eval_f1_for_task1386_anli_textual_entailment": 37.0, + "eval_f1_for_task1387_anli_textual_entailment": 35.0, + "eval_f1_for_task1388_cb_textual_entailment": 41.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 45.0, + "eval_f1_for_task1407_dart_data_to_text": 34.0851, + "eval_f1_for_task1409_dart_data_to_text": 48.8288, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 33.7807, + "eval_f1_for_task1439_doqa_answerability_classification": 52.0, + "eval_f1_for_task1442_doqa_answerability_classification": 48.0, + "eval_f1_for_task1516_imppres_textual_entailment": 4.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 55.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 33.1414, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.8497, + "eval_f1_for_task1562_zest_question_rewriting": 45.244, + "eval_f1_for_task1586_scifact_title_generation": 33.2039, + "eval_f1_for_task1598_nyc_data_to_text": 49.7135, + "eval_f1_for_task1612_sick_textual_entailment": 34.0, + "eval_f1_for_task1615_sick_textual_entailment": 34.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.4535, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 94.6535, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 47.0, + "eval_f1_for_task1659_billsum_title_generation": 34.3967, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 71.7072, + "eval_f1_for_task1728_web_nlg_data_to_text": 55.514, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 31.0, + "eval_f1_for_task201_multinli_textual_entailment": 36.0, + "eval_f1_for_task202_multinli_textual_entailment": 38.0, + "eval_f1_for_task219_rocstories_title_generation": 20.2477, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_f1_for_task232_iirc_answerability_classification": 49.0, + "eval_f1_for_task233_iirc_answerability_classification": 48.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 51.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 55.4381, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 44.7961, + "eval_f1_for_task288_gigaword_title_generation": 26.6517, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 45.1333, + "eval_f1_for_task329_gap_coreference_resolution": 34.0, + "eval_f1_for_task330_gap_coreference_resolution": 58.8349, + "eval_f1_for_task349_squad2.0_answerability_classification": 48.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 80.6667, + "eval_f1_for_task392_cod3s_cause_effect_classification": 86.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.7555, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 31.5, + "eval_f1_for_task402_grailqa_question_rewriting": 75.5013, + "eval_f1_for_task418_persent_title_generation": 23.0507, + "eval_f1_for_task442_com_qa_question_rewriting": 65.7821, + "eval_f1_for_task500_scruples_title_generation": 13.7914, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 37.2003, + "eval_f1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 38.3379, + "eval_f1_for_task602_wikitext_title_generation": 13.5179, + "eval_f1_for_task613_liar_keyword_tagging": 16.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 18.9769, + "eval_f1_for_task619_ohsumed_title_generation": 41.907, + "eval_f1_for_task620_ohsumed_keyword_tagging": 36.0905, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 37.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 49.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.781, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 7.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 72.8929, + "eval_f1_for_task671_ambigqa_question_rewriting": 55.4129, + "eval_f1_for_task677_ollie_data_to_text": 26.3688, + "eval_f1_for_task738_perspectrum_textual_entailment": 7.0, + "eval_f1_for_task743_eurlex_title_generation": 31.1774, + "eval_f1_for_task760_msr_sqa_data_to_text": 4.6676, + "eval_f1_for_task769_qed_title_generation": 86.0762, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_f1_for_task890_gwsd_textual_entailment": 36.0, + "eval_f1_for_task891_gap_coreference_resolution": 55.319, + "eval_f1_for_task892_gap_coreference_resolution": 34.0, + "eval_f1_for_task893_gap_coreference_resolution": 55.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task957_e2e_data_to_text": 52.8323, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 38.9167, + "eval_f1_for_title_generation": 32.0673, + "eval_f1_for_word_analogy": 25.875, + "eval_gen_len": 10.2966, + "eval_global_step": 2000, + "eval_loss": 1.2161450386047363, + "eval_rouge1": 46.8668, + "eval_rouge1_for_answerability_classification": 52.7949, + "eval_rouge1_for_cause_effect_classification": 52.7247, + "eval_rouge1_for_coreference_resolution": 46.4952, + "eval_rouge1_for_data_to_text": 52.8445, + "eval_rouge1_for_dialogue_act_recognition": 49.8667, + "eval_rouge1_for_grammar_error_correction": 63.0965, + "eval_rouge1_for_keyword_tagging": 58.6053, + "eval_rouge1_for_overlap_extraction": 38.468, + "eval_rouge1_for_question_rewriting": 68.3207, + "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 40.7333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.2542, + "eval_rouge1_for_task035_winogrande_question_rewriting": 85.3095, + "eval_rouge1_for_task036_qasc_keyword_tagging": 77.4683, + "eval_rouge1_for_task039_qasc_overlap_extraction": 31.4485, + "eval_rouge1_for_task050_multirc_answerability_classification": 56.0, + "eval_rouge1_for_task102_commongen_data_to_text": 64.8163, + "eval_rouge1_for_task1152_bard_word_analogy": 15.0, + "eval_rouge1_for_task1153_bard_word_analogy": 15.0, + "eval_rouge1_for_task1154_bard_word_analogy": 15.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 29.0, + "eval_rouge1_for_task1157_bard_word_analogy": 43.0, + "eval_rouge1_for_task1158_bard_word_analogy": 26.0, + "eval_rouge1_for_task1159_bard_word_analogy": 14.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 35.9287, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.7183, + "eval_rouge1_for_task121_zest_question_rewriting": 45.4456, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.2878, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.2137, + "eval_rouge1_for_task1356_xlsum_title_generation": 18.0335, + "eval_rouge1_for_task1358_xlsum_title_generation": 34.2744, + "eval_rouge1_for_task1385_anli_textual_entailment": 29.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 37.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 50.4, + "eval_rouge1_for_task1407_dart_data_to_text": 34.3093, + "eval_rouge1_for_task1409_dart_data_to_text": 49.2385, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 39.0613, + "eval_rouge1_for_task1439_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 4.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 55.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 35.8875, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.1317, + "eval_rouge1_for_task1562_zest_question_rewriting": 49.4277, + "eval_rouge1_for_task1586_scifact_title_generation": 37.1211, + "eval_rouge1_for_task1598_nyc_data_to_text": 50.0854, + "eval_rouge1_for_task1612_sick_textual_entailment": 34.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.6556, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 94.8678, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 47.0, + "eval_rouge1_for_task1659_billsum_title_generation": 36.3157, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 71.7072, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 58.1014, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 31.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 36.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 38.0, + "eval_rouge1_for_task219_rocstories_title_generation": 26.4802, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 48.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 51.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 56.2833, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 45.4874, + "eval_rouge1_for_task288_gigaword_title_generation": 29.0127, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 44.9667, + "eval_rouge1_for_task329_gap_coreference_resolution": 34.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 58.7905, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 48.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 80.6667, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 86.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.8881, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 39.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 77.3483, + "eval_rouge1_for_task418_persent_title_generation": 27.1423, + "eval_rouge1_for_task442_com_qa_question_rewriting": 69.7498, + "eval_rouge1_for_task500_scruples_title_generation": 15.0763, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 37.8607, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 39.2845, + "eval_rouge1_for_task602_wikitext_title_generation": 14.8934, + "eval_rouge1_for_task613_liar_keyword_tagging": 28.4667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 20.518, + "eval_rouge1_for_task619_ohsumed_title_generation": 45.4542, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 42.8106, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 37.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 49.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.281, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 7.5, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 73.7092, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 56.6955, + "eval_rouge1_for_task677_ollie_data_to_text": 28.8678, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 33.1088, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 5.1402, + "eval_rouge1_for_task769_qed_title_generation": 86.4762, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 36.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 55.119, + "eval_rouge1_for_task892_gap_coreference_resolution": 34.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 55.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rouge1_for_task957_e2e_data_to_text": 54.8729, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 42.5417, + "eval_rouge1_for_title_generation": 34.4313, + "eval_rouge1_for_word_analogy": 25.875, + "eval_rougeL": 45.4618, + "eval_rougeL_for_answerability_classification": 52.7949, + "eval_rougeL_for_cause_effect_classification": 52.2573, + "eval_rougeL_for_coreference_resolution": 46.4952, + "eval_rougeL_for_data_to_text": 45.9755, + "eval_rougeL_for_dialogue_act_recognition": 49.8667, + "eval_rougeL_for_grammar_error_correction": 62.1537, + "eval_rougeL_for_keyword_tagging": 57.9536, + "eval_rougeL_for_overlap_extraction": 37.1094, + "eval_rougeL_for_question_rewriting": 64.1894, + "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 40.7333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.0876, + "eval_rougeL_for_task035_winogrande_question_rewriting": 83.7131, + "eval_rougeL_for_task036_qasc_keyword_tagging": 76.9016, + "eval_rougeL_for_task039_qasc_overlap_extraction": 31.4485, + "eval_rougeL_for_task050_multirc_answerability_classification": 56.0, + "eval_rougeL_for_task102_commongen_data_to_text": 56.9056, + "eval_rougeL_for_task1152_bard_word_analogy": 15.0, + "eval_rougeL_for_task1153_bard_word_analogy": 15.0, + "eval_rougeL_for_task1154_bard_word_analogy": 15.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 29.0, + "eval_rougeL_for_task1157_bard_word_analogy": 43.0, + "eval_rougeL_for_task1158_bard_word_analogy": 26.0, + "eval_rougeL_for_task1159_bard_word_analogy": 14.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 29.0435, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.561, + "eval_rougeL_for_task121_zest_question_rewriting": 37.7291, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.5813, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 38.9444, + "eval_rougeL_for_task1356_xlsum_title_generation": 15.3512, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.5038, + "eval_rougeL_for_task1385_anli_textual_entailment": 29.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 37.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 50.4, + "eval_rougeL_for_task1407_dart_data_to_text": 30.4918, + "eval_rougeL_for_task1409_dart_data_to_text": 41.787, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 38.0407, + "eval_rougeL_for_task1439_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 4.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 55.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 32.6491, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.2668, + "eval_rougeL_for_task1562_zest_question_rewriting": 42.6115, + "eval_rougeL_for_task1586_scifact_title_generation": 31.1682, + "eval_rougeL_for_task1598_nyc_data_to_text": 39.9889, + "eval_rougeL_for_task1612_sick_textual_entailment": 34.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.1563, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 93.4729, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 47.0, + "eval_rougeL_for_task1659_billsum_title_generation": 31.5641, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 71.7072, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 51.0216, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 31.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 36.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 38.0, + "eval_rougeL_for_task219_rocstories_title_generation": 26.2579, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 48.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 51.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 56.2833, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 42.7703, + "eval_rougeL_for_task288_gigaword_title_generation": 24.753, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 44.9667, + "eval_rougeL_for_task329_gap_coreference_resolution": 34.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 58.7905, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 48.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 80.6667, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 86.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.2985, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 39.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 65.4037, + "eval_rougeL_for_task418_persent_title_generation": 22.9958, + "eval_rougeL_for_task442_com_qa_question_rewriting": 64.2265, + "eval_rougeL_for_task500_scruples_title_generation": 13.8713, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 37.5312, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 37.8714, + "eval_rougeL_for_task602_wikitext_title_generation": 14.6637, + "eval_rougeL_for_task613_liar_keyword_tagging": 28.4667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 18.836, + "eval_rougeL_for_task619_ohsumed_title_generation": 37.8063, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 40.1187, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 37.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 49.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.281, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 7.5, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 71.9272, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 52.7232, + "eval_rougeL_for_task677_ollie_data_to_text": 23.0643, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 28.4916, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.8385, + "eval_rougeL_for_task769_qed_title_generation": 86.4762, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 36.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 55.119, + "eval_rougeL_for_task892_gap_coreference_resolution": 34.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 55.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.7676, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 42.5417, + "eval_rougeL_for_title_generation": 31.4032, + "eval_rougeL_for_word_analogy": 25.875, + "eval_runtime": 671.1073, + "eval_samples_per_second": 17.747, + "eval_steps_per_second": 0.556, + "step": 2000 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 1.277, + "step": 2500 + }, + { + "epoch": 0.57, + "eval_exact_match": 27.7078, + "eval_exact_match_for_answerability_classification": 49.5385, + "eval_exact_match_for_cause_effect_classification": 36.1429, + "eval_exact_match_for_coreference_resolution": 34.4286, + "eval_exact_match_for_data_to_text": 6.0533, + "eval_exact_match_for_dialogue_act_recognition": 46.0, + "eval_exact_match_for_grammar_error_correction": 8.0, + "eval_exact_match_for_keyword_tagging": 40.2, + "eval_exact_match_for_overlap_extraction": 9.5, + "eval_exact_match_for_question_rewriting": 2.0909, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 36.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 6.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 37.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 19.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 16.0, + "eval_exact_match_for_task1153_bard_word_analogy": 11.0, + "eval_exact_match_for_task1154_bard_word_analogy": 16.0, + "eval_exact_match_for_task1155_bard_word_analogy": 53.0, + "eval_exact_match_for_task1156_bard_word_analogy": 23.0, + "eval_exact_match_for_task1157_bard_word_analogy": 38.0, + "eval_exact_match_for_task1158_bard_word_analogy": 22.0, + "eval_exact_match_for_task1159_bard_word_analogy": 19.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 7.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 4.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 45.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 31.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 36.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 31.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 55.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 55.0, + "eval_exact_match_for_task1407_dart_data_to_text": 1.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 16.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 35.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 43.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 7.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 5.0, + "eval_exact_match_for_task190_snli_textual_entailment": 49.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 31.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 38.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 32.0, + "eval_exact_match_for_task219_rocstories_title_generation": 1.0, + "eval_exact_match_for_task220_rocstories_title_generation": 52.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 39.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 5.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 46.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 47.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 56.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 25.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 51.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 4.0, + "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, + "eval_exact_match_for_task613_liar_keyword_tagging": 14.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 13.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 30.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 87.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 6.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 27.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 80.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 27.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 39.1667, + "eval_exact_match_for_title_generation": 8.5202, + "eval_exact_match_for_word_analogy": 24.75, + "eval_f1": 45.0328, + "eval_f1_for_answerability_classification": 52.1026, + "eval_f1_for_cause_effect_classification": 52.9177, + "eval_f1_for_coreference_resolution": 42.8659, + "eval_f1_for_data_to_text": 50.8279, + "eval_f1_for_dialogue_act_recognition": 49.6429, + "eval_f1_for_grammar_error_correction": 66.5628, + "eval_f1_for_keyword_tagging": 53.7176, + "eval_f1_for_overlap_extraction": 36.3256, + "eval_f1_for_question_rewriting": 68.1383, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 38.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 92.2625, + "eval_f1_for_task035_winogrande_question_rewriting": 86.4487, + "eval_f1_for_task036_qasc_keyword_tagging": 68.803, + "eval_f1_for_task039_qasc_overlap_extraction": 33.7667, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 55.6952, + "eval_f1_for_task1152_bard_word_analogy": 16.0, + "eval_f1_for_task1153_bard_word_analogy": 19.6667, + "eval_f1_for_task1154_bard_word_analogy": 16.0, + "eval_f1_for_task1155_bard_word_analogy": 53.0, + "eval_f1_for_task1156_bard_word_analogy": 23.0, + "eval_f1_for_task1157_bard_word_analogy": 38.0, + "eval_f1_for_task1158_bard_word_analogy": 22.0, + "eval_f1_for_task1159_bard_word_analogy": 19.0, + "eval_f1_for_task1161_coda_19_title_generation": 31.6065, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.1619, + "eval_f1_for_task121_zest_question_rewriting": 45.6066, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 15.5172, + "eval_f1_for_task1344_rte_textual_entailment": 45.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.4542, + "eval_f1_for_task1356_xlsum_title_generation": 17.9964, + "eval_f1_for_task1358_xlsum_title_generation": 30.2637, + "eval_f1_for_task1385_anli_textual_entailment": 31.0, + "eval_f1_for_task1386_anli_textual_entailment": 36.0, + "eval_f1_for_task1387_anli_textual_entailment": 31.0, + "eval_f1_for_task1388_cb_textual_entailment": 55.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 55.0, + "eval_f1_for_task1407_dart_data_to_text": 35.8215, + "eval_f1_for_task1409_dart_data_to_text": 49.785, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 49.1663, + "eval_f1_for_task1439_doqa_answerability_classification": 50.0, + "eval_f1_for_task1442_doqa_answerability_classification": 50.0, + "eval_f1_for_task1516_imppres_textual_entailment": 0.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 33.0179, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.9593, + "eval_f1_for_task1562_zest_question_rewriting": 48.7718, + "eval_f1_for_task1586_scifact_title_generation": 34.0286, + "eval_f1_for_task1598_nyc_data_to_text": 50.9201, + "eval_f1_for_task1612_sick_textual_entailment": 35.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.1045, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 91.2191, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_f1_for_task1659_billsum_title_generation": 33.5758, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 59.1659, + "eval_f1_for_task1728_web_nlg_data_to_text": 56.7622, + "eval_f1_for_task190_snli_textual_entailment": 49.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 31.0, + "eval_f1_for_task201_multinli_textual_entailment": 38.0, + "eval_f1_for_task202_multinli_textual_entailment": 32.0, + "eval_f1_for_task219_rocstories_title_generation": 15.8415, + "eval_f1_for_task220_rocstories_title_generation": 52.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 49.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 51.6667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 38.8844, + "eval_f1_for_task288_gigaword_title_generation": 27.5814, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 31.4524, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 59.7778, + "eval_f1_for_task349_squad2.0_answerability_classification": 46.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 82.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 32.8924, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 31.8333, + "eval_f1_for_task402_grailqa_question_rewriting": 70.4997, + "eval_f1_for_task418_persent_title_generation": 24.3659, + "eval_f1_for_task442_com_qa_question_rewriting": 67.1586, + "eval_f1_for_task500_scruples_title_generation": 15.594, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 36.4492, + "eval_f1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 35.8284, + "eval_f1_for_task602_wikitext_title_generation": 14.1839, + "eval_f1_for_task613_liar_keyword_tagging": 18.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 19.8649, + "eval_f1_for_task619_ohsumed_title_generation": 41.2394, + "eval_f1_for_task620_ohsumed_keyword_tagging": 37.7087, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 30.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.0762, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 6.2408, + "eval_f1_for_task670_ambigqa_question_rewriting": 78.5301, + "eval_f1_for_task671_ambigqa_question_rewriting": 61.5228, + "eval_f1_for_task677_ollie_data_to_text": 24.4781, + "eval_f1_for_task738_perspectrum_textual_entailment": 27.0, + "eval_f1_for_task743_eurlex_title_generation": 33.2895, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.857, + "eval_f1_for_task769_qed_title_generation": 87.5286, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 27.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 58.6524, + "eval_f1_for_task892_gap_coreference_resolution": 34.0, + "eval_f1_for_task893_gap_coreference_resolution": 46.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 54.1541, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 39.1667, + "eval_f1_for_title_generation": 32.3788, + "eval_f1_for_word_analogy": 25.8333, + "eval_gen_len": 10.1285, + "eval_global_step": 2500, + "eval_loss": 1.2495967149734497, + "eval_rouge1": 46.7646, + "eval_rouge1_for_answerability_classification": 52.1026, + "eval_rouge1_for_cause_effect_classification": 53.0496, + "eval_rouge1_for_coreference_resolution": 43.543, + "eval_rouge1_for_data_to_text": 53.4925, + "eval_rouge1_for_dialogue_act_recognition": 52.1905, + "eval_rouge1_for_grammar_error_correction": 69.1627, + "eval_rouge1_for_keyword_tagging": 57.4051, + "eval_rouge1_for_overlap_extraction": 39.2689, + "eval_rouge1_for_question_rewriting": 69.7638, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 38.2333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.3391, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.274, + "eval_rouge1_for_task036_qasc_keyword_tagging": 73.9879, + "eval_rouge1_for_task039_qasc_overlap_extraction": 38.3676, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 69.2169, + "eval_rouge1_for_task1152_bard_word_analogy": 16.0, + "eval_rouge1_for_task1153_bard_word_analogy": 19.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 16.0, + "eval_rouge1_for_task1155_bard_word_analogy": 53.0, + "eval_rouge1_for_task1156_bard_word_analogy": 23.0, + "eval_rouge1_for_task1157_bard_word_analogy": 38.0, + "eval_rouge1_for_task1158_bard_word_analogy": 22.0, + "eval_rouge1_for_task1159_bard_word_analogy": 19.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 35.8496, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.4653, + "eval_rouge1_for_task121_zest_question_rewriting": 47.8837, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 16.0115, + "eval_rouge1_for_task1344_rte_textual_entailment": 45.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.5214, + "eval_rouge1_for_task1356_xlsum_title_generation": 20.5889, + "eval_rouge1_for_task1358_xlsum_title_generation": 34.9649, + "eval_rouge1_for_task1385_anli_textual_entailment": 31.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 36.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 31.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 55.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 62.8333, + "eval_rouge1_for_task1407_dart_data_to_text": 35.8009, + "eval_rouge1_for_task1409_dart_data_to_text": 50.5672, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 51.2598, + "eval_rouge1_for_task1439_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 35.6772, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.0655, + "eval_rouge1_for_task1562_zest_question_rewriting": 52.474, + "eval_rouge1_for_task1586_scifact_title_generation": 38.0429, + "eval_rouge1_for_task1598_nyc_data_to_text": 51.5354, + "eval_rouge1_for_task1612_sick_textual_entailment": 35.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.3736, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 91.3278, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_rouge1_for_task1659_billsum_title_generation": 35.811, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 59.1659, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 58.69, + "eval_rouge1_for_task190_snli_textual_entailment": 49.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 31.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 38.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 32.0, + "eval_rouge1_for_task219_rocstories_title_generation": 18.7729, + "eval_rouge1_for_task220_rocstories_title_generation": 52.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 52.3333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 40.1703, + "eval_rouge1_for_task288_gigaword_title_generation": 30.2407, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 32.1857, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 59.9, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 46.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 82.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 32.7878, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 38.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 72.363, + "eval_rouge1_for_task418_persent_title_generation": 28.7105, + "eval_rouge1_for_task442_com_qa_question_rewriting": 71.199, + "eval_rouge1_for_task500_scruples_title_generation": 17.3137, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 36.8043, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 37.3346, + "eval_rouge1_for_task602_wikitext_title_generation": 15.3588, + "eval_rouge1_for_task613_liar_keyword_tagging": 26.1167, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 20.893, + "eval_rouge1_for_task619_ohsumed_title_generation": 44.6012, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 42.559, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 30.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.3619, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 7.2124, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 79.2052, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 62.303, + "eval_rouge1_for_task677_ollie_data_to_text": 27.6996, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 35.0229, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.1342, + "eval_rouge1_for_task769_qed_title_generation": 87.9286, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 37.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 58.7381, + "eval_rouge1_for_task892_gap_coreference_resolution": 34.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 46.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 55.9351, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 41.9861, + "eval_rouge1_for_title_generation": 34.6736, + "eval_rouge1_for_word_analogy": 25.8333, + "eval_rougeL": 45.3281, + "eval_rougeL_for_answerability_classification": 52.1026, + "eval_rougeL_for_cause_effect_classification": 52.5185, + "eval_rougeL_for_coreference_resolution": 43.543, + "eval_rougeL_for_data_to_text": 46.342, + "eval_rougeL_for_dialogue_act_recognition": 52.1905, + "eval_rougeL_for_grammar_error_correction": 67.9575, + "eval_rougeL_for_keyword_tagging": 56.7411, + "eval_rougeL_for_overlap_extraction": 38.5063, + "eval_rougeL_for_question_rewriting": 66.2022, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 38.2333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.3391, + "eval_rougeL_for_task035_winogrande_question_rewriting": 85.8188, + "eval_rougeL_for_task036_qasc_keyword_tagging": 72.7885, + "eval_rougeL_for_task039_qasc_overlap_extraction": 38.3676, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 59.3276, + "eval_rougeL_for_task1152_bard_word_analogy": 16.0, + "eval_rougeL_for_task1153_bard_word_analogy": 19.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 16.0, + "eval_rougeL_for_task1155_bard_word_analogy": 53.0, + "eval_rougeL_for_task1156_bard_word_analogy": 23.0, + "eval_rougeL_for_task1157_bard_word_analogy": 38.0, + "eval_rougeL_for_task1158_bard_word_analogy": 22.0, + "eval_rougeL_for_task1159_bard_word_analogy": 19.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 27.9025, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.183, + "eval_rougeL_for_task121_zest_question_rewriting": 42.8401, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 15.258, + "eval_rougeL_for_task1344_rte_textual_entailment": 45.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.1033, + "eval_rougeL_for_task1356_xlsum_title_generation": 17.3989, + "eval_rougeL_for_task1358_xlsum_title_generation": 29.0176, + "eval_rougeL_for_task1385_anli_textual_entailment": 31.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 36.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 31.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 55.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 62.8333, + "eval_rougeL_for_task1407_dart_data_to_text": 31.1587, + "eval_rougeL_for_task1409_dart_data_to_text": 42.2326, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 49.7144, + "eval_rougeL_for_task1439_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 31.6787, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.2006, + "eval_rougeL_for_task1562_zest_question_rewriting": 46.2775, + "eval_rougeL_for_task1586_scifact_title_generation": 29.958, + "eval_rougeL_for_task1598_nyc_data_to_text": 40.4453, + "eval_rougeL_for_task1612_sick_textual_entailment": 35.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.069, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 89.9278, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_rougeL_for_task1659_billsum_title_generation": 30.0839, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 59.1659, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 52.5996, + "eval_rougeL_for_task190_snli_textual_entailment": 49.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 31.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 38.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 32.0, + "eval_rougeL_for_task219_rocstories_title_generation": 18.2229, + "eval_rougeL_for_task220_rocstories_title_generation": 52.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 52.3333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 38.645, + "eval_rougeL_for_task288_gigaword_title_generation": 25.6072, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 32.1857, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 59.9, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 46.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 82.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.7793, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 38.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 60.0499, + "eval_rougeL_for_task418_persent_title_generation": 25.0393, + "eval_rougeL_for_task442_com_qa_question_rewriting": 65.9159, + "eval_rougeL_for_task500_scruples_title_generation": 15.9233, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 36.3536, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 34.5649, + "eval_rougeL_for_task602_wikitext_title_generation": 15.2544, + "eval_rougeL_for_task613_liar_keyword_tagging": 26.1167, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 18.1832, + "eval_rougeL_for_task619_ohsumed_title_generation": 36.9954, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 40.4385, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 30.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.3619, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 7.2124, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 78.2834, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 60.3438, + "eval_rougeL_for_task677_ollie_data_to_text": 23.1061, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 29.956, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.7807, + "eval_rougeL_for_task769_qed_title_generation": 87.9286, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 37.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 58.7381, + "eval_rougeL_for_task892_gap_coreference_resolution": 34.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 46.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 43.0046, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 41.9861, + "eval_rougeL_for_title_generation": 31.2053, + "eval_rougeL_for_word_analogy": 25.8333, + "eval_runtime": 619.8944, + "eval_samples_per_second": 19.213, + "eval_steps_per_second": 0.602, + "step": 2500 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 1.2354, + "step": 3000 + }, + { + "epoch": 0.69, + "eval_exact_match": 27.1956, + "eval_exact_match_for_answerability_classification": 49.6154, + "eval_exact_match_for_cause_effect_classification": 35.0, + "eval_exact_match_for_coreference_resolution": 36.9286, + "eval_exact_match_for_data_to_text": 5.6901, + "eval_exact_match_for_dialogue_act_recognition": 46.5714, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 41.8, + "eval_exact_match_for_overlap_extraction": 13.0, + "eval_exact_match_for_question_rewriting": 1.6364, + "eval_exact_match_for_task020_mctaco_answerability_classification": 49.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 34.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 3.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 48.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 26.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 57.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 19.0, + "eval_exact_match_for_task1153_bard_word_analogy": 18.0, + "eval_exact_match_for_task1154_bard_word_analogy": 16.0, + "eval_exact_match_for_task1155_bard_word_analogy": 53.0, + "eval_exact_match_for_task1156_bard_word_analogy": 20.0, + "eval_exact_match_for_task1157_bard_word_analogy": 46.0, + "eval_exact_match_for_task1158_bard_word_analogy": 27.0, + "eval_exact_match_for_task1159_bard_word_analogy": 13.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 4.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 51.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 10.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 5.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 12.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 18.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 60.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 47.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 51.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 29.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 30.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 55.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 35.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 40.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 22.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 13.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task219_rocstories_title_generation": 8.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 43.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 51.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 44.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 4.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 53.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 49.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 48.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 47.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 30.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 48.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 5.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 14.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 10.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 47.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 29.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 90.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 15.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 3.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 16.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 73.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 52.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 37.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 36.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 55.0, + "eval_exact_match_for_textual_entailment": 34.5, + "eval_exact_match_for_title_generation": 8.4641, + "eval_exact_match_for_word_analogy": 26.5, + "eval_f1": 44.437, + "eval_f1_for_answerability_classification": 52.1795, + "eval_f1_for_cause_effect_classification": 52.4, + "eval_f1_for_coreference_resolution": 45.8147, + "eval_f1_for_data_to_text": 47.561, + "eval_f1_for_dialogue_act_recognition": 50.1429, + "eval_f1_for_grammar_error_correction": 62.1502, + "eval_f1_for_keyword_tagging": 54.4707, + "eval_f1_for_overlap_extraction": 36.9778, + "eval_f1_for_question_rewriting": 69.5637, + "eval_f1_for_task020_mctaco_answerability_classification": 49.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 36.5, + "eval_f1_for_task034_winogrande_question_rewriting": 92.416, + "eval_f1_for_task035_winogrande_question_rewriting": 86.4712, + "eval_f1_for_task036_qasc_keyword_tagging": 79.5108, + "eval_f1_for_task039_qasc_overlap_extraction": 33.4167, + "eval_f1_for_task050_multirc_answerability_classification": 57.0, + "eval_f1_for_task102_commongen_data_to_text": 52.0441, + "eval_f1_for_task1152_bard_word_analogy": 19.0, + "eval_f1_for_task1153_bard_word_analogy": 21.3333, + "eval_f1_for_task1154_bard_word_analogy": 16.0, + "eval_f1_for_task1155_bard_word_analogy": 53.0, + "eval_f1_for_task1156_bard_word_analogy": 20.0, + "eval_f1_for_task1157_bard_word_analogy": 46.0, + "eval_f1_for_task1158_bard_word_analogy": 27.0, + "eval_f1_for_task1159_bard_word_analogy": 13.0, + "eval_f1_for_task1161_coda_19_title_generation": 32.3158, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.8954, + "eval_f1_for_task121_zest_question_rewriting": 48.2019, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 15.3271, + "eval_f1_for_task1344_rte_textual_entailment": 51.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.6173, + "eval_f1_for_task1356_xlsum_title_generation": 17.71, + "eval_f1_for_task1358_xlsum_title_generation": 29.2909, + "eval_f1_for_task1385_anli_textual_entailment": 10.0, + "eval_f1_for_task1386_anli_textual_entailment": 5.0, + "eval_f1_for_task1387_anli_textual_entailment": 12.0, + "eval_f1_for_task1388_cb_textual_entailment": 18.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 60.0, + "eval_f1_for_task1407_dart_data_to_text": 25.9014, + "eval_f1_for_task1409_dart_data_to_text": 46.1235, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 40.1699, + "eval_f1_for_task1439_doqa_answerability_classification": 47.0, + "eval_f1_for_task1442_doqa_answerability_classification": 51.0, + "eval_f1_for_task1516_imppres_textual_entailment": 29.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 30.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 55.0, + "eval_f1_for_task1540_peer_read_title_generation": 36.0321, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.1305, + "eval_f1_for_task1562_zest_question_rewriting": 48.4496, + "eval_f1_for_task1586_scifact_title_generation": 35.0727, + "eval_f1_for_task1598_nyc_data_to_text": 48.1471, + "eval_f1_for_task1612_sick_textual_entailment": 34.0, + "eval_f1_for_task1615_sick_textual_entailment": 35.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.5426, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_f1_for_task1631_open_pi_data_to_text": 91.3688, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 33.3981, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 69.9571, + "eval_f1_for_task1728_web_nlg_data_to_text": 58.4748, + "eval_f1_for_task190_snli_textual_entailment": 13.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 33.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 34.0, + "eval_f1_for_task219_rocstories_title_generation": 22.7509, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 43.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 49.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 51.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 58.2667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 40.5388, + "eval_f1_for_task288_gigaword_title_generation": 26.6505, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 39.2857, + "eval_f1_for_task329_gap_coreference_resolution": 34.0, + "eval_f1_for_task330_gap_coreference_resolution": 59.8444, + "eval_f1_for_task349_squad2.0_answerability_classification": 49.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 82.6667, + "eval_f1_for_task392_cod3s_cause_effect_classification": 82.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.3107, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 35.8333, + "eval_f1_for_task402_grailqa_question_rewriting": 71.3813, + "eval_f1_for_task418_persent_title_generation": 26.0834, + "eval_f1_for_task442_com_qa_question_rewriting": 70.7016, + "eval_f1_for_task500_scruples_title_generation": 16.1734, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 35.9262, + "eval_f1_for_task520_aquamuse_answerability_classification": 48.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 35.1964, + "eval_f1_for_task602_wikitext_title_generation": 13.7136, + "eval_f1_for_task613_liar_keyword_tagging": 17.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 20.4893, + "eval_f1_for_task619_ohsumed_title_generation": 42.5042, + "eval_f1_for_task620_ohsumed_keyword_tagging": 33.2524, + "eval_f1_for_task623_ohsumed_keyword_tagging": 47.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 29.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 95.2571, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 16.7333, + "eval_f1_for_task670_ambigqa_question_rewriting": 81.5377, + "eval_f1_for_task671_ambigqa_question_rewriting": 66.9865, + "eval_f1_for_task677_ollie_data_to_text": 18.2776, + "eval_f1_for_task738_perspectrum_textual_entailment": 16.0, + "eval_f1_for_task743_eurlex_title_generation": 33.2553, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.7739, + "eval_f1_for_task769_qed_title_generation": 83.7005, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 52.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 25.0, + "eval_f1_for_task890_gwsd_textual_entailment": 37.0, + "eval_f1_for_task891_gap_coreference_resolution": 55.9857, + "eval_f1_for_task892_gap_coreference_resolution": 36.0, + "eval_f1_for_task893_gap_coreference_resolution": 49.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task957_e2e_data_to_text": 51.5357, + "eval_f1_for_task970_sherliic_textual_entailment": 55.0, + "eval_f1_for_textual_entailment": 34.5, + "eval_f1_for_title_generation": 32.6742, + "eval_f1_for_word_analogy": 26.9167, + "eval_gen_len": 10.6123, + "eval_global_step": 3000, + "eval_loss": 1.2438644170761108, + "eval_rouge1": 47.1554, + "eval_rouge1_for_answerability_classification": 52.1795, + "eval_rouge1_for_cause_effect_classification": 52.5602, + "eval_rouge1_for_coreference_resolution": 46.5044, + "eval_rouge1_for_data_to_text": 50.0727, + "eval_rouge1_for_dialogue_act_recognition": 52.5952, + "eval_rouge1_for_grammar_error_correction": 65.0518, + "eval_rouge1_for_keyword_tagging": 58.5714, + "eval_rouge1_for_overlap_extraction": 39.3807, + "eval_rouge1_for_question_rewriting": 71.2918, + "eval_rouge1_for_task020_mctaco_answerability_classification": 49.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 36.4, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.4522, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.4453, + "eval_rouge1_for_task036_qasc_keyword_tagging": 81.932, + "eval_rouge1_for_task039_qasc_overlap_extraction": 37.25, + "eval_rouge1_for_task050_multirc_answerability_classification": 57.0, + "eval_rouge1_for_task102_commongen_data_to_text": 64.4868, + "eval_rouge1_for_task1152_bard_word_analogy": 19.0, + "eval_rouge1_for_task1153_bard_word_analogy": 21.3333, + "eval_rouge1_for_task1154_bard_word_analogy": 16.0, + "eval_rouge1_for_task1155_bard_word_analogy": 53.0, + "eval_rouge1_for_task1156_bard_word_analogy": 20.0, + "eval_rouge1_for_task1157_bard_word_analogy": 46.0, + "eval_rouge1_for_task1158_bard_word_analogy": 27.0, + "eval_rouge1_for_task1159_bard_word_analogy": 13.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 35.8512, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.1553, + "eval_rouge1_for_task121_zest_question_rewriting": 50.3717, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.9672, + "eval_rouge1_for_task1344_rte_textual_entailment": 51.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 44.2874, + "eval_rouge1_for_task1356_xlsum_title_generation": 20.8158, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.4664, + "eval_rouge1_for_task1385_anli_textual_entailment": 31.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 31.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 38.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 65.1667, + "eval_rouge1_for_task1407_dart_data_to_text": 25.8937, + "eval_rouge1_for_task1409_dart_data_to_text": 46.5484, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 42.7847, + "eval_rouge1_for_task1439_doqa_answerability_classification": 47.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 51.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 29.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 30.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 55.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 38.3002, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.3188, + "eval_rouge1_for_task1562_zest_question_rewriting": 52.9469, + "eval_rouge1_for_task1586_scifact_title_generation": 38.6695, + "eval_rouge1_for_task1598_nyc_data_to_text": 50.0137, + "eval_rouge1_for_task1612_sick_textual_entailment": 34.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.8118, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 91.4774, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 35.4423, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 69.9571, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 60.038, + "eval_rouge1_for_task190_snli_textual_entailment": 13.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task219_rocstories_title_generation": 28.2087, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 43.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 51.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 58.4333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 41.5113, + "eval_rouge1_for_task288_gigaword_title_generation": 28.9967, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 40.119, + "eval_rouge1_for_task329_gap_coreference_resolution": 34.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 60.1333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 49.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 82.6667, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 82.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.1633, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 44.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 74.0176, + "eval_rouge1_for_task418_persent_title_generation": 29.4233, + "eval_rouge1_for_task442_com_qa_question_rewriting": 73.9783, + "eval_rouge1_for_task500_scruples_title_generation": 17.4509, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 36.5774, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 48.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 36.5636, + "eval_rouge1_for_task602_wikitext_title_generation": 14.4808, + "eval_rouge1_for_task613_liar_keyword_tagging": 28.8333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 21.7578, + "eval_rouge1_for_task619_ohsumed_title_generation": 46.4705, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 39.3344, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 47.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 29.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.7571, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 16.7333, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.1797, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 67.5634, + "eval_rouge1_for_task677_ollie_data_to_text": 20.7042, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 61.0, + "eval_rouge1_for_task743_eurlex_title_generation": 35.192, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.1449, + "eval_rouge1_for_task769_qed_title_generation": 83.9672, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 52.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 37.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 37.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 56.119, + "eval_rouge1_for_task892_gap_coreference_resolution": 36.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 49.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rouge1_for_task957_e2e_data_to_text": 53.3609, + "eval_rouge1_for_task970_sherliic_textual_entailment": 55.0, + "eval_rouge1_for_textual_entailment": 42.1806, + "eval_rouge1_for_title_generation": 34.9511, + "eval_rouge1_for_word_analogy": 26.9167, + "eval_rougeL": 45.8486, + "eval_rougeL_for_answerability_classification": 52.1795, + "eval_rougeL_for_cause_effect_classification": 52.0329, + "eval_rougeL_for_coreference_resolution": 46.5044, + "eval_rougeL_for_data_to_text": 43.6701, + "eval_rougeL_for_dialogue_act_recognition": 52.5952, + "eval_rougeL_for_grammar_error_correction": 63.9544, + "eval_rougeL_for_keyword_tagging": 57.9273, + "eval_rougeL_for_overlap_extraction": 38.8667, + "eval_rougeL_for_question_rewriting": 67.8452, + "eval_rougeL_for_task020_mctaco_answerability_classification": 49.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 36.4, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.4522, + "eval_rougeL_for_task035_winogrande_question_rewriting": 85.9793, + "eval_rougeL_for_task036_qasc_keyword_tagging": 81.1654, + "eval_rougeL_for_task039_qasc_overlap_extraction": 37.25, + "eval_rougeL_for_task050_multirc_answerability_classification": 57.0, + "eval_rougeL_for_task102_commongen_data_to_text": 55.3327, + "eval_rougeL_for_task1152_bard_word_analogy": 19.0, + "eval_rougeL_for_task1153_bard_word_analogy": 21.3333, + "eval_rougeL_for_task1154_bard_word_analogy": 16.0, + "eval_rougeL_for_task1155_bard_word_analogy": 53.0, + "eval_rougeL_for_task1156_bard_word_analogy": 20.0, + "eval_rougeL_for_task1157_bard_word_analogy": 46.0, + "eval_rougeL_for_task1158_bard_word_analogy": 27.0, + "eval_rougeL_for_task1159_bard_word_analogy": 13.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 30.0965, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.998, + "eval_rougeL_for_task121_zest_question_rewriting": 45.4081, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 15.2781, + "eval_rougeL_for_task1344_rte_textual_entailment": 51.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 41.1193, + "eval_rougeL_for_task1356_xlsum_title_generation": 18.0958, + "eval_rougeL_for_task1358_xlsum_title_generation": 27.0761, + "eval_rougeL_for_task1385_anli_textual_entailment": 31.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 31.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 38.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 65.1667, + "eval_rougeL_for_task1407_dart_data_to_text": 22.14, + "eval_rougeL_for_task1409_dart_data_to_text": 39.3576, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 41.4413, + "eval_rougeL_for_task1439_doqa_answerability_classification": 47.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 51.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 29.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 30.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 55.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 34.9389, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.4676, + "eval_rougeL_for_task1562_zest_question_rewriting": 46.378, + "eval_rougeL_for_task1586_scifact_title_generation": 31.5756, + "eval_rougeL_for_task1598_nyc_data_to_text": 39.3554, + "eval_rougeL_for_task1612_sick_textual_entailment": 34.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.5072, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 90.5598, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 29.9395, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 69.9571, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.4281, + "eval_rougeL_for_task190_snli_textual_entailment": 13.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task219_rocstories_title_generation": 27.9587, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 43.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 51.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 58.4333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 40.4833, + "eval_rougeL_for_task288_gigaword_title_generation": 24.9577, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 40.119, + "eval_rougeL_for_task329_gap_coreference_resolution": 34.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 60.1333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 49.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 82.6667, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 82.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.1519, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 44.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 61.664, + "eval_rougeL_for_task418_persent_title_generation": 25.9706, + "eval_rougeL_for_task442_com_qa_question_rewriting": 69.6524, + "eval_rougeL_for_task500_scruples_title_generation": 16.1595, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 36.2282, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 48.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 35.3764, + "eval_rougeL_for_task602_wikitext_title_generation": 14.3836, + "eval_rougeL_for_task613_liar_keyword_tagging": 28.8333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 19.0781, + "eval_rougeL_for_task619_ohsumed_title_generation": 38.6967, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 36.8806, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 47.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 29.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.7571, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 16.7333, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.1105, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 66.0286, + "eval_rougeL_for_task677_ollie_data_to_text": 18.0775, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 61.0, + "eval_rougeL_for_task743_eurlex_title_generation": 30.4188, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.6652, + "eval_rougeL_for_task769_qed_title_generation": 83.9672, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 52.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 37.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 37.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 56.119, + "eval_rougeL_for_task892_gap_coreference_resolution": 36.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 49.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.5111, + "eval_rougeL_for_task970_sherliic_textual_entailment": 55.0, + "eval_rougeL_for_textual_entailment": 42.1806, + "eval_rougeL_for_title_generation": 31.8843, + "eval_rougeL_for_word_analogy": 26.9167, + "eval_runtime": 670.5679, + "eval_samples_per_second": 17.761, + "eval_steps_per_second": 0.556, + "step": 3000 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 1.2202, + "step": 3500 + }, + { + "epoch": 0.8, + "eval_exact_match": 27.9681, + "eval_exact_match_for_answerability_classification": 50.0, + "eval_exact_match_for_cause_effect_classification": 36.5714, + "eval_exact_match_for_coreference_resolution": 35.8571, + "eval_exact_match_for_data_to_text": 5.8111, + "eval_exact_match_for_dialogue_act_recognition": 46.5714, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 35.4, + "eval_exact_match_for_overlap_extraction": 7.5, + "eval_exact_match_for_question_rewriting": 2.4545, + "eval_exact_match_for_task020_mctaco_answerability_classification": 49.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 39.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 7.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 5.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 38.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 15.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 57.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 21.0, + "eval_exact_match_for_task1153_bard_word_analogy": 19.0, + "eval_exact_match_for_task1154_bard_word_analogy": 24.0, + "eval_exact_match_for_task1155_bard_word_analogy": 53.0, + "eval_exact_match_for_task1156_bard_word_analogy": 25.0, + "eval_exact_match_for_task1157_bard_word_analogy": 43.0, + "eval_exact_match_for_task1158_bard_word_analogy": 30.0, + "eval_exact_match_for_task1159_bard_word_analogy": 13.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 7.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 4.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 28.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 40.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 52.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 48.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 44.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 17.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 30.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 55.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 37.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 48.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 41.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 17.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task219_rocstories_title_generation": 2.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 48.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 48.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 55.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 44.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 11.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 54.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 47.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 58.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 25.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 4.0, + "eval_exact_match_for_task602_wikitext_title_generation": 5.9524, + "eval_exact_match_for_task613_liar_keyword_tagging": 13.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 7.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 32.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 44.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 87.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 11.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 6.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 71.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 51.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 61.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 27.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 39.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 36.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 54.0, + "eval_exact_match_for_textual_entailment": 39.2917, + "eval_exact_match_for_title_generation": 8.0717, + "eval_exact_match_for_word_analogy": 28.5, + "eval_f1": 45.124, + "eval_f1_for_answerability_classification": 52.5641, + "eval_f1_for_cause_effect_classification": 53.0451, + "eval_f1_for_coreference_resolution": 44.3794, + "eval_f1_for_data_to_text": 49.9049, + "eval_f1_for_dialogue_act_recognition": 50.1429, + "eval_f1_for_grammar_error_correction": 64.0227, + "eval_f1_for_keyword_tagging": 49.5209, + "eval_f1_for_overlap_extraction": 31.5128, + "eval_f1_for_question_rewriting": 67.6193, + "eval_f1_for_task020_mctaco_answerability_classification": 49.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 40.1667, + "eval_f1_for_task034_winogrande_question_rewriting": 92.4261, + "eval_f1_for_task035_winogrande_question_rewriting": 86.2447, + "eval_f1_for_task036_qasc_keyword_tagging": 73.9178, + "eval_f1_for_task039_qasc_overlap_extraction": 25.3079, + "eval_f1_for_task050_multirc_answerability_classification": 57.0, + "eval_f1_for_task102_commongen_data_to_text": 53.4108, + "eval_f1_for_task1152_bard_word_analogy": 21.0, + "eval_f1_for_task1153_bard_word_analogy": 25.0, + "eval_f1_for_task1154_bard_word_analogy": 24.0, + "eval_f1_for_task1155_bard_word_analogy": 53.0, + "eval_f1_for_task1156_bard_word_analogy": 25.0, + "eval_f1_for_task1157_bard_word_analogy": 43.0, + "eval_f1_for_task1158_bard_word_analogy": 30.0, + "eval_f1_for_task1159_bard_word_analogy": 13.0, + "eval_f1_for_task1161_coda_19_title_generation": 33.5817, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.3174, + "eval_f1_for_task121_zest_question_rewriting": 44.1773, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 15.6603, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.4508, + "eval_f1_for_task1356_xlsum_title_generation": 17.0217, + "eval_f1_for_task1358_xlsum_title_generation": 30.8775, + "eval_f1_for_task1385_anli_textual_entailment": 28.0, + "eval_f1_for_task1386_anli_textual_entailment": 40.0, + "eval_f1_for_task1387_anli_textual_entailment": 32.0, + "eval_f1_for_task1388_cb_textual_entailment": 52.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 48.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 53.0, + "eval_f1_for_task1407_dart_data_to_text": 32.1112, + "eval_f1_for_task1409_dart_data_to_text": 48.7884, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 44.3547, + "eval_f1_for_task1439_doqa_answerability_classification": 44.0, + "eval_f1_for_task1442_doqa_answerability_classification": 50.0, + "eval_f1_for_task1516_imppres_textual_entailment": 17.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 30.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 55.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 35.8716, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.6907, + "eval_f1_for_task1562_zest_question_rewriting": 44.9338, + "eval_f1_for_task1586_scifact_title_generation": 35.1587, + "eval_f1_for_task1598_nyc_data_to_text": 50.4958, + "eval_f1_for_task1612_sick_textual_entailment": 34.0, + "eval_f1_for_task1615_sick_textual_entailment": 37.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.2156, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 48.0, + "eval_f1_for_task1631_open_pi_data_to_text": 92.9004, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_f1_for_task1659_billsum_title_generation": 35.2652, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 66.2963, + "eval_f1_for_task1728_web_nlg_data_to_text": 58.6732, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 33.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 34.0, + "eval_f1_for_task219_rocstories_title_generation": 16.0112, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_f1_for_task232_iirc_answerability_classification": 48.0, + "eval_f1_for_task233_iirc_answerability_classification": 48.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 55.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 57.8667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 37.7176, + "eval_f1_for_task288_gigaword_title_generation": 27.5807, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 45.1524, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 58.8444, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 82.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 86.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.2842, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 30.8333, + "eval_f1_for_task402_grailqa_question_rewriting": 73.1588, + "eval_f1_for_task418_persent_title_generation": 25.8762, + "eval_f1_for_task442_com_qa_question_rewriting": 68.4102, + "eval_f1_for_task500_scruples_title_generation": 13.0045, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 38.1703, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 32.7451, + "eval_f1_for_task602_wikitext_title_generation": 15.621, + "eval_f1_for_task613_liar_keyword_tagging": 16.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 18.6979, + "eval_f1_for_task619_ohsumed_title_generation": 40.9881, + "eval_f1_for_task620_ohsumed_keyword_tagging": 30.9413, + "eval_f1_for_task623_ohsumed_keyword_tagging": 32.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 44.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.4119, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 13.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 75.6091, + "eval_f1_for_task671_ambigqa_question_rewriting": 59.8685, + "eval_f1_for_task677_ollie_data_to_text": 22.3244, + "eval_f1_for_task738_perspectrum_textual_entailment": 6.0, + "eval_f1_for_task743_eurlex_title_generation": 35.1687, + "eval_f1_for_task760_msr_sqa_data_to_text": 5.3605, + "eval_f1_for_task769_qed_title_generation": 79.5006, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 51.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 61.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 27.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 47.1524, + "eval_f1_for_task892_gap_coreference_resolution": 36.0, + "eval_f1_for_task893_gap_coreference_resolution": 45.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 52.1166, + "eval_f1_for_task970_sherliic_textual_entailment": 54.0, + "eval_f1_for_textual_entailment": 39.2917, + "eval_f1_for_title_generation": 32.2648, + "eval_f1_for_word_analogy": 29.25, + "eval_gen_len": 10.1134, + "eval_global_step": 3500, + "eval_loss": 1.2416279315948486, + "eval_rouge1": 47.1108, + "eval_rouge1_for_answerability_classification": 52.5641, + "eval_rouge1_for_cause_effect_classification": 53.2991, + "eval_rouge1_for_coreference_resolution": 44.8443, + "eval_rouge1_for_data_to_text": 52.6866, + "eval_rouge1_for_dialogue_act_recognition": 52.8952, + "eval_rouge1_for_grammar_error_correction": 66.7844, + "eval_rouge1_for_keyword_tagging": 52.7206, + "eval_rouge1_for_overlap_extraction": 33.045, + "eval_rouge1_for_question_rewriting": 69.2353, + "eval_rouge1_for_task020_mctaco_answerability_classification": 49.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 40.0667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.4623, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.0982, + "eval_rouge1_for_task036_qasc_keyword_tagging": 75.9628, + "eval_rouge1_for_task039_qasc_overlap_extraction": 27.3079, + "eval_rouge1_for_task050_multirc_answerability_classification": 57.0, + "eval_rouge1_for_task102_commongen_data_to_text": 65.7895, + "eval_rouge1_for_task1152_bard_word_analogy": 21.0, + "eval_rouge1_for_task1153_bard_word_analogy": 25.0, + "eval_rouge1_for_task1154_bard_word_analogy": 24.0, + "eval_rouge1_for_task1155_bard_word_analogy": 53.0, + "eval_rouge1_for_task1156_bard_word_analogy": 25.0, + "eval_rouge1_for_task1157_bard_word_analogy": 43.0, + "eval_rouge1_for_task1158_bard_word_analogy": 30.0, + "eval_rouge1_for_task1159_bard_word_analogy": 13.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 37.3157, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.4934, + "eval_rouge1_for_task121_zest_question_rewriting": 46.8151, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.6642, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.8395, + "eval_rouge1_for_task1356_xlsum_title_generation": 20.0216, + "eval_rouge1_for_task1358_xlsum_title_generation": 35.8797, + "eval_rouge1_for_task1385_anli_textual_entailment": 31.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 40.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 41.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 52.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 48.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 60.9333, + "eval_rouge1_for_task1407_dart_data_to_text": 32.601, + "eval_rouge1_for_task1409_dart_data_to_text": 49.5161, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 46.6754, + "eval_rouge1_for_task1439_doqa_answerability_classification": 44.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 17.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 30.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 55.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 37.4025, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.8933, + "eval_rouge1_for_task1562_zest_question_rewriting": 48.5446, + "eval_rouge1_for_task1586_scifact_title_generation": 39.2787, + "eval_rouge1_for_task1598_nyc_data_to_text": 53.7431, + "eval_rouge1_for_task1612_sick_textual_entailment": 34.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 79.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.4871, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 48.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 93.0091, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1659_billsum_title_generation": 37.2291, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 66.2963, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 60.5254, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task219_rocstories_title_generation": 21.9643, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 48.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 48.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 55.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 58.5333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 38.782, + "eval_rouge1_for_task288_gigaword_title_generation": 30.2138, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 46.0524, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 59.1333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 82.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 86.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.4484, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 35.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 74.7422, + "eval_rouge1_for_task418_persent_title_generation": 29.9919, + "eval_rouge1_for_task442_com_qa_question_rewriting": 72.0381, + "eval_rouge1_for_task500_scruples_title_generation": 14.4083, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 38.814, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 34.599, + "eval_rouge1_for_task602_wikitext_title_generation": 16.4322, + "eval_rouge1_for_task613_liar_keyword_tagging": 24.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 20.3121, + "eval_rouge1_for_task619_ohsumed_title_generation": 44.669, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 36.7281, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 32.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 44.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.9119, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 13.5, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 76.3981, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 60.6692, + "eval_rouge1_for_task677_ollie_data_to_text": 24.5523, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 53.0, + "eval_rouge1_for_task743_eurlex_title_generation": 37.3164, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 5.7619, + "eval_rouge1_for_task769_qed_title_generation": 79.7672, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 51.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 61.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 38.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 46.9048, + "eval_rouge1_for_task892_gap_coreference_resolution": 36.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 45.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 53.9566, + "eval_rouge1_for_task970_sherliic_textual_entailment": 54.0, + "eval_rouge1_for_textual_entailment": 43.5, + "eval_rouge1_for_title_generation": 34.6602, + "eval_rouge1_for_word_analogy": 29.25, + "eval_rougeL": 45.7066, + "eval_rougeL_for_answerability_classification": 52.5641, + "eval_rougeL_for_cause_effect_classification": 52.8983, + "eval_rougeL_for_coreference_resolution": 44.8443, + "eval_rougeL_for_data_to_text": 45.6353, + "eval_rougeL_for_dialogue_act_recognition": 52.8952, + "eval_rougeL_for_grammar_error_correction": 65.8565, + "eval_rougeL_for_keyword_tagging": 52.0765, + "eval_rougeL_for_overlap_extraction": 32.2856, + "eval_rougeL_for_question_rewriting": 65.4678, + "eval_rougeL_for_task020_mctaco_answerability_classification": 49.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 40.0667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.3542, + "eval_rougeL_for_task035_winogrande_question_rewriting": 85.5621, + "eval_rougeL_for_task036_qasc_keyword_tagging": 74.4628, + "eval_rougeL_for_task039_qasc_overlap_extraction": 27.3079, + "eval_rougeL_for_task050_multirc_answerability_classification": 57.0, + "eval_rougeL_for_task102_commongen_data_to_text": 56.2294, + "eval_rougeL_for_task1152_bard_word_analogy": 21.0, + "eval_rougeL_for_task1153_bard_word_analogy": 25.0, + "eval_rougeL_for_task1154_bard_word_analogy": 24.0, + "eval_rougeL_for_task1155_bard_word_analogy": 53.0, + "eval_rougeL_for_task1156_bard_word_analogy": 25.0, + "eval_rougeL_for_task1157_bard_word_analogy": 43.0, + "eval_rougeL_for_task1158_bard_word_analogy": 30.0, + "eval_rougeL_for_task1159_bard_word_analogy": 13.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 30.3275, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.3361, + "eval_rougeL_for_task121_zest_question_rewriting": 39.5174, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 15.1112, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.6408, + "eval_rougeL_for_task1356_xlsum_title_generation": 16.8797, + "eval_rougeL_for_task1358_xlsum_title_generation": 29.5001, + "eval_rougeL_for_task1385_anli_textual_entailment": 31.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 40.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 41.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 52.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 48.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 60.9333, + "eval_rougeL_for_task1407_dart_data_to_text": 28.3924, + "eval_rougeL_for_task1409_dart_data_to_text": 40.8992, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 45.6846, + "eval_rougeL_for_task1439_doqa_answerability_classification": 44.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 17.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 30.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 55.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 34.3114, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.0284, + "eval_rougeL_for_task1562_zest_question_rewriting": 42.403, + "eval_rougeL_for_task1586_scifact_title_generation": 31.4739, + "eval_rougeL_for_task1598_nyc_data_to_text": 42.3625, + "eval_rougeL_for_task1612_sick_textual_entailment": 34.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 79.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.247, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 48.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 91.32, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1659_billsum_title_generation": 31.5399, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 66.2963, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.9536, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task219_rocstories_title_generation": 21.5476, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 48.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 48.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 55.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 58.5333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 37.2633, + "eval_rougeL_for_task288_gigaword_title_generation": 26.0321, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 46.0524, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 59.1333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 82.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 86.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 32.3498, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 35.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 63.179, + "eval_rougeL_for_task418_persent_title_generation": 26.4218, + "eval_rougeL_for_task442_com_qa_question_rewriting": 66.6154, + "eval_rougeL_for_task500_scruples_title_generation": 13.1891, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 38.1636, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 32.0182, + "eval_rougeL_for_task602_wikitext_title_generation": 16.241, + "eval_rougeL_for_task613_liar_keyword_tagging": 24.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 18.6051, + "eval_rougeL_for_task619_ohsumed_title_generation": 37.8259, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 35.0076, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 32.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 44.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.9119, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 13.5, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 75.1587, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 58.1316, + "eval_rougeL_for_task677_ollie_data_to_text": 21.1344, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 53.0, + "eval_rougeL_for_task743_eurlex_title_generation": 32.4317, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.1105, + "eval_rougeL_for_task769_qed_title_generation": 79.7672, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 51.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 61.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 38.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 46.9048, + "eval_rougeL_for_task892_gap_coreference_resolution": 36.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 45.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.3275, + "eval_rougeL_for_task970_sherliic_textual_entailment": 54.0, + "eval_rougeL_for_textual_entailment": 43.5, + "eval_rougeL_for_title_generation": 31.4004, + "eval_rougeL_for_word_analogy": 29.25, + "eval_runtime": 659.4484, + "eval_samples_per_second": 18.061, + "eval_steps_per_second": 0.566, + "step": 3500 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 1.1921, + "step": 4000 + }, + { + "epoch": 0.91, + "eval_exact_match": 28.3963, + "eval_exact_match_for_answerability_classification": 49.2308, + "eval_exact_match_for_cause_effect_classification": 37.1429, + "eval_exact_match_for_coreference_resolution": 35.0714, + "eval_exact_match_for_data_to_text": 7.385, + "eval_exact_match_for_dialogue_act_recognition": 47.4286, + "eval_exact_match_for_grammar_error_correction": 8.0, + "eval_exact_match_for_keyword_tagging": 34.8, + "eval_exact_match_for_overlap_extraction": 11.0, + "eval_exact_match_for_question_rewriting": 2.6364, + "eval_exact_match_for_task020_mctaco_answerability_classification": 43.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 37.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 6.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 5.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 24.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 22.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 52.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 21.0, + "eval_exact_match_for_task1153_bard_word_analogy": 13.0, + "eval_exact_match_for_task1154_bard_word_analogy": 22.0, + "eval_exact_match_for_task1155_bard_word_analogy": 58.0, + "eval_exact_match_for_task1156_bard_word_analogy": 25.0, + "eval_exact_match_for_task1157_bard_word_analogy": 44.0, + "eval_exact_match_for_task1158_bard_word_analogy": 26.0, + "eval_exact_match_for_task1159_bard_word_analogy": 17.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 9.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 4.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 7.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 26.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 14.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 43.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 45.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 57.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 55.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 46.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 51.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 4.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 41.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 44.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 16.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 46.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 64.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 53.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 3.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 18.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 32.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 68.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 36.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 7.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 44.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 49.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 21.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 4.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 18.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 8.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 35.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 89.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 5.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 36.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 70.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 53.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 57.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 69.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 36.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 47.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 59.0, + "eval_exact_match_for_textual_entailment": 40.4167, + "eval_exact_match_for_title_generation": 9.0247, + "eval_exact_match_for_word_analogy": 28.25, + "eval_f1": 45.6647, + "eval_f1_for_answerability_classification": 51.7949, + "eval_f1_for_cause_effect_classification": 54.8107, + "eval_f1_for_coreference_resolution": 42.954, + "eval_f1_for_data_to_text": 49.9712, + "eval_f1_for_dialogue_act_recognition": 51.0, + "eval_f1_for_grammar_error_correction": 66.5312, + "eval_f1_for_keyword_tagging": 48.766, + "eval_f1_for_overlap_extraction": 42.4079, + "eval_f1_for_question_rewriting": 67.3159, + "eval_f1_for_task020_mctaco_answerability_classification": 43.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 38.1667, + "eval_f1_for_task034_winogrande_question_rewriting": 91.8785, + "eval_f1_for_task035_winogrande_question_rewriting": 85.8446, + "eval_f1_for_task036_qasc_keyword_tagging": 64.8062, + "eval_f1_for_task039_qasc_overlap_extraction": 28.9667, + "eval_f1_for_task050_multirc_answerability_classification": 52.0, + "eval_f1_for_task102_commongen_data_to_text": 52.4743, + "eval_f1_for_task1152_bard_word_analogy": 21.0, + "eval_f1_for_task1153_bard_word_analogy": 23.6667, + "eval_f1_for_task1154_bard_word_analogy": 22.0, + "eval_f1_for_task1155_bard_word_analogy": 58.0, + "eval_f1_for_task1156_bard_word_analogy": 25.0, + "eval_f1_for_task1157_bard_word_analogy": 44.0, + "eval_f1_for_task1158_bard_word_analogy": 26.0, + "eval_f1_for_task1159_bard_word_analogy": 18.3333, + "eval_f1_for_task1161_coda_19_title_generation": 33.7985, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.1992, + "eval_f1_for_task121_zest_question_rewriting": 45.4457, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 15.2981, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.0973, + "eval_f1_for_task1356_xlsum_title_generation": 19.0209, + "eval_f1_for_task1358_xlsum_title_generation": 31.1958, + "eval_f1_for_task1385_anli_textual_entailment": 7.0, + "eval_f1_for_task1386_anli_textual_entailment": 26.0, + "eval_f1_for_task1387_anli_textual_entailment": 14.0, + "eval_f1_for_task1388_cb_textual_entailment": 43.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 45.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 57.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 55.0, + "eval_f1_for_task1407_dart_data_to_text": 32.2323, + "eval_f1_for_task1409_dart_data_to_text": 48.1065, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 49.2602, + "eval_f1_for_task1439_doqa_answerability_classification": 46.0, + "eval_f1_for_task1442_doqa_answerability_classification": 51.0, + "eval_f1_for_task1516_imppres_textual_entailment": 4.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 41.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 44.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 35.1533, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.8022, + "eval_f1_for_task1562_zest_question_rewriting": 45.7556, + "eval_f1_for_task1586_scifact_title_generation": 36.5442, + "eval_f1_for_task1598_nyc_data_to_text": 50.4219, + "eval_f1_for_task1612_sick_textual_entailment": 46.0, + "eval_f1_for_task1615_sick_textual_entailment": 64.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.1223, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 93.5877, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 33.4915, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 66.6455, + "eval_f1_for_task1728_web_nlg_data_to_text": 58.0372, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 32.0, + "eval_f1_for_task202_multinli_textual_entailment": 34.0, + "eval_f1_for_task219_rocstories_title_generation": 19.4746, + "eval_f1_for_task220_rocstories_title_generation": 68.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 49.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 51.8, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 55.8492, + "eval_f1_for_task288_gigaword_title_generation": 29.8013, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 33.7, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 56.8444, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 81.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.7758, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 24.3333, + "eval_f1_for_task402_grailqa_question_rewriting": 74.3505, + "eval_f1_for_task418_persent_title_generation": 25.9523, + "eval_f1_for_task442_com_qa_question_rewriting": 68.5163, + "eval_f1_for_task500_scruples_title_generation": 12.3554, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 37.3568, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 33.4187, + "eval_f1_for_task602_wikitext_title_generation": 14.6363, + "eval_f1_for_task613_liar_keyword_tagging": 18.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 20.5656, + "eval_f1_for_task619_ohsumed_title_generation": 42.0405, + "eval_f1_for_task620_ohsumed_keyword_tagging": 30.1, + "eval_f1_for_task623_ohsumed_keyword_tagging": 35.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 95.2571, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 7.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 71.2061, + "eval_f1_for_task671_ambigqa_question_rewriting": 58.0587, + "eval_f1_for_task677_ollie_data_to_text": 23.6294, + "eval_f1_for_task738_perspectrum_textual_entailment": 36.0, + "eval_f1_for_task743_eurlex_title_generation": 34.5745, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.2176, + "eval_f1_for_task769_qed_title_generation": 80.5857, + "eval_f1_for_task827_copa_cause_effect_classification": 53.0, + "eval_f1_for_task828_copa_cause_effect_classification": 57.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 69.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_f1_for_task890_gwsd_textual_entailment": 36.0, + "eval_f1_for_task891_gap_coreference_resolution": 51.8667, + "eval_f1_for_task892_gap_coreference_resolution": 46.0, + "eval_f1_for_task893_gap_coreference_resolution": 47.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_f1_for_task957_e2e_data_to_text": 53.4358, + "eval_f1_for_task970_sherliic_textual_entailment": 59.0, + "eval_f1_for_textual_entailment": 40.4167, + "eval_f1_for_title_generation": 33.6523, + "eval_f1_for_word_analogy": 29.75, + "eval_gen_len": 9.9533, + "eval_global_step": 4000, + "eval_loss": 1.2227290868759155, + "eval_rouge1": 47.7766, + "eval_rouge1_for_answerability_classification": 51.7949, + "eval_rouge1_for_cause_effect_classification": 55.1817, + "eval_rouge1_for_coreference_resolution": 43.5128, + "eval_rouge1_for_data_to_text": 52.5195, + "eval_rouge1_for_dialogue_act_recognition": 54.0476, + "eval_rouge1_for_grammar_error_correction": 69.2339, + "eval_rouge1_for_keyword_tagging": 53.0522, + "eval_rouge1_for_overlap_extraction": 45.292, + "eval_rouge1_for_question_rewriting": 68.9642, + "eval_rouge1_for_task020_mctaco_answerability_classification": 43.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 38.0667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 91.9167, + "eval_rouge1_for_task035_winogrande_question_rewriting": 86.7763, + "eval_rouge1_for_task036_qasc_keyword_tagging": 67.8028, + "eval_rouge1_for_task039_qasc_overlap_extraction": 33.5333, + "eval_rouge1_for_task050_multirc_answerability_classification": 52.0, + "eval_rouge1_for_task102_commongen_data_to_text": 65.0964, + "eval_rouge1_for_task1152_bard_word_analogy": 21.0, + "eval_rouge1_for_task1153_bard_word_analogy": 23.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 22.0, + "eval_rouge1_for_task1155_bard_word_analogy": 58.0, + "eval_rouge1_for_task1156_bard_word_analogy": 25.0, + "eval_rouge1_for_task1157_bard_word_analogy": 44.0, + "eval_rouge1_for_task1158_bard_word_analogy": 26.0, + "eval_rouge1_for_task1159_bard_word_analogy": 18.3333, + "eval_rouge1_for_task1161_coda_19_title_generation": 37.2333, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.3965, + "eval_rouge1_for_task121_zest_question_rewriting": 47.9006, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.6917, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.3758, + "eval_rouge1_for_task1356_xlsum_title_generation": 21.8559, + "eval_rouge1_for_task1358_xlsum_title_generation": 36.0411, + "eval_rouge1_for_task1385_anli_textual_entailment": 29.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 40.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 43.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 45.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 57.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 62.3333, + "eval_rouge1_for_task1407_dart_data_to_text": 32.4656, + "eval_rouge1_for_task1409_dart_data_to_text": 48.7489, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 51.3969, + "eval_rouge1_for_task1439_doqa_answerability_classification": 46.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 51.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 4.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 41.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 44.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 37.5249, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.0709, + "eval_rouge1_for_task1562_zest_question_rewriting": 48.9904, + "eval_rouge1_for_task1586_scifact_title_generation": 40.6465, + "eval_rouge1_for_task1598_nyc_data_to_text": 52.0593, + "eval_rouge1_for_task1612_sick_textual_entailment": 46.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 88.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.3915, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 93.7954, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 35.1562, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 66.6455, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 60.0779, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 32.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task219_rocstories_title_generation": 24.381, + "eval_rouge1_for_task220_rocstories_title_generation": 68.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 52.4667, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 57.0507, + "eval_rouge1_for_task288_gigaword_title_generation": 32.0752, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 33.9333, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 57.1333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 81.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 32.2419, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 30.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 76.6054, + "eval_rouge1_for_task418_persent_title_generation": 30.2203, + "eval_rouge1_for_task442_com_qa_question_rewriting": 71.9233, + "eval_rouge1_for_task500_scruples_title_generation": 13.6533, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 37.7, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 35.0198, + "eval_rouge1_for_task602_wikitext_title_generation": 15.3945, + "eval_rouge1_for_task613_liar_keyword_tagging": 28.8333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 22.697, + "eval_rouge1_for_task619_ohsumed_title_generation": 45.7735, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 37.8678, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 35.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.7571, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 7.5, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 72.1427, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 59.1873, + "eval_rouge1_for_task677_ollie_data_to_text": 25.6251, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 63.0, + "eval_rouge1_for_task743_eurlex_title_generation": 36.5898, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.6456, + "eval_rouge1_for_task769_qed_title_generation": 80.8524, + "eval_rouge1_for_task827_copa_cause_effect_classification": 53.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 57.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 69.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 37.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 36.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 51.7667, + "eval_rouge1_for_task892_gap_coreference_resolution": 46.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 47.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_rouge1_for_task957_e2e_data_to_text": 54.9947, + "eval_rouge1_for_task970_sherliic_textual_entailment": 59.0, + "eval_rouge1_for_textual_entailment": 44.875, + "eval_rouge1_for_title_generation": 35.9499, + "eval_rouge1_for_word_analogy": 29.75, + "eval_rougeL": 46.3236, + "eval_rougeL_for_answerability_classification": 51.7949, + "eval_rougeL_for_cause_effect_classification": 54.669, + "eval_rougeL_for_coreference_resolution": 43.5128, + "eval_rougeL_for_data_to_text": 45.7853, + "eval_rougeL_for_dialogue_act_recognition": 54.0476, + "eval_rougeL_for_grammar_error_correction": 68.3024, + "eval_rougeL_for_keyword_tagging": 52.2434, + "eval_rougeL_for_overlap_extraction": 43.7028, + "eval_rougeL_for_question_rewriting": 64.8644, + "eval_rougeL_for_task020_mctaco_answerability_classification": 43.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 38.0667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 91.2179, + "eval_rougeL_for_task035_winogrande_question_rewriting": 85.4734, + "eval_rougeL_for_task036_qasc_keyword_tagging": 65.1457, + "eval_rougeL_for_task039_qasc_overlap_extraction": 33.5333, + "eval_rougeL_for_task050_multirc_answerability_classification": 52.0, + "eval_rougeL_for_task102_commongen_data_to_text": 55.6655, + "eval_rougeL_for_task1152_bard_word_analogy": 21.0, + "eval_rougeL_for_task1153_bard_word_analogy": 23.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 22.0, + "eval_rougeL_for_task1155_bard_word_analogy": 58.0, + "eval_rougeL_for_task1156_bard_word_analogy": 25.0, + "eval_rougeL_for_task1157_bard_word_analogy": 44.0, + "eval_rougeL_for_task1158_bard_word_analogy": 26.0, + "eval_rougeL_for_task1159_bard_word_analogy": 18.3333, + "eval_rougeL_for_task1161_coda_19_title_generation": 30.9674, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.4496, + "eval_rougeL_for_task121_zest_question_rewriting": 39.6999, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 15.0516, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.0492, + "eval_rougeL_for_task1356_xlsum_title_generation": 18.8658, + "eval_rougeL_for_task1358_xlsum_title_generation": 29.4281, + "eval_rougeL_for_task1385_anli_textual_entailment": 29.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 40.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 43.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 45.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 57.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 62.3333, + "eval_rougeL_for_task1407_dart_data_to_text": 29.1004, + "eval_rougeL_for_task1409_dart_data_to_text": 40.8683, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 50.3989, + "eval_rougeL_for_task1439_doqa_answerability_classification": 46.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 51.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 4.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 41.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 44.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 33.5179, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.206, + "eval_rougeL_for_task1562_zest_question_rewriting": 43.4332, + "eval_rougeL_for_task1586_scifact_title_generation": 31.9071, + "eval_rougeL_for_task1598_nyc_data_to_text": 41.4696, + "eval_rougeL_for_task1612_sick_textual_entailment": 46.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 88.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.1359, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 93.2963, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 29.9679, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 66.6455, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 52.5304, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 32.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task219_rocstories_title_generation": 24.381, + "eval_rougeL_for_task220_rocstories_title_generation": 68.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 52.4667, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 53.8723, + "eval_rougeL_for_task288_gigaword_title_generation": 27.3454, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 33.9333, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 57.1333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 81.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.8139, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 30.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 65.4694, + "eval_rougeL_for_task418_persent_title_generation": 26.2932, + "eval_rougeL_for_task442_com_qa_question_rewriting": 64.6093, + "eval_rougeL_for_task500_scruples_title_generation": 12.7532, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 37.3316, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 32.4457, + "eval_rougeL_for_task602_wikitext_title_generation": 15.3063, + "eval_rougeL_for_task613_liar_keyword_tagging": 28.8333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 20.5358, + "eval_rougeL_for_task619_ohsumed_title_generation": 38.3684, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 36.4806, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 35.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.7571, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 7.5, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 70.5554, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 55.4151, + "eval_rougeL_for_task677_ollie_data_to_text": 22.2213, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 63.0, + "eval_rougeL_for_task743_eurlex_title_generation": 31.3588, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.3243, + "eval_rougeL_for_task769_qed_title_generation": 80.8524, + "eval_rougeL_for_task827_copa_cause_effect_classification": 53.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 57.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 69.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 37.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 36.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 51.7667, + "eval_rougeL_for_task892_gap_coreference_resolution": 46.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 47.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_rougeL_for_task957_e2e_data_to_text": 42.1709, + "eval_rougeL_for_task970_sherliic_textual_entailment": 59.0, + "eval_rougeL_for_textual_entailment": 44.875, + "eval_rougeL_for_title_generation": 32.6061, + "eval_rougeL_for_word_analogy": 29.75, + "eval_runtime": 626.3281, + "eval_samples_per_second": 19.016, + "eval_steps_per_second": 0.596, + "step": 4000 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 1.1497, + "step": 4500 + }, + { + "epoch": 1.03, + "eval_exact_match": 27.9849, + "eval_exact_match_for_answerability_classification": 51.0, + "eval_exact_match_for_cause_effect_classification": 35.8571, + "eval_exact_match_for_coreference_resolution": 36.1429, + "eval_exact_match_for_data_to_text": 6.5375, + "eval_exact_match_for_dialogue_act_recognition": 46.2857, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 39.8, + "eval_exact_match_for_overlap_extraction": 11.0, + "eval_exact_match_for_question_rewriting": 2.1818, + "eval_exact_match_for_task020_mctaco_answerability_classification": 52.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 38.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 6.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 3.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 33.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 21.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 57.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 17.0, + "eval_exact_match_for_task1153_bard_word_analogy": 18.0, + "eval_exact_match_for_task1154_bard_word_analogy": 15.0, + "eval_exact_match_for_task1155_bard_word_analogy": 53.0, + "eval_exact_match_for_task1156_bard_word_analogy": 30.0, + "eval_exact_match_for_task1157_bard_word_analogy": 47.0, + "eval_exact_match_for_task1158_bard_word_analogy": 19.0, + "eval_exact_match_for_task1159_bard_word_analogy": 12.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 10.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 5.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 48.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 13.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 6.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 17.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 15.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 44.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 47.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 54.0, + "eval_exact_match_for_task1407_dart_data_to_text": 1.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 51.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 1.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 52.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 59.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 47.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 46.0, + "eval_exact_match_for_task1659_billsum_title_generation": 3.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 25.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 49.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 30.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task219_rocstories_title_generation": 7.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 45.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 47.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 56.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 45.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 1.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 6.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 36.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 47.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 45.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 54.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 25.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 1.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 60.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 4.0, + "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, + "eval_exact_match_for_task613_liar_keyword_tagging": 17.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 14.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 27.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 28.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 49.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 86.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 5.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 58.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 66.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 55.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 38.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 47.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 42.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 54.0, + "eval_exact_match_for_textual_entailment": 38.25, + "eval_exact_match_for_title_generation": 8.1839, + "eval_exact_match_for_word_analogy": 26.375, + "eval_f1": 44.7608, + "eval_f1_for_answerability_classification": 53.5641, + "eval_f1_for_cause_effect_classification": 52.5397, + "eval_f1_for_coreference_resolution": 42.3449, + "eval_f1_for_data_to_text": 49.2906, + "eval_f1_for_dialogue_act_recognition": 49.8571, + "eval_f1_for_grammar_error_correction": 68.1564, + "eval_f1_for_keyword_tagging": 53.9453, + "eval_f1_for_overlap_extraction": 34.1167, + "eval_f1_for_question_rewriting": 65.7342, + "eval_f1_for_task020_mctaco_answerability_classification": 52.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 39.8333, + "eval_f1_for_task034_winogrande_question_rewriting": 91.885, + "eval_f1_for_task035_winogrande_question_rewriting": 84.5858, + "eval_f1_for_task036_qasc_keyword_tagging": 69.112, + "eval_f1_for_task039_qasc_overlap_extraction": 26.0667, + "eval_f1_for_task050_multirc_answerability_classification": 57.0, + "eval_f1_for_task102_commongen_data_to_text": 52.8662, + "eval_f1_for_task1152_bard_word_analogy": 17.0, + "eval_f1_for_task1153_bard_word_analogy": 20.6667, + "eval_f1_for_task1154_bard_word_analogy": 15.0, + "eval_f1_for_task1155_bard_word_analogy": 53.0, + "eval_f1_for_task1156_bard_word_analogy": 30.0, + "eval_f1_for_task1157_bard_word_analogy": 47.0, + "eval_f1_for_task1158_bard_word_analogy": 19.0, + "eval_f1_for_task1159_bard_word_analogy": 12.6667, + "eval_f1_for_task1161_coda_19_title_generation": 32.3893, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.5114, + "eval_f1_for_task121_zest_question_rewriting": 44.5345, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 15.9501, + "eval_f1_for_task1344_rte_textual_entailment": 48.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.6013, + "eval_f1_for_task1356_xlsum_title_generation": 21.1489, + "eval_f1_for_task1358_xlsum_title_generation": 32.3925, + "eval_f1_for_task1385_anli_textual_entailment": 13.0, + "eval_f1_for_task1386_anli_textual_entailment": 6.0, + "eval_f1_for_task1387_anli_textual_entailment": 17.0, + "eval_f1_for_task1388_cb_textual_entailment": 15.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 44.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 47.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 54.0, + "eval_f1_for_task1407_dart_data_to_text": 32.3823, + "eval_f1_for_task1409_dart_data_to_text": 48.4769, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 52.2928, + "eval_f1_for_task1439_doqa_answerability_classification": 52.0, + "eval_f1_for_task1442_doqa_answerability_classification": 51.0, + "eval_f1_for_task1516_imppres_textual_entailment": 1.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 52.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 36.7813, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.02, + "eval_f1_for_task1562_zest_question_rewriting": 45.5183, + "eval_f1_for_task1586_scifact_title_generation": 35.5538, + "eval_f1_for_task1598_nyc_data_to_text": 52.0044, + "eval_f1_for_task1612_sick_textual_entailment": 34.0, + "eval_f1_for_task1615_sick_textual_entailment": 59.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.148, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 92.1416, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 46.0, + "eval_f1_for_task1659_billsum_title_generation": 32.833, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 68.7768, + "eval_f1_for_task1728_web_nlg_data_to_text": 56.0189, + "eval_f1_for_task190_snli_textual_entailment": 49.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 33.0, + "eval_f1_for_task201_multinli_textual_entailment": 30.0, + "eval_f1_for_task202_multinli_textual_entailment": 34.0, + "eval_f1_for_task219_rocstories_title_generation": 21.327, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 45.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 47.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 56.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 57.3667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 42.1667, + "eval_f1_for_task288_gigaword_title_generation": 29.8741, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 16.5857, + "eval_f1_for_task329_gap_coreference_resolution": 36.0, + "eval_f1_for_task330_gap_coreference_resolution": 54.0667, + "eval_f1_for_task349_squad2.0_answerability_classification": 47.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 81.6667, + "eval_f1_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_f1_for_task393_cod3s_cause_effect_classification": 29.9829, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 29.0, + "eval_f1_for_task402_grailqa_question_rewriting": 67.4962, + "eval_f1_for_task418_persent_title_generation": 24.8621, + "eval_f1_for_task442_com_qa_question_rewriting": 67.6182, + "eval_f1_for_task500_scruples_title_generation": 14.3426, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 39.7709, + "eval_f1_for_task520_aquamuse_answerability_classification": 60.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 36.4336, + "eval_f1_for_task602_wikitext_title_generation": 14.7476, + "eval_f1_for_task613_liar_keyword_tagging": 19.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 19.4615, + "eval_f1_for_task619_ohsumed_title_generation": 41.3223, + "eval_f1_for_task620_ohsumed_keyword_tagging": 39.6667, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 27.0, + "eval_f1_for_task641_e_snli_textual_entailment": 28.0, + "eval_f1_for_task642_e_snli_textual_entailment": 49.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 92.9476, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 6.3333, + "eval_f1_for_task670_ambigqa_question_rewriting": 71.0042, + "eval_f1_for_task671_ambigqa_question_rewriting": 51.1735, + "eval_f1_for_task677_ollie_data_to_text": 21.9593, + "eval_f1_for_task738_perspectrum_textual_entailment": 58.0, + "eval_f1_for_task743_eurlex_title_generation": 34.3618, + "eval_f1_for_task760_msr_sqa_data_to_text": 5.6598, + "eval_f1_for_task769_qed_title_generation": 76.5143, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 55.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_f1_for_task890_gwsd_textual_entailment": 38.0, + "eval_f1_for_task891_gap_coreference_resolution": 54.8667, + "eval_f1_for_task892_gap_coreference_resolution": 44.0, + "eval_f1_for_task893_gap_coreference_resolution": 42.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_f1_for_task957_e2e_data_to_text": 49.8194, + "eval_f1_for_task970_sherliic_textual_entailment": 54.0, + "eval_f1_for_textual_entailment": 38.25, + "eval_f1_for_title_generation": 32.9734, + "eval_f1_for_word_analogy": 26.7917, + "eval_gen_len": 9.1274, + "eval_global_step": 4500, + "eval_loss": 1.271920919418335, + "eval_rouge1": 47.1082, + "eval_rouge1_for_answerability_classification": 53.5641, + "eval_rouge1_for_cause_effect_classification": 52.8157, + "eval_rouge1_for_coreference_resolution": 42.8405, + "eval_rouge1_for_data_to_text": 51.4807, + "eval_rouge1_for_dialogue_act_recognition": 53.4143, + "eval_rouge1_for_grammar_error_correction": 70.5611, + "eval_rouge1_for_keyword_tagging": 58.0426, + "eval_rouge1_for_overlap_extraction": 38.1246, + "eval_rouge1_for_question_rewriting": 67.4527, + "eval_rouge1_for_task020_mctaco_answerability_classification": 52.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 39.7333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 91.9651, + "eval_rouge1_for_task035_winogrande_question_rewriting": 85.6031, + "eval_rouge1_for_task036_qasc_keyword_tagging": 74.5666, + "eval_rouge1_for_task039_qasc_overlap_extraction": 32.3, + "eval_rouge1_for_task050_multirc_answerability_classification": 57.0, + "eval_rouge1_for_task102_commongen_data_to_text": 63.9389, + "eval_rouge1_for_task1152_bard_word_analogy": 17.0, + "eval_rouge1_for_task1153_bard_word_analogy": 20.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 15.0, + "eval_rouge1_for_task1155_bard_word_analogy": 53.0, + "eval_rouge1_for_task1156_bard_word_analogy": 30.0, + "eval_rouge1_for_task1157_bard_word_analogy": 47.0, + "eval_rouge1_for_task1158_bard_word_analogy": 19.0, + "eval_rouge1_for_task1159_bard_word_analogy": 12.6667, + "eval_rouge1_for_task1161_coda_19_title_generation": 35.6798, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.6921, + "eval_rouge1_for_task121_zest_question_rewriting": 47.2474, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 16.4664, + "eval_rouge1_for_task1344_rte_textual_entailment": 48.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.6889, + "eval_rouge1_for_task1356_xlsum_title_generation": 24.3433, + "eval_rouge1_for_task1358_xlsum_title_generation": 36.869, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 41.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 43.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 44.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 47.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 64.2333, + "eval_rouge1_for_task1407_dart_data_to_text": 32.4626, + "eval_rouge1_for_task1409_dart_data_to_text": 49.1363, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 54.1435, + "eval_rouge1_for_task1439_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 51.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 1.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 52.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 39.0662, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.9787, + "eval_rouge1_for_task1562_zest_question_rewriting": 48.4355, + "eval_rouge1_for_task1586_scifact_title_generation": 39.9411, + "eval_rouge1_for_task1598_nyc_data_to_text": 53.6708, + "eval_rouge1_for_task1612_sick_textual_entailment": 34.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 86.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.6224, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 92.2579, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 46.0, + "eval_rouge1_for_task1659_billsum_title_generation": 34.8374, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 68.7768, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 57.8493, + "eval_rouge1_for_task190_snli_textual_entailment": 49.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 30.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task219_rocstories_title_generation": 27.3175, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 45.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 47.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 56.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 58.5333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 43.9491, + "eval_rouge1_for_task288_gigaword_title_generation": 32.6635, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 16.919, + "eval_rouge1_for_task329_gap_coreference_resolution": 36.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 53.9714, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 47.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 81.6667, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.2199, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 34.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 70.0598, + "eval_rouge1_for_task418_persent_title_generation": 29.1423, + "eval_rouge1_for_task442_com_qa_question_rewriting": 70.9807, + "eval_rouge1_for_task500_scruples_title_generation": 16.4504, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 40.507, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 60.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 37.7043, + "eval_rouge1_for_task602_wikitext_title_generation": 15.8319, + "eval_rouge1_for_task613_liar_keyword_tagging": 30.8333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 21.1564, + "eval_rouge1_for_task619_ohsumed_title_generation": 44.8068, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 42.3654, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 27.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 28.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 49.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 93.4476, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 6.8333, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 72.141, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 52.5441, + "eval_rouge1_for_task677_ollie_data_to_text": 23.7202, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 62.0, + "eval_rouge1_for_task743_eurlex_title_generation": 36.6082, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 5.9941, + "eval_rouge1_for_task769_qed_title_generation": 76.7095, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 55.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 40.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 38.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 54.6667, + "eval_rouge1_for_task892_gap_coreference_resolution": 44.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 42.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_rouge1_for_task957_e2e_data_to_text": 50.6362, + "eval_rouge1_for_task970_sherliic_textual_entailment": 54.0, + "eval_rouge1_for_textual_entailment": 43.7222, + "eval_rouge1_for_title_generation": 35.4491, + "eval_rouge1_for_word_analogy": 26.7917, + "eval_rougeL": 45.7501, + "eval_rougeL_for_answerability_classification": 53.5641, + "eval_rougeL_for_cause_effect_classification": 52.4711, + "eval_rougeL_for_coreference_resolution": 42.8405, + "eval_rougeL_for_data_to_text": 45.0226, + "eval_rougeL_for_dialogue_act_recognition": 53.4143, + "eval_rougeL_for_grammar_error_correction": 69.4211, + "eval_rougeL_for_keyword_tagging": 57.5159, + "eval_rougeL_for_overlap_extraction": 37.2562, + "eval_rougeL_for_question_rewriting": 63.5044, + "eval_rougeL_for_task020_mctaco_answerability_classification": 52.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 39.7333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 91.2663, + "eval_rougeL_for_task035_winogrande_question_rewriting": 83.9218, + "eval_rougeL_for_task036_qasc_keyword_tagging": 73.4205, + "eval_rougeL_for_task039_qasc_overlap_extraction": 32.3, + "eval_rougeL_for_task050_multirc_answerability_classification": 57.0, + "eval_rougeL_for_task102_commongen_data_to_text": 56.0243, + "eval_rougeL_for_task1152_bard_word_analogy": 17.0, + "eval_rougeL_for_task1153_bard_word_analogy": 20.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 15.0, + "eval_rougeL_for_task1155_bard_word_analogy": 53.0, + "eval_rougeL_for_task1156_bard_word_analogy": 30.0, + "eval_rougeL_for_task1157_bard_word_analogy": 47.0, + "eval_rougeL_for_task1158_bard_word_analogy": 19.0, + "eval_rougeL_for_task1159_bard_word_analogy": 12.6667, + "eval_rougeL_for_task1161_coda_19_title_generation": 29.0479, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 80.1384, + "eval_rougeL_for_task121_zest_question_rewriting": 39.8935, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 15.6878, + "eval_rougeL_for_task1344_rte_textual_entailment": 48.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.5178, + "eval_rougeL_for_task1356_xlsum_title_generation": 20.7665, + "eval_rougeL_for_task1358_xlsum_title_generation": 31.7058, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 41.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 43.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 44.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 47.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 64.2333, + "eval_rougeL_for_task1407_dart_data_to_text": 28.6601, + "eval_rougeL_for_task1409_dart_data_to_text": 42.0413, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 52.7284, + "eval_rougeL_for_task1439_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 51.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 1.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 52.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 35.1632, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.1138, + "eval_rougeL_for_task1562_zest_question_rewriting": 43.1913, + "eval_rougeL_for_task1586_scifact_title_generation": 32.8312, + "eval_rougeL_for_task1598_nyc_data_to_text": 42.2216, + "eval_rougeL_for_task1612_sick_textual_entailment": 34.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 86.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.7122, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 90.1978, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 46.0, + "eval_rougeL_for_task1659_billsum_title_generation": 29.6294, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 68.7768, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 51.3753, + "eval_rougeL_for_task190_snli_textual_entailment": 49.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 30.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task219_rocstories_title_generation": 27.3175, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 45.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 47.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 56.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 58.5333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 42.2124, + "eval_rougeL_for_task288_gigaword_title_generation": 28.6512, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 16.919, + "eval_rougeL_for_task329_gap_coreference_resolution": 36.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 53.9714, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 47.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 81.6667, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 29.556, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 34.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 59.6896, + "eval_rougeL_for_task418_persent_title_generation": 25.562, + "eval_rougeL_for_task442_com_qa_question_rewriting": 62.7158, + "eval_rougeL_for_task500_scruples_title_generation": 15.2917, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 39.771, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 60.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 35.0332, + "eval_rougeL_for_task602_wikitext_title_generation": 15.6407, + "eval_rougeL_for_task613_liar_keyword_tagging": 30.8333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 19.4085, + "eval_rougeL_for_task619_ohsumed_title_generation": 38.9369, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 40.8782, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 27.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 28.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 49.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 93.4476, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 6.8333, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 70.6171, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 48.8841, + "eval_rougeL_for_task677_ollie_data_to_text": 19.6883, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 62.0, + "eval_rougeL_for_task743_eurlex_title_generation": 31.2555, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.4601, + "eval_rougeL_for_task769_qed_title_generation": 76.7095, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 55.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 40.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 38.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 54.6667, + "eval_rougeL_for_task892_gap_coreference_resolution": 44.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 42.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_rougeL_for_task957_e2e_data_to_text": 40.2581, + "eval_rougeL_for_task970_sherliic_textual_entailment": 54.0, + "eval_rougeL_for_textual_entailment": 43.7222, + "eval_rougeL_for_title_generation": 32.3149, + "eval_rougeL_for_word_analogy": 26.7917, + "eval_runtime": 632.4471, + "eval_samples_per_second": 18.832, + "eval_steps_per_second": 0.59, + "step": 4500 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 1.0365, + "step": 5000 + }, + { + "epoch": 1.14, + "eval_exact_match": 28.1528, + "eval_exact_match_for_answerability_classification": 50.7692, + "eval_exact_match_for_cause_effect_classification": 34.5714, + "eval_exact_match_for_coreference_resolution": 36.2857, + "eval_exact_match_for_data_to_text": 7.0218, + "eval_exact_match_for_dialogue_act_recognition": 48.7143, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 41.6, + "eval_exact_match_for_overlap_extraction": 11.5, + "eval_exact_match_for_question_rewriting": 1.9091, + "eval_exact_match_for_task020_mctaco_answerability_classification": 45.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 42.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 7.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 3.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 41.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 53.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 21.0, + "eval_exact_match_for_task1153_bard_word_analogy": 21.0, + "eval_exact_match_for_task1154_bard_word_analogy": 22.0, + "eval_exact_match_for_task1155_bard_word_analogy": 57.0, + "eval_exact_match_for_task1156_bard_word_analogy": 31.0, + "eval_exact_match_for_task1157_bard_word_analogy": 50.0, + "eval_exact_match_for_task1158_bard_word_analogy": 22.0, + "eval_exact_match_for_task1159_bard_word_analogy": 14.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 6.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 10.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 11.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 2.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 45.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 43.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 52.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 54.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 14.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 43.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 48.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 50.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 23.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 28.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 41.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 2.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 51.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 45.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 61.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 46.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 9.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 52.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 47.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 48.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 21.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 2.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 2.0, + "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, + "eval_exact_match_for_task613_liar_keyword_tagging": 17.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 13.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 36.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 42.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 87.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 13.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 3.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 38.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 78.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 54.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 83.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 24.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 46.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 47.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 66.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 52.0, + "eval_exact_match_for_textual_entailment": 37.3333, + "eval_exact_match_for_title_generation": 8.0157, + "eval_exact_match_for_word_analogy": 29.75, + "eval_f1": 45.4367, + "eval_f1_for_answerability_classification": 53.3333, + "eval_f1_for_cause_effect_classification": 52.5898, + "eval_f1_for_coreference_resolution": 43.6155, + "eval_f1_for_data_to_text": 49.4393, + "eval_f1_for_dialogue_act_recognition": 52.5, + "eval_f1_for_grammar_error_correction": 70.4478, + "eval_f1_for_keyword_tagging": 54.7259, + "eval_f1_for_overlap_extraction": 36.6211, + "eval_f1_for_question_rewriting": 67.0608, + "eval_f1_for_task020_mctaco_answerability_classification": 45.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 43.1667, + "eval_f1_for_task034_winogrande_question_rewriting": 92.3877, + "eval_f1_for_task035_winogrande_question_rewriting": 86.6179, + "eval_f1_for_task036_qasc_keyword_tagging": 70.9107, + "eval_f1_for_task039_qasc_overlap_extraction": 33.4, + "eval_f1_for_task050_multirc_answerability_classification": 53.0, + "eval_f1_for_task102_commongen_data_to_text": 51.5663, + "eval_f1_for_task1152_bard_word_analogy": 21.0, + "eval_f1_for_task1153_bard_word_analogy": 23.0, + "eval_f1_for_task1154_bard_word_analogy": 22.0, + "eval_f1_for_task1155_bard_word_analogy": 57.0, + "eval_f1_for_task1156_bard_word_analogy": 31.0, + "eval_f1_for_task1157_bard_word_analogy": 50.0, + "eval_f1_for_task1158_bard_word_analogy": 22.0, + "eval_f1_for_task1159_bard_word_analogy": 14.0, + "eval_f1_for_task1161_coda_19_title_generation": 34.8133, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.2631, + "eval_f1_for_task121_zest_question_rewriting": 44.4207, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.5992, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.78, + "eval_f1_for_task1356_xlsum_title_generation": 19.5885, + "eval_f1_for_task1358_xlsum_title_generation": 33.0196, + "eval_f1_for_task1385_anli_textual_entailment": 6.0, + "eval_f1_for_task1386_anli_textual_entailment": 10.0, + "eval_f1_for_task1387_anli_textual_entailment": 11.0, + "eval_f1_for_task1388_cb_textual_entailment": 2.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 45.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 43.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 52.0, + "eval_f1_for_task1407_dart_data_to_text": 33.9832, + "eval_f1_for_task1409_dart_data_to_text": 48.6334, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 56.7328, + "eval_f1_for_task1439_doqa_answerability_classification": 48.0, + "eval_f1_for_task1442_doqa_answerability_classification": 54.0, + "eval_f1_for_task1516_imppres_textual_entailment": 14.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 37.0786, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.1628, + "eval_f1_for_task1562_zest_question_rewriting": 46.6662, + "eval_f1_for_task1586_scifact_title_generation": 36.0023, + "eval_f1_for_task1598_nyc_data_to_text": 48.2728, + "eval_f1_for_task1612_sick_textual_entailment": 43.0, + "eval_f1_for_task1615_sick_textual_entailment": 48.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.7858, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_f1_for_task1631_open_pi_data_to_text": 93.4708, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 34.5981, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 67.201, + "eval_f1_for_task1728_web_nlg_data_to_text": 57.9424, + "eval_f1_for_task190_snli_textual_entailment": 28.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 41.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 20.008, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_f1_for_task232_iirc_answerability_classification": 51.0, + "eval_f1_for_task233_iirc_answerability_classification": 45.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 61.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 59.5667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 39.8422, + "eval_f1_for_task288_gigaword_title_generation": 29.3848, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 34.519, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 54.6778, + "eval_f1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 73.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 82.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 82.6667, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.3724, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 24.3333, + "eval_f1_for_task402_grailqa_question_rewriting": 69.3105, + "eval_f1_for_task418_persent_title_generation": 27.9496, + "eval_f1_for_task442_com_qa_question_rewriting": 66.2468, + "eval_f1_for_task500_scruples_title_generation": 17.3691, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 38.2579, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 32.2374, + "eval_f1_for_task602_wikitext_title_generation": 15.579, + "eval_f1_for_task613_liar_keyword_tagging": 19.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 24.756, + "eval_f1_for_task619_ohsumed_title_generation": 40.8678, + "eval_f1_for_task620_ohsumed_keyword_tagging": 38.6048, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 36.0, + "eval_f1_for_task642_e_snli_textual_entailment": 42.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.4476, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 15.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 80.368, + "eval_f1_for_task671_ambigqa_question_rewriting": 53.8218, + "eval_f1_for_task677_ollie_data_to_text": 23.2558, + "eval_f1_for_task738_perspectrum_textual_entailment": 38.0, + "eval_f1_for_task743_eurlex_title_generation": 35.7024, + "eval_f1_for_task760_msr_sqa_data_to_text": 6.9149, + "eval_f1_for_task769_qed_title_generation": 88.5619, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 54.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 83.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 24.0, + "eval_f1_for_task890_gwsd_textual_entailment": 46.0, + "eval_f1_for_task891_gap_coreference_resolution": 55.1524, + "eval_f1_for_task892_gap_coreference_resolution": 33.0, + "eval_f1_for_task893_gap_coreference_resolution": 46.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 66.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_f1_for_task957_e2e_data_to_text": 49.4463, + "eval_f1_for_task970_sherliic_textual_entailment": 52.0, + "eval_f1_for_textual_entailment": 37.3333, + "eval_f1_for_title_generation": 33.7514, + "eval_f1_for_word_analogy": 30.0, + "eval_gen_len": 9.5426, + "eval_global_step": 5000, + "eval_loss": 1.296164631843567, + "eval_rouge1": 48.105, + "eval_rouge1_for_answerability_classification": 53.3333, + "eval_rouge1_for_cause_effect_classification": 52.8671, + "eval_rouge1_for_coreference_resolution": 44.0379, + "eval_rouge1_for_data_to_text": 52.209, + "eval_rouge1_for_dialogue_act_recognition": 55.881, + "eval_rouge1_for_grammar_error_correction": 72.6463, + "eval_rouge1_for_keyword_tagging": 59.0066, + "eval_rouge1_for_overlap_extraction": 39.2492, + "eval_rouge1_for_question_rewriting": 68.6698, + "eval_rouge1_for_task020_mctaco_answerability_classification": 45.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 43.0667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.424, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.488, + "eval_rouge1_for_task036_qasc_keyword_tagging": 77.0652, + "eval_rouge1_for_task039_qasc_overlap_extraction": 37.3, + "eval_rouge1_for_task050_multirc_answerability_classification": 53.0, + "eval_rouge1_for_task102_commongen_data_to_text": 63.9547, + "eval_rouge1_for_task1152_bard_word_analogy": 21.0, + "eval_rouge1_for_task1153_bard_word_analogy": 23.0, + "eval_rouge1_for_task1154_bard_word_analogy": 22.0, + "eval_rouge1_for_task1155_bard_word_analogy": 57.0, + "eval_rouge1_for_task1156_bard_word_analogy": 31.0, + "eval_rouge1_for_task1157_bard_word_analogy": 50.0, + "eval_rouge1_for_task1158_bard_word_analogy": 22.0, + "eval_rouge1_for_task1159_bard_word_analogy": 14.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 38.0331, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.5268, + "eval_rouge1_for_task121_zest_question_rewriting": 46.9515, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 13.8855, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.8554, + "eval_rouge1_for_task1356_xlsum_title_generation": 22.8307, + "eval_rouge1_for_task1358_xlsum_title_generation": 37.3213, + "eval_rouge1_for_task1385_anli_textual_entailment": 31.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 36.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 38.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 45.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 43.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 60.3333, + "eval_rouge1_for_task1407_dart_data_to_text": 34.1977, + "eval_rouge1_for_task1409_dart_data_to_text": 49.5333, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 57.8389, + "eval_rouge1_for_task1439_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 54.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 14.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 38.9437, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.4536, + "eval_rouge1_for_task1562_zest_question_rewriting": 49.8526, + "eval_rouge1_for_task1586_scifact_title_generation": 39.7964, + "eval_rouge1_for_task1598_nyc_data_to_text": 50.4461, + "eval_rouge1_for_task1612_sick_textual_entailment": 43.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 82.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.0699, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 93.9626, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 36.8004, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 67.201, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 60.3837, + "eval_rouge1_for_task190_snli_textual_entailment": 28.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 41.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 24.6105, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 51.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 45.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 61.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 60.2333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 41.1984, + "eval_rouge1_for_task288_gigaword_title_generation": 31.8308, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 34.7774, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 54.9667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 73.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 82.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 82.6667, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.7357, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 29.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 71.1673, + "eval_rouge1_for_task418_persent_title_generation": 31.8261, + "eval_rouge1_for_task442_com_qa_question_rewriting": 70.0883, + "eval_rouge1_for_task500_scruples_title_generation": 19.6828, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 39.4089, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 34.0671, + "eval_rouge1_for_task602_wikitext_title_generation": 16.5028, + "eval_rouge1_for_task613_liar_keyword_tagging": 30.1667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 26.3339, + "eval_rouge1_for_task619_ohsumed_title_generation": 44.2259, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 42.8535, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 36.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 42.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.9476, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 15.0, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 81.1206, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 54.8231, + "eval_rouge1_for_task677_ollie_data_to_text": 25.7758, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 59.0, + "eval_rouge1_for_task743_eurlex_title_generation": 38.144, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 8.0299, + "eval_rouge1_for_task769_qed_title_generation": 88.8286, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 54.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 83.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 39.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 46.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 54.9524, + "eval_rouge1_for_task892_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 46.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 66.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_rouge1_for_task957_e2e_data_to_text": 50.9045, + "eval_rouge1_for_task970_sherliic_textual_entailment": 52.0, + "eval_rouge1_for_textual_entailment": 44.5278, + "eval_rouge1_for_title_generation": 36.1042, + "eval_rouge1_for_word_analogy": 30.0, + "eval_rougeL": 46.738, + "eval_rougeL_for_answerability_classification": 53.3333, + "eval_rougeL_for_cause_effect_classification": 52.5109, + "eval_rougeL_for_coreference_resolution": 44.0379, + "eval_rougeL_for_data_to_text": 45.7825, + "eval_rougeL_for_dialogue_act_recognition": 55.881, + "eval_rougeL_for_grammar_error_correction": 71.3448, + "eval_rougeL_for_keyword_tagging": 58.2515, + "eval_rougeL_for_overlap_extraction": 38.498, + "eval_rougeL_for_question_rewriting": 65.0378, + "eval_rougeL_for_task020_mctaco_answerability_classification": 45.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 43.0667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.1677, + "eval_rougeL_for_task035_winogrande_question_rewriting": 86.2283, + "eval_rougeL_for_task036_qasc_keyword_tagging": 75.8438, + "eval_rougeL_for_task039_qasc_overlap_extraction": 37.3, + "eval_rougeL_for_task050_multirc_answerability_classification": 53.0, + "eval_rougeL_for_task102_commongen_data_to_text": 54.7213, + "eval_rougeL_for_task1152_bard_word_analogy": 21.0, + "eval_rougeL_for_task1153_bard_word_analogy": 23.0, + "eval_rougeL_for_task1154_bard_word_analogy": 22.0, + "eval_rougeL_for_task1155_bard_word_analogy": 57.0, + "eval_rougeL_for_task1156_bard_word_analogy": 31.0, + "eval_rougeL_for_task1157_bard_word_analogy": 50.0, + "eval_rougeL_for_task1158_bard_word_analogy": 22.0, + "eval_rougeL_for_task1159_bard_word_analogy": 14.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 30.691, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.0287, + "eval_rougeL_for_task121_zest_question_rewriting": 41.3346, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.1647, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.8116, + "eval_rougeL_for_task1356_xlsum_title_generation": 19.7001, + "eval_rougeL_for_task1358_xlsum_title_generation": 31.4198, + "eval_rougeL_for_task1385_anli_textual_entailment": 31.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 36.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 38.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 45.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 43.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 60.3333, + "eval_rougeL_for_task1407_dart_data_to_text": 30.7171, + "eval_rougeL_for_task1409_dart_data_to_text": 42.2528, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 56.0871, + "eval_rougeL_for_task1439_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 54.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 14.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 35.5768, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.6024, + "eval_rougeL_for_task1562_zest_question_rewriting": 43.6514, + "eval_rougeL_for_task1586_scifact_title_generation": 32.8351, + "eval_rougeL_for_task1598_nyc_data_to_text": 40.3231, + "eval_rougeL_for_task1612_sick_textual_entailment": 43.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 82.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.918, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 91.9508, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 31.3406, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 67.201, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.8979, + "eval_rougeL_for_task190_snli_textual_entailment": 28.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 41.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 24.1382, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 51.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 45.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 61.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 60.2333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 39.6961, + "eval_rougeL_for_task288_gigaword_title_generation": 27.4026, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 34.7774, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 54.9667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 73.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 82.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 82.6667, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.0526, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 29.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 59.436, + "eval_rougeL_for_task418_persent_title_generation": 27.8581, + "eval_rougeL_for_task442_com_qa_question_rewriting": 64.9399, + "eval_rougeL_for_task500_scruples_title_generation": 17.5721, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 38.9533, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 31.6028, + "eval_rougeL_for_task602_wikitext_title_generation": 16.3992, + "eval_rougeL_for_task613_liar_keyword_tagging": 30.1667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 24.5237, + "eval_rougeL_for_task619_ohsumed_title_generation": 36.5216, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 40.2996, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 36.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 42.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.9476, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 15.0, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 80.0124, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 50.8876, + "eval_rougeL_for_task677_ollie_data_to_text": 21.5636, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 59.0, + "eval_rougeL_for_task743_eurlex_title_generation": 33.3244, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 7.0125, + "eval_rougeL_for_task769_qed_title_generation": 88.8286, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 54.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 83.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 39.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 46.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 54.9524, + "eval_rougeL_for_task892_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 46.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 66.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_rougeL_for_task957_e2e_data_to_text": 40.9134, + "eval_rougeL_for_task970_sherliic_textual_entailment": 52.0, + "eval_rougeL_for_textual_entailment": 44.5278, + "eval_rougeL_for_title_generation": 32.775, + "eval_rougeL_for_word_analogy": 30.0, + "eval_runtime": 625.647, + "eval_samples_per_second": 19.036, + "eval_steps_per_second": 0.596, + "step": 5000 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 1.0636, + "step": 5500 + }, + { + "epoch": 1.26, + "eval_exact_match": 28.3207, + "eval_exact_match_for_answerability_classification": 51.4615, + "eval_exact_match_for_cause_effect_classification": 37.1429, + "eval_exact_match_for_coreference_resolution": 36.0714, + "eval_exact_match_for_data_to_text": 7.2639, + "eval_exact_match_for_dialogue_act_recognition": 47.7143, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 42.4, + "eval_exact_match_for_overlap_extraction": 13.0, + "eval_exact_match_for_question_rewriting": 2.5455, + "eval_exact_match_for_task020_mctaco_answerability_classification": 48.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 40.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 5.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 5.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 39.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 25.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 57.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 24.0, + "eval_exact_match_for_task1153_bard_word_analogy": 22.0, + "eval_exact_match_for_task1154_bard_word_analogy": 24.0, + "eval_exact_match_for_task1155_bard_word_analogy": 59.0, + "eval_exact_match_for_task1156_bard_word_analogy": 30.0, + "eval_exact_match_for_task1157_bard_word_analogy": 49.0, + "eval_exact_match_for_task1158_bard_word_analogy": 27.0, + "eval_exact_match_for_task1159_bard_word_analogy": 16.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 9.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 4.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 5.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 14.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 2.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 46.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 10.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 41.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 48.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 48.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 53.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 23.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 40.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 31.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 32.0, + "eval_exact_match_for_task219_rocstories_title_generation": 5.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 43.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 74.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 42.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 1.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 45.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 59.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 23.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 4.0, + "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, + "eval_exact_match_for_task613_liar_keyword_tagging": 20.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 11.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 56.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 17.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 45.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 86.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 3.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 23.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 78.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 53.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 53.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 80.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 21.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 43.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 47.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 55.0, + "eval_exact_match_for_textual_entailment": 35.8333, + "eval_exact_match_for_title_generation": 8.5762, + "eval_exact_match_for_word_analogy": 31.375, + "eval_f1": 45.4477, + "eval_f1_for_answerability_classification": 54.0256, + "eval_f1_for_cause_effect_classification": 54.1457, + "eval_f1_for_coreference_resolution": 42.9674, + "eval_f1_for_data_to_text": 50.9976, + "eval_f1_for_dialogue_act_recognition": 51.0714, + "eval_f1_for_grammar_error_correction": 67.5752, + "eval_f1_for_keyword_tagging": 54.4264, + "eval_f1_for_overlap_extraction": 40.7295, + "eval_f1_for_question_rewriting": 67.6948, + "eval_f1_for_task020_mctaco_answerability_classification": 48.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 41.8333, + "eval_f1_for_task034_winogrande_question_rewriting": 91.8394, + "eval_f1_for_task035_winogrande_question_rewriting": 84.9882, + "eval_f1_for_task036_qasc_keyword_tagging": 70.4464, + "eval_f1_for_task039_qasc_overlap_extraction": 33.4833, + "eval_f1_for_task050_multirc_answerability_classification": 57.0, + "eval_f1_for_task102_commongen_data_to_text": 56.1743, + "eval_f1_for_task1152_bard_word_analogy": 24.0, + "eval_f1_for_task1153_bard_word_analogy": 25.3333, + "eval_f1_for_task1154_bard_word_analogy": 24.0, + "eval_f1_for_task1155_bard_word_analogy": 59.0, + "eval_f1_for_task1156_bard_word_analogy": 30.0, + "eval_f1_for_task1157_bard_word_analogy": 49.0, + "eval_f1_for_task1158_bard_word_analogy": 27.0, + "eval_f1_for_task1159_bard_word_analogy": 16.0, + "eval_f1_for_task1161_coda_19_title_generation": 33.1589, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.3618, + "eval_f1_for_task121_zest_question_rewriting": 44.1795, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.6006, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 41.3429, + "eval_f1_for_task1356_xlsum_title_generation": 21.1258, + "eval_f1_for_task1358_xlsum_title_generation": 32.0226, + "eval_f1_for_task1385_anli_textual_entailment": 4.0, + "eval_f1_for_task1386_anli_textual_entailment": 5.0, + "eval_f1_for_task1387_anli_textual_entailment": 14.0, + "eval_f1_for_task1388_cb_textual_entailment": 2.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 50.0, + "eval_f1_for_task1407_dart_data_to_text": 32.253, + "eval_f1_for_task1409_dart_data_to_text": 49.3611, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 50.7581, + "eval_f1_for_task1439_doqa_answerability_classification": 46.0, + "eval_f1_for_task1442_doqa_answerability_classification": 50.0, + "eval_f1_for_task1516_imppres_textual_entailment": 10.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 41.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 34.5065, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.3923, + "eval_f1_for_task1562_zest_question_rewriting": 45.6322, + "eval_f1_for_task1586_scifact_title_generation": 37.3395, + "eval_f1_for_task1598_nyc_data_to_text": 49.147, + "eval_f1_for_task1612_sick_textual_entailment": 48.0, + "eval_f1_for_task1615_sick_textual_entailment": 48.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.294, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_f1_for_task1631_open_pi_data_to_text": 94.5249, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 32.3353, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 67.9468, + "eval_f1_for_task1728_web_nlg_data_to_text": 59.3526, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 40.0, + "eval_f1_for_task201_multinli_textual_entailment": 31.0, + "eval_f1_for_task202_multinli_textual_entailment": 32.0, + "eval_f1_for_task219_rocstories_title_generation": 22.958, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 43.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 50.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 74.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 54.5667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 47.9757, + "eval_f1_for_task288_gigaword_title_generation": 30.2931, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 19.4667, + "eval_f1_for_task329_gap_coreference_resolution": 34.0, + "eval_f1_for_task330_gap_coreference_resolution": 58.2778, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 76.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 81.6667, + "eval_f1_for_task392_cod3s_cause_effect_classification": 86.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.1999, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 26.8333, + "eval_f1_for_task402_grailqa_question_rewriting": 69.5597, + "eval_f1_for_task418_persent_title_generation": 27.0935, + "eval_f1_for_task442_com_qa_question_rewriting": 69.3477, + "eval_f1_for_task500_scruples_title_generation": 16.2607, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 36.7274, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 36.9342, + "eval_f1_for_task602_wikitext_title_generation": 14.72, + "eval_f1_for_task613_liar_keyword_tagging": 20.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 21.8204, + "eval_f1_for_task619_ohsumed_title_generation": 41.8037, + "eval_f1_for_task620_ohsumed_keyword_tagging": 30.5476, + "eval_f1_for_task623_ohsumed_keyword_tagging": 56.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 17.0, + "eval_f1_for_task642_e_snli_textual_entailment": 45.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.4714, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 3.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 75.1438, + "eval_f1_for_task671_ambigqa_question_rewriting": 62.9536, + "eval_f1_for_task677_ollie_data_to_text": 23.8721, + "eval_f1_for_task738_perspectrum_textual_entailment": 23.0, + "eval_f1_for_task743_eurlex_title_generation": 34.9176, + "eval_f1_for_task760_msr_sqa_data_to_text": 8.5719, + "eval_f1_for_task769_qed_title_generation": 89.0201, + "eval_f1_for_task827_copa_cause_effect_classification": 53.0, + "eval_f1_for_task828_copa_cause_effect_classification": 53.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 80.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 21.0, + "eval_f1_for_task890_gwsd_textual_entailment": 43.0, + "eval_f1_for_task891_gap_coreference_resolution": 56.619, + "eval_f1_for_task892_gap_coreference_resolution": 44.0, + "eval_f1_for_task893_gap_coreference_resolution": 45.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_f1_for_task957_e2e_data_to_text": 54.3264, + "eval_f1_for_task970_sherliic_textual_entailment": 55.0, + "eval_f1_for_textual_entailment": 35.8333, + "eval_f1_for_title_generation": 33.8264, + "eval_f1_for_word_analogy": 31.7917, + "eval_gen_len": 10.3366, + "eval_global_step": 5500, + "eval_loss": 1.3314262628555298, + "eval_rouge1": 48.352, + "eval_rouge1_for_answerability_classification": 54.0256, + "eval_rouge1_for_cause_effect_classification": 54.5463, + "eval_rouge1_for_coreference_resolution": 43.4309, + "eval_rouge1_for_data_to_text": 53.7865, + "eval_rouge1_for_dialogue_act_recognition": 54.881, + "eval_rouge1_for_grammar_error_correction": 69.7177, + "eval_rouge1_for_keyword_tagging": 58.79, + "eval_rouge1_for_overlap_extraction": 43.2327, + "eval_rouge1_for_question_rewriting": 69.2377, + "eval_rouge1_for_task020_mctaco_answerability_classification": 48.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 41.7333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 91.8484, + "eval_rouge1_for_task035_winogrande_question_rewriting": 85.8327, + "eval_rouge1_for_task036_qasc_keyword_tagging": 76.4026, + "eval_rouge1_for_task039_qasc_overlap_extraction": 37.2167, + "eval_rouge1_for_task050_multirc_answerability_classification": 57.0, + "eval_rouge1_for_task102_commongen_data_to_text": 68.2019, + "eval_rouge1_for_task1152_bard_word_analogy": 24.0, + "eval_rouge1_for_task1153_bard_word_analogy": 25.3333, + "eval_rouge1_for_task1154_bard_word_analogy": 24.0, + "eval_rouge1_for_task1155_bard_word_analogy": 59.0, + "eval_rouge1_for_task1156_bard_word_analogy": 30.0, + "eval_rouge1_for_task1157_bard_word_analogy": 49.0, + "eval_rouge1_for_task1158_bard_word_analogy": 27.0, + "eval_rouge1_for_task1159_bard_word_analogy": 16.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 36.6346, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.5926, + "eval_rouge1_for_task121_zest_question_rewriting": 46.4355, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.7535, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 44.7805, + "eval_rouge1_for_task1356_xlsum_title_generation": 24.5446, + "eval_rouge1_for_task1358_xlsum_title_generation": 37.1031, + "eval_rouge1_for_task1385_anli_textual_entailment": 29.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 42.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 42.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 59.3333, + "eval_rouge1_for_task1407_dart_data_to_text": 32.0862, + "eval_rouge1_for_task1409_dart_data_to_text": 50.8306, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 52.1365, + "eval_rouge1_for_task1439_doqa_answerability_classification": 46.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 10.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 41.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 36.7692, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.299, + "eval_rouge1_for_task1562_zest_question_rewriting": 48.8483, + "eval_rouge1_for_task1586_scifact_title_generation": 41.2503, + "eval_rouge1_for_task1598_nyc_data_to_text": 51.7574, + "eval_rouge1_for_task1612_sick_textual_entailment": 48.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 82.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.5793, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 94.9398, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 34.372, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 67.9468, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 61.5922, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 40.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 31.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 32.0, + "eval_rouge1_for_task219_rocstories_title_generation": 26.4565, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 43.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 74.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 55.2333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 49.2487, + "eval_rouge1_for_task288_gigaword_title_generation": 32.5894, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 19.8, + "eval_rouge1_for_task329_gap_coreference_resolution": 34.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 58.4, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 76.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 81.6667, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 86.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.4674, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 32.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 71.4025, + "eval_rouge1_for_task418_persent_title_generation": 30.6258, + "eval_rouge1_for_task442_com_qa_question_rewriting": 72.927, + "eval_rouge1_for_task500_scruples_title_generation": 18.3526, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 37.3593, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 37.6357, + "eval_rouge1_for_task602_wikitext_title_generation": 15.813, + "eval_rouge1_for_task613_liar_keyword_tagging": 31.8333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 24.3567, + "eval_rouge1_for_task619_ohsumed_title_generation": 45.4887, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 34.7428, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 56.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 17.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 45.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.9714, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 3.0, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 75.7524, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.6149, + "eval_rouge1_for_task677_ollie_data_to_text": 26.0946, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 64.0, + "eval_rouge1_for_task743_eurlex_title_generation": 37.1219, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 8.8181, + "eval_rouge1_for_task769_qed_title_generation": 89.2868, + "eval_rouge1_for_task827_copa_cause_effect_classification": 53.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 53.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 80.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 38.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 43.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 56.7524, + "eval_rouge1_for_task892_gap_coreference_resolution": 44.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 45.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rouge1_for_task957_e2e_data_to_text": 56.481, + "eval_rouge1_for_task970_sherliic_textual_entailment": 55.0, + "eval_rouge1_for_textual_entailment": 44.1111, + "eval_rouge1_for_title_generation": 36.0777, + "eval_rouge1_for_word_analogy": 31.7917, + "eval_rougeL": 46.8913, + "eval_rougeL_for_answerability_classification": 54.0256, + "eval_rougeL_for_cause_effect_classification": 54.0802, + "eval_rougeL_for_coreference_resolution": 43.4309, + "eval_rougeL_for_data_to_text": 46.1049, + "eval_rougeL_for_dialogue_act_recognition": 54.881, + "eval_rougeL_for_grammar_error_correction": 68.3572, + "eval_rougeL_for_keyword_tagging": 58.1654, + "eval_rougeL_for_overlap_extraction": 42.1846, + "eval_rougeL_for_question_rewriting": 65.5419, + "eval_rougeL_for_task020_mctaco_answerability_classification": 48.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 41.7333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 91.8067, + "eval_rougeL_for_task035_winogrande_question_rewriting": 84.2666, + "eval_rougeL_for_task036_qasc_keyword_tagging": 74.9999, + "eval_rougeL_for_task039_qasc_overlap_extraction": 37.2167, + "eval_rougeL_for_task050_multirc_answerability_classification": 57.0, + "eval_rougeL_for_task102_commongen_data_to_text": 57.7375, + "eval_rougeL_for_task1152_bard_word_analogy": 24.0, + "eval_rougeL_for_task1153_bard_word_analogy": 25.3333, + "eval_rougeL_for_task1154_bard_word_analogy": 24.0, + "eval_rougeL_for_task1155_bard_word_analogy": 59.0, + "eval_rougeL_for_task1156_bard_word_analogy": 30.0, + "eval_rougeL_for_task1157_bard_word_analogy": 49.0, + "eval_rougeL_for_task1158_bard_word_analogy": 27.0, + "eval_rougeL_for_task1159_bard_word_analogy": 16.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 30.3136, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.7901, + "eval_rougeL_for_task121_zest_question_rewriting": 39.9684, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.1909, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 41.3962, + "eval_rougeL_for_task1356_xlsum_title_generation": 21.4213, + "eval_rougeL_for_task1358_xlsum_title_generation": 31.1146, + "eval_rougeL_for_task1385_anli_textual_entailment": 29.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 42.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 42.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 59.3333, + "eval_rougeL_for_task1407_dart_data_to_text": 27.5795, + "eval_rougeL_for_task1409_dart_data_to_text": 41.6942, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 50.311, + "eval_rougeL_for_task1439_doqa_answerability_classification": 46.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 10.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 41.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 33.6336, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.4033, + "eval_rougeL_for_task1562_zest_question_rewriting": 42.2956, + "eval_rougeL_for_task1586_scifact_title_generation": 33.9445, + "eval_rougeL_for_task1598_nyc_data_to_text": 40.8225, + "eval_rougeL_for_task1612_sick_textual_entailment": 48.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 82.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.5386, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 92.961, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 29.0811, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 67.9468, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.9119, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 40.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 31.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 32.0, + "eval_rougeL_for_task219_rocstories_title_generation": 26.2343, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 43.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 74.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 55.2333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 47.1525, + "eval_rougeL_for_task288_gigaword_title_generation": 28.2534, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 19.8, + "eval_rougeL_for_task329_gap_coreference_resolution": 34.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 58.4, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 76.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 81.6667, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 86.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 32.3882, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 32.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 59.3227, + "eval_rougeL_for_task418_persent_title_generation": 26.5499, + "eval_rougeL_for_task442_com_qa_question_rewriting": 68.155, + "eval_rougeL_for_task500_scruples_title_generation": 16.5869, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 36.9452, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 34.8112, + "eval_rougeL_for_task602_wikitext_title_generation": 15.7048, + "eval_rougeL_for_task613_liar_keyword_tagging": 31.8333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 22.173, + "eval_rougeL_for_task619_ohsumed_title_generation": 37.1683, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 33.0223, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 56.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 17.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 45.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.9714, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 3.0, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 73.8172, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 61.6042, + "eval_rougeL_for_task677_ollie_data_to_text": 21.3877, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 64.0, + "eval_rougeL_for_task743_eurlex_title_generation": 32.2337, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 7.9644, + "eval_rougeL_for_task769_qed_title_generation": 89.2868, + "eval_rougeL_for_task827_copa_cause_effect_classification": 53.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 53.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 80.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 38.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 43.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 56.7524, + "eval_rougeL_for_task892_gap_coreference_resolution": 44.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 45.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rougeL_for_task957_e2e_data_to_text": 42.6617, + "eval_rougeL_for_task970_sherliic_textual_entailment": 55.0, + "eval_rougeL_for_textual_entailment": 44.1111, + "eval_rougeL_for_title_generation": 32.7893, + "eval_rougeL_for_word_analogy": 31.7917, + "eval_runtime": 629.3986, + "eval_samples_per_second": 18.923, + "eval_steps_per_second": 0.593, + "step": 5500 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 1.0766, + "step": 6000 + }, + { + "epoch": 1.37, + "eval_exact_match": 28.0269, + "eval_exact_match_for_answerability_classification": 52.4615, + "eval_exact_match_for_cause_effect_classification": 36.7143, + "eval_exact_match_for_coreference_resolution": 33.0714, + "eval_exact_match_for_data_to_text": 6.5375, + "eval_exact_match_for_dialogue_act_recognition": 46.7143, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 41.2, + "eval_exact_match_for_overlap_extraction": 10.5, + "eval_exact_match_for_question_rewriting": 2.2727, + "eval_exact_match_for_task020_mctaco_answerability_classification": 52.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 42.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 7.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 5.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 27.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 21.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 52.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 25.0, + "eval_exact_match_for_task1153_bard_word_analogy": 23.0, + "eval_exact_match_for_task1154_bard_word_analogy": 23.0, + "eval_exact_match_for_task1155_bard_word_analogy": 71.0, + "eval_exact_match_for_task1156_bard_word_analogy": 33.0, + "eval_exact_match_for_task1157_bard_word_analogy": 46.0, + "eval_exact_match_for_task1158_bard_word_analogy": 15.0, + "eval_exact_match_for_task1159_bard_word_analogy": 22.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 6.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 51.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 2.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 5.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 17.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 1.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 51.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 47.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 47.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 10.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 32.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 46.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 54.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 38.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 51.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 48.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 60.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 6.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 38.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 47.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 31.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 21.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 69.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 48.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 55.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 43.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 5.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 53.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 52.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 43.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 54.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 21.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 68.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 3.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 18.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 10.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 62.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 89.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 2.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 26.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 76.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 49.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 60.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 64.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 51.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 47.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 28.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 40.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 56.0, + "eval_exact_match_for_textual_entailment": 36.125, + "eval_exact_match_for_title_generation": 9.1928, + "eval_exact_match_for_word_analogy": 32.25, + "eval_f1": 45.4595, + "eval_f1_for_answerability_classification": 55.0256, + "eval_f1_for_cause_effect_classification": 54.2028, + "eval_f1_for_coreference_resolution": 40.3919, + "eval_f1_for_data_to_text": 51.182, + "eval_f1_for_dialogue_act_recognition": 50.1429, + "eval_f1_for_grammar_error_correction": 71.2829, + "eval_f1_for_keyword_tagging": 55.8095, + "eval_f1_for_overlap_extraction": 34.2242, + "eval_f1_for_question_rewriting": 68.9486, + "eval_f1_for_task020_mctaco_answerability_classification": 52.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 44.5, + "eval_f1_for_task034_winogrande_question_rewriting": 92.1732, + "eval_f1_for_task035_winogrande_question_rewriting": 86.5248, + "eval_f1_for_task036_qasc_keyword_tagging": 67.8332, + "eval_f1_for_task039_qasc_overlap_extraction": 30.3722, + "eval_f1_for_task050_multirc_answerability_classification": 52.0, + "eval_f1_for_task102_commongen_data_to_text": 54.3596, + "eval_f1_for_task1152_bard_word_analogy": 25.0, + "eval_f1_for_task1153_bard_word_analogy": 24.3333, + "eval_f1_for_task1154_bard_word_analogy": 23.0, + "eval_f1_for_task1155_bard_word_analogy": 71.0, + "eval_f1_for_task1156_bard_word_analogy": 33.0, + "eval_f1_for_task1157_bard_word_analogy": 46.0, + "eval_f1_for_task1158_bard_word_analogy": 15.0, + "eval_f1_for_task1159_bard_word_analogy": 22.0, + "eval_f1_for_task1161_coda_19_title_generation": 35.0089, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.3769, + "eval_f1_for_task121_zest_question_rewriting": 44.1604, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 15.4822, + "eval_f1_for_task1344_rte_textual_entailment": 51.0, + "eval_f1_for_task1345_qqp_question_rewriting": 41.0743, + "eval_f1_for_task1356_xlsum_title_generation": 18.8744, + "eval_f1_for_task1358_xlsum_title_generation": 30.8694, + "eval_f1_for_task1385_anli_textual_entailment": 2.0, + "eval_f1_for_task1386_anli_textual_entailment": 5.0, + "eval_f1_for_task1387_anli_textual_entailment": 17.0, + "eval_f1_for_task1388_cb_textual_entailment": 1.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 51.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 53.0, + "eval_f1_for_task1407_dart_data_to_text": 33.7542, + "eval_f1_for_task1409_dart_data_to_text": 51.5918, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 58.8969, + "eval_f1_for_task1439_doqa_answerability_classification": 47.0, + "eval_f1_for_task1442_doqa_answerability_classification": 47.0, + "eval_f1_for_task1516_imppres_textual_entailment": 10.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 32.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 46.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 54.0, + "eval_f1_for_task1540_peer_read_title_generation": 33.2873, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.6688, + "eval_f1_for_task1562_zest_question_rewriting": 52.2584, + "eval_f1_for_task1586_scifact_title_generation": 34.9894, + "eval_f1_for_task1598_nyc_data_to_text": 48.8545, + "eval_f1_for_task1612_sick_textual_entailment": 38.0, + "eval_f1_for_task1615_sick_textual_entailment": 51.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.9738, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_f1_for_task1631_open_pi_data_to_text": 94.4333, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 60.0, + "eval_f1_for_task1659_billsum_title_generation": 32.2202, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 61.1891, + "eval_f1_for_task1728_web_nlg_data_to_text": 58.4826, + "eval_f1_for_task190_snli_textual_entailment": 38.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 47.0, + "eval_f1_for_task201_multinli_textual_entailment": 31.0, + "eval_f1_for_task202_multinli_textual_entailment": 21.0, + "eval_f1_for_task219_rocstories_title_generation": 21.5549, + "eval_f1_for_task220_rocstories_title_generation": 69.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_f1_for_task232_iirc_answerability_classification": 48.0, + "eval_f1_for_task233_iirc_answerability_classification": 46.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 55.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 54.9667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 38.0762, + "eval_f1_for_task288_gigaword_title_generation": 27.8803, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 16.3667, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 54.0111, + "eval_f1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 76.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 81.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.5939, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 26.5, + "eval_f1_for_task402_grailqa_question_rewriting": 70.1321, + "eval_f1_for_task418_persent_title_generation": 24.5005, + "eval_f1_for_task442_com_qa_question_rewriting": 69.8845, + "eval_f1_for_task500_scruples_title_generation": 15.8078, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 38.8516, + "eval_f1_for_task520_aquamuse_answerability_classification": 68.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 32.3771, + "eval_f1_for_task602_wikitext_title_generation": 14.4183, + "eval_f1_for_task613_liar_keyword_tagging": 20.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 22.1589, + "eval_f1_for_task619_ohsumed_title_generation": 42.9072, + "eval_f1_for_task620_ohsumed_keyword_tagging": 34.5905, + "eval_f1_for_task623_ohsumed_keyword_tagging": 62.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.6238, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 2.6667, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.9794, + "eval_f1_for_task671_ambigqa_question_rewriting": 62.897, + "eval_f1_for_task677_ollie_data_to_text": 23.3422, + "eval_f1_for_task738_perspectrum_textual_entailment": 26.0, + "eval_f1_for_task743_eurlex_title_generation": 32.0704, + "eval_f1_for_task760_msr_sqa_data_to_text": 8.2167, + "eval_f1_for_task769_qed_title_generation": 87.6005, + "eval_f1_for_task827_copa_cause_effect_classification": 49.0, + "eval_f1_for_task828_copa_cause_effect_classification": 60.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 64.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_f1_for_task890_gwsd_textual_entailment": 51.0, + "eval_f1_for_task891_gap_coreference_resolution": 57.2857, + "eval_f1_for_task892_gap_coreference_resolution": 28.0, + "eval_f1_for_task893_gap_coreference_resolution": 40.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_f1_for_task957_e2e_data_to_text": 55.8088, + "eval_f1_for_task970_sherliic_textual_entailment": 56.0, + "eval_f1_for_textual_entailment": 36.125, + "eval_f1_for_title_generation": 33.9346, + "eval_f1_for_word_analogy": 32.4167, + "eval_gen_len": 10.6469, + "eval_global_step": 6000, + "eval_loss": 1.2738133668899536, + "eval_rouge1": 48.3309, + "eval_rouge1_for_answerability_classification": 55.0256, + "eval_rouge1_for_cause_effect_classification": 54.5021, + "eval_rouge1_for_coreference_resolution": 40.9863, + "eval_rouge1_for_data_to_text": 54.1235, + "eval_rouge1_for_dialogue_act_recognition": 53.6667, + "eval_rouge1_for_grammar_error_correction": 73.7759, + "eval_rouge1_for_keyword_tagging": 60.4214, + "eval_rouge1_for_overlap_extraction": 37.398, + "eval_rouge1_for_question_rewriting": 70.5105, + "eval_rouge1_for_task020_mctaco_answerability_classification": 52.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 44.4, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.2197, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.4233, + "eval_rouge1_for_task036_qasc_keyword_tagging": 70.2702, + "eval_rouge1_for_task039_qasc_overlap_extraction": 35.1579, + "eval_rouge1_for_task050_multirc_answerability_classification": 52.0, + "eval_rouge1_for_task102_commongen_data_to_text": 67.0995, + "eval_rouge1_for_task1152_bard_word_analogy": 25.0, + "eval_rouge1_for_task1153_bard_word_analogy": 24.3333, + "eval_rouge1_for_task1154_bard_word_analogy": 23.0, + "eval_rouge1_for_task1155_bard_word_analogy": 71.0, + "eval_rouge1_for_task1156_bard_word_analogy": 33.0, + "eval_rouge1_for_task1157_bard_word_analogy": 46.0, + "eval_rouge1_for_task1158_bard_word_analogy": 15.0, + "eval_rouge1_for_task1159_bard_word_analogy": 22.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 38.781, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.6412, + "eval_rouge1_for_task121_zest_question_rewriting": 46.8658, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.9585, + "eval_rouge1_for_task1344_rte_textual_entailment": 51.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 44.4475, + "eval_rouge1_for_task1356_xlsum_title_generation": 21.3696, + "eval_rouge1_for_task1358_xlsum_title_generation": 35.2499, + "eval_rouge1_for_task1385_anli_textual_entailment": 30.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 41.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 51.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 62.3333, + "eval_rouge1_for_task1407_dart_data_to_text": 33.973, + "eval_rouge1_for_task1409_dart_data_to_text": 53.0372, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 60.5633, + "eval_rouge1_for_task1439_doqa_answerability_classification": 47.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 47.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 10.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 32.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 46.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 54.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 35.8096, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.9884, + "eval_rouge1_for_task1562_zest_question_rewriting": 55.7679, + "eval_rouge1_for_task1586_scifact_title_generation": 39.1855, + "eval_rouge1_for_task1598_nyc_data_to_text": 51.1162, + "eval_rouge1_for_task1612_sick_textual_entailment": 38.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 83.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.1905, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 94.6329, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 60.0, + "eval_rouge1_for_task1659_billsum_title_generation": 34.142, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 61.1891, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 60.5926, + "eval_rouge1_for_task190_snli_textual_entailment": 38.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 47.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 31.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 21.0, + "eval_rouge1_for_task219_rocstories_title_generation": 26.0907, + "eval_rouge1_for_task220_rocstories_title_generation": 69.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 48.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 55.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 55.6333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 39.6381, + "eval_rouge1_for_task288_gigaword_title_generation": 30.7291, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 16.7, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 54.3, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 76.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 81.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.805, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 33.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 71.8847, + "eval_rouge1_for_task418_persent_title_generation": 27.8423, + "eval_rouge1_for_task442_com_qa_question_rewriting": 73.0957, + "eval_rouge1_for_task500_scruples_title_generation": 17.4188, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 39.5144, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 68.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 34.747, + "eval_rouge1_for_task602_wikitext_title_generation": 15.5983, + "eval_rouge1_for_task613_liar_keyword_tagging": 33.5, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 24.0429, + "eval_rouge1_for_task619_ohsumed_title_generation": 46.04, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 41.213, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 62.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.1238, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 2.6667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.4938, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.5857, + "eval_rouge1_for_task677_ollie_data_to_text": 26.3673, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 61.0, + "eval_rouge1_for_task743_eurlex_title_generation": 33.9118, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 8.8565, + "eval_rouge1_for_task769_qed_title_generation": 87.8672, + "eval_rouge1_for_task827_copa_cause_effect_classification": 49.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 60.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 64.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 41.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 51.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 57.0857, + "eval_rouge1_for_task892_gap_coreference_resolution": 28.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 40.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rouge1_for_task957_e2e_data_to_text": 57.9388, + "eval_rouge1_for_task970_sherliic_textual_entailment": 56.0, + "eval_rouge1_for_textual_entailment": 44.0278, + "eval_rouge1_for_title_generation": 36.2534, + "eval_rouge1_for_word_analogy": 32.4167, + "eval_rougeL": 46.8495, + "eval_rougeL_for_answerability_classification": 55.0256, + "eval_rougeL_for_cause_effect_classification": 54.124, + "eval_rougeL_for_coreference_resolution": 40.9863, + "eval_rougeL_for_data_to_text": 46.3933, + "eval_rougeL_for_dialogue_act_recognition": 53.6667, + "eval_rougeL_for_grammar_error_correction": 72.6324, + "eval_rougeL_for_keyword_tagging": 59.6292, + "eval_rougeL_for_overlap_extraction": 36.5175, + "eval_rougeL_for_question_rewriting": 66.82, + "eval_rougeL_for_task020_mctaco_answerability_classification": 52.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 44.4, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.2197, + "eval_rougeL_for_task035_winogrande_question_rewriting": 85.633, + "eval_rougeL_for_task036_qasc_keyword_tagging": 68.0962, + "eval_rougeL_for_task039_qasc_overlap_extraction": 35.1579, + "eval_rougeL_for_task050_multirc_answerability_classification": 52.0, + "eval_rougeL_for_task102_commongen_data_to_text": 57.3583, + "eval_rougeL_for_task1152_bard_word_analogy": 25.0, + "eval_rougeL_for_task1153_bard_word_analogy": 24.3333, + "eval_rougeL_for_task1154_bard_word_analogy": 23.0, + "eval_rougeL_for_task1155_bard_word_analogy": 71.0, + "eval_rougeL_for_task1156_bard_word_analogy": 33.0, + "eval_rougeL_for_task1157_bard_word_analogy": 46.0, + "eval_rougeL_for_task1158_bard_word_analogy": 15.0, + "eval_rougeL_for_task1159_bard_word_analogy": 22.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 31.4802, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.7061, + "eval_rougeL_for_task121_zest_question_rewriting": 40.4143, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 15.5447, + "eval_rougeL_for_task1344_rte_textual_entailment": 51.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 41.1666, + "eval_rougeL_for_task1356_xlsum_title_generation": 18.228, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.1512, + "eval_rougeL_for_task1385_anli_textual_entailment": 30.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 41.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 51.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 62.3333, + "eval_rougeL_for_task1407_dart_data_to_text": 29.8556, + "eval_rougeL_for_task1409_dart_data_to_text": 42.7145, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 59.1858, + "eval_rougeL_for_task1439_doqa_answerability_classification": 47.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 47.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 10.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 32.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 46.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 54.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 32.576, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.079, + "eval_rougeL_for_task1562_zest_question_rewriting": 48.495, + "eval_rougeL_for_task1586_scifact_title_generation": 31.4015, + "eval_rougeL_for_task1598_nyc_data_to_text": 39.0476, + "eval_rougeL_for_task1612_sick_textual_entailment": 38.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 83.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.9401, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 92.6601, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 60.0, + "eval_rougeL_for_task1659_billsum_title_generation": 28.884, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 61.1891, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.5393, + "eval_rougeL_for_task190_snli_textual_entailment": 38.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 47.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 31.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 21.0, + "eval_rougeL_for_task219_rocstories_title_generation": 25.8407, + "eval_rougeL_for_task220_rocstories_title_generation": 69.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 48.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 55.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 55.6333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 37.8771, + "eval_rougeL_for_task288_gigaword_title_generation": 26.5324, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 16.7, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 54.3, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 76.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 81.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.1535, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 33.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 61.017, + "eval_rougeL_for_task418_persent_title_generation": 23.4888, + "eval_rougeL_for_task442_com_qa_question_rewriting": 67.7203, + "eval_rougeL_for_task500_scruples_title_generation": 15.5694, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 38.9913, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 68.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 32.6277, + "eval_rougeL_for_task602_wikitext_title_generation": 15.5983, + "eval_rougeL_for_task613_liar_keyword_tagging": 33.5, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 22.0481, + "eval_rougeL_for_task619_ohsumed_title_generation": 37.1787, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 39.4258, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 62.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.1238, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 2.6667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.2626, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 61.4452, + "eval_rougeL_for_task677_ollie_data_to_text": 21.9575, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 61.0, + "eval_rougeL_for_task743_eurlex_title_generation": 28.9585, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 7.4496, + "eval_rougeL_for_task769_qed_title_generation": 87.8672, + "eval_rougeL_for_task827_copa_cause_effect_classification": 49.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 60.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 64.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 41.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 51.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 57.0857, + "eval_rougeL_for_task892_gap_coreference_resolution": 28.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 40.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rougeL_for_task957_e2e_data_to_text": 44.1387, + "eval_rougeL_for_task970_sherliic_textual_entailment": 56.0, + "eval_rougeL_for_textual_entailment": 44.0278, + "eval_rougeL_for_title_generation": 32.8152, + "eval_rougeL_for_word_analogy": 32.4167, + "eval_runtime": 641.6298, + "eval_samples_per_second": 18.562, + "eval_steps_per_second": 0.581, + "step": 6000 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 1.038, + "step": 6500 + }, + { + "epoch": 1.49, + "eval_exact_match": 28.4215, + "eval_exact_match_for_answerability_classification": 52.6154, + "eval_exact_match_for_cause_effect_classification": 36.4286, + "eval_exact_match_for_coreference_resolution": 35.1429, + "eval_exact_match_for_data_to_text": 7.0218, + "eval_exact_match_for_dialogue_act_recognition": 49.8571, + "eval_exact_match_for_grammar_error_correction": 8.0, + "eval_exact_match_for_keyword_tagging": 43.6, + "eval_exact_match_for_overlap_extraction": 11.0, + "eval_exact_match_for_question_rewriting": 3.0, + "eval_exact_match_for_task020_mctaco_answerability_classification": 46.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 42.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 7.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 7.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 35.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 22.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 56.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 28.0, + "eval_exact_match_for_task1153_bard_word_analogy": 18.0, + "eval_exact_match_for_task1154_bard_word_analogy": 24.0, + "eval_exact_match_for_task1155_bard_word_analogy": 69.0, + "eval_exact_match_for_task1156_bard_word_analogy": 32.0, + "eval_exact_match_for_task1157_bard_word_analogy": 51.0, + "eval_exact_match_for_task1158_bard_word_analogy": 18.0, + "eval_exact_match_for_task1159_bard_word_analogy": 14.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 7.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 2.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 2.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 9.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 0.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 52.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 44.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 8.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 34.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 16.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 37.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 38.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 52.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 51.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 63.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 8.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 35.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 41.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 35.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 32.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 54.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 39.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 71.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 48.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 4.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 54.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 55.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 44.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 49.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 21.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 61.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 4.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 15.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 65.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 30.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 45.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 87.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 6.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 3.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 31.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 81.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 60.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 77.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 45.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 41.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 58.0, + "eval_exact_match_for_textual_entailment": 35.4583, + "eval_exact_match_for_title_generation": 8.5762, + "eval_exact_match_for_word_analogy": 31.75, + "eval_f1": 45.4341, + "eval_f1_for_answerability_classification": 55.1795, + "eval_f1_for_cause_effect_classification": 53.9952, + "eval_f1_for_coreference_resolution": 42.7553, + "eval_f1_for_data_to_text": 49.2388, + "eval_f1_for_dialogue_act_recognition": 52.9286, + "eval_f1_for_grammar_error_correction": 69.9322, + "eval_f1_for_keyword_tagging": 56.8487, + "eval_f1_for_overlap_extraction": 35.1296, + "eval_f1_for_question_rewriting": 67.4803, + "eval_f1_for_task020_mctaco_answerability_classification": 46.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 43.8333, + "eval_f1_for_task034_winogrande_question_rewriting": 92.2027, + "eval_f1_for_task035_winogrande_question_rewriting": 84.2853, + "eval_f1_for_task036_qasc_keyword_tagging": 69.7244, + "eval_f1_for_task039_qasc_overlap_extraction": 28.5667, + "eval_f1_for_task050_multirc_answerability_classification": 56.0, + "eval_f1_for_task102_commongen_data_to_text": 53.9525, + "eval_f1_for_task1152_bard_word_analogy": 28.0, + "eval_f1_for_task1153_bard_word_analogy": 21.3333, + "eval_f1_for_task1154_bard_word_analogy": 24.0, + "eval_f1_for_task1155_bard_word_analogy": 69.0, + "eval_f1_for_task1156_bard_word_analogy": 32.0, + "eval_f1_for_task1157_bard_word_analogy": 51.0, + "eval_f1_for_task1158_bard_word_analogy": 18.0, + "eval_f1_for_task1159_bard_word_analogy": 14.0, + "eval_f1_for_task1161_coda_19_title_generation": 33.6243, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.1057, + "eval_f1_for_task121_zest_question_rewriting": 42.9374, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.9771, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.9935, + "eval_f1_for_task1356_xlsum_title_generation": 19.7272, + "eval_f1_for_task1358_xlsum_title_generation": 33.3745, + "eval_f1_for_task1385_anli_textual_entailment": 2.0, + "eval_f1_for_task1386_anli_textual_entailment": 2.0, + "eval_f1_for_task1387_anli_textual_entailment": 9.0, + "eval_f1_for_task1388_cb_textual_entailment": 0.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 52.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 49.0, + "eval_f1_for_task1407_dart_data_to_text": 33.6801, + "eval_f1_for_task1409_dart_data_to_text": 45.5292, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 55.451, + "eval_f1_for_task1439_doqa_answerability_classification": 44.0, + "eval_f1_for_task1442_doqa_answerability_classification": 48.0, + "eval_f1_for_task1516_imppres_textual_entailment": 8.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 34.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 35.8609, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.4133, + "eval_f1_for_task1562_zest_question_rewriting": 48.2379, + "eval_f1_for_task1586_scifact_title_generation": 35.2204, + "eval_f1_for_task1598_nyc_data_to_text": 49.8024, + "eval_f1_for_task1612_sick_textual_entailment": 37.0, + "eval_f1_for_task1615_sick_textual_entailment": 38.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.0696, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 52.0, + "eval_f1_for_task1631_open_pi_data_to_text": 94.0296, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 63.0, + "eval_f1_for_task1659_billsum_title_generation": 33.4021, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 60.7438, + "eval_f1_for_task1728_web_nlg_data_to_text": 57.3937, + "eval_f1_for_task190_snli_textual_entailment": 35.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 41.0, + "eval_f1_for_task201_multinli_textual_entailment": 35.0, + "eval_f1_for_task202_multinli_textual_entailment": 32.0, + "eval_f1_for_task219_rocstories_title_generation": 20.4541, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_f1_for_task232_iirc_answerability_classification": 54.0, + "eval_f1_for_task233_iirc_answerability_classification": 39.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 71.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 60.8, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 41.6925, + "eval_f1_for_task288_gigaword_title_generation": 28.5517, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 22.5333, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 56.1778, + "eval_f1_for_task349_squad2.0_answerability_classification": 54.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 76.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 81.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.4661, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 25.0, + "eval_f1_for_task402_grailqa_question_rewriting": 67.4063, + "eval_f1_for_task418_persent_title_generation": 26.4389, + "eval_f1_for_task442_com_qa_question_rewriting": 69.5152, + "eval_f1_for_task500_scruples_title_generation": 14.8105, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 37.0484, + "eval_f1_for_task520_aquamuse_answerability_classification": 61.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 35.7859, + "eval_f1_for_task602_wikitext_title_generation": 13.024, + "eval_f1_for_task613_liar_keyword_tagging": 18.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 20.1671, + "eval_f1_for_task619_ohsumed_title_generation": 41.4655, + "eval_f1_for_task620_ohsumed_keyword_tagging": 37.2381, + "eval_f1_for_task623_ohsumed_keyword_tagging": 65.0, + "eval_f1_for_task640_e_snli_textual_entailment": 30.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 45.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.281, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 7.3333, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.4156, + "eval_f1_for_task671_ambigqa_question_rewriting": 59.1138, + "eval_f1_for_task677_ollie_data_to_text": 21.7921, + "eval_f1_for_task738_perspectrum_textual_entailment": 31.0, + "eval_f1_for_task743_eurlex_title_generation": 32.92, + "eval_f1_for_task760_msr_sqa_data_to_text": 4.9251, + "eval_f1_for_task769_qed_title_generation": 90.5286, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 60.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 77.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, + "eval_f1_for_task890_gwsd_textual_entailment": 45.0, + "eval_f1_for_task891_gap_coreference_resolution": 55.1524, + "eval_f1_for_task892_gap_coreference_resolution": 44.0, + "eval_f1_for_task893_gap_coreference_resolution": 41.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_f1_for_task957_e2e_data_to_text": 49.2527, + "eval_f1_for_task970_sherliic_textual_entailment": 58.0, + "eval_f1_for_textual_entailment": 35.4583, + "eval_f1_for_title_generation": 33.3593, + "eval_f1_for_word_analogy": 32.1667, + "eval_gen_len": 9.769, + "eval_global_step": 6500, + "eval_loss": 1.3009140491485596, + "eval_rouge1": 48.3067, + "eval_rouge1_for_answerability_classification": 55.1795, + "eval_rouge1_for_cause_effect_classification": 54.2078, + "eval_rouge1_for_coreference_resolution": 43.2307, + "eval_rouge1_for_data_to_text": 51.7962, + "eval_rouge1_for_dialogue_act_recognition": 56.3095, + "eval_rouge1_for_grammar_error_correction": 72.5056, + "eval_rouge1_for_keyword_tagging": 61.8264, + "eval_rouge1_for_overlap_extraction": 37.1137, + "eval_rouge1_for_question_rewriting": 69.0817, + "eval_rouge1_for_task020_mctaco_answerability_classification": 46.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 43.7333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.2107, + "eval_rouge1_for_task035_winogrande_question_rewriting": 85.3206, + "eval_rouge1_for_task036_qasc_keyword_tagging": 75.3464, + "eval_rouge1_for_task039_qasc_overlap_extraction": 31.4667, + "eval_rouge1_for_task050_multirc_answerability_classification": 56.0, + "eval_rouge1_for_task102_commongen_data_to_text": 66.3249, + "eval_rouge1_for_task1152_bard_word_analogy": 28.0, + "eval_rouge1_for_task1153_bard_word_analogy": 21.3333, + "eval_rouge1_for_task1154_bard_word_analogy": 24.0, + "eval_rouge1_for_task1155_bard_word_analogy": 69.0, + "eval_rouge1_for_task1156_bard_word_analogy": 32.0, + "eval_rouge1_for_task1157_bard_word_analogy": 51.0, + "eval_rouge1_for_task1158_bard_word_analogy": 18.0, + "eval_rouge1_for_task1159_bard_word_analogy": 14.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 37.1593, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.3479, + "eval_rouge1_for_task121_zest_question_rewriting": 45.2749, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.216, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.87, + "eval_rouge1_for_task1356_xlsum_title_generation": 23.2364, + "eval_rouge1_for_task1358_xlsum_title_generation": 37.6927, + "eval_rouge1_for_task1385_anli_textual_entailment": 31.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 36.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 52.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 58.0, + "eval_rouge1_for_task1407_dart_data_to_text": 33.9718, + "eval_rouge1_for_task1409_dart_data_to_text": 46.5924, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 57.583, + "eval_rouge1_for_task1439_doqa_answerability_classification": 44.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 8.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 34.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 37.7825, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.4283, + "eval_rouge1_for_task1562_zest_question_rewriting": 51.9541, + "eval_rouge1_for_task1586_scifact_title_generation": 39.4457, + "eval_rouge1_for_task1598_nyc_data_to_text": 51.2655, + "eval_rouge1_for_task1612_sick_textual_entailment": 37.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 79.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.2863, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 52.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 94.5206, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 63.0, + "eval_rouge1_for_task1659_billsum_title_generation": 35.7451, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 60.7438, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 59.3364, + "eval_rouge1_for_task190_snli_textual_entailment": 35.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 41.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 35.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 32.0, + "eval_rouge1_for_task219_rocstories_title_generation": 25.4668, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 54.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 39.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 71.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 61.4667, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 42.7608, + "eval_rouge1_for_task288_gigaword_title_generation": 31.0552, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 22.3667, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 56.3, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 54.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 76.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 81.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.3897, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 30.0, + "eval_rouge1_for_task402_grailqa_question_rewriting": 69.3674, + "eval_rouge1_for_task418_persent_title_generation": 29.2636, + "eval_rouge1_for_task442_com_qa_question_rewriting": 72.7783, + "eval_rouge1_for_task500_scruples_title_generation": 16.5853, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 37.4491, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 61.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 37.3602, + "eval_rouge1_for_task602_wikitext_title_generation": 13.8206, + "eval_rouge1_for_task613_liar_keyword_tagging": 30.3333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 21.7314, + "eval_rouge1_for_task619_ohsumed_title_generation": 44.9432, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 43.6714, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 65.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 30.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 45.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.781, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 8.6667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.1715, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 60.3173, + "eval_rouge1_for_task677_ollie_data_to_text": 23.7413, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 60.0, + "eval_rouge1_for_task743_eurlex_title_generation": 34.5596, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 5.4241, + "eval_rouge1_for_task769_qed_title_generation": 90.7952, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 60.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 77.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 47.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 45.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 54.9524, + "eval_rouge1_for_task892_gap_coreference_resolution": 44.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 41.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_rouge1_for_task957_e2e_data_to_text": 50.6732, + "eval_rouge1_for_task970_sherliic_textual_entailment": 58.0, + "eval_rouge1_for_textual_entailment": 43.6806, + "eval_rouge1_for_title_generation": 35.6146, + "eval_rouge1_for_word_analogy": 32.1667, + "eval_rougeL": 46.9766, + "eval_rougeL_for_answerability_classification": 55.1795, + "eval_rougeL_for_cause_effect_classification": 53.8523, + "eval_rougeL_for_coreference_resolution": 43.2307, + "eval_rougeL_for_data_to_text": 45.2302, + "eval_rougeL_for_dialogue_act_recognition": 56.3095, + "eval_rougeL_for_grammar_error_correction": 71.1157, + "eval_rougeL_for_keyword_tagging": 61.2022, + "eval_rougeL_for_overlap_extraction": 36.4445, + "eval_rougeL_for_question_rewriting": 65.6525, + "eval_rougeL_for_task020_mctaco_answerability_classification": 46.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 43.7333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 91.9011, + "eval_rougeL_for_task035_winogrande_question_rewriting": 83.9602, + "eval_rougeL_for_task036_qasc_keyword_tagging": 73.7754, + "eval_rougeL_for_task039_qasc_overlap_extraction": 31.4667, + "eval_rougeL_for_task050_multirc_answerability_classification": 56.0, + "eval_rougeL_for_task102_commongen_data_to_text": 57.6698, + "eval_rougeL_for_task1152_bard_word_analogy": 28.0, + "eval_rougeL_for_task1153_bard_word_analogy": 21.3333, + "eval_rougeL_for_task1154_bard_word_analogy": 24.0, + "eval_rougeL_for_task1155_bard_word_analogy": 69.0, + "eval_rougeL_for_task1156_bard_word_analogy": 32.0, + "eval_rougeL_for_task1157_bard_word_analogy": 51.0, + "eval_rougeL_for_task1158_bard_word_analogy": 18.0, + "eval_rougeL_for_task1159_bard_word_analogy": 14.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 30.1228, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.8894, + "eval_rougeL_for_task121_zest_question_rewriting": 39.546, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.4592, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.5654, + "eval_rougeL_for_task1356_xlsum_title_generation": 20.2251, + "eval_rougeL_for_task1358_xlsum_title_generation": 32.1854, + "eval_rougeL_for_task1385_anli_textual_entailment": 31.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 36.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 52.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 58.0, + "eval_rougeL_for_task1407_dart_data_to_text": 29.8121, + "eval_rougeL_for_task1409_dart_data_to_text": 39.268, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 55.6987, + "eval_rougeL_for_task1439_doqa_answerability_classification": 44.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 8.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 34.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 34.9404, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.5326, + "eval_rougeL_for_task1562_zest_question_rewriting": 45.6822, + "eval_rougeL_for_task1586_scifact_title_generation": 32.1626, + "eval_rougeL_for_task1598_nyc_data_to_text": 40.2877, + "eval_rougeL_for_task1612_sick_textual_entailment": 37.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 79.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.1944, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 52.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 91.9784, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 63.0, + "eval_rougeL_for_task1659_billsum_title_generation": 30.4329, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 60.7438, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 52.9724, + "eval_rougeL_for_task190_snli_textual_entailment": 35.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 41.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 35.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 32.0, + "eval_rougeL_for_task219_rocstories_title_generation": 25.4668, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 54.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 39.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 71.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 61.4667, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 41.4223, + "eval_rougeL_for_task288_gigaword_title_generation": 27.0567, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 22.3667, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 56.3, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 54.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 76.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 81.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.5907, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 30.0, + "eval_rougeL_for_task402_grailqa_question_rewriting": 58.7287, + "eval_rougeL_for_task418_persent_title_generation": 25.4864, + "eval_rougeL_for_task442_com_qa_question_rewriting": 67.7594, + "eval_rougeL_for_task500_scruples_title_generation": 15.0081, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 36.68, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 61.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 34.8278, + "eval_rougeL_for_task602_wikitext_title_generation": 13.7484, + "eval_rougeL_for_task613_liar_keyword_tagging": 30.3333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 20.0422, + "eval_rougeL_for_task619_ohsumed_title_generation": 37.5204, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 42.1214, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 65.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 30.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 45.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.781, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 8.6667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 78.9601, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 57.9909, + "eval_rougeL_for_task677_ollie_data_to_text": 20.2909, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 60.0, + "eval_rougeL_for_task743_eurlex_title_generation": 29.7184, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.8157, + "eval_rougeL_for_task769_qed_title_generation": 90.7952, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 60.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 77.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 47.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 45.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 54.9524, + "eval_rougeL_for_task892_gap_coreference_resolution": 44.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 41.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_rougeL_for_task957_e2e_data_to_text": 40.0698, + "eval_rougeL_for_task970_sherliic_textual_entailment": 58.0, + "eval_rougeL_for_textual_entailment": 43.6806, + "eval_rougeL_for_title_generation": 32.4348, + "eval_rougeL_for_word_analogy": 32.1667, + "eval_runtime": 595.3472, + "eval_samples_per_second": 20.005, + "eval_steps_per_second": 0.627, + "step": 6500 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 1.0468, + "step": 7000 + }, + { + "epoch": 1.6, + "eval_exact_match": 28.3039, + "eval_exact_match_for_answerability_classification": 53.6154, + "eval_exact_match_for_cause_effect_classification": 38.1429, + "eval_exact_match_for_coreference_resolution": 34.4286, + "eval_exact_match_for_data_to_text": 5.2058, + "eval_exact_match_for_dialogue_act_recognition": 45.7143, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 42.4, + "eval_exact_match_for_overlap_extraction": 13.0, + "eval_exact_match_for_question_rewriting": 2.3636, + "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 41.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 6.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 5.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 47.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 26.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 55.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 28.0, + "eval_exact_match_for_task1153_bard_word_analogy": 25.0, + "eval_exact_match_for_task1154_bard_word_analogy": 24.0, + "eval_exact_match_for_task1155_bard_word_analogy": 76.0, + "eval_exact_match_for_task1156_bard_word_analogy": 26.0, + "eval_exact_match_for_task1157_bard_word_analogy": 55.0, + "eval_exact_match_for_task1158_bard_word_analogy": 17.0, + "eval_exact_match_for_task1159_bard_word_analogy": 30.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 54.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 2.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 2.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 7.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 0.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 44.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 56.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 39.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 1.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 46.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 49.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 54.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 36.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 59.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 14.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 48.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 40.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 36.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 32.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 76.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 46.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 37.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 52.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 44.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 56.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 22.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 69.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 4.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 11.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 52.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 30.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 28.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 41.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 86.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 5.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 3.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 22.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 71.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 53.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 64.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 55.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 24.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 47.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 28.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 54.0, + "eval_exact_match_for_textual_entailment": 35.9167, + "eval_exact_match_for_title_generation": 7.8475, + "eval_exact_match_for_word_analogy": 35.125, + "eval_f1": 45.4437, + "eval_f1_for_answerability_classification": 56.1795, + "eval_f1_for_cause_effect_classification": 55.0171, + "eval_f1_for_coreference_resolution": 42.0561, + "eval_f1_for_data_to_text": 48.6886, + "eval_f1_for_dialogue_act_recognition": 49.2143, + "eval_f1_for_grammar_error_correction": 70.1492, + "eval_f1_for_keyword_tagging": 55.1117, + "eval_f1_for_overlap_extraction": 36.0071, + "eval_f1_for_question_rewriting": 68.7244, + "eval_f1_for_task020_mctaco_answerability_classification": 53.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 43.5, + "eval_f1_for_task034_winogrande_question_rewriting": 91.8812, + "eval_f1_for_task035_winogrande_question_rewriting": 86.6505, + "eval_f1_for_task036_qasc_keyword_tagging": 76.5585, + "eval_f1_for_task039_qasc_overlap_extraction": 36.0667, + "eval_f1_for_task050_multirc_answerability_classification": 55.0, + "eval_f1_for_task102_commongen_data_to_text": 52.8733, + "eval_f1_for_task1152_bard_word_analogy": 28.0, + "eval_f1_for_task1153_bard_word_analogy": 27.6667, + "eval_f1_for_task1154_bard_word_analogy": 24.0, + "eval_f1_for_task1155_bard_word_analogy": 76.0, + "eval_f1_for_task1156_bard_word_analogy": 26.0, + "eval_f1_for_task1157_bard_word_analogy": 55.0, + "eval_f1_for_task1158_bard_word_analogy": 17.0, + "eval_f1_for_task1159_bard_word_analogy": 30.0, + "eval_f1_for_task1161_coda_19_title_generation": 32.1131, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.2052, + "eval_f1_for_task121_zest_question_rewriting": 44.448, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.1252, + "eval_f1_for_task1344_rte_textual_entailment": 54.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.3385, + "eval_f1_for_task1356_xlsum_title_generation": 19.2599, + "eval_f1_for_task1358_xlsum_title_generation": 30.5322, + "eval_f1_for_task1385_anli_textual_entailment": 2.0, + "eval_f1_for_task1386_anli_textual_entailment": 2.0, + "eval_f1_for_task1387_anli_textual_entailment": 7.0, + "eval_f1_for_task1388_cb_textual_entailment": 0.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 44.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 56.0, + "eval_f1_for_task1407_dart_data_to_text": 31.3868, + "eval_f1_for_task1409_dart_data_to_text": 48.4195, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 56.6312, + "eval_f1_for_task1439_doqa_answerability_classification": 39.0, + "eval_f1_for_task1442_doqa_answerability_classification": 48.0, + "eval_f1_for_task1516_imppres_textual_entailment": 1.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 46.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 34.7552, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.6672, + "eval_f1_for_task1562_zest_question_rewriting": 49.1866, + "eval_f1_for_task1586_scifact_title_generation": 34.1288, + "eval_f1_for_task1598_nyc_data_to_text": 47.9001, + "eval_f1_for_task1612_sick_textual_entailment": 49.0, + "eval_f1_for_task1615_sick_textual_entailment": 54.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.6872, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 90.4589, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 59.0, + "eval_f1_for_task1659_billsum_title_generation": 32.5552, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 64.73, + "eval_f1_for_task1728_web_nlg_data_to_text": 57.3287, + "eval_f1_for_task190_snli_textual_entailment": 48.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 40.0, + "eval_f1_for_task201_multinli_textual_entailment": 36.0, + "eval_f1_for_task202_multinli_textual_entailment": 32.0, + "eval_f1_for_task219_rocstories_title_generation": 13.8746, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 50.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 76.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 56.3, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 35.9476, + "eval_f1_for_task288_gigaword_title_generation": 27.5241, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 24.4667, + "eval_f1_for_task329_gap_coreference_resolution": 37.0, + "eval_f1_for_task330_gap_coreference_resolution": 57.3444, + "eval_f1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 81.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 30.3104, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 25.8333, + "eval_f1_for_task402_grailqa_question_rewriting": 72.9523, + "eval_f1_for_task418_persent_title_generation": 23.6611, + "eval_f1_for_task442_com_qa_question_rewriting": 68.9375, + "eval_f1_for_task500_scruples_title_generation": 12.7135, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 37.5664, + "eval_f1_for_task520_aquamuse_answerability_classification": 69.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 38.0312, + "eval_f1_for_task602_wikitext_title_generation": 13.4452, + "eval_f1_for_task613_liar_keyword_tagging": 18.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 21.1424, + "eval_f1_for_task619_ohsumed_title_generation": 40.688, + "eval_f1_for_task620_ohsumed_keyword_tagging": 34.3286, + "eval_f1_for_task623_ohsumed_keyword_tagging": 52.0, + "eval_f1_for_task640_e_snli_textual_entailment": 30.0, + "eval_f1_for_task641_e_snli_textual_entailment": 28.0, + "eval_f1_for_task642_e_snli_textual_entailment": 41.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.0048, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 5.6667, + "eval_f1_for_task670_ambigqa_question_rewriting": 82.0333, + "eval_f1_for_task671_ambigqa_question_rewriting": 61.6485, + "eval_f1_for_task677_ollie_data_to_text": 23.8405, + "eval_f1_for_task738_perspectrum_textual_entailment": 22.0, + "eval_f1_for_task743_eurlex_title_generation": 34.1336, + "eval_f1_for_task760_msr_sqa_data_to_text": 6.6141, + "eval_f1_for_task769_qed_title_generation": 84.9799, + "eval_f1_for_task827_copa_cause_effect_classification": 53.0, + "eval_f1_for_task828_copa_cause_effect_classification": 64.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 55.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 24.0, + "eval_f1_for_task890_gwsd_textual_entailment": 47.0, + "eval_f1_for_task891_gap_coreference_resolution": 56.9444, + "eval_f1_for_task892_gap_coreference_resolution": 28.0, + "eval_f1_for_task893_gap_coreference_resolution": 45.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_f1_for_task957_e2e_data_to_text": 48.2401, + "eval_f1_for_task970_sherliic_textual_entailment": 54.0, + "eval_f1_for_textual_entailment": 35.9167, + "eval_f1_for_title_generation": 32.0592, + "eval_f1_for_word_analogy": 35.4583, + "eval_gen_len": 10.3486, + "eval_global_step": 7000, + "eval_loss": 1.3404057025909424, + "eval_rouge1": 48.2267, + "eval_rouge1_for_answerability_classification": 56.1795, + "eval_rouge1_for_cause_effect_classification": 55.174, + "eval_rouge1_for_coreference_resolution": 42.3593, + "eval_rouge1_for_data_to_text": 50.9348, + "eval_rouge1_for_dialogue_act_recognition": 52.6952, + "eval_rouge1_for_grammar_error_correction": 72.2762, + "eval_rouge1_for_keyword_tagging": 58.8046, + "eval_rouge1_for_overlap_extraction": 38.3387, + "eval_rouge1_for_question_rewriting": 70.4307, + "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 43.4, + "eval_rouge1_for_task034_winogrande_question_rewriting": 91.948, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.4963, + "eval_rouge1_for_task036_qasc_keyword_tagging": 79.9742, + "eval_rouge1_for_task039_qasc_overlap_extraction": 39.3, + "eval_rouge1_for_task050_multirc_answerability_classification": 55.0, + "eval_rouge1_for_task102_commongen_data_to_text": 64.2432, + "eval_rouge1_for_task1152_bard_word_analogy": 28.0, + "eval_rouge1_for_task1153_bard_word_analogy": 28.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 24.0, + "eval_rouge1_for_task1155_bard_word_analogy": 76.0, + "eval_rouge1_for_task1156_bard_word_analogy": 26.0, + "eval_rouge1_for_task1157_bard_word_analogy": 55.0, + "eval_rouge1_for_task1158_bard_word_analogy": 17.0, + "eval_rouge1_for_task1159_bard_word_analogy": 30.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 35.6641, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.4752, + "eval_rouge1_for_task121_zest_question_rewriting": 47.4177, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.3218, + "eval_rouge1_for_task1344_rte_textual_entailment": 54.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.6795, + "eval_rouge1_for_task1356_xlsum_title_generation": 22.4564, + "eval_rouge1_for_task1358_xlsum_title_generation": 34.9866, + "eval_rouge1_for_task1385_anli_textual_entailment": 28.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 44.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 61.7, + "eval_rouge1_for_task1407_dart_data_to_text": 31.2276, + "eval_rouge1_for_task1409_dart_data_to_text": 49.2936, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 57.4473, + "eval_rouge1_for_task1439_doqa_answerability_classification": 39.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 1.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 46.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 36.8165, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.105, + "eval_rouge1_for_task1562_zest_question_rewriting": 52.8052, + "eval_rouge1_for_task1586_scifact_title_generation": 37.7346, + "eval_rouge1_for_task1598_nyc_data_to_text": 48.9247, + "eval_rouge1_for_task1612_sick_textual_entailment": 49.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 84.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.961, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 90.7463, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 59.0, + "eval_rouge1_for_task1659_billsum_title_generation": 34.5706, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 64.73, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 59.7436, + "eval_rouge1_for_task190_snli_textual_entailment": 48.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 40.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 36.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 32.0, + "eval_rouge1_for_task219_rocstories_title_generation": 18.896, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 76.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 56.9667, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 37.3774, + "eval_rouge1_for_task288_gigaword_title_generation": 30.2525, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 24.2333, + "eval_rouge1_for_task329_gap_coreference_resolution": 37.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 57.6333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 81.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.3844, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 29.5, + "eval_rouge1_for_task402_grailqa_question_rewriting": 75.2592, + "eval_rouge1_for_task418_persent_title_generation": 27.0093, + "eval_rouge1_for_task442_com_qa_question_rewriting": 72.2782, + "eval_rouge1_for_task500_scruples_title_generation": 14.6529, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 38.1513, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 69.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 39.0664, + "eval_rouge1_for_task602_wikitext_title_generation": 14.2138, + "eval_rouge1_for_task613_liar_keyword_tagging": 27.5, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 22.1672, + "eval_rouge1_for_task619_ohsumed_title_generation": 44.1795, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 40.044, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 52.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 30.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 28.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 41.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.5048, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 5.6667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.6735, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 62.7437, + "eval_rouge1_for_task677_ollie_data_to_text": 26.0032, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 64.0, + "eval_rouge1_for_task743_eurlex_title_generation": 35.6975, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 6.8693, + "eval_rouge1_for_task769_qed_title_generation": 84.9132, + "eval_rouge1_for_task827_copa_cause_effect_classification": 53.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 64.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 55.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 42.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 47.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 56.9, + "eval_rouge1_for_task892_gap_coreference_resolution": 28.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 45.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rouge1_for_task957_e2e_data_to_text": 48.7535, + "eval_rouge1_for_task970_sherliic_textual_entailment": 54.0, + "eval_rouge1_for_textual_entailment": 44.1111, + "eval_rouge1_for_title_generation": 34.2662, + "eval_rouge1_for_word_analogy": 35.5833, + "eval_rougeL": 46.901, + "eval_rougeL_for_answerability_classification": 56.1795, + "eval_rougeL_for_cause_effect_classification": 54.7818, + "eval_rougeL_for_coreference_resolution": 42.3593, + "eval_rougeL_for_data_to_text": 44.5568, + "eval_rougeL_for_dialogue_act_recognition": 52.6952, + "eval_rougeL_for_grammar_error_correction": 70.9456, + "eval_rougeL_for_keyword_tagging": 58.3508, + "eval_rougeL_for_overlap_extraction": 37.6711, + "eval_rougeL_for_question_rewriting": 66.8013, + "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 43.4, + "eval_rougeL_for_task034_winogrande_question_rewriting": 91.948, + "eval_rougeL_for_task035_winogrande_question_rewriting": 86.1399, + "eval_rougeL_for_task036_qasc_keyword_tagging": 79.4257, + "eval_rougeL_for_task039_qasc_overlap_extraction": 39.3, + "eval_rougeL_for_task050_multirc_answerability_classification": 55.0, + "eval_rougeL_for_task102_commongen_data_to_text": 55.7344, + "eval_rougeL_for_task1152_bard_word_analogy": 28.0, + "eval_rougeL_for_task1153_bard_word_analogy": 28.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 24.0, + "eval_rougeL_for_task1155_bard_word_analogy": 76.0, + "eval_rougeL_for_task1156_bard_word_analogy": 26.0, + "eval_rougeL_for_task1157_bard_word_analogy": 55.0, + "eval_rougeL_for_task1158_bard_word_analogy": 17.0, + "eval_rougeL_for_task1159_bard_word_analogy": 30.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 29.1082, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.631, + "eval_rougeL_for_task121_zest_question_rewriting": 41.3744, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.9536, + "eval_rougeL_for_task1344_rte_textual_entailment": 54.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.4443, + "eval_rougeL_for_task1356_xlsum_title_generation": 19.5541, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.6923, + "eval_rougeL_for_task1385_anli_textual_entailment": 28.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 44.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 61.7, + "eval_rougeL_for_task1407_dart_data_to_text": 27.4867, + "eval_rougeL_for_task1409_dart_data_to_text": 41.607, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 55.6512, + "eval_rougeL_for_task1439_doqa_answerability_classification": 39.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 1.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 46.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 33.4794, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.2401, + "eval_rougeL_for_task1562_zest_question_rewriting": 45.394, + "eval_rougeL_for_task1586_scifact_title_generation": 30.5685, + "eval_rougeL_for_task1598_nyc_data_to_text": 38.9237, + "eval_rougeL_for_task1612_sick_textual_entailment": 49.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 84.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.7168, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 87.6769, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 59.0, + "eval_rougeL_for_task1659_billsum_title_generation": 29.3317, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 64.73, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.0884, + "eval_rougeL_for_task190_snli_textual_entailment": 48.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 40.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 36.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 32.0, + "eval_rougeL_for_task219_rocstories_title_generation": 18.896, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 76.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 56.9667, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 36.0422, + "eval_rougeL_for_task288_gigaword_title_generation": 25.895, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 24.2333, + "eval_rougeL_for_task329_gap_coreference_resolution": 37.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 57.6333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 81.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 29.6631, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 29.5, + "eval_rougeL_for_task402_grailqa_question_rewriting": 63.4085, + "eval_rougeL_for_task418_persent_title_generation": 22.8534, + "eval_rougeL_for_task442_com_qa_question_rewriting": 67.5142, + "eval_rougeL_for_task500_scruples_title_generation": 13.2865, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 37.5571, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 69.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 36.0104, + "eval_rougeL_for_task602_wikitext_title_generation": 14.0596, + "eval_rougeL_for_task613_liar_keyword_tagging": 27.5, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 20.1425, + "eval_rougeL_for_task619_ohsumed_title_generation": 37.6774, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 38.3234, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 52.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 30.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 28.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 41.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.5048, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 5.6667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.8345, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 60.4085, + "eval_rougeL_for_task677_ollie_data_to_text": 21.7234, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 64.0, + "eval_rougeL_for_task743_eurlex_title_generation": 31.4525, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.8235, + "eval_rougeL_for_task769_qed_title_generation": 84.9132, + "eval_rougeL_for_task827_copa_cause_effect_classification": 53.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 64.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 55.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 42.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 47.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 56.9, + "eval_rougeL_for_task892_gap_coreference_resolution": 28.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 45.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rougeL_for_task957_e2e_data_to_text": 40.2841, + "eval_rougeL_for_task970_sherliic_textual_entailment": 54.0, + "eval_rougeL_for_textual_entailment": 44.1111, + "eval_rougeL_for_title_generation": 31.1121, + "eval_rougeL_for_word_analogy": 35.5833, + "eval_runtime": 665.4491, + "eval_samples_per_second": 17.898, + "eval_steps_per_second": 0.561, + "step": 7000 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 1.0342, + "step": 7500 + }, + { + "epoch": 1.71, + "eval_exact_match": 28.9085, + "eval_exact_match_for_answerability_classification": 51.8462, + "eval_exact_match_for_cause_effect_classification": 38.8571, + "eval_exact_match_for_coreference_resolution": 36.6429, + "eval_exact_match_for_data_to_text": 6.4165, + "eval_exact_match_for_dialogue_act_recognition": 48.0, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 42.0, + "eval_exact_match_for_overlap_extraction": 12.0, + "eval_exact_match_for_question_rewriting": 3.1818, + "eval_exact_match_for_task020_mctaco_answerability_classification": 51.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 41.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 6.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 4.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 40.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 24.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 58.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 27.0, + "eval_exact_match_for_task1153_bard_word_analogy": 19.0, + "eval_exact_match_for_task1154_bard_word_analogy": 20.0, + "eval_exact_match_for_task1155_bard_word_analogy": 61.0, + "eval_exact_match_for_task1156_bard_word_analogy": 27.0, + "eval_exact_match_for_task1157_bard_word_analogy": 57.0, + "eval_exact_match_for_task1158_bard_word_analogy": 23.0, + "eval_exact_match_for_task1159_bard_word_analogy": 16.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 13.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 4.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 1.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 3.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 8.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 39.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 51.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 26.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 40.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 44.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 36.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 47.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 28.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 38.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 52.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 52.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 41.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 4.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 47.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 58.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 21.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 59.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 3.0, + "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, + "eval_exact_match_for_task613_liar_keyword_tagging": 18.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 13.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 51.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 49.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 88.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 14.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 26.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 72.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 57.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 60.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 75.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 49.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 58.0, + "eval_exact_match_for_textual_entailment": 38.2083, + "eval_exact_match_for_title_generation": 8.0717, + "eval_exact_match_for_word_analogy": 31.25, + "eval_f1": 46.1232, + "eval_f1_for_answerability_classification": 54.4103, + "eval_f1_for_cause_effect_classification": 55.6876, + "eval_f1_for_coreference_resolution": 43.8492, + "eval_f1_for_data_to_text": 50.9843, + "eval_f1_for_dialogue_act_recognition": 51.3571, + "eval_f1_for_grammar_error_correction": 74.3285, + "eval_f1_for_keyword_tagging": 54.6056, + "eval_f1_for_overlap_extraction": 36.4185, + "eval_f1_for_question_rewriting": 67.4738, + "eval_f1_for_task020_mctaco_answerability_classification": 51.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 43.5, + "eval_f1_for_task034_winogrande_question_rewriting": 92.041, + "eval_f1_for_task035_winogrande_question_rewriting": 86.4239, + "eval_f1_for_task036_qasc_keyword_tagging": 70.723, + "eval_f1_for_task039_qasc_overlap_extraction": 32.6667, + "eval_f1_for_task050_multirc_answerability_classification": 58.0, + "eval_f1_for_task102_commongen_data_to_text": 55.1239, + "eval_f1_for_task1152_bard_word_analogy": 27.0, + "eval_f1_for_task1153_bard_word_analogy": 21.0, + "eval_f1_for_task1154_bard_word_analogy": 20.0, + "eval_f1_for_task1155_bard_word_analogy": 61.0, + "eval_f1_for_task1156_bard_word_analogy": 27.0, + "eval_f1_for_task1157_bard_word_analogy": 57.0, + "eval_f1_for_task1158_bard_word_analogy": 23.0, + "eval_f1_for_task1159_bard_word_analogy": 16.0, + "eval_f1_for_task1161_coda_19_title_generation": 33.4218, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 81.3136, + "eval_f1_for_task121_zest_question_rewriting": 43.9555, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 15.507, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.4376, + "eval_f1_for_task1356_xlsum_title_generation": 21.1327, + "eval_f1_for_task1358_xlsum_title_generation": 31.9282, + "eval_f1_for_task1385_anli_textual_entailment": 1.0, + "eval_f1_for_task1386_anli_textual_entailment": 3.0, + "eval_f1_for_task1387_anli_textual_entailment": 8.0, + "eval_f1_for_task1388_cb_textual_entailment": 39.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 47.0, + "eval_f1_for_task1407_dart_data_to_text": 33.9684, + "eval_f1_for_task1409_dart_data_to_text": 51.3636, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 65.0588, + "eval_f1_for_task1439_doqa_answerability_classification": 49.0, + "eval_f1_for_task1442_doqa_answerability_classification": 51.0, + "eval_f1_for_task1516_imppres_textual_entailment": 26.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 40.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 36.5848, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.5982, + "eval_f1_for_task1562_zest_question_rewriting": 46.8738, + "eval_f1_for_task1586_scifact_title_generation": 36.0324, + "eval_f1_for_task1598_nyc_data_to_text": 48.5143, + "eval_f1_for_task1612_sick_textual_entailment": 44.0, + "eval_f1_for_task1615_sick_textual_entailment": 36.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.7713, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_f1_for_task1631_open_pi_data_to_text": 92.5503, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 34.7669, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 70.8582, + "eval_f1_for_task1728_web_nlg_data_to_text": 59.3534, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 38.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 21.2365, + "eval_f1_for_task220_rocstories_title_generation": 52.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 50.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 52.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 55.9667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 40.1703, + "eval_f1_for_task288_gigaword_title_generation": 30.9932, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 17.4, + "eval_f1_for_task329_gap_coreference_resolution": 34.0, + "eval_f1_for_task330_gap_coreference_resolution": 58.3444, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 71.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 82.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 86.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.5677, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 26.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 73.9619, + "eval_f1_for_task418_persent_title_generation": 28.6818, + "eval_f1_for_task442_com_qa_question_rewriting": 66.2399, + "eval_f1_for_task500_scruples_title_generation": 13.2339, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 36.9281, + "eval_f1_for_task520_aquamuse_answerability_classification": 59.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 34.0639, + "eval_f1_for_task602_wikitext_title_generation": 15.5048, + "eval_f1_for_task613_liar_keyword_tagging": 20.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 22.9122, + "eval_f1_for_task619_ohsumed_title_generation": 40.4543, + "eval_f1_for_task620_ohsumed_keyword_tagging": 37.0238, + "eval_f1_for_task623_ohsumed_keyword_tagging": 51.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 49.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.281, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 18.6667, + "eval_f1_for_task670_ambigqa_question_rewriting": 76.9669, + "eval_f1_for_task671_ambigqa_question_rewriting": 55.2264, + "eval_f1_for_task677_ollie_data_to_text": 27.4522, + "eval_f1_for_task738_perspectrum_textual_entailment": 26.0, + "eval_f1_for_task743_eurlex_title_generation": 35.1027, + "eval_f1_for_task760_msr_sqa_data_to_text": 9.4055, + "eval_f1_for_task769_qed_title_generation": 80.781, + "eval_f1_for_task827_copa_cause_effect_classification": 57.0, + "eval_f1_for_task828_copa_cause_effect_classification": 60.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 75.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.0, + "eval_f1_for_task890_gwsd_textual_entailment": 49.0, + "eval_f1_for_task891_gap_coreference_resolution": 56.9857, + "eval_f1_for_task892_gap_coreference_resolution": 32.0, + "eval_f1_for_task893_gap_coreference_resolution": 51.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_f1_for_task957_e2e_data_to_text": 50.3588, + "eval_f1_for_task970_sherliic_textual_entailment": 58.0, + "eval_f1_for_textual_entailment": 38.2083, + "eval_f1_for_title_generation": 33.401, + "eval_f1_for_word_analogy": 31.5, + "eval_gen_len": 9.3986, + "eval_global_step": 7500, + "eval_loss": 1.3256440162658691, + "eval_rouge1": 48.7508, + "eval_rouge1_for_answerability_classification": 54.4103, + "eval_rouge1_for_cause_effect_classification": 55.8472, + "eval_rouge1_for_coreference_resolution": 44.5545, + "eval_rouge1_for_data_to_text": 53.4439, + "eval_rouge1_for_dialogue_act_recognition": 54.2333, + "eval_rouge1_for_grammar_error_correction": 76.3998, + "eval_rouge1_for_keyword_tagging": 59.5775, + "eval_rouge1_for_overlap_extraction": 38.979, + "eval_rouge1_for_question_rewriting": 69.1157, + "eval_rouge1_for_task020_mctaco_answerability_classification": 51.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 43.4, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.1463, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.3077, + "eval_rouge1_for_task036_qasc_keyword_tagging": 76.4778, + "eval_rouge1_for_task039_qasc_overlap_extraction": 36.5, + "eval_rouge1_for_task050_multirc_answerability_classification": 58.0, + "eval_rouge1_for_task102_commongen_data_to_text": 66.4176, + "eval_rouge1_for_task1152_bard_word_analogy": 27.0, + "eval_rouge1_for_task1153_bard_word_analogy": 21.0, + "eval_rouge1_for_task1154_bard_word_analogy": 20.0, + "eval_rouge1_for_task1155_bard_word_analogy": 61.0, + "eval_rouge1_for_task1156_bard_word_analogy": 27.0, + "eval_rouge1_for_task1157_bard_word_analogy": 57.0, + "eval_rouge1_for_task1158_bard_word_analogy": 23.0, + "eval_rouge1_for_task1159_bard_word_analogy": 16.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 36.4578, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 81.5431, + "eval_rouge1_for_task121_zest_question_rewriting": 46.1067, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 16.0243, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.1958, + "eval_rouge1_for_task1356_xlsum_title_generation": 24.788, + "eval_rouge1_for_task1358_xlsum_title_generation": 36.5797, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 43.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 53.8, + "eval_rouge1_for_task1407_dart_data_to_text": 34.3077, + "eval_rouge1_for_task1409_dart_data_to_text": 52.9142, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 66.1334, + "eval_rouge1_for_task1439_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 51.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 26.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 40.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 39.453, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.6662, + "eval_rouge1_for_task1562_zest_question_rewriting": 50.5135, + "eval_rouge1_for_task1586_scifact_title_generation": 39.7375, + "eval_rouge1_for_task1598_nyc_data_to_text": 49.6577, + "eval_rouge1_for_task1612_sick_textual_entailment": 44.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.0068, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 92.6671, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 36.6924, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 70.8582, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 61.3613, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 38.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 25.6468, + "eval_rouge1_for_task220_rocstories_title_generation": 52.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 52.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 56.6333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 41.458, + "eval_rouge1_for_task288_gigaword_title_generation": 33.8082, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 17.6667, + "eval_rouge1_for_task329_gap_coreference_resolution": 34.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 58.4667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 71.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 82.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 86.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.4811, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 34.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 75.7724, + "eval_rouge1_for_task418_persent_title_generation": 32.285, + "eval_rouge1_for_task442_com_qa_question_rewriting": 70.1316, + "eval_rouge1_for_task500_scruples_title_generation": 14.3744, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 37.466, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 59.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 35.7063, + "eval_rouge1_for_task602_wikitext_title_generation": 16.1329, + "eval_rouge1_for_task613_liar_keyword_tagging": 31.8333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 24.1157, + "eval_rouge1_for_task619_ohsumed_title_generation": 43.8049, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 43.7952, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 51.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 49.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.781, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 19.5, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 77.9041, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 56.6451, + "eval_rouge1_for_task677_ollie_data_to_text": 30.0124, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 60.0, + "eval_rouge1_for_task743_eurlex_title_generation": 37.4344, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 9.6664, + "eval_rouge1_for_task769_qed_title_generation": 80.7143, + "eval_rouge1_for_task827_copa_cause_effect_classification": 57.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 60.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 49.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 49.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 57.0714, + "eval_rouge1_for_task892_gap_coreference_resolution": 32.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 51.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rouge1_for_task957_e2e_data_to_text": 51.5952, + "eval_rouge1_for_task970_sherliic_textual_entailment": 58.0, + "eval_rouge1_for_textual_entailment": 45.2361, + "eval_rouge1_for_title_generation": 35.6796, + "eval_rouge1_for_word_analogy": 31.5, + "eval_rougeL": 47.3115, + "eval_rougeL_for_answerability_classification": 54.4103, + "eval_rougeL_for_cause_effect_classification": 55.4886, + "eval_rougeL_for_coreference_resolution": 44.5545, + "eval_rougeL_for_data_to_text": 45.8175, + "eval_rougeL_for_dialogue_act_recognition": 54.2333, + "eval_rougeL_for_grammar_error_correction": 75.3014, + "eval_rougeL_for_keyword_tagging": 59.1056, + "eval_rougeL_for_overlap_extraction": 38.0621, + "eval_rougeL_for_question_rewriting": 65.2386, + "eval_rougeL_for_task020_mctaco_answerability_classification": 51.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 43.4, + "eval_rougeL_for_task034_winogrande_question_rewriting": 91.5191, + "eval_rougeL_for_task035_winogrande_question_rewriting": 85.6753, + "eval_rougeL_for_task036_qasc_keyword_tagging": 76.2683, + "eval_rougeL_for_task039_qasc_overlap_extraction": 36.5, + "eval_rougeL_for_task050_multirc_answerability_classification": 58.0, + "eval_rougeL_for_task102_commongen_data_to_text": 56.8614, + "eval_rougeL_for_task1152_bard_word_analogy": 27.0, + "eval_rougeL_for_task1153_bard_word_analogy": 21.0, + "eval_rougeL_for_task1154_bard_word_analogy": 20.0, + "eval_rougeL_for_task1155_bard_word_analogy": 61.0, + "eval_rougeL_for_task1156_bard_word_analogy": 27.0, + "eval_rougeL_for_task1157_bard_word_analogy": 57.0, + "eval_rougeL_for_task1158_bard_word_analogy": 23.0, + "eval_rougeL_for_task1159_bard_word_analogy": 16.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 29.2509, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 81.0846, + "eval_rougeL_for_task121_zest_question_rewriting": 39.6729, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 15.4596, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.1912, + "eval_rougeL_for_task1356_xlsum_title_generation": 21.5844, + "eval_rougeL_for_task1358_xlsum_title_generation": 31.4624, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 43.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 53.8, + "eval_rougeL_for_task1407_dart_data_to_text": 29.3562, + "eval_rougeL_for_task1409_dart_data_to_text": 42.2965, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 64.8422, + "eval_rougeL_for_task1439_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 51.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 26.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 40.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 34.9919, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.7605, + "eval_rougeL_for_task1562_zest_question_rewriting": 44.2378, + "eval_rougeL_for_task1586_scifact_title_generation": 32.7624, + "eval_rougeL_for_task1598_nyc_data_to_text": 38.6391, + "eval_rougeL_for_task1612_sick_textual_entailment": 44.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 79.1618, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 89.6574, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 31.6954, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 70.8582, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 54.0261, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 38.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 25.6468, + "eval_rougeL_for_task220_rocstories_title_generation": 52.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 52.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 56.6333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 39.6241, + "eval_rougeL_for_task288_gigaword_title_generation": 29.2466, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 17.6667, + "eval_rougeL_for_task329_gap_coreference_resolution": 34.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 58.4667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 71.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 82.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 86.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.6204, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 34.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 63.1898, + "eval_rougeL_for_task418_persent_title_generation": 28.148, + "eval_rougeL_for_task442_com_qa_question_rewriting": 64.9404, + "eval_rougeL_for_task500_scruples_title_generation": 13.4573, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 36.8407, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 59.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 32.6989, + "eval_rougeL_for_task602_wikitext_title_generation": 15.9573, + "eval_rougeL_for_task613_liar_keyword_tagging": 31.8333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 22.4663, + "eval_rougeL_for_task619_ohsumed_title_generation": 37.7227, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 41.6452, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 51.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 49.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.781, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 19.5, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 76.5268, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 52.4253, + "eval_rougeL_for_task677_ollie_data_to_text": 24.3476, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 60.0, + "eval_rougeL_for_task743_eurlex_title_generation": 32.56, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 8.195, + "eval_rougeL_for_task769_qed_title_generation": 80.7143, + "eval_rougeL_for_task827_copa_cause_effect_classification": 57.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 60.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 49.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 49.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 57.0714, + "eval_rougeL_for_task892_gap_coreference_resolution": 32.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 51.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.1376, + "eval_rougeL_for_task970_sherliic_textual_entailment": 58.0, + "eval_rougeL_for_textual_entailment": 45.2361, + "eval_rougeL_for_title_generation": 32.4914, + "eval_rougeL_for_word_analogy": 31.5, + "eval_runtime": 587.9583, + "eval_samples_per_second": 20.257, + "eval_steps_per_second": 0.634, + "step": 7500 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 1.0277, + "step": 8000 + }, + { + "epoch": 1.83, + "eval_exact_match": 28.5055, + "eval_exact_match_for_answerability_classification": 53.5385, + "eval_exact_match_for_cause_effect_classification": 37.5714, + "eval_exact_match_for_coreference_resolution": 36.6429, + "eval_exact_match_for_data_to_text": 6.7797, + "eval_exact_match_for_dialogue_act_recognition": 49.5714, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 40.0, + "eval_exact_match_for_overlap_extraction": 10.5, + "eval_exact_match_for_question_rewriting": 3.0, + "eval_exact_match_for_task020_mctaco_answerability_classification": 51.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 41.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 6.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 6.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 30.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 21.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 57.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 25.0, + "eval_exact_match_for_task1153_bard_word_analogy": 26.0, + "eval_exact_match_for_task1154_bard_word_analogy": 25.0, + "eval_exact_match_for_task1155_bard_word_analogy": 63.0, + "eval_exact_match_for_task1156_bard_word_analogy": 29.0, + "eval_exact_match_for_task1157_bard_word_analogy": 58.0, + "eval_exact_match_for_task1158_bard_word_analogy": 17.0, + "eval_exact_match_for_task1159_bard_word_analogy": 16.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 11.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 1.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 1.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 0.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 45.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 2.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 41.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 11.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 46.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 43.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 37.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 49.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 48.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 62.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 29.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 58.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 29.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 30.0, + "eval_exact_match_for_task219_rocstories_title_generation": 1.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 43.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 72.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 46.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 6.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 54.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 42.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 59.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 23.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 63.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 4.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 9.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 57.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 26.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 41.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 88.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 14.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 28.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 66.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 62.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 74.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 50.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 47.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 47.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 55.0, + "eval_exact_match_for_textual_entailment": 35.8333, + "eval_exact_match_for_title_generation": 7.3991, + "eval_exact_match_for_word_analogy": 32.375, + "eval_f1": 46.1016, + "eval_f1_for_answerability_classification": 56.1026, + "eval_f1_for_cause_effect_classification": 54.807, + "eval_f1_for_coreference_resolution": 44.988, + "eval_f1_for_data_to_text": 52.6355, + "eval_f1_for_dialogue_act_recognition": 53.0714, + "eval_f1_for_grammar_error_correction": 74.6612, + "eval_f1_for_keyword_tagging": 54.6469, + "eval_f1_for_overlap_extraction": 34.0313, + "eval_f1_for_question_rewriting": 68.0546, + "eval_f1_for_task020_mctaco_answerability_classification": 51.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 43.0, + "eval_f1_for_task034_winogrande_question_rewriting": 91.9783, + "eval_f1_for_task035_winogrande_question_rewriting": 87.4154, + "eval_f1_for_task036_qasc_keyword_tagging": 69.2704, + "eval_f1_for_task039_qasc_overlap_extraction": 29.0, + "eval_f1_for_task050_multirc_answerability_classification": 57.0, + "eval_f1_for_task102_commongen_data_to_text": 53.6361, + "eval_f1_for_task1152_bard_word_analogy": 25.0, + "eval_f1_for_task1153_bard_word_analogy": 28.6667, + "eval_f1_for_task1154_bard_word_analogy": 25.0, + "eval_f1_for_task1155_bard_word_analogy": 63.0, + "eval_f1_for_task1156_bard_word_analogy": 29.0, + "eval_f1_for_task1157_bard_word_analogy": 58.0, + "eval_f1_for_task1158_bard_word_analogy": 17.0, + "eval_f1_for_task1159_bard_word_analogy": 17.3333, + "eval_f1_for_task1161_coda_19_title_generation": 30.6709, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 81.1104, + "eval_f1_for_task121_zest_question_rewriting": 45.3118, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.6788, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.3953, + "eval_f1_for_task1356_xlsum_title_generation": 22.2973, + "eval_f1_for_task1358_xlsum_title_generation": 33.7947, + "eval_f1_for_task1385_anli_textual_entailment": 0.0, + "eval_f1_for_task1386_anli_textual_entailment": 1.0, + "eval_f1_for_task1387_anli_textual_entailment": 1.0, + "eval_f1_for_task1388_cb_textual_entailment": 0.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 45.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 50.0, + "eval_f1_for_task1407_dart_data_to_text": 36.8455, + "eval_f1_for_task1409_dart_data_to_text": 53.3712, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 65.4347, + "eval_f1_for_task1439_doqa_answerability_classification": 41.0, + "eval_f1_for_task1442_doqa_answerability_classification": 52.0, + "eval_f1_for_task1516_imppres_textual_entailment": 11.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 46.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 43.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 34.1776, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.8877, + "eval_f1_for_task1562_zest_question_rewriting": 50.0575, + "eval_f1_for_task1586_scifact_title_generation": 35.2937, + "eval_f1_for_task1598_nyc_data_to_text": 51.4157, + "eval_f1_for_task1612_sick_textual_entailment": 37.0, + "eval_f1_for_task1615_sick_textual_entailment": 49.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 80.1748, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_f1_for_task1631_open_pi_data_to_text": 94.6709, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 62.0, + "eval_f1_for_task1659_billsum_title_generation": 34.009, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 71.2213, + "eval_f1_for_task1728_web_nlg_data_to_text": 63.3415, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 58.0, + "eval_f1_for_task201_multinli_textual_entailment": 29.0, + "eval_f1_for_task202_multinli_textual_entailment": 30.0, + "eval_f1_for_task219_rocstories_title_generation": 15.7452, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 43.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 72.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 58.9667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 39.0626, + "eval_f1_for_task288_gigaword_title_generation": 32.1284, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 37.3, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 57.0111, + "eval_f1_for_task349_squad2.0_answerability_classification": 54.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 80.6667, + "eval_f1_for_task392_cod3s_cause_effect_classification": 86.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 32.2381, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 27.5, + "eval_f1_for_task402_grailqa_question_rewriting": 73.1985, + "eval_f1_for_task418_persent_title_generation": 28.2007, + "eval_f1_for_task442_com_qa_question_rewriting": 68.4009, + "eval_f1_for_task500_scruples_title_generation": 13.6638, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 37.5029, + "eval_f1_for_task520_aquamuse_answerability_classification": 63.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 34.35, + "eval_f1_for_task602_wikitext_title_generation": 13.6221, + "eval_f1_for_task613_liar_keyword_tagging": 18.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 22.4107, + "eval_f1_for_task619_ohsumed_title_generation": 41.4021, + "eval_f1_for_task620_ohsumed_keyword_tagging": 34.8738, + "eval_f1_for_task623_ohsumed_keyword_tagging": 57.0, + "eval_f1_for_task640_e_snli_textual_entailment": 26.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 41.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.0905, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 19.3333, + "eval_f1_for_task670_ambigqa_question_rewriting": 78.5275, + "eval_f1_for_task671_ambigqa_question_rewriting": 53.0308, + "eval_f1_for_task677_ollie_data_to_text": 25.2352, + "eval_f1_for_task738_perspectrum_textual_entailment": 28.0, + "eval_f1_for_task743_eurlex_title_generation": 35.0528, + "eval_f1_for_task760_msr_sqa_data_to_text": 8.6583, + "eval_f1_for_task769_qed_title_generation": 75.9833, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 62.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 74.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, + "eval_f1_for_task890_gwsd_textual_entailment": 50.0, + "eval_f1_for_task891_gap_coreference_resolution": 58.5, + "eval_f1_for_task892_gap_coreference_resolution": 32.0, + "eval_f1_for_task893_gap_coreference_resolution": 47.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_f1_for_task957_e2e_data_to_text": 54.002, + "eval_f1_for_task970_sherliic_textual_entailment": 55.0, + "eval_f1_for_textual_entailment": 35.8333, + "eval_f1_for_title_generation": 32.4772, + "eval_f1_for_word_analogy": 32.875, + "eval_gen_len": 9.8767, + "eval_global_step": 8000, + "eval_loss": 1.3468295335769653, + "eval_rouge1": 48.9574, + "eval_rouge1_for_answerability_classification": 56.1026, + "eval_rouge1_for_cause_effect_classification": 55.022, + "eval_rouge1_for_coreference_resolution": 45.6687, + "eval_rouge1_for_data_to_text": 55.3953, + "eval_rouge1_for_dialogue_act_recognition": 55.6905, + "eval_rouge1_for_grammar_error_correction": 76.7999, + "eval_rouge1_for_keyword_tagging": 58.1265, + "eval_rouge1_for_overlap_extraction": 37.3778, + "eval_rouge1_for_question_rewriting": 69.672, + "eval_rouge1_for_task020_mctaco_answerability_classification": 51.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 43.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.0247, + "eval_rouge1_for_task035_winogrande_question_rewriting": 88.3504, + "eval_rouge1_for_task036_qasc_keyword_tagging": 72.786, + "eval_rouge1_for_task039_qasc_overlap_extraction": 34.4, + "eval_rouge1_for_task050_multirc_answerability_classification": 57.0, + "eval_rouge1_for_task102_commongen_data_to_text": 66.4001, + "eval_rouge1_for_task1152_bard_word_analogy": 25.0, + "eval_rouge1_for_task1153_bard_word_analogy": 28.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 25.0, + "eval_rouge1_for_task1155_bard_word_analogy": 63.0, + "eval_rouge1_for_task1156_bard_word_analogy": 29.0, + "eval_rouge1_for_task1157_bard_word_analogy": 58.0, + "eval_rouge1_for_task1158_bard_word_analogy": 17.0, + "eval_rouge1_for_task1159_bard_word_analogy": 17.3333, + "eval_rouge1_for_task1161_coda_19_title_generation": 33.9776, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 81.4136, + "eval_rouge1_for_task121_zest_question_rewriting": 47.8937, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 13.7515, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.475, + "eval_rouge1_for_task1356_xlsum_title_generation": 26.0641, + "eval_rouge1_for_task1358_xlsum_title_generation": 37.6603, + "eval_rouge1_for_task1385_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 45.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 57.0, + "eval_rouge1_for_task1407_dart_data_to_text": 37.0564, + "eval_rouge1_for_task1409_dart_data_to_text": 54.9653, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 66.4853, + "eval_rouge1_for_task1439_doqa_answerability_classification": 41.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 11.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 46.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 43.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 36.6854, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.1144, + "eval_rouge1_for_task1562_zest_question_rewriting": 53.4412, + "eval_rouge1_for_task1586_scifact_title_generation": 39.0032, + "eval_rouge1_for_task1598_nyc_data_to_text": 52.1028, + "eval_rouge1_for_task1612_sick_textual_entailment": 37.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 83.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.3887, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 94.7877, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 62.0, + "eval_rouge1_for_task1659_billsum_title_generation": 36.0315, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 71.2213, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 65.2689, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 58.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 29.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 30.0, + "eval_rouge1_for_task219_rocstories_title_generation": 21.5294, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 43.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 72.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 59.6333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 40.3557, + "eval_rouge1_for_task288_gigaword_title_generation": 34.5359, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 38.5667, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 57.1333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 54.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 80.6667, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 86.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 32.1928, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 34.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 75.3333, + "eval_rouge1_for_task418_persent_title_generation": 31.6891, + "eval_rouge1_for_task442_com_qa_question_rewriting": 71.5699, + "eval_rouge1_for_task500_scruples_title_generation": 15.3384, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 37.8081, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 63.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 35.1888, + "eval_rouge1_for_task602_wikitext_title_generation": 14.3047, + "eval_rouge1_for_task613_liar_keyword_tagging": 26.8333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 23.961, + "eval_rouge1_for_task619_ohsumed_title_generation": 44.8831, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 39.4225, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 57.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 26.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 41.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.5905, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 19.5, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 79.253, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 54.2487, + "eval_rouge1_for_task677_ollie_data_to_text": 28.3781, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 60.0, + "eval_rouge1_for_task743_eurlex_title_generation": 37.1182, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 9.5505, + "eval_rouge1_for_task769_qed_title_generation": 75.9167, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 62.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 74.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 44.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 50.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 58.4744, + "eval_rouge1_for_task892_gap_coreference_resolution": 32.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 47.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rouge1_for_task957_e2e_data_to_text": 56.1227, + "eval_rouge1_for_task970_sherliic_textual_entailment": 55.0, + "eval_rouge1_for_textual_entailment": 44.25, + "eval_rouge1_for_title_generation": 34.7084, + "eval_rouge1_for_word_analogy": 32.875, + "eval_rougeL": 47.4323, + "eval_rougeL_for_answerability_classification": 56.1026, + "eval_rougeL_for_cause_effect_classification": 54.7166, + "eval_rougeL_for_coreference_resolution": 45.6687, + "eval_rougeL_for_data_to_text": 46.4984, + "eval_rougeL_for_dialogue_act_recognition": 55.6905, + "eval_rougeL_for_grammar_error_correction": 75.7586, + "eval_rougeL_for_keyword_tagging": 57.7957, + "eval_rougeL_for_overlap_extraction": 36.788, + "eval_rougeL_for_question_rewriting": 65.9961, + "eval_rougeL_for_task020_mctaco_answerability_classification": 51.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 43.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 91.6485, + "eval_rougeL_for_task035_winogrande_question_rewriting": 86.8224, + "eval_rougeL_for_task036_qasc_keyword_tagging": 72.5194, + "eval_rougeL_for_task039_qasc_overlap_extraction": 34.4, + "eval_rougeL_for_task050_multirc_answerability_classification": 57.0, + "eval_rougeL_for_task102_commongen_data_to_text": 53.9429, + "eval_rougeL_for_task1152_bard_word_analogy": 25.0, + "eval_rougeL_for_task1153_bard_word_analogy": 28.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 25.0, + "eval_rougeL_for_task1155_bard_word_analogy": 63.0, + "eval_rougeL_for_task1156_bard_word_analogy": 29.0, + "eval_rougeL_for_task1157_bard_word_analogy": 58.0, + "eval_rougeL_for_task1158_bard_word_analogy": 17.0, + "eval_rougeL_for_task1159_bard_word_analogy": 17.3333, + "eval_rougeL_for_task1161_coda_19_title_generation": 27.2558, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 80.9551, + "eval_rougeL_for_task121_zest_question_rewriting": 42.1458, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.0253, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.6318, + "eval_rougeL_for_task1356_xlsum_title_generation": 22.3183, + "eval_rougeL_for_task1358_xlsum_title_generation": 32.4578, + "eval_rougeL_for_task1385_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 45.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 57.0, + "eval_rougeL_for_task1407_dart_data_to_text": 31.8528, + "eval_rougeL_for_task1409_dart_data_to_text": 43.1916, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 65.2678, + "eval_rougeL_for_task1439_doqa_answerability_classification": 41.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 11.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 46.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 43.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 32.3339, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.2495, + "eval_rougeL_for_task1562_zest_question_rewriting": 46.6859, + "eval_rougeL_for_task1586_scifact_title_generation": 31.5316, + "eval_rougeL_for_task1598_nyc_data_to_text": 39.8382, + "eval_rougeL_for_task1612_sick_textual_entailment": 37.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 83.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 79.4348, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 90.8916, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 62.0, + "eval_rougeL_for_task1659_billsum_title_generation": 30.1179, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 71.2213, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 56.0151, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 58.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 29.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 30.0, + "eval_rougeL_for_task219_rocstories_title_generation": 21.5294, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 43.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 72.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 59.6333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 39.1759, + "eval_rougeL_for_task288_gigaword_title_generation": 29.7735, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 38.5667, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 57.1333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 54.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 80.6667, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 86.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.6078, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 34.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 64.2408, + "eval_rougeL_for_task418_persent_title_generation": 27.4826, + "eval_rougeL_for_task442_com_qa_question_rewriting": 65.5087, + "eval_rougeL_for_task500_scruples_title_generation": 14.3838, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 37.0807, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 63.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 32.5213, + "eval_rougeL_for_task602_wikitext_title_generation": 14.1243, + "eval_rougeL_for_task613_liar_keyword_tagging": 26.8333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 22.4087, + "eval_rougeL_for_task619_ohsumed_title_generation": 37.275, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 38.0353, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 57.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 26.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 41.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.5905, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 19.5, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 78.1126, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 50.771, + "eval_rougeL_for_task677_ollie_data_to_text": 23.2534, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 60.0, + "eval_rougeL_for_task743_eurlex_title_generation": 31.9116, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 8.3837, + "eval_rougeL_for_task769_qed_title_generation": 75.6667, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 62.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 74.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 44.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 50.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 58.4744, + "eval_rougeL_for_task892_gap_coreference_resolution": 32.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 47.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rougeL_for_task957_e2e_data_to_text": 42.9115, + "eval_rougeL_for_task970_sherliic_textual_entailment": 55.0, + "eval_rougeL_for_textual_entailment": 44.25, + "eval_rougeL_for_title_generation": 31.3077, + "eval_rougeL_for_word_analogy": 32.875, + "eval_runtime": 635.463, + "eval_samples_per_second": 18.742, + "eval_steps_per_second": 0.587, + "step": 8000 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 1.06, + "step": 8500 + }, + { + "epoch": 1.94, + "eval_exact_match": 29.5886, + "eval_exact_match_for_answerability_classification": 55.6923, + "eval_exact_match_for_cause_effect_classification": 38.2857, + "eval_exact_match_for_coreference_resolution": 34.8571, + "eval_exact_match_for_data_to_text": 7.7482, + "eval_exact_match_for_dialogue_act_recognition": 51.5714, + "eval_exact_match_for_grammar_error_correction": 8.0, + "eval_exact_match_for_keyword_tagging": 42.6, + "eval_exact_match_for_overlap_extraction": 10.5, + "eval_exact_match_for_question_rewriting": 2.9091, + "eval_exact_match_for_task020_mctaco_answerability_classification": 48.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 39.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 5.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 6.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 34.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 21.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 54.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 25.0, + "eval_exact_match_for_task1153_bard_word_analogy": 30.0, + "eval_exact_match_for_task1154_bard_word_analogy": 26.0, + "eval_exact_match_for_task1155_bard_word_analogy": 67.0, + "eval_exact_match_for_task1156_bard_word_analogy": 28.0, + "eval_exact_match_for_task1157_bard_word_analogy": 62.0, + "eval_exact_match_for_task1158_bard_word_analogy": 22.0, + "eval_exact_match_for_task1159_bard_word_analogy": 17.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 12.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 10.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 54.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 1.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 8.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 16.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 29.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 48.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 52.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 35.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 46.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 2.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 51.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 16.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 51.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 45.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 56.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 67.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 21.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 41.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 77.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 25.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 31.0, + "eval_exact_match_for_task219_rocstories_title_generation": 0.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 51.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 43.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 86.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 51.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 6.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 41.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 56.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 53.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 59.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 43.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 56.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 22.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 4.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 77.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 3.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 21.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 15.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 54.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 29.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 36.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 89.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 5.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 36.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 75.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 52.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 65.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 90.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 47.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 39.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 60.0, + "eval_exact_match_for_textual_entailment": 38.4167, + "eval_exact_match_for_title_generation": 7.7354, + "eval_exact_match_for_word_analogy": 34.625, + "eval_f1": 46.7437, + "eval_f1_for_answerability_classification": 58.2564, + "eval_f1_for_cause_effect_classification": 55.1862, + "eval_f1_for_coreference_resolution": 42.1667, + "eval_f1_for_data_to_text": 50.9628, + "eval_f1_for_dialogue_act_recognition": 54.5, + "eval_f1_for_grammar_error_correction": 74.9937, + "eval_f1_for_keyword_tagging": 55.3724, + "eval_f1_for_overlap_extraction": 33.8858, + "eval_f1_for_question_rewriting": 68.8602, + "eval_f1_for_task020_mctaco_answerability_classification": 48.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 40.1667, + "eval_f1_for_task034_winogrande_question_rewriting": 91.7387, + "eval_f1_for_task035_winogrande_question_rewriting": 86.6072, + "eval_f1_for_task036_qasc_keyword_tagging": 67.2907, + "eval_f1_for_task039_qasc_overlap_extraction": 30.0, + "eval_f1_for_task050_multirc_answerability_classification": 54.0, + "eval_f1_for_task102_commongen_data_to_text": 52.6126, + "eval_f1_for_task1152_bard_word_analogy": 25.0, + "eval_f1_for_task1153_bard_word_analogy": 30.6667, + "eval_f1_for_task1154_bard_word_analogy": 26.0, + "eval_f1_for_task1155_bard_word_analogy": 67.0, + "eval_f1_for_task1156_bard_word_analogy": 28.0, + "eval_f1_for_task1157_bard_word_analogy": 62.0, + "eval_f1_for_task1158_bard_word_analogy": 22.0, + "eval_f1_for_task1159_bard_word_analogy": 17.6667, + "eval_f1_for_task1161_coda_19_title_generation": 33.5732, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 81.3295, + "eval_f1_for_task121_zest_question_rewriting": 44.92, + "eval_f1_for_task133_winowhy_coreference_resolution": 10.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 12.5309, + "eval_f1_for_task1344_rte_textual_entailment": 54.0, + "eval_f1_for_task1345_qqp_question_rewriting": 41.4939, + "eval_f1_for_task1356_xlsum_title_generation": 21.4395, + "eval_f1_for_task1358_xlsum_title_generation": 32.4548, + "eval_f1_for_task1385_anli_textual_entailment": 1.0, + "eval_f1_for_task1386_anli_textual_entailment": 8.0, + "eval_f1_for_task1387_anli_textual_entailment": 16.0, + "eval_f1_for_task1388_cb_textual_entailment": 29.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 48.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 52.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 35.0, + "eval_f1_for_task1407_dart_data_to_text": 33.9685, + "eval_f1_for_task1409_dart_data_to_text": 48.882, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 65.8746, + "eval_f1_for_task1439_doqa_answerability_classification": 46.0, + "eval_f1_for_task1442_doqa_answerability_classification": 52.0, + "eval_f1_for_task1516_imppres_textual_entailment": 2.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 51.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 35.778, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.1128, + "eval_f1_for_task1562_zest_question_rewriting": 50.5559, + "eval_f1_for_task1586_scifact_title_generation": 36.0014, + "eval_f1_for_task1598_nyc_data_to_text": 49.2276, + "eval_f1_for_task1612_sick_textual_entailment": 51.0, + "eval_f1_for_task1615_sick_textual_entailment": 45.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.4683, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_f1_for_task1631_open_pi_data_to_text": 95.2283, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 67.0, + "eval_f1_for_task1659_billsum_title_generation": 31.8644, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 66.6705, + "eval_f1_for_task1728_web_nlg_data_to_text": 61.1508, + "eval_f1_for_task190_snli_textual_entailment": 41.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 77.0, + "eval_f1_for_task201_multinli_textual_entailment": 25.0, + "eval_f1_for_task202_multinli_textual_entailment": 31.0, + "eval_f1_for_task219_rocstories_title_generation": 13.6999, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_f1_for_task232_iirc_answerability_classification": 51.0, + "eval_f1_for_task233_iirc_answerability_classification": 43.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 86.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 62.6333, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 37.7717, + "eval_f1_for_task288_gigaword_title_generation": 29.575, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 24.2667, + "eval_f1_for_task329_gap_coreference_resolution": 41.0, + "eval_f1_for_task330_gap_coreference_resolution": 64.4778, + "eval_f1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 79.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 81.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.9625, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 26.8333, + "eval_f1_for_task402_grailqa_question_rewriting": 71.5176, + "eval_f1_for_task418_persent_title_generation": 25.9881, + "eval_f1_for_task442_com_qa_question_rewriting": 69.4118, + "eval_f1_for_task500_scruples_title_generation": 14.2871, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 37.3187, + "eval_f1_for_task520_aquamuse_answerability_classification": 77.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 32.3988, + "eval_f1_for_task602_wikitext_title_generation": 11.696, + "eval_f1_for_task613_liar_keyword_tagging": 22.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 19.0078, + "eval_f1_for_task619_ohsumed_title_generation": 40.9733, + "eval_f1_for_task620_ohsumed_keyword_tagging": 37.7905, + "eval_f1_for_task623_ohsumed_keyword_tagging": 54.0, + "eval_f1_for_task640_e_snli_textual_entailment": 29.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 36.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 95.4476, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 6.3333, + "eval_f1_for_task670_ambigqa_question_rewriting": 77.0527, + "eval_f1_for_task671_ambigqa_question_rewriting": 64.3666, + "eval_f1_for_task677_ollie_data_to_text": 24.0005, + "eval_f1_for_task738_perspectrum_textual_entailment": 36.0, + "eval_f1_for_task743_eurlex_title_generation": 34.1735, + "eval_f1_for_task760_msr_sqa_data_to_text": 6.7546, + "eval_f1_for_task769_qed_title_generation": 86.4487, + "eval_f1_for_task827_copa_cause_effect_classification": 52.0, + "eval_f1_for_task828_copa_cause_effect_classification": 65.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 90.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.0, + "eval_f1_for_task890_gwsd_textual_entailment": 47.0, + "eval_f1_for_task891_gap_coreference_resolution": 61.9524, + "eval_f1_for_task892_gap_coreference_resolution": 39.0, + "eval_f1_for_task893_gap_coreference_resolution": 49.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task957_e2e_data_to_text": 54.1264, + "eval_f1_for_task970_sherliic_textual_entailment": 60.0, + "eval_f1_for_textual_entailment": 38.4167, + "eval_f1_for_title_generation": 32.4176, + "eval_f1_for_word_analogy": 34.7917, + "eval_gen_len": 10.3971, + "eval_global_step": 8500, + "eval_loss": 1.3123453855514526, + "eval_rouge1": 49.3054, + "eval_rouge1_for_answerability_classification": 58.2564, + "eval_rouge1_for_cause_effect_classification": 55.3926, + "eval_rouge1_for_coreference_resolution": 42.6969, + "eval_rouge1_for_data_to_text": 53.8517, + "eval_rouge1_for_dialogue_act_recognition": 58.3286, + "eval_rouge1_for_grammar_error_correction": 77.2256, + "eval_rouge1_for_keyword_tagging": 59.0379, + "eval_rouge1_for_overlap_extraction": 36.6491, + "eval_rouge1_for_question_rewriting": 70.3445, + "eval_rouge1_for_task020_mctaco_answerability_classification": 48.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 40.0667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 91.7852, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.5409, + "eval_rouge1_for_task036_qasc_keyword_tagging": 71.3516, + "eval_rouge1_for_task039_qasc_overlap_extraction": 34.7333, + "eval_rouge1_for_task050_multirc_answerability_classification": 54.0, + "eval_rouge1_for_task102_commongen_data_to_text": 65.8037, + "eval_rouge1_for_task1152_bard_word_analogy": 25.0, + "eval_rouge1_for_task1153_bard_word_analogy": 30.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 26.0, + "eval_rouge1_for_task1155_bard_word_analogy": 67.0, + "eval_rouge1_for_task1156_bard_word_analogy": 28.0, + "eval_rouge1_for_task1157_bard_word_analogy": 62.0, + "eval_rouge1_for_task1158_bard_word_analogy": 22.0, + "eval_rouge1_for_task1159_bard_word_analogy": 17.6667, + "eval_rouge1_for_task1161_coda_19_title_generation": 36.0343, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 81.5792, + "eval_rouge1_for_task121_zest_question_rewriting": 47.4708, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 10.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 12.9072, + "eval_rouge1_for_task1344_rte_textual_entailment": 54.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 44.4624, + "eval_rouge1_for_task1356_xlsum_title_generation": 25.0831, + "eval_rouge1_for_task1358_xlsum_title_generation": 36.5589, + "eval_rouge1_for_task1385_anli_textual_entailment": 31.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 36.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 42.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 46.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 48.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 52.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 41.8, + "eval_rouge1_for_task1407_dart_data_to_text": 33.5682, + "eval_rouge1_for_task1409_dart_data_to_text": 50.5488, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 67.0605, + "eval_rouge1_for_task1439_doqa_answerability_classification": 46.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 2.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 51.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 38.006, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.3907, + "eval_rouge1_for_task1562_zest_question_rewriting": 53.5655, + "eval_rouge1_for_task1586_scifact_title_generation": 39.8636, + "eval_rouge1_for_task1598_nyc_data_to_text": 51.5956, + "eval_rouge1_for_task1612_sick_textual_entailment": 51.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 81.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.7336, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 95.3237, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 67.0, + "eval_rouge1_for_task1659_billsum_title_generation": 34.0536, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 66.6705, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 63.5355, + "eval_rouge1_for_task190_snli_textual_entailment": 41.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 77.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 25.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 31.0, + "eval_rouge1_for_task219_rocstories_title_generation": 19.9826, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 51.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 43.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 86.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 63.3, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 38.5649, + "eval_rouge1_for_task288_gigaword_title_generation": 31.9905, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 24.5333, + "eval_rouge1_for_task329_gap_coreference_resolution": 41.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 64.6, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 79.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 81.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 32.0851, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 33.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 73.5367, + "eval_rouge1_for_task418_persent_title_generation": 29.5551, + "eval_rouge1_for_task442_com_qa_question_rewriting": 72.573, + "eval_rouge1_for_task500_scruples_title_generation": 16.486, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 37.8329, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 77.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 34.4395, + "eval_rouge1_for_task602_wikitext_title_generation": 13.051, + "eval_rouge1_for_task613_liar_keyword_tagging": 29.5, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 20.3295, + "eval_rouge1_for_task619_ohsumed_title_generation": 44.7027, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 44.3905, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 54.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 29.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 36.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.9476, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 6.3333, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 77.5604, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 64.9814, + "eval_rouge1_for_task677_ollie_data_to_text": 26.6034, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 56.0, + "eval_rouge1_for_task743_eurlex_title_generation": 36.7558, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.4897, + "eval_rouge1_for_task769_qed_title_generation": 86.382, + "eval_rouge1_for_task827_copa_cause_effect_classification": 52.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 65.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 90.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 47.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 62.0857, + "eval_rouge1_for_task892_gap_coreference_resolution": 39.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 49.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rouge1_for_task957_e2e_data_to_text": 55.8889, + "eval_rouge1_for_task970_sherliic_textual_entailment": 60.0, + "eval_rouge1_for_textual_entailment": 44.9861, + "eval_rouge1_for_title_generation": 34.8429, + "eval_rouge1_for_word_analogy": 34.7917, + "eval_rougeL": 47.8434, + "eval_rougeL_for_answerability_classification": 58.2564, + "eval_rougeL_for_cause_effect_classification": 55.0482, + "eval_rougeL_for_coreference_resolution": 42.6969, + "eval_rougeL_for_data_to_text": 45.91, + "eval_rougeL_for_dialogue_act_recognition": 58.3286, + "eval_rougeL_for_grammar_error_correction": 76.16, + "eval_rougeL_for_keyword_tagging": 58.2705, + "eval_rougeL_for_overlap_extraction": 35.8804, + "eval_rougeL_for_question_rewriting": 66.7675, + "eval_rougeL_for_task020_mctaco_answerability_classification": 48.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 40.0667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 91.243, + "eval_rougeL_for_task035_winogrande_question_rewriting": 86.1591, + "eval_rougeL_for_task036_qasc_keyword_tagging": 69.7311, + "eval_rougeL_for_task039_qasc_overlap_extraction": 34.7333, + "eval_rougeL_for_task050_multirc_answerability_classification": 54.0, + "eval_rougeL_for_task102_commongen_data_to_text": 56.821, + "eval_rougeL_for_task1152_bard_word_analogy": 25.0, + "eval_rougeL_for_task1153_bard_word_analogy": 30.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 26.0, + "eval_rougeL_for_task1155_bard_word_analogy": 67.0, + "eval_rougeL_for_task1156_bard_word_analogy": 28.0, + "eval_rougeL_for_task1157_bard_word_analogy": 62.0, + "eval_rougeL_for_task1158_bard_word_analogy": 22.0, + "eval_rougeL_for_task1159_bard_word_analogy": 17.6667, + "eval_rougeL_for_task1161_coda_19_title_generation": 29.3539, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 80.8676, + "eval_rougeL_for_task121_zest_question_rewriting": 41.2875, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 10.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.0312, + "eval_rougeL_for_task1344_rte_textual_entailment": 54.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 41.3188, + "eval_rougeL_for_task1356_xlsum_title_generation": 21.2921, + "eval_rougeL_for_task1358_xlsum_title_generation": 31.2636, + "eval_rougeL_for_task1385_anli_textual_entailment": 31.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 36.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 42.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 46.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 48.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 52.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 41.8, + "eval_rougeL_for_task1407_dart_data_to_text": 29.382, + "eval_rougeL_for_task1409_dart_data_to_text": 40.4023, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 65.8318, + "eval_rougeL_for_task1439_doqa_answerability_classification": 46.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 2.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 51.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 34.1836, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.4882, + "eval_rougeL_for_task1562_zest_question_rewriting": 46.791, + "eval_rougeL_for_task1586_scifact_title_generation": 32.0492, + "eval_rougeL_for_task1598_nyc_data_to_text": 39.8645, + "eval_rougeL_for_task1612_sick_textual_entailment": 51.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 81.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.3669, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 93.3351, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 67.0, + "eval_rougeL_for_task1659_billsum_title_generation": 28.9902, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 66.6705, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.862, + "eval_rougeL_for_task190_snli_textual_entailment": 41.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 77.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 25.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 31.0, + "eval_rougeL_for_task219_rocstories_title_generation": 19.9826, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 51.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 43.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 86.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 63.3, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 37.0275, + "eval_rougeL_for_task288_gigaword_title_generation": 27.2952, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 24.5333, + "eval_rougeL_for_task329_gap_coreference_resolution": 41.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 64.6, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 79.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 81.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.4187, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 33.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 61.9412, + "eval_rougeL_for_task418_persent_title_generation": 25.6505, + "eval_rougeL_for_task442_com_qa_question_rewriting": 68.112, + "eval_rougeL_for_task500_scruples_title_generation": 15.1658, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 37.5047, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 77.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 31.6734, + "eval_rougeL_for_task602_wikitext_title_generation": 12.9788, + "eval_rougeL_for_task613_liar_keyword_tagging": 29.5, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 18.5857, + "eval_rougeL_for_task619_ohsumed_title_generation": 37.4742, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 42.1738, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 54.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 29.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 36.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.9476, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 6.3333, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 76.3739, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 62.9808, + "eval_rougeL_for_task677_ollie_data_to_text": 21.2602, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 56.0, + "eval_rougeL_for_task743_eurlex_title_generation": 31.1406, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.5798, + "eval_rougeL_for_task769_qed_title_generation": 86.382, + "eval_rougeL_for_task827_copa_cause_effect_classification": 52.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 65.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 90.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 47.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 62.0857, + "eval_rougeL_for_task892_gap_coreference_resolution": 39.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 49.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rougeL_for_task957_e2e_data_to_text": 42.5786, + "eval_rougeL_for_task970_sherliic_textual_entailment": 60.0, + "eval_rougeL_for_textual_entailment": 44.9861, + "eval_rougeL_for_title_generation": 31.521, + "eval_rougeL_for_word_analogy": 34.7917, + "eval_runtime": 648.2894, + "eval_samples_per_second": 18.371, + "eval_steps_per_second": 0.575, + "step": 8500 + }, + { + "epoch": 2.0, + "step": 8748, + "total_flos": 3.844167635356877e+17, + "train_loss": 1.179815354279639, + "train_runtime": 29462.8242, + "train_samples_per_second": 4.75, + "train_steps_per_second": 0.297 + } + ], + "max_steps": 8748, + "num_train_epochs": 2, + "total_flos": 3.844167635356877e+17, + "trial_name": null, + "trial_params": null +}