diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,11533 @@ +{ + "best_metric": 40.4768, + "best_model_checkpoint": "/output/checkpoint-5000", + "epoch": 2.0, + "global_step": 8748, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 3.1406, + "step": 1 + }, + { + "epoch": 0.0, + "eval_exact_match": 0.487, + "eval_exact_match_for_answerability_classification": 0.0, + "eval_exact_match_for_cause_effect_classification": 0.0, + "eval_exact_match_for_coreference_resolution": 0.0, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 0.0, + "eval_exact_match_for_grammar_error_correction": 0.0, + "eval_exact_match_for_keyword_tagging": 0.0, + "eval_exact_match_for_overlap_extraction": 0.0, + "eval_exact_match_for_question_rewriting": 0.0, + "eval_exact_match_for_task020_mctaco_answerability_classification": 0.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 0.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 0.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 0.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 0.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 0.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 3.0, + "eval_exact_match_for_task1155_bard_word_analogy": 3.0, + "eval_exact_match_for_task1156_bard_word_analogy": 0.0, + "eval_exact_match_for_task1157_bard_word_analogy": 1.0, + "eval_exact_match_for_task1158_bard_word_analogy": 0.0, + "eval_exact_match_for_task1159_bard_word_analogy": 0.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 0.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 0.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 0.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 0.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 0.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 0.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 0.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 0.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 0.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 47.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 0.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 0.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 4.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 0.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 0.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 0.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 0.0, + "eval_exact_match_for_task219_rocstories_title_generation": 0.0, + "eval_exact_match_for_task220_rocstories_title_generation": 0.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 0.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 0.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 0.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 0.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 0.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 0.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 0.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 0.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 0.0, + "eval_exact_match_for_task613_liar_keyword_tagging": 0.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 0.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 0.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 0.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 0.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 0.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 0.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 0.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 0.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 0.0, + "eval_exact_match_for_textual_entailment": 2.125, + "eval_exact_match_for_title_generation": 0.0, + "eval_exact_match_for_word_analogy": 0.875, + "eval_f1": 6.0684, + "eval_f1_for_answerability_classification": 3.5998, + "eval_f1_for_cause_effect_classification": 4.9812, + "eval_f1_for_coreference_resolution": 2.4696, + "eval_f1_for_data_to_text": 14.3001, + "eval_f1_for_dialogue_act_recognition": 2.1845, + "eval_f1_for_grammar_error_correction": 14.4694, + "eval_f1_for_keyword_tagging": 2.4538, + "eval_f1_for_overlap_extraction": 12.6283, + "eval_f1_for_question_rewriting": 14.4142, + "eval_f1_for_task020_mctaco_answerability_classification": 1.4664, + "eval_f1_for_task033_winogrande_coreference_resolution": 0.3728, + "eval_f1_for_task034_winogrande_question_rewriting": 8.0968, + "eval_f1_for_task035_winogrande_question_rewriting": 11.7204, + "eval_f1_for_task036_qasc_keyword_tagging": 7.0934, + "eval_f1_for_task039_qasc_overlap_extraction": 0.9301, + "eval_f1_for_task050_multirc_answerability_classification": 1.278, + "eval_f1_for_task102_commongen_data_to_text": 7.023, + "eval_f1_for_task1152_bard_word_analogy": 0.1767, + "eval_f1_for_task1153_bard_word_analogy": 0.0755, + "eval_f1_for_task1154_bard_word_analogy": 8.135, + "eval_f1_for_task1155_bard_word_analogy": 24.5006, + "eval_f1_for_task1156_bard_word_analogy": 8.0703, + "eval_f1_for_task1157_bard_word_analogy": 2.4818, + "eval_f1_for_task1158_bard_word_analogy": 0.7902, + "eval_f1_for_task1159_bard_word_analogy": 2.1392, + "eval_f1_for_task1161_coda_19_title_generation": 11.2568, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 24.8465, + "eval_f1_for_task121_zest_question_rewriting": 14.6629, + "eval_f1_for_task133_winowhy_coreference_resolution": 2.5342, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 0.6328, + "eval_f1_for_task1344_rte_textual_entailment": 2.5958, + "eval_f1_for_task1345_qqp_question_rewriting": 10.6155, + "eval_f1_for_task1356_xlsum_title_generation": 4.9234, + "eval_f1_for_task1358_xlsum_title_generation": 5.5446, + "eval_f1_for_task1385_anli_textual_entailment": 0.9656, + "eval_f1_for_task1386_anli_textual_entailment": 0.108, + "eval_f1_for_task1387_anli_textual_entailment": 0.7575, + "eval_f1_for_task1388_cb_textual_entailment": 0.9532, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 1.7849, + "eval_f1_for_task1391_winogrande_coreference_resolution": 2.6789, + "eval_f1_for_task1393_copa_cause_effect_classification": 2.8015, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 0.0971, + "eval_f1_for_task1407_dart_data_to_text": 8.7356, + "eval_f1_for_task1409_dart_data_to_text": 9.8665, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 8.6954, + "eval_f1_for_task1439_doqa_answerability_classification": 1.1133, + "eval_f1_for_task1442_doqa_answerability_classification": 1.1405, + "eval_f1_for_task1516_imppres_textual_entailment": 1.0466, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 1.44, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 5.7927, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 0.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 1.7004, + "eval_f1_for_task1540_peer_read_title_generation": 6.8907, + "eval_f1_for_task1554_scitail_textual_entailment": 47.0857, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 20.2434, + "eval_f1_for_task1562_zest_question_rewriting": 14.7576, + "eval_f1_for_task1586_scifact_title_generation": 13.2241, + "eval_f1_for_task1598_nyc_data_to_text": 21.85, + "eval_f1_for_task1612_sick_textual_entailment": 1.8728, + "eval_f1_for_task1615_sick_textual_entailment": 5.1684, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 18.1288, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 1.2891, + "eval_f1_for_task1631_open_pi_data_to_text": 25.8481, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 3.0042, + "eval_f1_for_task1659_billsum_title_generation": 17.0531, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 8.5934, + "eval_f1_for_task1728_web_nlg_data_to_text": 8.5828, + "eval_f1_for_task190_snli_textual_entailment": 1.2079, + "eval_f1_for_task199_multinli_textual_entailment": 1.5757, + "eval_f1_for_task200_multinli_textual_entailment": 1.5628, + "eval_f1_for_task201_multinli_textual_entailment": 1.603, + "eval_f1_for_task202_multinli_textual_entailment": 1.6379, + "eval_f1_for_task219_rocstories_title_generation": 1.4438, + "eval_f1_for_task220_rocstories_title_generation": 2.2176, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 0.1827, + "eval_f1_for_task232_iirc_answerability_classification": 1.9579, + "eval_f1_for_task233_iirc_answerability_classification": 1.1885, + "eval_f1_for_task242_tweetqa_answerability_classification": 3.9575, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 6.4206, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 24.3264, + "eval_f1_for_task288_gigaword_title_generation": 3.8522, + "eval_f1_for_task290_tellmewhy_answerability_classification": 27.8586, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 1.1684, + "eval_f1_for_task329_gap_coreference_resolution": 2.39, + "eval_f1_for_task330_gap_coreference_resolution": 0.8617, + "eval_f1_for_task349_squad2.0_answerability_classification": 1.1492, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 4.7957, + "eval_f1_for_task391_cod3s_cause_effect_classification": 2.4726, + "eval_f1_for_task392_cod3s_cause_effect_classification": 2.1317, + "eval_f1_for_task393_cod3s_cause_effect_classification": 7.8799, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 0.7372, + "eval_f1_for_task402_grailqa_question_rewriting": 13.0018, + "eval_f1_for_task418_persent_title_generation": 6.2326, + "eval_f1_for_task442_com_qa_question_rewriting": 11.9546, + "eval_f1_for_task500_scruples_title_generation": 5.0499, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 7.7122, + "eval_f1_for_task520_aquamuse_answerability_classification": 1.211, + "eval_f1_for_task569_recipe_nlg_title_generation": 1.914, + "eval_f1_for_task602_wikitext_title_generation": 0.5961, + "eval_f1_for_task613_liar_keyword_tagging": 0.1167, + "eval_f1_for_task614_glucose_cause_effect_classification": 17.5561, + "eval_f1_for_task619_ohsumed_title_generation": 10.3511, + "eval_f1_for_task620_ohsumed_keyword_tagging": 1.9546, + "eval_f1_for_task623_ohsumed_keyword_tagging": 1.3547, + "eval_f1_for_task640_e_snli_textual_entailment": 2.9422, + "eval_f1_for_task641_e_snli_textual_entailment": 0.8142, + "eval_f1_for_task642_e_snli_textual_entailment": 1.1551, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 1.7494, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 2.8934, + "eval_f1_for_task670_ambigqa_question_rewriting": 15.2822, + "eval_f1_for_task671_ambigqa_question_rewriting": 15.4888, + "eval_f1_for_task677_ollie_data_to_text": 3.2598, + "eval_f1_for_task738_perspectrum_textual_entailment": 0.7372, + "eval_f1_for_task743_eurlex_title_generation": 14.1839, + "eval_f1_for_task760_msr_sqa_data_to_text": 8.1027, + "eval_f1_for_task769_qed_title_generation": 7.922, + "eval_f1_for_task827_copa_cause_effect_classification": 0.8687, + "eval_f1_for_task828_copa_cause_effect_classification": 1.1583, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 2.3561, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 0.5491, + "eval_f1_for_task890_gwsd_textual_entailment": 0.906, + "eval_f1_for_task891_gap_coreference_resolution": 1.2485, + "eval_f1_for_task892_gap_coreference_resolution": 1.5657, + "eval_f1_for_task893_gap_coreference_resolution": 1.325, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 1.4293, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 1.1388, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 1.6458, + "eval_f1_for_task957_e2e_data_to_text": 30.8467, + "eval_f1_for_task970_sherliic_textual_entailment": 0.4454, + "eval_f1_for_textual_entailment": 3.3665, + "eval_f1_for_title_generation": 6.7772, + "eval_f1_for_word_analogy": 5.7962, + "eval_gen_len": 111.3507, + "eval_global_step": 1, + "eval_loss": 5.512484073638916, + "eval_rouge1": 6.6886, + "eval_rouge1_for_answerability_classification": 3.5758, + "eval_rouge1_for_cause_effect_classification": 5.7473, + "eval_rouge1_for_coreference_resolution": 2.5159, + "eval_rouge1_for_data_to_text": 17.0567, + "eval_rouge1_for_dialogue_act_recognition": 2.5889, + "eval_rouge1_for_grammar_error_correction": 16.3928, + "eval_rouge1_for_keyword_tagging": 2.7052, + "eval_rouge1_for_overlap_extraction": 13.002, + "eval_rouge1_for_question_rewriting": 14.9517, + "eval_rouge1_for_task020_mctaco_answerability_classification": 1.4354, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 0.3721, + "eval_rouge1_for_task034_winogrande_question_rewriting": 8.4882, + "eval_rouge1_for_task035_winogrande_question_rewriting": 12.1274, + "eval_rouge1_for_task036_qasc_keyword_tagging": 7.5556, + "eval_rouge1_for_task039_qasc_overlap_extraction": 1.0175, + "eval_rouge1_for_task050_multirc_answerability_classification": 1.2675, + "eval_rouge1_for_task102_commongen_data_to_text": 7.8917, + "eval_rouge1_for_task1152_bard_word_analogy": 0.1767, + "eval_rouge1_for_task1153_bard_word_analogy": 0.0755, + "eval_rouge1_for_task1154_bard_word_analogy": 8.135, + "eval_rouge1_for_task1155_bard_word_analogy": 24.5006, + "eval_rouge1_for_task1156_bard_word_analogy": 8.0703, + "eval_rouge1_for_task1157_bard_word_analogy": 2.4818, + "eval_rouge1_for_task1158_bard_word_analogy": 0.7902, + "eval_rouge1_for_task1159_bard_word_analogy": 2.1392, + "eval_rouge1_for_task1161_coda_19_title_generation": 12.2381, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 25.7878, + "eval_rouge1_for_task121_zest_question_rewriting": 15.2908, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 2.5297, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 0.6315, + "eval_rouge1_for_task1344_rte_textual_entailment": 2.5416, + "eval_rouge1_for_task1345_qqp_question_rewriting": 10.9491, + "eval_rouge1_for_task1356_xlsum_title_generation": 6.1183, + "eval_rouge1_for_task1358_xlsum_title_generation": 6.4101, + "eval_rouge1_for_task1385_anli_textual_entailment": 0.9432, + "eval_rouge1_for_task1386_anli_textual_entailment": 0.1043, + "eval_rouge1_for_task1387_anli_textual_entailment": 0.7436, + "eval_rouge1_for_task1388_cb_textual_entailment": 0.9166, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 1.7265, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 2.6648, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 2.7906, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 3.1769, + "eval_rouge1_for_task1407_dart_data_to_text": 9.2913, + "eval_rouge1_for_task1409_dart_data_to_text": 12.5181, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 11.2638, + "eval_rouge1_for_task1439_doqa_answerability_classification": 1.0537, + "eval_rouge1_for_task1442_doqa_answerability_classification": 1.0725, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.9711, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 1.4294, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 5.7874, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 0.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 1.5156, + "eval_rouge1_for_task1540_peer_read_title_generation": 7.9768, + "eval_rouge1_for_task1554_scitail_textual_entailment": 47.0842, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 21.5218, + "eval_rouge1_for_task1562_zest_question_rewriting": 15.1218, + "eval_rouge1_for_task1586_scifact_title_generation": 14.5169, + "eval_rouge1_for_task1598_nyc_data_to_text": 25.5042, + "eval_rouge1_for_task1612_sick_textual_entailment": 1.7596, + "eval_rouge1_for_task1615_sick_textual_entailment": 24.4814, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 18.6822, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 1.2661, + "eval_rouge1_for_task1631_open_pi_data_to_text": 26.4392, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 2.9858, + "eval_rouge1_for_task1659_billsum_title_generation": 18.2468, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 8.5265, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 20.7515, + "eval_rouge1_for_task190_snli_textual_entailment": 1.2057, + "eval_rouge1_for_task199_multinli_textual_entailment": 1.5045, + "eval_rouge1_for_task200_multinli_textual_entailment": 1.7305, + "eval_rouge1_for_task201_multinli_textual_entailment": 1.8945, + "eval_rouge1_for_task202_multinli_textual_entailment": 1.8592, + "eval_rouge1_for_task219_rocstories_title_generation": 1.7832, + "eval_rouge1_for_task220_rocstories_title_generation": 2.2098, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 0.1805, + "eval_rouge1_for_task232_iirc_answerability_classification": 1.9358, + "eval_rouge1_for_task233_iirc_answerability_classification": 1.1629, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 3.9557, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 6.4693, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 24.9865, + "eval_rouge1_for_task288_gigaword_title_generation": 4.0885, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 27.8586, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 1.1165, + "eval_rouge1_for_task329_gap_coreference_resolution": 2.3353, + "eval_rouge1_for_task330_gap_coreference_resolution": 0.9175, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 1.1274, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 4.3023, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 2.4486, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 2.09, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 7.8886, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 1.2032, + "eval_rouge1_for_task402_grailqa_question_rewriting": 13.4587, + "eval_rouge1_for_task418_persent_title_generation": 7.0584, + "eval_rouge1_for_task442_com_qa_question_rewriting": 13.031, + "eval_rouge1_for_task500_scruples_title_generation": 5.5152, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 8.1507, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 1.1832, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 2.4031, + "eval_rouge1_for_task602_wikitext_title_generation": 0.5367, + "eval_rouge1_for_task613_liar_keyword_tagging": 0.4409, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 21.8157, + "eval_rouge1_for_task619_ohsumed_title_generation": 10.9281, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 2.4242, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 1.3049, + "eval_rouge1_for_task640_e_snli_textual_entailment": 2.9368, + "eval_rouge1_for_task641_e_snli_textual_entailment": 0.8137, + "eval_rouge1_for_task642_e_snli_textual_entailment": 1.1507, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 1.8003, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 3.2683, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 15.5755, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 15.9561, + "eval_rouge1_for_task677_ollie_data_to_text": 3.3919, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 0.735, + "eval_rouge1_for_task743_eurlex_title_generation": 15.2672, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 8.8458, + "eval_rouge1_for_task769_qed_title_generation": 8.2243, + "eval_rouge1_for_task827_copa_cause_effect_classification": 2.0163, + "eval_rouge1_for_task828_copa_cause_effect_classification": 1.1812, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 2.3492, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 0.9911, + "eval_rouge1_for_task890_gwsd_textual_entailment": 0.8974, + "eval_rouge1_for_task891_gap_coreference_resolution": 1.2565, + "eval_rouge1_for_task892_gap_coreference_resolution": 1.5302, + "eval_rouge1_for_task893_gap_coreference_resolution": 1.3061, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 1.4202, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 1.1342, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 1.6416, + "eval_rouge1_for_task957_e2e_data_to_text": 32.8008, + "eval_rouge1_for_task970_sherliic_textual_entailment": 0.3945, + "eval_rouge1_for_textual_entailment": 4.1789, + "eval_rouge1_for_title_generation": 7.4113, + "eval_rouge1_for_word_analogy": 5.7962, + "eval_rougeL": 6.1182, + "eval_rougeL_for_answerability_classification": 3.5758, + "eval_rougeL_for_cause_effect_classification": 5.2833, + "eval_rougeL_for_coreference_resolution": 2.514, + "eval_rougeL_for_data_to_text": 13.3361, + "eval_rougeL_for_dialogue_act_recognition": 2.5789, + "eval_rougeL_for_grammar_error_correction": 15.1877, + "eval_rougeL_for_keyword_tagging": 2.6454, + "eval_rougeL_for_overlap_extraction": 12.704, + "eval_rougeL_for_question_rewriting": 14.0633, + "eval_rougeL_for_task020_mctaco_answerability_classification": 1.4354, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 0.3721, + "eval_rougeL_for_task034_winogrande_question_rewriting": 8.4672, + "eval_rougeL_for_task035_winogrande_question_rewriting": 10.8789, + "eval_rougeL_for_task036_qasc_keyword_tagging": 7.327, + "eval_rougeL_for_task039_qasc_overlap_extraction": 1.0175, + "eval_rougeL_for_task050_multirc_answerability_classification": 1.2675, + "eval_rougeL_for_task102_commongen_data_to_text": 7.873, + "eval_rougeL_for_task1152_bard_word_analogy": 0.1767, + "eval_rougeL_for_task1153_bard_word_analogy": 0.0755, + "eval_rougeL_for_task1154_bard_word_analogy": 8.135, + "eval_rougeL_for_task1155_bard_word_analogy": 24.5006, + "eval_rougeL_for_task1156_bard_word_analogy": 8.0703, + "eval_rougeL_for_task1157_bard_word_analogy": 2.4818, + "eval_rougeL_for_task1158_bard_word_analogy": 0.7902, + "eval_rougeL_for_task1159_bard_word_analogy": 2.1392, + "eval_rougeL_for_task1161_coda_19_title_generation": 9.5137, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 25.2281, + "eval_rougeL_for_task121_zest_question_rewriting": 14.2364, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 2.5297, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 0.6124, + "eval_rougeL_for_task1344_rte_textual_entailment": 2.5416, + "eval_rougeL_for_task1345_qqp_question_rewriting": 10.188, + "eval_rougeL_for_task1356_xlsum_title_generation": 4.6355, + "eval_rougeL_for_task1358_xlsum_title_generation": 6.1761, + "eval_rougeL_for_task1385_anli_textual_entailment": 0.9432, + "eval_rougeL_for_task1386_anli_textual_entailment": 0.1043, + "eval_rougeL_for_task1387_anli_textual_entailment": 0.7436, + "eval_rougeL_for_task1388_cb_textual_entailment": 0.9166, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 1.7265, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 2.6648, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 2.7906, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 3.1067, + "eval_rougeL_for_task1407_dart_data_to_text": 7.655, + "eval_rougeL_for_task1409_dart_data_to_text": 10.4114, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 9.21, + "eval_rougeL_for_task1439_doqa_answerability_classification": 1.0537, + "eval_rougeL_for_task1442_doqa_answerability_classification": 1.0725, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.9711, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 1.4294, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 5.7874, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 0.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 1.5156, + "eval_rougeL_for_task1540_peer_read_title_generation": 6.372, + "eval_rougeL_for_task1554_scitail_textual_entailment": 47.0842, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 21.1654, + "eval_rougeL_for_task1562_zest_question_rewriting": 14.0035, + "eval_rougeL_for_task1586_scifact_title_generation": 11.6815, + "eval_rougeL_for_task1598_nyc_data_to_text": 18.2812, + "eval_rougeL_for_task1612_sick_textual_entailment": 1.7596, + "eval_rougeL_for_task1615_sick_textual_entailment": 24.4814, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 18.0899, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 1.2661, + "eval_rougeL_for_task1631_open_pi_data_to_text": 21.3898, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 2.9858, + "eval_rougeL_for_task1659_billsum_title_generation": 14.8259, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 8.5265, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 17.6929, + "eval_rougeL_for_task190_snli_textual_entailment": 1.2057, + "eval_rougeL_for_task199_multinli_textual_entailment": 1.5045, + "eval_rougeL_for_task200_multinli_textual_entailment": 1.7305, + "eval_rougeL_for_task201_multinli_textual_entailment": 1.8945, + "eval_rougeL_for_task202_multinli_textual_entailment": 1.8592, + "eval_rougeL_for_task219_rocstories_title_generation": 1.7285, + "eval_rougeL_for_task220_rocstories_title_generation": 2.2098, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 0.1805, + "eval_rougeL_for_task232_iirc_answerability_classification": 1.9358, + "eval_rougeL_for_task233_iirc_answerability_classification": 1.1629, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 3.9557, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 6.4693, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 24.3905, + "eval_rougeL_for_task288_gigaword_title_generation": 3.9291, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 27.8586, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 1.0895, + "eval_rougeL_for_task329_gap_coreference_resolution": 2.3353, + "eval_rougeL_for_task330_gap_coreference_resolution": 0.9175, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 1.1274, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 4.3023, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 2.4486, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 2.09, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 7.4503, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 1.2032, + "eval_rougeL_for_task402_grailqa_question_rewriting": 11.5793, + "eval_rougeL_for_task418_persent_title_generation": 5.9429, + "eval_rougeL_for_task442_com_qa_question_rewriting": 12.4914, + "eval_rougeL_for_task500_scruples_title_generation": 4.4259, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 7.353, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 1.1832, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 2.2485, + "eval_rougeL_for_task602_wikitext_title_generation": 0.5367, + "eval_rougeL_for_task613_liar_keyword_tagging": 0.4409, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 19.0061, + "eval_rougeL_for_task619_ohsumed_title_generation": 8.7481, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 2.3537, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 1.3049, + "eval_rougeL_for_task640_e_snli_textual_entailment": 2.9368, + "eval_rougeL_for_task641_e_snli_textual_entailment": 0.8137, + "eval_rougeL_for_task642_e_snli_textual_entailment": 1.1507, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 1.8003, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 3.2683, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 14.8847, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 14.6494, + "eval_rougeL_for_task677_ollie_data_to_text": 3.0981, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 0.735, + "eval_rougeL_for_task743_eurlex_title_generation": 12.3752, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 7.4735, + "eval_rougeL_for_task769_qed_title_generation": 8.2021, + "eval_rougeL_for_task827_copa_cause_effect_classification": 2.0163, + "eval_rougeL_for_task828_copa_cause_effect_classification": 1.1812, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 2.3492, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 0.9911, + "eval_rougeL_for_task890_gwsd_textual_entailment": 0.8974, + "eval_rougeL_for_task891_gap_coreference_resolution": 1.2565, + "eval_rougeL_for_task892_gap_coreference_resolution": 1.5302, + "eval_rougeL_for_task893_gap_coreference_resolution": 1.3061, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 1.4202, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 1.1342, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 1.6416, + "eval_rougeL_for_task957_e2e_data_to_text": 21.8114, + "eval_rougeL_for_task970_sherliic_textual_entailment": 0.3945, + "eval_rougeL_for_textual_entailment": 4.1789, + "eval_rougeL_for_title_generation": 6.2461, + "eval_rougeL_for_word_analogy": 5.7962, + "eval_runtime": 1049.3339, + "eval_samples_per_second": 11.35, + "eval_steps_per_second": 0.355, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 3.144, + "step": 50 + }, + { + "epoch": 0.01, + "eval_exact_match": 21.0915, + "eval_exact_match_for_answerability_classification": 44.9231, + "eval_exact_match_for_cause_effect_classification": 36.7143, + "eval_exact_match_for_coreference_resolution": 19.7857, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 33.7143, + "eval_exact_match_for_grammar_error_correction": 1.5, + "eval_exact_match_for_keyword_tagging": 9.8, + "eval_exact_match_for_overlap_extraction": 1.0, + "eval_exact_match_for_question_rewriting": 0.6364, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 4.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 0.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 2.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 7.0, + "eval_exact_match_for_task1153_bard_word_analogy": 2.0, + "eval_exact_match_for_task1154_bard_word_analogy": 7.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 4.0, + "eval_exact_match_for_task1157_bard_word_analogy": 15.0, + "eval_exact_match_for_task1158_bard_word_analogy": 17.0, + "eval_exact_match_for_task1159_bard_word_analogy": 7.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 0.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 52.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 57.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 41.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 34.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 49.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 3.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 45.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 4.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 25.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 1.0, + "eval_exact_match_for_task220_rocstories_title_generation": 49.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 38.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 23.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 52.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 2.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 0.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 0.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 56.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 0.0, + "eval_exact_match_for_task613_liar_keyword_tagging": 0.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 0.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 47.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 0.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 0.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 1.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 55.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 4.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 31.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 28.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 39.0417, + "eval_exact_match_for_title_generation": 2.8587, + "eval_exact_match_for_word_analogy": 13.625, + "eval_f1": 29.0381, + "eval_f1_for_answerability_classification": 47.4635, + "eval_f1_for_cause_effect_classification": 53.0327, + "eval_f1_for_coreference_resolution": 28.3449, + "eval_f1_for_data_to_text": 21.2943, + "eval_f1_for_dialogue_act_recognition": 37.381, + "eval_f1_for_grammar_error_correction": 14.1364, + "eval_f1_for_keyword_tagging": 11.6585, + "eval_f1_for_overlap_extraction": 18.6728, + "eval_f1_for_question_rewriting": 26.4903, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 4.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 14.8009, + "eval_f1_for_task035_winogrande_question_rewriting": 24.9841, + "eval_f1_for_task036_qasc_keyword_tagging": 1.4232, + "eval_f1_for_task039_qasc_overlap_extraction": 4.7821, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 9.7669, + "eval_f1_for_task1152_bard_word_analogy": 7.0, + "eval_f1_for_task1153_bard_word_analogy": 2.0, + "eval_f1_for_task1154_bard_word_analogy": 7.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 6.8527, + "eval_f1_for_task1157_bard_word_analogy": 15.0, + "eval_f1_for_task1158_bard_word_analogy": 17.0, + "eval_f1_for_task1159_bard_word_analogy": 7.0, + "eval_f1_for_task1161_coda_19_title_generation": 5.2936, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 16.0324, + "eval_f1_for_task121_zest_question_rewriting": 19.6601, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 1.7529, + "eval_f1_for_task1344_rte_textual_entailment": 0.1, + "eval_f1_for_task1345_qqp_question_rewriting": 24.879, + "eval_f1_for_task1356_xlsum_title_generation": 4.5113, + "eval_f1_for_task1358_xlsum_title_generation": 7.5311, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 52.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 57.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 19.4187, + "eval_f1_for_task1409_dart_data_to_text": 23.5886, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0977, + "eval_f1_for_task1439_doqa_answerability_classification": 41.0, + "eval_f1_for_task1442_doqa_answerability_classification": 52.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.6667, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 34.0, + "eval_f1_for_task1540_peer_read_title_generation": 2.4544, + "eval_f1_for_task1554_scitail_textual_entailment": 49.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 28.1752, + "eval_f1_for_task1562_zest_question_rewriting": 24.7142, + "eval_f1_for_task1586_scifact_title_generation": 6.668, + "eval_f1_for_task1598_nyc_data_to_text": 25.7043, + "eval_f1_for_task1612_sick_textual_entailment": 34.0, + "eval_f1_for_task1615_sick_textual_entailment": 34.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 36.295, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 53.1, + "eval_f1_for_task1631_open_pi_data_to_text": 49.4642, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 45.0, + "eval_f1_for_task1659_billsum_title_generation": 18.9689, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 49.6667, + "eval_f1_for_task1728_web_nlg_data_to_text": 14.0923, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 33.4124, + "eval_f1_for_task201_multinli_textual_entailment": 26.0278, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 5.4171, + "eval_f1_for_task220_rocstories_title_generation": 49.7653, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0714, + "eval_f1_for_task232_iirc_answerability_classification": 0.2627, + "eval_f1_for_task233_iirc_answerability_classification": 38.5914, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 47.9189, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 32.5635, + "eval_f1_for_task288_gigaword_title_generation": 3.9736, + "eval_f1_for_task290_tellmewhy_answerability_classification": 84.0, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 45.0, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 0.4183, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 24.1866, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 0.0351, + "eval_f1_for_task402_grailqa_question_rewriting": 34.9183, + "eval_f1_for_task418_persent_title_generation": 2.0986, + "eval_f1_for_task442_com_qa_question_rewriting": 40.323, + "eval_f1_for_task500_scruples_title_generation": 6.068, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 21.3539, + "eval_f1_for_task520_aquamuse_answerability_classification": 56.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 2.5785, + "eval_f1_for_task602_wikitext_title_generation": 2.7603, + "eval_f1_for_task613_liar_keyword_tagging": 0.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 23.3753, + "eval_f1_for_task619_ohsumed_title_generation": 2.7882, + "eval_f1_for_task620_ohsumed_keyword_tagging": 0.0, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 47.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 7.8695, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 0.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 29.4094, + "eval_f1_for_task671_ambigqa_question_rewriting": 25.3774, + "eval_f1_for_task677_ollie_data_to_text": 4.3695, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 17.7351, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.4469, + "eval_f1_for_task769_qed_title_generation": 8.8213, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 55.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 8.1223, + "eval_f1_for_task892_gap_coreference_resolution": 31.0, + "eval_f1_for_task893_gap_coreference_resolution": 28.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_f1_for_task957_e2e_data_to_text": 28.59, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 39.1058, + "eval_f1_for_title_generation": 9.5347, + "eval_f1_for_word_analogy": 13.9816, + "eval_gen_len": 15.5877, + "eval_global_step": 50, + "eval_loss": 2.6694912910461426, + "eval_rouge1": 30.0338, + "eval_rouge1_for_answerability_classification": 47.4722, + "eval_rouge1_for_cause_effect_classification": 53.4446, + "eval_rouge1_for_coreference_resolution": 28.3777, + "eval_rouge1_for_data_to_text": 26.1398, + "eval_rouge1_for_dialogue_act_recognition": 39.2857, + "eval_rouge1_for_grammar_error_correction": 14.743, + "eval_rouge1_for_keyword_tagging": 12.1807, + "eval_rouge1_for_overlap_extraction": 20.267, + "eval_rouge1_for_question_rewriting": 27.0988, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 4.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 14.9053, + "eval_rouge1_for_task035_winogrande_question_rewriting": 25.816, + "eval_rouge1_for_task036_qasc_keyword_tagging": 1.5205, + "eval_rouge1_for_task039_qasc_overlap_extraction": 6.669, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 14.7809, + "eval_rouge1_for_task1152_bard_word_analogy": 7.0, + "eval_rouge1_for_task1153_bard_word_analogy": 2.0, + "eval_rouge1_for_task1154_bard_word_analogy": 7.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 6.8527, + "eval_rouge1_for_task1157_bard_word_analogy": 15.0, + "eval_rouge1_for_task1158_bard_word_analogy": 17.0, + "eval_rouge1_for_task1159_bard_word_analogy": 7.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 5.1501, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 16.1342, + "eval_rouge1_for_task121_zest_question_rewriting": 19.9112, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 1.7238, + "eval_rouge1_for_task1344_rte_textual_entailment": 0.1, + "eval_rouge1_for_task1345_qqp_question_rewriting": 26.726, + "eval_rouge1_for_task1356_xlsum_title_generation": 5.5056, + "eval_rouge1_for_task1358_xlsum_title_generation": 8.0543, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 52.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 57.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 20.1221, + "eval_rouge1_for_task1409_dart_data_to_text": 24.5163, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.222, + "eval_rouge1_for_task1439_doqa_answerability_classification": 41.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.6667, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 34.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 2.7421, + "eval_rouge1_for_task1554_scitail_textual_entailment": 49.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 29.2639, + "eval_rouge1_for_task1562_zest_question_rewriting": 24.747, + "eval_rouge1_for_task1586_scifact_title_generation": 6.6241, + "eval_rouge1_for_task1598_nyc_data_to_text": 27.8745, + "eval_rouge1_for_task1612_sick_textual_entailment": 34.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 36.3602, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 53.1, + "eval_rouge1_for_task1631_open_pi_data_to_text": 50.0993, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 45.0, + "eval_rouge1_for_task1659_billsum_title_generation": 18.9923, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 49.6667, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 38.1366, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 33.4006, + "eval_rouge1_for_task201_multinli_textual_entailment": 26.0041, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 5.8896, + "eval_rouge1_for_task220_rocstories_title_generation": 49.763, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.069, + "eval_rouge1_for_task232_iirc_answerability_classification": 0.3887, + "eval_rouge1_for_task233_iirc_answerability_classification": 38.5809, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 48.3891, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 33.8651, + "eval_rouge1_for_task288_gigaword_title_generation": 4.1787, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 84.0, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 44.9, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 0.4456, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 24.0553, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 0.0351, + "eval_rouge1_for_task402_grailqa_question_rewriting": 35.5884, + "eval_rouge1_for_task418_persent_title_generation": 2.2322, + "eval_rouge1_for_task442_com_qa_question_rewriting": 42.7044, + "eval_rouge1_for_task500_scruples_title_generation": 6.8876, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 21.4593, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 56.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 2.7338, + "eval_rouge1_for_task602_wikitext_title_generation": 3.1134, + "eval_rouge1_for_task613_liar_keyword_tagging": 2.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 26.39, + "eval_rouge1_for_task619_ohsumed_title_generation": 2.821, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 0.0, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 47.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 8.3828, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 0.0, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 29.5914, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 25.6025, + "eval_rouge1_for_task677_ollie_data_to_text": 4.5105, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 18.012, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.4944, + "eval_rouge1_for_task769_qed_title_generation": 8.8234, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 55.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 8.1851, + "eval_rouge1_for_task892_gap_coreference_resolution": 31.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 28.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rouge1_for_task957_e2e_data_to_text": 34.9656, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 40.9377, + "eval_rouge1_for_title_generation": 9.765, + "eval_rouge1_for_word_analogy": 13.9816, + "eval_rougeL": 29.4105, + "eval_rougeL_for_answerability_classification": 47.4722, + "eval_rougeL_for_cause_effect_classification": 52.7729, + "eval_rougeL_for_coreference_resolution": 28.3777, + "eval_rougeL_for_data_to_text": 21.9704, + "eval_rougeL_for_dialogue_act_recognition": 39.2857, + "eval_rougeL_for_grammar_error_correction": 14.1047, + "eval_rougeL_for_keyword_tagging": 12.1807, + "eval_rougeL_for_overlap_extraction": 19.3051, + "eval_rougeL_for_question_rewriting": 25.167, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 4.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 14.1206, + "eval_rougeL_for_task035_winogrande_question_rewriting": 21.9978, + "eval_rougeL_for_task036_qasc_keyword_tagging": 1.5205, + "eval_rougeL_for_task039_qasc_overlap_extraction": 6.669, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 13.1676, + "eval_rougeL_for_task1152_bard_word_analogy": 7.0, + "eval_rougeL_for_task1153_bard_word_analogy": 2.0, + "eval_rougeL_for_task1154_bard_word_analogy": 7.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 6.8527, + "eval_rougeL_for_task1157_bard_word_analogy": 15.0, + "eval_rougeL_for_task1158_bard_word_analogy": 17.0, + "eval_rougeL_for_task1159_bard_word_analogy": 7.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 4.2182, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 16.1342, + "eval_rougeL_for_task121_zest_question_rewriting": 17.549, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 1.7238, + "eval_rougeL_for_task1344_rte_textual_entailment": 0.1, + "eval_rougeL_for_task1345_qqp_question_rewriting": 25.1943, + "eval_rougeL_for_task1356_xlsum_title_generation": 4.5296, + "eval_rougeL_for_task1358_xlsum_title_generation": 7.6731, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 52.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 57.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 16.3876, + "eval_rougeL_for_task1409_dart_data_to_text": 21.0362, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.222, + "eval_rougeL_for_task1439_doqa_answerability_classification": 41.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.6667, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 34.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 2.6171, + "eval_rougeL_for_task1554_scitail_textual_entailment": 49.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 27.9873, + "eval_rougeL_for_task1562_zest_question_rewriting": 22.6542, + "eval_rougeL_for_task1586_scifact_title_generation": 5.7026, + "eval_rougeL_for_task1598_nyc_data_to_text": 21.9778, + "eval_rougeL_for_task1612_sick_textual_entailment": 34.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 35.6315, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 53.1, + "eval_rougeL_for_task1631_open_pi_data_to_text": 44.4556, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 45.0, + "eval_rougeL_for_task1659_billsum_title_generation": 16.9839, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 49.6667, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 34.0953, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 33.4006, + "eval_rougeL_for_task201_multinli_textual_entailment": 26.0041, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 5.837, + "eval_rougeL_for_task220_rocstories_title_generation": 49.763, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.069, + "eval_rougeL_for_task232_iirc_answerability_classification": 0.3887, + "eval_rougeL_for_task233_iirc_answerability_classification": 38.5809, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 48.3891, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 31.9412, + "eval_rougeL_for_task288_gigaword_title_generation": 3.6095, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 84.0, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 44.9, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 0.4456, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 22.1458, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 0.0351, + "eval_rougeL_for_task402_grailqa_question_rewriting": 30.7822, + "eval_rougeL_for_task418_persent_title_generation": 2.0129, + "eval_rougeL_for_task442_com_qa_question_rewriting": 38.8622, + "eval_rougeL_for_task500_scruples_title_generation": 5.5954, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 21.1907, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 56.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 2.5671, + "eval_rougeL_for_task602_wikitext_title_generation": 3.029, + "eval_rougeL_for_task613_liar_keyword_tagging": 2.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 23.5978, + "eval_rougeL_for_task619_ohsumed_title_generation": 2.7247, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 0.0, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 47.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 8.3828, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 0.0, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 28.8752, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 25.036, + "eval_rougeL_for_task677_ollie_data_to_text": 4.408, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 15.877, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.3228, + "eval_rougeL_for_task769_qed_title_generation": 8.3876, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 55.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 8.1851, + "eval_rougeL_for_task892_gap_coreference_resolution": 31.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 28.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rougeL_for_task957_e2e_data_to_text": 25.0835, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 40.9377, + "eval_rougeL_for_title_generation": 9.168, + "eval_rougeL_for_word_analogy": 13.9816, + "eval_runtime": 446.3623, + "eval_samples_per_second": 26.682, + "eval_steps_per_second": 0.836, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 2.5342, + "step": 100 + }, + { + "epoch": 0.02, + "eval_exact_match": 21.8304, + "eval_exact_match_for_answerability_classification": 46.1538, + "eval_exact_match_for_cause_effect_classification": 36.8571, + "eval_exact_match_for_coreference_resolution": 23.1429, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 28.1429, + "eval_exact_match_for_grammar_error_correction": 1.5, + "eval_exact_match_for_keyword_tagging": 13.0, + "eval_exact_match_for_overlap_extraction": 6.5, + "eval_exact_match_for_question_rewriting": 0.8182, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 5.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 0.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 13.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 7.0, + "eval_exact_match_for_task1153_bard_word_analogy": 2.0, + "eval_exact_match_for_task1154_bard_word_analogy": 13.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 11.0, + "eval_exact_match_for_task1157_bard_word_analogy": 28.0, + "eval_exact_match_for_task1158_bard_word_analogy": 13.0, + "eval_exact_match_for_task1159_bard_word_analogy": 5.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 0.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 42.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 43.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 54.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 40.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 57.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 23.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 21.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 48.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 3.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 16.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 32.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 1.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 27.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 37.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 4.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 2.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 0.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 0.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 55.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 0.0, + "eval_exact_match_for_task613_liar_keyword_tagging": 1.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 0.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 30.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 48.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 15.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 0.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 17.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 54.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 81.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 24.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 38.8333, + "eval_exact_match_for_title_generation": 3.9238, + "eval_exact_match_for_word_analogy": 16.125, + "eval_f1": 31.3254, + "eval_f1_for_answerability_classification": 48.7179, + "eval_f1_for_cause_effect_classification": 52.869, + "eval_f1_for_coreference_resolution": 31.7291, + "eval_f1_for_data_to_text": 22.8563, + "eval_f1_for_dialogue_act_recognition": 37.6667, + "eval_f1_for_grammar_error_correction": 19.1038, + "eval_f1_for_keyword_tagging": 18.1473, + "eval_f1_for_overlap_extraction": 26.1581, + "eval_f1_for_question_rewriting": 32.487, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 5.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 32.8422, + "eval_f1_for_task035_winogrande_question_rewriting": 14.8628, + "eval_f1_for_task036_qasc_keyword_tagging": 3.3333, + "eval_f1_for_task039_qasc_overlap_extraction": 19.343, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 12.4999, + "eval_f1_for_task1152_bard_word_analogy": 7.0, + "eval_f1_for_task1153_bard_word_analogy": 2.0, + "eval_f1_for_task1154_bard_word_analogy": 13.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 12.1828, + "eval_f1_for_task1157_bard_word_analogy": 28.0, + "eval_f1_for_task1158_bard_word_analogy": 13.0, + "eval_f1_for_task1159_bard_word_analogy": 5.0, + "eval_f1_for_task1161_coda_19_title_generation": 10.155, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 26.2619, + "eval_f1_for_task121_zest_question_rewriting": 21.6192, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 0.9282, + "eval_f1_for_task1344_rte_textual_entailment": 0.0, + "eval_f1_for_task1345_qqp_question_rewriting": 27.4489, + "eval_f1_for_task1356_xlsum_title_generation": 5.4184, + "eval_f1_for_task1358_xlsum_title_generation": 8.4882, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 34.0, + "eval_f1_for_task1388_cb_textual_entailment": 42.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 43.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 54.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 18.8304, + "eval_f1_for_task1409_dart_data_to_text": 21.6454, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 3.2079, + "eval_f1_for_task1439_doqa_answerability_classification": 40.0, + "eval_f1_for_task1442_doqa_answerability_classification": 57.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 23.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 21.0, + "eval_f1_for_task1540_peer_read_title_generation": 2.6659, + "eval_f1_for_task1554_scitail_textual_entailment": 48.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 34.9996, + "eval_f1_for_task1562_zest_question_rewriting": 28.0892, + "eval_f1_for_task1586_scifact_title_generation": 12.2822, + "eval_f1_for_task1598_nyc_data_to_text": 25.7043, + "eval_f1_for_task1612_sick_textual_entailment": 34.0, + "eval_f1_for_task1615_sick_textual_entailment": 34.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 49.1403, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 55.4805, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_f1_for_task1659_billsum_title_generation": 22.1315, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 59.661, + "eval_f1_for_task1728_web_nlg_data_to_text": 15.679, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 33.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 32.0, + "eval_f1_for_task219_rocstories_title_generation": 9.8065, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 1.0, + "eval_f1_for_task233_iirc_answerability_classification": 46.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 48.2553, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 32.9733, + "eval_f1_for_task288_gigaword_title_generation": 13.1645, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 44.5, + "eval_f1_for_task329_gap_coreference_resolution": 37.0, + "eval_f1_for_task330_gap_coreference_resolution": 6.8333, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 66.6667, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 26.6294, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 1.3333, + "eval_f1_for_task402_grailqa_question_rewriting": 46.1394, + "eval_f1_for_task418_persent_title_generation": 4.2157, + "eval_f1_for_task442_com_qa_question_rewriting": 46.2116, + "eval_f1_for_task500_scruples_title_generation": 11.2299, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 24.4288, + "eval_f1_for_task520_aquamuse_answerability_classification": 55.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 2.9512, + "eval_f1_for_task602_wikitext_title_generation": 0.0644, + "eval_f1_for_task613_liar_keyword_tagging": 1.2679, + "eval_f1_for_task614_glucose_cause_effect_classification": 18.787, + "eval_f1_for_task619_ohsumed_title_generation": 8.6515, + "eval_f1_for_task620_ohsumed_keyword_tagging": 0.0, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 30.0, + "eval_f1_for_task642_e_snli_textual_entailment": 48.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 37.1355, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 0.5, + "eval_f1_for_task670_ambigqa_question_rewriting": 33.7906, + "eval_f1_for_task671_ambigqa_question_rewriting": 30.9514, + "eval_f1_for_task677_ollie_data_to_text": 7.1112, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 17.1509, + "eval_f1_for_task760_msr_sqa_data_to_text": 2.3102, + "eval_f1_for_task769_qed_title_generation": 26.2446, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 54.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 81.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 29.4575, + "eval_f1_for_task892_gap_coreference_resolution": 35.0, + "eval_f1_for_task893_gap_coreference_resolution": 34.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 31.2417, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 38.8333, + "eval_f1_for_title_generation": 12.8905, + "eval_f1_for_word_analogy": 16.2728, + "eval_gen_len": 9.6481, + "eval_global_step": 100, + "eval_loss": 2.099561929702759, + "eval_rouge1": 32.4691, + "eval_rouge1_for_answerability_classification": 48.7179, + "eval_rouge1_for_cause_effect_classification": 53.1227, + "eval_rouge1_for_coreference_resolution": 31.7928, + "eval_rouge1_for_data_to_text": 28.4572, + "eval_rouge1_for_dialogue_act_recognition": 39.5714, + "eval_rouge1_for_grammar_error_correction": 20.5408, + "eval_rouge1_for_keyword_tagging": 18.6144, + "eval_rouge1_for_overlap_extraction": 28.8232, + "eval_rouge1_for_question_rewriting": 33.3781, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 6.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 33.2244, + "eval_rouge1_for_task035_winogrande_question_rewriting": 15.3897, + "eval_rouge1_for_task036_qasc_keyword_tagging": 3.2857, + "eval_rouge1_for_task039_qasc_overlap_extraction": 23.8423, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 23.3552, + "eval_rouge1_for_task1152_bard_word_analogy": 7.0, + "eval_rouge1_for_task1153_bard_word_analogy": 2.0, + "eval_rouge1_for_task1154_bard_word_analogy": 13.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 12.1828, + "eval_rouge1_for_task1157_bard_word_analogy": 28.0, + "eval_rouge1_for_task1158_bard_word_analogy": 13.0, + "eval_rouge1_for_task1159_bard_word_analogy": 5.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 10.5831, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 27.0024, + "eval_rouge1_for_task121_zest_question_rewriting": 21.9573, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 0.8992, + "eval_rouge1_for_task1344_rte_textual_entailment": 0.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 29.7767, + "eval_rouge1_for_task1356_xlsum_title_generation": 6.3828, + "eval_rouge1_for_task1358_xlsum_title_generation": 9.6278, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 42.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 43.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 54.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 18.5992, + "eval_rouge1_for_task1409_dart_data_to_text": 21.8651, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 3.6737, + "eval_rouge1_for_task1439_doqa_answerability_classification": 40.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 57.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 23.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 21.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 2.9317, + "eval_rouge1_for_task1554_scitail_textual_entailment": 48.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 37.4079, + "eval_rouge1_for_task1562_zest_question_rewriting": 28.5288, + "eval_rouge1_for_task1586_scifact_title_generation": 14.1888, + "eval_rouge1_for_task1598_nyc_data_to_text": 27.8745, + "eval_rouge1_for_task1612_sick_textual_entailment": 34.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 49.2251, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 56.2712, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rouge1_for_task1659_billsum_title_generation": 22.3129, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 59.661, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 43.5998, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 32.0, + "eval_rouge1_for_task219_rocstories_title_generation": 10.8843, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 1.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 48.2205, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 33.804, + "eval_rouge1_for_task288_gigaword_title_generation": 15.0237, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 44.4, + "eval_rouge1_for_task329_gap_coreference_resolution": 37.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 6.8333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 66.6667, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 26.8063, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 1.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 47.4132, + "eval_rouge1_for_task418_persent_title_generation": 4.3762, + "eval_rouge1_for_task442_com_qa_question_rewriting": 48.8908, + "eval_rouge1_for_task500_scruples_title_generation": 12.2555, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 24.5179, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 55.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 3.1247, + "eval_rouge1_for_task602_wikitext_title_generation": 0.0644, + "eval_rouge1_for_task613_liar_keyword_tagging": 3.2679, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 20.3863, + "eval_rouge1_for_task619_ohsumed_title_generation": 9.0097, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 0.0, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 30.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 48.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 37.5187, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 0.5, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 34.1136, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 31.6368, + "eval_rouge1_for_task677_ollie_data_to_text": 7.754, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 17.1889, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 2.4087, + "eval_rouge1_for_task769_qed_title_generation": 26.2201, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 54.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 81.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 29.4844, + "eval_rouge1_for_task892_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 34.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 35.1114, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 40.6667, + "eval_rouge1_for_title_generation": 13.4294, + "eval_rouge1_for_word_analogy": 16.2728, + "eval_rougeL": 31.7789, + "eval_rougeL_for_answerability_classification": 48.7179, + "eval_rougeL_for_cause_effect_classification": 52.6129, + "eval_rougeL_for_coreference_resolution": 31.7928, + "eval_rougeL_for_data_to_text": 23.9413, + "eval_rougeL_for_dialogue_act_recognition": 39.5714, + "eval_rougeL_for_grammar_error_correction": 19.792, + "eval_rougeL_for_keyword_tagging": 18.5991, + "eval_rougeL_for_overlap_extraction": 28.104, + "eval_rougeL_for_question_rewriting": 31.4104, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 6.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 32.2814, + "eval_rougeL_for_task035_winogrande_question_rewriting": 15.2583, + "eval_rougeL_for_task036_qasc_keyword_tagging": 3.2857, + "eval_rougeL_for_task039_qasc_overlap_extraction": 23.8423, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 19.9766, + "eval_rougeL_for_task1152_bard_word_analogy": 7.0, + "eval_rougeL_for_task1153_bard_word_analogy": 2.0, + "eval_rougeL_for_task1154_bard_word_analogy": 13.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 12.1828, + "eval_rougeL_for_task1157_bard_word_analogy": 28.0, + "eval_rougeL_for_task1158_bard_word_analogy": 13.0, + "eval_rougeL_for_task1159_bard_word_analogy": 5.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 9.443, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 26.9154, + "eval_rougeL_for_task121_zest_question_rewriting": 19.5951, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 0.8992, + "eval_rougeL_for_task1344_rte_textual_entailment": 0.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 28.1258, + "eval_rougeL_for_task1356_xlsum_title_generation": 5.5776, + "eval_rougeL_for_task1358_xlsum_title_generation": 8.1045, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 42.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 43.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 54.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 16.1804, + "eval_rougeL_for_task1409_dart_data_to_text": 18.8355, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 3.5585, + "eval_rougeL_for_task1439_doqa_answerability_classification": 40.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 57.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 23.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 21.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 2.8067, + "eval_rougeL_for_task1554_scitail_textual_entailment": 48.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 36.0254, + "eval_rougeL_for_task1562_zest_question_rewriting": 25.7975, + "eval_rougeL_for_task1586_scifact_title_generation": 11.9065, + "eval_rougeL_for_task1598_nyc_data_to_text": 21.9778, + "eval_rougeL_for_task1612_sick_textual_entailment": 34.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 48.2527, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 48.4454, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rougeL_for_task1659_billsum_title_generation": 19.9418, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 59.661, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 38.8203, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 32.0, + "eval_rougeL_for_task219_rocstories_title_generation": 10.5986, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 1.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 48.2205, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 32.3656, + "eval_rougeL_for_task288_gigaword_title_generation": 12.4399, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 44.4, + "eval_rougeL_for_task329_gap_coreference_resolution": 37.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 6.8333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 66.6667, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 25.2812, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 1.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 40.0497, + "eval_rougeL_for_task418_persent_title_generation": 4.1567, + "eval_rougeL_for_task442_com_qa_question_rewriting": 44.7182, + "eval_rougeL_for_task500_scruples_title_generation": 9.7882, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 24.2493, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 55.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 2.958, + "eval_rougeL_for_task602_wikitext_title_generation": 0.0644, + "eval_rougeL_for_task613_liar_keyword_tagging": 3.2679, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 18.3425, + "eval_rougeL_for_task619_ohsumed_title_generation": 8.5887, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 0.0, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 30.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 48.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 37.4417, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 0.5, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 33.3879, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 31.1328, + "eval_rougeL_for_task677_ollie_data_to_text": 7.754, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 15.407, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 1.8169, + "eval_rougeL_for_task769_qed_title_generation": 25.9806, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 54.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 81.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 29.4844, + "eval_rougeL_for_task892_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 34.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 25.293, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 40.6667, + "eval_rougeL_for_title_generation": 12.4944, + "eval_rougeL_for_word_analogy": 16.2728, + "eval_runtime": 366.3589, + "eval_samples_per_second": 32.509, + "eval_steps_per_second": 1.018, + "step": 100 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 2.3879, + "step": 200 + }, + { + "epoch": 0.05, + "eval_exact_match": 22.9135, + "eval_exact_match_for_answerability_classification": 45.5385, + "eval_exact_match_for_cause_effect_classification": 35.4286, + "eval_exact_match_for_coreference_resolution": 25.0, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 35.7143, + "eval_exact_match_for_grammar_error_correction": 2.0, + "eval_exact_match_for_keyword_tagging": 19.2, + "eval_exact_match_for_overlap_extraction": 7.0, + "eval_exact_match_for_question_rewriting": 0.8182, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 7.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 0.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 14.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 49.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 6.0, + "eval_exact_match_for_task1153_bard_word_analogy": 2.0, + "eval_exact_match_for_task1154_bard_word_analogy": 13.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 21.0, + "eval_exact_match_for_task1157_bard_word_analogy": 20.0, + "eval_exact_match_for_task1158_bard_word_analogy": 12.0, + "eval_exact_match_for_task1159_bard_word_analogy": 5.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 30.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 50.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 48.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 55.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 48.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 42.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 46.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 47.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 4.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 32.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 16.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 30.0, + "eval_exact_match_for_task219_rocstories_title_generation": 5.0, + "eval_exact_match_for_task220_rocstories_title_generation": 46.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 1.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 47.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 29.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 14.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 1.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 0.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 52.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 0.0, + "eval_exact_match_for_task613_liar_keyword_tagging": 2.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 1.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 51.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 44.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 0.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 31.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 83.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 35.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 40.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 24.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 39.7917, + "eval_exact_match_for_title_generation": 4.5964, + "eval_exact_match_for_word_analogy": 16.125, + "eval_f1": 33.4571, + "eval_f1_for_answerability_classification": 48.1026, + "eval_f1_for_cause_effect_classification": 51.9593, + "eval_f1_for_coreference_resolution": 33.8268, + "eval_f1_for_data_to_text": 20.9623, + "eval_f1_for_dialogue_act_recognition": 45.2381, + "eval_f1_for_grammar_error_correction": 25.1481, + "eval_f1_for_keyword_tagging": 26.4092, + "eval_f1_for_overlap_extraction": 27.0146, + "eval_f1_for_question_rewriting": 41.2031, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 7.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 26.8739, + "eval_f1_for_task035_winogrande_question_rewriting": 27.6404, + "eval_f1_for_task036_qasc_keyword_tagging": 15.2966, + "eval_f1_for_task039_qasc_overlap_extraction": 18.4437, + "eval_f1_for_task050_multirc_answerability_classification": 49.0, + "eval_f1_for_task102_commongen_data_to_text": 6.4116, + "eval_f1_for_task1152_bard_word_analogy": 6.0, + "eval_f1_for_task1153_bard_word_analogy": 2.6667, + "eval_f1_for_task1154_bard_word_analogy": 13.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 21.0, + "eval_f1_for_task1157_bard_word_analogy": 20.0, + "eval_f1_for_task1158_bard_word_analogy": 12.0, + "eval_f1_for_task1159_bard_word_analogy": 5.0, + "eval_f1_for_task1161_coda_19_title_generation": 12.3034, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 43.7886, + "eval_f1_for_task121_zest_question_rewriting": 31.9796, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 0.9282, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 26.7593, + "eval_f1_for_task1356_xlsum_title_generation": 4.9161, + "eval_f1_for_task1358_xlsum_title_generation": 13.5169, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 30.0, + "eval_f1_for_task1388_cb_textual_entailment": 50.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 48.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 55.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 48.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 14.4196, + "eval_f1_for_task1409_dart_data_to_text": 21.7573, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 2.9709, + "eval_f1_for_task1439_doqa_answerability_classification": 42.0, + "eval_f1_for_task1442_doqa_answerability_classification": 52.0, + "eval_f1_for_task1516_imppres_textual_entailment": 0.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 46.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1540_peer_read_title_generation": 4.3218, + "eval_f1_for_task1554_scitail_textual_entailment": 47.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 47.3253, + "eval_f1_for_task1562_zest_question_rewriting": 28.2014, + "eval_f1_for_task1586_scifact_title_generation": 17.0615, + "eval_f1_for_task1598_nyc_data_to_text": 25.7043, + "eval_f1_for_task1612_sick_textual_entailment": 32.0, + "eval_f1_for_task1615_sick_textual_entailment": 34.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 56.3489, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_f1_for_task1631_open_pi_data_to_text": 52.7481, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_f1_for_task1659_billsum_title_generation": 23.0862, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 57.9166, + "eval_f1_for_task1728_web_nlg_data_to_text": 13.0916, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 33.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 30.0, + "eval_f1_for_task219_rocstories_title_generation": 6.7174, + "eval_f1_for_task220_rocstories_title_generation": 46.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 1.0, + "eval_f1_for_task233_iirc_answerability_classification": 47.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 51.2999, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 35.5856, + "eval_f1_for_task288_gigaword_title_generation": 15.3522, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 45.0, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 15.8333, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 66.6667, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 26.1046, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 1.3333, + "eval_f1_for_task402_grailqa_question_rewriting": 50.9603, + "eval_f1_for_task418_persent_title_generation": 4.5837, + "eval_f1_for_task442_com_qa_question_rewriting": 52.3193, + "eval_f1_for_task500_scruples_title_generation": 11.3842, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 24.4288, + "eval_f1_for_task520_aquamuse_answerability_classification": 52.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 3.2586, + "eval_f1_for_task602_wikitext_title_generation": 1.1452, + "eval_f1_for_task613_liar_keyword_tagging": 2.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 22.9435, + "eval_f1_for_task619_ohsumed_title_generation": 13.7559, + "eval_f1_for_task620_ohsumed_keyword_tagging": 2.4168, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 51.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 63.3325, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 3.5, + "eval_f1_for_task670_ambigqa_question_rewriting": 57.0647, + "eval_f1_for_task671_ambigqa_question_rewriting": 51.2981, + "eval_f1_for_task677_ollie_data_to_text": 4.0233, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 17.0287, + "eval_f1_for_task760_msr_sqa_data_to_text": 1.2809, + "eval_f1_for_task769_qed_title_generation": 38.4227, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 83.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 35.0, + "eval_f1_for_task891_gap_coreference_resolution": 41.0248, + "eval_f1_for_task892_gap_coreference_resolution": 40.0, + "eval_f1_for_task893_gap_coreference_resolution": 24.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 34.6598, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 39.7917, + "eval_f1_for_title_generation": 14.4635, + "eval_f1_for_word_analogy": 16.2083, + "eval_gen_len": 10.7522, + "eval_global_step": 200, + "eval_loss": 1.9628477096557617, + "eval_rouge1": 34.9511, + "eval_rouge1_for_answerability_classification": 48.1026, + "eval_rouge1_for_cause_effect_classification": 52.1485, + "eval_rouge1_for_coreference_resolution": 33.8486, + "eval_rouge1_for_data_to_text": 29.4624, + "eval_rouge1_for_dialogue_act_recognition": 47.1429, + "eval_rouge1_for_grammar_error_correction": 28.3983, + "eval_rouge1_for_keyword_tagging": 27.3556, + "eval_rouge1_for_overlap_extraction": 30.2903, + "eval_rouge1_for_question_rewriting": 42.5336, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 8.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 26.954, + "eval_rouge1_for_task035_winogrande_question_rewriting": 28.1731, + "eval_rouge1_for_task036_qasc_keyword_tagging": 15.5903, + "eval_rouge1_for_task039_qasc_overlap_extraction": 23.9031, + "eval_rouge1_for_task050_multirc_answerability_classification": 49.0, + "eval_rouge1_for_task102_commongen_data_to_text": 44.3781, + "eval_rouge1_for_task1152_bard_word_analogy": 6.0, + "eval_rouge1_for_task1153_bard_word_analogy": 2.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 13.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 21.0, + "eval_rouge1_for_task1157_bard_word_analogy": 20.0, + "eval_rouge1_for_task1158_bard_word_analogy": 12.0, + "eval_rouge1_for_task1159_bard_word_analogy": 5.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 13.4633, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 45.2421, + "eval_rouge1_for_task121_zest_question_rewriting": 32.6218, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 0.8992, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 28.9989, + "eval_rouge1_for_task1356_xlsum_title_generation": 5.8157, + "eval_rouge1_for_task1358_xlsum_title_generation": 15.0575, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 30.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 50.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 48.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 55.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 48.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 14.2087, + "eval_rouge1_for_task1409_dart_data_to_text": 21.87, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 6.2907, + "eval_rouge1_for_task1439_doqa_answerability_classification": 42.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 46.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 4.5796, + "eval_rouge1_for_task1554_scitail_textual_entailment": 47.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 50.5059, + "eval_rouge1_for_task1562_zest_question_rewriting": 28.884, + "eval_rouge1_for_task1586_scifact_title_generation": 19.5727, + "eval_rouge1_for_task1598_nyc_data_to_text": 27.8745, + "eval_rouge1_for_task1612_sick_textual_entailment": 32.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 56.9614, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 53.5312, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_rouge1_for_task1659_billsum_title_generation": 23.5761, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 57.9166, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 40.6691, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 30.0, + "eval_rouge1_for_task219_rocstories_title_generation": 9.7682, + "eval_rouge1_for_task220_rocstories_title_generation": 46.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 1.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 47.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 51.2651, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 36.6774, + "eval_rouge1_for_task288_gigaword_title_generation": 17.4951, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 44.9, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 15.8333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 66.6667, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 26.3021, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 1.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 52.504, + "eval_rouge1_for_task418_persent_title_generation": 4.7704, + "eval_rouge1_for_task442_com_qa_question_rewriting": 56.3913, + "eval_rouge1_for_task500_scruples_title_generation": 12.4927, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 24.5179, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 52.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 3.5193, + "eval_rouge1_for_task602_wikitext_title_generation": 1.4583, + "eval_rouge1_for_task613_liar_keyword_tagging": 4.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 24.0709, + "eval_rouge1_for_task619_ohsumed_title_generation": 15.5042, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 3.6787, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 51.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 64.5091, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 3.2, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 58.5947, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 52.544, + "eval_rouge1_for_task677_ollie_data_to_text": 4.1195, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 17.2072, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 1.4814, + "eval_rouge1_for_task769_qed_title_generation": 38.9136, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 83.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 35.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 40.765, + "eval_rouge1_for_task892_gap_coreference_resolution": 40.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 24.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 36.3228, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 41.625, + "eval_rouge1_for_title_generation": 15.3799, + "eval_rouge1_for_word_analogy": 16.2083, + "eval_rougeL": 34.1198, + "eval_rougeL_for_answerability_classification": 48.1026, + "eval_rougeL_for_cause_effect_classification": 51.5659, + "eval_rougeL_for_coreference_resolution": 33.8486, + "eval_rougeL_for_data_to_text": 24.1197, + "eval_rougeL_for_dialogue_act_recognition": 47.1429, + "eval_rougeL_for_grammar_error_correction": 27.5602, + "eval_rougeL_for_keyword_tagging": 26.9397, + "eval_rougeL_for_overlap_extraction": 29.539, + "eval_rougeL_for_question_rewriting": 40.297, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 8.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 26.7674, + "eval_rougeL_for_task035_winogrande_question_rewriting": 28.0392, + "eval_rougeL_for_task036_qasc_keyword_tagging": 14.6148, + "eval_rougeL_for_task039_qasc_overlap_extraction": 23.9031, + "eval_rougeL_for_task050_multirc_answerability_classification": 49.0, + "eval_rougeL_for_task102_commongen_data_to_text": 33.6812, + "eval_rougeL_for_task1152_bard_word_analogy": 6.0, + "eval_rougeL_for_task1153_bard_word_analogy": 2.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 13.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 21.0, + "eval_rougeL_for_task1157_bard_word_analogy": 20.0, + "eval_rougeL_for_task1158_bard_word_analogy": 12.0, + "eval_rougeL_for_task1159_bard_word_analogy": 5.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 11.4775, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 44.6806, + "eval_rougeL_for_task121_zest_question_rewriting": 29.5979, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 0.8992, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 27.3169, + "eval_rougeL_for_task1356_xlsum_title_generation": 5.1674, + "eval_rougeL_for_task1358_xlsum_title_generation": 12.7789, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 30.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 50.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 48.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 55.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 48.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 13.0923, + "eval_rougeL_for_task1409_dart_data_to_text": 19.2887, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 5.9943, + "eval_rougeL_for_task1439_doqa_answerability_classification": 42.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 46.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 4.4546, + "eval_rougeL_for_task1554_scitail_textual_entailment": 47.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 49.1261, + "eval_rougeL_for_task1562_zest_question_rewriting": 26.3069, + "eval_rougeL_for_task1586_scifact_title_generation": 15.6981, + "eval_rougeL_for_task1598_nyc_data_to_text": 21.9778, + "eval_rougeL_for_task1612_sick_textual_entailment": 32.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 54.8419, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 44.3337, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_rougeL_for_task1659_billsum_title_generation": 20.832, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 57.9166, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 36.8425, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 30.0, + "eval_rougeL_for_task219_rocstories_title_generation": 9.7682, + "eval_rougeL_for_task220_rocstories_title_generation": 46.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 1.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 47.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 51.2651, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 35.1749, + "eval_rougeL_for_task288_gigaword_title_generation": 14.6846, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 44.9, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 15.8333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 66.6667, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 24.7655, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 1.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 44.793, + "eval_rougeL_for_task418_persent_title_generation": 4.5617, + "eval_rougeL_for_task442_com_qa_question_rewriting": 52.55, + "eval_rougeL_for_task500_scruples_title_generation": 10.0198, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 24.2493, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 52.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 3.2902, + "eval_rougeL_for_task602_wikitext_title_generation": 1.4583, + "eval_rougeL_for_task613_liar_keyword_tagging": 4.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 21.529, + "eval_rougeL_for_task619_ohsumed_title_generation": 14.3046, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 3.4296, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 51.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 63.6538, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 3.2, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 57.1966, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 51.1765, + "eval_rougeL_for_task677_ollie_data_to_text": 3.9709, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 15.1082, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 1.3235, + "eval_rougeL_for_task769_qed_title_generation": 38.9136, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 83.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 35.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 40.765, + "eval_rougeL_for_task892_gap_coreference_resolution": 40.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 24.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 25.6976, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 41.625, + "eval_rougeL_for_title_generation": 14.2059, + "eval_rougeL_for_word_analogy": 16.2083, + "eval_runtime": 363.1484, + "eval_samples_per_second": 32.797, + "eval_steps_per_second": 1.027, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 2.2102, + "step": 500 + }, + { + "epoch": 0.11, + "eval_exact_match": 24.3829, + "eval_exact_match_for_answerability_classification": 45.5385, + "eval_exact_match_for_cause_effect_classification": 35.2857, + "eval_exact_match_for_coreference_resolution": 28.8571, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 40.7143, + "eval_exact_match_for_grammar_error_correction": 5.5, + "eval_exact_match_for_keyword_tagging": 28.8, + "eval_exact_match_for_overlap_extraction": 12.0, + "eval_exact_match_for_question_rewriting": 1.0909, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 19.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 10.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 24.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 44.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 4.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 7.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 19.0, + "eval_exact_match_for_task1157_bard_word_analogy": 8.0, + "eval_exact_match_for_task1158_bard_word_analogy": 6.0, + "eval_exact_match_for_task1159_bard_word_analogy": 5.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 56.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 44.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 45.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 11.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 52.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 13.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 35.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 2.0, + "eval_exact_match_for_task220_rocstories_title_generation": 49.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 1.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 45.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 23.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 49.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 2.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 0.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 51.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 9.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 4.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 52.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 72.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 19.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 53.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 46.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 51.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 75.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 37.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 43.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 19.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 56.0, + "eval_exact_match_for_textual_entailment": 40.7083, + "eval_exact_match_for_title_generation": 6.1099, + "eval_exact_match_for_word_analogy": 12.375, + "eval_f1": 37.001, + "eval_f1_for_answerability_classification": 48.1538, + "eval_f1_for_cause_effect_classification": 51.7284, + "eval_f1_for_coreference_resolution": 38.1026, + "eval_f1_for_data_to_text": 24.0294, + "eval_f1_for_dialogue_act_recognition": 44.4286, + "eval_f1_for_grammar_error_correction": 39.4841, + "eval_f1_for_keyword_tagging": 40.942, + "eval_f1_for_overlap_extraction": 30.6618, + "eval_f1_for_question_rewriting": 56.3248, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 20.7333, + "eval_f1_for_task034_winogrande_question_rewriting": 47.2801, + "eval_f1_for_task035_winogrande_question_rewriting": 66.8657, + "eval_f1_for_task036_qasc_keyword_tagging": 53.347, + "eval_f1_for_task039_qasc_overlap_extraction": 27.8333, + "eval_f1_for_task050_multirc_answerability_classification": 44.0, + "eval_f1_for_task102_commongen_data_to_text": 16.574, + "eval_f1_for_task1152_bard_word_analogy": 4.0, + "eval_f1_for_task1153_bard_word_analogy": 0.6667, + "eval_f1_for_task1154_bard_word_analogy": 7.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 19.0, + "eval_f1_for_task1157_bard_word_analogy": 8.0, + "eval_f1_for_task1158_bard_word_analogy": 6.0, + "eval_f1_for_task1159_bard_word_analogy": 5.0, + "eval_f1_for_task1161_coda_19_title_generation": 16.2545, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 72.5659, + "eval_f1_for_task121_zest_question_rewriting": 42.8865, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 1.6323, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 32.9029, + "eval_f1_for_task1356_xlsum_title_generation": 7.4788, + "eval_f1_for_task1358_xlsum_title_generation": 25.0532, + "eval_f1_for_task1385_anli_textual_entailment": 32.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 34.0, + "eval_f1_for_task1388_cb_textual_entailment": 56.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 17.4294, + "eval_f1_for_task1409_dart_data_to_text": 26.1209, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 10.4119, + "eval_f1_for_task1439_doqa_answerability_classification": 44.0, + "eval_f1_for_task1442_doqa_answerability_classification": 53.0, + "eval_f1_for_task1516_imppres_textual_entailment": 0.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 8.8148, + "eval_f1_for_task1554_scitail_textual_entailment": 45.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 68.5563, + "eval_f1_for_task1562_zest_question_rewriting": 33.5142, + "eval_f1_for_task1586_scifact_title_generation": 16.7788, + "eval_f1_for_task1598_nyc_data_to_text": 26.866, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 70.6158, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 52.0, + "eval_f1_for_task1631_open_pi_data_to_text": 53.3501, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_f1_for_task1659_billsum_title_generation": 23.6505, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 55.4157, + "eval_f1_for_task1728_web_nlg_data_to_text": 11.9047, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 35.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 5.7442, + "eval_f1_for_task220_rocstories_title_generation": 49.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_f1_for_task232_iirc_answerability_classification": 1.0, + "eval_f1_for_task233_iirc_answerability_classification": 45.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 38.3357, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 33.4902, + "eval_f1_for_task288_gigaword_title_generation": 20.1319, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 43.5385, + "eval_f1_for_task329_gap_coreference_resolution": 35.0, + "eval_f1_for_task330_gap_coreference_resolution": 36.1333, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 74.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 25.4108, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 0.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 61.641, + "eval_f1_for_task418_persent_title_generation": 9.7723, + "eval_f1_for_task442_com_qa_question_rewriting": 54.3617, + "eval_f1_for_task500_scruples_title_generation": 11.6015, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 25.396, + "eval_f1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 5.9984, + "eval_f1_for_task602_wikitext_title_generation": 4.4725, + "eval_f1_for_task613_liar_keyword_tagging": 10.7333, + "eval_f1_for_task614_glucose_cause_effect_classification": 23.0213, + "eval_f1_for_task619_ohsumed_title_generation": 21.3853, + "eval_f1_for_task620_ohsumed_keyword_tagging": 9.3408, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 52.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 82.289, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 34.1278, + "eval_f1_for_task670_ambigqa_question_rewriting": 74.0833, + "eval_f1_for_task671_ambigqa_question_rewriting": 62.8554, + "eval_f1_for_task677_ollie_data_to_text": 6.6957, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 18.3048, + "eval_f1_for_task760_msr_sqa_data_to_text": 0.7432, + "eval_f1_for_task769_qed_title_generation": 63.7999, + "eval_f1_for_task827_copa_cause_effect_classification": 46.0, + "eval_f1_for_task828_copa_cause_effect_classification": 51.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 75.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 37.0, + "eval_f1_for_task891_gap_coreference_resolution": 49.4857, + "eval_f1_for_task892_gap_coreference_resolution": 52.0, + "eval_f1_for_task893_gap_coreference_resolution": 19.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 39.3491, + "eval_f1_for_task970_sherliic_textual_entailment": 56.0, + "eval_f1_for_textual_entailment": 40.7083, + "eval_f1_for_title_generation": 18.753, + "eval_f1_for_word_analogy": 12.4583, + "eval_gen_len": 10.6576, + "eval_global_step": 500, + "eval_loss": 1.837897539138794, + "eval_rouge1": 38.6387, + "eval_rouge1_for_answerability_classification": 48.1538, + "eval_rouge1_for_cause_effect_classification": 52.0495, + "eval_rouge1_for_coreference_resolution": 38.361, + "eval_rouge1_for_data_to_text": 31.9602, + "eval_rouge1_for_dialogue_act_recognition": 46.3333, + "eval_rouge1_for_grammar_error_correction": 43.827, + "eval_rouge1_for_keyword_tagging": 42.9438, + "eval_rouge1_for_overlap_extraction": 34.7406, + "eval_rouge1_for_question_rewriting": 57.8751, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 22.7333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 47.3905, + "eval_rouge1_for_task035_winogrande_question_rewriting": 67.6867, + "eval_rouge1_for_task036_qasc_keyword_tagging": 55.3715, + "eval_rouge1_for_task039_qasc_overlap_extraction": 34.6667, + "eval_rouge1_for_task050_multirc_answerability_classification": 44.0, + "eval_rouge1_for_task102_commongen_data_to_text": 40.5127, + "eval_rouge1_for_task1152_bard_word_analogy": 4.0, + "eval_rouge1_for_task1153_bard_word_analogy": 0.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 7.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 19.0, + "eval_rouge1_for_task1157_bard_word_analogy": 8.0, + "eval_rouge1_for_task1158_bard_word_analogy": 6.0, + "eval_rouge1_for_task1159_bard_word_analogy": 5.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 17.7024, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 73.0177, + "eval_rouge1_for_task121_zest_question_rewriting": 44.7932, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 1.7571, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 35.9315, + "eval_rouge1_for_task1356_xlsum_title_generation": 9.314, + "eval_rouge1_for_task1358_xlsum_title_generation": 28.9963, + "eval_rouge1_for_task1385_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 56.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 17.1922, + "eval_rouge1_for_task1409_dart_data_to_text": 27.4195, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 15.8136, + "eval_rouge1_for_task1439_doqa_answerability_classification": 44.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 9.4658, + "eval_rouge1_for_task1554_scitail_textual_entailment": 45.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 71.8404, + "eval_rouge1_for_task1562_zest_question_rewriting": 34.885, + "eval_rouge1_for_task1586_scifact_title_generation": 19.4571, + "eval_rouge1_for_task1598_nyc_data_to_text": 34.1369, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 71.0777, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 52.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 54.1503, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1659_billsum_title_generation": 24.1919, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 55.4157, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 41.7599, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 35.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 8.1442, + "eval_rouge1_for_task220_rocstories_title_generation": 49.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 1.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 45.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 40.0476, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 34.8145, + "eval_rouge1_for_task288_gigaword_title_generation": 22.4963, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 43.4377, + "eval_rouge1_for_task329_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 36.1333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 74.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 26.0396, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 0.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 64.033, + "eval_rouge1_for_task418_persent_title_generation": 11.1906, + "eval_rouge1_for_task442_com_qa_question_rewriting": 58.3703, + "eval_rouge1_for_task500_scruples_title_generation": 12.8012, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 25.341, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 6.7637, + "eval_rouge1_for_task602_wikitext_title_generation": 4.4924, + "eval_rouge1_for_task613_liar_keyword_tagging": 16.8, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 24.6406, + "eval_rouge1_for_task619_ohsumed_title_generation": 23.4783, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 10.9727, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 52.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 82.5747, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 34.2338, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 75.47, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.9701, + "eval_rouge1_for_task677_ollie_data_to_text": 7.0577, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 18.6905, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 1.1246, + "eval_rouge1_for_task769_qed_title_generation": 63.7721, + "eval_rouge1_for_task827_copa_cause_effect_classification": 46.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 51.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 37.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 49.3857, + "eval_rouge1_for_task892_gap_coreference_resolution": 52.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 19.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 41.4701, + "eval_rouge1_for_task970_sherliic_textual_entailment": 56.0, + "eval_rouge1_for_textual_entailment": 42.5694, + "eval_rouge1_for_title_generation": 19.974, + "eval_rouge1_for_word_analogy": 12.4583, + "eval_rougeL": 37.7188, + "eval_rougeL_for_answerability_classification": 48.1538, + "eval_rougeL_for_cause_effect_classification": 51.664, + "eval_rougeL_for_coreference_resolution": 38.361, + "eval_rougeL_for_data_to_text": 26.34, + "eval_rougeL_for_dialogue_act_recognition": 46.3333, + "eval_rougeL_for_grammar_error_correction": 42.9134, + "eval_rougeL_for_keyword_tagging": 42.412, + "eval_rougeL_for_overlap_extraction": 33.998, + "eval_rougeL_for_question_rewriting": 55.2793, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 22.7333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 47.2996, + "eval_rougeL_for_task035_winogrande_question_rewriting": 67.4069, + "eval_rougeL_for_task036_qasc_keyword_tagging": 52.7126, + "eval_rougeL_for_task039_qasc_overlap_extraction": 34.6667, + "eval_rougeL_for_task050_multirc_answerability_classification": 44.0, + "eval_rougeL_for_task102_commongen_data_to_text": 33.3074, + "eval_rougeL_for_task1152_bard_word_analogy": 4.0, + "eval_rougeL_for_task1153_bard_word_analogy": 0.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 7.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 19.0, + "eval_rougeL_for_task1157_bard_word_analogy": 8.0, + "eval_rougeL_for_task1158_bard_word_analogy": 6.0, + "eval_rougeL_for_task1159_bard_word_analogy": 5.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 15.7324, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 71.9774, + "eval_rougeL_for_task121_zest_question_rewriting": 40.4585, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 1.7571, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 33.8532, + "eval_rougeL_for_task1356_xlsum_title_generation": 7.8957, + "eval_rougeL_for_task1358_xlsum_title_generation": 24.3934, + "eval_rougeL_for_task1385_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 56.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 15.6761, + "eval_rougeL_for_task1409_dart_data_to_text": 23.6714, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 15.0152, + "eval_rougeL_for_task1439_doqa_answerability_classification": 44.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 8.9864, + "eval_rougeL_for_task1554_scitail_textual_entailment": 45.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 70.8116, + "eval_rougeL_for_task1562_zest_question_rewriting": 31.5928, + "eval_rougeL_for_task1586_scifact_title_generation": 16.0603, + "eval_rougeL_for_task1598_nyc_data_to_text": 25.2766, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 68.7991, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 52.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 45.1107, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1659_billsum_title_generation": 20.9971, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 55.4157, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 37.1899, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 35.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 8.1442, + "eval_rougeL_for_task220_rocstories_title_generation": 49.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 1.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 45.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 40.0476, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 33.3292, + "eval_rougeL_for_task288_gigaword_title_generation": 19.5709, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 43.4377, + "eval_rougeL_for_task329_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 36.1333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 74.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 24.921, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 0.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 54.2886, + "eval_rougeL_for_task418_persent_title_generation": 10.0142, + "eval_rougeL_for_task442_com_qa_question_rewriting": 55.5069, + "eval_rougeL_for_task500_scruples_title_generation": 10.8687, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 25.0724, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 6.7012, + "eval_rougeL_for_task602_wikitext_title_generation": 4.4924, + "eval_rougeL_for_task613_liar_keyword_tagging": 16.8, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 23.0606, + "eval_rougeL_for_task619_ohsumed_title_generation": 21.5161, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 10.9727, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 52.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 82.5747, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 34.2338, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 73.9615, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 62.9281, + "eval_rougeL_for_task677_ollie_data_to_text": 6.6299, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 16.1605, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 1.1035, + "eval_rougeL_for_task769_qed_title_generation": 63.7721, + "eval_rougeL_for_task827_copa_cause_effect_classification": 46.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 51.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 37.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 49.3857, + "eval_rougeL_for_task892_gap_coreference_resolution": 52.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 19.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 30.4193, + "eval_rougeL_for_task970_sherliic_textual_entailment": 56.0, + "eval_rougeL_for_textual_entailment": 42.5694, + "eval_rougeL_for_title_generation": 18.5211, + "eval_rougeL_for_word_analogy": 12.4583, + "eval_runtime": 361.8429, + "eval_samples_per_second": 32.915, + "eval_steps_per_second": 1.031, + "step": 500 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 2.1198, + "step": 1000 + }, + { + "epoch": 0.23, + "eval_exact_match": 23.7699, + "eval_exact_match_for_answerability_classification": 45.1538, + "eval_exact_match_for_cause_effect_classification": 34.2857, + "eval_exact_match_for_coreference_resolution": 28.3571, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 39.0, + "eval_exact_match_for_grammar_error_correction": 6.5, + "eval_exact_match_for_keyword_tagging": 29.8, + "eval_exact_match_for_overlap_extraction": 10.5, + "eval_exact_match_for_question_rewriting": 1.5455, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 20.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 18.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 21.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 49.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 3.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 8.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 14.0, + "eval_exact_match_for_task1157_bard_word_analogy": 9.0, + "eval_exact_match_for_task1158_bard_word_analogy": 7.0, + "eval_exact_match_for_task1159_bard_word_analogy": 8.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 7.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 47.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 43.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 49.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 13.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 27.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 7.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 14.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 35.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 27.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 49.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 44.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 1.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 47.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 23.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 45.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 31.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 23.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 53.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 2.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 52.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 12.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 3.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 25.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 48.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 67.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 25.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 46.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 48.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 45.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 69.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 24.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 38.875, + "eval_exact_match_for_title_generation": 5.7175, + "eval_exact_match_for_word_analogy": 12.375, + "eval_f1": 37.3648, + "eval_f1_for_answerability_classification": 47.981, + "eval_f1_for_cause_effect_classification": 50.8201, + "eval_f1_for_coreference_resolution": 38.0579, + "eval_f1_for_data_to_text": 27.7654, + "eval_f1_for_dialogue_act_recognition": 42.4286, + "eval_f1_for_grammar_error_correction": 45.3561, + "eval_f1_for_keyword_tagging": 42.5883, + "eval_f1_for_overlap_extraction": 29.3548, + "eval_f1_for_question_rewriting": 60.3992, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 22.0, + "eval_f1_for_task034_winogrande_question_rewriting": 78.5198, + "eval_f1_for_task035_winogrande_question_rewriting": 77.6744, + "eval_f1_for_task036_qasc_keyword_tagging": 59.665, + "eval_f1_for_task039_qasc_overlap_extraction": 24.3333, + "eval_f1_for_task050_multirc_answerability_classification": 49.0, + "eval_f1_for_task102_commongen_data_to_text": 28.0026, + "eval_f1_for_task1152_bard_word_analogy": 3.0, + "eval_f1_for_task1153_bard_word_analogy": 0.0, + "eval_f1_for_task1154_bard_word_analogy": 8.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 14.0, + "eval_f1_for_task1157_bard_word_analogy": 9.0, + "eval_f1_for_task1158_bard_word_analogy": 7.0, + "eval_f1_for_task1159_bard_word_analogy": 8.0, + "eval_f1_for_task1161_coda_19_title_generation": 16.3995, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 75.7229, + "eval_f1_for_task121_zest_question_rewriting": 42.7952, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 5.233, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 32.6234, + "eval_f1_for_task1356_xlsum_title_generation": 7.9087, + "eval_f1_for_task1358_xlsum_title_generation": 26.5972, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 34.0, + "eval_f1_for_task1388_cb_textual_entailment": 40.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 47.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 16.8014, + "eval_f1_for_task1409_dart_data_to_text": 25.5923, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 12.7171, + "eval_f1_for_task1439_doqa_answerability_classification": 43.0, + "eval_f1_for_task1442_doqa_answerability_classification": 53.0, + "eval_f1_for_task1516_imppres_textual_entailment": 0.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 10.3819, + "eval_f1_for_task1554_scitail_textual_entailment": 49.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 77.9952, + "eval_f1_for_task1562_zest_question_rewriting": 39.04, + "eval_f1_for_task1586_scifact_title_generation": 20.1885, + "eval_f1_for_task1598_nyc_data_to_text": 27.1495, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 27.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 73.4044, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_f1_for_task1631_open_pi_data_to_text": 55.2385, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_f1_for_task1659_billsum_title_generation": 25.3967, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 58.9434, + "eval_f1_for_task1728_web_nlg_data_to_text": 19.9432, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 35.0, + "eval_f1_for_task202_multinli_textual_entailment": 27.0, + "eval_f1_for_task219_rocstories_title_generation": 6.0422, + "eval_f1_for_task220_rocstories_title_generation": 49.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 44.0, + "eval_f1_for_task232_iirc_answerability_classification": 1.0, + "eval_f1_for_task233_iirc_answerability_classification": 47.087, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 39.2214, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 34.3762, + "eval_f1_for_task288_gigaword_title_generation": 20.9805, + "eval_f1_for_task290_tellmewhy_answerability_classification": 81.6667, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 45.0, + "eval_f1_for_task329_gap_coreference_resolution": 31.0, + "eval_f1_for_task330_gap_coreference_resolution": 29.2381, + "eval_f1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 27.5466, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 1.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 64.1724, + "eval_f1_for_task418_persent_title_generation": 10.6401, + "eval_f1_for_task442_com_qa_question_rewriting": 53.2883, + "eval_f1_for_task500_scruples_title_generation": 12.0791, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 26.5971, + "eval_f1_for_task520_aquamuse_answerability_classification": 52.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 5.1157, + "eval_f1_for_task602_wikitext_title_generation": 4.746, + "eval_f1_for_task613_liar_keyword_tagging": 14.1429, + "eval_f1_for_task614_glucose_cause_effect_classification": 21.5276, + "eval_f1_for_task619_ohsumed_title_generation": 24.8313, + "eval_f1_for_task620_ohsumed_keyword_tagging": 9.7169, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 25.0, + "eval_f1_for_task642_e_snli_textual_entailment": 48.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 80.4167, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 39.2664, + "eval_f1_for_task670_ambigqa_question_rewriting": 69.4175, + "eval_f1_for_task671_ambigqa_question_rewriting": 57.7333, + "eval_f1_for_task677_ollie_data_to_text": 12.4192, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 18.3902, + "eval_f1_for_task760_msr_sqa_data_to_text": 2.0324, + "eval_f1_for_task769_qed_title_generation": 62.981, + "eval_f1_for_task827_copa_cause_effect_classification": 48.0, + "eval_f1_for_task828_copa_cause_effect_classification": 45.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 69.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 38.4746, + "eval_f1_for_task892_gap_coreference_resolution": 52.0, + "eval_f1_for_task893_gap_coreference_resolution": 24.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 43.6671, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 38.875, + "eval_f1_for_title_generation": 19.7729, + "eval_f1_for_word_analogy": 12.375, + "eval_gen_len": 11.3181, + "eval_global_step": 1000, + "eval_loss": 1.7911953926086426, + "eval_rouge1": 38.9333, + "eval_rouge1_for_answerability_classification": 47.981, + "eval_rouge1_for_cause_effect_classification": 51.038, + "eval_rouge1_for_coreference_resolution": 38.4287, + "eval_rouge1_for_data_to_text": 32.5807, + "eval_rouge1_for_dialogue_act_recognition": 44.3333, + "eval_rouge1_for_grammar_error_correction": 50.1466, + "eval_rouge1_for_keyword_tagging": 46.1861, + "eval_rouge1_for_overlap_extraction": 33.6839, + "eval_rouge1_for_question_rewriting": 62.0842, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 24.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 78.5972, + "eval_rouge1_for_task035_winogrande_question_rewriting": 78.583, + "eval_rouge1_for_task036_qasc_keyword_tagging": 61.4634, + "eval_rouge1_for_task039_qasc_overlap_extraction": 31.6667, + "eval_rouge1_for_task050_multirc_answerability_classification": 49.0, + "eval_rouge1_for_task102_commongen_data_to_text": 38.8105, + "eval_rouge1_for_task1152_bard_word_analogy": 3.0, + "eval_rouge1_for_task1153_bard_word_analogy": 0.0, + "eval_rouge1_for_task1154_bard_word_analogy": 8.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 14.0, + "eval_rouge1_for_task1157_bard_word_analogy": 9.0, + "eval_rouge1_for_task1158_bard_word_analogy": 7.0, + "eval_rouge1_for_task1159_bard_word_analogy": 8.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 18.2741, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 76.0032, + "eval_rouge1_for_task121_zest_question_rewriting": 45.0863, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 5.2551, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 35.4569, + "eval_rouge1_for_task1356_xlsum_title_generation": 9.4041, + "eval_rouge1_for_task1358_xlsum_title_generation": 31.053, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 47.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 16.7865, + "eval_rouge1_for_task1409_dart_data_to_text": 26.5224, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 19.1428, + "eval_rouge1_for_task1439_doqa_answerability_classification": 43.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 11.5372, + "eval_rouge1_for_task1554_scitail_textual_entailment": 49.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 81.1504, + "eval_rouge1_for_task1562_zest_question_rewriting": 40.9983, + "eval_rouge1_for_task1586_scifact_title_generation": 22.69, + "eval_rouge1_for_task1598_nyc_data_to_text": 32.7819, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 75.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 73.9892, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 55.7631, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1659_billsum_title_generation": 26.8316, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 58.9434, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 39.9919, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 35.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 27.0, + "eval_rouge1_for_task219_rocstories_title_generation": 8.4761, + "eval_rouge1_for_task220_rocstories_title_generation": 49.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 44.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 1.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 47.087, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 40.4, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 35.701, + "eval_rouge1_for_task288_gigaword_title_generation": 23.242, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 81.6667, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 44.9, + "eval_rouge1_for_task329_gap_coreference_resolution": 31.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 29.1667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 28.0506, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 3.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 66.1281, + "eval_rouge1_for_task418_persent_title_generation": 11.7495, + "eval_rouge1_for_task442_com_qa_question_rewriting": 57.9001, + "eval_rouge1_for_task500_scruples_title_generation": 13.0352, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 26.7031, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 52.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 5.7068, + "eval_rouge1_for_task602_wikitext_title_generation": 4.8012, + "eval_rouge1_for_task613_liar_keyword_tagging": 27.8, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 22.5485, + "eval_rouge1_for_task619_ohsumed_title_generation": 27.4722, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 11.9502, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 25.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 48.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 80.7167, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 39.6553, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 71.125, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 59.0584, + "eval_rouge1_for_task677_ollie_data_to_text": 13.3709, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 18.8872, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 2.6392, + "eval_rouge1_for_task769_qed_title_generation": 62.9487, + "eval_rouge1_for_task827_copa_cause_effect_classification": 48.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 45.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 69.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 38.2703, + "eval_rouge1_for_task892_gap_coreference_resolution": 52.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 24.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 44.4031, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 40.9028, + "eval_rouge1_for_title_generation": 21.093, + "eval_rouge1_for_word_analogy": 12.375, + "eval_rougeL": 37.9507, + "eval_rougeL_for_answerability_classification": 47.981, + "eval_rougeL_for_cause_effect_classification": 50.6056, + "eval_rougeL_for_coreference_resolution": 38.4287, + "eval_rougeL_for_data_to_text": 27.5606, + "eval_rougeL_for_dialogue_act_recognition": 44.3333, + "eval_rougeL_for_grammar_error_correction": 49.1437, + "eval_rougeL_for_keyword_tagging": 45.6756, + "eval_rougeL_for_overlap_extraction": 32.9188, + "eval_rougeL_for_question_rewriting": 58.9763, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 24.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 78.4796, + "eval_rougeL_for_task035_winogrande_question_rewriting": 78.2913, + "eval_rougeL_for_task036_qasc_keyword_tagging": 58.9112, + "eval_rougeL_for_task039_qasc_overlap_extraction": 31.6667, + "eval_rougeL_for_task050_multirc_answerability_classification": 49.0, + "eval_rougeL_for_task102_commongen_data_to_text": 33.4455, + "eval_rougeL_for_task1152_bard_word_analogy": 3.0, + "eval_rougeL_for_task1153_bard_word_analogy": 0.0, + "eval_rougeL_for_task1154_bard_word_analogy": 8.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 14.0, + "eval_rougeL_for_task1157_bard_word_analogy": 9.0, + "eval_rougeL_for_task1158_bard_word_analogy": 7.0, + "eval_rougeL_for_task1159_bard_word_analogy": 8.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 15.1626, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 74.901, + "eval_rougeL_for_task121_zest_question_rewriting": 40.0613, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 5.2551, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 33.0324, + "eval_rougeL_for_task1356_xlsum_title_generation": 7.821, + "eval_rougeL_for_task1358_xlsum_title_generation": 25.9648, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 47.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 15.5723, + "eval_rougeL_for_task1409_dart_data_to_text": 22.9021, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 18.3421, + "eval_rougeL_for_task1439_doqa_answerability_classification": 43.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 10.6947, + "eval_rougeL_for_task1554_scitail_textual_entailment": 49.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 79.9453, + "eval_rougeL_for_task1562_zest_question_rewriting": 35.9712, + "eval_rougeL_for_task1586_scifact_title_generation": 18.135, + "eval_rougeL_for_task1598_nyc_data_to_text": 24.0326, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 75.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 71.5653, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 49.6737, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1659_billsum_title_generation": 22.6784, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 58.9434, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 35.7478, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 35.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 27.0, + "eval_rougeL_for_task219_rocstories_title_generation": 8.4761, + "eval_rougeL_for_task220_rocstories_title_generation": 49.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 44.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 1.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 47.087, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 40.4, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 34.171, + "eval_rougeL_for_task288_gigaword_title_generation": 19.8466, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 81.6667, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 44.9, + "eval_rougeL_for_task329_gap_coreference_resolution": 31.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 29.1667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 27.0698, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 3.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 55.3668, + "eval_rougeL_for_task418_persent_title_generation": 10.2766, + "eval_rougeL_for_task442_com_qa_question_rewriting": 54.1953, + "eval_rougeL_for_task500_scruples_title_generation": 11.0649, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 26.4261, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 52.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 5.7068, + "eval_rougeL_for_task602_wikitext_title_generation": 4.7721, + "eval_rougeL_for_task613_liar_keyword_tagging": 27.8, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 20.5029, + "eval_rougeL_for_task619_ohsumed_title_generation": 24.0879, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 11.9502, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 25.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 48.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 80.7167, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 39.6553, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 69.3275, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 57.5477, + "eval_rougeL_for_task677_ollie_data_to_text": 11.4403, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 16.4818, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 2.3509, + "eval_rougeL_for_task769_qed_title_generation": 62.9487, + "eval_rougeL_for_task827_copa_cause_effect_classification": 48.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 45.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 69.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 38.2703, + "eval_rougeL_for_task892_gap_coreference_resolution": 52.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 24.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 34.2248, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 40.9028, + "eval_rougeL_for_title_generation": 19.2845, + "eval_rougeL_for_word_analogy": 12.375, + "eval_runtime": 395.9774, + "eval_samples_per_second": 30.077, + "eval_steps_per_second": 0.942, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 1.9962, + "step": 1500 + }, + { + "epoch": 0.34, + "eval_exact_match": 24.3577, + "eval_exact_match_for_answerability_classification": 46.8462, + "eval_exact_match_for_cause_effect_classification": 35.4286, + "eval_exact_match_for_coreference_resolution": 29.5, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 36.8571, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 31.6, + "eval_exact_match_for_overlap_extraction": 11.5, + "eval_exact_match_for_question_rewriting": 1.4545, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 22.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 24.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 2.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 7.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 9.0, + "eval_exact_match_for_task1157_bard_word_analogy": 10.0, + "eval_exact_match_for_task1158_bard_word_analogy": 4.0, + "eval_exact_match_for_task1159_bard_word_analogy": 8.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 49.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 46.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 1.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 32.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 14.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 35.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 49.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 1.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 33.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 53.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 21.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 52.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 6.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 52.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 63.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 26.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 41.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 49.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 49.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 56.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 28.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 31.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 63.0, + "eval_exact_match_for_textual_entailment": 40.5833, + "eval_exact_match_for_title_generation": 5.4372, + "eval_exact_match_for_word_analogy": 11.25, + "eval_f1": 38.1986, + "eval_f1_for_answerability_classification": 49.2564, + "eval_f1_for_cause_effect_classification": 52.8556, + "eval_f1_for_coreference_resolution": 38.6829, + "eval_f1_for_data_to_text": 27.9196, + "eval_f1_for_dialogue_act_recognition": 40.5, + "eval_f1_for_grammar_error_correction": 48.0569, + "eval_f1_for_keyword_tagging": 44.8027, + "eval_f1_for_overlap_extraction": 30.1178, + "eval_f1_for_question_rewriting": 61.874, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 22.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 83.7862, + "eval_f1_for_task035_winogrande_question_rewriting": 77.0685, + "eval_f1_for_task036_qasc_keyword_tagging": 64.9801, + "eval_f1_for_task039_qasc_overlap_extraction": 26.3333, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 26.8493, + "eval_f1_for_task1152_bard_word_analogy": 2.0, + "eval_f1_for_task1153_bard_word_analogy": 0.0, + "eval_f1_for_task1154_bard_word_analogy": 7.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 9.0, + "eval_f1_for_task1157_bard_word_analogy": 10.0, + "eval_f1_for_task1158_bard_word_analogy": 4.0, + "eval_f1_for_task1159_bard_word_analogy": 8.0, + "eval_f1_for_task1161_coda_19_title_generation": 15.7462, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 75.5852, + "eval_f1_for_task121_zest_question_rewriting": 46.96, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 7.2093, + "eval_f1_for_task1344_rte_textual_entailment": 49.0, + "eval_f1_for_task1345_qqp_question_rewriting": 32.9337, + "eval_f1_for_task1356_xlsum_title_generation": 8.5414, + "eval_f1_for_task1358_xlsum_title_generation": 27.3419, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 40.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 46.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 17.6796, + "eval_f1_for_task1409_dart_data_to_text": 29.3842, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 14.1963, + "eval_f1_for_task1439_doqa_answerability_classification": 49.0, + "eval_f1_for_task1442_doqa_answerability_classification": 52.0, + "eval_f1_for_task1516_imppres_textual_entailment": 1.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 32.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1540_peer_read_title_generation": 13.223, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 81.9175, + "eval_f1_for_task1562_zest_question_rewriting": 43.7365, + "eval_f1_for_task1586_scifact_title_generation": 19.8534, + "eval_f1_for_task1598_nyc_data_to_text": 28.0256, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 34.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 70.6506, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 53.0631, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_f1_for_task1659_billsum_title_generation": 25.2063, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 59.8048, + "eval_f1_for_task1728_web_nlg_data_to_text": 20.9423, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 35.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 5.4643, + "eval_f1_for_task220_rocstories_title_generation": 49.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_f1_for_task232_iirc_answerability_classification": 1.0, + "eval_f1_for_task233_iirc_answerability_classification": 50.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 48.5381, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 33.9022, + "eval_f1_for_task288_gigaword_title_generation": 22.6988, + "eval_f1_for_task290_tellmewhy_answerability_classification": 84.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 45.2, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 28.4667, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 29.9067, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 3.0, + "eval_f1_for_task402_grailqa_question_rewriting": 64.4932, + "eval_f1_for_task418_persent_title_generation": 9.4421, + "eval_f1_for_task442_com_qa_question_rewriting": 54.1713, + "eval_f1_for_task500_scruples_title_generation": 12.8699, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 27.8188, + "eval_f1_for_task520_aquamuse_answerability_classification": 52.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 4.5325, + "eval_f1_for_task602_wikitext_title_generation": 3.8931, + "eval_f1_for_task613_liar_keyword_tagging": 17.4762, + "eval_f1_for_task614_glucose_cause_effect_classification": 25.4156, + "eval_f1_for_task619_ohsumed_title_generation": 23.6445, + "eval_f1_for_task620_ohsumed_keyword_tagging": 13.5895, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 52.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 78.9675, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 36.1021, + "eval_f1_for_task670_ambigqa_question_rewriting": 72.3856, + "eval_f1_for_task671_ambigqa_question_rewriting": 58.8428, + "eval_f1_for_task677_ollie_data_to_text": 12.4259, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 17.4264, + "eval_f1_for_task760_msr_sqa_data_to_text": 1.3903, + "eval_f1_for_task769_qed_title_generation": 65.1835, + "eval_f1_for_task827_copa_cause_effect_classification": 49.0, + "eval_f1_for_task828_copa_cause_effect_classification": 49.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 56.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 33.7822, + "eval_f1_for_task892_gap_coreference_resolution": 52.0, + "eval_f1_for_task893_gap_coreference_resolution": 31.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 41.8849, + "eval_f1_for_task970_sherliic_textual_entailment": 63.0, + "eval_f1_for_textual_entailment": 40.5833, + "eval_f1_for_title_generation": 20.0937, + "eval_f1_for_word_analogy": 11.25, + "eval_gen_len": 10.6063, + "eval_global_step": 1500, + "eval_loss": 1.7514349222183228, + "eval_rouge1": 39.7903, + "eval_rouge1_for_answerability_classification": 49.2564, + "eval_rouge1_for_cause_effect_classification": 53.0934, + "eval_rouge1_for_coreference_resolution": 39.1224, + "eval_rouge1_for_data_to_text": 32.6496, + "eval_rouge1_for_dialogue_act_recognition": 42.4048, + "eval_rouge1_for_grammar_error_correction": 54.2643, + "eval_rouge1_for_keyword_tagging": 48.8274, + "eval_rouge1_for_overlap_extraction": 34.0491, + "eval_rouge1_for_question_rewriting": 63.6054, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 25.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 83.798, + "eval_rouge1_for_task035_winogrande_question_rewriting": 77.969, + "eval_rouge1_for_task036_qasc_keyword_tagging": 67.6801, + "eval_rouge1_for_task039_qasc_overlap_extraction": 33.0, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 37.0776, + "eval_rouge1_for_task1152_bard_word_analogy": 2.0, + "eval_rouge1_for_task1153_bard_word_analogy": 0.0, + "eval_rouge1_for_task1154_bard_word_analogy": 7.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 9.0, + "eval_rouge1_for_task1157_bard_word_analogy": 10.0, + "eval_rouge1_for_task1158_bard_word_analogy": 4.0, + "eval_rouge1_for_task1159_bard_word_analogy": 8.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 17.5209, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 75.8077, + "eval_rouge1_for_task121_zest_question_rewriting": 48.7819, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 7.2534, + "eval_rouge1_for_task1344_rte_textual_entailment": 49.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 35.785, + "eval_rouge1_for_task1356_xlsum_title_generation": 10.2099, + "eval_rouge1_for_task1358_xlsum_title_generation": 31.554, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 46.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 17.6978, + "eval_rouge1_for_task1409_dart_data_to_text": 30.5479, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 23.4376, + "eval_rouge1_for_task1439_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 1.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 32.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 15.1128, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 85.091, + "eval_rouge1_for_task1562_zest_question_rewriting": 46.9349, + "eval_rouge1_for_task1586_scifact_title_generation": 23.0057, + "eval_rouge1_for_task1598_nyc_data_to_text": 33.5349, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 71.2734, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 53.4415, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rouge1_for_task1659_billsum_title_generation": 26.8742, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 59.8048, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 39.8694, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 35.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 8.0723, + "eval_rouge1_for_task220_rocstories_title_generation": 49.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 1.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 49.75, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 35.0983, + "eval_rouge1_for_task288_gigaword_title_generation": 25.0529, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 84.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 45.1, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 28.4667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.6355, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 5.0, + "eval_rouge1_for_task402_grailqa_question_rewriting": 66.2263, + "eval_rouge1_for_task418_persent_title_generation": 10.9628, + "eval_rouge1_for_task442_com_qa_question_rewriting": 59.1477, + "eval_rouge1_for_task500_scruples_title_generation": 13.9359, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 28.0363, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 52.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 5.2653, + "eval_rouge1_for_task602_wikitext_title_generation": 3.9364, + "eval_rouge1_for_task613_liar_keyword_tagging": 31.6333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 26.3515, + "eval_rouge1_for_task619_ohsumed_title_generation": 26.1001, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 15.5562, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 52.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 80.2675, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 36.391, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 73.8368, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 60.0994, + "eval_rouge1_for_task677_ollie_data_to_text": 13.1389, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 18.0168, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 1.8615, + "eval_rouge1_for_task769_qed_title_generation": 65.106, + "eval_rouge1_for_task827_copa_cause_effect_classification": 49.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 49.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 56.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 33.5342, + "eval_rouge1_for_task892_gap_coreference_resolution": 52.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 31.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 43.8938, + "eval_rouge1_for_task970_sherliic_textual_entailment": 63.0, + "eval_rouge1_for_textual_entailment": 42.4167, + "eval_rouge1_for_title_generation": 21.5463, + "eval_rouge1_for_word_analogy": 11.25, + "eval_rougeL": 38.8197, + "eval_rougeL_for_answerability_classification": 49.2564, + "eval_rougeL_for_cause_effect_classification": 52.5946, + "eval_rougeL_for_coreference_resolution": 39.1224, + "eval_rougeL_for_data_to_text": 27.7317, + "eval_rougeL_for_dialogue_act_recognition": 42.4048, + "eval_rougeL_for_grammar_error_correction": 53.4292, + "eval_rougeL_for_keyword_tagging": 48.4539, + "eval_rougeL_for_overlap_extraction": 33.3252, + "eval_rougeL_for_question_rewriting": 60.4066, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 25.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 83.798, + "eval_rougeL_for_task035_winogrande_question_rewriting": 77.6772, + "eval_rougeL_for_task036_qasc_keyword_tagging": 65.8126, + "eval_rougeL_for_task039_qasc_overlap_extraction": 33.0, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 31.4457, + "eval_rougeL_for_task1152_bard_word_analogy": 2.0, + "eval_rougeL_for_task1153_bard_word_analogy": 0.0, + "eval_rougeL_for_task1154_bard_word_analogy": 7.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 9.0, + "eval_rougeL_for_task1157_bard_word_analogy": 10.0, + "eval_rougeL_for_task1158_bard_word_analogy": 4.0, + "eval_rougeL_for_task1159_bard_word_analogy": 8.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 14.8098, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 74.7136, + "eval_rougeL_for_task121_zest_question_rewriting": 44.0477, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 7.2534, + "eval_rougeL_for_task1344_rte_textual_entailment": 49.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 33.5308, + "eval_rougeL_for_task1356_xlsum_title_generation": 8.6676, + "eval_rougeL_for_task1358_xlsum_title_generation": 26.2082, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 46.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 15.9454, + "eval_rougeL_for_task1409_dart_data_to_text": 26.9513, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 22.7709, + "eval_rougeL_for_task1439_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 1.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 32.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 14.7654, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 84.0875, + "eval_rougeL_for_task1562_zest_question_rewriting": 39.7546, + "eval_rougeL_for_task1586_scifact_title_generation": 18.8049, + "eval_rougeL_for_task1598_nyc_data_to_text": 24.8921, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 69.3125, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 49.4033, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rougeL_for_task1659_billsum_title_generation": 22.4679, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 59.8048, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 35.2154, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 35.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 8.0723, + "eval_rougeL_for_task220_rocstories_title_generation": 49.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 1.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 49.75, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 33.6504, + "eval_rougeL_for_task288_gigaword_title_generation": 21.6803, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 84.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 45.1, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 28.4667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 29.2303, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 5.0, + "eval_rougeL_for_task402_grailqa_question_rewriting": 54.687, + "eval_rougeL_for_task418_persent_title_generation": 9.5385, + "eval_rougeL_for_task442_com_qa_question_rewriting": 55.8694, + "eval_rougeL_for_task500_scruples_title_generation": 12.194, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 27.5732, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 52.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 5.2653, + "eval_rougeL_for_task602_wikitext_title_generation": 3.9073, + "eval_rougeL_for_task613_liar_keyword_tagging": 31.6333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 24.2652, + "eval_rougeL_for_task619_ohsumed_title_generation": 22.8276, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 15.5562, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 52.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 80.2675, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 36.391, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 72.1083, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 58.9733, + "eval_rougeL_for_task677_ollie_data_to_text": 10.9122, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 15.5565, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 1.8129, + "eval_rougeL_for_task769_qed_title_generation": 65.106, + "eval_rougeL_for_task827_copa_cause_effect_classification": 49.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 49.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 56.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 33.5342, + "eval_rougeL_for_task892_gap_coreference_resolution": 52.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 31.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 33.8269, + "eval_rougeL_for_task970_sherliic_textual_entailment": 63.0, + "eval_rougeL_for_textual_entailment": 42.4167, + "eval_rougeL_for_title_generation": 19.7911, + "eval_rougeL_for_word_analogy": 11.25, + "eval_runtime": 362.8623, + "eval_samples_per_second": 32.822, + "eval_steps_per_second": 1.028, + "step": 1500 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 1.9374, + "step": 2000 + }, + { + "epoch": 0.46, + "eval_exact_match": 24.5424, + "eval_exact_match_for_answerability_classification": 46.9231, + "eval_exact_match_for_cause_effect_classification": 35.8571, + "eval_exact_match_for_coreference_resolution": 29.6429, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 35.1429, + "eval_exact_match_for_grammar_error_correction": 8.0, + "eval_exact_match_for_keyword_tagging": 32.2, + "eval_exact_match_for_overlap_extraction": 10.0, + "eval_exact_match_for_question_rewriting": 1.1818, + "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 23.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 15.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 20.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 3.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 8.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 9.0, + "eval_exact_match_for_task1157_bard_word_analogy": 10.0, + "eval_exact_match_for_task1158_bard_word_analogy": 7.0, + "eval_exact_match_for_task1159_bard_word_analogy": 9.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 49.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 44.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 44.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 18.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 52.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 16.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 27.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 14.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 49.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 8.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 51.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 52.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 41.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 29.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 24.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 56.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 41.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 18.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 6.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 32.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 56.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 73.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 20.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 52.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 51.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 55.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 45.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 56.0, + "eval_exact_match_for_textual_entailment": 41.1667, + "eval_exact_match_for_title_generation": 5.9978, + "eval_exact_match_for_word_analogy": 12.0, + "eval_f1": 38.7385, + "eval_f1_for_answerability_classification": 49.5256, + "eval_f1_for_cause_effect_classification": 53.5967, + "eval_f1_for_coreference_resolution": 39.0835, + "eval_f1_for_data_to_text": 28.702, + "eval_f1_for_dialogue_act_recognition": 38.7857, + "eval_f1_for_grammar_error_correction": 52.0458, + "eval_f1_for_keyword_tagging": 46.2746, + "eval_f1_for_overlap_extraction": 28.4548, + "eval_f1_for_question_rewriting": 63.0972, + "eval_f1_for_task020_mctaco_answerability_classification": 53.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 23.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 83.5197, + "eval_f1_for_task035_winogrande_question_rewriting": 77.2968, + "eval_f1_for_task036_qasc_keyword_tagging": 59.3261, + "eval_f1_for_task039_qasc_overlap_extraction": 24.6667, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 23.9104, + "eval_f1_for_task1152_bard_word_analogy": 3.0, + "eval_f1_for_task1153_bard_word_analogy": 0.0, + "eval_f1_for_task1154_bard_word_analogy": 8.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 9.0, + "eval_f1_for_task1157_bard_word_analogy": 10.0, + "eval_f1_for_task1158_bard_word_analogy": 7.0, + "eval_f1_for_task1159_bard_word_analogy": 9.0, + "eval_f1_for_task1161_coda_19_title_generation": 16.7813, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 78.6839, + "eval_f1_for_task121_zest_question_rewriting": 44.2735, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 3.2088, + "eval_f1_for_task1344_rte_textual_entailment": 49.0, + "eval_f1_for_task1345_qqp_question_rewriting": 33.4246, + "eval_f1_for_task1356_xlsum_title_generation": 7.8556, + "eval_f1_for_task1358_xlsum_title_generation": 27.4045, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 32.0, + "eval_f1_for_task1388_cb_textual_entailment": 40.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 44.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 16.9863, + "eval_f1_for_task1409_dart_data_to_text": 28.929, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 21.0201, + "eval_f1_for_task1439_doqa_answerability_classification": 44.0, + "eval_f1_for_task1442_doqa_answerability_classification": 49.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 18.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 52.0, + "eval_f1_for_task1540_peer_read_title_generation": 14.0499, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.0714, + "eval_f1_for_task1562_zest_question_rewriting": 44.1758, + "eval_f1_for_task1586_scifact_title_generation": 20.5944, + "eval_f1_for_task1598_nyc_data_to_text": 29.5468, + "eval_f1_for_task1612_sick_textual_entailment": 34.0, + "eval_f1_for_task1615_sick_textual_entailment": 27.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 74.5492, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_f1_for_task1631_open_pi_data_to_text": 56.814, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_f1_for_task1659_billsum_title_generation": 24.4397, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 59.1255, + "eval_f1_for_task1728_web_nlg_data_to_text": 22.1235, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 34.0, + "eval_f1_for_task219_rocstories_title_generation": 6.1435, + "eval_f1_for_task220_rocstories_title_generation": 49.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, + "eval_f1_for_task232_iirc_answerability_classification": 8.0, + "eval_f1_for_task233_iirc_answerability_classification": 51.5, + "eval_f1_for_task242_tweetqa_answerability_classification": 52.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 56.7881, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 32.243, + "eval_f1_for_task288_gigaword_title_generation": 23.2874, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 45.7, + "eval_f1_for_task329_gap_coreference_resolution": 29.0, + "eval_f1_for_task330_gap_coreference_resolution": 30.9667, + "eval_f1_for_task349_squad2.0_answerability_classification": 56.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.3322, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 1.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 63.9047, + "eval_f1_for_task418_persent_title_generation": 12.5458, + "eval_f1_for_task442_com_qa_question_rewriting": 57.6635, + "eval_f1_for_task500_scruples_title_generation": 13.0121, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 27.7796, + "eval_f1_for_task520_aquamuse_answerability_classification": 41.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 5.4009, + "eval_f1_for_task602_wikitext_title_generation": 4.333, + "eval_f1_for_task613_liar_keyword_tagging": 19.4762, + "eval_f1_for_task614_glucose_cause_effect_classification": 26.1777, + "eval_f1_for_task619_ohsumed_title_generation": 26.7651, + "eval_f1_for_task620_ohsumed_keyword_tagging": 19.3272, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 32.0, + "eval_f1_for_task642_e_snli_textual_entailment": 56.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 84.2434, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 34.4047, + "eval_f1_for_task670_ambigqa_question_rewriting": 74.5367, + "eval_f1_for_task671_ambigqa_question_rewriting": 62.0413, + "eval_f1_for_task677_ollie_data_to_text": 13.7827, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 17.6123, + "eval_f1_for_task760_msr_sqa_data_to_text": 1.7884, + "eval_f1_for_task769_qed_title_generation": 69.0376, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 51.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 55.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 38.1845, + "eval_f1_for_task892_gap_coreference_resolution": 52.0, + "eval_f1_for_task893_gap_coreference_resolution": 32.6667, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 45.0, + "eval_f1_for_task957_e2e_data_to_text": 44.5211, + "eval_f1_for_task970_sherliic_textual_entailment": 56.0, + "eval_f1_for_textual_entailment": 41.1667, + "eval_f1_for_title_generation": 20.6591, + "eval_f1_for_word_analogy": 12.0, + "eval_gen_len": 11.5325, + "eval_global_step": 2000, + "eval_loss": 1.7507429122924805, + "eval_rouge1": 40.4675, + "eval_rouge1_for_answerability_classification": 49.5256, + "eval_rouge1_for_cause_effect_classification": 53.9535, + "eval_rouge1_for_coreference_resolution": 39.4568, + "eval_rouge1_for_data_to_text": 34.4017, + "eval_rouge1_for_dialogue_act_recognition": 40.6905, + "eval_rouge1_for_grammar_error_correction": 60.8072, + "eval_rouge1_for_keyword_tagging": 50.0525, + "eval_rouge1_for_overlap_extraction": 32.4776, + "eval_rouge1_for_question_rewriting": 64.7569, + "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 26.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 83.6903, + "eval_rouge1_for_task035_winogrande_question_rewriting": 78.0457, + "eval_rouge1_for_task036_qasc_keyword_tagging": 61.5729, + "eval_rouge1_for_task039_qasc_overlap_extraction": 31.3333, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 39.5318, + "eval_rouge1_for_task1152_bard_word_analogy": 3.0, + "eval_rouge1_for_task1153_bard_word_analogy": 0.0, + "eval_rouge1_for_task1154_bard_word_analogy": 8.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 9.0, + "eval_rouge1_for_task1157_bard_word_analogy": 10.0, + "eval_rouge1_for_task1158_bard_word_analogy": 7.0, + "eval_rouge1_for_task1159_bard_word_analogy": 9.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 18.9769, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.0156, + "eval_rouge1_for_task121_zest_question_rewriting": 46.2594, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 3.2813, + "eval_rouge1_for_task1344_rte_textual_entailment": 49.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 36.4893, + "eval_rouge1_for_task1356_xlsum_title_generation": 9.5858, + "eval_rouge1_for_task1358_xlsum_title_generation": 31.9645, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 44.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 16.7603, + "eval_rouge1_for_task1409_dart_data_to_text": 30.7766, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.3159, + "eval_rouge1_for_task1439_doqa_answerability_classification": 44.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 18.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 52.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 15.1252, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.2984, + "eval_rouge1_for_task1562_zest_question_rewriting": 47.2516, + "eval_rouge1_for_task1586_scifact_title_generation": 23.5603, + "eval_rouge1_for_task1598_nyc_data_to_text": 35.6649, + "eval_rouge1_for_task1612_sick_textual_entailment": 34.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 75.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 75.3139, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 57.5276, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_rouge1_for_task1659_billsum_title_generation": 26.0232, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 59.1255, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 42.4812, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task219_rocstories_title_generation": 8.6141, + "eval_rouge1_for_task220_rocstories_title_generation": 49.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 8.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 51.5, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 52.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 57.1333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 33.6219, + "eval_rouge1_for_task288_gigaword_title_generation": 26.0215, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 45.6, + "eval_rouge1_for_task329_gap_coreference_resolution": 29.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 30.9667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 56.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.9784, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 3.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 65.4942, + "eval_rouge1_for_task418_persent_title_generation": 14.0092, + "eval_rouge1_for_task442_com_qa_question_rewriting": 61.9431, + "eval_rouge1_for_task500_scruples_title_generation": 14.084, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 27.9707, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 41.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 6.5528, + "eval_rouge1_for_task602_wikitext_title_generation": 4.3954, + "eval_rouge1_for_task613_liar_keyword_tagging": 33.1333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 28.0292, + "eval_rouge1_for_task619_ohsumed_title_generation": 29.2063, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 21.5272, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 32.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 56.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 85.0291, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 34.4734, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 75.8016, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.0211, + "eval_rouge1_for_task677_ollie_data_to_text": 14.9154, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 18.2805, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 2.715, + "eval_rouge1_for_task769_qed_title_generation": 69.0149, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 51.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 55.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 38.0957, + "eval_rouge1_for_task892_gap_coreference_resolution": 52.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 32.6667, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 45.0, + "eval_rouge1_for_task957_e2e_data_to_text": 45.7943, + "eval_rouge1_for_task970_sherliic_textual_entailment": 56.0, + "eval_rouge1_for_textual_entailment": 43.1944, + "eval_rouge1_for_title_generation": 22.1392, + "eval_rouge1_for_word_analogy": 12.0, + "eval_rougeL": 39.3888, + "eval_rougeL_for_answerability_classification": 49.5256, + "eval_rougeL_for_cause_effect_classification": 53.3661, + "eval_rougeL_for_coreference_resolution": 39.4492, + "eval_rougeL_for_data_to_text": 28.7806, + "eval_rougeL_for_dialogue_act_recognition": 40.6905, + "eval_rougeL_for_grammar_error_correction": 59.6993, + "eval_rougeL_for_keyword_tagging": 49.5122, + "eval_rougeL_for_overlap_extraction": 31.8524, + "eval_rougeL_for_question_rewriting": 61.3131, + "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 26.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 83.6903, + "eval_rougeL_for_task035_winogrande_question_rewriting": 77.7609, + "eval_rougeL_for_task036_qasc_keyword_tagging": 58.8711, + "eval_rougeL_for_task039_qasc_overlap_extraction": 31.3333, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 32.5256, + "eval_rougeL_for_task1152_bard_word_analogy": 3.0, + "eval_rougeL_for_task1153_bard_word_analogy": 0.0, + "eval_rougeL_for_task1154_bard_word_analogy": 8.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 9.0, + "eval_rougeL_for_task1157_bard_word_analogy": 10.0, + "eval_rougeL_for_task1158_bard_word_analogy": 7.0, + "eval_rougeL_for_task1159_bard_word_analogy": 9.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 16.026, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 77.9215, + "eval_rougeL_for_task121_zest_question_rewriting": 40.7756, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 3.2456, + "eval_rougeL_for_task1344_rte_textual_entailment": 49.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 34.1539, + "eval_rougeL_for_task1356_xlsum_title_generation": 8.2404, + "eval_rougeL_for_task1358_xlsum_title_generation": 26.4058, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 44.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 15.24, + "eval_rougeL_for_task1409_dart_data_to_text": 27.3414, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 34.0041, + "eval_rougeL_for_task1439_doqa_answerability_classification": 44.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 18.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 52.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 14.1744, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.3945, + "eval_rougeL_for_task1562_zest_question_rewriting": 39.3295, + "eval_rougeL_for_task1586_scifact_title_generation": 18.8688, + "eval_rougeL_for_task1598_nyc_data_to_text": 26.777, + "eval_rougeL_for_task1612_sick_textual_entailment": 34.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 75.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 72.9906, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 51.0941, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_rougeL_for_task1659_billsum_title_generation": 21.3818, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 59.1255, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 36.4071, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task219_rocstories_title_generation": 8.6141, + "eval_rougeL_for_task220_rocstories_title_generation": 49.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 8.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 51.5, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 52.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 57.1333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 32.3714, + "eval_rougeL_for_task288_gigaword_title_generation": 22.015, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 45.6, + "eval_rougeL_for_task329_gap_coreference_resolution": 29.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 30.9667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 56.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.123, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 3.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 54.2197, + "eval_rougeL_for_task418_persent_title_generation": 12.173, + "eval_rougeL_for_task442_com_qa_question_rewriting": 58.041, + "eval_rougeL_for_task500_scruples_title_generation": 12.3838, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 27.6029, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 41.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 6.5528, + "eval_rougeL_for_task602_wikitext_title_generation": 4.3664, + "eval_rougeL_for_task613_liar_keyword_tagging": 33.1333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 25.7729, + "eval_rougeL_for_task619_ohsumed_title_generation": 26.2602, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 21.5272, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 32.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 56.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 85.0291, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 34.3681, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 74.0197, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 61.5416, + "eval_rougeL_for_task677_ollie_data_to_text": 12.2193, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 15.5614, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 2.5574, + "eval_rougeL_for_task769_qed_title_generation": 69.0149, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 51.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 55.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 38.0957, + "eval_rougeL_for_task892_gap_coreference_resolution": 52.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 32.6667, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 45.0, + "eval_rougeL_for_task957_e2e_data_to_text": 35.458, + "eval_rougeL_for_task970_sherliic_textual_entailment": 56.0, + "eval_rougeL_for_textual_entailment": 43.1944, + "eval_rougeL_for_title_generation": 20.246, + "eval_rougeL_for_word_analogy": 12.0, + "eval_runtime": 405.416, + "eval_samples_per_second": 29.377, + "eval_steps_per_second": 0.92, + "step": 2000 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 1.9278, + "step": 2500 + }, + { + "epoch": 0.57, + "eval_exact_match": 24.3409, + "eval_exact_match_for_answerability_classification": 47.0, + "eval_exact_match_for_cause_effect_classification": 35.1429, + "eval_exact_match_for_coreference_resolution": 30.9286, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 34.4286, + "eval_exact_match_for_grammar_error_correction": 8.0, + "eval_exact_match_for_keyword_tagging": 30.4, + "eval_exact_match_for_overlap_extraction": 10.5, + "eval_exact_match_for_question_rewriting": 1.0, + "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 32.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 18.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 21.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 51.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 3.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 7.0, + "eval_exact_match_for_task1155_bard_word_analogy": 53.0, + "eval_exact_match_for_task1156_bard_word_analogy": 5.0, + "eval_exact_match_for_task1157_bard_word_analogy": 5.0, + "eval_exact_match_for_task1158_bard_word_analogy": 7.0, + "eval_exact_match_for_task1159_bard_word_analogy": 10.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 30.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 37.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 38.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 48.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 47.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 29.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 23.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 16.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 30.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 18.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 36.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 37.0, + "eval_exact_match_for_task219_rocstories_title_generation": 2.0, + "eval_exact_match_for_task220_rocstories_title_generation": 48.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 45.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 17.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 48.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 41.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 21.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 4.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 5.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 47.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 15.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 3.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 48.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 24.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 49.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 68.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 18.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 51.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 48.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 31.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 40.5417, + "eval_exact_match_for_title_generation": 5.8857, + "eval_exact_match_for_word_analogy": 11.25, + "eval_f1": 38.7111, + "eval_f1_for_answerability_classification": 49.5641, + "eval_f1_for_cause_effect_classification": 53.057, + "eval_f1_for_coreference_resolution": 39.9474, + "eval_f1_for_data_to_text": 28.4998, + "eval_f1_for_dialogue_act_recognition": 37.8571, + "eval_f1_for_grammar_error_correction": 47.7601, + "eval_f1_for_keyword_tagging": 44.999, + "eval_f1_for_overlap_extraction": 28.6546, + "eval_f1_for_question_rewriting": 64.4996, + "eval_f1_for_task020_mctaco_answerability_classification": 53.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 34.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 84.5148, + "eval_f1_for_task035_winogrande_question_rewriting": 76.7682, + "eval_f1_for_task036_qasc_keyword_tagging": 60.4499, + "eval_f1_for_task039_qasc_overlap_extraction": 24.1667, + "eval_f1_for_task050_multirc_answerability_classification": 51.0, + "eval_f1_for_task102_commongen_data_to_text": 29.9825, + "eval_f1_for_task1152_bard_word_analogy": 3.0, + "eval_f1_for_task1153_bard_word_analogy": 0.6667, + "eval_f1_for_task1154_bard_word_analogy": 7.0, + "eval_f1_for_task1155_bard_word_analogy": 53.0, + "eval_f1_for_task1156_bard_word_analogy": 5.0, + "eval_f1_for_task1157_bard_word_analogy": 5.0, + "eval_f1_for_task1158_bard_word_analogy": 7.0, + "eval_f1_for_task1159_bard_word_analogy": 10.0, + "eval_f1_for_task1161_coda_19_title_generation": 17.816, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 78.5665, + "eval_f1_for_task121_zest_question_rewriting": 45.5579, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 6.7097, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 37.8875, + "eval_f1_for_task1356_xlsum_title_generation": 8.7299, + "eval_f1_for_task1358_xlsum_title_generation": 28.5552, + "eval_f1_for_task1385_anli_textual_entailment": 30.0, + "eval_f1_for_task1386_anli_textual_entailment": 37.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 38.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 48.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 17.2723, + "eval_f1_for_task1409_dart_data_to_text": 25.6653, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 12.72, + "eval_f1_for_task1439_doqa_answerability_classification": 47.0, + "eval_f1_for_task1442_doqa_answerability_classification": 52.0, + "eval_f1_for_task1516_imppres_textual_entailment": 29.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 23.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 16.4504, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 82.8002, + "eval_f1_for_task1562_zest_question_rewriting": 46.5525, + "eval_f1_for_task1586_scifact_title_generation": 20.5357, + "eval_f1_for_task1598_nyc_data_to_text": 29.2354, + "eval_f1_for_task1612_sick_textual_entailment": 34.0, + "eval_f1_for_task1615_sick_textual_entailment": 30.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 74.4945, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_f1_for_task1631_open_pi_data_to_text": 52.699, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_f1_for_task1659_billsum_title_generation": 25.5764, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 62.119, + "eval_f1_for_task1728_web_nlg_data_to_text": 21.042, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 36.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 37.0, + "eval_f1_for_task219_rocstories_title_generation": 5.812, + "eval_f1_for_task220_rocstories_title_generation": 48.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 45.0, + "eval_f1_for_task232_iirc_answerability_classification": 17.0, + "eval_f1_for_task233_iirc_answerability_classification": 46.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 48.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 54.2714, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 33.1426, + "eval_f1_for_task288_gigaword_title_generation": 23.5252, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 39.2, + "eval_f1_for_task329_gap_coreference_resolution": 35.0, + "eval_f1_for_task330_gap_coreference_resolution": 28.4667, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 71.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.4531, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 4.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 67.1959, + "eval_f1_for_task418_persent_title_generation": 15.1635, + "eval_f1_for_task442_com_qa_question_rewriting": 60.0772, + "eval_f1_for_task500_scruples_title_generation": 13.7814, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 29.3556, + "eval_f1_for_task520_aquamuse_answerability_classification": 47.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 5.6447, + "eval_f1_for_task602_wikitext_title_generation": 5.2968, + "eval_f1_for_task613_liar_keyword_tagging": 17.9, + "eval_f1_for_task614_glucose_cause_effect_classification": 27.2792, + "eval_f1_for_task619_ohsumed_title_generation": 26.1967, + "eval_f1_for_task620_ohsumed_keyword_tagging": 17.1605, + "eval_f1_for_task623_ohsumed_keyword_tagging": 48.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 24.0, + "eval_f1_for_task642_e_snli_textual_entailment": 49.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 81.4844, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 30.8214, + "eval_f1_for_task670_ambigqa_question_rewriting": 74.8402, + "eval_f1_for_task671_ambigqa_question_rewriting": 63.0401, + "eval_f1_for_task677_ollie_data_to_text": 13.7528, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 18.5434, + "eval_f1_for_task760_msr_sqa_data_to_text": 1.109, + "eval_f1_for_task769_qed_title_generation": 69.2499, + "eval_f1_for_task827_copa_cause_effect_classification": 48.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 37.719, + "eval_f1_for_task892_gap_coreference_resolution": 52.0, + "eval_f1_for_task893_gap_coreference_resolution": 34.3333, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_f1_for_task957_e2e_data_to_text": 45.471, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 40.5417, + "eval_f1_for_title_generation": 21.53, + "eval_f1_for_word_analogy": 11.3333, + "eval_gen_len": 10.8898, + "eval_global_step": 2500, + "eval_loss": 1.7274374961853027, + "eval_rouge1": 40.3018, + "eval_rouge1_for_answerability_classification": 49.5641, + "eval_rouge1_for_cause_effect_classification": 53.4239, + "eval_rouge1_for_coreference_resolution": 40.2702, + "eval_rouge1_for_data_to_text": 33.3553, + "eval_rouge1_for_dialogue_act_recognition": 39.7619, + "eval_rouge1_for_grammar_error_correction": 53.8306, + "eval_rouge1_for_keyword_tagging": 48.5619, + "eval_rouge1_for_overlap_extraction": 32.0003, + "eval_rouge1_for_question_rewriting": 66.125, + "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 36.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 84.5532, + "eval_rouge1_for_task035_winogrande_question_rewriting": 77.533, + "eval_rouge1_for_task036_qasc_keyword_tagging": 62.2122, + "eval_rouge1_for_task039_qasc_overlap_extraction": 29.5, + "eval_rouge1_for_task050_multirc_answerability_classification": 51.0, + "eval_rouge1_for_task102_commongen_data_to_text": 40.1821, + "eval_rouge1_for_task1152_bard_word_analogy": 3.0, + "eval_rouge1_for_task1153_bard_word_analogy": 0.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 7.0, + "eval_rouge1_for_task1155_bard_word_analogy": 53.0, + "eval_rouge1_for_task1156_bard_word_analogy": 5.0, + "eval_rouge1_for_task1157_bard_word_analogy": 5.0, + "eval_rouge1_for_task1158_bard_word_analogy": 7.0, + "eval_rouge1_for_task1159_bard_word_analogy": 10.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 20.1225, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 78.8906, + "eval_rouge1_for_task121_zest_question_rewriting": 47.2197, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 7.0006, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 41.3134, + "eval_rouge1_for_task1356_xlsum_title_generation": 10.3859, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.2806, + "eval_rouge1_for_task1385_anli_textual_entailment": 30.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 37.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 38.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 48.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 17.6041, + "eval_rouge1_for_task1409_dart_data_to_text": 27.0124, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 21.633, + "eval_rouge1_for_task1439_doqa_answerability_classification": 47.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 29.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 23.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 17.9326, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.0281, + "eval_rouge1_for_task1562_zest_question_rewriting": 49.8065, + "eval_rouge1_for_task1586_scifact_title_generation": 23.3545, + "eval_rouge1_for_task1598_nyc_data_to_text": 32.5462, + "eval_rouge1_for_task1612_sick_textual_entailment": 34.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 76.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 75.2026, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 53.3402, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_rouge1_for_task1659_billsum_title_generation": 27.2973, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 62.119, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 42.8695, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 36.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 37.0, + "eval_rouge1_for_task219_rocstories_title_generation": 8.5842, + "eval_rouge1_for_task220_rocstories_title_generation": 48.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 45.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 17.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 48.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 55.1167, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 34.5006, + "eval_rouge1_for_task288_gigaword_title_generation": 26.3313, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 39.6, + "eval_rouge1_for_task329_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 28.4667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 71.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.9918, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 5.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 68.9113, + "eval_rouge1_for_task418_persent_title_generation": 16.957, + "eval_rouge1_for_task442_com_qa_question_rewriting": 64.6519, + "eval_rouge1_for_task500_scruples_title_generation": 15.1138, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 29.7582, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 47.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 6.8629, + "eval_rouge1_for_task602_wikitext_title_generation": 5.4029, + "eval_rouge1_for_task613_liar_keyword_tagging": 30.5667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 29.3091, + "eval_rouge1_for_task619_ohsumed_title_generation": 27.9451, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 19.7605, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 48.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 24.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 49.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 82.2701, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 30.9607, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 75.5979, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.6948, + "eval_rouge1_for_task677_ollie_data_to_text": 14.9512, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 19.1436, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 1.6557, + "eval_rouge1_for_task769_qed_title_generation": 69.2262, + "eval_rouge1_for_task827_copa_cause_effect_classification": 48.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 37.8524, + "eval_rouge1_for_task892_gap_coreference_resolution": 52.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 34.3333, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_rouge1_for_task957_e2e_data_to_text": 46.5785, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 42.4861, + "eval_rouge1_for_title_generation": 23.0849, + "eval_rouge1_for_word_analogy": 11.3333, + "eval_rougeL": 39.2454, + "eval_rougeL_for_answerability_classification": 49.5641, + "eval_rougeL_for_cause_effect_classification": 52.8071, + "eval_rougeL_for_coreference_resolution": 40.2702, + "eval_rougeL_for_data_to_text": 28.1276, + "eval_rougeL_for_dialogue_act_recognition": 39.7619, + "eval_rougeL_for_grammar_error_correction": 53.0361, + "eval_rougeL_for_keyword_tagging": 48.1138, + "eval_rougeL_for_overlap_extraction": 31.4177, + "eval_rougeL_for_question_rewriting": 62.8201, + "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 36.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 84.5532, + "eval_rougeL_for_task035_winogrande_question_rewriting": 76.9405, + "eval_rougeL_for_task036_qasc_keyword_tagging": 59.9718, + "eval_rougeL_for_task039_qasc_overlap_extraction": 29.5, + "eval_rougeL_for_task050_multirc_answerability_classification": 51.0, + "eval_rougeL_for_task102_commongen_data_to_text": 32.755, + "eval_rougeL_for_task1152_bard_word_analogy": 3.0, + "eval_rougeL_for_task1153_bard_word_analogy": 0.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 7.0, + "eval_rougeL_for_task1155_bard_word_analogy": 53.0, + "eval_rougeL_for_task1156_bard_word_analogy": 5.0, + "eval_rougeL_for_task1157_bard_word_analogy": 5.0, + "eval_rougeL_for_task1158_bard_word_analogy": 7.0, + "eval_rougeL_for_task1159_bard_word_analogy": 10.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 16.989, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 77.9598, + "eval_rougeL_for_task121_zest_question_rewriting": 41.8295, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 6.8666, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 38.4453, + "eval_rougeL_for_task1356_xlsum_title_generation": 8.3781, + "eval_rougeL_for_task1358_xlsum_title_generation": 27.6755, + "eval_rougeL_for_task1385_anli_textual_entailment": 30.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 37.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 38.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 48.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 15.3075, + "eval_rougeL_for_task1409_dart_data_to_text": 23.973, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 20.9466, + "eval_rougeL_for_task1439_doqa_answerability_classification": 47.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 29.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 23.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 16.3817, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.1256, + "eval_rougeL_for_task1562_zest_question_rewriting": 41.4883, + "eval_rougeL_for_task1586_scifact_title_generation": 18.6986, + "eval_rougeL_for_task1598_nyc_data_to_text": 25.3801, + "eval_rougeL_for_task1612_sick_textual_entailment": 34.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 76.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 73.5704, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 48.7274, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_rougeL_for_task1659_billsum_title_generation": 22.246, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 62.119, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 37.6085, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 36.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 37.0, + "eval_rougeL_for_task219_rocstories_title_generation": 8.5842, + "eval_rougeL_for_task220_rocstories_title_generation": 48.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 45.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 17.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 48.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 55.1167, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 33.3353, + "eval_rougeL_for_task288_gigaword_title_generation": 22.4004, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 39.6, + "eval_rougeL_for_task329_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 28.4667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 71.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.3591, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 5.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 58.0945, + "eval_rougeL_for_task418_persent_title_generation": 14.919, + "eval_rougeL_for_task442_com_qa_question_rewriting": 61.0318, + "eval_rougeL_for_task500_scruples_title_generation": 12.9368, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 29.2089, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 47.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 6.6963, + "eval_rougeL_for_task602_wikitext_title_generation": 5.3674, + "eval_rougeL_for_task613_liar_keyword_tagging": 30.5667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 26.6243, + "eval_rougeL_for_task619_ohsumed_title_generation": 24.7519, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 19.7605, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 48.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 24.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 49.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 82.2701, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 30.9607, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 74.352, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 62.7559, + "eval_rougeL_for_task677_ollie_data_to_text": 12.2447, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 16.3981, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 1.6245, + "eval_rougeL_for_task769_qed_title_generation": 69.2262, + "eval_rougeL_for_task827_copa_cause_effect_classification": 48.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 37.8524, + "eval_rougeL_for_task892_gap_coreference_resolution": 52.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 34.3333, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_rougeL_for_task957_e2e_data_to_text": 35.9155, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 42.4861, + "eval_rougeL_for_title_generation": 21.0127, + "eval_rougeL_for_word_analogy": 11.3333, + "eval_runtime": 367.1345, + "eval_samples_per_second": 32.44, + "eval_steps_per_second": 1.016, + "step": 2500 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 1.8527, + "step": 3000 + }, + { + "epoch": 0.69, + "eval_exact_match": 25.3065, + "eval_exact_match_for_answerability_classification": 49.6154, + "eval_exact_match_for_cause_effect_classification": 35.5714, + "eval_exact_match_for_coreference_resolution": 32.3571, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 35.8571, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 34.4, + "eval_exact_match_for_overlap_extraction": 11.5, + "eval_exact_match_for_question_rewriting": 1.0909, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 34.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 27.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 2.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 7.0, + "eval_exact_match_for_task1155_bard_word_analogy": 49.0, + "eval_exact_match_for_task1156_bard_word_analogy": 8.0, + "eval_exact_match_for_task1157_bard_word_analogy": 8.0, + "eval_exact_match_for_task1158_bard_word_analogy": 5.0, + "eval_exact_match_for_task1159_bard_word_analogy": 10.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 41.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 28.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 41.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 49.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 32.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 54.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 18.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 35.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 47.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 46.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 40.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 31.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 4.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 51.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 18.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 4.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 35.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 54.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 74.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 27.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 52.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 37.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 29.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 52.0, + "eval_exact_match_for_textual_entailment": 41.5, + "eval_exact_match_for_title_generation": 6.1099, + "eval_exact_match_for_word_analogy": 11.125, + "eval_f1": 39.7789, + "eval_f1_for_answerability_classification": 52.1795, + "eval_f1_for_cause_effect_classification": 53.8615, + "eval_f1_for_coreference_resolution": 41.249, + "eval_f1_for_data_to_text": 29.8989, + "eval_f1_for_dialogue_act_recognition": 39.4286, + "eval_f1_for_grammar_error_correction": 52.8872, + "eval_f1_for_keyword_tagging": 48.6844, + "eval_f1_for_overlap_extraction": 29.6597, + "eval_f1_for_question_rewriting": 63.2423, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 35.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 82.5384, + "eval_f1_for_task035_winogrande_question_rewriting": 78.0349, + "eval_f1_for_task036_qasc_keyword_tagging": 67.4918, + "eval_f1_for_task039_qasc_overlap_extraction": 26.3333, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 32.3958, + "eval_f1_for_task1152_bard_word_analogy": 2.0, + "eval_f1_for_task1153_bard_word_analogy": 0.6667, + "eval_f1_for_task1154_bard_word_analogy": 7.0, + "eval_f1_for_task1155_bard_word_analogy": 49.0, + "eval_f1_for_task1156_bard_word_analogy": 8.0, + "eval_f1_for_task1157_bard_word_analogy": 8.0, + "eval_f1_for_task1158_bard_word_analogy": 5.0, + "eval_f1_for_task1159_bard_word_analogy": 10.0, + "eval_f1_for_task1161_coda_19_title_generation": 17.282, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 78.637, + "eval_f1_for_task121_zest_question_rewriting": 45.2908, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 6.9198, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 32.62, + "eval_f1_for_task1356_xlsum_title_generation": 9.7026, + "eval_f1_for_task1358_xlsum_title_generation": 27.7494, + "eval_f1_for_task1385_anli_textual_entailment": 32.0, + "eval_f1_for_task1386_anli_textual_entailment": 41.0, + "eval_f1_for_task1387_anli_textual_entailment": 28.0, + "eval_f1_for_task1388_cb_textual_entailment": 41.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 49.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 17.7914, + "eval_f1_for_task1409_dart_data_to_text": 30.8386, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 22.6922, + "eval_f1_for_task1439_doqa_answerability_classification": 49.0, + "eval_f1_for_task1442_doqa_answerability_classification": 53.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 17.9573, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.0822, + "eval_f1_for_task1562_zest_question_rewriting": 44.0546, + "eval_f1_for_task1586_scifact_title_generation": 21.3867, + "eval_f1_for_task1598_nyc_data_to_text": 27.8568, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 32.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 74.9715, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 52.6964, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 54.0, + "eval_f1_for_task1659_billsum_title_generation": 25.0409, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 60.7143, + "eval_f1_for_task1728_web_nlg_data_to_text": 25.2617, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 35.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 7.3278, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_f1_for_task232_iirc_answerability_classification": 47.0, + "eval_f1_for_task233_iirc_answerability_classification": 46.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 46.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 55.4548, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 32.9861, + "eval_f1_for_task288_gigaword_title_generation": 24.1168, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 42.3667, + "eval_f1_for_task329_gap_coreference_resolution": 32.0, + "eval_f1_for_task330_gap_coreference_resolution": 37.2667, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 32.8079, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 4.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 64.2303, + "eval_f1_for_task418_persent_title_generation": 15.5453, + "eval_f1_for_task442_com_qa_question_rewriting": 56.77, + "eval_f1_for_task500_scruples_title_generation": 12.1154, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 28.6372, + "eval_f1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 6.6273, + "eval_f1_for_task602_wikitext_title_generation": 6.9967, + "eval_f1_for_task613_liar_keyword_tagging": 19.8333, + "eval_f1_for_task614_glucose_cause_effect_classification": 28.556, + "eval_f1_for_task619_ohsumed_title_generation": 25.2537, + "eval_f1_for_task620_ohsumed_keyword_tagging": 20.7903, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 35.0, + "eval_f1_for_task642_e_snli_textual_entailment": 54.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 86.3063, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 38.3649, + "eval_f1_for_task670_ambigqa_question_rewriting": 75.9893, + "eval_f1_for_task671_ambigqa_question_rewriting": 62.528, + "eval_f1_for_task677_ollie_data_to_text": 14.0078, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 19.6918, + "eval_f1_for_task760_msr_sqa_data_to_text": 0.9323, + "eval_f1_for_task769_qed_title_generation": 70.7797, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 43.319, + "eval_f1_for_task892_gap_coreference_resolution": 49.0, + "eval_f1_for_task893_gap_coreference_resolution": 29.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_f1_for_task957_e2e_data_to_text": 45.8737, + "eval_f1_for_task970_sherliic_textual_entailment": 52.0, + "eval_f1_for_textual_entailment": 41.5, + "eval_f1_for_title_generation": 21.9737, + "eval_f1_for_word_analogy": 11.2083, + "eval_gen_len": 11.0552, + "eval_global_step": 3000, + "eval_loss": 1.730137825012207, + "eval_rouge1": 41.367, + "eval_rouge1_for_answerability_classification": 52.1795, + "eval_rouge1_for_cause_effect_classification": 54.1971, + "eval_rouge1_for_coreference_resolution": 41.5389, + "eval_rouge1_for_data_to_text": 34.596, + "eval_rouge1_for_dialogue_act_recognition": 41.3333, + "eval_rouge1_for_grammar_error_correction": 60.7647, + "eval_rouge1_for_keyword_tagging": 52.6015, + "eval_rouge1_for_overlap_extraction": 32.8642, + "eval_rouge1_for_question_rewriting": 64.8708, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 37.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 82.6386, + "eval_rouge1_for_task035_winogrande_question_rewriting": 78.7668, + "eval_rouge1_for_task036_qasc_keyword_tagging": 70.1918, + "eval_rouge1_for_task039_qasc_overlap_extraction": 31.6667, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 43.7232, + "eval_rouge1_for_task1152_bard_word_analogy": 2.0, + "eval_rouge1_for_task1153_bard_word_analogy": 0.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 7.0, + "eval_rouge1_for_task1155_bard_word_analogy": 49.0, + "eval_rouge1_for_task1156_bard_word_analogy": 8.0, + "eval_rouge1_for_task1157_bard_word_analogy": 8.0, + "eval_rouge1_for_task1158_bard_word_analogy": 5.0, + "eval_rouge1_for_task1159_bard_word_analogy": 10.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 19.2683, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.03, + "eval_rouge1_for_task121_zest_question_rewriting": 47.35, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 7.0318, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 35.177, + "eval_rouge1_for_task1356_xlsum_title_generation": 10.961, + "eval_rouge1_for_task1358_xlsum_title_generation": 32.3043, + "eval_rouge1_for_task1385_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 41.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 28.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 49.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 18.0265, + "eval_rouge1_for_task1409_dart_data_to_text": 32.2626, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.3267, + "eval_rouge1_for_task1439_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 19.5297, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.2026, + "eval_rouge1_for_task1562_zest_question_rewriting": 47.117, + "eval_rouge1_for_task1586_scifact_title_generation": 24.0674, + "eval_rouge1_for_task1598_nyc_data_to_text": 30.6665, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 75.9556, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 53.3675, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 54.0, + "eval_rouge1_for_task1659_billsum_title_generation": 26.7951, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 60.7143, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 45.8083, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 35.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 9.4278, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 47.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 46.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 55.8, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 34.0617, + "eval_rouge1_for_task288_gigaword_title_generation": 26.7168, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 42.7667, + "eval_rouge1_for_task329_gap_coreference_resolution": 32.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 37.2667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.4973, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 5.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 65.6846, + "eval_rouge1_for_task418_persent_title_generation": 17.7056, + "eval_rouge1_for_task442_com_qa_question_rewriting": 61.3431, + "eval_rouge1_for_task500_scruples_title_generation": 13.3272, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 28.7287, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 7.4939, + "eval_rouge1_for_task602_wikitext_title_generation": 7.1573, + "eval_rouge1_for_task613_liar_keyword_tagging": 34.5, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 30.2158, + "eval_rouge1_for_task619_ohsumed_title_generation": 27.0072, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 22.2237, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 35.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 54.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 87.0921, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 38.5449, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 77.0124, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.5035, + "eval_rouge1_for_task677_ollie_data_to_text": 15.0799, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 20.4106, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 1.539, + "eval_rouge1_for_task769_qed_title_generation": 70.819, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 43.4524, + "eval_rouge1_for_task892_gap_coreference_resolution": 49.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 29.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_rouge1_for_task957_e2e_data_to_text": 46.4287, + "eval_rouge1_for_task970_sherliic_textual_entailment": 52.0, + "eval_rouge1_for_textual_entailment": 43.3889, + "eval_rouge1_for_title_generation": 23.4084, + "eval_rouge1_for_word_analogy": 11.2083, + "eval_rougeL": 40.3228, + "eval_rougeL_for_answerability_classification": 52.1795, + "eval_rougeL_for_cause_effect_classification": 53.4787, + "eval_rougeL_for_coreference_resolution": 41.5389, + "eval_rougeL_for_data_to_text": 29.3649, + "eval_rougeL_for_dialogue_act_recognition": 41.3333, + "eval_rougeL_for_grammar_error_correction": 59.6283, + "eval_rougeL_for_keyword_tagging": 52.2137, + "eval_rougeL_for_overlap_extraction": 32.3764, + "eval_rougeL_for_question_rewriting": 61.522, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 37.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 82.6386, + "eval_rougeL_for_task035_winogrande_question_rewriting": 78.0277, + "eval_rougeL_for_task036_qasc_keyword_tagging": 68.2529, + "eval_rougeL_for_task039_qasc_overlap_extraction": 31.6667, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 35.8985, + "eval_rougeL_for_task1152_bard_word_analogy": 2.0, + "eval_rougeL_for_task1153_bard_word_analogy": 0.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 7.0, + "eval_rougeL_for_task1155_bard_word_analogy": 49.0, + "eval_rougeL_for_task1156_bard_word_analogy": 8.0, + "eval_rougeL_for_task1157_bard_word_analogy": 8.0, + "eval_rougeL_for_task1158_bard_word_analogy": 5.0, + "eval_rougeL_for_task1159_bard_word_analogy": 10.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 16.1505, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.0992, + "eval_rougeL_for_task121_zest_question_rewriting": 41.6065, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 6.8977, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 32.7806, + "eval_rougeL_for_task1356_xlsum_title_generation": 9.2172, + "eval_rougeL_for_task1358_xlsum_title_generation": 27.1174, + "eval_rougeL_for_task1385_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 41.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 28.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 49.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 16.1847, + "eval_rougeL_for_task1409_dart_data_to_text": 28.3679, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 33.9716, + "eval_rougeL_for_task1439_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 18.8411, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.2851, + "eval_rougeL_for_task1562_zest_question_rewriting": 39.2662, + "eval_rougeL_for_task1586_scifact_title_generation": 19.5095, + "eval_rougeL_for_task1598_nyc_data_to_text": 22.4249, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 74.2265, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 49.0936, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 54.0, + "eval_rougeL_for_task1659_billsum_title_generation": 21.8281, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 60.7143, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 39.8846, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 35.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 9.4278, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 47.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 46.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 55.8, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 33.0862, + "eval_rougeL_for_task288_gigaword_title_generation": 22.573, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 42.7667, + "eval_rougeL_for_task329_gap_coreference_resolution": 32.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 37.2667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.351, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 5.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 54.4885, + "eval_rougeL_for_task418_persent_title_generation": 15.3426, + "eval_rougeL_for_task442_com_qa_question_rewriting": 58.1648, + "eval_rougeL_for_task500_scruples_title_generation": 11.9168, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 28.1328, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 7.4939, + "eval_rougeL_for_task602_wikitext_title_generation": 7.032, + "eval_rougeL_for_task613_liar_keyword_tagging": 34.5, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 27.3333, + "eval_rougeL_for_task619_ohsumed_title_generation": 24.7006, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 22.2237, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 35.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 54.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 87.0921, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 38.5449, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 75.3249, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 62.1187, + "eval_rougeL_for_task677_ollie_data_to_text": 12.5091, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 17.6325, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 1.4692, + "eval_rougeL_for_task769_qed_title_generation": 70.819, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 43.4524, + "eval_rougeL_for_task892_gap_coreference_resolution": 49.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 29.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_rougeL_for_task957_e2e_data_to_text": 37.8089, + "eval_rougeL_for_task970_sherliic_textual_entailment": 52.0, + "eval_rougeL_for_textual_entailment": 43.3889, + "eval_rougeL_for_title_generation": 21.497, + "eval_rougeL_for_word_analogy": 11.2083, + "eval_runtime": 416.4637, + "eval_samples_per_second": 28.598, + "eval_steps_per_second": 0.896, + "step": 3000 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 1.8206, + "step": 3500 + }, + { + "epoch": 0.8, + "eval_exact_match": 25.1134, + "eval_exact_match_for_answerability_classification": 50.5385, + "eval_exact_match_for_cause_effect_classification": 36.0, + "eval_exact_match_for_coreference_resolution": 32.5, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 36.4286, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 32.8, + "eval_exact_match_for_overlap_extraction": 11.0, + "eval_exact_match_for_question_rewriting": 0.9091, + "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 34.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 26.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 22.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 54.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 3.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 7.0, + "eval_exact_match_for_task1155_bard_word_analogy": 51.0, + "eval_exact_match_for_task1156_bard_word_analogy": 6.0, + "eval_exact_match_for_task1157_bard_word_analogy": 8.0, + "eval_exact_match_for_task1158_bard_word_analogy": 5.0, + "eval_exact_match_for_task1159_bard_word_analogy": 10.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 24.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 35.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 54.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 54.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 54.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 27.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 47.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 18.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 35.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 40.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 48.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 41.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 31.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 29.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 47.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 49.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 18.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 5.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 45.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 70.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 31.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 52.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 52.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 52.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 28.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 52.0, + "eval_exact_match_for_textual_entailment": 40.0, + "eval_exact_match_for_title_generation": 6.222, + "eval_exact_match_for_word_analogy": 11.25, + "eval_f1": 39.5548, + "eval_f1_for_answerability_classification": 53.1026, + "eval_f1_for_cause_effect_classification": 54.0367, + "eval_f1_for_coreference_resolution": 41.2994, + "eval_f1_for_data_to_text": 29.6113, + "eval_f1_for_dialogue_act_recognition": 40.0, + "eval_f1_for_grammar_error_correction": 50.7919, + "eval_f1_for_keyword_tagging": 46.9932, + "eval_f1_for_overlap_extraction": 29.0766, + "eval_f1_for_question_rewriting": 64.4201, + "eval_f1_for_task020_mctaco_answerability_classification": 53.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 36.0, + "eval_f1_for_task034_winogrande_question_rewriting": 79.3435, + "eval_f1_for_task035_winogrande_question_rewriting": 79.9605, + "eval_f1_for_task036_qasc_keyword_tagging": 66.247, + "eval_f1_for_task039_qasc_overlap_extraction": 24.6667, + "eval_f1_for_task050_multirc_answerability_classification": 54.0, + "eval_f1_for_task102_commongen_data_to_text": 26.029, + "eval_f1_for_task1152_bard_word_analogy": 3.0, + "eval_f1_for_task1153_bard_word_analogy": 0.0, + "eval_f1_for_task1154_bard_word_analogy": 7.0, + "eval_f1_for_task1155_bard_word_analogy": 51.0, + "eval_f1_for_task1156_bard_word_analogy": 6.0, + "eval_f1_for_task1157_bard_word_analogy": 8.0, + "eval_f1_for_task1158_bard_word_analogy": 5.0, + "eval_f1_for_task1159_bard_word_analogy": 10.0, + "eval_f1_for_task1161_coda_19_title_generation": 18.0828, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.1785, + "eval_f1_for_task121_zest_question_rewriting": 46.6181, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 6.3958, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 36.1199, + "eval_f1_for_task1356_xlsum_title_generation": 9.4337, + "eval_f1_for_task1358_xlsum_title_generation": 28.3279, + "eval_f1_for_task1385_anli_textual_entailment": 24.0, + "eval_f1_for_task1386_anli_textual_entailment": 35.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 54.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 18.9969, + "eval_f1_for_task1409_dart_data_to_text": 32.1207, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 18.3059, + "eval_f1_for_task1439_doqa_answerability_classification": 54.0, + "eval_f1_for_task1442_doqa_answerability_classification": 54.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 17.4129, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.2779, + "eval_f1_for_task1562_zest_question_rewriting": 46.0453, + "eval_f1_for_task1586_scifact_title_generation": 21.6584, + "eval_f1_for_task1598_nyc_data_to_text": 27.741, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 27.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 76.7394, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 47.0, + "eval_f1_for_task1631_open_pi_data_to_text": 55.2759, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_f1_for_task1659_billsum_title_generation": 23.9751, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 60.6758, + "eval_f1_for_task1728_web_nlg_data_to_text": 26.0578, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 35.0, + "eval_f1_for_task201_multinli_textual_entailment": 40.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 7.0667, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 46.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 48.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 56.1167, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 33.4865, + "eval_f1_for_task288_gigaword_title_generation": 23.224, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 37.7, + "eval_f1_for_task329_gap_coreference_resolution": 31.0, + "eval_f1_for_task330_gap_coreference_resolution": 35.6667, + "eval_f1_for_task349_squad2.0_answerability_classification": 47.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 32.4113, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 3.3333, + "eval_f1_for_task402_grailqa_question_rewriting": 66.7349, + "eval_f1_for_task418_persent_title_generation": 14.7663, + "eval_f1_for_task442_com_qa_question_rewriting": 58.264, + "eval_f1_for_task500_scruples_title_generation": 9.8322, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 28.6948, + "eval_f1_for_task520_aquamuse_answerability_classification": 49.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 5.3907, + "eval_f1_for_task602_wikitext_title_generation": 6.3378, + "eval_f1_for_task613_liar_keyword_tagging": 19.8333, + "eval_f1_for_task614_glucose_cause_effect_classification": 27.1789, + "eval_f1_for_task619_ohsumed_title_generation": 26.0943, + "eval_f1_for_task620_ohsumed_keyword_tagging": 19.7514, + "eval_f1_for_task623_ohsumed_keyword_tagging": 45.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 84.1345, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 42.4135, + "eval_f1_for_task670_ambigqa_question_rewriting": 76.1305, + "eval_f1_for_task671_ambigqa_question_rewriting": 63.4862, + "eval_f1_for_task677_ollie_data_to_text": 13.0299, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 20.7485, + "eval_f1_for_task760_msr_sqa_data_to_text": 0.9637, + "eval_f1_for_task769_qed_title_generation": 69.8535, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 52.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 52.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 41.2857, + "eval_f1_for_task892_gap_coreference_resolution": 50.0, + "eval_f1_for_task893_gap_coreference_resolution": 28.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_f1_for_task957_e2e_data_to_text": 45.0872, + "eval_f1_for_task970_sherliic_textual_entailment": 52.0, + "eval_f1_for_textual_entailment": 40.0, + "eval_f1_for_title_generation": 21.6525, + "eval_f1_for_word_analogy": 11.25, + "eval_gen_len": 10.6624, + "eval_global_step": 3500, + "eval_loss": 1.7444958686828613, + "eval_rouge1": 41.2312, + "eval_rouge1_for_answerability_classification": 53.1026, + "eval_rouge1_for_cause_effect_classification": 54.3631, + "eval_rouge1_for_coreference_resolution": 41.6281, + "eval_rouge1_for_data_to_text": 34.7304, + "eval_rouge1_for_dialogue_act_recognition": 41.9048, + "eval_rouge1_for_grammar_error_correction": 60.8867, + "eval_rouge1_for_keyword_tagging": 50.7954, + "eval_rouge1_for_overlap_extraction": 31.6025, + "eval_rouge1_for_question_rewriting": 66.034, + "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 38.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 79.4289, + "eval_rouge1_for_task035_winogrande_question_rewriting": 80.7876, + "eval_rouge1_for_task036_qasc_keyword_tagging": 67.9827, + "eval_rouge1_for_task039_qasc_overlap_extraction": 28.3333, + "eval_rouge1_for_task050_multirc_answerability_classification": 54.0, + "eval_rouge1_for_task102_commongen_data_to_text": 37.0803, + "eval_rouge1_for_task1152_bard_word_analogy": 3.0, + "eval_rouge1_for_task1153_bard_word_analogy": 0.0, + "eval_rouge1_for_task1154_bard_word_analogy": 7.0, + "eval_rouge1_for_task1155_bard_word_analogy": 51.0, + "eval_rouge1_for_task1156_bard_word_analogy": 6.0, + "eval_rouge1_for_task1157_bard_word_analogy": 8.0, + "eval_rouge1_for_task1158_bard_word_analogy": 5.0, + "eval_rouge1_for_task1159_bard_word_analogy": 10.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 20.1961, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.4933, + "eval_rouge1_for_task121_zest_question_rewriting": 48.4922, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 6.4894, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 39.5205, + "eval_rouge1_for_task1356_xlsum_title_generation": 11.0635, + "eval_rouge1_for_task1358_xlsum_title_generation": 32.8807, + "eval_rouge1_for_task1385_anli_textual_entailment": 24.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 54.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 19.1532, + "eval_rouge1_for_task1409_dart_data_to_text": 33.4487, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.236, + "eval_rouge1_for_task1439_doqa_answerability_classification": 54.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 54.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 18.7752, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.5373, + "eval_rouge1_for_task1562_zest_question_rewriting": 49.3161, + "eval_rouge1_for_task1586_scifact_title_generation": 24.5127, + "eval_rouge1_for_task1598_nyc_data_to_text": 30.9136, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 75.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 77.2167, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 47.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 55.7199, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_rouge1_for_task1659_billsum_title_generation": 25.5813, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 60.6758, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 49.5482, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 35.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 40.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 9.1667, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 48.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 56.2833, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 34.8716, + "eval_rouge1_for_task288_gigaword_title_generation": 25.8723, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 38.1, + "eval_rouge1_for_task329_gap_coreference_resolution": 31.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 35.6667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 47.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.1135, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 4.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 68.2239, + "eval_rouge1_for_task418_persent_title_generation": 16.7977, + "eval_rouge1_for_task442_com_qa_question_rewriting": 62.4506, + "eval_rouge1_for_task500_scruples_title_generation": 10.9541, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 28.8021, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 49.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 6.3431, + "eval_rouge1_for_task602_wikitext_title_generation": 6.3631, + "eval_rouge1_for_task613_liar_keyword_tagging": 34.1667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 28.7613, + "eval_rouge1_for_task619_ohsumed_title_generation": 28.1778, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 21.9073, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 45.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 84.9202, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 42.649, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 77.1557, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 64.2888, + "eval_rouge1_for_task677_ollie_data_to_text": 14.2159, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 21.5847, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 1.4693, + "eval_rouge1_for_task769_qed_title_generation": 70.2261, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 52.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 52.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 41.419, + "eval_rouge1_for_task892_gap_coreference_resolution": 50.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 28.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_rouge1_for_task957_e2e_data_to_text": 46.4117, + "eval_rouge1_for_task970_sherliic_textual_entailment": 52.0, + "eval_rouge1_for_textual_entailment": 42.0278, + "eval_rouge1_for_title_generation": 23.1372, + "eval_rouge1_for_word_analogy": 11.25, + "eval_rougeL": 40.1716, + "eval_rougeL_for_answerability_classification": 53.1026, + "eval_rougeL_for_cause_effect_classification": 53.7391, + "eval_rougeL_for_coreference_resolution": 41.6281, + "eval_rougeL_for_data_to_text": 29.4465, + "eval_rougeL_for_dialogue_act_recognition": 41.9048, + "eval_rougeL_for_grammar_error_correction": 59.9437, + "eval_rougeL_for_keyword_tagging": 50.4783, + "eval_rougeL_for_overlap_extraction": 31.0931, + "eval_rougeL_for_question_rewriting": 62.6061, + "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 38.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 79.4289, + "eval_rougeL_for_task035_winogrande_question_rewriting": 80.3116, + "eval_rougeL_for_task036_qasc_keyword_tagging": 66.497, + "eval_rougeL_for_task039_qasc_overlap_extraction": 28.3333, + "eval_rougeL_for_task050_multirc_answerability_classification": 54.0, + "eval_rougeL_for_task102_commongen_data_to_text": 31.0454, + "eval_rougeL_for_task1152_bard_word_analogy": 3.0, + "eval_rougeL_for_task1153_bard_word_analogy": 0.0, + "eval_rougeL_for_task1154_bard_word_analogy": 7.0, + "eval_rougeL_for_task1155_bard_word_analogy": 51.0, + "eval_rougeL_for_task1156_bard_word_analogy": 6.0, + "eval_rougeL_for_task1157_bard_word_analogy": 8.0, + "eval_rougeL_for_task1158_bard_word_analogy": 5.0, + "eval_rougeL_for_task1159_bard_word_analogy": 10.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 16.5508, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.5625, + "eval_rougeL_for_task121_zest_question_rewriting": 42.8989, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 6.4354, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 36.9664, + "eval_rougeL_for_task1356_xlsum_title_generation": 9.173, + "eval_rougeL_for_task1358_xlsum_title_generation": 27.8089, + "eval_rougeL_for_task1385_anli_textual_entailment": 24.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 54.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 16.8705, + "eval_rougeL_for_task1409_dart_data_to_text": 29.0511, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 34.2535, + "eval_rougeL_for_task1439_doqa_answerability_classification": 54.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 54.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 18.1359, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.6339, + "eval_rougeL_for_task1562_zest_question_rewriting": 41.1269, + "eval_rougeL_for_task1586_scifact_title_generation": 20.0361, + "eval_rougeL_for_task1598_nyc_data_to_text": 23.9811, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 75.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 75.3845, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 47.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 49.8509, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_rougeL_for_task1659_billsum_title_generation": 21.093, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 60.6758, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 43.8832, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 35.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 40.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 9.1667, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 48.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 56.2833, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 33.8528, + "eval_rougeL_for_task288_gigaword_title_generation": 21.8407, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 38.1, + "eval_rougeL_for_task329_gap_coreference_resolution": 31.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 35.6667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 47.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.5113, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 4.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 55.9286, + "eval_rougeL_for_task418_persent_title_generation": 14.2688, + "eval_rougeL_for_task442_com_qa_question_rewriting": 59.22, + "eval_rougeL_for_task500_scruples_title_generation": 9.7604, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 28.2997, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 49.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 6.1765, + "eval_rougeL_for_task602_wikitext_title_generation": 6.3358, + "eval_rougeL_for_task613_liar_keyword_tagging": 34.1667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 25.9956, + "eval_rougeL_for_task619_ohsumed_title_generation": 24.4112, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 21.8073, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 45.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 84.9202, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 42.649, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 75.7385, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 63.0998, + "eval_rougeL_for_task677_ollie_data_to_text": 11.8424, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 18.0667, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 1.4438, + "eval_rougeL_for_task769_qed_title_generation": 70.2261, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 52.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 52.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 41.419, + "eval_rougeL_for_task892_gap_coreference_resolution": 50.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 28.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_rougeL_for_task957_e2e_data_to_text": 36.328, + "eval_rougeL_for_task970_sherliic_textual_entailment": 52.0, + "eval_rougeL_for_textual_entailment": 42.0278, + "eval_rougeL_for_title_generation": 21.1195, + "eval_rougeL_for_word_analogy": 11.25, + "eval_runtime": 373.9586, + "eval_samples_per_second": 31.848, + "eval_steps_per_second": 0.997, + "step": 3500 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 1.771, + "step": 4000 + }, + { + "epoch": 0.91, + "eval_exact_match": 24.9454, + "eval_exact_match_for_answerability_classification": 48.9231, + "eval_exact_match_for_cause_effect_classification": 35.4286, + "eval_exact_match_for_coreference_resolution": 32.7143, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 35.7143, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 34.0, + "eval_exact_match_for_overlap_extraction": 10.0, + "eval_exact_match_for_question_rewriting": 0.7273, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 38.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 27.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 20.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 44.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 3.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 7.0, + "eval_exact_match_for_task1155_bard_word_analogy": 51.0, + "eval_exact_match_for_task1156_bard_word_analogy": 6.0, + "eval_exact_match_for_task1157_bard_word_analogy": 9.0, + "eval_exact_match_for_task1158_bard_word_analogy": 5.0, + "eval_exact_match_for_task1159_bard_word_analogy": 9.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 48.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 51.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 24.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 46.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 18.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 35.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 43.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 45.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 36.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 30.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 51.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 49.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 19.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 4.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 25.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 51.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 71.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 32.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 56.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 30.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 21.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 40.125, + "eval_exact_match_for_title_generation": 6.3341, + "eval_exact_match_for_word_analogy": 11.25, + "eval_f1": 39.6234, + "eval_f1_for_answerability_classification": 51.4872, + "eval_f1_for_cause_effect_classification": 53.5352, + "eval_f1_for_coreference_resolution": 41.2021, + "eval_f1_for_data_to_text": 29.1206, + "eval_f1_for_dialogue_act_recognition": 39.2857, + "eval_f1_for_grammar_error_correction": 55.1468, + "eval_f1_for_keyword_tagging": 48.3645, + "eval_f1_for_overlap_extraction": 28.7759, + "eval_f1_for_question_rewriting": 64.5182, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 40.0, + "eval_f1_for_task034_winogrande_question_rewriting": 88.2757, + "eval_f1_for_task035_winogrande_question_rewriting": 78.7735, + "eval_f1_for_task036_qasc_keyword_tagging": 67.4426, + "eval_f1_for_task039_qasc_overlap_extraction": 24.5, + "eval_f1_for_task050_multirc_answerability_classification": 44.0, + "eval_f1_for_task102_commongen_data_to_text": 27.3155, + "eval_f1_for_task1152_bard_word_analogy": 3.0, + "eval_f1_for_task1153_bard_word_analogy": 0.0, + "eval_f1_for_task1154_bard_word_analogy": 7.0, + "eval_f1_for_task1155_bard_word_analogy": 51.0, + "eval_f1_for_task1156_bard_word_analogy": 6.0, + "eval_f1_for_task1157_bard_word_analogy": 9.0, + "eval_f1_for_task1158_bard_word_analogy": 5.0, + "eval_f1_for_task1159_bard_word_analogy": 9.0, + "eval_f1_for_task1161_coda_19_title_generation": 20.5133, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.0849, + "eval_f1_for_task121_zest_question_rewriting": 45.5962, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 5.6964, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 33.9435, + "eval_f1_for_task1356_xlsum_title_generation": 10.4919, + "eval_f1_for_task1358_xlsum_title_generation": 29.5523, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 48.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 18.4981, + "eval_f1_for_task1409_dart_data_to_text": 30.5263, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 27.3332, + "eval_f1_for_task1439_doqa_answerability_classification": 50.0, + "eval_f1_for_task1442_doqa_answerability_classification": 51.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 24.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 51.0, + "eval_f1_for_task1540_peer_read_title_generation": 18.5715, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 82.9603, + "eval_f1_for_task1562_zest_question_rewriting": 47.0898, + "eval_f1_for_task1586_scifact_title_generation": 24.0088, + "eval_f1_for_task1598_nyc_data_to_text": 24.7449, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 75.6373, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 46.0, + "eval_f1_for_task1631_open_pi_data_to_text": 57.4851, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_f1_for_task1659_billsum_title_generation": 25.7325, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 61.7284, + "eval_f1_for_task1728_web_nlg_data_to_text": 23.5001, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 35.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 34.0, + "eval_f1_for_task219_rocstories_title_generation": 6.4667, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 43.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 46.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 56.1167, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 33.0519, + "eval_f1_for_task288_gigaword_title_generation": 23.8239, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 40.2, + "eval_f1_for_task329_gap_coreference_resolution": 36.0, + "eval_f1_for_task330_gap_coreference_resolution": 36.4333, + "eval_f1_for_task349_squad2.0_answerability_classification": 51.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 32.7919, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 3.3333, + "eval_f1_for_task402_grailqa_question_rewriting": 64.5236, + "eval_f1_for_task418_persent_title_generation": 14.8819, + "eval_f1_for_task442_com_qa_question_rewriting": 59.6237, + "eval_f1_for_task500_scruples_title_generation": 12.8903, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 29.463, + "eval_f1_for_task520_aquamuse_answerability_classification": 49.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 6.5758, + "eval_f1_for_task602_wikitext_title_generation": 7.7848, + "eval_f1_for_task613_liar_keyword_tagging": 20.8333, + "eval_f1_for_task614_glucose_cause_effect_classification": 27.2879, + "eval_f1_for_task619_ohsumed_title_generation": 28.2476, + "eval_f1_for_task620_ohsumed_keyword_tagging": 18.612, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 25.0, + "eval_f1_for_task642_e_snli_textual_entailment": 51.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 85.9345, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 41.1982, + "eval_f1_for_task670_ambigqa_question_rewriting": 75.3469, + "eval_f1_for_task671_ambigqa_question_rewriting": 61.8051, + "eval_f1_for_task677_ollie_data_to_text": 12.243, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 22.1534, + "eval_f1_for_task760_msr_sqa_data_to_text": 1.229, + "eval_f1_for_task769_qed_title_generation": 74.726, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 36.819, + "eval_f1_for_task892_gap_coreference_resolution": 49.0, + "eval_f1_for_task893_gap_coreference_resolution": 21.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_f1_for_task957_e2e_data_to_text": 45.9035, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 40.125, + "eval_f1_for_title_generation": 23.0008, + "eval_f1_for_word_analogy": 11.25, + "eval_gen_len": 10.7625, + "eval_global_step": 4000, + "eval_loss": 1.7487825155258179, + "eval_rouge1": 41.2285, + "eval_rouge1_for_answerability_classification": 51.4872, + "eval_rouge1_for_cause_effect_classification": 53.9763, + "eval_rouge1_for_coreference_resolution": 41.5977, + "eval_rouge1_for_data_to_text": 33.9516, + "eval_rouge1_for_dialogue_act_recognition": 41.1905, + "eval_rouge1_for_grammar_error_correction": 62.5847, + "eval_rouge1_for_keyword_tagging": 52.3577, + "eval_rouge1_for_overlap_extraction": 31.5837, + "eval_rouge1_for_question_rewriting": 66.1185, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 42.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 88.3094, + "eval_rouge1_for_task035_winogrande_question_rewriting": 79.4465, + "eval_rouge1_for_task036_qasc_keyword_tagging": 69.3069, + "eval_rouge1_for_task039_qasc_overlap_extraction": 28.8333, + "eval_rouge1_for_task050_multirc_answerability_classification": 44.0, + "eval_rouge1_for_task102_commongen_data_to_text": 38.3468, + "eval_rouge1_for_task1152_bard_word_analogy": 3.0, + "eval_rouge1_for_task1153_bard_word_analogy": 0.0, + "eval_rouge1_for_task1154_bard_word_analogy": 7.0, + "eval_rouge1_for_task1155_bard_word_analogy": 51.0, + "eval_rouge1_for_task1156_bard_word_analogy": 6.0, + "eval_rouge1_for_task1157_bard_word_analogy": 9.0, + "eval_rouge1_for_task1158_bard_word_analogy": 5.0, + "eval_rouge1_for_task1159_bard_word_analogy": 9.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 22.681, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.6447, + "eval_rouge1_for_task121_zest_question_rewriting": 47.3476, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 5.7483, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 36.9272, + "eval_rouge1_for_task1356_xlsum_title_generation": 12.0235, + "eval_rouge1_for_task1358_xlsum_title_generation": 34.1514, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 48.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 18.6591, + "eval_rouge1_for_task1409_dart_data_to_text": 32.0037, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 38.8852, + "eval_rouge1_for_task1439_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 51.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 24.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 51.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 20.5362, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.2843, + "eval_rouge1_for_task1562_zest_question_rewriting": 50.3023, + "eval_rouge1_for_task1586_scifact_title_generation": 26.7233, + "eval_rouge1_for_task1598_nyc_data_to_text": 26.6923, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 76.7305, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 46.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 57.796, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_rouge1_for_task1659_billsum_title_generation": 27.4898, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 61.7284, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 46.8283, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 35.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task219_rocstories_title_generation": 8.3667, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 43.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 56.7833, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 34.3342, + "eval_rouge1_for_task288_gigaword_title_generation": 26.5482, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 40.6, + "eval_rouge1_for_task329_gap_coreference_resolution": 36.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 36.4333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 51.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.518, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 5.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 65.7468, + "eval_rouge1_for_task418_persent_title_generation": 17.0081, + "eval_rouge1_for_task442_com_qa_question_rewriting": 63.5855, + "eval_rouge1_for_task500_scruples_title_generation": 14.0455, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 29.7815, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 49.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 7.3476, + "eval_rouge1_for_task602_wikitext_title_generation": 8.003, + "eval_rouge1_for_task613_liar_keyword_tagging": 36.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 29.6494, + "eval_rouge1_for_task619_ohsumed_title_generation": 30.196, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 20.7612, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 25.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 51.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 86.7202, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 41.4371, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 76.5232, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 62.7404, + "eval_rouge1_for_task677_ollie_data_to_text": 12.9568, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 22.8454, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 1.5778, + "eval_rouge1_for_task769_qed_title_generation": 74.6986, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 37.0524, + "eval_rouge1_for_task892_gap_coreference_resolution": 49.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 21.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rouge1_for_task957_e2e_data_to_text": 46.7466, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 41.9861, + "eval_rouge1_for_title_generation": 24.4907, + "eval_rouge1_for_word_analogy": 11.25, + "eval_rougeL": 40.1733, + "eval_rougeL_for_answerability_classification": 51.4872, + "eval_rougeL_for_cause_effect_classification": 53.2554, + "eval_rougeL_for_coreference_resolution": 41.5977, + "eval_rougeL_for_data_to_text": 28.8694, + "eval_rougeL_for_dialogue_act_recognition": 41.1905, + "eval_rougeL_for_grammar_error_correction": 61.4948, + "eval_rougeL_for_keyword_tagging": 51.9446, + "eval_rougeL_for_overlap_extraction": 31.0224, + "eval_rougeL_for_question_rewriting": 62.7995, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 42.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 88.3094, + "eval_rougeL_for_task035_winogrande_question_rewriting": 79.0924, + "eval_rougeL_for_task036_qasc_keyword_tagging": 67.8022, + "eval_rougeL_for_task039_qasc_overlap_extraction": 28.8333, + "eval_rougeL_for_task050_multirc_answerability_classification": 44.0, + "eval_rougeL_for_task102_commongen_data_to_text": 31.7363, + "eval_rougeL_for_task1152_bard_word_analogy": 3.0, + "eval_rougeL_for_task1153_bard_word_analogy": 0.0, + "eval_rougeL_for_task1154_bard_word_analogy": 7.0, + "eval_rougeL_for_task1155_bard_word_analogy": 51.0, + "eval_rougeL_for_task1156_bard_word_analogy": 6.0, + "eval_rougeL_for_task1157_bard_word_analogy": 9.0, + "eval_rougeL_for_task1158_bard_word_analogy": 5.0, + "eval_rougeL_for_task1159_bard_word_analogy": 9.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 18.2125, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.7139, + "eval_rougeL_for_task121_zest_question_rewriting": 42.0365, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 5.6942, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 34.3149, + "eval_rougeL_for_task1356_xlsum_title_generation": 10.3622, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.8565, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 48.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 16.2746, + "eval_rougeL_for_task1409_dart_data_to_text": 28.6439, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.6622, + "eval_rougeL_for_task1439_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 51.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 24.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 51.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 19.7836, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.3273, + "eval_rougeL_for_task1562_zest_question_rewriting": 42.1922, + "eval_rougeL_for_task1586_scifact_title_generation": 21.8434, + "eval_rougeL_for_task1598_nyc_data_to_text": 22.7345, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 75.5664, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 46.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 49.2647, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_rougeL_for_task1659_billsum_title_generation": 22.3573, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 61.7284, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 41.4038, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 35.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task219_rocstories_title_generation": 8.3667, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 43.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 56.7833, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 33.2114, + "eval_rougeL_for_task288_gigaword_title_generation": 22.4698, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 40.6, + "eval_rougeL_for_task329_gap_coreference_resolution": 36.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 36.4333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 51.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.3292, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 5.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 54.5338, + "eval_rougeL_for_task418_persent_title_generation": 15.1559, + "eval_rougeL_for_task442_com_qa_question_rewriting": 59.9413, + "eval_rougeL_for_task500_scruples_title_generation": 12.7564, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 29.1639, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 49.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 7.3476, + "eval_rougeL_for_task602_wikitext_title_generation": 7.8777, + "eval_rougeL_for_task613_liar_keyword_tagging": 36.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 26.792, + "eval_rougeL_for_task619_ohsumed_title_generation": 27.0364, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 20.2006, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 25.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 51.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 86.7202, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 41.4371, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 74.8468, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 61.247, + "eval_rougeL_for_task677_ollie_data_to_text": 10.8729, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 19.4193, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 1.5187, + "eval_rougeL_for_task769_qed_title_generation": 74.6986, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 37.0524, + "eval_rougeL_for_task892_gap_coreference_resolution": 49.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 21.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rougeL_for_task957_e2e_data_to_text": 37.1354, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 41.9861, + "eval_rougeL_for_title_generation": 22.4295, + "eval_rougeL_for_word_analogy": 11.25, + "eval_runtime": 384.4624, + "eval_samples_per_second": 30.978, + "eval_steps_per_second": 0.97, + "step": 4000 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 1.7647, + "step": 4500 + }, + { + "epoch": 1.03, + "eval_exact_match": 24.6096, + "eval_exact_match_for_answerability_classification": 49.4615, + "eval_exact_match_for_cause_effect_classification": 34.8571, + "eval_exact_match_for_coreference_resolution": 33.3571, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 28.1429, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 33.8, + "eval_exact_match_for_overlap_extraction": 11.5, + "eval_exact_match_for_question_rewriting": 0.8182, + "eval_exact_match_for_task020_mctaco_answerability_classification": 51.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 38.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 20.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 2.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 7.0, + "eval_exact_match_for_task1155_bard_word_analogy": 51.0, + "eval_exact_match_for_task1156_bard_word_analogy": 6.0, + "eval_exact_match_for_task1157_bard_word_analogy": 11.0, + "eval_exact_match_for_task1158_bard_word_analogy": 5.0, + "eval_exact_match_for_task1159_bard_word_analogy": 9.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 49.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 53.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 54.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 48.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 55.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 16.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 35.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 36.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 2.0, + "eval_exact_match_for_task220_rocstories_title_generation": 51.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 45.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 44.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 51.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 37.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 45.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 8.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 51.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 19.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 5.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 28.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 75.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 25.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 57.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 46.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 40.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 48.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 24.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 39.9167, + "eval_exact_match_for_title_generation": 6.4462, + "eval_exact_match_for_word_analogy": 11.375, + "eval_f1": 38.97, + "eval_f1_for_answerability_classification": 51.9744, + "eval_f1_for_cause_effect_classification": 52.9311, + "eval_f1_for_coreference_resolution": 40.6821, + "eval_f1_for_data_to_text": 30.0923, + "eval_f1_for_dialogue_act_recognition": 28.9286, + "eval_f1_for_grammar_error_correction": 48.9414, + "eval_f1_for_keyword_tagging": 48.1797, + "eval_f1_for_overlap_extraction": 30.099, + "eval_f1_for_question_rewriting": 65.3774, + "eval_f1_for_task020_mctaco_answerability_classification": 51.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 39.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 89.1998, + "eval_f1_for_task035_winogrande_question_rewriting": 78.7747, + "eval_f1_for_task036_qasc_keyword_tagging": 61.8227, + "eval_f1_for_task039_qasc_overlap_extraction": 27.6667, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 26.7473, + "eval_f1_for_task1152_bard_word_analogy": 2.0, + "eval_f1_for_task1153_bard_word_analogy": 0.6667, + "eval_f1_for_task1154_bard_word_analogy": 7.0, + "eval_f1_for_task1155_bard_word_analogy": 51.0, + "eval_f1_for_task1156_bard_word_analogy": 6.0, + "eval_f1_for_task1157_bard_word_analogy": 11.0, + "eval_f1_for_task1158_bard_word_analogy": 5.0, + "eval_f1_for_task1159_bard_word_analogy": 9.0, + "eval_f1_for_task1161_coda_19_title_generation": 20.106, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.9112, + "eval_f1_for_task121_zest_question_rewriting": 45.2174, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 8.0953, + "eval_f1_for_task1344_rte_textual_entailment": 49.0, + "eval_f1_for_task1345_qqp_question_rewriting": 37.4705, + "eval_f1_for_task1356_xlsum_title_generation": 9.674, + "eval_f1_for_task1358_xlsum_title_generation": 28.3293, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 53.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 54.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 48.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 19.6775, + "eval_f1_for_task1409_dart_data_to_text": 32.5203, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 14.6627, + "eval_f1_for_task1439_doqa_answerability_classification": 49.0, + "eval_f1_for_task1442_doqa_answerability_classification": 49.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_f1_for_task1540_peer_read_title_generation": 21.2874, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.2201, + "eval_f1_for_task1562_zest_question_rewriting": 45.8646, + "eval_f1_for_task1586_scifact_title_generation": 20.6813, + "eval_f1_for_task1598_nyc_data_to_text": 24.8515, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 77.818, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 60.2936, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 55.0, + "eval_f1_for_task1659_billsum_title_generation": 24.233, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 59.6017, + "eval_f1_for_task1728_web_nlg_data_to_text": 25.5484, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 35.0, + "eval_f1_for_task201_multinli_textual_entailment": 36.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 6.5778, + "eval_f1_for_task220_rocstories_title_generation": 51.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 45.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 57.5167, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 32.5314, + "eval_f1_for_task288_gigaword_title_generation": 23.3031, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 24.6, + "eval_f1_for_task329_gap_coreference_resolution": 34.0, + "eval_f1_for_task330_gap_coreference_resolution": 44.1, + "eval_f1_for_task349_squad2.0_answerability_classification": 45.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 13.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 32.7318, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 3.0, + "eval_f1_for_task402_grailqa_question_rewriting": 65.7858, + "eval_f1_for_task418_persent_title_generation": 15.3498, + "eval_f1_for_task442_com_qa_question_rewriting": 60.1844, + "eval_f1_for_task500_scruples_title_generation": 11.8023, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 30.9181, + "eval_f1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 6.9797, + "eval_f1_for_task602_wikitext_title_generation": 7.8052, + "eval_f1_for_task613_liar_keyword_tagging": 19.5, + "eval_f1_for_task614_glucose_cause_effect_classification": 27.1191, + "eval_f1_for_task619_ohsumed_title_generation": 29.2821, + "eval_f1_for_task620_ohsumed_keyword_tagging": 22.1857, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 28.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 87.39, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 33.5861, + "eval_f1_for_task670_ambigqa_question_rewriting": 75.612, + "eval_f1_for_task671_ambigqa_question_rewriting": 63.3133, + "eval_f1_for_task677_ollie_data_to_text": 12.8256, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 20.6976, + "eval_f1_for_task760_msr_sqa_data_to_text": 0.8962, + "eval_f1_for_task769_qed_title_generation": 71.8587, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 46.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 44.8111, + "eval_f1_for_task892_gap_coreference_resolution": 48.0, + "eval_f1_for_task893_gap_coreference_resolution": 24.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_f1_for_task957_e2e_data_to_text": 45.8654, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 39.9167, + "eval_f1_for_title_generation": 22.7989, + "eval_f1_for_word_analogy": 11.4583, + "eval_gen_len": 10.9474, + "eval_global_step": 4500, + "eval_loss": 1.744218349456787, + "eval_rouge1": 40.6586, + "eval_rouge1_for_answerability_classification": 51.9744, + "eval_rouge1_for_cause_effect_classification": 53.2976, + "eval_rouge1_for_coreference_resolution": 40.9671, + "eval_rouge1_for_data_to_text": 35.5131, + "eval_rouge1_for_dialogue_act_recognition": 30.8333, + "eval_rouge1_for_grammar_error_correction": 60.8884, + "eval_rouge1_for_keyword_tagging": 51.6064, + "eval_rouge1_for_overlap_extraction": 32.0866, + "eval_rouge1_for_question_rewriting": 66.9574, + "eval_rouge1_for_task020_mctaco_answerability_classification": 51.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 41.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 89.2232, + "eval_rouge1_for_task035_winogrande_question_rewriting": 79.5805, + "eval_rouge1_for_task036_qasc_keyword_tagging": 63.2204, + "eval_rouge1_for_task039_qasc_overlap_extraction": 30.3333, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 39.7608, + "eval_rouge1_for_task1152_bard_word_analogy": 2.0, + "eval_rouge1_for_task1153_bard_word_analogy": 0.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 7.0, + "eval_rouge1_for_task1155_bard_word_analogy": 51.0, + "eval_rouge1_for_task1156_bard_word_analogy": 6.0, + "eval_rouge1_for_task1157_bard_word_analogy": 11.0, + "eval_rouge1_for_task1158_bard_word_analogy": 6.0, + "eval_rouge1_for_task1159_bard_word_analogy": 9.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 22.8702, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.1001, + "eval_rouge1_for_task121_zest_question_rewriting": 47.291, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 8.2741, + "eval_rouge1_for_task1344_rte_textual_entailment": 49.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 40.5078, + "eval_rouge1_for_task1356_xlsum_title_generation": 11.5214, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.0839, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 53.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 54.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 48.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 19.9736, + "eval_rouge1_for_task1409_dart_data_to_text": 33.9014, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.349, + "eval_rouge1_for_task1439_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 23.5439, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.4279, + "eval_rouge1_for_task1562_zest_question_rewriting": 48.9872, + "eval_rouge1_for_task1586_scifact_title_generation": 23.6167, + "eval_rouge1_for_task1598_nyc_data_to_text": 29.2581, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.1564, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 60.5416, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 55.0, + "eval_rouge1_for_task1659_billsum_title_generation": 25.7482, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 59.6017, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 48.738, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 35.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 36.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 8.7, + "eval_rouge1_for_task220_rocstories_title_generation": 51.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 45.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 58.1833, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 33.8399, + "eval_rouge1_for_task288_gigaword_title_generation": 26.1761, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 24.6, + "eval_rouge1_for_task329_gap_coreference_resolution": 34.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 44.1, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 45.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 13.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.3792, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 4.0, + "eval_rouge1_for_task402_grailqa_question_rewriting": 67.459, + "eval_rouge1_for_task418_persent_title_generation": 17.3852, + "eval_rouge1_for_task442_com_qa_question_rewriting": 64.293, + "eval_rouge1_for_task500_scruples_title_generation": 13.7436, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 30.9316, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 7.3987, + "eval_rouge1_for_task602_wikitext_title_generation": 7.9178, + "eval_rouge1_for_task613_liar_keyword_tagging": 31.8333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 29.0374, + "eval_rouge1_for_task619_ohsumed_title_generation": 31.2244, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 24.8025, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 28.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 88.1758, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 33.7216, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 76.771, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 64.1627, + "eval_rouge1_for_task677_ollie_data_to_text": 13.8331, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 21.4296, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 1.1174, + "eval_rouge1_for_task769_qed_title_generation": 71.8313, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 46.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 45.0, + "eval_rouge1_for_task892_gap_coreference_resolution": 48.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 24.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_rouge1_for_task957_e2e_data_to_text": 47.0413, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 41.7778, + "eval_rouge1_for_title_generation": 24.3907, + "eval_rouge1_for_word_analogy": 11.5833, + "eval_rougeL": 39.5718, + "eval_rougeL_for_answerability_classification": 51.9744, + "eval_rougeL_for_cause_effect_classification": 52.6843, + "eval_rougeL_for_coreference_resolution": 40.9671, + "eval_rougeL_for_data_to_text": 30.0862, + "eval_rougeL_for_dialogue_act_recognition": 30.8333, + "eval_rougeL_for_grammar_error_correction": 60.0095, + "eval_rougeL_for_keyword_tagging": 51.0796, + "eval_rougeL_for_overlap_extraction": 31.5059, + "eval_rougeL_for_question_rewriting": 63.7796, + "eval_rougeL_for_task020_mctaco_answerability_classification": 51.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 41.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 89.2232, + "eval_rougeL_for_task035_winogrande_question_rewriting": 79.2291, + "eval_rougeL_for_task036_qasc_keyword_tagging": 61.4805, + "eval_rougeL_for_task039_qasc_overlap_extraction": 30.3333, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 34.0465, + "eval_rougeL_for_task1152_bard_word_analogy": 2.0, + "eval_rougeL_for_task1153_bard_word_analogy": 0.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 7.0, + "eval_rougeL_for_task1155_bard_word_analogy": 51.0, + "eval_rougeL_for_task1156_bard_word_analogy": 6.0, + "eval_rougeL_for_task1157_bard_word_analogy": 11.0, + "eval_rougeL_for_task1158_bard_word_analogy": 6.0, + "eval_rougeL_for_task1159_bard_word_analogy": 9.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 18.4174, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.0115, + "eval_rougeL_for_task121_zest_question_rewriting": 42.2591, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 8.2201, + "eval_rougeL_for_task1344_rte_textual_entailment": 49.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 37.6509, + "eval_rougeL_for_task1356_xlsum_title_generation": 9.597, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.0667, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 53.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 54.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 48.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 17.4266, + "eval_rougeL_for_task1409_dart_data_to_text": 29.9252, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 34.4936, + "eval_rougeL_for_task1439_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 21.7869, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.5254, + "eval_rougeL_for_task1562_zest_question_rewriting": 41.8887, + "eval_rougeL_for_task1586_scifact_title_generation": 18.5385, + "eval_rougeL_for_task1598_nyc_data_to_text": 24.4294, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 76.6592, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 50.2904, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 55.0, + "eval_rougeL_for_task1659_billsum_title_generation": 20.9627, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 59.6017, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 43.0684, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 35.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 36.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 8.7, + "eval_rougeL_for_task220_rocstories_title_generation": 51.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 45.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 58.1833, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 32.6784, + "eval_rougeL_for_task288_gigaword_title_generation": 21.9505, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 24.6, + "eval_rougeL_for_task329_gap_coreference_resolution": 34.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 44.1, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 45.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 13.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.8498, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 4.0, + "eval_rougeL_for_task402_grailqa_question_rewriting": 56.1056, + "eval_rougeL_for_task418_persent_title_generation": 14.9455, + "eval_rougeL_for_task442_com_qa_question_rewriting": 60.9302, + "eval_rougeL_for_task500_scruples_title_generation": 11.8032, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 30.5334, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 7.3987, + "eval_rougeL_for_task602_wikitext_title_generation": 7.7925, + "eval_rougeL_for_task613_liar_keyword_tagging": 31.8333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 26.2734, + "eval_rougeL_for_task619_ohsumed_title_generation": 27.085, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 23.9085, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 28.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 88.1758, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 33.7216, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 75.614, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 63.0042, + "eval_rougeL_for_task677_ollie_data_to_text": 11.818, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 17.9414, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 1.1174, + "eval_rougeL_for_task769_qed_title_generation": 71.8313, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 46.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 45.0, + "eval_rougeL_for_task892_gap_coreference_resolution": 48.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 24.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_rougeL_for_task957_e2e_data_to_text": 37.2168, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 41.7778, + "eval_rougeL_for_title_generation": 22.1594, + "eval_rougeL_for_word_analogy": 11.5833, + "eval_runtime": 335.5599, + "eval_samples_per_second": 35.493, + "eval_steps_per_second": 1.112, + "step": 4500 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 1.7679, + "step": 5000 + }, + { + "epoch": 1.14, + "eval_exact_match": 25.1889, + "eval_exact_match_for_answerability_classification": 49.7692, + "eval_exact_match_for_cause_effect_classification": 35.1429, + "eval_exact_match_for_coreference_resolution": 32.2857, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 34.2857, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 32.6, + "eval_exact_match_for_overlap_extraction": 11.5, + "eval_exact_match_for_question_rewriting": 1.0, + "eval_exact_match_for_task020_mctaco_answerability_classification": 55.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 37.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 17.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 2.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 7.0, + "eval_exact_match_for_task1155_bard_word_analogy": 51.0, + "eval_exact_match_for_task1156_bard_word_analogy": 6.0, + "eval_exact_match_for_task1157_bard_word_analogy": 12.0, + "eval_exact_match_for_task1158_bard_word_analogy": 7.0, + "eval_exact_match_for_task1159_bard_word_analogy": 9.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 29.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 35.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 32.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 55.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 49.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 24.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 45.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 59.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 19.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 35.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 36.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 2.0, + "eval_exact_match_for_task220_rocstories_title_generation": 62.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 47.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 48.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 41.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 48.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 48.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 3.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 56.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 30.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 52.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 71.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 29.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 56.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 47.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 20.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 61.0, + "eval_exact_match_for_textual_entailment": 40.9167, + "eval_exact_match_for_title_generation": 7.1188, + "eval_exact_match_for_word_analogy": 11.75, + "eval_f1": 39.892, + "eval_f1_for_answerability_classification": 52.3333, + "eval_f1_for_cause_effect_classification": 53.3957, + "eval_f1_for_coreference_resolution": 40.452, + "eval_f1_for_data_to_text": 29.8171, + "eval_f1_for_dialogue_act_recognition": 37.6429, + "eval_f1_for_grammar_error_correction": 47.8757, + "eval_f1_for_keyword_tagging": 48.0856, + "eval_f1_for_overlap_extraction": 30.2635, + "eval_f1_for_question_rewriting": 66.2841, + "eval_f1_for_task020_mctaco_answerability_classification": 55.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 38.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 90.7098, + "eval_f1_for_task035_winogrande_question_rewriting": 79.4121, + "eval_f1_for_task036_qasc_keyword_tagging": 60.4644, + "eval_f1_for_task039_qasc_overlap_extraction": 27.5, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 26.7979, + "eval_f1_for_task1152_bard_word_analogy": 2.0, + "eval_f1_for_task1153_bard_word_analogy": 0.0, + "eval_f1_for_task1154_bard_word_analogy": 7.0, + "eval_f1_for_task1155_bard_word_analogy": 51.0, + "eval_f1_for_task1156_bard_word_analogy": 6.0, + "eval_f1_for_task1157_bard_word_analogy": 12.0, + "eval_f1_for_task1158_bard_word_analogy": 7.0, + "eval_f1_for_task1159_bard_word_analogy": 9.0, + "eval_f1_for_task1161_coda_19_title_generation": 20.5748, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.8544, + "eval_f1_for_task121_zest_question_rewriting": 46.3187, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 7.949, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 37.3658, + "eval_f1_for_task1356_xlsum_title_generation": 8.3336, + "eval_f1_for_task1358_xlsum_title_generation": 29.5892, + "eval_f1_for_task1385_anli_textual_entailment": 29.0, + "eval_f1_for_task1386_anli_textual_entailment": 35.0, + "eval_f1_for_task1387_anli_textual_entailment": 32.0, + "eval_f1_for_task1388_cb_textual_entailment": 32.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 55.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 49.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 19.9733, + "eval_f1_for_task1409_dart_data_to_text": 31.0054, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 12.1699, + "eval_f1_for_task1439_doqa_answerability_classification": 52.0, + "eval_f1_for_task1442_doqa_answerability_classification": 48.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 24.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 20.3601, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.5814, + "eval_f1_for_task1562_zest_question_rewriting": 48.251, + "eval_f1_for_task1586_scifact_title_generation": 23.0275, + "eval_f1_for_task1598_nyc_data_to_text": 23.9952, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 77.1437, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 45.0, + "eval_f1_for_task1631_open_pi_data_to_text": 59.8023, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 59.0, + "eval_f1_for_task1659_billsum_title_generation": 24.0876, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 61.4853, + "eval_f1_for_task1728_web_nlg_data_to_text": 26.9442, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 35.0, + "eval_f1_for_task201_multinli_textual_entailment": 36.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 5.5778, + "eval_f1_for_task220_rocstories_title_generation": 62.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 49.0, + "eval_f1_for_task233_iirc_answerability_classification": 47.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 48.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 55.1167, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 33.027, + "eval_f1_for_task288_gigaword_title_generation": 23.4525, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 36.9, + "eval_f1_for_task329_gap_coreference_resolution": 32.0, + "eval_f1_for_task330_gap_coreference_resolution": 40.1, + "eval_f1_for_task349_squad2.0_answerability_classification": 48.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 73.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.8284, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 2.0, + "eval_f1_for_task402_grailqa_question_rewriting": 66.5907, + "eval_f1_for_task418_persent_title_generation": 15.8231, + "eval_f1_for_task442_com_qa_question_rewriting": 62.8318, + "eval_f1_for_task500_scruples_title_generation": 11.6172, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 29.8598, + "eval_f1_for_task520_aquamuse_answerability_classification": 48.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 8.0072, + "eval_f1_for_task602_wikitext_title_generation": 7.9951, + "eval_f1_for_task613_liar_keyword_tagging": 17.5317, + "eval_f1_for_task614_glucose_cause_effect_classification": 27.2747, + "eval_f1_for_task619_ohsumed_title_generation": 28.4536, + "eval_f1_for_task620_ohsumed_keyword_tagging": 20.4828, + "eval_f1_for_task623_ohsumed_keyword_tagging": 56.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 30.0, + "eval_f1_for_task642_e_snli_textual_entailment": 52.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 85.9488, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 37.1813, + "eval_f1_for_task670_ambigqa_question_rewriting": 76.467, + "eval_f1_for_task671_ambigqa_question_rewriting": 64.1807, + "eval_f1_for_task677_ollie_data_to_text": 12.8882, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 20.7002, + "eval_f1_for_task760_msr_sqa_data_to_text": 1.5201, + "eval_f1_for_task769_qed_title_generation": 72.9502, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 47.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 39.2111, + "eval_f1_for_task892_gap_coreference_resolution": 50.0, + "eval_f1_for_task893_gap_coreference_resolution": 20.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_f1_for_task957_e2e_data_to_text": 44.4877, + "eval_f1_for_task970_sherliic_textual_entailment": 61.0, + "eval_f1_for_textual_entailment": 40.9167, + "eval_f1_for_title_generation": 23.491, + "eval_f1_for_word_analogy": 11.75, + "eval_gen_len": 11.2437, + "eval_global_step": 5000, + "eval_loss": 1.7398592233657837, + "eval_rouge1": 41.5986, + "eval_rouge1_for_answerability_classification": 52.3333, + "eval_rouge1_for_cause_effect_classification": 53.8103, + "eval_rouge1_for_coreference_resolution": 40.8133, + "eval_rouge1_for_data_to_text": 35.059, + "eval_rouge1_for_dialogue_act_recognition": 39.5476, + "eval_rouge1_for_grammar_error_correction": 58.5439, + "eval_rouge1_for_keyword_tagging": 52.1567, + "eval_rouge1_for_overlap_extraction": 32.8371, + "eval_rouge1_for_question_rewriting": 67.9526, + "eval_rouge1_for_task020_mctaco_answerability_classification": 55.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 40.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 90.7434, + "eval_rouge1_for_task035_winogrande_question_rewriting": 80.2442, + "eval_rouge1_for_task036_qasc_keyword_tagging": 62.912, + "eval_rouge1_for_task039_qasc_overlap_extraction": 31.1667, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 39.3357, + "eval_rouge1_for_task1152_bard_word_analogy": 2.0, + "eval_rouge1_for_task1153_bard_word_analogy": 0.0, + "eval_rouge1_for_task1154_bard_word_analogy": 7.0, + "eval_rouge1_for_task1155_bard_word_analogy": 51.0, + "eval_rouge1_for_task1156_bard_word_analogy": 6.0, + "eval_rouge1_for_task1157_bard_word_analogy": 12.0, + "eval_rouge1_for_task1158_bard_word_analogy": 7.0, + "eval_rouge1_for_task1159_bard_word_analogy": 9.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 23.3566, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.0486, + "eval_rouge1_for_task121_zest_question_rewriting": 48.3876, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 8.0608, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 40.8946, + "eval_rouge1_for_task1356_xlsum_title_generation": 9.8563, + "eval_rouge1_for_task1358_xlsum_title_generation": 34.3997, + "eval_rouge1_for_task1385_anli_textual_entailment": 29.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 32.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 55.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 49.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 19.9998, + "eval_rouge1_for_task1409_dart_data_to_text": 32.404, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.2061, + "eval_rouge1_for_task1439_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 24.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 21.8925, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.8817, + "eval_rouge1_for_task1562_zest_question_rewriting": 51.4476, + "eval_rouge1_for_task1586_scifact_title_generation": 25.4033, + "eval_rouge1_for_task1598_nyc_data_to_text": 27.4394, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 77.6383, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 45.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 60.4809, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 59.0, + "eval_rouge1_for_task1659_billsum_title_generation": 25.8227, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 61.4853, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 49.6292, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 35.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 36.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 8.7667, + "eval_rouge1_for_task220_rocstories_title_generation": 62.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 47.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 48.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 56.45, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 34.5076, + "eval_rouge1_for_task288_gigaword_title_generation": 26.2994, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 37.3, + "eval_rouge1_for_task329_gap_coreference_resolution": 32.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 40.1, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 48.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 73.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 34.5412, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 3.0, + "eval_rouge1_for_task402_grailqa_question_rewriting": 68.3596, + "eval_rouge1_for_task418_persent_title_generation": 18.1065, + "eval_rouge1_for_task442_com_qa_question_rewriting": 66.8393, + "eval_rouge1_for_task500_scruples_title_generation": 13.6891, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 30.0077, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 48.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 8.7233, + "eval_rouge1_for_task602_wikitext_title_generation": 8.1515, + "eval_rouge1_for_task613_liar_keyword_tagging": 31.0222, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 29.4641, + "eval_rouge1_for_task619_ohsumed_title_generation": 30.4214, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 24.1148, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 56.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 30.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 52.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 86.7345, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 37.3169, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 77.7646, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 65.1108, + "eval_rouge1_for_task677_ollie_data_to_text": 14.2044, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 21.319, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 2.3656, + "eval_rouge1_for_task769_qed_title_generation": 72.9228, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 47.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 39.4, + "eval_rouge1_for_task892_gap_coreference_resolution": 50.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 20.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_rouge1_for_task957_e2e_data_to_text": 45.4788, + "eval_rouge1_for_task970_sherliic_textual_entailment": 61.0, + "eval_rouge1_for_textual_entailment": 42.7778, + "eval_rouge1_for_title_generation": 25.1062, + "eval_rouge1_for_word_analogy": 11.75, + "eval_rougeL": 40.4768, + "eval_rougeL_for_answerability_classification": 52.3333, + "eval_rougeL_for_cause_effect_classification": 53.0517, + "eval_rougeL_for_coreference_resolution": 40.8133, + "eval_rougeL_for_data_to_text": 29.5659, + "eval_rougeL_for_dialogue_act_recognition": 39.5476, + "eval_rougeL_for_grammar_error_correction": 57.4605, + "eval_rougeL_for_keyword_tagging": 51.5346, + "eval_rougeL_for_overlap_extraction": 32.301, + "eval_rougeL_for_question_rewriting": 64.5603, + "eval_rougeL_for_task020_mctaco_answerability_classification": 55.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 40.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 90.7434, + "eval_rougeL_for_task035_winogrande_question_rewriting": 79.6848, + "eval_rougeL_for_task036_qasc_keyword_tagging": 60.6873, + "eval_rougeL_for_task039_qasc_overlap_extraction": 31.1667, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 33.2242, + "eval_rougeL_for_task1152_bard_word_analogy": 2.0, + "eval_rougeL_for_task1153_bard_word_analogy": 0.0, + "eval_rougeL_for_task1154_bard_word_analogy": 7.0, + "eval_rougeL_for_task1155_bard_word_analogy": 51.0, + "eval_rougeL_for_task1156_bard_word_analogy": 6.0, + "eval_rougeL_for_task1157_bard_word_analogy": 12.0, + "eval_rougeL_for_task1158_bard_word_analogy": 7.0, + "eval_rougeL_for_task1159_bard_word_analogy": 9.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 18.3914, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.0798, + "eval_rougeL_for_task121_zest_question_rewriting": 43.4848, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 8.0068, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 37.7422, + "eval_rougeL_for_task1356_xlsum_title_generation": 8.3308, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.6859, + "eval_rougeL_for_task1385_anli_textual_entailment": 29.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 32.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 55.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 49.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 17.6609, + "eval_rougeL_for_task1409_dart_data_to_text": 29.0092, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 28.9419, + "eval_rougeL_for_task1439_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 24.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 20.2358, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.9792, + "eval_rougeL_for_task1562_zest_question_rewriting": 42.6136, + "eval_rougeL_for_task1586_scifact_title_generation": 19.9935, + "eval_rougeL_for_task1598_nyc_data_to_text": 22.5773, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 76.3194, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 45.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 49.3911, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 59.0, + "eval_rougeL_for_task1659_billsum_title_generation": 21.0014, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 61.4853, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 43.5415, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 35.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 36.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 8.7667, + "eval_rougeL_for_task220_rocstories_title_generation": 62.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 47.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 48.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 56.45, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 33.4353, + "eval_rougeL_for_task288_gigaword_title_generation": 22.2975, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 37.3, + "eval_rougeL_for_task329_gap_coreference_resolution": 32.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 40.1, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 48.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 73.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 32.3164, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 3.0, + "eval_rougeL_for_task402_grailqa_question_rewriting": 56.7282, + "eval_rougeL_for_task418_persent_title_generation": 16.1285, + "eval_rougeL_for_task442_com_qa_question_rewriting": 63.2786, + "eval_rougeL_for_task500_scruples_title_generation": 12.1685, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 29.6483, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 48.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 8.5078, + "eval_rougeL_for_task602_wikitext_title_generation": 8.0694, + "eval_rougeL_for_task613_liar_keyword_tagging": 31.0222, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 26.3791, + "eval_rougeL_for_task619_ohsumed_title_generation": 26.9399, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 23.2291, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 56.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 30.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 52.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 86.7345, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 37.3169, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 76.5987, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 63.89, + "eval_rougeL_for_task677_ollie_data_to_text": 12.3724, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 17.832, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 2.2222, + "eval_rougeL_for_task769_qed_title_generation": 72.9228, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 47.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 39.4, + "eval_rougeL_for_task892_gap_coreference_resolution": 50.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 20.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_rougeL_for_task957_e2e_data_to_text": 35.8601, + "eval_rougeL_for_task970_sherliic_textual_entailment": 61.0, + "eval_rougeL_for_textual_entailment": 42.7778, + "eval_rougeL_for_title_generation": 22.9056, + "eval_rougeL_for_word_analogy": 11.75, + "eval_runtime": 408.0922, + "eval_samples_per_second": 29.185, + "eval_steps_per_second": 0.914, + "step": 5000 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 1.7598, + "step": 5500 + }, + { + "epoch": 1.26, + "eval_exact_match": 25.424, + "eval_exact_match_for_answerability_classification": 49.7692, + "eval_exact_match_for_cause_effect_classification": 35.4286, + "eval_exact_match_for_coreference_resolution": 32.9286, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 34.4286, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 35.0, + "eval_exact_match_for_overlap_extraction": 11.0, + "eval_exact_match_for_question_rewriting": 1.0, + "eval_exact_match_for_task020_mctaco_answerability_classification": 52.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 36.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 32.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 22.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 3.0, + "eval_exact_match_for_task1153_bard_word_analogy": 1.0, + "eval_exact_match_for_task1154_bard_word_analogy": 8.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 8.0, + "eval_exact_match_for_task1157_bard_word_analogy": 9.0, + "eval_exact_match_for_task1158_bard_word_analogy": 7.0, + "eval_exact_match_for_task1159_bard_word_analogy": 9.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 35.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 54.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 52.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 49.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 54.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 24.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 37.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 18.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 35.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 37.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 60.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 48.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 47.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 36.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 51.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 48.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 4.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 21.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 4.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 47.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 28.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 57.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 71.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 31.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 54.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 49.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 38.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 24.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 62.0, + "eval_exact_match_for_textual_entailment": 41.2917, + "eval_exact_match_for_title_generation": 6.8386, + "eval_exact_match_for_word_analogy": 11.875, + "eval_f1": 40.0049, + "eval_f1_for_answerability_classification": 52.2821, + "eval_f1_for_cause_effect_classification": 53.4129, + "eval_f1_for_coreference_resolution": 40.9718, + "eval_f1_for_data_to_text": 29.9347, + "eval_f1_for_dialogue_act_recognition": 37.8571, + "eval_f1_for_grammar_error_correction": 49.3564, + "eval_f1_for_keyword_tagging": 49.1588, + "eval_f1_for_overlap_extraction": 29.1293, + "eval_f1_for_question_rewriting": 65.4683, + "eval_f1_for_task020_mctaco_answerability_classification": 52.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 37.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 89.0859, + "eval_f1_for_task035_winogrande_question_rewriting": 80.5446, + "eval_f1_for_task036_qasc_keyword_tagging": 70.5438, + "eval_f1_for_task039_qasc_overlap_extraction": 25.3333, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 27.4456, + "eval_f1_for_task1152_bard_word_analogy": 3.0, + "eval_f1_for_task1153_bard_word_analogy": 1.0, + "eval_f1_for_task1154_bard_word_analogy": 8.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 8.0, + "eval_f1_for_task1157_bard_word_analogy": 9.0, + "eval_f1_for_task1158_bard_word_analogy": 7.0, + "eval_f1_for_task1159_bard_word_analogy": 9.0, + "eval_f1_for_task1161_coda_19_title_generation": 21.8927, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.976, + "eval_f1_for_task121_zest_question_rewriting": 43.3027, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 7.058, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 32.9572, + "eval_f1_for_task1356_xlsum_title_generation": 8.9284, + "eval_f1_for_task1358_xlsum_title_generation": 29.1185, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 35.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 54.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 52.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 49.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 20.7966, + "eval_f1_for_task1409_dart_data_to_text": 31.9962, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 15.5601, + "eval_f1_for_task1439_doqa_answerability_classification": 54.0, + "eval_f1_for_task1442_doqa_answerability_classification": 49.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 24.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1540_peer_read_title_generation": 20.174, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.1527, + "eval_f1_for_task1562_zest_question_rewriting": 46.0499, + "eval_f1_for_task1586_scifact_title_generation": 23.5012, + "eval_f1_for_task1598_nyc_data_to_text": 23.4556, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 37.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 77.587, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_f1_for_task1631_open_pi_data_to_text": 58.9754, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_f1_for_task1659_billsum_title_generation": 26.2427, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 61.8187, + "eval_f1_for_task1728_web_nlg_data_to_text": 26.158, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 35.0, + "eval_f1_for_task201_multinli_textual_entailment": 37.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 7.0944, + "eval_f1_for_task220_rocstories_title_generation": 60.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 48.0, + "eval_f1_for_task233_iirc_answerability_classification": 46.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 47.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 52.55, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 32.9252, + "eval_f1_for_task288_gigaword_title_generation": 23.3945, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 30.7, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 39.7667, + "eval_f1_for_task349_squad2.0_answerability_classification": 48.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 73.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.4909, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 5.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 64.5801, + "eval_f1_for_task418_persent_title_generation": 14.848, + "eval_f1_for_task442_com_qa_question_rewriting": 62.2981, + "eval_f1_for_task500_scruples_title_generation": 12.7664, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 31.6526, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 7.7195, + "eval_f1_for_task602_wikitext_title_generation": 7.4754, + "eval_f1_for_task613_liar_keyword_tagging": 22.1667, + "eval_f1_for_task614_glucose_cause_effect_classification": 27.7325, + "eval_f1_for_task619_ohsumed_title_generation": 27.2363, + "eval_f1_for_task620_ohsumed_keyword_tagging": 18.4323, + "eval_f1_for_task623_ohsumed_keyword_tagging": 47.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 28.0, + "eval_f1_for_task642_e_snli_textual_entailment": 57.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 87.6512, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 38.4924, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.2358, + "eval_f1_for_task671_ambigqa_question_rewriting": 64.5345, + "eval_f1_for_task677_ollie_data_to_text": 12.9267, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 18.7648, + "eval_f1_for_task760_msr_sqa_data_to_text": 1.7396, + "eval_f1_for_task769_qed_title_generation": 69.5773, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 49.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 43.7778, + "eval_f1_for_task892_gap_coreference_resolution": 51.0, + "eval_f1_for_task893_gap_coreference_resolution": 24.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_f1_for_task957_e2e_data_to_text": 45.0543, + "eval_f1_for_task970_sherliic_textual_entailment": 62.0, + "eval_f1_for_textual_entailment": 41.2917, + "eval_f1_for_title_generation": 23.3323, + "eval_f1_for_word_analogy": 11.875, + "eval_gen_len": 10.0365, + "eval_global_step": 5500, + "eval_loss": 1.7715083360671997, + "eval_rouge1": 41.603, + "eval_rouge1_for_answerability_classification": 52.2821, + "eval_rouge1_for_cause_effect_classification": 53.7942, + "eval_rouge1_for_coreference_resolution": 41.3329, + "eval_rouge1_for_data_to_text": 34.6806, + "eval_rouge1_for_dialogue_act_recognition": 39.7619, + "eval_rouge1_for_grammar_error_correction": 58.7574, + "eval_rouge1_for_keyword_tagging": 53.3761, + "eval_rouge1_for_overlap_extraction": 31.2379, + "eval_rouge1_for_question_rewriting": 66.9719, + "eval_rouge1_for_task020_mctaco_answerability_classification": 52.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 39.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 89.1288, + "eval_rouge1_for_task035_winogrande_question_rewriting": 81.2244, + "eval_rouge1_for_task036_qasc_keyword_tagging": 73.1057, + "eval_rouge1_for_task039_qasc_overlap_extraction": 28.3333, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 34.5286, + "eval_rouge1_for_task1152_bard_word_analogy": 3.0, + "eval_rouge1_for_task1153_bard_word_analogy": 1.0, + "eval_rouge1_for_task1154_bard_word_analogy": 8.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 8.0, + "eval_rouge1_for_task1157_bard_word_analogy": 9.0, + "eval_rouge1_for_task1158_bard_word_analogy": 7.0, + "eval_rouge1_for_task1159_bard_word_analogy": 9.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 24.4103, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.1539, + "eval_rouge1_for_task121_zest_question_rewriting": 45.5858, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 7.1599, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 35.4076, + "eval_rouge1_for_task1356_xlsum_title_generation": 10.3082, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.1802, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 54.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 52.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 49.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 20.9457, + "eval_rouge1_for_task1409_dart_data_to_text": 33.1115, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.9463, + "eval_rouge1_for_task1439_doqa_answerability_classification": 54.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 24.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 21.7641, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.5684, + "eval_rouge1_for_task1562_zest_question_rewriting": 49.0926, + "eval_rouge1_for_task1586_scifact_title_generation": 26.7508, + "eval_rouge1_for_task1598_nyc_data_to_text": 28.3062, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 79.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.4228, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 59.4974, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rouge1_for_task1659_billsum_title_generation": 27.9106, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 61.8187, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 50.453, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 35.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 37.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 9.4167, + "eval_rouge1_for_task220_rocstories_title_generation": 60.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 48.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 47.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 53.8833, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 34.1425, + "eval_rouge1_for_task288_gigaword_title_generation": 26.1703, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 31.1, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 39.6, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 48.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 73.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 32.2748, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 6.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 66.1491, + "eval_rouge1_for_task418_persent_title_generation": 16.8481, + "eval_rouge1_for_task442_com_qa_question_rewriting": 66.1317, + "eval_rouge1_for_task500_scruples_title_generation": 15.0282, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 31.7902, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 8.791, + "eval_rouge1_for_task602_wikitext_title_generation": 7.8267, + "eval_rouge1_for_task613_liar_keyword_tagging": 37.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 29.6179, + "eval_rouge1_for_task619_ohsumed_title_generation": 28.7467, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 21.6239, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 47.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 28.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 57.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 88.1512, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 38.7924, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.1313, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 65.2631, + "eval_rouge1_for_task677_ollie_data_to_text": 13.8577, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 19.275, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 2.0526, + "eval_rouge1_for_task769_qed_title_generation": 69.5499, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 49.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 43.9667, + "eval_rouge1_for_task892_gap_coreference_resolution": 51.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 24.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rouge1_for_task957_e2e_data_to_text": 45.2282, + "eval_rouge1_for_task970_sherliic_textual_entailment": 62.0, + "eval_rouge1_for_textual_entailment": 43.0417, + "eval_rouge1_for_title_generation": 24.8696, + "eval_rouge1_for_word_analogy": 11.875, + "eval_rougeL": 40.5687, + "eval_rougeL_for_answerability_classification": 52.2821, + "eval_rougeL_for_cause_effect_classification": 53.151, + "eval_rougeL_for_coreference_resolution": 41.3329, + "eval_rougeL_for_data_to_text": 29.5365, + "eval_rougeL_for_dialogue_act_recognition": 39.7619, + "eval_rougeL_for_grammar_error_correction": 57.8751, + "eval_rougeL_for_keyword_tagging": 53.1034, + "eval_rougeL_for_overlap_extraction": 30.6046, + "eval_rougeL_for_question_rewriting": 63.6915, + "eval_rougeL_for_task020_mctaco_answerability_classification": 52.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 39.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 89.1288, + "eval_rougeL_for_task035_winogrande_question_rewriting": 80.7885, + "eval_rougeL_for_task036_qasc_keyword_tagging": 71.842, + "eval_rougeL_for_task039_qasc_overlap_extraction": 28.3333, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 29.8134, + "eval_rougeL_for_task1152_bard_word_analogy": 3.0, + "eval_rougeL_for_task1153_bard_word_analogy": 1.0, + "eval_rougeL_for_task1154_bard_word_analogy": 8.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 8.0, + "eval_rougeL_for_task1157_bard_word_analogy": 9.0, + "eval_rougeL_for_task1158_bard_word_analogy": 7.0, + "eval_rougeL_for_task1159_bard_word_analogy": 9.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 19.9699, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.1279, + "eval_rougeL_for_task121_zest_question_rewriting": 39.4857, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 7.1058, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 33.0799, + "eval_rougeL_for_task1356_xlsum_title_generation": 8.56, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.0566, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 54.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 52.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 49.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 18.5168, + "eval_rougeL_for_task1409_dart_data_to_text": 29.5054, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.0979, + "eval_rougeL_for_task1439_doqa_answerability_classification": 54.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 24.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 20.882, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.6522, + "eval_rougeL_for_task1562_zest_question_rewriting": 41.8232, + "eval_rougeL_for_task1586_scifact_title_generation": 21.5926, + "eval_rougeL_for_task1598_nyc_data_to_text": 23.9537, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 79.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 76.881, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 48.1403, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rougeL_for_task1659_billsum_title_generation": 23.4373, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 61.8187, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 44.6801, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 35.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 37.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 9.4167, + "eval_rougeL_for_task220_rocstories_title_generation": 60.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 48.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 47.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 53.8833, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 32.8758, + "eval_rougeL_for_task288_gigaword_title_generation": 22.0817, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 31.1, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 39.6, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 48.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 73.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.7006, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 6.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 54.056, + "eval_rougeL_for_task418_persent_title_generation": 14.6754, + "eval_rougeL_for_task442_com_qa_question_rewriting": 62.6065, + "eval_rougeL_for_task500_scruples_title_generation": 13.6645, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 31.4123, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 8.791, + "eval_rougeL_for_task602_wikitext_title_generation": 7.8267, + "eval_rougeL_for_task613_liar_keyword_tagging": 37.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 26.6898, + "eval_rougeL_for_task619_ohsumed_title_generation": 25.8876, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 21.5239, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 47.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 28.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 57.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 88.1512, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 38.7924, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.4421, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 64.1863, + "eval_rougeL_for_task677_ollie_data_to_text": 11.9996, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 16.3005, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 1.9728, + "eval_rougeL_for_task769_qed_title_generation": 69.5499, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 49.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 43.9667, + "eval_rougeL_for_task892_gap_coreference_resolution": 51.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 24.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rougeL_for_task957_e2e_data_to_text": 36.8495, + "eval_rougeL_for_task970_sherliic_textual_entailment": 62.0, + "eval_rougeL_for_textual_entailment": 43.0417, + "eval_rougeL_for_title_generation": 22.8676, + "eval_rougeL_for_word_analogy": 11.875, + "eval_runtime": 334.5742, + "eval_samples_per_second": 35.597, + "eval_steps_per_second": 1.115, + "step": 5500 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 1.6966, + "step": 6000 + }, + { + "epoch": 1.37, + "eval_exact_match": 25.508, + "eval_exact_match_for_answerability_classification": 50.3846, + "eval_exact_match_for_cause_effect_classification": 35.5714, + "eval_exact_match_for_coreference_resolution": 32.5714, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 35.0, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 37.4, + "eval_exact_match_for_overlap_extraction": 10.5, + "eval_exact_match_for_question_rewriting": 0.6364, + "eval_exact_match_for_task020_mctaco_answerability_classification": 51.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 34.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 32.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 21.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 49.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 3.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 8.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 5.0, + "eval_exact_match_for_task1157_bard_word_analogy": 9.0, + "eval_exact_match_for_task1158_bard_word_analogy": 4.0, + "eval_exact_match_for_task1159_bard_word_analogy": 8.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 29.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 37.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 29.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 49.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 55.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 54.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 55.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 18.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 38.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 38.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 0.0, + "eval_exact_match_for_task220_rocstories_title_generation": 58.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 51.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 46.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 36.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 26.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 46.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 49.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 20.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 6.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 59.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 29.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 52.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 70.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 28.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 57.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 28.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 64.0, + "eval_exact_match_for_textual_entailment": 41.5, + "eval_exact_match_for_title_generation": 6.7265, + "eval_exact_match_for_word_analogy": 10.875, + "eval_f1": 40.2714, + "eval_f1_for_answerability_classification": 52.9487, + "eval_f1_for_cause_effect_classification": 53.5303, + "eval_f1_for_coreference_resolution": 40.5506, + "eval_f1_for_data_to_text": 30.4082, + "eval_f1_for_dialogue_act_recognition": 38.5, + "eval_f1_for_grammar_error_correction": 47.6951, + "eval_f1_for_keyword_tagging": 51.7175, + "eval_f1_for_overlap_extraction": 28.5229, + "eval_f1_for_question_rewriting": 66.8762, + "eval_f1_for_task020_mctaco_answerability_classification": 51.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 36.0, + "eval_f1_for_task034_winogrande_question_rewriting": 90.8044, + "eval_f1_for_task035_winogrande_question_rewriting": 80.7104, + "eval_f1_for_task036_qasc_keyword_tagging": 68.1974, + "eval_f1_for_task039_qasc_overlap_extraction": 24.3333, + "eval_f1_for_task050_multirc_answerability_classification": 49.0, + "eval_f1_for_task102_commongen_data_to_text": 27.2654, + "eval_f1_for_task1152_bard_word_analogy": 3.0, + "eval_f1_for_task1153_bard_word_analogy": 0.6667, + "eval_f1_for_task1154_bard_word_analogy": 8.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 5.0, + "eval_f1_for_task1157_bard_word_analogy": 9.0, + "eval_f1_for_task1158_bard_word_analogy": 4.0, + "eval_f1_for_task1159_bard_word_analogy": 8.0, + "eval_f1_for_task1161_coda_19_title_generation": 22.3233, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.3788, + "eval_f1_for_task121_zest_question_rewriting": 45.8589, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 9.9372, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 36.0229, + "eval_f1_for_task1356_xlsum_title_generation": 10.0934, + "eval_f1_for_task1358_xlsum_title_generation": 29.1326, + "eval_f1_for_task1385_anli_textual_entailment": 29.0, + "eval_f1_for_task1386_anli_textual_entailment": 37.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 29.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 49.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 20.2124, + "eval_f1_for_task1409_dart_data_to_text": 33.1762, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 12.1733, + "eval_f1_for_task1439_doqa_answerability_classification": 55.0, + "eval_f1_for_task1442_doqa_answerability_classification": 54.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1540_peer_read_title_generation": 20.4507, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.2169, + "eval_f1_for_task1562_zest_question_rewriting": 46.8741, + "eval_f1_for_task1586_scifact_title_generation": 21.0304, + "eval_f1_for_task1598_nyc_data_to_text": 24.6931, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 77.6902, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_f1_for_task1631_open_pi_data_to_text": 60.1798, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 55.0, + "eval_f1_for_task1659_billsum_title_generation": 26.6572, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 61.3111, + "eval_f1_for_task1728_web_nlg_data_to_text": 26.1113, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 38.0, + "eval_f1_for_task201_multinli_textual_entailment": 38.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 3.1444, + "eval_f1_for_task220_rocstories_title_generation": 58.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 46.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 51.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 58.9833, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 32.7124, + "eval_f1_for_task288_gigaword_title_generation": 23.6656, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 33.0333, + "eval_f1_for_task329_gap_coreference_resolution": 36.0, + "eval_f1_for_task330_gap_coreference_resolution": 33.7667, + "eval_f1_for_task349_squad2.0_answerability_classification": 46.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 32.0228, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 4.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 66.736, + "eval_f1_for_task418_persent_title_generation": 15.5729, + "eval_f1_for_task442_com_qa_question_rewriting": 62.2457, + "eval_f1_for_task500_scruples_title_generation": 10.5718, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 31.788, + "eval_f1_for_task520_aquamuse_answerability_classification": 49.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 6.9136, + "eval_f1_for_task602_wikitext_title_generation": 7.2132, + "eval_f1_for_task613_liar_keyword_tagging": 22.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 27.0225, + "eval_f1_for_task619_ohsumed_title_generation": 26.0382, + "eval_f1_for_task620_ohsumed_keyword_tagging": 23.489, + "eval_f1_for_task623_ohsumed_keyword_tagging": 59.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 29.0, + "eval_f1_for_task642_e_snli_textual_entailment": 52.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 85.9012, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 34.8035, + "eval_f1_for_task670_ambigqa_question_rewriting": 81.1559, + "eval_f1_for_task671_ambigqa_question_rewriting": 68.1613, + "eval_f1_for_task677_ollie_data_to_text": 15.7504, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 19.6757, + "eval_f1_for_task760_msr_sqa_data_to_text": 1.5233, + "eval_f1_for_task769_qed_title_generation": 73.5139, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 37.6444, + "eval_f1_for_task892_gap_coreference_resolution": 51.0, + "eval_f1_for_task893_gap_coreference_resolution": 28.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 43.3875, + "eval_f1_for_task970_sherliic_textual_entailment": 64.0, + "eval_f1_for_textual_entailment": 41.5, + "eval_f1_for_title_generation": 23.2381, + "eval_f1_for_word_analogy": 10.9583, + "eval_gen_len": 10.761, + "eval_global_step": 6000, + "eval_loss": 1.788931965827942, + "eval_rouge1": 41.976, + "eval_rouge1_for_answerability_classification": 52.9487, + "eval_rouge1_for_cause_effect_classification": 53.8913, + "eval_rouge1_for_coreference_resolution": 40.9749, + "eval_rouge1_for_data_to_text": 35.2965, + "eval_rouge1_for_dialogue_act_recognition": 40.4048, + "eval_rouge1_for_grammar_error_correction": 58.274, + "eval_rouge1_for_keyword_tagging": 55.9324, + "eval_rouge1_for_overlap_extraction": 31.6091, + "eval_rouge1_for_question_rewriting": 68.4434, + "eval_rouge1_for_task020_mctaco_answerability_classification": 51.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 38.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 90.8277, + "eval_rouge1_for_task035_winogrande_question_rewriting": 81.5176, + "eval_rouge1_for_task036_qasc_keyword_tagging": 70.8593, + "eval_rouge1_for_task039_qasc_overlap_extraction": 29.3333, + "eval_rouge1_for_task050_multirc_answerability_classification": 49.0, + "eval_rouge1_for_task102_commongen_data_to_text": 36.0848, + "eval_rouge1_for_task1152_bard_word_analogy": 3.0, + "eval_rouge1_for_task1153_bard_word_analogy": 0.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 8.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 5.0, + "eval_rouge1_for_task1157_bard_word_analogy": 9.0, + "eval_rouge1_for_task1158_bard_word_analogy": 4.0, + "eval_rouge1_for_task1159_bard_word_analogy": 8.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 25.0671, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.6216, + "eval_rouge1_for_task121_zest_question_rewriting": 48.402, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 10.0715, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 38.749, + "eval_rouge1_for_task1356_xlsum_title_generation": 11.458, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.46, + "eval_rouge1_for_task1385_anli_textual_entailment": 29.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 37.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 29.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 49.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 20.4441, + "eval_rouge1_for_task1409_dart_data_to_text": 34.5538, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.9954, + "eval_rouge1_for_task1439_doqa_answerability_classification": 55.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 54.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 22.6047, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.5527, + "eval_rouge1_for_task1562_zest_question_rewriting": 50.1954, + "eval_rouge1_for_task1586_scifact_title_generation": 24.3806, + "eval_rouge1_for_task1598_nyc_data_to_text": 27.6537, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 77.9965, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 60.5048, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 55.0, + "eval_rouge1_for_task1659_billsum_title_generation": 28.2923, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 61.3111, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 50.3211, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 38.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 38.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 6.1667, + "eval_rouge1_for_task220_rocstories_title_generation": 58.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 51.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 60.3167, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 33.8849, + "eval_rouge1_for_task288_gigaword_title_generation": 26.4813, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 33.4333, + "eval_rouge1_for_task329_gap_coreference_resolution": 36.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 33.6, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 46.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 32.6314, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 6.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 68.418, + "eval_rouge1_for_task418_persent_title_generation": 17.8237, + "eval_rouge1_for_task442_com_qa_question_rewriting": 65.8665, + "eval_rouge1_for_task500_scruples_title_generation": 12.4322, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 31.7776, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 49.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 8.2352, + "eval_rouge1_for_task602_wikitext_title_generation": 7.396, + "eval_rouge1_for_task613_liar_keyword_tagging": 35.5, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 28.9411, + "eval_rouge1_for_task619_ohsumed_title_generation": 28.5584, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 27.616, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 59.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 29.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 52.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 86.6869, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 34.9869, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.2196, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 69.063, + "eval_rouge1_for_task677_ollie_data_to_text": 17.1639, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 20.5955, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 1.8438, + "eval_rouge1_for_task769_qed_title_generation": 73.4864, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 37.8333, + "eval_rouge1_for_task892_gap_coreference_resolution": 51.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 28.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 44.3432, + "eval_rouge1_for_task970_sherliic_textual_entailment": 64.0, + "eval_rouge1_for_textual_entailment": 43.3611, + "eval_rouge1_for_title_generation": 24.9498, + "eval_rouge1_for_word_analogy": 10.9583, + "eval_rougeL": 40.8808, + "eval_rougeL_for_answerability_classification": 52.9487, + "eval_rougeL_for_cause_effect_classification": 53.1276, + "eval_rougeL_for_coreference_resolution": 40.9749, + "eval_rougeL_for_data_to_text": 29.7088, + "eval_rougeL_for_dialogue_act_recognition": 40.4048, + "eval_rougeL_for_grammar_error_correction": 57.4369, + "eval_rougeL_for_keyword_tagging": 55.4896, + "eval_rougeL_for_overlap_extraction": 30.9651, + "eval_rougeL_for_question_rewriting": 65.1556, + "eval_rougeL_for_task020_mctaco_answerability_classification": 51.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 38.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 90.8277, + "eval_rougeL_for_task035_winogrande_question_rewriting": 81.1638, + "eval_rougeL_for_task036_qasc_keyword_tagging": 69.5122, + "eval_rougeL_for_task039_qasc_overlap_extraction": 29.3333, + "eval_rougeL_for_task050_multirc_answerability_classification": 49.0, + "eval_rougeL_for_task102_commongen_data_to_text": 31.2063, + "eval_rougeL_for_task1152_bard_word_analogy": 3.0, + "eval_rougeL_for_task1153_bard_word_analogy": 0.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 8.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 5.0, + "eval_rougeL_for_task1157_bard_word_analogy": 9.0, + "eval_rougeL_for_task1158_bard_word_analogy": 4.0, + "eval_rougeL_for_task1159_bard_word_analogy": 8.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 20.6238, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.6623, + "eval_rougeL_for_task121_zest_question_rewriting": 42.5444, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 9.8413, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 36.1847, + "eval_rougeL_for_task1356_xlsum_title_generation": 9.5104, + "eval_rougeL_for_task1358_xlsum_title_generation": 27.8401, + "eval_rougeL_for_task1385_anli_textual_entailment": 29.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 37.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 29.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 49.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 17.6474, + "eval_rougeL_for_task1409_dart_data_to_text": 30.7873, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.2373, + "eval_rougeL_for_task1439_doqa_answerability_classification": 55.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 54.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 21.6857, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.6365, + "eval_rougeL_for_task1562_zest_question_rewriting": 40.8837, + "eval_rougeL_for_task1586_scifact_title_generation": 19.6006, + "eval_rougeL_for_task1598_nyc_data_to_text": 22.8616, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 76.6905, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 49.6585, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 55.0, + "eval_rougeL_for_task1659_billsum_title_generation": 23.6634, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 61.3111, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 43.6581, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 38.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 38.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 6.1667, + "eval_rougeL_for_task220_rocstories_title_generation": 58.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 51.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 60.3167, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 32.5969, + "eval_rougeL_for_task288_gigaword_title_generation": 22.4251, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 33.4333, + "eval_rougeL_for_task329_gap_coreference_resolution": 36.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 33.6, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 46.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.3738, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 6.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 57.1821, + "eval_rougeL_for_task418_persent_title_generation": 15.2261, + "eval_rougeL_for_task442_com_qa_question_rewriting": 62.4944, + "eval_rougeL_for_task500_scruples_title_generation": 11.0991, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 31.4436, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 49.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 8.1826, + "eval_rougeL_for_task602_wikitext_title_generation": 7.2707, + "eval_rougeL_for_task613_liar_keyword_tagging": 35.5, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 25.8527, + "eval_rougeL_for_task619_ohsumed_title_generation": 25.2269, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 26.7489, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 59.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 29.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 52.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 86.6869, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 34.9869, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.717, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 68.3613, + "eval_rougeL_for_task677_ollie_data_to_text": 14.4492, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 17.3799, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 1.7846, + "eval_rougeL_for_task769_qed_title_generation": 73.4864, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 37.8333, + "eval_rougeL_for_task892_gap_coreference_resolution": 51.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 28.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 34.6624, + "eval_rougeL_for_task970_sherliic_textual_entailment": 64.0, + "eval_rougeL_for_textual_entailment": 43.3611, + "eval_rougeL_for_title_generation": 22.8424, + "eval_rougeL_for_word_analogy": 10.9583, + "eval_runtime": 381.5838, + "eval_samples_per_second": 31.212, + "eval_steps_per_second": 0.978, + "step": 6000 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 1.6892, + "step": 6500 + }, + { + "epoch": 1.49, + "eval_exact_match": 24.7523, + "eval_exact_match_for_answerability_classification": 49.0769, + "eval_exact_match_for_cause_effect_classification": 35.0, + "eval_exact_match_for_coreference_resolution": 32.6429, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 27.0, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 36.0, + "eval_exact_match_for_overlap_extraction": 10.0, + "eval_exact_match_for_question_rewriting": 0.9091, + "eval_exact_match_for_task020_mctaco_answerability_classification": 51.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 36.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 26.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 20.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 46.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 3.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 8.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 9.0, + "eval_exact_match_for_task1157_bard_word_analogy": 12.0, + "eval_exact_match_for_task1158_bard_word_analogy": 6.0, + "eval_exact_match_for_task1159_bard_word_analogy": 9.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 47.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 19.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 38.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 47.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 19.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 35.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 45.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 43.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 51.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 55.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 53.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 5.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 48.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 20.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 5.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 55.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 52.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 74.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 30.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 58.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 48.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 22.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 51.0, + "eval_exact_match_for_textual_entailment": 40.8333, + "eval_exact_match_for_title_generation": 6.5583, + "eval_exact_match_for_word_analogy": 12.125, + "eval_f1": 39.3922, + "eval_f1_for_answerability_classification": 51.3846, + "eval_f1_for_cause_effect_classification": 53.4777, + "eval_f1_for_coreference_resolution": 40.8121, + "eval_f1_for_data_to_text": 30.4862, + "eval_f1_for_dialogue_act_recognition": 27.3571, + "eval_f1_for_grammar_error_correction": 47.7292, + "eval_f1_for_keyword_tagging": 50.9165, + "eval_f1_for_overlap_extraction": 28.1921, + "eval_f1_for_question_rewriting": 66.7, + "eval_f1_for_task020_mctaco_answerability_classification": 51.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 38.0, + "eval_f1_for_task034_winogrande_question_rewriting": 88.5353, + "eval_f1_for_task035_winogrande_question_rewriting": 78.8934, + "eval_f1_for_task036_qasc_keyword_tagging": 66.2298, + "eval_f1_for_task039_qasc_overlap_extraction": 23.3333, + "eval_f1_for_task050_multirc_answerability_classification": 46.0, + "eval_f1_for_task102_commongen_data_to_text": 32.3468, + "eval_f1_for_task1152_bard_word_analogy": 3.0, + "eval_f1_for_task1153_bard_word_analogy": 0.6667, + "eval_f1_for_task1154_bard_word_analogy": 8.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 9.0, + "eval_f1_for_task1157_bard_word_analogy": 12.0, + "eval_f1_for_task1158_bard_word_analogy": 6.0, + "eval_f1_for_task1159_bard_word_analogy": 9.0, + "eval_f1_for_task1161_coda_19_title_generation": 20.8875, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.6337, + "eval_f1_for_task121_zest_question_rewriting": 45.5598, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 7.3216, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 36.4657, + "eval_f1_for_task1356_xlsum_title_generation": 10.7066, + "eval_f1_for_task1358_xlsum_title_generation": 29.0538, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 47.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 19.755, + "eval_f1_for_task1409_dart_data_to_text": 31.51, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 12.7042, + "eval_f1_for_task1439_doqa_answerability_classification": 52.0, + "eval_f1_for_task1442_doqa_answerability_classification": 49.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 19.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1540_peer_read_title_generation": 22.3132, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 82.7543, + "eval_f1_for_task1562_zest_question_rewriting": 45.4663, + "eval_f1_for_task1586_scifact_title_generation": 22.6861, + "eval_f1_for_task1598_nyc_data_to_text": 24.1222, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 38.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 77.5409, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 47.0, + "eval_f1_for_task1631_open_pi_data_to_text": 62.1718, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_f1_for_task1659_billsum_title_generation": 25.8501, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 62.7381, + "eval_f1_for_task1728_web_nlg_data_to_text": 25.6507, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 35.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 7.2611, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 45.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 46.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 43.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 62.1833, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 33.0509, + "eval_f1_for_task288_gigaword_title_generation": 23.8559, + "eval_f1_for_task290_tellmewhy_answerability_classification": 85.0, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 34.1, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 40.7667, + "eval_f1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 5.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 34.0184, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 4.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 68.1227, + "eval_f1_for_task418_persent_title_generation": 15.6562, + "eval_f1_for_task442_com_qa_question_rewriting": 63.3682, + "eval_f1_for_task500_scruples_title_generation": 10.1398, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 31.7131, + "eval_f1_for_task520_aquamuse_answerability_classification": 48.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 9.0054, + "eval_f1_for_task602_wikitext_title_generation": 7.8384, + "eval_f1_for_task613_liar_keyword_tagging": 21.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 28.6586, + "eval_f1_for_task619_ohsumed_title_generation": 27.728, + "eval_f1_for_task620_ohsumed_keyword_tagging": 24.3866, + "eval_f1_for_task623_ohsumed_keyword_tagging": 55.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 52.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 87.6329, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 39.5035, + "eval_f1_for_task670_ambigqa_question_rewriting": 81.9764, + "eval_f1_for_task671_ambigqa_question_rewriting": 68.1377, + "eval_f1_for_task677_ollie_data_to_text": 12.7262, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 21.6195, + "eval_f1_for_task760_msr_sqa_data_to_text": 1.9141, + "eval_f1_for_task769_qed_title_generation": 74.4109, + "eval_f1_for_task827_copa_cause_effect_classification": 48.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 38.9111, + "eval_f1_for_task892_gap_coreference_resolution": 50.0, + "eval_f1_for_task893_gap_coreference_resolution": 22.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_f1_for_task957_e2e_data_to_text": 43.0354, + "eval_f1_for_task970_sherliic_textual_entailment": 51.0, + "eval_f1_for_textual_entailment": 40.8333, + "eval_f1_for_title_generation": 23.3628, + "eval_f1_for_word_analogy": 12.2083, + "eval_gen_len": 10.9793, + "eval_global_step": 6500, + "eval_loss": 1.7937037944793701, + "eval_rouge1": 41.0628, + "eval_rouge1_for_answerability_classification": 51.3846, + "eval_rouge1_for_cause_effect_classification": 53.8894, + "eval_rouge1_for_coreference_resolution": 41.1278, + "eval_rouge1_for_data_to_text": 35.3839, + "eval_rouge1_for_dialogue_act_recognition": 29.2619, + "eval_rouge1_for_grammar_error_correction": 59.0991, + "eval_rouge1_for_keyword_tagging": 55.2287, + "eval_rouge1_for_overlap_extraction": 30.5851, + "eval_rouge1_for_question_rewriting": 68.2869, + "eval_rouge1_for_task020_mctaco_answerability_classification": 51.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 40.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 88.5709, + "eval_rouge1_for_task035_winogrande_question_rewriting": 79.6852, + "eval_rouge1_for_task036_qasc_keyword_tagging": 69.3878, + "eval_rouge1_for_task039_qasc_overlap_extraction": 27.0, + "eval_rouge1_for_task050_multirc_answerability_classification": 46.0, + "eval_rouge1_for_task102_commongen_data_to_text": 40.2557, + "eval_rouge1_for_task1152_bard_word_analogy": 3.0, + "eval_rouge1_for_task1153_bard_word_analogy": 0.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 8.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 9.0, + "eval_rouge1_for_task1157_bard_word_analogy": 12.0, + "eval_rouge1_for_task1158_bard_word_analogy": 6.0, + "eval_rouge1_for_task1159_bard_word_analogy": 9.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 23.7519, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.8653, + "eval_rouge1_for_task121_zest_question_rewriting": 47.8618, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 7.6159, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 39.655, + "eval_rouge1_for_task1356_xlsum_title_generation": 12.5523, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.8304, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 47.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 19.851, + "eval_rouge1_for_task1409_dart_data_to_text": 32.7851, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 32.2224, + "eval_rouge1_for_task1439_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 19.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 24.3141, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 85.9757, + "eval_rouge1_for_task1562_zest_question_rewriting": 48.6233, + "eval_rouge1_for_task1586_scifact_title_generation": 25.5891, + "eval_rouge1_for_task1598_nyc_data_to_text": 26.7991, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 79.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.0971, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 47.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 62.6939, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rouge1_for_task1659_billsum_title_generation": 27.5083, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 62.7381, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 51.5854, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 35.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 10.35, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 45.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 43.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 63.0167, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 34.1701, + "eval_rouge1_for_task288_gigaword_title_generation": 26.6439, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 85.0, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 34.5, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 40.6, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 5.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 34.4818, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 5.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 69.8302, + "eval_rouge1_for_task418_persent_title_generation": 17.3098, + "eval_rouge1_for_task442_com_qa_question_rewriting": 67.1222, + "eval_rouge1_for_task500_scruples_title_generation": 11.7736, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 31.8408, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 48.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 10.288, + "eval_rouge1_for_task602_wikitext_title_generation": 8.336, + "eval_rouge1_for_task613_liar_keyword_tagging": 35.1667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 31.0774, + "eval_rouge1_for_task619_ohsumed_title_generation": 29.8669, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 28.1706, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 55.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 52.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 88.4186, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 39.6672, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.8862, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 68.9582, + "eval_rouge1_for_task677_ollie_data_to_text": 13.8562, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 22.2428, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 2.7977, + "eval_rouge1_for_task769_qed_title_generation": 74.3168, + "eval_rouge1_for_task827_copa_cause_effect_classification": 48.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 39.1, + "eval_rouge1_for_task892_gap_coreference_resolution": 50.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 22.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_rouge1_for_task957_e2e_data_to_text": 43.7172, + "eval_rouge1_for_task970_sherliic_textual_entailment": 51.0, + "eval_rouge1_for_textual_entailment": 42.5556, + "eval_rouge1_for_title_generation": 25.0447, + "eval_rouge1_for_word_analogy": 12.2083, + "eval_rougeL": 39.9833, + "eval_rougeL_for_answerability_classification": 51.3846, + "eval_rougeL_for_cause_effect_classification": 53.2133, + "eval_rougeL_for_coreference_resolution": 41.1278, + "eval_rougeL_for_data_to_text": 30.1584, + "eval_rougeL_for_dialogue_act_recognition": 29.2619, + "eval_rougeL_for_grammar_error_correction": 58.2079, + "eval_rougeL_for_keyword_tagging": 54.7751, + "eval_rougeL_for_overlap_extraction": 29.9169, + "eval_rougeL_for_question_rewriting": 64.9337, + "eval_rougeL_for_task020_mctaco_answerability_classification": 51.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 40.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 88.5709, + "eval_rougeL_for_task035_winogrande_question_rewriting": 79.205, + "eval_rougeL_for_task036_qasc_keyword_tagging": 68.3622, + "eval_rougeL_for_task039_qasc_overlap_extraction": 27.0, + "eval_rougeL_for_task050_multirc_answerability_classification": 46.0, + "eval_rougeL_for_task102_commongen_data_to_text": 34.0889, + "eval_rougeL_for_task1152_bard_word_analogy": 3.0, + "eval_rougeL_for_task1153_bard_word_analogy": 0.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 8.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 9.0, + "eval_rougeL_for_task1157_bard_word_analogy": 12.0, + "eval_rougeL_for_task1158_bard_word_analogy": 6.0, + "eval_rougeL_for_task1159_bard_word_analogy": 9.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 18.9943, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.906, + "eval_rougeL_for_task121_zest_question_rewriting": 42.7716, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 7.5261, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 36.9876, + "eval_rougeL_for_task1356_xlsum_title_generation": 10.1798, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.551, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 47.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 17.425, + "eval_rougeL_for_task1409_dart_data_to_text": 28.6216, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 31.3562, + "eval_rougeL_for_task1439_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 19.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 22.982, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.0596, + "eval_rougeL_for_task1562_zest_question_rewriting": 39.6436, + "eval_rougeL_for_task1586_scifact_title_generation": 20.7106, + "eval_rougeL_for_task1598_nyc_data_to_text": 22.2072, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 79.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 76.8652, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 47.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 54.7316, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rougeL_for_task1659_billsum_title_generation": 22.8857, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 62.7381, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 45.2908, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 35.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 10.35, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 45.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 43.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 63.0167, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 32.8338, + "eval_rougeL_for_task288_gigaword_title_generation": 22.5899, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 85.0, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 34.5, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 40.6, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 5.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 32.5647, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 5.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 57.5, + "eval_rougeL_for_task418_persent_title_generation": 14.9356, + "eval_rougeL_for_task442_com_qa_question_rewriting": 63.4148, + "eval_rougeL_for_task500_scruples_title_generation": 10.5874, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 31.2783, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 48.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 10.1213, + "eval_rougeL_for_task602_wikitext_title_generation": 8.217, + "eval_rougeL_for_task613_liar_keyword_tagging": 35.1667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 28.2617, + "eval_rougeL_for_task619_ohsumed_title_generation": 26.5212, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 26.9281, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 55.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 52.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 88.4186, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 39.6672, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 82.266, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 68.14, + "eval_rougeL_for_task677_ollie_data_to_text": 11.147, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 18.954, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 2.5973, + "eval_rougeL_for_task769_qed_title_generation": 74.3168, + "eval_rougeL_for_task827_copa_cause_effect_classification": 48.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 39.1, + "eval_rougeL_for_task892_gap_coreference_resolution": 50.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 22.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_rougeL_for_task957_e2e_data_to_text": 34.9214, + "eval_rougeL_for_task970_sherliic_textual_entailment": 51.0, + "eval_rougeL_for_textual_entailment": 42.5556, + "eval_rougeL_for_title_generation": 22.8916, + "eval_rougeL_for_word_analogy": 12.2083, + "eval_runtime": 326.5205, + "eval_samples_per_second": 36.476, + "eval_steps_per_second": 1.142, + "step": 6500 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 1.6506, + "step": 7000 + }, + { + "epoch": 1.6, + "eval_exact_match": 24.937, + "eval_exact_match_for_answerability_classification": 49.3846, + "eval_exact_match_for_cause_effect_classification": 35.1429, + "eval_exact_match_for_coreference_resolution": 33.7857, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 28.5714, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 35.2, + "eval_exact_match_for_overlap_extraction": 11.5, + "eval_exact_match_for_question_rewriting": 0.6364, + "eval_exact_match_for_task020_mctaco_answerability_classification": 52.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 37.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 24.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 53.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 3.0, + "eval_exact_match_for_task1153_bard_word_analogy": 1.0, + "eval_exact_match_for_task1154_bard_word_analogy": 9.0, + "eval_exact_match_for_task1155_bard_word_analogy": 53.0, + "eval_exact_match_for_task1156_bard_word_analogy": 7.0, + "eval_exact_match_for_task1157_bard_word_analogy": 11.0, + "eval_exact_match_for_task1158_bard_word_analogy": 5.0, + "eval_exact_match_for_task1159_bard_word_analogy": 9.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 46.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 51.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 28.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 30.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 52.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 20.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 32.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 38.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 44.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 49.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 46.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 49.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 47.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 20.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 8.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 52.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 75.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 33.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 58.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 40.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 28.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 45.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 62.0, + "eval_exact_match_for_textual_entailment": 40.5, + "eval_exact_match_for_title_generation": 6.6143, + "eval_exact_match_for_word_analogy": 12.25, + "eval_f1": 39.5353, + "eval_f1_for_answerability_classification": 51.9487, + "eval_f1_for_cause_effect_classification": 53.5856, + "eval_f1_for_coreference_resolution": 41.0106, + "eval_f1_for_data_to_text": 30.5628, + "eval_f1_for_dialogue_act_recognition": 28.5714, + "eval_f1_for_grammar_error_correction": 46.7691, + "eval_f1_for_keyword_tagging": 49.8068, + "eval_f1_for_overlap_extraction": 29.8508, + "eval_f1_for_question_rewriting": 67.559, + "eval_f1_for_task020_mctaco_answerability_classification": 52.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 39.0, + "eval_f1_for_task034_winogrande_question_rewriting": 87.4699, + "eval_f1_for_task035_winogrande_question_rewriting": 82.3999, + "eval_f1_for_task036_qasc_keyword_tagging": 64.6955, + "eval_f1_for_task039_qasc_overlap_extraction": 26.3333, + "eval_f1_for_task050_multirc_answerability_classification": 53.0, + "eval_f1_for_task102_commongen_data_to_text": 31.4439, + "eval_f1_for_task1152_bard_word_analogy": 3.0, + "eval_f1_for_task1153_bard_word_analogy": 1.0, + "eval_f1_for_task1154_bard_word_analogy": 9.0, + "eval_f1_for_task1155_bard_word_analogy": 53.0, + "eval_f1_for_task1156_bard_word_analogy": 7.0, + "eval_f1_for_task1157_bard_word_analogy": 11.0, + "eval_f1_for_task1158_bard_word_analogy": 5.0, + "eval_f1_for_task1159_bard_word_analogy": 9.0, + "eval_f1_for_task1161_coda_19_title_generation": 20.8556, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.3675, + "eval_f1_for_task121_zest_question_rewriting": 45.4615, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 8.7072, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 38.3568, + "eval_f1_for_task1356_xlsum_title_generation": 11.547, + "eval_f1_for_task1358_xlsum_title_generation": 28.823, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 46.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 20.2425, + "eval_f1_for_task1409_dart_data_to_text": 33.0124, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 10.7487, + "eval_f1_for_task1439_doqa_answerability_classification": 50.0, + "eval_f1_for_task1442_doqa_answerability_classification": 51.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1540_peer_read_title_generation": 22.8552, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 82.7895, + "eval_f1_for_task1562_zest_question_rewriting": 46.7365, + "eval_f1_for_task1586_scifact_title_generation": 22.3998, + "eval_f1_for_task1598_nyc_data_to_text": 25.8819, + "eval_f1_for_task1612_sick_textual_entailment": 28.0, + "eval_f1_for_task1615_sick_textual_entailment": 30.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 77.8481, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 61.4647, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 52.0, + "eval_f1_for_task1659_billsum_title_generation": 27.0732, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 60.6064, + "eval_f1_for_task1728_web_nlg_data_to_text": 24.3584, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 32.0, + "eval_f1_for_task201_multinli_textual_entailment": 38.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 6.8444, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 44.0, + "eval_f1_for_task232_iirc_answerability_classification": 49.0, + "eval_f1_for_task233_iirc_answerability_classification": 46.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 49.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 59.0167, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 33.3683, + "eval_f1_for_task288_gigaword_title_generation": 23.9734, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 25.7, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 42.7667, + "eval_f1_for_task349_squad2.0_answerability_classification": 49.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 2.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 34.262, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 2.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 68.9024, + "eval_f1_for_task418_persent_title_generation": 15.3101, + "eval_f1_for_task442_com_qa_question_rewriting": 64.1988, + "eval_f1_for_task500_scruples_title_generation": 9.5678, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 31.1877, + "eval_f1_for_task520_aquamuse_answerability_classification": 47.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 8.4799, + "eval_f1_for_task602_wikitext_title_generation": 7.5493, + "eval_f1_for_task613_liar_keyword_tagging": 21.8333, + "eval_f1_for_task614_glucose_cause_effect_classification": 28.1704, + "eval_f1_for_task619_ohsumed_title_generation": 27.0228, + "eval_f1_for_task620_ohsumed_keyword_tagging": 24.604, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 52.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 88.9012, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 40.748, + "eval_f1_for_task670_ambigqa_question_rewriting": 82.4327, + "eval_f1_for_task671_ambigqa_question_rewriting": 69.9745, + "eval_f1_for_task677_ollie_data_to_text": 12.4367, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 19.895, + "eval_f1_for_task760_msr_sqa_data_to_text": 1.8734, + "eval_f1_for_task769_qed_title_generation": 75.3154, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 44.1444, + "eval_f1_for_task892_gap_coreference_resolution": 49.0, + "eval_f1_for_task893_gap_coreference_resolution": 28.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 45.0, + "eval_f1_for_task957_e2e_data_to_text": 43.1208, + "eval_f1_for_task970_sherliic_textual_entailment": 62.0, + "eval_f1_for_textual_entailment": 40.5, + "eval_f1_for_title_generation": 23.3295, + "eval_f1_for_word_analogy": 12.25, + "eval_gen_len": 10.4505, + "eval_global_step": 7000, + "eval_loss": 1.8157719373703003, + "eval_rouge1": 41.2488, + "eval_rouge1_for_answerability_classification": 51.9487, + "eval_rouge1_for_cause_effect_classification": 53.9152, + "eval_rouge1_for_coreference_resolution": 41.2314, + "eval_rouge1_for_data_to_text": 35.639, + "eval_rouge1_for_dialogue_act_recognition": 30.4762, + "eval_rouge1_for_grammar_error_correction": 58.3754, + "eval_rouge1_for_keyword_tagging": 54.4491, + "eval_rouge1_for_overlap_extraction": 31.9639, + "eval_rouge1_for_question_rewriting": 69.1752, + "eval_rouge1_for_task020_mctaco_answerability_classification": 52.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 41.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 87.5055, + "eval_rouge1_for_task035_winogrande_question_rewriting": 83.2172, + "eval_rouge1_for_task036_qasc_keyword_tagging": 67.0523, + "eval_rouge1_for_task039_qasc_overlap_extraction": 29.3333, + "eval_rouge1_for_task050_multirc_answerability_classification": 53.0, + "eval_rouge1_for_task102_commongen_data_to_text": 39.6426, + "eval_rouge1_for_task1152_bard_word_analogy": 3.0, + "eval_rouge1_for_task1153_bard_word_analogy": 1.0, + "eval_rouge1_for_task1154_bard_word_analogy": 9.0, + "eval_rouge1_for_task1155_bard_word_analogy": 53.0, + "eval_rouge1_for_task1156_bard_word_analogy": 7.0, + "eval_rouge1_for_task1157_bard_word_analogy": 11.0, + "eval_rouge1_for_task1158_bard_word_analogy": 5.0, + "eval_rouge1_for_task1159_bard_word_analogy": 9.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 23.6502, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.6104, + "eval_rouge1_for_task121_zest_question_rewriting": 47.817, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 8.9502, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 41.8664, + "eval_rouge1_for_task1356_xlsum_title_generation": 13.1782, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.5587, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 46.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 20.422, + "eval_rouge1_for_task1409_dart_data_to_text": 34.1657, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.7413, + "eval_rouge1_for_task1439_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 51.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 24.4852, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.0095, + "eval_rouge1_for_task1562_zest_question_rewriting": 50.4832, + "eval_rouge1_for_task1586_scifact_title_generation": 24.8602, + "eval_rouge1_for_task1598_nyc_data_to_text": 29.1905, + "eval_rouge1_for_task1612_sick_textual_entailment": 28.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 76.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.1298, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 61.9667, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 52.0, + "eval_rouge1_for_task1659_billsum_title_generation": 29.0948, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 60.6064, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 50.2926, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 32.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 38.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 8.9667, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 44.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 49.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 59.85, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 34.5944, + "eval_rouge1_for_task288_gigaword_title_generation": 26.8317, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 25.7, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 42.6, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 49.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 35.0737, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 2.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 70.8096, + "eval_rouge1_for_task418_persent_title_generation": 17.2999, + "eval_rouge1_for_task442_com_qa_question_rewriting": 67.6539, + "eval_rouge1_for_task500_scruples_title_generation": 10.9258, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 31.2713, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 47.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 10.1464, + "eval_rouge1_for_task602_wikitext_title_generation": 7.8548, + "eval_rouge1_for_task613_liar_keyword_tagging": 37.3333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 29.666, + "eval_rouge1_for_task619_ohsumed_title_generation": 28.5306, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 29.4587, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 52.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 89.4012, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 40.9835, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 83.134, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 70.6998, + "eval_rouge1_for_task677_ollie_data_to_text": 13.6954, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 20.8955, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 2.4241, + "eval_rouge1_for_task769_qed_title_generation": 75.2859, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 44.3333, + "eval_rouge1_for_task892_gap_coreference_resolution": 49.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 28.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 45.0, + "eval_rouge1_for_task957_e2e_data_to_text": 44.3725, + "eval_rouge1_for_task970_sherliic_textual_entailment": 62.0, + "eval_rouge1_for_textual_entailment": 42.4444, + "eval_rouge1_for_title_generation": 24.9175, + "eval_rouge1_for_word_analogy": 12.25, + "eval_rougeL": 40.1914, + "eval_rougeL_for_answerability_classification": 51.9487, + "eval_rougeL_for_cause_effect_classification": 53.1438, + "eval_rougeL_for_coreference_resolution": 41.2314, + "eval_rougeL_for_data_to_text": 30.5556, + "eval_rougeL_for_dialogue_act_recognition": 30.4762, + "eval_rougeL_for_grammar_error_correction": 57.6684, + "eval_rougeL_for_keyword_tagging": 54.0595, + "eval_rougeL_for_overlap_extraction": 31.2628, + "eval_rougeL_for_question_rewriting": 65.9729, + "eval_rougeL_for_task020_mctaco_answerability_classification": 52.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 41.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 87.5055, + "eval_rougeL_for_task035_winogrande_question_rewriting": 82.8533, + "eval_rougeL_for_task036_qasc_keyword_tagging": 66.0047, + "eval_rougeL_for_task039_qasc_overlap_extraction": 29.3333, + "eval_rougeL_for_task050_multirc_answerability_classification": 53.0, + "eval_rougeL_for_task102_commongen_data_to_text": 34.5976, + "eval_rougeL_for_task1152_bard_word_analogy": 3.0, + "eval_rougeL_for_task1153_bard_word_analogy": 1.0, + "eval_rougeL_for_task1154_bard_word_analogy": 9.0, + "eval_rougeL_for_task1155_bard_word_analogy": 53.0, + "eval_rougeL_for_task1156_bard_word_analogy": 7.0, + "eval_rougeL_for_task1157_bard_word_analogy": 11.0, + "eval_rougeL_for_task1158_bard_word_analogy": 5.0, + "eval_rougeL_for_task1159_bard_word_analogy": 9.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 19.0001, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.651, + "eval_rougeL_for_task121_zest_question_rewriting": 42.5325, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 8.7579, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 38.9888, + "eval_rougeL_for_task1356_xlsum_title_generation": 10.9005, + "eval_rougeL_for_task1358_xlsum_title_generation": 27.7611, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 46.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 17.8325, + "eval_rougeL_for_task1409_dart_data_to_text": 30.0061, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.2436, + "eval_rougeL_for_task1439_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 51.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 22.9652, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.0933, + "eval_rougeL_for_task1562_zest_question_rewriting": 42.526, + "eval_rougeL_for_task1586_scifact_title_generation": 20.2835, + "eval_rougeL_for_task1598_nyc_data_to_text": 24.3969, + "eval_rougeL_for_task1612_sick_textual_entailment": 28.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 76.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 76.8237, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 54.2187, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 52.0, + "eval_rougeL_for_task1659_billsum_title_generation": 24.0847, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 60.6064, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 44.2058, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 32.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 38.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 8.9667, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 44.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 49.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 59.85, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 33.1922, + "eval_rougeL_for_task288_gigaword_title_generation": 22.5356, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 25.7, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 42.6, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 49.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 32.9703, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 2.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 58.9291, + "eval_rougeL_for_task418_persent_title_generation": 14.9961, + "eval_rougeL_for_task442_com_qa_question_rewriting": 64.1623, + "eval_rougeL_for_task500_scruples_title_generation": 10.0412, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 30.9563, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 47.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 10.0214, + "eval_rougeL_for_task602_wikitext_title_generation": 7.8548, + "eval_rougeL_for_task613_liar_keyword_tagging": 37.3333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 26.37, + "eval_rougeL_for_task619_ohsumed_title_generation": 25.0531, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 28.5583, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 52.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 89.4012, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 40.9835, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 82.6315, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 70.098, + "eval_rougeL_for_task677_ollie_data_to_text": 11.4025, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 17.7597, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 2.3647, + "eval_rougeL_for_task769_qed_title_generation": 75.2859, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 44.3333, + "eval_rougeL_for_task892_gap_coreference_resolution": 49.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 28.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 45.0, + "eval_rougeL_for_task957_e2e_data_to_text": 35.1146, + "eval_rougeL_for_task970_sherliic_textual_entailment": 62.0, + "eval_rougeL_for_textual_entailment": 42.4444, + "eval_rougeL_for_title_generation": 22.756, + "eval_rougeL_for_word_analogy": 12.25, + "eval_runtime": 390.8078, + "eval_samples_per_second": 30.475, + "eval_steps_per_second": 0.954, + "step": 7000 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 1.655, + "step": 7500 + }, + { + "epoch": 1.71, + "eval_exact_match": 24.6348, + "eval_exact_match_for_answerability_classification": 47.7692, + "eval_exact_match_for_cause_effect_classification": 35.1429, + "eval_exact_match_for_coreference_resolution": 33.7143, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 27.2857, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 36.0, + "eval_exact_match_for_overlap_extraction": 11.0, + "eval_exact_match_for_question_rewriting": 0.8182, + "eval_exact_match_for_task020_mctaco_answerability_classification": 56.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 38.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 25.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 22.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 48.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 3.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 8.0, + "eval_exact_match_for_task1155_bard_word_analogy": 55.0, + "eval_exact_match_for_task1156_bard_word_analogy": 8.0, + "eval_exact_match_for_task1157_bard_word_analogy": 7.0, + "eval_exact_match_for_task1158_bard_word_analogy": 6.0, + "eval_exact_match_for_task1159_bard_word_analogy": 9.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 48.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 48.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 30.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 22.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 39.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 43.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 28.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 47.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 52.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 30.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 48.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 4.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 46.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 18.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 6.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 59.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 28.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 52.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 72.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 27.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 59.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 47.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 51.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 43.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 40.1667, + "eval_exact_match_for_title_generation": 6.6143, + "eval_exact_match_for_word_analogy": 12.0, + "eval_f1": 39.1606, + "eval_f1_for_answerability_classification": 50.3333, + "eval_f1_for_cause_effect_classification": 53.6816, + "eval_f1_for_coreference_resolution": 40.0569, + "eval_f1_for_data_to_text": 30.2738, + "eval_f1_for_dialogue_act_recognition": 27.5, + "eval_f1_for_grammar_error_correction": 49.8227, + "eval_f1_for_keyword_tagging": 50.8317, + "eval_f1_for_overlap_extraction": 29.9304, + "eval_f1_for_question_rewriting": 67.0756, + "eval_f1_for_task020_mctaco_answerability_classification": 56.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 40.0, + "eval_f1_for_task034_winogrande_question_rewriting": 90.4009, + "eval_f1_for_task035_winogrande_question_rewriting": 81.4474, + "eval_f1_for_task036_qasc_keyword_tagging": 65.2025, + "eval_f1_for_task039_qasc_overlap_extraction": 26.5, + "eval_f1_for_task050_multirc_answerability_classification": 48.0, + "eval_f1_for_task102_commongen_data_to_text": 34.0637, + "eval_f1_for_task1152_bard_word_analogy": 3.0, + "eval_f1_for_task1153_bard_word_analogy": 0.6667, + "eval_f1_for_task1154_bard_word_analogy": 8.0, + "eval_f1_for_task1155_bard_word_analogy": 55.0, + "eval_f1_for_task1156_bard_word_analogy": 8.0, + "eval_f1_for_task1157_bard_word_analogy": 7.0, + "eval_f1_for_task1158_bard_word_analogy": 6.0, + "eval_f1_for_task1159_bard_word_analogy": 9.0, + "eval_f1_for_task1161_coda_19_title_generation": 21.8866, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.3379, + "eval_f1_for_task121_zest_question_rewriting": 45.981, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 7.5241, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 35.111, + "eval_f1_for_task1356_xlsum_title_generation": 10.5874, + "eval_f1_for_task1358_xlsum_title_generation": 29.2567, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 48.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 48.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 20.2331, + "eval_f1_for_task1409_dart_data_to_text": 31.3012, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 16.4774, + "eval_f1_for_task1439_doqa_answerability_classification": 52.0, + "eval_f1_for_task1442_doqa_answerability_classification": 52.0, + "eval_f1_for_task1516_imppres_textual_entailment": 30.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_f1_for_task1540_peer_read_title_generation": 22.9905, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.168, + "eval_f1_for_task1562_zest_question_rewriting": 45.2515, + "eval_f1_for_task1586_scifact_title_generation": 23.271, + "eval_f1_for_task1598_nyc_data_to_text": 25.1059, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 77.9647, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_f1_for_task1631_open_pi_data_to_text": 62.0248, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_f1_for_task1659_billsum_title_generation": 24.9408, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 60.8571, + "eval_f1_for_task1728_web_nlg_data_to_text": 25.0321, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 39.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 7.7914, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 43.0, + "eval_f1_for_task232_iirc_answerability_classification": 28.0, + "eval_f1_for_task233_iirc_answerability_classification": 46.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 47.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 63.6167, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 33.3608, + "eval_f1_for_task288_gigaword_title_generation": 24.1738, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 11.3, + "eval_f1_for_task329_gap_coreference_resolution": 32.0, + "eval_f1_for_task330_gap_coreference_resolution": 38.2667, + "eval_f1_for_task349_squad2.0_answerability_classification": 48.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 4.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 35.4533, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 5.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 66.1777, + "eval_f1_for_task418_persent_title_generation": 16.8733, + "eval_f1_for_task442_com_qa_question_rewriting": 63.7521, + "eval_f1_for_task500_scruples_title_generation": 9.8649, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 30.4363, + "eval_f1_for_task520_aquamuse_answerability_classification": 46.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 7.4286, + "eval_f1_for_task602_wikitext_title_generation": 7.6496, + "eval_f1_for_task613_liar_keyword_tagging": 21.1667, + "eval_f1_for_task614_glucose_cause_effect_classification": 27.6515, + "eval_f1_for_task619_ohsumed_title_generation": 29.1008, + "eval_f1_for_task620_ohsumed_keyword_tagging": 22.0213, + "eval_f1_for_task623_ohsumed_keyword_tagging": 59.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 28.0, + "eval_f1_for_task642_e_snli_textual_entailment": 52.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 86.7678, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 36.3788, + "eval_f1_for_task670_ambigqa_question_rewriting": 82.4327, + "eval_f1_for_task671_ambigqa_question_rewriting": 69.9745, + "eval_f1_for_task677_ollie_data_to_text": 11.1702, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 19.7774, + "eval_f1_for_task760_msr_sqa_data_to_text": 1.4265, + "eval_f1_for_task769_qed_title_generation": 76.0776, + "eval_f1_for_task827_copa_cause_effect_classification": 47.0, + "eval_f1_for_task828_copa_cause_effect_classification": 51.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 43.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 39.5444, + "eval_f1_for_task892_gap_coreference_resolution": 52.0, + "eval_f1_for_task893_gap_coreference_resolution": 33.6667, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_f1_for_task957_e2e_data_to_text": 40.7599, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 40.1667, + "eval_f1_for_title_generation": 23.4533, + "eval_f1_for_word_analogy": 12.0833, + "eval_gen_len": 10.8258, + "eval_global_step": 7500, + "eval_loss": 1.7726598978042603, + "eval_rouge1": 40.832, + "eval_rouge1_for_answerability_classification": 50.3333, + "eval_rouge1_for_cause_effect_classification": 54.0298, + "eval_rouge1_for_coreference_resolution": 40.393, + "eval_rouge1_for_data_to_text": 34.9405, + "eval_rouge1_for_dialogue_act_recognition": 29.4048, + "eval_rouge1_for_grammar_error_correction": 62.8747, + "eval_rouge1_for_keyword_tagging": 55.1009, + "eval_rouge1_for_overlap_extraction": 31.9881, + "eval_rouge1_for_question_rewriting": 68.5334, + "eval_rouge1_for_task020_mctaco_answerability_classification": 56.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 42.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 90.4366, + "eval_rouge1_for_task035_winogrande_question_rewriting": 82.216, + "eval_rouge1_for_task036_qasc_keyword_tagging": 67.2462, + "eval_rouge1_for_task039_qasc_overlap_extraction": 29.5, + "eval_rouge1_for_task050_multirc_answerability_classification": 48.0, + "eval_rouge1_for_task102_commongen_data_to_text": 43.3346, + "eval_rouge1_for_task1152_bard_word_analogy": 3.0, + "eval_rouge1_for_task1153_bard_word_analogy": 0.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 8.0, + "eval_rouge1_for_task1155_bard_word_analogy": 55.0, + "eval_rouge1_for_task1156_bard_word_analogy": 8.0, + "eval_rouge1_for_task1157_bard_word_analogy": 7.0, + "eval_rouge1_for_task1158_bard_word_analogy": 6.0, + "eval_rouge1_for_task1159_bard_word_analogy": 9.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 24.0637, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.5695, + "eval_rouge1_for_task121_zest_question_rewriting": 47.9953, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 7.5729, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 37.8727, + "eval_rouge1_for_task1356_xlsum_title_generation": 12.0383, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.8873, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 48.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 48.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 20.3816, + "eval_rouge1_for_task1409_dart_data_to_text": 32.4954, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 39.1647, + "eval_rouge1_for_task1439_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 30.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 24.6397, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.5846, + "eval_rouge1_for_task1562_zest_question_rewriting": 48.4126, + "eval_rouge1_for_task1586_scifact_title_generation": 26.253, + "eval_rouge1_for_task1598_nyc_data_to_text": 27.8342, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.2709, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 62.5932, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_rouge1_for_task1659_billsum_title_generation": 26.7043, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 60.8571, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 49.0162, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 39.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 10.3303, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 43.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 28.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 47.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 64.45, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 34.4761, + "eval_rouge1_for_task288_gigaword_title_generation": 27.0064, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 11.3, + "eval_rouge1_for_task329_gap_coreference_resolution": 32.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 38.1, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 48.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 4.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 35.4977, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 7.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 67.7558, + "eval_rouge1_for_task418_persent_title_generation": 18.9167, + "eval_rouge1_for_task442_com_qa_question_rewriting": 67.3064, + "eval_rouge1_for_task500_scruples_title_generation": 11.8078, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 30.8706, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 46.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 8.2068, + "eval_rouge1_for_task602_wikitext_title_generation": 8.0454, + "eval_rouge1_for_task613_liar_keyword_tagging": 35.8333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 30.044, + "eval_rouge1_for_task619_ohsumed_title_generation": 30.8123, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 25.8713, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 59.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 28.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 52.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 87.5535, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 36.5621, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 83.2517, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 70.7798, + "eval_rouge1_for_task677_ollie_data_to_text": 11.8187, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 20.3172, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 2.1477, + "eval_rouge1_for_task769_qed_title_generation": 75.9835, + "eval_rouge1_for_task827_copa_cause_effect_classification": 47.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 51.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 43.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 39.4, + "eval_rouge1_for_task892_gap_coreference_resolution": 52.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 33.6667, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_rouge1_for_task957_e2e_data_to_text": 40.576, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 42.0278, + "eval_rouge1_for_title_generation": 25.0095, + "eval_rouge1_for_word_analogy": 12.0833, + "eval_rougeL": 39.8025, + "eval_rougeL_for_answerability_classification": 50.3333, + "eval_rougeL_for_cause_effect_classification": 53.3935, + "eval_rougeL_for_coreference_resolution": 40.393, + "eval_rougeL_for_data_to_text": 29.7272, + "eval_rougeL_for_dialogue_act_recognition": 29.4048, + "eval_rougeL_for_grammar_error_correction": 61.8353, + "eval_rougeL_for_keyword_tagging": 54.79, + "eval_rougeL_for_overlap_extraction": 31.3492, + "eval_rougeL_for_question_rewriting": 65.5385, + "eval_rougeL_for_task020_mctaco_answerability_classification": 56.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 42.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 90.4366, + "eval_rougeL_for_task035_winogrande_question_rewriting": 81.7022, + "eval_rougeL_for_task036_qasc_keyword_tagging": 66.0777, + "eval_rougeL_for_task039_qasc_overlap_extraction": 29.5, + "eval_rougeL_for_task050_multirc_answerability_classification": 48.0, + "eval_rougeL_for_task102_commongen_data_to_text": 36.3179, + "eval_rougeL_for_task1152_bard_word_analogy": 3.0, + "eval_rougeL_for_task1153_bard_word_analogy": 0.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 8.0, + "eval_rougeL_for_task1155_bard_word_analogy": 55.0, + "eval_rougeL_for_task1156_bard_word_analogy": 8.0, + "eval_rougeL_for_task1157_bard_word_analogy": 7.0, + "eval_rougeL_for_task1158_bard_word_analogy": 6.0, + "eval_rougeL_for_task1159_bard_word_analogy": 9.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 19.7614, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.6673, + "eval_rougeL_for_task121_zest_question_rewriting": 43.7183, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 7.5188, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 35.3422, + "eval_rougeL_for_task1356_xlsum_title_generation": 10.1349, + "eval_rougeL_for_task1358_xlsum_title_generation": 27.8229, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 48.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 48.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 18.0844, + "eval_rougeL_for_task1409_dart_data_to_text": 28.6299, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 38.003, + "eval_rougeL_for_task1439_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 30.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 23.6725, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.6676, + "eval_rougeL_for_task1562_zest_question_rewriting": 41.2275, + "eval_rougeL_for_task1586_scifact_title_generation": 21.0896, + "eval_rougeL_for_task1598_nyc_data_to_text": 22.9942, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 76.9649, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 53.1761, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_rougeL_for_task1659_billsum_title_generation": 21.8495, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 60.8571, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 42.2664, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 39.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 10.3303, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 43.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 28.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 47.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 64.45, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 33.1985, + "eval_rougeL_for_task288_gigaword_title_generation": 22.6687, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 11.3, + "eval_rougeL_for_task329_gap_coreference_resolution": 32.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 38.1, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 48.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 4.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 34.0557, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 7.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 56.0822, + "eval_rougeL_for_task418_persent_title_generation": 16.5081, + "eval_rougeL_for_task442_com_qa_question_rewriting": 63.8551, + "eval_rougeL_for_task500_scruples_title_generation": 11.1311, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 30.4712, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 46.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 8.0818, + "eval_rougeL_for_task602_wikitext_title_generation": 7.9263, + "eval_rougeL_for_task613_liar_keyword_tagging": 35.8333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 27.0319, + "eval_rougeL_for_task619_ohsumed_title_generation": 27.9668, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 25.4856, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 59.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 28.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 52.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 87.5535, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 36.5621, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 82.7491, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 70.178, + "eval_rougeL_for_task677_ollie_data_to_text": 10.095, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 17.2776, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 2.0693, + "eval_rougeL_for_task769_qed_title_generation": 75.9835, + "eval_rougeL_for_task827_copa_cause_effect_classification": 47.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 51.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 43.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 39.4, + "eval_rougeL_for_task892_gap_coreference_resolution": 52.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 33.6667, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_rougeL_for_task957_e2e_data_to_text": 33.4445, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 42.0278, + "eval_rougeL_for_title_generation": 22.9219, + "eval_rougeL_for_word_analogy": 12.0833, + "eval_runtime": 362.8196, + "eval_samples_per_second": 32.826, + "eval_steps_per_second": 1.028, + "step": 7500 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 1.6793, + "step": 8000 + }, + { + "epoch": 1.83, + "eval_exact_match": 24.9202, + "eval_exact_match_for_answerability_classification": 49.6923, + "eval_exact_match_for_cause_effect_classification": 35.1429, + "eval_exact_match_for_coreference_resolution": 33.4286, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 28.5714, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 33.2, + "eval_exact_match_for_overlap_extraction": 11.5, + "eval_exact_match_for_question_rewriting": 1.0909, + "eval_exact_match_for_task020_mctaco_answerability_classification": 54.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 38.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 17.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 56.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 2.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 8.0, + "eval_exact_match_for_task1155_bard_word_analogy": 54.0, + "eval_exact_match_for_task1156_bard_word_analogy": 8.0, + "eval_exact_match_for_task1157_bard_word_analogy": 12.0, + "eval_exact_match_for_task1158_bard_word_analogy": 6.0, + "eval_exact_match_for_task1159_bard_word_analogy": 12.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 48.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 32.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 38.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 18.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 36.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 59.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 44.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 48.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 42.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 52.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 6.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 2.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 45.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 19.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 5.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 23.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 76.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 26.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 56.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 48.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 36.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 52.0, + "eval_exact_match_for_textual_entailment": 40.1667, + "eval_exact_match_for_title_generation": 7.0628, + "eval_exact_match_for_word_analogy": 12.75, + "eval_f1": 39.3594, + "eval_f1_for_answerability_classification": 52.2564, + "eval_f1_for_cause_effect_classification": 53.9777, + "eval_f1_for_coreference_resolution": 40.1369, + "eval_f1_for_data_to_text": 31.3273, + "eval_f1_for_dialogue_act_recognition": 28.8571, + "eval_f1_for_grammar_error_correction": 52.2596, + "eval_f1_for_keyword_tagging": 48.3289, + "eval_f1_for_overlap_extraction": 29.9711, + "eval_f1_for_question_rewriting": 65.9968, + "eval_f1_for_task020_mctaco_answerability_classification": 54.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 40.0, + "eval_f1_for_task034_winogrande_question_rewriting": 88.8502, + "eval_f1_for_task035_winogrande_question_rewriting": 82.0034, + "eval_f1_for_task036_qasc_keyword_tagging": 62.7048, + "eval_f1_for_task039_qasc_overlap_extraction": 26.8333, + "eval_f1_for_task050_multirc_answerability_classification": 56.0, + "eval_f1_for_task102_commongen_data_to_text": 32.598, + "eval_f1_for_task1152_bard_word_analogy": 2.0, + "eval_f1_for_task1153_bard_word_analogy": 0.6667, + "eval_f1_for_task1154_bard_word_analogy": 8.0, + "eval_f1_for_task1155_bard_word_analogy": 54.0, + "eval_f1_for_task1156_bard_word_analogy": 8.0, + "eval_f1_for_task1157_bard_word_analogy": 12.0, + "eval_f1_for_task1158_bard_word_analogy": 6.0, + "eval_f1_for_task1159_bard_word_analogy": 12.0, + "eval_f1_for_task1161_coda_19_title_generation": 18.26, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.1003, + "eval_f1_for_task121_zest_question_rewriting": 45.7479, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 6.5439, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 28.9013, + "eval_f1_for_task1356_xlsum_title_generation": 11.0259, + "eval_f1_for_task1358_xlsum_title_generation": 27.2081, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 48.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 20.2563, + "eval_f1_for_task1409_dart_data_to_text": 34.603, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 20.8845, + "eval_f1_for_task1439_doqa_answerability_classification": 50.0, + "eval_f1_for_task1442_doqa_answerability_classification": 49.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_f1_for_task1540_peer_read_title_generation": 21.7274, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.6348, + "eval_f1_for_task1562_zest_question_rewriting": 44.0986, + "eval_f1_for_task1586_scifact_title_generation": 22.6999, + "eval_f1_for_task1598_nyc_data_to_text": 24.9601, + "eval_f1_for_task1612_sick_textual_entailment": 32.0, + "eval_f1_for_task1615_sick_textual_entailment": 38.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.3138, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 63.9224, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 26.2107, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 60.2762, + "eval_f1_for_task1728_web_nlg_data_to_text": 25.6188, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 36.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 8.2444, + "eval_f1_for_task220_rocstories_title_generation": 59.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 44.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 48.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 58.15, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 33.109, + "eval_f1_for_task288_gigaword_title_generation": 23.7715, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 8.7, + "eval_f1_for_task329_gap_coreference_resolution": 34.0, + "eval_f1_for_task330_gap_coreference_resolution": 42.7667, + "eval_f1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 5.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 35.9211, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 7.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 63.8027, + "eval_f1_for_task418_persent_title_generation": 15.5, + "eval_f1_for_task442_com_qa_question_rewriting": 64.4925, + "eval_f1_for_task500_scruples_title_generation": 8.957, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 31.6874, + "eval_f1_for_task520_aquamuse_answerability_classification": 45.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 7.0827, + "eval_f1_for_task602_wikitext_title_generation": 7.725, + "eval_f1_for_task613_liar_keyword_tagging": 21.5, + "eval_f1_for_task614_glucose_cause_effect_classification": 29.2562, + "eval_f1_for_task619_ohsumed_title_generation": 26.2765, + "eval_f1_for_task620_ohsumed_keyword_tagging": 22.2062, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 23.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 86.2333, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 34.1455, + "eval_f1_for_task670_ambigqa_question_rewriting": 81.4134, + "eval_f1_for_task671_ambigqa_question_rewriting": 68.2408, + "eval_f1_for_task677_ollie_data_to_text": 13.4181, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 21.8203, + "eval_f1_for_task760_msr_sqa_data_to_text": 1.4895, + "eval_f1_for_task769_qed_title_generation": 66.6082, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 48.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 41.8778, + "eval_f1_for_task892_gap_coreference_resolution": 51.0, + "eval_f1_for_task893_gap_coreference_resolution": 35.8333, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_f1_for_task957_e2e_data_to_text": 42.9993, + "eval_f1_for_task970_sherliic_textual_entailment": 52.0, + "eval_f1_for_textual_entailment": 40.1667, + "eval_f1_for_title_generation": 22.9323, + "eval_f1_for_word_analogy": 12.8333, + "eval_gen_len": 10.5809, + "eval_global_step": 8000, + "eval_loss": 1.7734715938568115, + "eval_rouge1": 41.0196, + "eval_rouge1_for_answerability_classification": 52.2564, + "eval_rouge1_for_cause_effect_classification": 54.3078, + "eval_rouge1_for_coreference_resolution": 40.4742, + "eval_rouge1_for_data_to_text": 35.9358, + "eval_rouge1_for_dialogue_act_recognition": 30.7619, + "eval_rouge1_for_grammar_error_correction": 64.5094, + "eval_rouge1_for_keyword_tagging": 52.5668, + "eval_rouge1_for_overlap_extraction": 32.8244, + "eval_rouge1_for_question_rewriting": 67.5071, + "eval_rouge1_for_task020_mctaco_answerability_classification": 54.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 42.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 88.8859, + "eval_rouge1_for_task035_winogrande_question_rewriting": 82.7627, + "eval_rouge1_for_task036_qasc_keyword_tagging": 64.6944, + "eval_rouge1_for_task039_qasc_overlap_extraction": 31.5, + "eval_rouge1_for_task050_multirc_answerability_classification": 56.0, + "eval_rouge1_for_task102_commongen_data_to_text": 41.8789, + "eval_rouge1_for_task1152_bard_word_analogy": 2.0, + "eval_rouge1_for_task1153_bard_word_analogy": 0.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 8.0, + "eval_rouge1_for_task1155_bard_word_analogy": 54.0, + "eval_rouge1_for_task1156_bard_word_analogy": 8.0, + "eval_rouge1_for_task1157_bard_word_analogy": 12.0, + "eval_rouge1_for_task1158_bard_word_analogy": 6.0, + "eval_rouge1_for_task1159_bard_word_analogy": 12.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 20.6645, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.2764, + "eval_rouge1_for_task121_zest_question_rewriting": 47.6173, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 6.6128, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 31.4135, + "eval_rouge1_for_task1356_xlsum_title_generation": 12.8083, + "eval_rouge1_for_task1358_xlsum_title_generation": 32.5697, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 48.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 20.1886, + "eval_rouge1_for_task1409_dart_data_to_text": 35.3188, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 42.0619, + "eval_rouge1_for_task1439_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 24.1568, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.9569, + "eval_rouge1_for_task1562_zest_question_rewriting": 47.4849, + "eval_rouge1_for_task1586_scifact_title_generation": 26.0153, + "eval_rouge1_for_task1598_nyc_data_to_text": 26.9226, + "eval_rouge1_for_task1612_sick_textual_entailment": 32.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 79.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.6227, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 64.8006, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 28.4828, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 60.2762, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 49.2434, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 36.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 10.3667, + "eval_rouge1_for_task220_rocstories_title_generation": 59.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 44.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 48.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 58.9833, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 34.1487, + "eval_rouge1_for_task288_gigaword_title_generation": 26.5962, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 9.2, + "eval_rouge1_for_task329_gap_coreference_resolution": 34.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 42.7667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 5.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 36.2216, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 8.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 65.7905, + "eval_rouge1_for_task418_persent_title_generation": 17.4763, + "eval_rouge1_for_task442_com_qa_question_rewriting": 68.3195, + "eval_rouge1_for_task500_scruples_title_generation": 10.3433, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 31.5764, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 45.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 8.0909, + "eval_rouge1_for_task602_wikitext_title_generation": 8.4068, + "eval_rouge1_for_task613_liar_keyword_tagging": 36.1667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 31.2664, + "eval_rouge1_for_task619_ohsumed_title_generation": 28.2176, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 25.9538, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 23.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 87.019, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 34.3455, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.439, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 68.9661, + "eval_rouge1_for_task677_ollie_data_to_text": 14.7, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 22.4389, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 1.9829, + "eval_rouge1_for_task769_qed_title_generation": 66.5923, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 48.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 42.0667, + "eval_rouge1_for_task892_gap_coreference_resolution": 51.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 35.8333, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_rouge1_for_task957_e2e_data_to_text": 43.2616, + "eval_rouge1_for_task970_sherliic_textual_entailment": 52.0, + "eval_rouge1_for_textual_entailment": 41.8889, + "eval_rouge1_for_title_generation": 24.6116, + "eval_rouge1_for_word_analogy": 12.8333, + "eval_rougeL": 39.9727, + "eval_rougeL_for_answerability_classification": 52.2564, + "eval_rougeL_for_cause_effect_classification": 53.5919, + "eval_rougeL_for_coreference_resolution": 40.4742, + "eval_rougeL_for_data_to_text": 30.4245, + "eval_rougeL_for_dialogue_act_recognition": 30.7619, + "eval_rougeL_for_grammar_error_correction": 63.6056, + "eval_rougeL_for_keyword_tagging": 52.1104, + "eval_rougeL_for_overlap_extraction": 32.1813, + "eval_rougeL_for_question_rewriting": 64.4021, + "eval_rougeL_for_task020_mctaco_answerability_classification": 54.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 42.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 88.8859, + "eval_rougeL_for_task035_winogrande_question_rewriting": 82.2568, + "eval_rougeL_for_task036_qasc_keyword_tagging": 62.7981, + "eval_rougeL_for_task039_qasc_overlap_extraction": 31.5, + "eval_rougeL_for_task050_multirc_answerability_classification": 56.0, + "eval_rougeL_for_task102_commongen_data_to_text": 35.1378, + "eval_rougeL_for_task1152_bard_word_analogy": 2.0, + "eval_rougeL_for_task1153_bard_word_analogy": 0.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 8.0, + "eval_rougeL_for_task1155_bard_word_analogy": 54.0, + "eval_rougeL_for_task1156_bard_word_analogy": 8.0, + "eval_rougeL_for_task1157_bard_word_analogy": 12.0, + "eval_rougeL_for_task1158_bard_word_analogy": 6.0, + "eval_rougeL_for_task1159_bard_word_analogy": 12.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 17.3264, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.2504, + "eval_rougeL_for_task121_zest_question_rewriting": 42.4417, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 6.5587, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 29.0803, + "eval_rougeL_for_task1356_xlsum_title_generation": 10.9964, + "eval_rougeL_for_task1358_xlsum_title_generation": 26.8475, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 48.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 17.7069, + "eval_rougeL_for_task1409_dart_data_to_text": 31.2699, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 41.1568, + "eval_rougeL_for_task1439_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 22.9713, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.0544, + "eval_rougeL_for_task1562_zest_question_rewriting": 39.8202, + "eval_rougeL_for_task1586_scifact_title_generation": 21.1481, + "eval_rougeL_for_task1598_nyc_data_to_text": 22.7002, + "eval_rougeL_for_task1612_sick_textual_entailment": 32.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 79.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.3167, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 54.0581, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 23.2712, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 60.2762, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 42.2957, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 36.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 10.3667, + "eval_rougeL_for_task220_rocstories_title_generation": 59.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 44.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 48.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 58.9833, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 32.8626, + "eval_rougeL_for_task288_gigaword_title_generation": 22.4217, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 9.2, + "eval_rougeL_for_task329_gap_coreference_resolution": 34.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 42.7667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 5.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 34.4008, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 8.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 54.6603, + "eval_rougeL_for_task418_persent_title_generation": 15.2643, + "eval_rougeL_for_task442_com_qa_question_rewriting": 64.8954, + "eval_rougeL_for_task500_scruples_title_generation": 9.7048, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 31.3101, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 45.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 7.9242, + "eval_rougeL_for_task602_wikitext_title_generation": 8.2877, + "eval_rougeL_for_task613_liar_keyword_tagging": 36.1667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 28.0762, + "eval_rougeL_for_task619_ohsumed_title_generation": 26.3097, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 25.5681, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 23.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 87.019, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 34.3455, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.8031, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 68.0124, + "eval_rougeL_for_task677_ollie_data_to_text": 12.4833, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 19.4748, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 1.874, + "eval_rougeL_for_task769_qed_title_generation": 66.5923, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 48.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 42.0667, + "eval_rougeL_for_task892_gap_coreference_resolution": 51.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 35.8333, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_rougeL_for_task957_e2e_data_to_text": 35.1672, + "eval_rougeL_for_task970_sherliic_textual_entailment": 52.0, + "eval_rougeL_for_textual_entailment": 41.8889, + "eval_rougeL_for_title_generation": 22.671, + "eval_rougeL_for_word_analogy": 12.8333, + "eval_runtime": 365.6642, + "eval_samples_per_second": 32.571, + "eval_steps_per_second": 1.02, + "step": 8000 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 1.6651, + "step": 8500 + }, + { + "epoch": 1.94, + "eval_exact_match": 24.8615, + "eval_exact_match_for_answerability_classification": 50.0769, + "eval_exact_match_for_cause_effect_classification": 35.4286, + "eval_exact_match_for_coreference_resolution": 33.2143, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 27.1429, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 33.6, + "eval_exact_match_for_overlap_extraction": 12.0, + "eval_exact_match_for_question_rewriting": 0.9091, + "eval_exact_match_for_task020_mctaco_answerability_classification": 57.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 38.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 22.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 24.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 46.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 3.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 10.0, + "eval_exact_match_for_task1155_bard_word_analogy": 51.0, + "eval_exact_match_for_task1156_bard_word_analogy": 8.0, + "eval_exact_match_for_task1157_bard_word_analogy": 12.0, + "eval_exact_match_for_task1158_bard_word_analogy": 6.0, + "eval_exact_match_for_task1159_bard_word_analogy": 10.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 31.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 45.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 49.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 23.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 32.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 18.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 62.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 42.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 44.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 48.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 47.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 54.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 5.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 0.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 47.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 19.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 4.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 56.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 24.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 54.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 67.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 27.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 56.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 49.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 40.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 40.0833, + "eval_exact_match_for_title_generation": 7.1749, + "eval_exact_match_for_word_analogy": 12.5, + "eval_f1": 39.3849, + "eval_f1_for_answerability_classification": 52.641, + "eval_f1_for_cause_effect_classification": 53.8322, + "eval_f1_for_coreference_resolution": 40.4187, + "eval_f1_for_data_to_text": 30.8737, + "eval_f1_for_dialogue_act_recognition": 27.1429, + "eval_f1_for_grammar_error_correction": 48.4437, + "eval_f1_for_keyword_tagging": 48.1537, + "eval_f1_for_overlap_extraction": 31.1047, + "eval_f1_for_question_rewriting": 66.9087, + "eval_f1_for_task020_mctaco_answerability_classification": 57.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 40.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 92.9848, + "eval_f1_for_task035_winogrande_question_rewriting": 83.3536, + "eval_f1_for_task036_qasc_keyword_tagging": 63.0314, + "eval_f1_for_task039_qasc_overlap_extraction": 28.3333, + "eval_f1_for_task050_multirc_answerability_classification": 46.0, + "eval_f1_for_task102_commongen_data_to_text": 32.256, + "eval_f1_for_task1152_bard_word_analogy": 3.0, + "eval_f1_for_task1153_bard_word_analogy": 0.6667, + "eval_f1_for_task1154_bard_word_analogy": 10.0, + "eval_f1_for_task1155_bard_word_analogy": 51.0, + "eval_f1_for_task1156_bard_word_analogy": 8.0, + "eval_f1_for_task1157_bard_word_analogy": 12.0, + "eval_f1_for_task1158_bard_word_analogy": 6.0, + "eval_f1_for_task1159_bard_word_analogy": 10.0, + "eval_f1_for_task1161_coda_19_title_generation": 22.1531, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.0285, + "eval_f1_for_task121_zest_question_rewriting": 45.5841, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 8.6328, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 34.972, + "eval_f1_for_task1356_xlsum_title_generation": 10.7074, + "eval_f1_for_task1358_xlsum_title_generation": 28.2336, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 31.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 45.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 49.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 19.7255, + "eval_f1_for_task1409_dart_data_to_text": 31.7326, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 13.2526, + "eval_f1_for_task1439_doqa_answerability_classification": 53.0, + "eval_f1_for_task1442_doqa_answerability_classification": 53.0, + "eval_f1_for_task1516_imppres_textual_entailment": 23.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_f1_for_task1540_peer_read_title_generation": 23.336, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.6348, + "eval_f1_for_task1562_zest_question_rewriting": 42.9461, + "eval_f1_for_task1586_scifact_title_generation": 23.298, + "eval_f1_for_task1598_nyc_data_to_text": 25.8435, + "eval_f1_for_task1612_sick_textual_entailment": 32.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.7962, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_f1_for_task1631_open_pi_data_to_text": 62.7655, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_f1_for_task1659_billsum_title_generation": 26.5417, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 60.9048, + "eval_f1_for_task1728_web_nlg_data_to_text": 26.724, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 8.3444, + "eval_f1_for_task220_rocstories_title_generation": 62.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 42.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 44.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 48.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 61.8833, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 33.876, + "eval_f1_for_task288_gigaword_title_generation": 23.4354, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 21.1, + "eval_f1_for_task329_gap_coreference_resolution": 32.0, + "eval_f1_for_task330_gap_coreference_resolution": 40.1, + "eval_f1_for_task349_squad2.0_answerability_classification": 54.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 0.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 35.6671, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 5.5, + "eval_f1_for_task402_grailqa_question_rewriting": 63.8145, + "eval_f1_for_task418_persent_title_generation": 15.2066, + "eval_f1_for_task442_com_qa_question_rewriting": 63.382, + "eval_f1_for_task500_scruples_title_generation": 10.356, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 28.9878, + "eval_f1_for_task520_aquamuse_answerability_classification": 47.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 7.413, + "eval_f1_for_task602_wikitext_title_generation": 7.608, + "eval_f1_for_task613_liar_keyword_tagging": 21.5, + "eval_f1_for_task614_glucose_cause_effect_classification": 26.4916, + "eval_f1_for_task619_ohsumed_title_generation": 27.4919, + "eval_f1_for_task620_ohsumed_keyword_tagging": 18.0354, + "eval_f1_for_task623_ohsumed_keyword_tagging": 56.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 24.0, + "eval_f1_for_task642_e_snli_textual_entailment": 54.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 82.2016, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 36.0621, + "eval_f1_for_task670_ambigqa_question_rewriting": 81.4898, + "eval_f1_for_task671_ambigqa_question_rewriting": 68.6441, + "eval_f1_for_task677_ollie_data_to_text": 14.1568, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 18.6179, + "eval_f1_for_task760_msr_sqa_data_to_text": 2.4111, + "eval_f1_for_task769_qed_title_generation": 69.0349, + "eval_f1_for_task827_copa_cause_effect_classification": 49.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 43.4778, + "eval_f1_for_task892_gap_coreference_resolution": 49.0, + "eval_f1_for_task893_gap_coreference_resolution": 33.1667, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_f1_for_task957_e2e_data_to_text": 41.1857, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 40.0833, + "eval_f1_for_title_generation": 23.5528, + "eval_f1_for_word_analogy": 12.5833, + "eval_gen_len": 11.0541, + "eval_global_step": 8500, + "eval_loss": 1.786635398864746, + "eval_rouge1": 41.1017, + "eval_rouge1_for_answerability_classification": 52.641, + "eval_rouge1_for_cause_effect_classification": 54.1283, + "eval_rouge1_for_coreference_resolution": 40.7474, + "eval_rouge1_for_data_to_text": 35.3407, + "eval_rouge1_for_dialogue_act_recognition": 29.0476, + "eval_rouge1_for_grammar_error_correction": 64.2941, + "eval_rouge1_for_keyword_tagging": 52.6143, + "eval_rouge1_for_overlap_extraction": 33.3724, + "eval_rouge1_for_question_rewriting": 68.4641, + "eval_rouge1_for_task020_mctaco_answerability_classification": 57.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 42.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 93.0236, + "eval_rouge1_for_task035_winogrande_question_rewriting": 83.9802, + "eval_rouge1_for_task036_qasc_keyword_tagging": 65.2322, + "eval_rouge1_for_task039_qasc_overlap_extraction": 31.7333, + "eval_rouge1_for_task050_multirc_answerability_classification": 46.0, + "eval_rouge1_for_task102_commongen_data_to_text": 40.9796, + "eval_rouge1_for_task1152_bard_word_analogy": 3.0, + "eval_rouge1_for_task1153_bard_word_analogy": 0.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 10.0, + "eval_rouge1_for_task1155_bard_word_analogy": 51.0, + "eval_rouge1_for_task1156_bard_word_analogy": 8.0, + "eval_rouge1_for_task1157_bard_word_analogy": 12.0, + "eval_rouge1_for_task1158_bard_word_analogy": 6.0, + "eval_rouge1_for_task1159_bard_word_analogy": 10.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 24.3096, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.2547, + "eval_rouge1_for_task121_zest_question_rewriting": 47.8603, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 8.6922, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 37.9694, + "eval_rouge1_for_task1356_xlsum_title_generation": 12.089, + "eval_rouge1_for_task1358_xlsum_title_generation": 32.7664, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 31.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 45.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 49.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 20.1735, + "eval_rouge1_for_task1409_dart_data_to_text": 33.0872, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 41.6313, + "eval_rouge1_for_task1439_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 23.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 24.4791, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.9569, + "eval_rouge1_for_task1562_zest_question_rewriting": 46.0403, + "eval_rouge1_for_task1586_scifact_title_generation": 25.967, + "eval_rouge1_for_task1598_nyc_data_to_text": 27.526, + "eval_rouge1_for_task1612_sick_textual_entailment": 32.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.1073, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 63.2557, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_rouge1_for_task1659_billsum_title_generation": 28.3237, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 60.9048, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 49.2217, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 10.4389, + "eval_rouge1_for_task220_rocstories_title_generation": 62.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 42.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 44.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 48.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 62.7167, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 35.0114, + "eval_rouge1_for_task288_gigaword_title_generation": 26.1703, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 21.6, + "eval_rouge1_for_task329_gap_coreference_resolution": 32.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 40.1, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 54.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 0.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 35.9829, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 6.5, + "eval_rouge1_for_task402_grailqa_question_rewriting": 65.7703, + "eval_rouge1_for_task418_persent_title_generation": 17.3765, + "eval_rouge1_for_task442_com_qa_question_rewriting": 67.2501, + "eval_rouge1_for_task500_scruples_title_generation": 12.0121, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 29.3162, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 47.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 9.0263, + "eval_rouge1_for_task602_wikitext_title_generation": 8.0949, + "eval_rouge1_for_task613_liar_keyword_tagging": 35.8333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 28.2485, + "eval_rouge1_for_task619_ohsumed_title_generation": 29.5192, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 23.0188, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 56.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 24.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 54.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 82.9873, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 36.1421, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.3996, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 69.4494, + "eval_rouge1_for_task677_ollie_data_to_text": 15.4935, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 19.1303, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.2919, + "eval_rouge1_for_task769_qed_title_generation": 69.0075, + "eval_rouge1_for_task827_copa_cause_effect_classification": 49.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 43.6667, + "eval_rouge1_for_task892_gap_coreference_resolution": 49.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 33.1667, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_rouge1_for_task957_e2e_data_to_text": 41.3212, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 41.9444, + "eval_rouge1_for_title_generation": 25.0798, + "eval_rouge1_for_word_analogy": 12.5833, + "eval_rougeL": 40.0483, + "eval_rougeL_for_answerability_classification": 52.641, + "eval_rougeL_for_cause_effect_classification": 53.3607, + "eval_rougeL_for_coreference_resolution": 40.7474, + "eval_rougeL_for_data_to_text": 29.8376, + "eval_rougeL_for_dialogue_act_recognition": 29.0476, + "eval_rougeL_for_grammar_error_correction": 63.2403, + "eval_rougeL_for_keyword_tagging": 52.3209, + "eval_rougeL_for_overlap_extraction": 32.8573, + "eval_rougeL_for_question_rewriting": 65.4584, + "eval_rougeL_for_task020_mctaco_answerability_classification": 57.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 42.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 93.0236, + "eval_rougeL_for_task035_winogrande_question_rewriting": 83.6093, + "eval_rougeL_for_task036_qasc_keyword_tagging": 63.8256, + "eval_rougeL_for_task039_qasc_overlap_extraction": 31.7333, + "eval_rougeL_for_task050_multirc_answerability_classification": 46.0, + "eval_rougeL_for_task102_commongen_data_to_text": 34.0519, + "eval_rougeL_for_task1152_bard_word_analogy": 3.0, + "eval_rougeL_for_task1153_bard_word_analogy": 0.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 10.0, + "eval_rougeL_for_task1155_bard_word_analogy": 51.0, + "eval_rougeL_for_task1156_bard_word_analogy": 8.0, + "eval_rougeL_for_task1157_bard_word_analogy": 12.0, + "eval_rougeL_for_task1158_bard_word_analogy": 6.0, + "eval_rougeL_for_task1159_bard_word_analogy": 10.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 20.1325, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.2954, + "eval_rougeL_for_task121_zest_question_rewriting": 42.8652, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 8.5582, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 35.5195, + "eval_rougeL_for_task1356_xlsum_title_generation": 10.354, + "eval_rougeL_for_task1358_xlsum_title_generation": 27.1372, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 31.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 45.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 49.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 17.4838, + "eval_rougeL_for_task1409_dart_data_to_text": 28.7943, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 40.4262, + "eval_rougeL_for_task1439_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 23.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 22.9969, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.0544, + "eval_rougeL_for_task1562_zest_question_rewriting": 40.4793, + "eval_rougeL_for_task1586_scifact_title_generation": 20.8015, + "eval_rougeL_for_task1598_nyc_data_to_text": 23.395, + "eval_rougeL_for_task1612_sick_textual_entailment": 32.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.4624, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 52.2374, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_rougeL_for_task1659_billsum_title_generation": 23.0565, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 60.9048, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 43.2984, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 10.4389, + "eval_rougeL_for_task220_rocstories_title_generation": 62.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 42.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 44.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 48.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 62.7167, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 33.9813, + "eval_rougeL_for_task288_gigaword_title_generation": 22.0903, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 21.6, + "eval_rougeL_for_task329_gap_coreference_resolution": 32.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 40.1, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 54.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 0.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 33.7548, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 6.5, + "eval_rougeL_for_task402_grailqa_question_rewriting": 53.5632, + "eval_rougeL_for_task418_persent_title_generation": 15.1459, + "eval_rougeL_for_task442_com_qa_question_rewriting": 64.0139, + "eval_rougeL_for_task500_scruples_title_generation": 10.9975, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 28.6809, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 47.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 8.8597, + "eval_rougeL_for_task602_wikitext_title_generation": 8.0949, + "eval_rougeL_for_task613_liar_keyword_tagging": 35.8333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 25.1034, + "eval_rougeL_for_task619_ohsumed_title_generation": 26.7392, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 22.9582, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 56.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 24.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 54.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 82.9873, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 36.1421, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.7192, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 68.4917, + "eval_rougeL_for_task677_ollie_data_to_text": 12.6567, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 16.6649, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.0701, + "eval_rougeL_for_task769_qed_title_generation": 69.0075, + "eval_rougeL_for_task827_copa_cause_effect_classification": 49.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 43.6667, + "eval_rougeL_for_task892_gap_coreference_resolution": 49.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 33.1667, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_rougeL_for_task957_e2e_data_to_text": 33.7432, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 41.9444, + "eval_rougeL_for_title_generation": 23.0079, + "eval_rougeL_for_word_analogy": 12.5833, + "eval_runtime": 364.2804, + "eval_samples_per_second": 32.695, + "eval_steps_per_second": 1.024, + "step": 8500 + }, + { + "epoch": 2.0, + "step": 8748, + "total_flos": 4.483082779203994e+16, + "train_loss": 1.8247261483428354, + "train_runtime": 14319.7307, + "train_samples_per_second": 9.772, + "train_steps_per_second": 0.611 + } + ], + "max_steps": 8748, + "num_train_epochs": 2, + "total_flos": 4.483082779203994e+16, + "trial_name": null, + "trial_params": null +}