diff --git "a/README.md" "b/README.md" --- "a/README.md" +++ "b/README.md" @@ -7,7 +7,3657 @@ tags: pipeline_tag: text-generation model-index: - name: Qwen2-7B-Instruct - results: [] + results: + - task: + type: niah_8192_90 + dataset: + name: niah_8192_90 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_8192_80 + dataset: + name: niah_8192_80 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_8192_70 + dataset: + name: niah_8192_70 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_8192_60 + dataset: + name: niah_8192_60 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_8192_50 + dataset: + name: niah_8192_50 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_8192_40 + dataset: + name: niah_8192_40 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_8192_30 + dataset: + name: niah_8192_30 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_8192_20 + dataset: + name: niah_8192_20 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_8192_100 + dataset: + name: niah_8192_100 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_8192_10 + dataset: + name: niah_8192_10 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_6000_90 + dataset: + name: niah_6000_90 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_6000_80 + dataset: + name: niah_6000_80 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_6000_70 + dataset: + name: niah_6000_70 + type: niah + metrics: + - type: acc + value: '0.0' + - task: + type: niah_6000_60 + dataset: + name: niah_6000_60 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_6000_50 + dataset: + name: niah_6000_50 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_6000_40 + dataset: + name: niah_6000_40 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_6000_30 + dataset: + name: niah_6000_30 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_6000_20 + dataset: + name: niah_6000_20 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_6000_100 + dataset: + name: niah_6000_100 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_6000_10 + dataset: + name: niah_6000_10 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_4096_90 + dataset: + name: niah_4096_90 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_4096_80 + dataset: + name: niah_4096_80 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_4096_70 + dataset: + name: niah_4096_70 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_4096_60 + dataset: + name: niah_4096_60 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_4096_50 + dataset: + name: niah_4096_50 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_4096_40 + dataset: + name: niah_4096_40 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_4096_30 + dataset: + name: niah_4096_30 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_4096_20 + dataset: + name: niah_4096_20 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_4096_100 + dataset: + name: niah_4096_100 + type: niah + metrics: + - type: acc + value: '0.0' + - task: + type: niah_4096_10 + dataset: + name: niah_4096_10 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_2048_90 + dataset: + name: niah_2048_90 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_2048_80 + dataset: + name: niah_2048_80 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_2048_70 + dataset: + name: niah_2048_70 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_2048_60 + dataset: + name: niah_2048_60 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_2048_50 + dataset: + name: niah_2048_50 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_2048_40 + dataset: + name: niah_2048_40 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_2048_30 + dataset: + name: niah_2048_30 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_2048_20 + dataset: + name: niah_2048_20 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_2048_100 + dataset: + name: niah_2048_100 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_2048_10 + dataset: + name: niah_2048_10 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_1024_90 + dataset: + name: niah_1024_90 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_1024_80 + dataset: + name: niah_1024_80 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_1024_70 + dataset: + name: niah_1024_70 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_1024_60 + dataset: + name: niah_1024_60 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_1024_50 + dataset: + name: niah_1024_50 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_1024_40 + dataset: + name: niah_1024_40 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_1024_30 + dataset: + name: niah_1024_30 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_1024_20 + dataset: + name: niah_1024_20 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_1024_100 + dataset: + name: niah_1024_100 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: niah_1024_10 + dataset: + name: niah_1024_10 + type: niah + metrics: + - type: acc + value: '1.0' + - task: + type: mmlu + dataset: + name: mmlu + type: public-dataset + metrics: + - type: acc + value: '0.709' + args: + results: + mmlu: + acc,none: 0.6991881498362057 + acc_stderr,none: 0.003669336524005856 + alias: mmlu + mmlu_humanities: + alias: ' - humanities' + acc,none: 0.6350690754516471 + acc_stderr,none: 0.006600169354896744 + mmlu_formal_logic: + alias: ' - formal_logic' + acc,none: 0.5079365079365079 + acc_stderr,none: 0.044715725362943486 + mmlu_high_school_european_history: + alias: ' - high_school_european_history' + acc,none: 0.806060606060606 + acc_stderr,none: 0.030874145136562097 + mmlu_high_school_us_history: + alias: ' - high_school_us_history' + acc,none: 0.8725490196078431 + acc_stderr,none: 0.02340553048084631 + mmlu_high_school_world_history: + alias: ' - high_school_world_history' + acc,none: 0.8523206751054853 + acc_stderr,none: 0.023094329582595684 + mmlu_international_law: + alias: ' - international_law' + acc,none: 0.8264462809917356 + acc_stderr,none: 0.0345727283691767 + mmlu_jurisprudence: + alias: ' - jurisprudence' + acc,none: 0.8703703703703703 + acc_stderr,none: 0.03247224389917948 + mmlu_logical_fallacies: + alias: ' - logical_fallacies' + acc,none: 0.803680981595092 + acc_stderr,none: 0.031207970394709218 + mmlu_moral_disputes: + alias: ' - moral_disputes' + acc,none: 0.7687861271676301 + acc_stderr,none: 0.022698657167855716 + mmlu_moral_scenarios: + alias: ' - moral_scenarios' + acc,none: 0.4346368715083799 + acc_stderr,none: 0.016578997435496713 + mmlu_philosophy: + alias: ' - philosophy' + acc,none: 0.7813504823151125 + acc_stderr,none: 0.023475581417861102 + mmlu_prehistory: + alias: ' - prehistory' + acc,none: 0.7839506172839507 + acc_stderr,none: 0.022899162918445806 + mmlu_professional_law: + alias: ' - professional_law' + acc,none: 0.516297262059974 + acc_stderr,none: 0.012763450734699804 + mmlu_world_religions: + alias: ' - world_religions' + acc,none: 0.8304093567251462 + acc_stderr,none: 0.02878210810540171 + mmlu_other: + alias: ' - other' + acc,none: 0.7563566140971999 + acc_stderr,none: 0.007446207961067767 + mmlu_business_ethics: + alias: ' - business_ethics' + acc,none: 0.77 + acc_stderr,none: 0.04229525846816506 + mmlu_clinical_knowledge: + alias: ' - clinical_knowledge' + acc,none: 0.7811320754716982 + acc_stderr,none: 0.025447863825108614 + mmlu_college_medicine: + alias: ' - college_medicine' + acc,none: 0.6878612716763006 + acc_stderr,none: 0.03533133389323657 + mmlu_global_facts: + alias: ' - global_facts' + acc,none: 0.47 + acc_stderr,none: 0.05016135580465919 + mmlu_human_aging: + alias: ' - human_aging' + acc,none: 0.7443946188340808 + acc_stderr,none: 0.029275891003969927 + mmlu_management: + alias: ' - management' + acc,none: 0.7961165048543689 + acc_stderr,none: 0.0398913985953177 + mmlu_marketing: + alias: ' - marketing' + acc,none: 0.9017094017094017 + acc_stderr,none: 0.019503444900757567 + mmlu_medical_genetics: + alias: ' - medical_genetics' + acc,none: 0.82 + acc_stderr,none: 0.03861229196653694 + mmlu_miscellaneous: + alias: ' - miscellaneous' + acc,none: 0.8544061302681992 + acc_stderr,none: 0.012612475800423451 + mmlu_nutrition: + alias: ' - nutrition' + acc,none: 0.7810457516339869 + acc_stderr,none: 0.02367908986180772 + mmlu_professional_accounting: + alias: ' - professional_accounting' + acc,none: 0.5886524822695035 + acc_stderr,none: 0.02935491115994098 + mmlu_professional_medicine: + alias: ' - professional_medicine' + acc,none: 0.7279411764705882 + acc_stderr,none: 0.02703304115168146 + mmlu_virology: + alias: ' - virology' + acc,none: 0.5240963855421686 + acc_stderr,none: 0.03887971849597264 + mmlu_social_sciences: + alias: ' - social_sciences' + acc,none: 0.8020799480013 + acc_stderr,none: 0.007073049587404706 + mmlu_econometrics: + alias: ' - econometrics' + acc,none: 0.5964912280701754 + acc_stderr,none: 0.04615186962583707 + mmlu_high_school_geography: + alias: ' - high_school_geography' + acc,none: 0.8838383838383839 + acc_stderr,none: 0.022828881775249377 + mmlu_high_school_government_and_politics: + alias: ' - high_school_government_and_politics' + acc,none: 0.927461139896373 + acc_stderr,none: 0.01871899852067819 + mmlu_high_school_macroeconomics: + alias: ' - high_school_macroeconomics' + acc,none: 0.764102564102564 + acc_stderr,none: 0.021525965407408726 + mmlu_high_school_microeconomics: + alias: ' - high_school_microeconomics' + acc,none: 0.8277310924369747 + acc_stderr,none: 0.024528664971305424 + mmlu_high_school_psychology: + alias: ' - high_school_psychology' + acc,none: 0.8623853211009175 + acc_stderr,none: 0.01477010587864942 + mmlu_human_sexuality: + alias: ' - human_sexuality' + acc,none: 0.7709923664122137 + acc_stderr,none: 0.036853466317118506 + mmlu_professional_psychology: + alias: ' - professional_psychology' + acc,none: 0.7467320261437909 + acc_stderr,none: 0.017593486895366835 + mmlu_public_relations: + alias: ' - public_relations' + acc,none: 0.7363636363636363 + acc_stderr,none: 0.04220224692971987 + mmlu_security_studies: + alias: ' - security_studies' + acc,none: 0.7387755102040816 + acc_stderr,none: 0.02812342933514278 + mmlu_sociology: + alias: ' - sociology' + acc,none: 0.8756218905472637 + acc_stderr,none: 0.023335401790166327 + mmlu_us_foreign_policy: + alias: ' - us_foreign_policy' + acc,none: 0.85 + acc_stderr,none: 0.03588702812826371 + mmlu_stem: + alias: ' - stem' + acc,none: 0.6381224230891215 + acc_stderr,none: 0.008279915099259731 + mmlu_abstract_algebra: + alias: ' - abstract_algebra' + acc,none: 0.52 + acc_stderr,none: 0.050211673156867795 + mmlu_anatomy: + alias: ' - anatomy' + acc,none: 0.6 + acc_stderr,none: 0.04232073695151589 + mmlu_astronomy: + alias: ' - astronomy' + acc,none: 0.7763157894736842 + acc_stderr,none: 0.033911609343436025 + mmlu_college_biology: + alias: ' - college_biology' + acc,none: 0.7916666666666666 + acc_stderr,none: 0.033961162058453336 + mmlu_college_chemistry: + alias: ' - college_chemistry' + acc,none: 0.5 + acc_stderr,none: 0.050251890762960605 + mmlu_college_computer_science: + alias: ' - college_computer_science' + acc,none: 0.62 + acc_stderr,none: 0.04878317312145633 + mmlu_college_mathematics: + alias: ' - college_mathematics' + acc,none: 0.39 + acc_stderr,none: 0.04902071300001974 + mmlu_college_physics: + alias: ' - college_physics' + acc,none: 0.4019607843137255 + acc_stderr,none: 0.048786087144669955 + mmlu_computer_security: + alias: ' - computer_security' + acc,none: 0.72 + acc_stderr,none: 0.04512608598542129 + mmlu_conceptual_physics: + alias: ' - conceptual_physics' + acc,none: 0.7063829787234043 + acc_stderr,none: 0.029771642712491227 + mmlu_electrical_engineering: + alias: ' - electrical_engineering' + acc,none: 0.7034482758620689 + acc_stderr,none: 0.03806142687309992 + mmlu_elementary_mathematics: + alias: ' - elementary_mathematics' + acc,none: 0.6481481481481481 + acc_stderr,none: 0.024594975128920938 + mmlu_high_school_biology: + alias: ' - high_school_biology' + acc,none: 0.8387096774193549 + acc_stderr,none: 0.020923327006423298 + mmlu_high_school_chemistry: + alias: ' - high_school_chemistry' + acc,none: 0.6157635467980296 + acc_stderr,none: 0.03422398565657551 + mmlu_high_school_computer_science: + alias: ' - high_school_computer_science' + acc,none: 0.79 + acc_stderr,none: 0.040936018074033256 + mmlu_high_school_mathematics: + alias: ' - high_school_mathematics' + acc,none: 0.4962962962962963 + acc_stderr,none: 0.03048470166508437 + mmlu_high_school_physics: + alias: ' - high_school_physics' + acc,none: 0.4966887417218543 + acc_stderr,none: 0.04082393379449654 + mmlu_high_school_statistics: + alias: ' - high_school_statistics' + acc,none: 0.6666666666666666 + acc_stderr,none: 0.03214952147802748 + mmlu_machine_learning: + alias: ' - machine_learning' + acc,none: 0.4732142857142857 + acc_stderr,none: 0.047389751192741546 + groups: + mmlu: + acc,none: 0.6991881498362057 + acc_stderr,none: 0.003669336524005856 + alias: mmlu + mmlu_humanities: + alias: ' - humanities' + acc,none: 0.6350690754516471 + acc_stderr,none: 0.006600169354896744 + mmlu_other: + alias: ' - other' + acc,none: 0.7563566140971999 + acc_stderr,none: 0.007446207961067767 + mmlu_social_sciences: + alias: ' - social_sciences' + acc,none: 0.8020799480013 + acc_stderr,none: 0.007073049587404706 + mmlu_stem: + alias: ' - stem' + acc,none: 0.6381224230891215 + acc_stderr,none: 0.008279915099259731 + group_subtasks: + mmlu_stem: + - mmlu_machine_learning + - mmlu_high_school_statistics + - mmlu_high_school_physics + - mmlu_high_school_mathematics + - mmlu_high_school_computer_science + - mmlu_high_school_chemistry + - mmlu_high_school_biology + - mmlu_elementary_mathematics + - mmlu_electrical_engineering + - mmlu_conceptual_physics + - mmlu_computer_security + - mmlu_college_physics + - mmlu_college_mathematics + - mmlu_college_computer_science + - mmlu_college_chemistry + - mmlu_college_biology + - mmlu_astronomy + - mmlu_anatomy + - mmlu_abstract_algebra + mmlu_other: + - mmlu_virology + - mmlu_professional_medicine + - mmlu_professional_accounting + - mmlu_nutrition + - mmlu_miscellaneous + - mmlu_medical_genetics + - mmlu_marketing + - mmlu_management + - mmlu_human_aging + - mmlu_global_facts + - mmlu_college_medicine + - mmlu_clinical_knowledge + - mmlu_business_ethics + mmlu_social_sciences: + - mmlu_us_foreign_policy + - mmlu_sociology + - mmlu_security_studies + - mmlu_public_relations + - mmlu_professional_psychology + - mmlu_human_sexuality + - mmlu_high_school_psychology + - mmlu_high_school_microeconomics + - mmlu_high_school_macroeconomics + - mmlu_high_school_government_and_politics + - mmlu_high_school_geography + - mmlu_econometrics + mmlu_humanities: + - mmlu_world_religions + - mmlu_professional_law + - mmlu_prehistory + - mmlu_philosophy + - mmlu_moral_scenarios + - mmlu_moral_disputes + - mmlu_logical_fallacies + - mmlu_jurisprudence + - mmlu_international_law + - mmlu_high_school_world_history + - mmlu_high_school_us_history + - mmlu_high_school_european_history + - mmlu_formal_logic + mmlu: + - mmlu_humanities + - mmlu_social_sciences + - mmlu_other + - mmlu_stem + configs: + mmlu_abstract_algebra: + task: mmlu_abstract_algebra + task_alias: abstract_algebra + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: abstract_algebra + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about abstract algebra. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_anatomy: + task: mmlu_anatomy + task_alias: anatomy + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: anatomy + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about anatomy. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_astronomy: + task: mmlu_astronomy + task_alias: astronomy + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: astronomy + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about astronomy. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_business_ethics: + task: mmlu_business_ethics + task_alias: business_ethics + group: mmlu_other + group_alias: other + dataset_path: hails/mmlu_no_train + dataset_name: business_ethics + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about business ethics. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_clinical_knowledge: + task: mmlu_clinical_knowledge + task_alias: clinical_knowledge + group: mmlu_other + group_alias: other + dataset_path: hails/mmlu_no_train + dataset_name: clinical_knowledge + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about clinical knowledge. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_college_biology: + task: mmlu_college_biology + task_alias: college_biology + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: college_biology + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about college biology. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_college_chemistry: + task: mmlu_college_chemistry + task_alias: college_chemistry + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: college_chemistry + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about college chemistry. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_college_computer_science: + task: mmlu_college_computer_science + task_alias: college_computer_science + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: college_computer_science + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about college computer science. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_college_mathematics: + task: mmlu_college_mathematics + task_alias: college_mathematics + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: college_mathematics + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about college mathematics. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_college_medicine: + task: mmlu_college_medicine + task_alias: college_medicine + group: mmlu_other + group_alias: other + dataset_path: hails/mmlu_no_train + dataset_name: college_medicine + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about college medicine. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_college_physics: + task: mmlu_college_physics + task_alias: college_physics + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: college_physics + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about college physics. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_computer_security: + task: mmlu_computer_security + task_alias: computer_security + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: computer_security + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about computer security. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_conceptual_physics: + task: mmlu_conceptual_physics + task_alias: conceptual_physics + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: conceptual_physics + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about conceptual physics. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_econometrics: + task: mmlu_econometrics + task_alias: econometrics + group: mmlu_social_sciences + group_alias: social_sciences + dataset_path: hails/mmlu_no_train + dataset_name: econometrics + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about econometrics. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_electrical_engineering: + task: mmlu_electrical_engineering + task_alias: electrical_engineering + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: electrical_engineering + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about electrical engineering. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_elementary_mathematics: + task: mmlu_elementary_mathematics + task_alias: elementary_mathematics + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: elementary_mathematics + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about elementary mathematics. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_formal_logic: + task: mmlu_formal_logic + task_alias: formal_logic + group: mmlu_humanities + group_alias: humanities + dataset_path: hails/mmlu_no_train + dataset_name: formal_logic + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about formal logic. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_global_facts: + task: mmlu_global_facts + task_alias: global_facts + group: mmlu_other + group_alias: other + dataset_path: hails/mmlu_no_train + dataset_name: global_facts + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about global facts. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_high_school_biology: + task: mmlu_high_school_biology + task_alias: high_school_biology + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: high_school_biology + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about high school biology. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_high_school_chemistry: + task: mmlu_high_school_chemistry + task_alias: high_school_chemistry + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: high_school_chemistry + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about high school chemistry. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_high_school_computer_science: + task: mmlu_high_school_computer_science + task_alias: high_school_computer_science + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: high_school_computer_science + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about high school computer science. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_high_school_european_history: + task: mmlu_high_school_european_history + task_alias: high_school_european_history + group: mmlu_humanities + group_alias: humanities + dataset_path: hails/mmlu_no_train + dataset_name: high_school_european_history + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about high school european history. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_high_school_geography: + task: mmlu_high_school_geography + task_alias: high_school_geography + group: mmlu_social_sciences + group_alias: social_sciences + dataset_path: hails/mmlu_no_train + dataset_name: high_school_geography + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about high school geography. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_high_school_government_and_politics: + task: mmlu_high_school_government_and_politics + task_alias: high_school_government_and_politics + group: mmlu_social_sciences + group_alias: social_sciences + dataset_path: hails/mmlu_no_train + dataset_name: high_school_government_and_politics + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about high school government and politics. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_high_school_macroeconomics: + task: mmlu_high_school_macroeconomics + task_alias: high_school_macroeconomics + group: mmlu_social_sciences + group_alias: social_sciences + dataset_path: hails/mmlu_no_train + dataset_name: high_school_macroeconomics + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about high school macroeconomics. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_high_school_mathematics: + task: mmlu_high_school_mathematics + task_alias: high_school_mathematics + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: high_school_mathematics + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about high school mathematics. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_high_school_microeconomics: + task: mmlu_high_school_microeconomics + task_alias: high_school_microeconomics + group: mmlu_social_sciences + group_alias: social_sciences + dataset_path: hails/mmlu_no_train + dataset_name: high_school_microeconomics + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about high school microeconomics. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_high_school_physics: + task: mmlu_high_school_physics + task_alias: high_school_physics + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: high_school_physics + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about high school physics. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_high_school_psychology: + task: mmlu_high_school_psychology + task_alias: high_school_psychology + group: mmlu_social_sciences + group_alias: social_sciences + dataset_path: hails/mmlu_no_train + dataset_name: high_school_psychology + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about high school psychology. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_high_school_statistics: + task: mmlu_high_school_statistics + task_alias: high_school_statistics + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: high_school_statistics + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about high school statistics. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_high_school_us_history: + task: mmlu_high_school_us_history + task_alias: high_school_us_history + group: mmlu_humanities + group_alias: humanities + dataset_path: hails/mmlu_no_train + dataset_name: high_school_us_history + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about high school us history. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_high_school_world_history: + task: mmlu_high_school_world_history + task_alias: high_school_world_history + group: mmlu_humanities + group_alias: humanities + dataset_path: hails/mmlu_no_train + dataset_name: high_school_world_history + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about high school world history. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_human_aging: + task: mmlu_human_aging + task_alias: human_aging + group: mmlu_other + group_alias: other + dataset_path: hails/mmlu_no_train + dataset_name: human_aging + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about human aging. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_human_sexuality: + task: mmlu_human_sexuality + task_alias: human_sexuality + group: mmlu_social_sciences + group_alias: social_sciences + dataset_path: hails/mmlu_no_train + dataset_name: human_sexuality + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about human sexuality. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_international_law: + task: mmlu_international_law + task_alias: international_law + group: mmlu_humanities + group_alias: humanities + dataset_path: hails/mmlu_no_train + dataset_name: international_law + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about international law. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_jurisprudence: + task: mmlu_jurisprudence + task_alias: jurisprudence + group: mmlu_humanities + group_alias: humanities + dataset_path: hails/mmlu_no_train + dataset_name: jurisprudence + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about jurisprudence. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_logical_fallacies: + task: mmlu_logical_fallacies + task_alias: logical_fallacies + group: mmlu_humanities + group_alias: humanities + dataset_path: hails/mmlu_no_train + dataset_name: logical_fallacies + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about logical fallacies. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_machine_learning: + task: mmlu_machine_learning + task_alias: machine_learning + group: mmlu_stem + group_alias: stem + dataset_path: hails/mmlu_no_train + dataset_name: machine_learning + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about machine learning. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_management: + task: mmlu_management + task_alias: management + group: mmlu_other + group_alias: other + dataset_path: hails/mmlu_no_train + dataset_name: management + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about management. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_marketing: + task: mmlu_marketing + task_alias: marketing + group: mmlu_other + group_alias: other + dataset_path: hails/mmlu_no_train + dataset_name: marketing + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about marketing. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_medical_genetics: + task: mmlu_medical_genetics + task_alias: medical_genetics + group: mmlu_other + group_alias: other + dataset_path: hails/mmlu_no_train + dataset_name: medical_genetics + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about medical genetics. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_miscellaneous: + task: mmlu_miscellaneous + task_alias: miscellaneous + group: mmlu_other + group_alias: other + dataset_path: hails/mmlu_no_train + dataset_name: miscellaneous + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about miscellaneous. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_moral_disputes: + task: mmlu_moral_disputes + task_alias: moral_disputes + group: mmlu_humanities + group_alias: humanities + dataset_path: hails/mmlu_no_train + dataset_name: moral_disputes + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about moral disputes. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_moral_scenarios: + task: mmlu_moral_scenarios + task_alias: moral_scenarios + group: mmlu_humanities + group_alias: humanities + dataset_path: hails/mmlu_no_train + dataset_name: moral_scenarios + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about moral scenarios. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_nutrition: + task: mmlu_nutrition + task_alias: nutrition + group: mmlu_other + group_alias: other + dataset_path: hails/mmlu_no_train + dataset_name: nutrition + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about nutrition. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_philosophy: + task: mmlu_philosophy + task_alias: philosophy + group: mmlu_humanities + group_alias: humanities + dataset_path: hails/mmlu_no_train + dataset_name: philosophy + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about philosophy. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_prehistory: + task: mmlu_prehistory + task_alias: prehistory + group: mmlu_humanities + group_alias: humanities + dataset_path: hails/mmlu_no_train + dataset_name: prehistory + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about prehistory. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_professional_accounting: + task: mmlu_professional_accounting + task_alias: professional_accounting + group: mmlu_other + group_alias: other + dataset_path: hails/mmlu_no_train + dataset_name: professional_accounting + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about professional accounting. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_professional_law: + task: mmlu_professional_law + task_alias: professional_law + group: mmlu_humanities + group_alias: humanities + dataset_path: hails/mmlu_no_train + dataset_name: professional_law + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about professional law. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_professional_medicine: + task: mmlu_professional_medicine + task_alias: professional_medicine + group: mmlu_other + group_alias: other + dataset_path: hails/mmlu_no_train + dataset_name: professional_medicine + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about professional medicine. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_professional_psychology: + task: mmlu_professional_psychology + task_alias: professional_psychology + group: mmlu_social_sciences + group_alias: social_sciences + dataset_path: hails/mmlu_no_train + dataset_name: professional_psychology + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about professional psychology. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_public_relations: + task: mmlu_public_relations + task_alias: public_relations + group: mmlu_social_sciences + group_alias: social_sciences + dataset_path: hails/mmlu_no_train + dataset_name: public_relations + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about public relations. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_security_studies: + task: mmlu_security_studies + task_alias: security_studies + group: mmlu_social_sciences + group_alias: social_sciences + dataset_path: hails/mmlu_no_train + dataset_name: security_studies + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about security studies. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_sociology: + task: mmlu_sociology + task_alias: sociology + group: mmlu_social_sciences + group_alias: social_sciences + dataset_path: hails/mmlu_no_train + dataset_name: sociology + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about sociology. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_us_foreign_policy: + task: mmlu_us_foreign_policy + task_alias: us_foreign_policy + group: mmlu_social_sciences + group_alias: social_sciences + dataset_path: hails/mmlu_no_train + dataset_name: us_foreign_policy + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about us foreign policy. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_virology: + task: mmlu_virology + task_alias: virology + group: mmlu_other + group_alias: other + dataset_path: hails/mmlu_no_train + dataset_name: virology + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about virology. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + mmlu_world_religions: + task: mmlu_world_religions + task_alias: world_religions + group: mmlu_humanities + group_alias: humanities + dataset_path: hails/mmlu_no_train + dataset_name: world_religions + test_split: test + fewshot_split: dev + doc_to_text: '{{question.strip()}} + + A. {{choices[0]}} + + B. {{choices[1]}} + + C. {{choices[2]}} + + D. {{choices[3]}} + + Answer:' + doc_to_target: answer + doc_to_choice: + - A + - B + - C + - D + description: 'The following are multiple choice questions (with answers) + about world religions. + + + ' + target_delimiter: ' ' + fewshot_delimiter: ' + + + ' + fewshot_config: + sampler: first_n + metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + output_type: multiple_choice + repeats: 1 + should_decontaminate: false + metadata: + version: 0.0 + versions: + mmlu_abstract_algebra: 0.0 + mmlu_anatomy: 0.0 + mmlu_astronomy: 0.0 + mmlu_business_ethics: 0.0 + mmlu_clinical_knowledge: 0.0 + mmlu_college_biology: 0.0 + mmlu_college_chemistry: 0.0 + mmlu_college_computer_science: 0.0 + mmlu_college_mathematics: 0.0 + mmlu_college_medicine: 0.0 + mmlu_college_physics: 0.0 + mmlu_computer_security: 0.0 + mmlu_conceptual_physics: 0.0 + mmlu_econometrics: 0.0 + mmlu_electrical_engineering: 0.0 + mmlu_elementary_mathematics: 0.0 + mmlu_formal_logic: 0.0 + mmlu_global_facts: 0.0 + mmlu_high_school_biology: 0.0 + mmlu_high_school_chemistry: 0.0 + mmlu_high_school_computer_science: 0.0 + mmlu_high_school_european_history: 0.0 + mmlu_high_school_geography: 0.0 + mmlu_high_school_government_and_politics: 0.0 + mmlu_high_school_macroeconomics: 0.0 + mmlu_high_school_mathematics: 0.0 + mmlu_high_school_microeconomics: 0.0 + mmlu_high_school_physics: 0.0 + mmlu_high_school_psychology: 0.0 + mmlu_high_school_statistics: 0.0 + mmlu_high_school_us_history: 0.0 + mmlu_high_school_world_history: 0.0 + mmlu_human_aging: 0.0 + mmlu_human_sexuality: 0.0 + mmlu_international_law: 0.0 + mmlu_jurisprudence: 0.0 + mmlu_logical_fallacies: 0.0 + mmlu_machine_learning: 0.0 + mmlu_management: 0.0 + mmlu_marketing: 0.0 + mmlu_medical_genetics: 0.0 + mmlu_miscellaneous: 0.0 + mmlu_moral_disputes: 0.0 + mmlu_moral_scenarios: 0.0 + mmlu_nutrition: 0.0 + mmlu_philosophy: 0.0 + mmlu_prehistory: 0.0 + mmlu_professional_accounting: 0.0 + mmlu_professional_law: 0.0 + mmlu_professional_medicine: 0.0 + mmlu_professional_psychology: 0.0 + mmlu_public_relations: 0.0 + mmlu_security_studies: 0.0 + mmlu_sociology: 0.0 + mmlu_us_foreign_policy: 0.0 + mmlu_virology: 0.0 + mmlu_world_religions: 0.0 + n-shot: + mmlu: 0 + config: + model: vllm + model_args: pretrained=Qwen/Qwen2-7B-Instruct,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.8,max_model_len=2048,trust_remote_code=True + batch_size: auto + batch_sizes: [] + bootstrap_iters: 100000 + git_hash: d6bc7cc + pretty_env_info: 'PyTorch version: 2.1.2+cu121 + + Is debug build: False + + CUDA used to build PyTorch: 12.1 + + ROCM used to build PyTorch: N/A + + + OS: Ubuntu 22.04.3 LTS (x86_64) + + GCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 + + Clang version: Could not collect + + CMake version: version 3.25.0 + + Libc version: glibc-2.35 + + + Python version: 3.10.12 (main, Jun 11 2023, 05:26:28) [GCC 11.4.0] (64-bit + runtime) + + Python platform: Linux-6.5.0-35-generic-x86_64-with-glibc2.35 + + Is CUDA available: True + + CUDA runtime version: 11.8.89 + + CUDA_MODULE_LOADING set to: LAZY + + GPU models and configuration: GPU 0: NVIDIA GeForce RTX 4090 + + Nvidia driver version: 550.54.15 + + cuDNN version: Could not collect + + HIP runtime version: N/A + + MIOpen runtime version: N/A + + Is XNNPACK available: True + + + CPU: + + Architecture: x86_64 + + CPU op-mode(s): 32-bit, 64-bit + + Address sizes: 43 bits physical, 48 bits virtual + + Byte Order: Little Endian + + CPU(s): 64 + + On-line CPU(s) list: 0-63 + + Vendor ID: AuthenticAMD + + Model name: AMD EPYC 7282 16-Core Processor + + CPU family: 23 + + Model: 49 + + Thread(s) per core: 2 + + Core(s) per socket: 16 + + Socket(s): 2 + + Stepping: 0 + + Frequency boost: enabled + + CPU max MHz: 2800.0000 + + CPU min MHz: 1500.0000 + + BogoMIPS: 5589.73 + + Flags: fpu vme de pse tsc msr pae mce cx8 apic + sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx + mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc + cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 + sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic + cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext + perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate + ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a + rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 cqm_llc cqm_occup_llc + cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd amd_ppin + arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists + pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid + overflow_recov succor smca sev sev_es + + Virtualization: AMD-V + + L1d cache: 1 MiB (32 instances) + + L1i cache: 1 MiB (32 instances) + + L2 cache: 16 MiB (32 instances) + + L3 cache: 128 MiB (8 instances) + + NUMA node(s): 2 + + NUMA node0 CPU(s): 0-15,32-47 + + NUMA node1 CPU(s): 16-31,48-63 + + Vulnerability Gather data sampling: Not affected + + Vulnerability Itlb multihit: Not affected + + Vulnerability L1tf: Not affected + + Vulnerability Mds: Not affected + + Vulnerability Meltdown: Not affected + + Vulnerability Mmio stale data: Not affected + + Vulnerability Retbleed: Mitigation; untrained return thunk; + SMT enabled with STIBP protection + + Vulnerability Spec rstack overflow: Mitigation; Safe RET + + Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass + disabled via prctl + + Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers + and __user pointer sanitization + + Vulnerability Spectre v2: Mitigation; Retpolines; IBPB conditional; + STIBP always-on; RSB filling; PBRSB-eIBRS Not affected; BHI Not affected + + Vulnerability Srbds: Not affected + + Vulnerability Tsx async abort: Not affected + + + Versions of relevant libraries: + + [pip3] numpy==1.24.1 + + [pip3] torch==2.1.2 + + [pip3] torchaudio==2.0.2+cu118 + + [pip3] torchvision==0.15.2+cu118 + + [pip3] triton==2.1.0 + + [conda] Could not collect' + transformers_version: 4.40.2 --- # Qwen2-7B-Instruct