diff --git "a/README.md" "b/README.md"
--- "a/README.md"
+++ "b/README.md"
@@ -7,7 +7,3657 @@ tags:
 pipeline_tag: text-generation
 model-index:
 - name: Qwen2-7B-Instruct
-  results: []
+  results:
+  - task:
+      type: niah_8192_90
+    dataset:
+      name: niah_8192_90
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_8192_80
+    dataset:
+      name: niah_8192_80
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_8192_70
+    dataset:
+      name: niah_8192_70
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_8192_60
+    dataset:
+      name: niah_8192_60
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_8192_50
+    dataset:
+      name: niah_8192_50
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_8192_40
+    dataset:
+      name: niah_8192_40
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_8192_30
+    dataset:
+      name: niah_8192_30
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_8192_20
+    dataset:
+      name: niah_8192_20
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_8192_100
+    dataset:
+      name: niah_8192_100
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_8192_10
+    dataset:
+      name: niah_8192_10
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_6000_90
+    dataset:
+      name: niah_6000_90
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_6000_80
+    dataset:
+      name: niah_6000_80
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_6000_70
+    dataset:
+      name: niah_6000_70
+      type: niah
+    metrics:
+    - type: acc
+      value: '0.0'
+  - task:
+      type: niah_6000_60
+    dataset:
+      name: niah_6000_60
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_6000_50
+    dataset:
+      name: niah_6000_50
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_6000_40
+    dataset:
+      name: niah_6000_40
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_6000_30
+    dataset:
+      name: niah_6000_30
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_6000_20
+    dataset:
+      name: niah_6000_20
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_6000_100
+    dataset:
+      name: niah_6000_100
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_6000_10
+    dataset:
+      name: niah_6000_10
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_4096_90
+    dataset:
+      name: niah_4096_90
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_4096_80
+    dataset:
+      name: niah_4096_80
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_4096_70
+    dataset:
+      name: niah_4096_70
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_4096_60
+    dataset:
+      name: niah_4096_60
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_4096_50
+    dataset:
+      name: niah_4096_50
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_4096_40
+    dataset:
+      name: niah_4096_40
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_4096_30
+    dataset:
+      name: niah_4096_30
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_4096_20
+    dataset:
+      name: niah_4096_20
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_4096_100
+    dataset:
+      name: niah_4096_100
+      type: niah
+    metrics:
+    - type: acc
+      value: '0.0'
+  - task:
+      type: niah_4096_10
+    dataset:
+      name: niah_4096_10
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_2048_90
+    dataset:
+      name: niah_2048_90
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_2048_80
+    dataset:
+      name: niah_2048_80
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_2048_70
+    dataset:
+      name: niah_2048_70
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_2048_60
+    dataset:
+      name: niah_2048_60
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_2048_50
+    dataset:
+      name: niah_2048_50
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_2048_40
+    dataset:
+      name: niah_2048_40
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_2048_30
+    dataset:
+      name: niah_2048_30
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_2048_20
+    dataset:
+      name: niah_2048_20
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_2048_100
+    dataset:
+      name: niah_2048_100
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_2048_10
+    dataset:
+      name: niah_2048_10
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_1024_90
+    dataset:
+      name: niah_1024_90
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_1024_80
+    dataset:
+      name: niah_1024_80
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_1024_70
+    dataset:
+      name: niah_1024_70
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_1024_60
+    dataset:
+      name: niah_1024_60
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_1024_50
+    dataset:
+      name: niah_1024_50
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_1024_40
+    dataset:
+      name: niah_1024_40
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_1024_30
+    dataset:
+      name: niah_1024_30
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_1024_20
+    dataset:
+      name: niah_1024_20
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_1024_100
+    dataset:
+      name: niah_1024_100
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: niah_1024_10
+    dataset:
+      name: niah_1024_10
+      type: niah
+    metrics:
+    - type: acc
+      value: '1.0'
+  - task:
+      type: mmlu
+    dataset:
+      name: mmlu
+      type: public-dataset
+    metrics:
+    - type: acc
+      value: '0.709'
+      args:
+        results:
+          mmlu:
+            acc,none: 0.6991881498362057
+            acc_stderr,none: 0.003669336524005856
+            alias: mmlu
+          mmlu_humanities:
+            alias: ' - humanities'
+            acc,none: 0.6350690754516471
+            acc_stderr,none: 0.006600169354896744
+          mmlu_formal_logic:
+            alias: '  - formal_logic'
+            acc,none: 0.5079365079365079
+            acc_stderr,none: 0.044715725362943486
+          mmlu_high_school_european_history:
+            alias: '  - high_school_european_history'
+            acc,none: 0.806060606060606
+            acc_stderr,none: 0.030874145136562097
+          mmlu_high_school_us_history:
+            alias: '  - high_school_us_history'
+            acc,none: 0.8725490196078431
+            acc_stderr,none: 0.02340553048084631
+          mmlu_high_school_world_history:
+            alias: '  - high_school_world_history'
+            acc,none: 0.8523206751054853
+            acc_stderr,none: 0.023094329582595684
+          mmlu_international_law:
+            alias: '  - international_law'
+            acc,none: 0.8264462809917356
+            acc_stderr,none: 0.0345727283691767
+          mmlu_jurisprudence:
+            alias: '  - jurisprudence'
+            acc,none: 0.8703703703703703
+            acc_stderr,none: 0.03247224389917948
+          mmlu_logical_fallacies:
+            alias: '  - logical_fallacies'
+            acc,none: 0.803680981595092
+            acc_stderr,none: 0.031207970394709218
+          mmlu_moral_disputes:
+            alias: '  - moral_disputes'
+            acc,none: 0.7687861271676301
+            acc_stderr,none: 0.022698657167855716
+          mmlu_moral_scenarios:
+            alias: '  - moral_scenarios'
+            acc,none: 0.4346368715083799
+            acc_stderr,none: 0.016578997435496713
+          mmlu_philosophy:
+            alias: '  - philosophy'
+            acc,none: 0.7813504823151125
+            acc_stderr,none: 0.023475581417861102
+          mmlu_prehistory:
+            alias: '  - prehistory'
+            acc,none: 0.7839506172839507
+            acc_stderr,none: 0.022899162918445806
+          mmlu_professional_law:
+            alias: '  - professional_law'
+            acc,none: 0.516297262059974
+            acc_stderr,none: 0.012763450734699804
+          mmlu_world_religions:
+            alias: '  - world_religions'
+            acc,none: 0.8304093567251462
+            acc_stderr,none: 0.02878210810540171
+          mmlu_other:
+            alias: ' - other'
+            acc,none: 0.7563566140971999
+            acc_stderr,none: 0.007446207961067767
+          mmlu_business_ethics:
+            alias: '  - business_ethics'
+            acc,none: 0.77
+            acc_stderr,none: 0.04229525846816506
+          mmlu_clinical_knowledge:
+            alias: '  - clinical_knowledge'
+            acc,none: 0.7811320754716982
+            acc_stderr,none: 0.025447863825108614
+          mmlu_college_medicine:
+            alias: '  - college_medicine'
+            acc,none: 0.6878612716763006
+            acc_stderr,none: 0.03533133389323657
+          mmlu_global_facts:
+            alias: '  - global_facts'
+            acc,none: 0.47
+            acc_stderr,none: 0.05016135580465919
+          mmlu_human_aging:
+            alias: '  - human_aging'
+            acc,none: 0.7443946188340808
+            acc_stderr,none: 0.029275891003969927
+          mmlu_management:
+            alias: '  - management'
+            acc,none: 0.7961165048543689
+            acc_stderr,none: 0.0398913985953177
+          mmlu_marketing:
+            alias: '  - marketing'
+            acc,none: 0.9017094017094017
+            acc_stderr,none: 0.019503444900757567
+          mmlu_medical_genetics:
+            alias: '  - medical_genetics'
+            acc,none: 0.82
+            acc_stderr,none: 0.03861229196653694
+          mmlu_miscellaneous:
+            alias: '  - miscellaneous'
+            acc,none: 0.8544061302681992
+            acc_stderr,none: 0.012612475800423451
+          mmlu_nutrition:
+            alias: '  - nutrition'
+            acc,none: 0.7810457516339869
+            acc_stderr,none: 0.02367908986180772
+          mmlu_professional_accounting:
+            alias: '  - professional_accounting'
+            acc,none: 0.5886524822695035
+            acc_stderr,none: 0.02935491115994098
+          mmlu_professional_medicine:
+            alias: '  - professional_medicine'
+            acc,none: 0.7279411764705882
+            acc_stderr,none: 0.02703304115168146
+          mmlu_virology:
+            alias: '  - virology'
+            acc,none: 0.5240963855421686
+            acc_stderr,none: 0.03887971849597264
+          mmlu_social_sciences:
+            alias: ' - social_sciences'
+            acc,none: 0.8020799480013
+            acc_stderr,none: 0.007073049587404706
+          mmlu_econometrics:
+            alias: '  - econometrics'
+            acc,none: 0.5964912280701754
+            acc_stderr,none: 0.04615186962583707
+          mmlu_high_school_geography:
+            alias: '  - high_school_geography'
+            acc,none: 0.8838383838383839
+            acc_stderr,none: 0.022828881775249377
+          mmlu_high_school_government_and_politics:
+            alias: '  - high_school_government_and_politics'
+            acc,none: 0.927461139896373
+            acc_stderr,none: 0.01871899852067819
+          mmlu_high_school_macroeconomics:
+            alias: '  - high_school_macroeconomics'
+            acc,none: 0.764102564102564
+            acc_stderr,none: 0.021525965407408726
+          mmlu_high_school_microeconomics:
+            alias: '  - high_school_microeconomics'
+            acc,none: 0.8277310924369747
+            acc_stderr,none: 0.024528664971305424
+          mmlu_high_school_psychology:
+            alias: '  - high_school_psychology'
+            acc,none: 0.8623853211009175
+            acc_stderr,none: 0.01477010587864942
+          mmlu_human_sexuality:
+            alias: '  - human_sexuality'
+            acc,none: 0.7709923664122137
+            acc_stderr,none: 0.036853466317118506
+          mmlu_professional_psychology:
+            alias: '  - professional_psychology'
+            acc,none: 0.7467320261437909
+            acc_stderr,none: 0.017593486895366835
+          mmlu_public_relations:
+            alias: '  - public_relations'
+            acc,none: 0.7363636363636363
+            acc_stderr,none: 0.04220224692971987
+          mmlu_security_studies:
+            alias: '  - security_studies'
+            acc,none: 0.7387755102040816
+            acc_stderr,none: 0.02812342933514278
+          mmlu_sociology:
+            alias: '  - sociology'
+            acc,none: 0.8756218905472637
+            acc_stderr,none: 0.023335401790166327
+          mmlu_us_foreign_policy:
+            alias: '  - us_foreign_policy'
+            acc,none: 0.85
+            acc_stderr,none: 0.03588702812826371
+          mmlu_stem:
+            alias: ' - stem'
+            acc,none: 0.6381224230891215
+            acc_stderr,none: 0.008279915099259731
+          mmlu_abstract_algebra:
+            alias: '  - abstract_algebra'
+            acc,none: 0.52
+            acc_stderr,none: 0.050211673156867795
+          mmlu_anatomy:
+            alias: '  - anatomy'
+            acc,none: 0.6
+            acc_stderr,none: 0.04232073695151589
+          mmlu_astronomy:
+            alias: '  - astronomy'
+            acc,none: 0.7763157894736842
+            acc_stderr,none: 0.033911609343436025
+          mmlu_college_biology:
+            alias: '  - college_biology'
+            acc,none: 0.7916666666666666
+            acc_stderr,none: 0.033961162058453336
+          mmlu_college_chemistry:
+            alias: '  - college_chemistry'
+            acc,none: 0.5
+            acc_stderr,none: 0.050251890762960605
+          mmlu_college_computer_science:
+            alias: '  - college_computer_science'
+            acc,none: 0.62
+            acc_stderr,none: 0.04878317312145633
+          mmlu_college_mathematics:
+            alias: '  - college_mathematics'
+            acc,none: 0.39
+            acc_stderr,none: 0.04902071300001974
+          mmlu_college_physics:
+            alias: '  - college_physics'
+            acc,none: 0.4019607843137255
+            acc_stderr,none: 0.048786087144669955
+          mmlu_computer_security:
+            alias: '  - computer_security'
+            acc,none: 0.72
+            acc_stderr,none: 0.04512608598542129
+          mmlu_conceptual_physics:
+            alias: '  - conceptual_physics'
+            acc,none: 0.7063829787234043
+            acc_stderr,none: 0.029771642712491227
+          mmlu_electrical_engineering:
+            alias: '  - electrical_engineering'
+            acc,none: 0.7034482758620689
+            acc_stderr,none: 0.03806142687309992
+          mmlu_elementary_mathematics:
+            alias: '  - elementary_mathematics'
+            acc,none: 0.6481481481481481
+            acc_stderr,none: 0.024594975128920938
+          mmlu_high_school_biology:
+            alias: '  - high_school_biology'
+            acc,none: 0.8387096774193549
+            acc_stderr,none: 0.020923327006423298
+          mmlu_high_school_chemistry:
+            alias: '  - high_school_chemistry'
+            acc,none: 0.6157635467980296
+            acc_stderr,none: 0.03422398565657551
+          mmlu_high_school_computer_science:
+            alias: '  - high_school_computer_science'
+            acc,none: 0.79
+            acc_stderr,none: 0.040936018074033256
+          mmlu_high_school_mathematics:
+            alias: '  - high_school_mathematics'
+            acc,none: 0.4962962962962963
+            acc_stderr,none: 0.03048470166508437
+          mmlu_high_school_physics:
+            alias: '  - high_school_physics'
+            acc,none: 0.4966887417218543
+            acc_stderr,none: 0.04082393379449654
+          mmlu_high_school_statistics:
+            alias: '  - high_school_statistics'
+            acc,none: 0.6666666666666666
+            acc_stderr,none: 0.03214952147802748
+          mmlu_machine_learning:
+            alias: '  - machine_learning'
+            acc,none: 0.4732142857142857
+            acc_stderr,none: 0.047389751192741546
+        groups:
+          mmlu:
+            acc,none: 0.6991881498362057
+            acc_stderr,none: 0.003669336524005856
+            alias: mmlu
+          mmlu_humanities:
+            alias: ' - humanities'
+            acc,none: 0.6350690754516471
+            acc_stderr,none: 0.006600169354896744
+          mmlu_other:
+            alias: ' - other'
+            acc,none: 0.7563566140971999
+            acc_stderr,none: 0.007446207961067767
+          mmlu_social_sciences:
+            alias: ' - social_sciences'
+            acc,none: 0.8020799480013
+            acc_stderr,none: 0.007073049587404706
+          mmlu_stem:
+            alias: ' - stem'
+            acc,none: 0.6381224230891215
+            acc_stderr,none: 0.008279915099259731
+        group_subtasks:
+          mmlu_stem:
+          - mmlu_machine_learning
+          - mmlu_high_school_statistics
+          - mmlu_high_school_physics
+          - mmlu_high_school_mathematics
+          - mmlu_high_school_computer_science
+          - mmlu_high_school_chemistry
+          - mmlu_high_school_biology
+          - mmlu_elementary_mathematics
+          - mmlu_electrical_engineering
+          - mmlu_conceptual_physics
+          - mmlu_computer_security
+          - mmlu_college_physics
+          - mmlu_college_mathematics
+          - mmlu_college_computer_science
+          - mmlu_college_chemistry
+          - mmlu_college_biology
+          - mmlu_astronomy
+          - mmlu_anatomy
+          - mmlu_abstract_algebra
+          mmlu_other:
+          - mmlu_virology
+          - mmlu_professional_medicine
+          - mmlu_professional_accounting
+          - mmlu_nutrition
+          - mmlu_miscellaneous
+          - mmlu_medical_genetics
+          - mmlu_marketing
+          - mmlu_management
+          - mmlu_human_aging
+          - mmlu_global_facts
+          - mmlu_college_medicine
+          - mmlu_clinical_knowledge
+          - mmlu_business_ethics
+          mmlu_social_sciences:
+          - mmlu_us_foreign_policy
+          - mmlu_sociology
+          - mmlu_security_studies
+          - mmlu_public_relations
+          - mmlu_professional_psychology
+          - mmlu_human_sexuality
+          - mmlu_high_school_psychology
+          - mmlu_high_school_microeconomics
+          - mmlu_high_school_macroeconomics
+          - mmlu_high_school_government_and_politics
+          - mmlu_high_school_geography
+          - mmlu_econometrics
+          mmlu_humanities:
+          - mmlu_world_religions
+          - mmlu_professional_law
+          - mmlu_prehistory
+          - mmlu_philosophy
+          - mmlu_moral_scenarios
+          - mmlu_moral_disputes
+          - mmlu_logical_fallacies
+          - mmlu_jurisprudence
+          - mmlu_international_law
+          - mmlu_high_school_world_history
+          - mmlu_high_school_us_history
+          - mmlu_high_school_european_history
+          - mmlu_formal_logic
+          mmlu:
+          - mmlu_humanities
+          - mmlu_social_sciences
+          - mmlu_other
+          - mmlu_stem
+        configs:
+          mmlu_abstract_algebra:
+            task: mmlu_abstract_algebra
+            task_alias: abstract_algebra
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: abstract_algebra
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about abstract algebra.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_anatomy:
+            task: mmlu_anatomy
+            task_alias: anatomy
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: anatomy
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about anatomy.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_astronomy:
+            task: mmlu_astronomy
+            task_alias: astronomy
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: astronomy
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about astronomy.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_business_ethics:
+            task: mmlu_business_ethics
+            task_alias: business_ethics
+            group: mmlu_other
+            group_alias: other
+            dataset_path: hails/mmlu_no_train
+            dataset_name: business_ethics
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about business ethics.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_clinical_knowledge:
+            task: mmlu_clinical_knowledge
+            task_alias: clinical_knowledge
+            group: mmlu_other
+            group_alias: other
+            dataset_path: hails/mmlu_no_train
+            dataset_name: clinical_knowledge
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about clinical knowledge.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_college_biology:
+            task: mmlu_college_biology
+            task_alias: college_biology
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: college_biology
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about college biology.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_college_chemistry:
+            task: mmlu_college_chemistry
+            task_alias: college_chemistry
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: college_chemistry
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about college chemistry.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_college_computer_science:
+            task: mmlu_college_computer_science
+            task_alias: college_computer_science
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: college_computer_science
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about college computer science.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_college_mathematics:
+            task: mmlu_college_mathematics
+            task_alias: college_mathematics
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: college_mathematics
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about college mathematics.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_college_medicine:
+            task: mmlu_college_medicine
+            task_alias: college_medicine
+            group: mmlu_other
+            group_alias: other
+            dataset_path: hails/mmlu_no_train
+            dataset_name: college_medicine
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about college medicine.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_college_physics:
+            task: mmlu_college_physics
+            task_alias: college_physics
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: college_physics
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about college physics.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_computer_security:
+            task: mmlu_computer_security
+            task_alias: computer_security
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: computer_security
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about computer security.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_conceptual_physics:
+            task: mmlu_conceptual_physics
+            task_alias: conceptual_physics
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: conceptual_physics
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about conceptual physics.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_econometrics:
+            task: mmlu_econometrics
+            task_alias: econometrics
+            group: mmlu_social_sciences
+            group_alias: social_sciences
+            dataset_path: hails/mmlu_no_train
+            dataset_name: econometrics
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about econometrics.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_electrical_engineering:
+            task: mmlu_electrical_engineering
+            task_alias: electrical_engineering
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: electrical_engineering
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about electrical engineering.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_elementary_mathematics:
+            task: mmlu_elementary_mathematics
+            task_alias: elementary_mathematics
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: elementary_mathematics
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about elementary mathematics.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_formal_logic:
+            task: mmlu_formal_logic
+            task_alias: formal_logic
+            group: mmlu_humanities
+            group_alias: humanities
+            dataset_path: hails/mmlu_no_train
+            dataset_name: formal_logic
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about formal logic.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_global_facts:
+            task: mmlu_global_facts
+            task_alias: global_facts
+            group: mmlu_other
+            group_alias: other
+            dataset_path: hails/mmlu_no_train
+            dataset_name: global_facts
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about global facts.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_high_school_biology:
+            task: mmlu_high_school_biology
+            task_alias: high_school_biology
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: high_school_biology
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about high school biology.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_high_school_chemistry:
+            task: mmlu_high_school_chemistry
+            task_alias: high_school_chemistry
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: high_school_chemistry
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about high school chemistry.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_high_school_computer_science:
+            task: mmlu_high_school_computer_science
+            task_alias: high_school_computer_science
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: high_school_computer_science
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about high school computer science.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_high_school_european_history:
+            task: mmlu_high_school_european_history
+            task_alias: high_school_european_history
+            group: mmlu_humanities
+            group_alias: humanities
+            dataset_path: hails/mmlu_no_train
+            dataset_name: high_school_european_history
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about high school european history.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_high_school_geography:
+            task: mmlu_high_school_geography
+            task_alias: high_school_geography
+            group: mmlu_social_sciences
+            group_alias: social_sciences
+            dataset_path: hails/mmlu_no_train
+            dataset_name: high_school_geography
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about high school geography.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_high_school_government_and_politics:
+            task: mmlu_high_school_government_and_politics
+            task_alias: high_school_government_and_politics
+            group: mmlu_social_sciences
+            group_alias: social_sciences
+            dataset_path: hails/mmlu_no_train
+            dataset_name: high_school_government_and_politics
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about high school government and politics.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_high_school_macroeconomics:
+            task: mmlu_high_school_macroeconomics
+            task_alias: high_school_macroeconomics
+            group: mmlu_social_sciences
+            group_alias: social_sciences
+            dataset_path: hails/mmlu_no_train
+            dataset_name: high_school_macroeconomics
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about high school macroeconomics.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_high_school_mathematics:
+            task: mmlu_high_school_mathematics
+            task_alias: high_school_mathematics
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: high_school_mathematics
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about high school mathematics.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_high_school_microeconomics:
+            task: mmlu_high_school_microeconomics
+            task_alias: high_school_microeconomics
+            group: mmlu_social_sciences
+            group_alias: social_sciences
+            dataset_path: hails/mmlu_no_train
+            dataset_name: high_school_microeconomics
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about high school microeconomics.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_high_school_physics:
+            task: mmlu_high_school_physics
+            task_alias: high_school_physics
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: high_school_physics
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about high school physics.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_high_school_psychology:
+            task: mmlu_high_school_psychology
+            task_alias: high_school_psychology
+            group: mmlu_social_sciences
+            group_alias: social_sciences
+            dataset_path: hails/mmlu_no_train
+            dataset_name: high_school_psychology
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about high school psychology.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_high_school_statistics:
+            task: mmlu_high_school_statistics
+            task_alias: high_school_statistics
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: high_school_statistics
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about high school statistics.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_high_school_us_history:
+            task: mmlu_high_school_us_history
+            task_alias: high_school_us_history
+            group: mmlu_humanities
+            group_alias: humanities
+            dataset_path: hails/mmlu_no_train
+            dataset_name: high_school_us_history
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about high school us history.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_high_school_world_history:
+            task: mmlu_high_school_world_history
+            task_alias: high_school_world_history
+            group: mmlu_humanities
+            group_alias: humanities
+            dataset_path: hails/mmlu_no_train
+            dataset_name: high_school_world_history
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about high school world history.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_human_aging:
+            task: mmlu_human_aging
+            task_alias: human_aging
+            group: mmlu_other
+            group_alias: other
+            dataset_path: hails/mmlu_no_train
+            dataset_name: human_aging
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about human aging.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_human_sexuality:
+            task: mmlu_human_sexuality
+            task_alias: human_sexuality
+            group: mmlu_social_sciences
+            group_alias: social_sciences
+            dataset_path: hails/mmlu_no_train
+            dataset_name: human_sexuality
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about human sexuality.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_international_law:
+            task: mmlu_international_law
+            task_alias: international_law
+            group: mmlu_humanities
+            group_alias: humanities
+            dataset_path: hails/mmlu_no_train
+            dataset_name: international_law
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about international law.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_jurisprudence:
+            task: mmlu_jurisprudence
+            task_alias: jurisprudence
+            group: mmlu_humanities
+            group_alias: humanities
+            dataset_path: hails/mmlu_no_train
+            dataset_name: jurisprudence
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about jurisprudence.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_logical_fallacies:
+            task: mmlu_logical_fallacies
+            task_alias: logical_fallacies
+            group: mmlu_humanities
+            group_alias: humanities
+            dataset_path: hails/mmlu_no_train
+            dataset_name: logical_fallacies
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about logical fallacies.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_machine_learning:
+            task: mmlu_machine_learning
+            task_alias: machine_learning
+            group: mmlu_stem
+            group_alias: stem
+            dataset_path: hails/mmlu_no_train
+            dataset_name: machine_learning
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about machine learning.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_management:
+            task: mmlu_management
+            task_alias: management
+            group: mmlu_other
+            group_alias: other
+            dataset_path: hails/mmlu_no_train
+            dataset_name: management
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about management.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_marketing:
+            task: mmlu_marketing
+            task_alias: marketing
+            group: mmlu_other
+            group_alias: other
+            dataset_path: hails/mmlu_no_train
+            dataset_name: marketing
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about marketing.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_medical_genetics:
+            task: mmlu_medical_genetics
+            task_alias: medical_genetics
+            group: mmlu_other
+            group_alias: other
+            dataset_path: hails/mmlu_no_train
+            dataset_name: medical_genetics
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about medical genetics.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_miscellaneous:
+            task: mmlu_miscellaneous
+            task_alias: miscellaneous
+            group: mmlu_other
+            group_alias: other
+            dataset_path: hails/mmlu_no_train
+            dataset_name: miscellaneous
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about miscellaneous.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_moral_disputes:
+            task: mmlu_moral_disputes
+            task_alias: moral_disputes
+            group: mmlu_humanities
+            group_alias: humanities
+            dataset_path: hails/mmlu_no_train
+            dataset_name: moral_disputes
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about moral disputes.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_moral_scenarios:
+            task: mmlu_moral_scenarios
+            task_alias: moral_scenarios
+            group: mmlu_humanities
+            group_alias: humanities
+            dataset_path: hails/mmlu_no_train
+            dataset_name: moral_scenarios
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about moral scenarios.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_nutrition:
+            task: mmlu_nutrition
+            task_alias: nutrition
+            group: mmlu_other
+            group_alias: other
+            dataset_path: hails/mmlu_no_train
+            dataset_name: nutrition
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about nutrition.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_philosophy:
+            task: mmlu_philosophy
+            task_alias: philosophy
+            group: mmlu_humanities
+            group_alias: humanities
+            dataset_path: hails/mmlu_no_train
+            dataset_name: philosophy
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about philosophy.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_prehistory:
+            task: mmlu_prehistory
+            task_alias: prehistory
+            group: mmlu_humanities
+            group_alias: humanities
+            dataset_path: hails/mmlu_no_train
+            dataset_name: prehistory
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about prehistory.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_professional_accounting:
+            task: mmlu_professional_accounting
+            task_alias: professional_accounting
+            group: mmlu_other
+            group_alias: other
+            dataset_path: hails/mmlu_no_train
+            dataset_name: professional_accounting
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about professional accounting.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_professional_law:
+            task: mmlu_professional_law
+            task_alias: professional_law
+            group: mmlu_humanities
+            group_alias: humanities
+            dataset_path: hails/mmlu_no_train
+            dataset_name: professional_law
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about professional law.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_professional_medicine:
+            task: mmlu_professional_medicine
+            task_alias: professional_medicine
+            group: mmlu_other
+            group_alias: other
+            dataset_path: hails/mmlu_no_train
+            dataset_name: professional_medicine
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about professional medicine.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_professional_psychology:
+            task: mmlu_professional_psychology
+            task_alias: professional_psychology
+            group: mmlu_social_sciences
+            group_alias: social_sciences
+            dataset_path: hails/mmlu_no_train
+            dataset_name: professional_psychology
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about professional psychology.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_public_relations:
+            task: mmlu_public_relations
+            task_alias: public_relations
+            group: mmlu_social_sciences
+            group_alias: social_sciences
+            dataset_path: hails/mmlu_no_train
+            dataset_name: public_relations
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about public relations.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_security_studies:
+            task: mmlu_security_studies
+            task_alias: security_studies
+            group: mmlu_social_sciences
+            group_alias: social_sciences
+            dataset_path: hails/mmlu_no_train
+            dataset_name: security_studies
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about security studies.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_sociology:
+            task: mmlu_sociology
+            task_alias: sociology
+            group: mmlu_social_sciences
+            group_alias: social_sciences
+            dataset_path: hails/mmlu_no_train
+            dataset_name: sociology
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about sociology.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_us_foreign_policy:
+            task: mmlu_us_foreign_policy
+            task_alias: us_foreign_policy
+            group: mmlu_social_sciences
+            group_alias: social_sciences
+            dataset_path: hails/mmlu_no_train
+            dataset_name: us_foreign_policy
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about us foreign policy.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_virology:
+            task: mmlu_virology
+            task_alias: virology
+            group: mmlu_other
+            group_alias: other
+            dataset_path: hails/mmlu_no_train
+            dataset_name: virology
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about virology.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+          mmlu_world_religions:
+            task: mmlu_world_religions
+            task_alias: world_religions
+            group: mmlu_humanities
+            group_alias: humanities
+            dataset_path: hails/mmlu_no_train
+            dataset_name: world_religions
+            test_split: test
+            fewshot_split: dev
+            doc_to_text: '{{question.strip()}}
+
+              A. {{choices[0]}}
+
+              B. {{choices[1]}}
+
+              C. {{choices[2]}}
+
+              D. {{choices[3]}}
+
+              Answer:'
+            doc_to_target: answer
+            doc_to_choice:
+            - A
+            - B
+            - C
+            - D
+            description: 'The following are multiple choice questions (with answers)
+              about world religions.
+
+
+              '
+            target_delimiter: ' '
+            fewshot_delimiter: '
+
+
+              '
+            fewshot_config:
+              sampler: first_n
+            metric_list:
+            - metric: acc
+              aggregation: mean
+              higher_is_better: true
+            output_type: multiple_choice
+            repeats: 1
+            should_decontaminate: false
+            metadata:
+              version: 0.0
+        versions:
+          mmlu_abstract_algebra: 0.0
+          mmlu_anatomy: 0.0
+          mmlu_astronomy: 0.0
+          mmlu_business_ethics: 0.0
+          mmlu_clinical_knowledge: 0.0
+          mmlu_college_biology: 0.0
+          mmlu_college_chemistry: 0.0
+          mmlu_college_computer_science: 0.0
+          mmlu_college_mathematics: 0.0
+          mmlu_college_medicine: 0.0
+          mmlu_college_physics: 0.0
+          mmlu_computer_security: 0.0
+          mmlu_conceptual_physics: 0.0
+          mmlu_econometrics: 0.0
+          mmlu_electrical_engineering: 0.0
+          mmlu_elementary_mathematics: 0.0
+          mmlu_formal_logic: 0.0
+          mmlu_global_facts: 0.0
+          mmlu_high_school_biology: 0.0
+          mmlu_high_school_chemistry: 0.0
+          mmlu_high_school_computer_science: 0.0
+          mmlu_high_school_european_history: 0.0
+          mmlu_high_school_geography: 0.0
+          mmlu_high_school_government_and_politics: 0.0
+          mmlu_high_school_macroeconomics: 0.0
+          mmlu_high_school_mathematics: 0.0
+          mmlu_high_school_microeconomics: 0.0
+          mmlu_high_school_physics: 0.0
+          mmlu_high_school_psychology: 0.0
+          mmlu_high_school_statistics: 0.0
+          mmlu_high_school_us_history: 0.0
+          mmlu_high_school_world_history: 0.0
+          mmlu_human_aging: 0.0
+          mmlu_human_sexuality: 0.0
+          mmlu_international_law: 0.0
+          mmlu_jurisprudence: 0.0
+          mmlu_logical_fallacies: 0.0
+          mmlu_machine_learning: 0.0
+          mmlu_management: 0.0
+          mmlu_marketing: 0.0
+          mmlu_medical_genetics: 0.0
+          mmlu_miscellaneous: 0.0
+          mmlu_moral_disputes: 0.0
+          mmlu_moral_scenarios: 0.0
+          mmlu_nutrition: 0.0
+          mmlu_philosophy: 0.0
+          mmlu_prehistory: 0.0
+          mmlu_professional_accounting: 0.0
+          mmlu_professional_law: 0.0
+          mmlu_professional_medicine: 0.0
+          mmlu_professional_psychology: 0.0
+          mmlu_public_relations: 0.0
+          mmlu_security_studies: 0.0
+          mmlu_sociology: 0.0
+          mmlu_us_foreign_policy: 0.0
+          mmlu_virology: 0.0
+          mmlu_world_religions: 0.0
+        n-shot:
+          mmlu: 0
+        config:
+          model: vllm
+          model_args: pretrained=Qwen/Qwen2-7B-Instruct,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.8,max_model_len=2048,trust_remote_code=True
+          batch_size: auto
+          batch_sizes: []
+          bootstrap_iters: 100000
+        git_hash: d6bc7cc
+        pretty_env_info: 'PyTorch version: 2.1.2+cu121
+
+          Is debug build: False
+
+          CUDA used to build PyTorch: 12.1
+
+          ROCM used to build PyTorch: N/A
+
+
+          OS: Ubuntu 22.04.3 LTS (x86_64)
+
+          GCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
+
+          Clang version: Could not collect
+
+          CMake version: version 3.25.0
+
+          Libc version: glibc-2.35
+
+
+          Python version: 3.10.12 (main, Jun 11 2023, 05:26:28) [GCC 11.4.0] (64-bit
+          runtime)
+
+          Python platform: Linux-6.5.0-35-generic-x86_64-with-glibc2.35
+
+          Is CUDA available: True
+
+          CUDA runtime version: 11.8.89
+
+          CUDA_MODULE_LOADING set to: LAZY
+
+          GPU models and configuration: GPU 0: NVIDIA GeForce RTX 4090
+
+          Nvidia driver version: 550.54.15
+
+          cuDNN version: Could not collect
+
+          HIP runtime version: N/A
+
+          MIOpen runtime version: N/A
+
+          Is XNNPACK available: True
+
+
+          CPU:
+
+          Architecture:                       x86_64
+
+          CPU op-mode(s):                     32-bit, 64-bit
+
+          Address sizes:                      43 bits physical, 48 bits virtual
+
+          Byte Order:                         Little Endian
+
+          CPU(s):                             64
+
+          On-line CPU(s) list:                0-63
+
+          Vendor ID:                          AuthenticAMD
+
+          Model name:                         AMD EPYC 7282 16-Core Processor
+
+          CPU family:                         23
+
+          Model:                              49
+
+          Thread(s) per core:                 2
+
+          Core(s) per socket:                 16
+
+          Socket(s):                          2
+
+          Stepping:                           0
+
+          Frequency boost:                    enabled
+
+          CPU max MHz:                        2800.0000
+
+          CPU min MHz:                        1500.0000
+
+          BogoMIPS:                           5589.73
+
+          Flags:                              fpu vme de pse tsc msr pae mce cx8 apic
+          sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx
+          mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc
+          cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1
+          sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic
+          cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext
+          perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate
+          ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a
+          rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 cqm_llc cqm_occup_llc
+          cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd amd_ppin
+          arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists
+          pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid
+          overflow_recov succor smca sev sev_es
+
+          Virtualization:                     AMD-V
+
+          L1d cache:                          1 MiB (32 instances)
+
+          L1i cache:                          1 MiB (32 instances)
+
+          L2 cache:                           16 MiB (32 instances)
+
+          L3 cache:                           128 MiB (8 instances)
+
+          NUMA node(s):                       2
+
+          NUMA node0 CPU(s):                  0-15,32-47
+
+          NUMA node1 CPU(s):                  16-31,48-63
+
+          Vulnerability Gather data sampling: Not affected
+
+          Vulnerability Itlb multihit:        Not affected
+
+          Vulnerability L1tf:                 Not affected
+
+          Vulnerability Mds:                  Not affected
+
+          Vulnerability Meltdown:             Not affected
+
+          Vulnerability Mmio stale data:      Not affected
+
+          Vulnerability Retbleed:             Mitigation; untrained return thunk;
+          SMT enabled with STIBP protection
+
+          Vulnerability Spec rstack overflow: Mitigation; Safe RET
+
+          Vulnerability Spec store bypass:    Mitigation; Speculative Store Bypass
+          disabled via prctl
+
+          Vulnerability Spectre v1:           Mitigation; usercopy/swapgs barriers
+          and __user pointer sanitization
+
+          Vulnerability Spectre v2:           Mitigation; Retpolines; IBPB conditional;
+          STIBP always-on; RSB filling; PBRSB-eIBRS Not affected; BHI Not affected
+
+          Vulnerability Srbds:                Not affected
+
+          Vulnerability Tsx async abort:      Not affected
+
+
+          Versions of relevant libraries:
+
+          [pip3] numpy==1.24.1
+
+          [pip3] torch==2.1.2
+
+          [pip3] torchaudio==2.0.2+cu118
+
+          [pip3] torchvision==0.15.2+cu118
+
+          [pip3] triton==2.1.0
+
+          [conda] Could not collect'
+        transformers_version: 4.40.2
 ---
 
 # Qwen2-7B-Instruct