Upload eval_results/alignment-handbook/zephyr-2b-gemma-sft-v1/main/mmlu/results_2024-03-05T08-31-33.029449.json with huggingface_hub
{
"config_general": {
"lighteval_sha": "?",
"num_fewshot_seeds": 1,
"override_batch_size": 1,
"max_samples": null,
"job_id": "",
"start_time": 860011.786598115,
"end_time": 860678.218378255,
"total_evaluation_time_secondes": "666.4317801400321",
"model_name": "alignment-handbook/zephyr-2b-gemma-sft-v1",
"model_sha": "6607539d04bc66d5856772cc04b3b1ce17ebe644",
"model_dtype": "torch.bfloat16",
"model_size": "4.68 GB",
"config": null
},
"results": {
"lighteval|mmlu:abstract_algebra|5": {
"acc": 0.28,
"acc_stderr": 0.045126085985421296
},
"lighteval|mmlu:anatomy|5": {
"acc": 0.45185185185185184,
"acc_stderr": 0.04299268905480864
},
"lighteval|mmlu:astronomy|5": {
"acc": 0.42105263157894735,
"acc_stderr": 0.040179012759817494
},
"lighteval|mmlu:business_ethics|5": {
"acc": 0.27,
"acc_stderr": 0.04461960433384741
},
"lighteval|mmlu:clinical_knowledge|5": {
"acc": 0.4679245283018868,
"acc_stderr": 0.03070948699255654
},
"lighteval|mmlu:college_biology|5": {
"acc": 0.4513888888888889,
"acc_stderr": 0.04161402398403279
},
"lighteval|mmlu:college_chemistry|5": {
"acc": 0.41,
"acc_stderr": 0.049431107042371025
},
"lighteval|mmlu:college_computer_science|5": {
"acc": 0.32,
"acc_stderr": 0.046882617226215034
},
"lighteval|mmlu:college_mathematics|5": {
"acc": 0.33,
"acc_stderr": 0.047258156262526045
},
"lighteval|mmlu:college_medicine|5": {
"acc": 0.36416184971098264,
"acc_stderr": 0.03669072477416907
},
"lighteval|mmlu:college_physics|5": {
"acc": 0.23529411764705882,
"acc_stderr": 0.042207736591714506
},
"lighteval|mmlu:computer_security|5": {
"acc": 0.5,
"acc_stderr": 0.050251890762960605
},
"lighteval|mmlu:conceptual_physics|5": {
"acc": 0.37446808510638296,
"acc_stderr": 0.03163910665367291
},
"lighteval|mmlu:econometrics|5": {
"acc": 0.2807017543859649,
"acc_stderr": 0.042270544512322
},
"lighteval|mmlu:electrical_engineering|5": {
"acc": 0.41379310344827586,
"acc_stderr": 0.041042692118062316
},
"lighteval|mmlu:elementary_mathematics|5": {
"acc": 0.29365079365079366,
"acc_stderr": 0.02345603738398203
},
"lighteval|mmlu:formal_logic|5": {
"acc": 0.30158730158730157,
"acc_stderr": 0.04104947269903394
},
"lighteval|mmlu:global_facts|5": {
"acc": 0.33,
"acc_stderr": 0.047258156262526045
},
"lighteval|mmlu:high_school_biology|5": {
"acc": 0.4032258064516129,
"acc_stderr": 0.027906150826041143
},
"lighteval|mmlu:high_school_chemistry|5": {
"acc": 0.3793103448275862,
"acc_stderr": 0.034139638059062345
},
"lighteval|mmlu:high_school_computer_science|5": {
"acc": 0.37,
"acc_stderr": 0.04852365870939099
},
"lighteval|mmlu:high_school_european_history|5": {
"acc": 0.30303030303030304,
"acc_stderr": 0.035886248000917075
},
"lighteval|mmlu:high_school_geography|5": {
"acc": 0.5303030303030303,
"acc_stderr": 0.0355580405176393
},
"lighteval|mmlu:high_school_government_and_politics|5": {
"acc": 0.5181347150259067,
"acc_stderr": 0.036060650018329185
},
"lighteval|mmlu:high_school_macroeconomics|5": {
"acc": 0.39487179487179486,
"acc_stderr": 0.024784316942156378
},
"lighteval|mmlu:high_school_mathematics|5": {
"acc": 0.2518518518518518,
"acc_stderr": 0.02646611753895992
},
"lighteval|mmlu:high_school_microeconomics|5": {
"acc": 0.3697478991596639,
"acc_stderr": 0.03135709599613591
},
"lighteval|mmlu:high_school_physics|5": {
"acc": 0.3443708609271523,
"acc_stderr": 0.03879687024073328
},
"lighteval|mmlu:high_school_psychology|5": {
"acc": 0.5064220183486239,
"acc_stderr": 0.021435554820013077
},
"lighteval|mmlu:high_school_statistics|5": {
"acc": 0.24074074074074073,
"acc_stderr": 0.029157522184605603
},
"lighteval|mmlu:high_school_us_history|5": {
"acc": 0.28431372549019607,
"acc_stderr": 0.03166009679399812
},
"lighteval|mmlu:high_school_world_history|5": {
"acc": 0.28270042194092826,
"acc_stderr": 0.029312814153955927
},
"lighteval|mmlu:human_aging|5": {
"acc": 0.40358744394618834,
"acc_stderr": 0.032928028193303135
},
"lighteval|mmlu:human_sexuality|5": {
"acc": 0.44274809160305345,
"acc_stderr": 0.043564472026650695
},
"lighteval|mmlu:international_law|5": {
"acc": 0.47107438016528924,
"acc_stderr": 0.04556710331269498
},
"lighteval|mmlu:jurisprudence|5": {
"acc": 0.37037037037037035,
"acc_stderr": 0.04668408033024931
},
"lighteval|mmlu:logical_fallacies|5": {
"acc": 0.3619631901840491,
"acc_stderr": 0.03775700729141441
},
"lighteval|mmlu:machine_learning|5": {
"acc": 0.3482142857142857,
"acc_stderr": 0.04521829902833586
},
"lighteval|mmlu:management|5": {
"acc": 0.5145631067961165,
"acc_stderr": 0.04948637324026637
},
"lighteval|mmlu:marketing|5": {
"acc": 0.5982905982905983,
"acc_stderr": 0.03211693751051621
},
"lighteval|mmlu:medical_genetics|5": {
"acc": 0.42,
"acc_stderr": 0.049604496374885836
},
"lighteval|mmlu:miscellaneous|5": {
"acc": 0.5185185185185185,
"acc_stderr": 0.017867695938429774
},
"lighteval|mmlu:moral_disputes|5": {
"acc": 0.3988439306358382,
"acc_stderr": 0.026362437574546538
},
"lighteval|mmlu:moral_scenarios|5": {
"acc": 0.23687150837988827,
"acc_stderr": 0.014219570788103984
},
"lighteval|mmlu:nutrition|5": {
"acc": 0.4738562091503268,
"acc_stderr": 0.028590752958852394
},
"lighteval|mmlu:philosophy|5": {
"acc": 0.43729903536977494,
"acc_stderr": 0.028173917761762875
},
"lighteval|mmlu:prehistory|5": {
"acc": 0.4382716049382716,
"acc_stderr": 0.027607914087400473
},
"lighteval|mmlu:professional_accounting|5": {
"acc": 0.2978723404255319,
"acc_stderr": 0.02728160834446942
},
"lighteval|mmlu:professional_law|5": {
"acc": 0.2835723598435463,
"acc_stderr": 0.011511900775968325
},
"lighteval|mmlu:professional_medicine|5": {
"acc": 0.3014705882352941,
"acc_stderr": 0.027875982114273168
},
"lighteval|mmlu:professional_psychology|5": {
"acc": 0.33986928104575165,
"acc_stderr": 0.019162418588623553
},
"lighteval|mmlu:public_relations|5": {
"acc": 0.37272727272727274,
"acc_stderr": 0.046313813194254635
},
"lighteval|mmlu:security_studies|5": {
"acc": 0.45714285714285713,
"acc_stderr": 0.031891418324213966
},
"lighteval|mmlu:sociology|5": {
"acc": 0.48258706467661694,
"acc_stderr": 0.03533389234739245
},
"lighteval|mmlu:us_foreign_policy|5": {
"acc": 0.54,
"acc_stderr": 0.05009082659620333
},
"lighteval|mmlu:virology|5": {
"acc": 0.3855421686746988,
"acc_stderr": 0.037891344246115496
},
"lighteval|mmlu:world_religions|5": {
"acc": 0.47953216374269003,
"acc_stderr": 0.0383161053282193
},
"lighteval|mmlu:_average|5": {
"acc": 0.38683661034569394,
"acc_stderr": 0.035914777482089905
}
},
"versions": {
"lighteval|mmlu:abstract_algebra|5": 0,
"lighteval|mmlu:anatomy|5": 0,
"lighteval|mmlu:astronomy|5": 0,
"lighteval|mmlu:business_ethics|5": 0,
"lighteval|mmlu:clinical_knowledge|5": 0,
"lighteval|mmlu:college_biology|5": 0,
"lighteval|mmlu:college_chemistry|5": 0,
"lighteval|mmlu:college_computer_science|5": 0,
"lighteval|mmlu:college_mathematics|5": 0,
"lighteval|mmlu:college_medicine|5": 0,
"lighteval|mmlu:college_physics|5": 0,
"lighteval|mmlu:computer_security|5": 0,
"lighteval|mmlu:conceptual_physics|5": 0,
"lighteval|mmlu:econometrics|5": 0,
"lighteval|mmlu:electrical_engineering|5": 0,
"lighteval|mmlu:elementary_mathematics|5": 0,
"lighteval|mmlu:formal_logic|5": 0,
"lighteval|mmlu:global_facts|5": 0,
"lighteval|mmlu:high_school_biology|5": 0,
"lighteval|mmlu:high_school_chemistry|5": 0,
"lighteval|mmlu:high_school_computer_science|5": 0,
"lighteval|mmlu:high_school_european_history|5": 0,
"lighteval|mmlu:high_school_geography|5": 0,
"lighteval|mmlu:high_school_government_and_politics|5": 0,
"lighteval|mmlu:high_school_macroeconomics|5": 0,
"lighteval|mmlu:high_school_mathematics|5": 0,
"lighteval|mmlu:high_school_microeconomics|5": 0,
"lighteval|mmlu:high_school_physics|5": 0,
"lighteval|mmlu:high_school_psychology|5": 0,
"lighteval|mmlu:high_school_statistics|5": 0,
"lighteval|mmlu:high_school_us_history|5": 0,
"lighteval|mmlu:high_school_world_history|5": 0,
"lighteval|mmlu:human_aging|5": 0,
"lighteval|mmlu:human_sexuality|5": 0,
"lighteval|mmlu:international_law|5": 0,
"lighteval|mmlu:jurisprudence|5": 0,
"lighteval|mmlu:logical_fallacies|5": 0,
"lighteval|mmlu:machine_learning|5": 0,
"lighteval|mmlu:management|5": 0,
"lighteval|mmlu:marketing|5": 0,
"lighteval|mmlu:medical_genetics|5": 0,
"lighteval|mmlu:miscellaneous|5": 0,
"lighteval|mmlu:moral_disputes|5": 0,
"lighteval|mmlu:moral_scenarios|5": 0,
"lighteval|mmlu:nutrition|5": 0,
"lighteval|mmlu:philosophy|5": 0,
"lighteval|mmlu:prehistory|5": 0,
"lighteval|mmlu:professional_accounting|5": 0,
"lighteval|mmlu:professional_law|5": 0,
"lighteval|mmlu:professional_medicine|5": 0,
"lighteval|mmlu:professional_psychology|5": 0,
"lighteval|mmlu:public_relations|5": 0,
"lighteval|mmlu:security_studies|5": 0,
"lighteval|mmlu:sociology|5": 0,
"lighteval|mmlu:us_foreign_policy|5": 0,
"lighteval|mmlu:virology|5": 0,
"lighteval|mmlu:world_religions|5": 0
},
"config_tasks": {
"lighteval|mmlu:abstract_algebra": {
"name": "mmlu:abstract_algebra",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "abstract_algebra",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:anatomy": {
"name": "mmlu:anatomy",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "anatomy",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 135,
"effective_num_docs": 135
},
"lighteval|mmlu:astronomy": {
"name": "mmlu:astronomy",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "astronomy",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 152,
"effective_num_docs": 152
},
"lighteval|mmlu:business_ethics": {
"name": "mmlu:business_ethics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "business_ethics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:clinical_knowledge": {
"name": "mmlu:clinical_knowledge",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "clinical_knowledge",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 265,
"effective_num_docs": 265
},
"lighteval|mmlu:college_biology": {
"name": "mmlu:college_biology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_biology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 144,
"effective_num_docs": 144
},
"lighteval|mmlu:college_chemistry": {
"name": "mmlu:college_chemistry",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_chemistry",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:college_computer_science": {
"name": "mmlu:college_computer_science",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_computer_science",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:college_mathematics": {
"name": "mmlu:college_mathematics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_mathematics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:college_medicine": {
"name": "mmlu:college_medicine",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_medicine",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 173,
"effective_num_docs": 173
},
"lighteval|mmlu:college_physics": {
"name": "mmlu:college_physics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_physics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 102,
"effective_num_docs": 102
},
"lighteval|mmlu:computer_security": {
"name": "mmlu:computer_security",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "computer_security",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:conceptual_physics": {
"name": "mmlu:conceptual_physics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "conceptual_physics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 235,
"effective_num_docs": 235
},
"lighteval|mmlu:econometrics": {
"name": "mmlu:econometrics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "econometrics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 114,
"effective_num_docs": 114
},
"lighteval|mmlu:electrical_engineering": {
"name": "mmlu:electrical_engineering",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "electrical_engineering",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 145,
"effective_num_docs": 145
},
"lighteval|mmlu:elementary_mathematics": {
"name": "mmlu:elementary_mathematics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "elementary_mathematics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 378,
"effective_num_docs": 378
},
"lighteval|mmlu:formal_logic": {
"name": "mmlu:formal_logic",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "formal_logic",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 126,
"effective_num_docs": 126
},
"lighteval|mmlu:global_facts": {
"name": "mmlu:global_facts",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "global_facts",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:high_school_biology": {
"name": "mmlu:high_school_biology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_biology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 310,
"effective_num_docs": 310
},
"lighteval|mmlu:high_school_chemistry": {
"name": "mmlu:high_school_chemistry",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_chemistry",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 203,
"effective_num_docs": 203
},
"lighteval|mmlu:high_school_computer_science": {
"name": "mmlu:high_school_computer_science",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_computer_science",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:high_school_european_history": {
"name": "mmlu:high_school_european_history",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_european_history",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 165,
"effective_num_docs": 165
},
"lighteval|mmlu:high_school_geography": {
"name": "mmlu:high_school_geography",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_geography",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 198,
"effective_num_docs": 198
},
"lighteval|mmlu:high_school_government_and_politics": {
"name": "mmlu:high_school_government_and_politics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_government_and_politics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 193,
"effective_num_docs": 193
},
"lighteval|mmlu:high_school_macroeconomics": {
"name": "mmlu:high_school_macroeconomics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_macroeconomics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 390,
"effective_num_docs": 390
},
"lighteval|mmlu:high_school_mathematics": {
"name": "mmlu:high_school_mathematics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_mathematics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 270,
"effective_num_docs": 270
},
"lighteval|mmlu:high_school_microeconomics": {
"name": "mmlu:high_school_microeconomics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_microeconomics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 238,
"effective_num_docs": 238
},
"lighteval|mmlu:high_school_physics": {
"name": "mmlu:high_school_physics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_physics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 151,
"effective_num_docs": 151
},
"lighteval|mmlu:high_school_psychology": {
"name": "mmlu:high_school_psychology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_psychology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 545,
"effective_num_docs": 545
},
"lighteval|mmlu:high_school_statistics": {
"name": "mmlu:high_school_statistics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_statistics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 216,
"effective_num_docs": 216
},
"lighteval|mmlu:high_school_us_history": {
"name": "mmlu:high_school_us_history",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_us_history",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 204,
"effective_num_docs": 204
},
"lighteval|mmlu:high_school_world_history": {
"name": "mmlu:high_school_world_history",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_world_history",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 237,
"effective_num_docs": 237
},
"lighteval|mmlu:human_aging": {
"name": "mmlu:human_aging",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "human_aging",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 223,
"effective_num_docs": 223
},
"lighteval|mmlu:human_sexuality": {
"name": "mmlu:human_sexuality",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "human_sexuality",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 131,
"effective_num_docs": 131
},
"lighteval|mmlu:international_law": {
"name": "mmlu:international_law",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "international_law",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 121,
"effective_num_docs": 121
},
"lighteval|mmlu:jurisprudence": {
"name": "mmlu:jurisprudence",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "jurisprudence",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 108,
"effective_num_docs": 108
},
"lighteval|mmlu:logical_fallacies": {
"name": "mmlu:logical_fallacies",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "logical_fallacies",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 163,
"effective_num_docs": 163
},
"lighteval|mmlu:machine_learning": {
"name": "mmlu:machine_learning",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "machine_learning",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 112,
"effective_num_docs": 112
},
"lighteval|mmlu:management": {
"name": "mmlu:management",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "management",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 103,
"effective_num_docs": 103
},
"lighteval|mmlu:marketing": {
"name": "mmlu:marketing",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "marketing",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 234,
"effective_num_docs": 234
},
"lighteval|mmlu:medical_genetics": {
"name": "mmlu:medical_genetics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "medical_genetics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:miscellaneous": {
"name": "mmlu:miscellaneous",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "miscellaneous",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 783,
"effective_num_docs": 783
},
"lighteval|mmlu:moral_disputes": {
"name": "mmlu:moral_disputes",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "moral_disputes",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 346,
"effective_num_docs": 346
},
"lighteval|mmlu:moral_scenarios": {
"name": "mmlu:moral_scenarios",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "moral_scenarios",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 895,
"effective_num_docs": 895
},
"lighteval|mmlu:nutrition": {
"name": "mmlu:nutrition",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "nutrition",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 306,
"effective_num_docs": 306
},
"lighteval|mmlu:philosophy": {
"name": "mmlu:philosophy",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "philosophy",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 311,
"effective_num_docs": 311
},
"lighteval|mmlu:prehistory": {
"name": "mmlu:prehistory",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "prehistory",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 324,
"effective_num_docs": 324
},
"lighteval|mmlu:professional_accounting": {
"name": "mmlu:professional_accounting",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "professional_accounting",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 282,
"effective_num_docs": 282
},
"lighteval|mmlu:professional_law": {
"name": "mmlu:professional_law",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "professional_law",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 1534,
"effective_num_docs": 1534
},
"lighteval|mmlu:professional_medicine": {
"name": "mmlu:professional_medicine",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "professional_medicine",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 272,
"effective_num_docs": 272
},
"lighteval|mmlu:professional_psychology": {
"name": "mmlu:professional_psychology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "professional_psychology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 612,
"effective_num_docs": 612
},
"lighteval|mmlu:public_relations": {
"name": "mmlu:public_relations",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "public_relations",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 110,
"effective_num_docs": 110
},
"lighteval|mmlu:security_studies": {
"name": "mmlu:security_studies",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "security_studies",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 245,
"effective_num_docs": 245
},
"lighteval|mmlu:sociology": {
"name": "mmlu:sociology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "sociology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 201,
"effective_num_docs": 201
},
"lighteval|mmlu:us_foreign_policy": {
"name": "mmlu:us_foreign_policy",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "us_foreign_policy",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:virology": {
"name": "mmlu:virology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "virology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 166,
"effective_num_docs": 166
},
"lighteval|mmlu:world_religions": {
"name": "mmlu:world_religions",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "world_religions",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 171,
"effective_num_docs": 171
}
},
"summary_tasks": {
"lighteval|mmlu:abstract_algebra|5": {
"hashes": {
"hash_examples": "4c76229e00c9c0e9",
"hash_full_prompts": "a316d5f10f1c4fc3",
"hash_input_tokens": "b87e1cd51e4cdb89",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:anatomy|5": {
"hashes": {
"hash_examples": "6a1f8104dccbd33b",
"hash_full_prompts": "fa80e4331377b478",
"hash_input_tokens": "9393a535f481cfe5",
"hash_cont_tokens": "96c2bab19c75f48d"
},
"truncated": 0,
"non_truncated": 135,
"padded": 540,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:astronomy|5": {
"hashes": {
"hash_examples": "1302effa3a76ce4c",
"hash_full_prompts": "824f7fba40f07d6a",
"hash_input_tokens": "d23f1f749725c6ec",
"hash_cont_tokens": "6cc2d6fb43989c46"
},
"truncated": 0,
"non_truncated": 152,
"padded": 608,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:business_ethics|5": {
"hashes": {
"hash_examples": "03cb8bce5336419a",
"hash_full_prompts": "09edd202cc596692",
"hash_input_tokens": "71a09c30a05a6e7d",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:clinical_knowledge|5": {
"hashes": {
"hash_examples": "ffbb9c7b2be257f9",
"hash_full_prompts": "70cc39d220c7b400",
"hash_input_tokens": "8af194657c9b6943",
"hash_cont_tokens": "4566966a1e601b6c"
},
"truncated": 0,
"non_truncated": 265,
"padded": 1060,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_biology|5": {
"hashes": {
"hash_examples": "3ee77f176f38eb8e",
"hash_full_prompts": "a6d4737b00af78b6",
"hash_input_tokens": "73384e1f26f9af72",
"hash_cont_tokens": "4ea00cd7b2f74799"
},
"truncated": 0,
"non_truncated": 144,
"padded": 576,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_chemistry|5": {
"hashes": {
"hash_examples": "ce61a69c46d47aeb",
"hash_full_prompts": "ff6fb75d880e777a",
"hash_input_tokens": "438a46bf77df6cb9",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_computer_science|5": {
"hashes": {
"hash_examples": "32805b52d7d5daab",
"hash_full_prompts": "1a2b1bfdbbfc168c",
"hash_input_tokens": "6b479720c4df6881",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_mathematics|5": {
"hashes": {
"hash_examples": "55da1a0a0bd33722",
"hash_full_prompts": "6b940166b67e6a0c",
"hash_input_tokens": "061bf0b1df48d4ec",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_medicine|5": {
"hashes": {
"hash_examples": "c33e143163049176",
"hash_full_prompts": "950fedf0d751265d",
"hash_input_tokens": "bc4473ba239e2883",
"hash_cont_tokens": "aed3e7fd8adea27e"
},
"truncated": 0,
"non_truncated": 173,
"padded": 692,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_physics|5": {
"hashes": {
"hash_examples": "ebdab1cdb7e555df",
"hash_full_prompts": "2144c10eab705657",
"hash_input_tokens": "41d82f8a31d66df4",
"hash_cont_tokens": "1ca37bb9b8be1c5d"
},
"truncated": 0,
"non_truncated": 102,
"padded": 408,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:computer_security|5": {
"hashes": {
"hash_examples": "a24fd7d08a560921",
"hash_full_prompts": "f9164444c27c2eb3",
"hash_input_tokens": "c1fd0d621231fd85",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:conceptual_physics|5": {
"hashes": {
"hash_examples": "8300977a79386993",
"hash_full_prompts": "a76f787b57e7a885",
"hash_input_tokens": "af47247587837007",
"hash_cont_tokens": "26db9e6e7dfdac00"
},
"truncated": 0,
"non_truncated": 235,
"padded": 940,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:econometrics|5": {
"hashes": {
"hash_examples": "ddde36788a04a46f",
"hash_full_prompts": "9c574243eaf1af25",
"hash_input_tokens": "028aefb54bcd394e",
"hash_cont_tokens": "2ef49b394cfb87e1"
},
"truncated": 0,
"non_truncated": 114,
"padded": 456,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:electrical_engineering|5": {
"hashes": {
"hash_examples": "acbc5def98c19b3f",
"hash_full_prompts": "27b104b8ac4dd53e",
"hash_input_tokens": "51fad1e92571e71f",
"hash_cont_tokens": "adb5a1c5d57fbb41"
},
"truncated": 0,
"non_truncated": 145,
"padded": 580,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:elementary_mathematics|5": {
"hashes": {
"hash_examples": "146e61d07497a9bd",
"hash_full_prompts": "61fe73f29609efa0",
"hash_input_tokens": "c3a3e5ab857bb805",
"hash_cont_tokens": "d0782f141bcc895b"
},
"truncated": 0,
"non_truncated": 378,
"padded": 1512,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:formal_logic|5": {
"hashes": {
"hash_examples": "8635216e1909a03f",
"hash_full_prompts": "eed1d295e6976a2b",
"hash_input_tokens": "10125c8c57d40c06",
"hash_cont_tokens": "315a91fa1f805c93"
},
"truncated": 0,
"non_truncated": 126,
"padded": 504,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:global_facts|5": {
"hashes": {
"hash_examples": "30b315aa6353ee47",
"hash_full_prompts": "aa3b8cd1b4caef67",
"hash_input_tokens": "3745c719168ab057",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_biology|5": {
"hashes": {
"hash_examples": "c9136373af2180de",
"hash_full_prompts": "563919f4a7e8cfa0",
"hash_input_tokens": "ef148a7fbb855b4a",
"hash_cont_tokens": "715bc46d18155135"
},
"truncated": 0,
"non_truncated": 310,
"padded": 1240,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_chemistry|5": {
"hashes": {
"hash_examples": "b0661bfa1add6404",
"hash_full_prompts": "673af6ff7f175e54",
"hash_input_tokens": "ad5dda4a94b94236",
"hash_cont_tokens": "3d12f9b93cc609a2"
},
"truncated": 0,
"non_truncated": 203,
"padded": 812,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_computer_science|5": {
"hashes": {
"hash_examples": "80fc1d623a3d665f",
"hash_full_prompts": "54edf289edf6a21b",
"hash_input_tokens": "b4905bb04989c325",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_european_history|5": {
"hashes": {
"hash_examples": "854da6e5af0fe1a1",
"hash_full_prompts": "fe3a16c6c460b023",
"hash_input_tokens": "32d7171e04b8a11f",
"hash_cont_tokens": "6d9c47e593859ccd"
},
"truncated": 0,
"non_truncated": 165,
"padded": 656,
"non_padded": 4,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_geography|5": {
"hashes": {
"hash_examples": "7dc963c7acd19ad8",
"hash_full_prompts": "ab9a10a8f824e912",
"hash_input_tokens": "02ffa411d3e65393",
"hash_cont_tokens": "84097c7fa87dfe61"
},
"truncated": 0,
"non_truncated": 198,
"padded": 792,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_government_and_politics|5": {
"hashes": {
"hash_examples": "1f675dcdebc9758f",
"hash_full_prompts": "5edbb212b89b37c2",
"hash_input_tokens": "7cbf9f330f9848cd",
"hash_cont_tokens": "86d43dfe026b5e6e"
},
"truncated": 0,
"non_truncated": 193,
"padded": 772,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_macroeconomics|5": {
"hashes": {
"hash_examples": "2fb32cf2d80f0b35",
"hash_full_prompts": "a03f1c58bff9be7c",
"hash_input_tokens": "c49c112a9d5fe955",
"hash_cont_tokens": "99f5469b1de9a21b"
},
"truncated": 0,
"non_truncated": 390,
"padded": 1560,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_mathematics|5": {
"hashes": {
"hash_examples": "fd6646fdb5d58a1f",
"hash_full_prompts": "804e7468979f87b0",
"hash_input_tokens": "ba0dfc1e3421d9cd",
"hash_cont_tokens": "e215c84aa19ccb33"
},
"truncated": 0,
"non_truncated": 270,
"padded": 1078,
"non_padded": 2,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_microeconomics|5": {
"hashes": {
"hash_examples": "2118f21f71d87d84",
"hash_full_prompts": "6aad089b085f6261",
"hash_input_tokens": "ae3d419d3b200341",
"hash_cont_tokens": "dc8017437d84c710"
},
"truncated": 0,
"non_truncated": 238,
"padded": 952,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_physics|5": {
"hashes": {
"hash_examples": "dc3ce06378548565",
"hash_full_prompts": "748f2785c56490fe",
"hash_input_tokens": "168c4746c2c782f3",
"hash_cont_tokens": "b8152fcdcf86c673"
},
"truncated": 0,
"non_truncated": 151,
"padded": 596,
"non_padded": 8,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_psychology|5": {
"hashes": {
"hash_examples": "c8d1d98a40e11f2f",
"hash_full_prompts": "91c2c4e9f4ef640d",
"hash_input_tokens": "ad59ecce726b81f7",
"hash_cont_tokens": "ac45cbb9009f81d9"
},
"truncated": 0,
"non_truncated": 545,
"padded": 2168,
"non_padded": 12,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_statistics|5": {
"hashes": {
"hash_examples": "666c8759b98ee4ff",
"hash_full_prompts": "35db4871e53974e3",
"hash_input_tokens": "4a95f30069946bb1",
"hash_cont_tokens": "9c9b68ee68272b16"
},
"truncated": 0,
"non_truncated": 216,
"padded": 864,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_us_history|5": {
"hashes": {
"hash_examples": "95fef1c4b7d3f81e",
"hash_full_prompts": "64a4220710854b0c",
"hash_input_tokens": "9cbadc7afd5b364b",
"hash_cont_tokens": "cec285b624c15c10"
},
"truncated": 0,
"non_truncated": 204,
"padded": 816,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_world_history|5": {
"hashes": {
"hash_examples": "7e5085b6184b0322",
"hash_full_prompts": "04061c742ebe8768",
"hash_input_tokens": "cf5740bee56319ca",
"hash_cont_tokens": "2c02128f8f2f7539"
},
"truncated": 0,
"non_truncated": 237,
"padded": 948,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:human_aging|5": {
"hashes": {
"hash_examples": "c17333e7c7c10797",
"hash_full_prompts": "82f49e7280e80cf6",
"hash_input_tokens": "354c4fa2fc35d79f",
"hash_cont_tokens": "faa94c4ec8e7be4e"
},
"truncated": 0,
"non_truncated": 223,
"padded": 892,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:human_sexuality|5": {
"hashes": {
"hash_examples": "4edd1e9045df5e3d",
"hash_full_prompts": "a661f4d429c05587",
"hash_input_tokens": "c03b92d669e52759",
"hash_cont_tokens": "d642d34719fa5ff6"
},
"truncated": 0,
"non_truncated": 131,
"padded": 524,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:international_law|5": {
"hashes": {
"hash_examples": "db2fa00d771a062a",
"hash_full_prompts": "b903b8a0ef33aaa0",
"hash_input_tokens": "2dbceb85bb9962fc",
"hash_cont_tokens": "f0d54717d3cdc783"
},
"truncated": 0,
"non_truncated": 121,
"padded": 484,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:jurisprudence|5": {
"hashes": {
"hash_examples": "e956f86b124076fe",
"hash_full_prompts": "09566c878dd2bee2",
"hash_input_tokens": "0cdec18f35799629",
"hash_cont_tokens": "d766ae8c3d361559"
},
"truncated": 0,
"non_truncated": 108,
"padded": 432,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:logical_fallacies|5": {
"hashes": {
"hash_examples": "956e0e6365ab79f1",
"hash_full_prompts": "23d259ac774b5ed5",
"hash_input_tokens": "f5c2a1ebc3001890",
"hash_cont_tokens": "0fcca855210b4243"
},
"truncated": 0,
"non_truncated": 163,
"padded": 652,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:machine_learning|5": {
"hashes": {
"hash_examples": "397997cc6f4d581e",
"hash_full_prompts": "f2fba3fcd07e8270",
"hash_input_tokens": "b6ee91938ab015df",
"hash_cont_tokens": "8b369a2ff9235b9d"
},
"truncated": 0,
"non_truncated": 112,
"padded": 448,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:management|5": {
"hashes": {
"hash_examples": "2bcbe6f6ca63d740",
"hash_full_prompts": "d3e694f59e5e9c14",
"hash_input_tokens": "a0316eb0174082ac",
"hash_cont_tokens": "c77ad5f59321afa5"
},
"truncated": 0,
"non_truncated": 103,
"padded": 412,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:marketing|5": {
"hashes": {
"hash_examples": "8ddb20d964a1b065",
"hash_full_prompts": "859304e17136ce75",
"hash_input_tokens": "2d347ff563a291d4",
"hash_cont_tokens": "c94db408fe712d9b"
},
"truncated": 0,
"non_truncated": 234,
"padded": 936,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:medical_genetics|5": {
"hashes": {
"hash_examples": "182a71f4763d2cea",
"hash_full_prompts": "42e1ef61698d0cd8",
"hash_input_tokens": "54e9d1ebe9299f17",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:miscellaneous|5": {
"hashes": {
"hash_examples": "4c404fdbb4ca57fc",
"hash_full_prompts": "4b836cb32f1d9f1c",
"hash_input_tokens": "bfd569a9633251d6",
"hash_cont_tokens": "60215a6f77eaf4d9"
},
"truncated": 0,
"non_truncated": 783,
"padded": 3132,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:moral_disputes|5": {
"hashes": {
"hash_examples": "60cbd2baa3fea5c9",
"hash_full_prompts": "285523191aa3d754",
"hash_input_tokens": "e945109ff5b7773f",
"hash_cont_tokens": "3ca55f92255c9f21"
},
"truncated": 0,
"non_truncated": 346,
"padded": 1384,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:moral_scenarios|5": {
"hashes": {
"hash_examples": "fd8b0431fbdd75ef",
"hash_full_prompts": "cc2dc703a4ea5626",
"hash_input_tokens": "7540830d244858b3",
"hash_cont_tokens": "a82e76a0738dc6ac"
},
"truncated": 0,
"non_truncated": 895,
"padded": 3551,
"non_padded": 29,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:nutrition|5": {
"hashes": {
"hash_examples": "71e55e2b829b6528",
"hash_full_prompts": "95da0ecaaca81f0c",
"hash_input_tokens": "ad2d3f90bac5daaf",
"hash_cont_tokens": "b683842a2cf7cdd6"
},
"truncated": 0,
"non_truncated": 306,
"padded": 1224,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:philosophy|5": {
"hashes": {
"hash_examples": "a6d489a8d208fa4b",
"hash_full_prompts": "a12c86a1cc15caf1",
"hash_input_tokens": "21629734db2baf80",
"hash_cont_tokens": "a545f25ae279a135"
},
"truncated": 0,
"non_truncated": 311,
"padded": 1244,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:prehistory|5": {
"hashes": {
"hash_examples": "6cc50f032a19acaa",
"hash_full_prompts": "60a9f42322f7d076",
"hash_input_tokens": "fec26d07ab114747",
"hash_cont_tokens": "5a5ebca069b16663"
},
"truncated": 0,
"non_truncated": 324,
"padded": 1268,
"non_padded": 28,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:professional_accounting|5": {
"hashes": {
"hash_examples": "50f57ab32f5f6cea",
"hash_full_prompts": "c0ebe46710635608",
"hash_input_tokens": "443dfeb8cface0e0",
"hash_cont_tokens": "e45018e60164d208"
},
"truncated": 0,
"non_truncated": 282,
"padded": 1120,
"non_padded": 8,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:professional_law|5": {
"hashes": {
"hash_examples": "a8fdc85c64f4b215",
"hash_full_prompts": "bf4e78194cbc908e",
"hash_input_tokens": "1cd3a9a61d6e593f",
"hash_cont_tokens": "b11002d08c03f837"
},
"truncated": 0,
"non_truncated": 1534,
"padded": 6136,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:professional_medicine|5": {
"hashes": {
"hash_examples": "c373a28a3050a73a",
"hash_full_prompts": "dcbd2e68cdbadbb2",
"hash_input_tokens": "269f00f50b882a91",
"hash_cont_tokens": "11ce4c2ab1132810"
},
"truncated": 0,
"non_truncated": 272,
"padded": 1088,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:professional_psychology|5": {
"hashes": {
"hash_examples": "bf5254fe818356af",
"hash_full_prompts": "6171d464a5d04506",
"hash_input_tokens": "4f2be9a15b195243",
"hash_cont_tokens": "3835bfc898aacaa0"
},
"truncated": 0,
"non_truncated": 612,
"padded": 2448,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:public_relations|5": {
"hashes": {
"hash_examples": "b66d52e28e7d14e0",
"hash_full_prompts": "4ead0df88af1730e",
"hash_input_tokens": "5470d37cfcaebef7",
"hash_cont_tokens": "1692112db1aec618"
},
"truncated": 0,
"non_truncated": 110,
"padded": 440,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:security_studies|5": {
"hashes": {
"hash_examples": "514c14feaf000ad9",
"hash_full_prompts": "00768a1d238a1756",
"hash_input_tokens": "3b06989a2c329947",
"hash_cont_tokens": "9801a1ce7f762a8b"
},
"truncated": 0,
"non_truncated": 245,
"padded": 980,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:sociology|5": {
"hashes": {
"hash_examples": "f6c9bc9d18c80870",
"hash_full_prompts": "4aa27cf15b0bad9d",
"hash_input_tokens": "a95cbf3f4777e430",
"hash_cont_tokens": "277e7d5b38c0960d"
},
"truncated": 0,
"non_truncated": 201,
"padded": 804,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:us_foreign_policy|5": {
"hashes": {
"hash_examples": "ed7b78629db6678f",
"hash_full_prompts": "6ea0aa7c4f19aaea",
"hash_input_tokens": "3527ab8d27acf9ba",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 397,
"non_padded": 3,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:virology|5": {
"hashes": {
"hash_examples": "bc52ffdc3f9b994a",
"hash_full_prompts": "4049f1088a6433bb",
"hash_input_tokens": "c5c9e9261b38b647",
"hash_cont_tokens": "a4a0852e6fb42244"
},
"truncated": 0,
"non_truncated": 166,
"padded": 664,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:world_religions|5": {
"hashes": {
"hash_examples": "ecdb4a4f94f62930",
"hash_full_prompts": "569221fa18187415",
"hash_input_tokens": "5b6e672d8863808f",
"hash_cont_tokens": "c96f2973fdf12010"
},
"truncated": 0,
"non_truncated": 171,
"padded": 684,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
}
},
"summary_general": {
"hashes": {
"hash_examples": "341a076d0beb7048",
"hash_full_prompts": "bd301d9e40b69cb5",
"hash_input_tokens": "06789d11909d9084",
"hash_cont_tokens": "28aa09e44eee2d3e"
},
"truncated": 0,
"non_truncated": 14042,
"padded": 56074,
"non_padded": 94,
"num_truncated_few_shots": 0
}
}
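The report above is plain JSON, so it can be consumed directly with huggingface_hub and the standard library. The sketch below is illustrative only and is not part of the uploaded file: the repo_id is a placeholder assumption (the dataset repository hosting this eval_results/ tree is not named on this page), while the file path and the "results", "acc", and "lighteval|mmlu:_average|5" keys are taken from the JSON itself.

import json

from huggingface_hub import hf_hub_download

RESULTS_PATH = (
    "eval_results/alignment-handbook/zephyr-2b-gemma-sft-v1/"
    "main/mmlu/results_2024-03-05T08-31-33.029449.json"
)

# repo_id is a placeholder assumption: point it at the dataset repository
# that actually hosts this eval_results/ tree.
local_path = hf_hub_download(
    repo_id="<dataset-repo-with-these-eval-results>",
    filename=RESULTS_PATH,
    repo_type="dataset",
)

with open(local_path) as f:
    report = json.load(f)

# Every per-subject entry under "results" carries "acc" and "acc_stderr".
# Recompute the unweighted mean over the 57 MMLU subsets and compare it with
# the file's own "lighteval|mmlu:_average|5" entry.
subject_acc = {
    task: metrics["acc"]
    for task, metrics in report["results"].items()
    if "_average" not in task
}
mean_acc = sum(subject_acc.values()) / len(subject_acc)
print(f"{report['config_general']['model_name']}: "
      f"MMLU 5-shot acc = {mean_acc:.4f} over {len(subject_acc)} subsets "
      f"(reported average: {report['results']['lighteval|mmlu:_average|5']['acc']:.4f})")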