{
"config_general": {
"lighteval_sha": "?",
"num_fewshot_seeds": 1,
"override_batch_size": 1,
"max_samples": null,
"job_id": "",
"start_time": 3697225.475270671,
"end_time": 3698159.551546604,
"total_evaluation_time_secondes": "934.0762759330682",
"model_name": "google/gemma-7b-it",
"model_sha": "bee54ce1bd2a5b36d68a06857fc0871a3389ca0b",
"model_dtype": "torch.bfloat16",
"model_size": "15.91 GB",
"config": null
},
"results": {
"lighteval|mmlu:abstract_algebra|5": {
"acc": 0.29,
"acc_stderr": 0.04560480215720685
},
"lighteval|mmlu:anatomy|5": {
"acc": 0.48148148148148145,
"acc_stderr": 0.043163785995113245
},
"lighteval|mmlu:astronomy|5": {
"acc": 0.5131578947368421,
"acc_stderr": 0.04067533136309174
},
"lighteval|mmlu:business_ethics|5": {
"acc": 0.48,
"acc_stderr": 0.050211673156867795
},
"lighteval|mmlu:clinical_knowledge|5": {
"acc": 0.47547169811320755,
"acc_stderr": 0.030735822206205608
},
"lighteval|mmlu:college_biology|5": {
"acc": 0.4652777777777778,
"acc_stderr": 0.04171115858181618
},
"lighteval|mmlu:college_chemistry|5": {
"acc": 0.37,
"acc_stderr": 0.048523658709391
},
"lighteval|mmlu:college_computer_science|5": {
"acc": 0.4,
"acc_stderr": 0.049236596391733084
},
"lighteval|mmlu:college_mathematics|5": {
"acc": 0.3,
"acc_stderr": 0.046056618647183814
},
"lighteval|mmlu:college_medicine|5": {
"acc": 0.34104046242774566,
"acc_stderr": 0.036146654241808254
},
"lighteval|mmlu:college_physics|5": {
"acc": 0.2647058823529412,
"acc_stderr": 0.04389869956808779
},
"lighteval|mmlu:computer_security|5": {
"acc": 0.58,
"acc_stderr": 0.049604496374885836
},
"lighteval|mmlu:conceptual_physics|5": {
"acc": 0.44680851063829785,
"acc_stderr": 0.0325005368436584
},
"lighteval|mmlu:econometrics|5": {
"acc": 0.39473684210526316,
"acc_stderr": 0.045981880578165414
},
"lighteval|mmlu:electrical_engineering|5": {
"acc": 0.503448275862069,
"acc_stderr": 0.04166567577101579
},
"lighteval|mmlu:elementary_mathematics|5": {
"acc": 0.3306878306878307,
"acc_stderr": 0.02422996529842508
},
"lighteval|mmlu:formal_logic|5": {
"acc": 0.31746031746031744,
"acc_stderr": 0.04163453031302859
},
"lighteval|mmlu:global_facts|5": {
"acc": 0.34,
"acc_stderr": 0.04760952285695235
},
"lighteval|mmlu:high_school_biology|5": {
"acc": 0.5096774193548387,
"acc_stderr": 0.028438677998909548
},
"lighteval|mmlu:high_school_chemistry|5": {
"acc": 0.3793103448275862,
"acc_stderr": 0.034139638059062345
},
"lighteval|mmlu:high_school_computer_science|5": {
"acc": 0.53,
"acc_stderr": 0.050161355804659205
},
"lighteval|mmlu:high_school_european_history|5": {
"acc": 0.6181818181818182,
"acc_stderr": 0.03793713171165636
},
"lighteval|mmlu:high_school_geography|5": {
"acc": 0.5909090909090909,
"acc_stderr": 0.03502975799413007
},
"lighteval|mmlu:high_school_government_and_politics|5": {
"acc": 0.5751295336787565,
"acc_stderr": 0.035674713352125395
},
"lighteval|mmlu:high_school_macroeconomics|5": {
"acc": 0.3871794871794872,
"acc_stderr": 0.02469721693087894
},
"lighteval|mmlu:high_school_mathematics|5": {
"acc": 0.23333333333333334,
"acc_stderr": 0.02578787422095931
},
"lighteval|mmlu:high_school_microeconomics|5": {
"acc": 0.4411764705882353,
"acc_stderr": 0.0322529423239964
},
"lighteval|mmlu:high_school_physics|5": {
"acc": 0.26490066225165565,
"acc_stderr": 0.03603038545360384
},
"lighteval|mmlu:high_school_psychology|5": {
"acc": 0.6403669724770642,
"acc_stderr": 0.020575234660123776
},
"lighteval|mmlu:high_school_statistics|5": {
"acc": 0.25462962962962965,
"acc_stderr": 0.02971127586000534
},
"lighteval|mmlu:high_school_us_history|5": {
"acc": 0.5784313725490197,
"acc_stderr": 0.03465868196380761
},
"lighteval|mmlu:high_school_world_history|5": {
"acc": 0.6413502109704642,
"acc_stderr": 0.031219569445301843
},
"lighteval|mmlu:human_aging|5": {
"acc": 0.6233183856502242,
"acc_stderr": 0.03252113489929188
},
"lighteval|mmlu:human_sexuality|5": {
"acc": 0.5877862595419847,
"acc_stderr": 0.04317171194870254
},
"lighteval|mmlu:international_law|5": {
"acc": 0.6363636363636364,
"acc_stderr": 0.043913262867240704
},
"lighteval|mmlu:jurisprudence|5": {
"acc": 0.6018518518518519,
"acc_stderr": 0.04732332615978814
},
"lighteval|mmlu:logical_fallacies|5": {
"acc": 0.4294478527607362,
"acc_stderr": 0.038890666191127216
},
"lighteval|mmlu:machine_learning|5": {
"acc": 0.44642857142857145,
"acc_stderr": 0.04718471485219588
},
"lighteval|mmlu:management|5": {
"acc": 0.5631067961165048,
"acc_stderr": 0.04911147107365777
},
"lighteval|mmlu:marketing|5": {
"acc": 0.7264957264957265,
"acc_stderr": 0.029202540153431194
},
"lighteval|mmlu:medical_genetics|5": {
"acc": 0.5,
"acc_stderr": 0.050251890762960605
},
"lighteval|mmlu:miscellaneous|5": {
"acc": 0.6283524904214559,
"acc_stderr": 0.017280802522133185
},
"lighteval|mmlu:moral_disputes|5": {
"acc": 0.5809248554913294,
"acc_stderr": 0.026564178111422622
},
"lighteval|mmlu:moral_scenarios|5": {
"acc": 0.293854748603352,
"acc_stderr": 0.015235075776719613
},
"lighteval|mmlu:nutrition|5": {
"acc": 0.5326797385620915,
"acc_stderr": 0.028568699752225868
},
"lighteval|mmlu:philosophy|5": {
"acc": 0.5787781350482315,
"acc_stderr": 0.028043399858210628
},
"lighteval|mmlu:prehistory|5": {
"acc": 0.558641975308642,
"acc_stderr": 0.02762873715566876
},
"lighteval|mmlu:professional_accounting|5": {
"acc": 0.3120567375886525,
"acc_stderr": 0.027640120545169924
},
"lighteval|mmlu:professional_law|5": {
"acc": 0.378748370273794,
"acc_stderr": 0.012389052105003732
},
"lighteval|mmlu:professional_medicine|5": {
"acc": 0.3088235294117647,
"acc_stderr": 0.028064998167040094
},
"lighteval|mmlu:professional_psychology|5": {
"acc": 0.43790849673202614,
"acc_stderr": 0.02007125788688652
},
"lighteval|mmlu:public_relations|5": {
"acc": 0.5818181818181818,
"acc_stderr": 0.04724577405731571
},
"lighteval|mmlu:security_studies|5": {
"acc": 0.6081632653061224,
"acc_stderr": 0.031251275910891656
},
"lighteval|mmlu:sociology|5": {
"acc": 0.7014925373134329,
"acc_stderr": 0.03235743789355043
},
"lighteval|mmlu:us_foreign_policy|5": {
"acc": 0.63,
"acc_stderr": 0.04852365870939099
},
"lighteval|mmlu:virology|5": {
"acc": 0.4457831325301205,
"acc_stderr": 0.03869543323472101
},
"lighteval|mmlu:world_religions|5": {
"acc": 0.6374269005847953,
"acc_stderr": 0.0368713061556206
},
"lighteval|mmlu:_average|5": {
"acc": 0.4784053595575083,
"acc_stderr": 0.03620189108042497
}
},
"versions": {
"lighteval|mmlu:abstract_algebra|5": 0,
"lighteval|mmlu:anatomy|5": 0,
"lighteval|mmlu:astronomy|5": 0,
"lighteval|mmlu:business_ethics|5": 0,
"lighteval|mmlu:clinical_knowledge|5": 0,
"lighteval|mmlu:college_biology|5": 0,
"lighteval|mmlu:college_chemistry|5": 0,
"lighteval|mmlu:college_computer_science|5": 0,
"lighteval|mmlu:college_mathematics|5": 0,
"lighteval|mmlu:college_medicine|5": 0,
"lighteval|mmlu:college_physics|5": 0,
"lighteval|mmlu:computer_security|5": 0,
"lighteval|mmlu:conceptual_physics|5": 0,
"lighteval|mmlu:econometrics|5": 0,
"lighteval|mmlu:electrical_engineering|5": 0,
"lighteval|mmlu:elementary_mathematics|5": 0,
"lighteval|mmlu:formal_logic|5": 0,
"lighteval|mmlu:global_facts|5": 0,
"lighteval|mmlu:high_school_biology|5": 0,
"lighteval|mmlu:high_school_chemistry|5": 0,
"lighteval|mmlu:high_school_computer_science|5": 0,
"lighteval|mmlu:high_school_european_history|5": 0,
"lighteval|mmlu:high_school_geography|5": 0,
"lighteval|mmlu:high_school_government_and_politics|5": 0,
"lighteval|mmlu:high_school_macroeconomics|5": 0,
"lighteval|mmlu:high_school_mathematics|5": 0,
"lighteval|mmlu:high_school_microeconomics|5": 0,
"lighteval|mmlu:high_school_physics|5": 0,
"lighteval|mmlu:high_school_psychology|5": 0,
"lighteval|mmlu:high_school_statistics|5": 0,
"lighteval|mmlu:high_school_us_history|5": 0,
"lighteval|mmlu:high_school_world_history|5": 0,
"lighteval|mmlu:human_aging|5": 0,
"lighteval|mmlu:human_sexuality|5": 0,
"lighteval|mmlu:international_law|5": 0,
"lighteval|mmlu:jurisprudence|5": 0,
"lighteval|mmlu:logical_fallacies|5": 0,
"lighteval|mmlu:machine_learning|5": 0,
"lighteval|mmlu:management|5": 0,
"lighteval|mmlu:marketing|5": 0,
"lighteval|mmlu:medical_genetics|5": 0,
"lighteval|mmlu:miscellaneous|5": 0,
"lighteval|mmlu:moral_disputes|5": 0,
"lighteval|mmlu:moral_scenarios|5": 0,
"lighteval|mmlu:nutrition|5": 0,
"lighteval|mmlu:philosophy|5": 0,
"lighteval|mmlu:prehistory|5": 0,
"lighteval|mmlu:professional_accounting|5": 0,
"lighteval|mmlu:professional_law|5": 0,
"lighteval|mmlu:professional_medicine|5": 0,
"lighteval|mmlu:professional_psychology|5": 0,
"lighteval|mmlu:public_relations|5": 0,
"lighteval|mmlu:security_studies|5": 0,
"lighteval|mmlu:sociology|5": 0,
"lighteval|mmlu:us_foreign_policy|5": 0,
"lighteval|mmlu:virology|5": 0,
"lighteval|mmlu:world_religions|5": 0
},
"config_tasks": {
"lighteval|mmlu:abstract_algebra": {
"name": "mmlu:abstract_algebra",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "abstract_algebra",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:anatomy": {
"name": "mmlu:anatomy",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "anatomy",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 135,
"effective_num_docs": 135
},
"lighteval|mmlu:astronomy": {
"name": "mmlu:astronomy",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "astronomy",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 152,
"effective_num_docs": 152
},
"lighteval|mmlu:business_ethics": {
"name": "mmlu:business_ethics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "business_ethics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:clinical_knowledge": {
"name": "mmlu:clinical_knowledge",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "clinical_knowledge",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 265,
"effective_num_docs": 265
},
"lighteval|mmlu:college_biology": {
"name": "mmlu:college_biology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_biology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 144,
"effective_num_docs": 144
},
"lighteval|mmlu:college_chemistry": {
"name": "mmlu:college_chemistry",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_chemistry",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:college_computer_science": {
"name": "mmlu:college_computer_science",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_computer_science",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:college_mathematics": {
"name": "mmlu:college_mathematics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_mathematics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:college_medicine": {
"name": "mmlu:college_medicine",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_medicine",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 173,
"effective_num_docs": 173
},
"lighteval|mmlu:college_physics": {
"name": "mmlu:college_physics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_physics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 102,
"effective_num_docs": 102
},
"lighteval|mmlu:computer_security": {
"name": "mmlu:computer_security",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "computer_security",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:conceptual_physics": {
"name": "mmlu:conceptual_physics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "conceptual_physics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 235,
"effective_num_docs": 235
},
"lighteval|mmlu:econometrics": {
"name": "mmlu:econometrics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "econometrics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 114,
"effective_num_docs": 114
},
"lighteval|mmlu:electrical_engineering": {
"name": "mmlu:electrical_engineering",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "electrical_engineering",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 145,
"effective_num_docs": 145
},
"lighteval|mmlu:elementary_mathematics": {
"name": "mmlu:elementary_mathematics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "elementary_mathematics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 378,
"effective_num_docs": 378
},
"lighteval|mmlu:formal_logic": {
"name": "mmlu:formal_logic",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "formal_logic",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 126,
"effective_num_docs": 126
},
"lighteval|mmlu:global_facts": {
"name": "mmlu:global_facts",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "global_facts",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:high_school_biology": {
"name": "mmlu:high_school_biology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_biology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 310,
"effective_num_docs": 310
},
"lighteval|mmlu:high_school_chemistry": {
"name": "mmlu:high_school_chemistry",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_chemistry",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 203,
"effective_num_docs": 203
},
"lighteval|mmlu:high_school_computer_science": {
"name": "mmlu:high_school_computer_science",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_computer_science",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:high_school_european_history": {
"name": "mmlu:high_school_european_history",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_european_history",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 165,
"effective_num_docs": 165
},
"lighteval|mmlu:high_school_geography": {
"name": "mmlu:high_school_geography",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_geography",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 198,
"effective_num_docs": 198
},
"lighteval|mmlu:high_school_government_and_politics": {
"name": "mmlu:high_school_government_and_politics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_government_and_politics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 193,
"effective_num_docs": 193
},
"lighteval|mmlu:high_school_macroeconomics": {
"name": "mmlu:high_school_macroeconomics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_macroeconomics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 390,
"effective_num_docs": 390
},
"lighteval|mmlu:high_school_mathematics": {
"name": "mmlu:high_school_mathematics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_mathematics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 270,
"effective_num_docs": 270
},
"lighteval|mmlu:high_school_microeconomics": {
"name": "mmlu:high_school_microeconomics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_microeconomics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 238,
"effective_num_docs": 238
},
"lighteval|mmlu:high_school_physics": {
"name": "mmlu:high_school_physics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_physics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 151,
"effective_num_docs": 151
},
"lighteval|mmlu:high_school_psychology": {
"name": "mmlu:high_school_psychology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_psychology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 545,
"effective_num_docs": 545
},
"lighteval|mmlu:high_school_statistics": {
"name": "mmlu:high_school_statistics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_statistics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 216,
"effective_num_docs": 216
},
"lighteval|mmlu:high_school_us_history": {
"name": "mmlu:high_school_us_history",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_us_history",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 204,
"effective_num_docs": 204
},
"lighteval|mmlu:high_school_world_history": {
"name": "mmlu:high_school_world_history",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_world_history",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 237,
"effective_num_docs": 237
},
"lighteval|mmlu:human_aging": {
"name": "mmlu:human_aging",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "human_aging",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 223,
"effective_num_docs": 223
},
"lighteval|mmlu:human_sexuality": {
"name": "mmlu:human_sexuality",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "human_sexuality",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 131,
"effective_num_docs": 131
},
"lighteval|mmlu:international_law": {
"name": "mmlu:international_law",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "international_law",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 121,
"effective_num_docs": 121
},
"lighteval|mmlu:jurisprudence": {
"name": "mmlu:jurisprudence",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "jurisprudence",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 108,
"effective_num_docs": 108
},
"lighteval|mmlu:logical_fallacies": {
"name": "mmlu:logical_fallacies",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "logical_fallacies",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 163,
"effective_num_docs": 163
},
"lighteval|mmlu:machine_learning": {
"name": "mmlu:machine_learning",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "machine_learning",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 112,
"effective_num_docs": 112
},
"lighteval|mmlu:management": {
"name": "mmlu:management",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "management",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 103,
"effective_num_docs": 103
},
"lighteval|mmlu:marketing": {
"name": "mmlu:marketing",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "marketing",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 234,
"effective_num_docs": 234
},
"lighteval|mmlu:medical_genetics": {
"name": "mmlu:medical_genetics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "medical_genetics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:miscellaneous": {
"name": "mmlu:miscellaneous",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "miscellaneous",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 783,
"effective_num_docs": 783
},
"lighteval|mmlu:moral_disputes": {
"name": "mmlu:moral_disputes",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "moral_disputes",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 346,
"effective_num_docs": 346
},
"lighteval|mmlu:moral_scenarios": {
"name": "mmlu:moral_scenarios",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "moral_scenarios",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 895,
"effective_num_docs": 895
},
"lighteval|mmlu:nutrition": {
"name": "mmlu:nutrition",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "nutrition",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 306,
"effective_num_docs": 306
},
"lighteval|mmlu:philosophy": {
"name": "mmlu:philosophy",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "philosophy",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 311,
"effective_num_docs": 311
},
"lighteval|mmlu:prehistory": {
"name": "mmlu:prehistory",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "prehistory",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 324,
"effective_num_docs": 324
},
"lighteval|mmlu:professional_accounting": {
"name": "mmlu:professional_accounting",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "professional_accounting",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 282,
"effective_num_docs": 282
},
"lighteval|mmlu:professional_law": {
"name": "mmlu:professional_law",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "professional_law",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 1534,
"effective_num_docs": 1534
},
"lighteval|mmlu:professional_medicine": {
"name": "mmlu:professional_medicine",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "professional_medicine",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 272,
"effective_num_docs": 272
},
"lighteval|mmlu:professional_psychology": {
"name": "mmlu:professional_psychology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "professional_psychology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 612,
"effective_num_docs": 612
},
"lighteval|mmlu:public_relations": {
"name": "mmlu:public_relations",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "public_relations",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 110,
"effective_num_docs": 110
},
"lighteval|mmlu:security_studies": {
"name": "mmlu:security_studies",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "security_studies",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 245,
"effective_num_docs": 245
},
"lighteval|mmlu:sociology": {
"name": "mmlu:sociology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "sociology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 201,
"effective_num_docs": 201
},
"lighteval|mmlu:us_foreign_policy": {
"name": "mmlu:us_foreign_policy",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "us_foreign_policy",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:virology": {
"name": "mmlu:virology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "virology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 166,
"effective_num_docs": 166
},
"lighteval|mmlu:world_religions": {
"name": "mmlu:world_religions",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "world_religions",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 171,
"effective_num_docs": 171
}
},
"summary_tasks": {
"lighteval|mmlu:abstract_algebra|5": {
"hashes": {
"hash_examples": "4c76229e00c9c0e9",
"hash_full_prompts": "b2aadfa9f8c41abb",
"hash_input_tokens": "663535dae55e3c96",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:anatomy|5": {
"hashes": {
"hash_examples": "6a1f8104dccbd33b",
"hash_full_prompts": "feb56a4a2dc4af1e",
"hash_input_tokens": "43f1d25deeff43a3",
"hash_cont_tokens": "96c2bab19c75f48d"
},
"truncated": 0,
"non_truncated": 135,
"padded": 540,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:astronomy|5": {
"hashes": {
"hash_examples": "1302effa3a76ce4c",
"hash_full_prompts": "8af70999c2195ea5",
"hash_input_tokens": "49919e6d12cfa0fd",
"hash_cont_tokens": "6cc2d6fb43989c46"
},
"truncated": 0,
"non_truncated": 152,
"padded": 608,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:business_ethics|5": {
"hashes": {
"hash_examples": "03cb8bce5336419a",
"hash_full_prompts": "38f03d07f8aab19d",
"hash_input_tokens": "da5fb0d2c66e7b34",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:clinical_knowledge|5": {
"hashes": {
"hash_examples": "ffbb9c7b2be257f9",
"hash_full_prompts": "e427b6b6701df7e7",
"hash_input_tokens": "a67e43c99d015cd4",
"hash_cont_tokens": "4566966a1e601b6c"
},
"truncated": 0,
"non_truncated": 265,
"padded": 1060,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_biology|5": {
"hashes": {
"hash_examples": "3ee77f176f38eb8e",
"hash_full_prompts": "067acf2f9ba94453",
"hash_input_tokens": "46b208bbba16bd80",
"hash_cont_tokens": "4ea00cd7b2f74799"
},
"truncated": 0,
"non_truncated": 144,
"padded": 576,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_chemistry|5": {
"hashes": {
"hash_examples": "ce61a69c46d47aeb",
"hash_full_prompts": "6700f9ebe50bb36c",
"hash_input_tokens": "103db03595e08318",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_computer_science|5": {
"hashes": {
"hash_examples": "32805b52d7d5daab",
"hash_full_prompts": "3c3357ec0945272a",
"hash_input_tokens": "0982e79e95500662",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_mathematics|5": {
"hashes": {
"hash_examples": "55da1a0a0bd33722",
"hash_full_prompts": "124288c3ba5537a5",
"hash_input_tokens": "4a39f157b5ba4b6b",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_medicine|5": {
"hashes": {
"hash_examples": "c33e143163049176",
"hash_full_prompts": "79ddd40d49870066",
"hash_input_tokens": "1d4e56a93e0cd1ed",
"hash_cont_tokens": "aed3e7fd8adea27e"
},
"truncated": 0,
"non_truncated": 173,
"padded": 692,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_physics|5": {
"hashes": {
"hash_examples": "ebdab1cdb7e555df",
"hash_full_prompts": "5a242543120701af",
"hash_input_tokens": "76e469092b0ed095",
"hash_cont_tokens": "1ca37bb9b8be1c5d"
},
"truncated": 0,
"non_truncated": 102,
"padded": 408,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:computer_security|5": {
"hashes": {
"hash_examples": "a24fd7d08a560921",
"hash_full_prompts": "77c983586d0e5080",
"hash_input_tokens": "91316bc18f936b5c",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:conceptual_physics|5": {
"hashes": {
"hash_examples": "8300977a79386993",
"hash_full_prompts": "50947b02d20f7107",
"hash_input_tokens": "00fbe8fbd995cd5d",
"hash_cont_tokens": "26db9e6e7dfdac00"
},
"truncated": 0,
"non_truncated": 235,
"padded": 940,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:econometrics|5": {
"hashes": {
"hash_examples": "ddde36788a04a46f",
"hash_full_prompts": "0b65e9f60a27a3fd",
"hash_input_tokens": "d8f92e445a507eb1",
"hash_cont_tokens": "2ef49b394cfb87e1"
},
"truncated": 0,
"non_truncated": 114,
"padded": 456,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:electrical_engineering|5": {
"hashes": {
"hash_examples": "acbc5def98c19b3f",
"hash_full_prompts": "2c125ca5d46ea8f5",
"hash_input_tokens": "f1dc000dd21b8725",
"hash_cont_tokens": "adb5a1c5d57fbb41"
},
"truncated": 0,
"non_truncated": 145,
"padded": 580,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:elementary_mathematics|5": {
"hashes": {
"hash_examples": "146e61d07497a9bd",
"hash_full_prompts": "94cefdd04018e241",
"hash_input_tokens": "96b3e156970cfc4b",
"hash_cont_tokens": "d0782f141bcc895b"
},
"truncated": 0,
"non_truncated": 378,
"padded": 1512,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:formal_logic|5": {
"hashes": {
"hash_examples": "8635216e1909a03f",
"hash_full_prompts": "1be3183e688e6eaf",
"hash_input_tokens": "27f5b2784bdc1ee2",
"hash_cont_tokens": "315a91fa1f805c93"
},
"truncated": 0,
"non_truncated": 126,
"padded": 504,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:global_facts|5": {
"hashes": {
"hash_examples": "30b315aa6353ee47",
"hash_full_prompts": "31a70a3632b83334",
"hash_input_tokens": "c7beb466820096c3",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_biology|5": {
"hashes": {
"hash_examples": "c9136373af2180de",
"hash_full_prompts": "5f51225d79d867b1",
"hash_input_tokens": "c92b5a53ab5e4ca1",
"hash_cont_tokens": "715bc46d18155135"
},
"truncated": 0,
"non_truncated": 310,
"padded": 1240,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_chemistry|5": {
"hashes": {
"hash_examples": "b0661bfa1add6404",
"hash_full_prompts": "2a738ab6e445c2e7",
"hash_input_tokens": "eecfb55aa35f890f",
"hash_cont_tokens": "3d12f9b93cc609a2"
},
"truncated": 0,
"non_truncated": 203,
"padded": 812,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_computer_science|5": {
"hashes": {
"hash_examples": "80fc1d623a3d665f",
"hash_full_prompts": "9f6f77ca4af2274d",
"hash_input_tokens": "d2798c1a86ae4ec3",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_european_history|5": {
"hashes": {
"hash_examples": "854da6e5af0fe1a1",
"hash_full_prompts": "a48a719b1c8246d0",
"hash_input_tokens": "4ae578b64f3233b5",
"hash_cont_tokens": "6d9c47e593859ccd"
},
"truncated": 0,
"non_truncated": 165,
"padded": 656,
"non_padded": 4,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_geography|5": {
"hashes": {
"hash_examples": "7dc963c7acd19ad8",
"hash_full_prompts": "1936fe27e0aa2699",
"hash_input_tokens": "1fe2edcc486cca31",
"hash_cont_tokens": "84097c7fa87dfe61"
},
"truncated": 0,
"non_truncated": 198,
"padded": 792,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_government_and_politics|5": {
"hashes": {
"hash_examples": "1f675dcdebc9758f",
"hash_full_prompts": "e08e7a3f8c2273b3",
"hash_input_tokens": "28dec64f64b9d9e4",
"hash_cont_tokens": "86d43dfe026b5e6e"
},
"truncated": 0,
"non_truncated": 193,
"padded": 772,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_macroeconomics|5": {
"hashes": {
"hash_examples": "2fb32cf2d80f0b35",
"hash_full_prompts": "6e2cc0d2c84b2b3a",
"hash_input_tokens": "20c35f79f03c3339",
"hash_cont_tokens": "99f5469b1de9a21b"
},
"truncated": 0,
"non_truncated": 390,
"padded": 1560,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_mathematics|5": {
"hashes": {
"hash_examples": "fd6646fdb5d58a1f",
"hash_full_prompts": "8a03c4f9959bba8c",
"hash_input_tokens": "b40e0db68d37ae42",
"hash_cont_tokens": "e215c84aa19ccb33"
},
"truncated": 0,
"non_truncated": 270,
"padded": 1078,
"non_padded": 2,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_microeconomics|5": {
"hashes": {
"hash_examples": "2118f21f71d87d84",
"hash_full_prompts": "2a28db62cd0549ef",
"hash_input_tokens": "68f4abfd378acb19",
"hash_cont_tokens": "dc8017437d84c710"
},
"truncated": 0,
"non_truncated": 238,
"padded": 952,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_physics|5": {
"hashes": {
"hash_examples": "dc3ce06378548565",
"hash_full_prompts": "1e954387adb81e5e",
"hash_input_tokens": "2417a1563b911d69",
"hash_cont_tokens": "b8152fcdcf86c673"
},
"truncated": 0,
"non_truncated": 151,
"padded": 596,
"non_padded": 8,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_psychology|5": {
"hashes": {
"hash_examples": "c8d1d98a40e11f2f",
"hash_full_prompts": "a177419d81cb9aed",
"hash_input_tokens": "248ccfab27d853e4",
"hash_cont_tokens": "ac45cbb9009f81d9"
},
"truncated": 0,
"non_truncated": 545,
"padded": 2168,
"non_padded": 12,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_statistics|5": {
"hashes": {
"hash_examples": "666c8759b98ee4ff",
"hash_full_prompts": "9cc401fc5cb72ae8",
"hash_input_tokens": "ccd0ca41b82c6c88",
"hash_cont_tokens": "9c9b68ee68272b16"
},
"truncated": 0,
"non_truncated": 216,
"padded": 864,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_us_history|5": {
"hashes": {
"hash_examples": "95fef1c4b7d3f81e",
"hash_full_prompts": "2e2b9ea62d6555db",
"hash_input_tokens": "5cc462844131213d",
"hash_cont_tokens": "cec285b624c15c10"
},
"truncated": 0,
"non_truncated": 204,
"padded": 816,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_world_history|5": {
"hashes": {
"hash_examples": "7e5085b6184b0322",
"hash_full_prompts": "4a768486b0dbf8f1",
"hash_input_tokens": "5b6e7ba675e32a1f",
"hash_cont_tokens": "2c02128f8f2f7539"
},
"truncated": 0,
"non_truncated": 237,
"padded": 948,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:human_aging|5": {
"hashes": {
"hash_examples": "c17333e7c7c10797",
"hash_full_prompts": "1f82ea7e4fc00b85",
"hash_input_tokens": "6ef4e4ac3e598040",
"hash_cont_tokens": "faa94c4ec8e7be4e"
},
"truncated": 0,
"non_truncated": 223,
"padded": 892,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:human_sexuality|5": {
"hashes": {
"hash_examples": "4edd1e9045df5e3d",
"hash_full_prompts": "64605d8bae4a3abc",
"hash_input_tokens": "f9ffa1943f9f6330",
"hash_cont_tokens": "d642d34719fa5ff6"
},
"truncated": 0,
"non_truncated": 131,
"padded": 524,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:international_law|5": {
"hashes": {
"hash_examples": "db2fa00d771a062a",
"hash_full_prompts": "0967ad927e0cb0bc",
"hash_input_tokens": "69dd145618fabe38",
"hash_cont_tokens": "f0d54717d3cdc783"
},
"truncated": 0,
"non_truncated": 121,
"padded": 484,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:jurisprudence|5": {
"hashes": {
"hash_examples": "e956f86b124076fe",
"hash_full_prompts": "4c0cb661df9d2606",
"hash_input_tokens": "0416d27441934740",
"hash_cont_tokens": "d766ae8c3d361559"
},
"truncated": 0,
"non_truncated": 108,
"padded": 432,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:logical_fallacies|5": {
"hashes": {
"hash_examples": "956e0e6365ab79f1",
"hash_full_prompts": "762c80a1d1e6a64f",
"hash_input_tokens": "3812485f3cc70382",
"hash_cont_tokens": "0fcca855210b4243"
},
"truncated": 0,
"non_truncated": 163,
"padded": 652,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:machine_learning|5": {
"hashes": {
"hash_examples": "397997cc6f4d581e",
"hash_full_prompts": "c9ea71ae889fda98",
"hash_input_tokens": "81a56c63afdfdf79",
"hash_cont_tokens": "8b369a2ff9235b9d"
},
"truncated": 0,
"non_truncated": 112,
"padded": 448,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:management|5": {
"hashes": {
"hash_examples": "2bcbe6f6ca63d740",
"hash_full_prompts": "517cb50ad01e2d49",
"hash_input_tokens": "16c313cb707ce55d",
"hash_cont_tokens": "c77ad5f59321afa5"
},
"truncated": 0,
"non_truncated": 103,
"padded": 412,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:marketing|5": {
"hashes": {
"hash_examples": "8ddb20d964a1b065",
"hash_full_prompts": "1d291f62bd6f8feb",
"hash_input_tokens": "4b389a0f20db3f75",
"hash_cont_tokens": "c94db408fe712d9b"
},
"truncated": 0,
"non_truncated": 234,
"padded": 936,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:medical_genetics|5": {
"hashes": {
"hash_examples": "182a71f4763d2cea",
"hash_full_prompts": "acdacc0a5b2c4d89",
"hash_input_tokens": "1c877e48ed47c25a",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:miscellaneous|5": {
"hashes": {
"hash_examples": "4c404fdbb4ca57fc",
"hash_full_prompts": "e861d67cbf41668a",
"hash_input_tokens": "173478c0490f0269",
"hash_cont_tokens": "60215a6f77eaf4d9"
},
"truncated": 0,
"non_truncated": 783,
"padded": 3132,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:moral_disputes|5": {
"hashes": {
"hash_examples": "60cbd2baa3fea5c9",
"hash_full_prompts": "db79b260094e87ce",
"hash_input_tokens": "0847aac3a5ba26f3",
"hash_cont_tokens": "3ca55f92255c9f21"
},
"truncated": 0,
"non_truncated": 346,
"padded": 1384,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:moral_scenarios|5": {
"hashes": {
"hash_examples": "fd8b0431fbdd75ef",
"hash_full_prompts": "53fa5f828f642b52",
"hash_input_tokens": "698f3abbb96f1ca7",
"hash_cont_tokens": "a82e76a0738dc6ac"
},
"truncated": 0,
"non_truncated": 895,
"padded": 3551,
"non_padded": 29,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:nutrition|5": {
"hashes": {
"hash_examples": "71e55e2b829b6528",
"hash_full_prompts": "ed630c2108c47f6a",
"hash_input_tokens": "dbe37b8214b52ae7",
"hash_cont_tokens": "b683842a2cf7cdd6"
},
"truncated": 0,
"non_truncated": 306,
"padded": 1224,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:philosophy|5": {
"hashes": {
"hash_examples": "a6d489a8d208fa4b",
"hash_full_prompts": "5e556d0979b2f539",
"hash_input_tokens": "fd3b70bff3a0b905",
"hash_cont_tokens": "a545f25ae279a135"
},
"truncated": 0,
"non_truncated": 311,
"padded": 1244,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:prehistory|5": {
"hashes": {
"hash_examples": "6cc50f032a19acaa",
"hash_full_prompts": "16d509f4878076c6",
"hash_input_tokens": "6ff7116762737dd4",
"hash_cont_tokens": "5a5ebca069b16663"
},
"truncated": 0,
"non_truncated": 324,
"padded": 1268,
"non_padded": 28,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:professional_accounting|5": {
"hashes": {
"hash_examples": "50f57ab32f5f6cea",
"hash_full_prompts": "541084a3b32c2ed6",
"hash_input_tokens": "5e0a74090e44b077",
"hash_cont_tokens": "e45018e60164d208"
},
"truncated": 0,
"non_truncated": 282,
"padded": 1120,
"non_padded": 8,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:professional_law|5": {
"hashes": {
"hash_examples": "a8fdc85c64f4b215",
"hash_full_prompts": "33a817087cc93afc",
"hash_input_tokens": "43b873edce973769",
"hash_cont_tokens": "b11002d08c03f837"
},
"truncated": 0,
"non_truncated": 1534,
"padded": 6136,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:professional_medicine|5": {
"hashes": {
"hash_examples": "c373a28a3050a73a",
"hash_full_prompts": "1f79ef87516f0689",
"hash_input_tokens": "7ed589a6340c21aa",
"hash_cont_tokens": "11ce4c2ab1132810"
},
"truncated": 0,
"non_truncated": 272,
"padded": 1088,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:professional_psychology|5": {
"hashes": {
"hash_examples": "bf5254fe818356af",
"hash_full_prompts": "8949338b52b86898",
"hash_input_tokens": "efb9165abf9c6d21",
"hash_cont_tokens": "3835bfc898aacaa0"
},
"truncated": 0,
"non_truncated": 612,
"padded": 2448,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:public_relations|5": {
"hashes": {
"hash_examples": "b66d52e28e7d14e0",
"hash_full_prompts": "1af7d0db005d327e",
"hash_input_tokens": "5d55a597521f1576",
"hash_cont_tokens": "1692112db1aec618"
},
"truncated": 0,
"non_truncated": 110,
"padded": 440,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:security_studies|5": {
"hashes": {
"hash_examples": "514c14feaf000ad9",
"hash_full_prompts": "1c32988c90e4d51d",
"hash_input_tokens": "ff59b963c5d168fe",
"hash_cont_tokens": "9801a1ce7f762a8b"
},
"truncated": 0,
"non_truncated": 245,
"padded": 980,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:sociology|5": {
"hashes": {
"hash_examples": "f6c9bc9d18c80870",
"hash_full_prompts": "4b625924b6290e4e",
"hash_input_tokens": "d7f4bed6917e7560",
"hash_cont_tokens": "277e7d5b38c0960d"
},
"truncated": 0,
"non_truncated": 201,
"padded": 804,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:us_foreign_policy|5": {
"hashes": {
"hash_examples": "ed7b78629db6678f",
"hash_full_prompts": "cc116383d2f7ae61",
"hash_input_tokens": "e5b976611aed095b",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 393,
"non_padded": 7,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:virology|5": {
"hashes": {
"hash_examples": "bc52ffdc3f9b994a",
"hash_full_prompts": "4701fb0a1e217218",
"hash_input_tokens": "cfe6fc0a4af096ac",
"hash_cont_tokens": "a4a0852e6fb42244"
},
"truncated": 0,
"non_truncated": 166,
"padded": 664,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:world_religions|5": {
"hashes": {
"hash_examples": "ecdb4a4f94f62930",
"hash_full_prompts": "5b9b63fd636996bb",
"hash_input_tokens": "ec585547a7056c8b",
"hash_cont_tokens": "c96f2973fdf12010"
},
"truncated": 0,
"non_truncated": 171,
"padded": 684,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
}
},
"summary_general": {
"hashes": {
"hash_examples": "341a076d0beb7048",
"hash_full_prompts": "807cd11b18a20ab0",
"hash_input_tokens": "ac8b873b1b49d5de",
"hash_cont_tokens": "28aa09e44eee2d3e"
},
"truncated": 0,
"non_truncated": 14042,
"padded": 56070,
"non_padded": 98,
"num_truncated_few_shots": 0
}
}