{
"config_general": {
"lighteval_sha": "?",
"num_fewshot_seeds": 1,
"override_batch_size": 1,
"max_samples": null,
"job_id": "",
"start_time": 2070499.281696748,
"end_time": 2071244.032479967,
"total_evaluation_time_secondes": "744.7507832190022",
"model_name": "alignment-handbook/zephyr-2b-gemma-sft-hermes-epoch-3",
"model_sha": "7632953abc77826b40c15df99eee627ad34a5a38",
"model_dtype": "torch.bfloat16",
"model_size": "4.68 GB",
"config": null
},
"results": {
"lighteval|mmlu:abstract_algebra|5": {
"acc": 0.2,
"acc_stderr": 0.04020151261036846
},
"lighteval|mmlu:anatomy|5": {
"acc": 0.4222222222222222,
"acc_stderr": 0.04266763404099582
},
"lighteval|mmlu:astronomy|5": {
"acc": 0.3684210526315789,
"acc_stderr": 0.03925523381052932
},
"lighteval|mmlu:business_ethics|5": {
"acc": 0.39,
"acc_stderr": 0.04902071300001975
},
"lighteval|mmlu:clinical_knowledge|5": {
"acc": 0.5056603773584906,
"acc_stderr": 0.030770900763851316
},
"lighteval|mmlu:college_biology|5": {
"acc": 0.4791666666666667,
"acc_stderr": 0.041775789507399935
},
"lighteval|mmlu:college_chemistry|5": {
"acc": 0.32,
"acc_stderr": 0.04688261722621503
},
"lighteval|mmlu:college_computer_science|5": {
"acc": 0.32,
"acc_stderr": 0.046882617226215034
},
"lighteval|mmlu:college_mathematics|5": {
"acc": 0.32,
"acc_stderr": 0.04688261722621504
},
"lighteval|mmlu:college_medicine|5": {
"acc": 0.3872832369942196,
"acc_stderr": 0.037143259063020656
},
"lighteval|mmlu:college_physics|5": {
"acc": 0.29411764705882354,
"acc_stderr": 0.04533838195929776
},
"lighteval|mmlu:computer_security|5": {
"acc": 0.5,
"acc_stderr": 0.050251890762960605
},
"lighteval|mmlu:conceptual_physics|5": {
"acc": 0.43829787234042555,
"acc_stderr": 0.03243618636108101
},
"lighteval|mmlu:econometrics|5": {
"acc": 0.2894736842105263,
"acc_stderr": 0.04266339443159394
},
"lighteval|mmlu:electrical_engineering|5": {
"acc": 0.47586206896551725,
"acc_stderr": 0.041618085035015295
},
"lighteval|mmlu:elementary_mathematics|5": {
"acc": 0.31746031746031744,
"acc_stderr": 0.023973861998992072
},
"lighteval|mmlu:formal_logic|5": {
"acc": 0.3253968253968254,
"acc_stderr": 0.04190596438871137
},
"lighteval|mmlu:global_facts|5": {
"acc": 0.26,
"acc_stderr": 0.044084400227680794
},
"lighteval|mmlu:high_school_biology|5": {
"acc": 0.4935483870967742,
"acc_stderr": 0.02844163823354051
},
"lighteval|mmlu:high_school_chemistry|5": {
"acc": 0.33497536945812806,
"acc_stderr": 0.033208527423483104
},
"lighteval|mmlu:high_school_computer_science|5": {
"acc": 0.4,
"acc_stderr": 0.049236596391733084
},
"lighteval|mmlu:high_school_european_history|5": {
"acc": 0.509090909090909,
"acc_stderr": 0.03903698647748441
},
"lighteval|mmlu:high_school_geography|5": {
"acc": 0.51010101010101,
"acc_stderr": 0.035616254886737454
},
"lighteval|mmlu:high_school_government_and_politics|5": {
"acc": 0.5595854922279793,
"acc_stderr": 0.03582724530036094
},
"lighteval|mmlu:high_school_macroeconomics|5": {
"acc": 0.37948717948717947,
"acc_stderr": 0.024603626924097417
},
"lighteval|mmlu:high_school_mathematics|5": {
"acc": 0.25555555555555554,
"acc_stderr": 0.02659393910184407
},
"lighteval|mmlu:high_school_microeconomics|5": {
"acc": 0.39915966386554624,
"acc_stderr": 0.03181110032413926
},
"lighteval|mmlu:high_school_physics|5": {
"acc": 0.26490066225165565,
"acc_stderr": 0.03603038545360384
},
"lighteval|mmlu:high_school_psychology|5": {
"acc": 0.6091743119266055,
"acc_stderr": 0.020920058346111062
},
"lighteval|mmlu:high_school_statistics|5": {
"acc": 0.2777777777777778,
"acc_stderr": 0.030546745264953167
},
"lighteval|mmlu:high_school_us_history|5": {
"acc": 0.46568627450980393,
"acc_stderr": 0.03501038327635897
},
"lighteval|mmlu:high_school_world_history|5": {
"acc": 0.5780590717299579,
"acc_stderr": 0.032148146302403695
},
"lighteval|mmlu:human_aging|5": {
"acc": 0.4663677130044843,
"acc_stderr": 0.033481800170603065
},
"lighteval|mmlu:human_sexuality|5": {
"acc": 0.45038167938931295,
"acc_stderr": 0.04363643698524779
},
"lighteval|mmlu:international_law|5": {
"acc": 0.5206611570247934,
"acc_stderr": 0.04560456086387235
},
"lighteval|mmlu:jurisprudence|5": {
"acc": 0.5185185185185185,
"acc_stderr": 0.04830366024635331
},
"lighteval|mmlu:logical_fallacies|5": {
"acc": 0.4539877300613497,
"acc_stderr": 0.0391170190467718
},
"lighteval|mmlu:machine_learning|5": {
"acc": 0.3482142857142857,
"acc_stderr": 0.04521829902833585
},
"lighteval|mmlu:management|5": {
"acc": 0.5048543689320388,
"acc_stderr": 0.04950504382128921
},
"lighteval|mmlu:marketing|5": {
"acc": 0.6709401709401709,
"acc_stderr": 0.03078232157768817
},
"lighteval|mmlu:medical_genetics|5": {
"acc": 0.48,
"acc_stderr": 0.050211673156867795
},
"lighteval|mmlu:miscellaneous|5": {
"acc": 0.5772669220945083,
"acc_stderr": 0.017665180351954062
},
"lighteval|mmlu:moral_disputes|5": {
"acc": 0.45664739884393063,
"acc_stderr": 0.026817718130348916
},
"lighteval|mmlu:moral_scenarios|5": {
"acc": 0.2681564245810056,
"acc_stderr": 0.014816119635317012
},
"lighteval|mmlu:nutrition|5": {
"acc": 0.4738562091503268,
"acc_stderr": 0.028590752958852394
},
"lighteval|mmlu:philosophy|5": {
"acc": 0.45980707395498394,
"acc_stderr": 0.028306190403305696
},
"lighteval|mmlu:prehistory|5": {
"acc": 0.5246913580246914,
"acc_stderr": 0.02778680093142745
},
"lighteval|mmlu:professional_accounting|5": {
"acc": 0.3475177304964539,
"acc_stderr": 0.028406627809590947
},
"lighteval|mmlu:professional_law|5": {
"acc": 0.3494132985658409,
"acc_stderr": 0.012177306252786686
},
"lighteval|mmlu:professional_medicine|5": {
"acc": 0.33088235294117646,
"acc_stderr": 0.028582709753898452
},
"lighteval|mmlu:professional_psychology|5": {
"acc": 0.4133986928104575,
"acc_stderr": 0.019922115682786685
},
"lighteval|mmlu:public_relations|5": {
"acc": 0.5,
"acc_stderr": 0.04789131426105757
},
"lighteval|mmlu:security_studies|5": {
"acc": 0.4489795918367347,
"acc_stderr": 0.03184213866687579
},
"lighteval|mmlu:sociology|5": {
"acc": 0.5870646766169154,
"acc_stderr": 0.03481520803367348
},
"lighteval|mmlu:us_foreign_policy|5": {
"acc": 0.59,
"acc_stderr": 0.04943110704237101
},
"lighteval|mmlu:virology|5": {
"acc": 0.39156626506024095,
"acc_stderr": 0.03799857454479637
},
"lighteval|mmlu:world_religions|5": {
"acc": 0.5789473684210527,
"acc_stderr": 0.03786720706234214
},
"lighteval|mmlu:_average|5": {
"acc": 0.42741376532224157,
"acc_stderr": 0.03616779964500751
}
},
"versions": {
"lighteval|mmlu:abstract_algebra|5": 0,
"lighteval|mmlu:anatomy|5": 0,
"lighteval|mmlu:astronomy|5": 0,
"lighteval|mmlu:business_ethics|5": 0,
"lighteval|mmlu:clinical_knowledge|5": 0,
"lighteval|mmlu:college_biology|5": 0,
"lighteval|mmlu:college_chemistry|5": 0,
"lighteval|mmlu:college_computer_science|5": 0,
"lighteval|mmlu:college_mathematics|5": 0,
"lighteval|mmlu:college_medicine|5": 0,
"lighteval|mmlu:college_physics|5": 0,
"lighteval|mmlu:computer_security|5": 0,
"lighteval|mmlu:conceptual_physics|5": 0,
"lighteval|mmlu:econometrics|5": 0,
"lighteval|mmlu:electrical_engineering|5": 0,
"lighteval|mmlu:elementary_mathematics|5": 0,
"lighteval|mmlu:formal_logic|5": 0,
"lighteval|mmlu:global_facts|5": 0,
"lighteval|mmlu:high_school_biology|5": 0,
"lighteval|mmlu:high_school_chemistry|5": 0,
"lighteval|mmlu:high_school_computer_science|5": 0,
"lighteval|mmlu:high_school_european_history|5": 0,
"lighteval|mmlu:high_school_geography|5": 0,
"lighteval|mmlu:high_school_government_and_politics|5": 0,
"lighteval|mmlu:high_school_macroeconomics|5": 0,
"lighteval|mmlu:high_school_mathematics|5": 0,
"lighteval|mmlu:high_school_microeconomics|5": 0,
"lighteval|mmlu:high_school_physics|5": 0,
"lighteval|mmlu:high_school_psychology|5": 0,
"lighteval|mmlu:high_school_statistics|5": 0,
"lighteval|mmlu:high_school_us_history|5": 0,
"lighteval|mmlu:high_school_world_history|5": 0,
"lighteval|mmlu:human_aging|5": 0,
"lighteval|mmlu:human_sexuality|5": 0,
"lighteval|mmlu:international_law|5": 0,
"lighteval|mmlu:jurisprudence|5": 0,
"lighteval|mmlu:logical_fallacies|5": 0,
"lighteval|mmlu:machine_learning|5": 0,
"lighteval|mmlu:management|5": 0,
"lighteval|mmlu:marketing|5": 0,
"lighteval|mmlu:medical_genetics|5": 0,
"lighteval|mmlu:miscellaneous|5": 0,
"lighteval|mmlu:moral_disputes|5": 0,
"lighteval|mmlu:moral_scenarios|5": 0,
"lighteval|mmlu:nutrition|5": 0,
"lighteval|mmlu:philosophy|5": 0,
"lighteval|mmlu:prehistory|5": 0,
"lighteval|mmlu:professional_accounting|5": 0,
"lighteval|mmlu:professional_law|5": 0,
"lighteval|mmlu:professional_medicine|5": 0,
"lighteval|mmlu:professional_psychology|5": 0,
"lighteval|mmlu:public_relations|5": 0,
"lighteval|mmlu:security_studies|5": 0,
"lighteval|mmlu:sociology|5": 0,
"lighteval|mmlu:us_foreign_policy|5": 0,
"lighteval|mmlu:virology|5": 0,
"lighteval|mmlu:world_religions|5": 0
},
"config_tasks": {
"lighteval|mmlu:abstract_algebra": {
"name": "mmlu:abstract_algebra",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "abstract_algebra",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:anatomy": {
"name": "mmlu:anatomy",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "anatomy",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 135,
"effective_num_docs": 135
},
"lighteval|mmlu:astronomy": {
"name": "mmlu:astronomy",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "astronomy",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 152,
"effective_num_docs": 152
},
"lighteval|mmlu:business_ethics": {
"name": "mmlu:business_ethics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "business_ethics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:clinical_knowledge": {
"name": "mmlu:clinical_knowledge",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "clinical_knowledge",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 265,
"effective_num_docs": 265
},
"lighteval|mmlu:college_biology": {
"name": "mmlu:college_biology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_biology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 144,
"effective_num_docs": 144
},
"lighteval|mmlu:college_chemistry": {
"name": "mmlu:college_chemistry",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_chemistry",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:college_computer_science": {
"name": "mmlu:college_computer_science",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_computer_science",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:college_mathematics": {
"name": "mmlu:college_mathematics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_mathematics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:college_medicine": {
"name": "mmlu:college_medicine",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_medicine",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 173,
"effective_num_docs": 173
},
"lighteval|mmlu:college_physics": {
"name": "mmlu:college_physics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "college_physics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 102,
"effective_num_docs": 102
},
"lighteval|mmlu:computer_security": {
"name": "mmlu:computer_security",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "computer_security",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:conceptual_physics": {
"name": "mmlu:conceptual_physics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "conceptual_physics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 235,
"effective_num_docs": 235
},
"lighteval|mmlu:econometrics": {
"name": "mmlu:econometrics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "econometrics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 114,
"effective_num_docs": 114
},
"lighteval|mmlu:electrical_engineering": {
"name": "mmlu:electrical_engineering",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "electrical_engineering",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 145,
"effective_num_docs": 145
},
"lighteval|mmlu:elementary_mathematics": {
"name": "mmlu:elementary_mathematics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "elementary_mathematics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 378,
"effective_num_docs": 378
},
"lighteval|mmlu:formal_logic": {
"name": "mmlu:formal_logic",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "formal_logic",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 126,
"effective_num_docs": 126
},
"lighteval|mmlu:global_facts": {
"name": "mmlu:global_facts",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "global_facts",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:high_school_biology": {
"name": "mmlu:high_school_biology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_biology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 310,
"effective_num_docs": 310
},
"lighteval|mmlu:high_school_chemistry": {
"name": "mmlu:high_school_chemistry",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_chemistry",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 203,
"effective_num_docs": 203
},
"lighteval|mmlu:high_school_computer_science": {
"name": "mmlu:high_school_computer_science",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_computer_science",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:high_school_european_history": {
"name": "mmlu:high_school_european_history",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_european_history",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 165,
"effective_num_docs": 165
},
"lighteval|mmlu:high_school_geography": {
"name": "mmlu:high_school_geography",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_geography",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 198,
"effective_num_docs": 198
},
"lighteval|mmlu:high_school_government_and_politics": {
"name": "mmlu:high_school_government_and_politics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_government_and_politics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 193,
"effective_num_docs": 193
},
"lighteval|mmlu:high_school_macroeconomics": {
"name": "mmlu:high_school_macroeconomics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_macroeconomics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 390,
"effective_num_docs": 390
},
"lighteval|mmlu:high_school_mathematics": {
"name": "mmlu:high_school_mathematics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_mathematics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 270,
"effective_num_docs": 270
},
"lighteval|mmlu:high_school_microeconomics": {
"name": "mmlu:high_school_microeconomics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_microeconomics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 238,
"effective_num_docs": 238
},
"lighteval|mmlu:high_school_physics": {
"name": "mmlu:high_school_physics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_physics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 151,
"effective_num_docs": 151
},
"lighteval|mmlu:high_school_psychology": {
"name": "mmlu:high_school_psychology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_psychology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 545,
"effective_num_docs": 545
},
"lighteval|mmlu:high_school_statistics": {
"name": "mmlu:high_school_statistics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_statistics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 216,
"effective_num_docs": 216
},
"lighteval|mmlu:high_school_us_history": {
"name": "mmlu:high_school_us_history",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_us_history",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 204,
"effective_num_docs": 204
},
"lighteval|mmlu:high_school_world_history": {
"name": "mmlu:high_school_world_history",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "high_school_world_history",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 237,
"effective_num_docs": 237
},
"lighteval|mmlu:human_aging": {
"name": "mmlu:human_aging",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "human_aging",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 223,
"effective_num_docs": 223
},
"lighteval|mmlu:human_sexuality": {
"name": "mmlu:human_sexuality",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "human_sexuality",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 131,
"effective_num_docs": 131
},
"lighteval|mmlu:international_law": {
"name": "mmlu:international_law",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "international_law",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 121,
"effective_num_docs": 121
},
"lighteval|mmlu:jurisprudence": {
"name": "mmlu:jurisprudence",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "jurisprudence",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 108,
"effective_num_docs": 108
},
"lighteval|mmlu:logical_fallacies": {
"name": "mmlu:logical_fallacies",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "logical_fallacies",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 163,
"effective_num_docs": 163
},
"lighteval|mmlu:machine_learning": {
"name": "mmlu:machine_learning",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "machine_learning",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 112,
"effective_num_docs": 112
},
"lighteval|mmlu:management": {
"name": "mmlu:management",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "management",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 103,
"effective_num_docs": 103
},
"lighteval|mmlu:marketing": {
"name": "mmlu:marketing",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "marketing",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 234,
"effective_num_docs": 234
},
"lighteval|mmlu:medical_genetics": {
"name": "mmlu:medical_genetics",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "medical_genetics",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:miscellaneous": {
"name": "mmlu:miscellaneous",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "miscellaneous",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 783,
"effective_num_docs": 783
},
"lighteval|mmlu:moral_disputes": {
"name": "mmlu:moral_disputes",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "moral_disputes",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 346,
"effective_num_docs": 346
},
"lighteval|mmlu:moral_scenarios": {
"name": "mmlu:moral_scenarios",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "moral_scenarios",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 895,
"effective_num_docs": 895
},
"lighteval|mmlu:nutrition": {
"name": "mmlu:nutrition",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "nutrition",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 306,
"effective_num_docs": 306
},
"lighteval|mmlu:philosophy": {
"name": "mmlu:philosophy",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "philosophy",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 311,
"effective_num_docs": 311
},
"lighteval|mmlu:prehistory": {
"name": "mmlu:prehistory",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "prehistory",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 324,
"effective_num_docs": 324
},
"lighteval|mmlu:professional_accounting": {
"name": "mmlu:professional_accounting",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "professional_accounting",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 282,
"effective_num_docs": 282
},
"lighteval|mmlu:professional_law": {
"name": "mmlu:professional_law",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "professional_law",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 1534,
"effective_num_docs": 1534
},
"lighteval|mmlu:professional_medicine": {
"name": "mmlu:professional_medicine",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "professional_medicine",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 272,
"effective_num_docs": 272
},
"lighteval|mmlu:professional_psychology": {
"name": "mmlu:professional_psychology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "professional_psychology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 612,
"effective_num_docs": 612
},
"lighteval|mmlu:public_relations": {
"name": "mmlu:public_relations",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "public_relations",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 110,
"effective_num_docs": 110
},
"lighteval|mmlu:security_studies": {
"name": "mmlu:security_studies",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "security_studies",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 245,
"effective_num_docs": 245
},
"lighteval|mmlu:sociology": {
"name": "mmlu:sociology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "sociology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 201,
"effective_num_docs": 201
},
"lighteval|mmlu:us_foreign_policy": {
"name": "mmlu:us_foreign_policy",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "us_foreign_policy",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 100,
"effective_num_docs": 100
},
"lighteval|mmlu:virology": {
"name": "mmlu:virology",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "virology",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 166,
"effective_num_docs": 166
},
"lighteval|mmlu:world_religions": {
"name": "mmlu:world_religions",
"prompt_function": "mmlu_harness",
"hf_repo": "lighteval/mmlu",
"hf_subset": "world_religions",
"metric": [
"loglikelihood_acc"
],
"hf_avail_splits": [
"auxiliary_train",
"test",
"validation",
"dev"
],
"evaluation_splits": [
"test"
],
"few_shots_split": "dev",
"few_shots_select": "sequential",
"generation_size": 1,
"stop_sequence": [
"\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval",
"mmlu"
],
"original_num_docs": 171,
"effective_num_docs": 171
}
},
"summary_tasks": {
"lighteval|mmlu:abstract_algebra|5": {
"hashes": {
"hash_examples": "4c76229e00c9c0e9",
"hash_full_prompts": "a45d01c3409c889c",
"hash_input_tokens": "fc11398ca4e995e6",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:anatomy|5": {
"hashes": {
"hash_examples": "6a1f8104dccbd33b",
"hash_full_prompts": "e245c6600e03cc32",
"hash_input_tokens": "0e63aad739f5d777",
"hash_cont_tokens": "96c2bab19c75f48d"
},
"truncated": 0,
"non_truncated": 135,
"padded": 540,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:astronomy|5": {
"hashes": {
"hash_examples": "1302effa3a76ce4c",
"hash_full_prompts": "390f9bddf857ad04",
"hash_input_tokens": "53afd9483d456920",
"hash_cont_tokens": "6cc2d6fb43989c46"
},
"truncated": 0,
"non_truncated": 152,
"padded": 608,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:business_ethics|5": {
"hashes": {
"hash_examples": "03cb8bce5336419a",
"hash_full_prompts": "5504f893bc4f2fa1",
"hash_input_tokens": "1d0d99c2f7f95728",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:clinical_knowledge|5": {
"hashes": {
"hash_examples": "ffbb9c7b2be257f9",
"hash_full_prompts": "106ad0bab4b90b78",
"hash_input_tokens": "6abbbf267dbe9940",
"hash_cont_tokens": "4566966a1e601b6c"
},
"truncated": 0,
"non_truncated": 265,
"padded": 1060,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_biology|5": {
"hashes": {
"hash_examples": "3ee77f176f38eb8e",
"hash_full_prompts": "59f9bdf2695cb226",
"hash_input_tokens": "803196bfad4a393a",
"hash_cont_tokens": "4ea00cd7b2f74799"
},
"truncated": 0,
"non_truncated": 144,
"padded": 576,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_chemistry|5": {
"hashes": {
"hash_examples": "ce61a69c46d47aeb",
"hash_full_prompts": "3cac9b759fcff7a0",
"hash_input_tokens": "87bd9eea77de9a9a",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_computer_science|5": {
"hashes": {
"hash_examples": "32805b52d7d5daab",
"hash_full_prompts": "010b0cca35070130",
"hash_input_tokens": "b6775c67bfa0c782",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_mathematics|5": {
"hashes": {
"hash_examples": "55da1a0a0bd33722",
"hash_full_prompts": "511422eb9eefc773",
"hash_input_tokens": "cbd8a9d6bbda7b3c",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_medicine|5": {
"hashes": {
"hash_examples": "c33e143163049176",
"hash_full_prompts": "c8cc1a82a51a046e",
"hash_input_tokens": "b3c40eab0fb83731",
"hash_cont_tokens": "aed3e7fd8adea27e"
},
"truncated": 0,
"non_truncated": 173,
"padded": 692,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:college_physics|5": {
"hashes": {
"hash_examples": "ebdab1cdb7e555df",
"hash_full_prompts": "e40721b5059c5818",
"hash_input_tokens": "c69c0bfb74e99180",
"hash_cont_tokens": "1ca37bb9b8be1c5d"
},
"truncated": 0,
"non_truncated": 102,
"padded": 408,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:computer_security|5": {
"hashes": {
"hash_examples": "a24fd7d08a560921",
"hash_full_prompts": "946c9be5964ac44a",
"hash_input_tokens": "70914e4af05d09b4",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:conceptual_physics|5": {
"hashes": {
"hash_examples": "8300977a79386993",
"hash_full_prompts": "506a4f6094cc40c9",
"hash_input_tokens": "dcb90ef41648f505",
"hash_cont_tokens": "26db9e6e7dfdac00"
},
"truncated": 0,
"non_truncated": 235,
"padded": 940,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:econometrics|5": {
"hashes": {
"hash_examples": "ddde36788a04a46f",
"hash_full_prompts": "4ed2703f27f1ed05",
"hash_input_tokens": "ef8da4b8e9eb5a76",
"hash_cont_tokens": "2ef49b394cfb87e1"
},
"truncated": 0,
"non_truncated": 114,
"padded": 456,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:electrical_engineering|5": {
"hashes": {
"hash_examples": "acbc5def98c19b3f",
"hash_full_prompts": "d8f4b3e11c23653c",
"hash_input_tokens": "1a5e9d41be2d9981",
"hash_cont_tokens": "adb5a1c5d57fbb41"
},
"truncated": 0,
"non_truncated": 145,
"padded": 580,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:elementary_mathematics|5": {
"hashes": {
"hash_examples": "146e61d07497a9bd",
"hash_full_prompts": "256d111bd15647ff",
"hash_input_tokens": "e0d51d86d03e1394",
"hash_cont_tokens": "d0782f141bcc895b"
},
"truncated": 0,
"non_truncated": 378,
"padded": 1512,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:formal_logic|5": {
"hashes": {
"hash_examples": "8635216e1909a03f",
"hash_full_prompts": "1171d04f3b1a11f5",
"hash_input_tokens": "4c75b7f176e01a01",
"hash_cont_tokens": "315a91fa1f805c93"
},
"truncated": 0,
"non_truncated": 126,
"padded": 504,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:global_facts|5": {
"hashes": {
"hash_examples": "30b315aa6353ee47",
"hash_full_prompts": "a7e56dbc074c7529",
"hash_input_tokens": "b83cb180a97c221d",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_biology|5": {
"hashes": {
"hash_examples": "c9136373af2180de",
"hash_full_prompts": "ad6e859ed978e04a",
"hash_input_tokens": "179a2ab8e131445a",
"hash_cont_tokens": "715bc46d18155135"
},
"truncated": 0,
"non_truncated": 310,
"padded": 1240,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_chemistry|5": {
"hashes": {
"hash_examples": "b0661bfa1add6404",
"hash_full_prompts": "6eb9c04bcc8a8f2a",
"hash_input_tokens": "1e6a4441b61eb8f6",
"hash_cont_tokens": "3d12f9b93cc609a2"
},
"truncated": 0,
"non_truncated": 203,
"padded": 812,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_computer_science|5": {
"hashes": {
"hash_examples": "80fc1d623a3d665f",
"hash_full_prompts": "8e51bc91c81cf8dd",
"hash_input_tokens": "4df816916ded3a8c",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_european_history|5": {
"hashes": {
"hash_examples": "854da6e5af0fe1a1",
"hash_full_prompts": "664a1f16c9f3195c",
"hash_input_tokens": "317d565e995cda09",
"hash_cont_tokens": "6d9c47e593859ccd"
},
"truncated": 0,
"non_truncated": 165,
"padded": 656,
"non_padded": 4,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_geography|5": {
"hashes": {
"hash_examples": "7dc963c7acd19ad8",
"hash_full_prompts": "f3acf911f4023c8a",
"hash_input_tokens": "0f17bdb1600d33f7",
"hash_cont_tokens": "84097c7fa87dfe61"
},
"truncated": 0,
"non_truncated": 198,
"padded": 792,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_government_and_politics|5": {
"hashes": {
"hash_examples": "1f675dcdebc9758f",
"hash_full_prompts": "066254feaa3158ae",
"hash_input_tokens": "ac3cca039d98e159",
"hash_cont_tokens": "86d43dfe026b5e6e"
},
"truncated": 0,
"non_truncated": 193,
"padded": 772,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_macroeconomics|5": {
"hashes": {
"hash_examples": "2fb32cf2d80f0b35",
"hash_full_prompts": "19a7fa502aa85c95",
"hash_input_tokens": "3e795472fd70b8e9",
"hash_cont_tokens": "99f5469b1de9a21b"
},
"truncated": 0,
"non_truncated": 390,
"padded": 1560,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_mathematics|5": {
"hashes": {
"hash_examples": "fd6646fdb5d58a1f",
"hash_full_prompts": "4f704e369778b5b0",
"hash_input_tokens": "37e154ab071591d5",
"hash_cont_tokens": "e215c84aa19ccb33"
},
"truncated": 0,
"non_truncated": 270,
"padded": 1078,
"non_padded": 2,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_microeconomics|5": {
"hashes": {
"hash_examples": "2118f21f71d87d84",
"hash_full_prompts": "4350f9e2240f8010",
"hash_input_tokens": "02d65d5e1ee6dea9",
"hash_cont_tokens": "dc8017437d84c710"
},
"truncated": 0,
"non_truncated": 238,
"padded": 952,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_physics|5": {
"hashes": {
"hash_examples": "dc3ce06378548565",
"hash_full_prompts": "5dc0d6831b66188f",
"hash_input_tokens": "6f0c932d12edce11",
"hash_cont_tokens": "b8152fcdcf86c673"
},
"truncated": 0,
"non_truncated": 151,
"padded": 596,
"non_padded": 8,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_psychology|5": {
"hashes": {
"hash_examples": "c8d1d98a40e11f2f",
"hash_full_prompts": "af2b097da6d50365",
"hash_input_tokens": "0e444eb7ba0a1fb0",
"hash_cont_tokens": "ac45cbb9009f81d9"
},
"truncated": 0,
"non_truncated": 545,
"padded": 2168,
"non_padded": 12,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_statistics|5": {
"hashes": {
"hash_examples": "666c8759b98ee4ff",
"hash_full_prompts": "c757694421d6d68d",
"hash_input_tokens": "4e1485b614b2dc7f",
"hash_cont_tokens": "9c9b68ee68272b16"
},
"truncated": 0,
"non_truncated": 216,
"padded": 864,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_us_history|5": {
"hashes": {
"hash_examples": "95fef1c4b7d3f81e",
"hash_full_prompts": "e34a028d0ddeec5e",
"hash_input_tokens": "b836c43a53625ee3",
"hash_cont_tokens": "cec285b624c15c10"
},
"truncated": 0,
"non_truncated": 204,
"padded": 816,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:high_school_world_history|5": {
"hashes": {
"hash_examples": "7e5085b6184b0322",
"hash_full_prompts": "1fa3d51392765601",
"hash_input_tokens": "bb11d024e2405b72",
"hash_cont_tokens": "2c02128f8f2f7539"
},
"truncated": 0,
"non_truncated": 237,
"padded": 948,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:human_aging|5": {
"hashes": {
"hash_examples": "c17333e7c7c10797",
"hash_full_prompts": "cac900721f9a1a94",
"hash_input_tokens": "2a1e5a167a3788c9",
"hash_cont_tokens": "faa94c4ec8e7be4e"
},
"truncated": 0,
"non_truncated": 223,
"padded": 892,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:human_sexuality|5": {
"hashes": {
"hash_examples": "4edd1e9045df5e3d",
"hash_full_prompts": "0d6567bafee0a13c",
"hash_input_tokens": "73b98b906cf7ce7f",
"hash_cont_tokens": "d642d34719fa5ff6"
},
"truncated": 0,
"non_truncated": 131,
"padded": 524,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:international_law|5": {
"hashes": {
"hash_examples": "db2fa00d771a062a",
"hash_full_prompts": "d018f9116479795e",
"hash_input_tokens": "5f7cf71ef19fdf7d",
"hash_cont_tokens": "f0d54717d3cdc783"
},
"truncated": 0,
"non_truncated": 121,
"padded": 484,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:jurisprudence|5": {
"hashes": {
"hash_examples": "e956f86b124076fe",
"hash_full_prompts": "1487e89a10ec58b7",
"hash_input_tokens": "0f30607df3aa1190",
"hash_cont_tokens": "d766ae8c3d361559"
},
"truncated": 0,
"non_truncated": 108,
"padded": 432,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:logical_fallacies|5": {
"hashes": {
"hash_examples": "956e0e6365ab79f1",
"hash_full_prompts": "677785b2181f9243",
"hash_input_tokens": "ac2bcfdf302d6dcd",
"hash_cont_tokens": "0fcca855210b4243"
},
"truncated": 0,
"non_truncated": 163,
"padded": 652,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:machine_learning|5": {
"hashes": {
"hash_examples": "397997cc6f4d581e",
"hash_full_prompts": "769ee14a2aea49bb",
"hash_input_tokens": "3d634b614f766363",
"hash_cont_tokens": "8b369a2ff9235b9d"
},
"truncated": 0,
"non_truncated": 112,
"padded": 448,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:management|5": {
"hashes": {
"hash_examples": "2bcbe6f6ca63d740",
"hash_full_prompts": "cb1ff9dac9582144",
"hash_input_tokens": "d2728b0835c2fa6d",
"hash_cont_tokens": "c77ad5f59321afa5"
},
"truncated": 0,
"non_truncated": 103,
"padded": 412,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:marketing|5": {
"hashes": {
"hash_examples": "8ddb20d964a1b065",
"hash_full_prompts": "9fc2114a187ad9a2",
"hash_input_tokens": "9472fa5111070553",
"hash_cont_tokens": "c94db408fe712d9b"
},
"truncated": 0,
"non_truncated": 234,
"padded": 936,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:medical_genetics|5": {
"hashes": {
"hash_examples": "182a71f4763d2cea",
"hash_full_prompts": "46a616fa51878959",
"hash_input_tokens": "53f9c4977b0be4e0",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 400,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:miscellaneous|5": {
"hashes": {
"hash_examples": "4c404fdbb4ca57fc",
"hash_full_prompts": "0813e1be36dbaae1",
"hash_input_tokens": "fca7aac8daf1d0c7",
"hash_cont_tokens": "60215a6f77eaf4d9"
},
"truncated": 0,
"non_truncated": 783,
"padded": 3132,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:moral_disputes|5": {
"hashes": {
"hash_examples": "60cbd2baa3fea5c9",
"hash_full_prompts": "1d14adebb9b62519",
"hash_input_tokens": "e06669b20b6dba74",
"hash_cont_tokens": "3ca55f92255c9f21"
},
"truncated": 0,
"non_truncated": 346,
"padded": 1384,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:moral_scenarios|5": {
"hashes": {
"hash_examples": "fd8b0431fbdd75ef",
"hash_full_prompts": "b80d3d236165e3de",
"hash_input_tokens": "d22a130cb0ce4eec",
"hash_cont_tokens": "a82e76a0738dc6ac"
},
"truncated": 0,
"non_truncated": 895,
"padded": 3551,
"non_padded": 29,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:nutrition|5": {
"hashes": {
"hash_examples": "71e55e2b829b6528",
"hash_full_prompts": "2bfb18e5fab8dea7",
"hash_input_tokens": "6213f514742fc41d",
"hash_cont_tokens": "b683842a2cf7cdd6"
},
"truncated": 0,
"non_truncated": 306,
"padded": 1224,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:philosophy|5": {
"hashes": {
"hash_examples": "a6d489a8d208fa4b",
"hash_full_prompts": "e8c0d5b6dae3ccc8",
"hash_input_tokens": "99ddb7e2f24852cc",
"hash_cont_tokens": "a545f25ae279a135"
},
"truncated": 0,
"non_truncated": 311,
"padded": 1244,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:prehistory|5": {
"hashes": {
"hash_examples": "6cc50f032a19acaa",
"hash_full_prompts": "4a6a1d3ab1bf28e4",
"hash_input_tokens": "246ab4e3ab88967a",
"hash_cont_tokens": "5a5ebca069b16663"
},
"truncated": 0,
"non_truncated": 324,
"padded": 1268,
"non_padded": 28,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:professional_accounting|5": {
"hashes": {
"hash_examples": "50f57ab32f5f6cea",
"hash_full_prompts": "e60129bd2d82ffc6",
"hash_input_tokens": "aaeb137f42b60e30",
"hash_cont_tokens": "e45018e60164d208"
},
"truncated": 0,
"non_truncated": 282,
"padded": 1120,
"non_padded": 8,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:professional_law|5": {
"hashes": {
"hash_examples": "a8fdc85c64f4b215",
"hash_full_prompts": "0dbb1d9b72dcea03",
"hash_input_tokens": "a4dd0c29f47b7e84",
"hash_cont_tokens": "b11002d08c03f837"
},
"truncated": 0,
"non_truncated": 1534,
"padded": 6136,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:professional_medicine|5": {
"hashes": {
"hash_examples": "c373a28a3050a73a",
"hash_full_prompts": "5e040f9ca68b089e",
"hash_input_tokens": "4e14a4f7fcb794ad",
"hash_cont_tokens": "11ce4c2ab1132810"
},
"truncated": 0,
"non_truncated": 272,
"padded": 1088,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:professional_psychology|5": {
"hashes": {
"hash_examples": "bf5254fe818356af",
"hash_full_prompts": "b386ecda8b87150e",
"hash_input_tokens": "d81a045694559382",
"hash_cont_tokens": "3835bfc898aacaa0"
},
"truncated": 0,
"non_truncated": 612,
"padded": 2448,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:public_relations|5": {
"hashes": {
"hash_examples": "b66d52e28e7d14e0",
"hash_full_prompts": "fe43562263e25677",
"hash_input_tokens": "1d492df812b3c419",
"hash_cont_tokens": "1692112db1aec618"
},
"truncated": 0,
"non_truncated": 110,
"padded": 440,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:security_studies|5": {
"hashes": {
"hash_examples": "514c14feaf000ad9",
"hash_full_prompts": "27d4a2ac541ef4b9",
"hash_input_tokens": "edb25052e8b3c231",
"hash_cont_tokens": "9801a1ce7f762a8b"
},
"truncated": 0,
"non_truncated": 245,
"padded": 980,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:sociology|5": {
"hashes": {
"hash_examples": "f6c9bc9d18c80870",
"hash_full_prompts": "c072ea7d1a1524f2",
"hash_input_tokens": "d10e1fc02e9bb000",
"hash_cont_tokens": "277e7d5b38c0960d"
},
"truncated": 0,
"non_truncated": 201,
"padded": 804,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:us_foreign_policy|5": {
"hashes": {
"hash_examples": "ed7b78629db6678f",
"hash_full_prompts": "341a97ca3e4d699d",
"hash_input_tokens": "357e68691f7bb5be",
"hash_cont_tokens": "dadea1de19dee95c"
},
"truncated": 0,
"non_truncated": 100,
"padded": 397,
"non_padded": 3,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:virology|5": {
"hashes": {
"hash_examples": "bc52ffdc3f9b994a",
"hash_full_prompts": "651d471e2eb8b5e9",
"hash_input_tokens": "b38fa14ee2b9cc9d",
"hash_cont_tokens": "a4a0852e6fb42244"
},
"truncated": 0,
"non_truncated": 166,
"padded": 664,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"lighteval|mmlu:world_religions|5": {
"hashes": {
"hash_examples": "ecdb4a4f94f62930",
"hash_full_prompts": "3773f03542ce44a3",
"hash_input_tokens": "e2e0b330ff7c67d5",
"hash_cont_tokens": "c96f2973fdf12010"
},
"truncated": 0,
"non_truncated": 171,
"padded": 684,
"non_padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
}
},
"summary_general": {
"hashes": {
"hash_examples": "341a076d0beb7048",
"hash_full_prompts": "a5c8f2b7ff4f5ae2",
"hash_input_tokens": "7d5d2fb20602eddc",
"hash_cont_tokens": "28aa09e44eee2d3e"
},
"truncated": 0,
"non_truncated": 14042,
"padded": 56074,
"non_padded": 94,
"num_truncated_few_shots": 0
}
}