{ "config_general": { "lighteval_sha": "?", "num_fewshot_seeds": 1, "override_batch_size": 1, "max_samples": null, "job_id": "", "start_time": 2216850.3533037, "end_time": 2217863.032406279, "total_evaluation_time_secondes": "1012.6791025786661", "model_name": "HuggingFaceH4/mistral-7b-odpo", "model_sha": "86720fec9e71465a02f39ce2126f1c14cbb0fb11", "model_dtype": "torch.bfloat16", "model_size": "13.99 GB", "config": null }, "results": { "leaderboard|mmlu:abstract_algebra|5": { "acc": 0.3, "acc_stderr": 0.04605661864718381 }, "leaderboard|mmlu:anatomy|5": { "acc": 0.5259259259259259, "acc_stderr": 0.04313531696750575 }, "leaderboard|mmlu:astronomy|5": { "acc": 0.5789473684210527, "acc_stderr": 0.04017901275981749 }, "leaderboard|mmlu:business_ethics|5": { "acc": 0.54, "acc_stderr": 0.05009082659620332 }, "leaderboard|mmlu:clinical_knowledge|5": { "acc": 0.6113207547169811, "acc_stderr": 0.030000485448675986 }, "leaderboard|mmlu:college_biology|5": { "acc": 0.625, "acc_stderr": 0.04048439222695598 }, "leaderboard|mmlu:college_chemistry|5": { "acc": 0.41, "acc_stderr": 0.049431107042371025 }, "leaderboard|mmlu:college_computer_science|5": { "acc": 0.49, "acc_stderr": 0.05024183937956912 }, "leaderboard|mmlu:college_mathematics|5": { "acc": 0.33, "acc_stderr": 0.04725815626252604 }, "leaderboard|mmlu:college_medicine|5": { "acc": 0.5838150289017341, "acc_stderr": 0.037585177754049466 }, "leaderboard|mmlu:college_physics|5": { "acc": 0.35294117647058826, "acc_stderr": 0.04755129616062946 }, "leaderboard|mmlu:computer_security|5": { "acc": 0.73, "acc_stderr": 0.04461960433384739 }, "leaderboard|mmlu:conceptual_physics|5": { "acc": 0.451063829787234, "acc_stderr": 0.03252909619613197 }, "leaderboard|mmlu:econometrics|5": { "acc": 0.4473684210526316, "acc_stderr": 0.04677473004491199 }, "leaderboard|mmlu:electrical_engineering|5": { "acc": 0.5172413793103449, "acc_stderr": 0.04164188720169375 }, "leaderboard|mmlu:elementary_mathematics|5": { "acc": 0.36507936507936506, "acc_stderr": 0.02479606060269995 }, "leaderboard|mmlu:formal_logic|5": { "acc": 0.4126984126984127, "acc_stderr": 0.04403438954768176 }, "leaderboard|mmlu:global_facts|5": { "acc": 0.37, "acc_stderr": 0.04852365870939099 }, "leaderboard|mmlu:high_school_biology|5": { "acc": 0.5806451612903226, "acc_stderr": 0.028071588901091845 }, "leaderboard|mmlu:high_school_chemistry|5": { "acc": 0.42857142857142855, "acc_stderr": 0.03481904844438804 }, "leaderboard|mmlu:high_school_computer_science|5": { "acc": 0.61, "acc_stderr": 0.04902071300001975 }, "leaderboard|mmlu:high_school_european_history|5": { "acc": 0.7333333333333333, "acc_stderr": 0.03453131801885417 }, "leaderboard|mmlu:high_school_geography|5": { "acc": 0.7272727272727273, "acc_stderr": 0.03173071239071724 }, "leaderboard|mmlu:high_school_government_and_politics|5": { "acc": 0.8238341968911918, "acc_stderr": 0.027493504244548057 }, "leaderboard|mmlu:high_school_macroeconomics|5": { "acc": 0.5512820512820513, "acc_stderr": 0.025217315184846486 }, "leaderboard|mmlu:high_school_mathematics|5": { "acc": 0.3333333333333333, "acc_stderr": 0.028742040903948482 }, "leaderboard|mmlu:high_school_microeconomics|5": { "acc": 0.5756302521008403, "acc_stderr": 0.032104790510157764 }, "leaderboard|mmlu:high_school_physics|5": { "acc": 0.3509933774834437, "acc_stderr": 0.03896981964257375 }, "leaderboard|mmlu:high_school_psychology|5": { "acc": 0.7908256880733945, "acc_stderr": 0.017437937173343233 }, "leaderboard|mmlu:high_school_statistics|5": { "acc": 0.37962962962962965, "acc_stderr": 0.03309682581119035 }, "leaderboard|mmlu:high_school_us_history|5": { "acc": 0.7156862745098039, "acc_stderr": 0.03166009679399812 }, "leaderboard|mmlu:high_school_world_history|5": { "acc": 0.7257383966244726, "acc_stderr": 0.029041333510598035 }, "leaderboard|mmlu:human_aging|5": { "acc": 0.6367713004484304, "acc_stderr": 0.032277904428505 }, "leaderboard|mmlu:human_sexuality|5": { "acc": 0.6641221374045801, "acc_stderr": 0.041423137719966634 }, "leaderboard|mmlu:international_law|5": { "acc": 0.7272727272727273, "acc_stderr": 0.04065578140908705 }, "leaderboard|mmlu:jurisprudence|5": { "acc": 0.7129629629629629, "acc_stderr": 0.043733130409147614 }, "leaderboard|mmlu:logical_fallacies|5": { "acc": 0.6380368098159509, "acc_stderr": 0.037757007291414416 }, "leaderboard|mmlu:machine_learning|5": { "acc": 0.4375, "acc_stderr": 0.04708567521880525 }, "leaderboard|mmlu:management|5": { "acc": 0.6990291262135923, "acc_stderr": 0.04541609446503948 }, "leaderboard|mmlu:marketing|5": { "acc": 0.7991452991452992, "acc_stderr": 0.02624677294689048 }, "leaderboard|mmlu:medical_genetics|5": { "acc": 0.66, "acc_stderr": 0.04760952285695237 }, "leaderboard|mmlu:miscellaneous|5": { "acc": 0.7701149425287356, "acc_stderr": 0.015046301846691814 }, "leaderboard|mmlu:moral_disputes|5": { "acc": 0.6502890173410405, "acc_stderr": 0.025674281456531018 }, "leaderboard|mmlu:moral_scenarios|5": { "acc": 0.31620111731843575, "acc_stderr": 0.015551673652172552 }, "leaderboard|mmlu:nutrition|5": { "acc": 0.6372549019607843, "acc_stderr": 0.027530078447110314 }, "leaderboard|mmlu:philosophy|5": { "acc": 0.6688102893890675, "acc_stderr": 0.0267306207280049 }, "leaderboard|mmlu:prehistory|5": { "acc": 0.6481481481481481, "acc_stderr": 0.026571483480719974 }, "leaderboard|mmlu:professional_accounting|5": { "acc": 0.425531914893617, "acc_stderr": 0.02949482760014437 }, "leaderboard|mmlu:professional_law|5": { "acc": 0.3924380704041721, "acc_stderr": 0.01247124366922912 }, "leaderboard|mmlu:professional_medicine|5": { "acc": 0.5955882352941176, "acc_stderr": 0.029812630701569743 }, "leaderboard|mmlu:professional_psychology|5": { "acc": 0.5816993464052288, "acc_stderr": 0.019955975145835546 }, "leaderboard|mmlu:public_relations|5": { "acc": 0.6090909090909091, "acc_stderr": 0.04673752333670238 }, "leaderboard|mmlu:security_studies|5": { "acc": 0.5591836734693878, "acc_stderr": 0.03178419114175363 }, "leaderboard|mmlu:sociology|5": { "acc": 0.7064676616915423, "acc_stderr": 0.03220024104534205 }, "leaderboard|mmlu:us_foreign_policy|5": { "acc": 0.77, "acc_stderr": 0.042295258468165065 }, "leaderboard|mmlu:virology|5": { "acc": 0.4819277108433735, "acc_stderr": 0.03889951252827217 }, "leaderboard|mmlu:world_religions|5": { "acc": 0.8011695906432749, "acc_stderr": 0.030611116557432528 }, "leaderboard|mmlu:_average|5": { "acc": 0.5711742702709757, "acc_stderr": 0.03544636292918608 } }, "versions": { "leaderboard|mmlu:abstract_algebra|5": 0, "leaderboard|mmlu:anatomy|5": 0, "leaderboard|mmlu:astronomy|5": 0, "leaderboard|mmlu:business_ethics|5": 0, "leaderboard|mmlu:clinical_knowledge|5": 0, "leaderboard|mmlu:college_biology|5": 0, "leaderboard|mmlu:college_chemistry|5": 0, "leaderboard|mmlu:college_computer_science|5": 0, "leaderboard|mmlu:college_mathematics|5": 0, "leaderboard|mmlu:college_medicine|5": 0, "leaderboard|mmlu:college_physics|5": 0, "leaderboard|mmlu:computer_security|5": 0, "leaderboard|mmlu:conceptual_physics|5": 0, "leaderboard|mmlu:econometrics|5": 0, "leaderboard|mmlu:electrical_engineering|5": 0, "leaderboard|mmlu:elementary_mathematics|5": 0, "leaderboard|mmlu:formal_logic|5": 0, "leaderboard|mmlu:global_facts|5": 0, "leaderboard|mmlu:high_school_biology|5": 0, "leaderboard|mmlu:high_school_chemistry|5": 0, "leaderboard|mmlu:high_school_computer_science|5": 0, "leaderboard|mmlu:high_school_european_history|5": 0, "leaderboard|mmlu:high_school_geography|5": 0, "leaderboard|mmlu:high_school_government_and_politics|5": 0, "leaderboard|mmlu:high_school_macroeconomics|5": 0, "leaderboard|mmlu:high_school_mathematics|5": 0, "leaderboard|mmlu:high_school_microeconomics|5": 0, "leaderboard|mmlu:high_school_physics|5": 0, "leaderboard|mmlu:high_school_psychology|5": 0, "leaderboard|mmlu:high_school_statistics|5": 0, "leaderboard|mmlu:high_school_us_history|5": 0, "leaderboard|mmlu:high_school_world_history|5": 0, "leaderboard|mmlu:human_aging|5": 0, "leaderboard|mmlu:human_sexuality|5": 0, "leaderboard|mmlu:international_law|5": 0, "leaderboard|mmlu:jurisprudence|5": 0, "leaderboard|mmlu:logical_fallacies|5": 0, "leaderboard|mmlu:machine_learning|5": 0, "leaderboard|mmlu:management|5": 0, "leaderboard|mmlu:marketing|5": 0, "leaderboard|mmlu:medical_genetics|5": 0, "leaderboard|mmlu:miscellaneous|5": 0, "leaderboard|mmlu:moral_disputes|5": 0, "leaderboard|mmlu:moral_scenarios|5": 0, "leaderboard|mmlu:nutrition|5": 0, "leaderboard|mmlu:philosophy|5": 0, "leaderboard|mmlu:prehistory|5": 0, "leaderboard|mmlu:professional_accounting|5": 0, "leaderboard|mmlu:professional_law|5": 0, "leaderboard|mmlu:professional_medicine|5": 0, "leaderboard|mmlu:professional_psychology|5": 0, "leaderboard|mmlu:public_relations|5": 0, "leaderboard|mmlu:security_studies|5": 0, "leaderboard|mmlu:sociology|5": 0, "leaderboard|mmlu:us_foreign_policy|5": 0, "leaderboard|mmlu:virology|5": 0, "leaderboard|mmlu:world_religions|5": 0 }, "config_tasks": { "leaderboard|mmlu:abstract_algebra": { "name": "mmlu:abstract_algebra", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "abstract_algebra", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 100, "effective_num_docs": 100, "trust_dataset": true }, "leaderboard|mmlu:anatomy": { "name": "mmlu:anatomy", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "anatomy", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 135, "effective_num_docs": 135, "trust_dataset": true }, "leaderboard|mmlu:astronomy": { "name": "mmlu:astronomy", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "astronomy", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 152, "effective_num_docs": 152, "trust_dataset": true }, "leaderboard|mmlu:business_ethics": { "name": "mmlu:business_ethics", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "business_ethics", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 100, "effective_num_docs": 100, "trust_dataset": true }, "leaderboard|mmlu:clinical_knowledge": { "name": "mmlu:clinical_knowledge", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "clinical_knowledge", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 265, "effective_num_docs": 265, "trust_dataset": true }, "leaderboard|mmlu:college_biology": { "name": "mmlu:college_biology", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "college_biology", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 144, "effective_num_docs": 144, "trust_dataset": true }, "leaderboard|mmlu:college_chemistry": { "name": "mmlu:college_chemistry", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "college_chemistry", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 100, "effective_num_docs": 100, "trust_dataset": true }, "leaderboard|mmlu:college_computer_science": { "name": "mmlu:college_computer_science", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "college_computer_science", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 100, "effective_num_docs": 100, "trust_dataset": true }, "leaderboard|mmlu:college_mathematics": { "name": "mmlu:college_mathematics", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "college_mathematics", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 100, "effective_num_docs": 100, "trust_dataset": true }, "leaderboard|mmlu:college_medicine": { "name": "mmlu:college_medicine", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "college_medicine", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 173, "effective_num_docs": 173, "trust_dataset": true }, "leaderboard|mmlu:college_physics": { "name": "mmlu:college_physics", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "college_physics", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 102, "effective_num_docs": 102, "trust_dataset": true }, "leaderboard|mmlu:computer_security": { "name": "mmlu:computer_security", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "computer_security", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 100, "effective_num_docs": 100, "trust_dataset": true }, "leaderboard|mmlu:conceptual_physics": { "name": "mmlu:conceptual_physics", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "conceptual_physics", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 235, "effective_num_docs": 235, "trust_dataset": true }, "leaderboard|mmlu:econometrics": { "name": "mmlu:econometrics", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "econometrics", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 114, "effective_num_docs": 114, "trust_dataset": true }, "leaderboard|mmlu:electrical_engineering": { "name": "mmlu:electrical_engineering", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "electrical_engineering", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 145, "effective_num_docs": 145, "trust_dataset": true }, "leaderboard|mmlu:elementary_mathematics": { "name": "mmlu:elementary_mathematics", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "elementary_mathematics", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 378, "effective_num_docs": 378, "trust_dataset": true }, "leaderboard|mmlu:formal_logic": { "name": "mmlu:formal_logic", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "formal_logic", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 126, "effective_num_docs": 126, "trust_dataset": true }, "leaderboard|mmlu:global_facts": { "name": "mmlu:global_facts", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "global_facts", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 100, "effective_num_docs": 100, "trust_dataset": true }, "leaderboard|mmlu:high_school_biology": { "name": "mmlu:high_school_biology", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "high_school_biology", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 310, "effective_num_docs": 310, "trust_dataset": true }, "leaderboard|mmlu:high_school_chemistry": { "name": "mmlu:high_school_chemistry", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "high_school_chemistry", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 203, "effective_num_docs": 203, "trust_dataset": true }, "leaderboard|mmlu:high_school_computer_science": { "name": "mmlu:high_school_computer_science", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "high_school_computer_science", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 100, "effective_num_docs": 100, "trust_dataset": true }, "leaderboard|mmlu:high_school_european_history": { "name": "mmlu:high_school_european_history", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "high_school_european_history", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 165, "effective_num_docs": 165, "trust_dataset": true }, "leaderboard|mmlu:high_school_geography": { "name": "mmlu:high_school_geography", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "high_school_geography", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 198, "effective_num_docs": 198, "trust_dataset": true }, "leaderboard|mmlu:high_school_government_and_politics": { "name": "mmlu:high_school_government_and_politics", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "high_school_government_and_politics", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 193, "effective_num_docs": 193, "trust_dataset": true }, "leaderboard|mmlu:high_school_macroeconomics": { "name": "mmlu:high_school_macroeconomics", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "high_school_macroeconomics", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 390, "effective_num_docs": 390, "trust_dataset": true }, "leaderboard|mmlu:high_school_mathematics": { "name": "mmlu:high_school_mathematics", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "high_school_mathematics", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 270, "effective_num_docs": 270, "trust_dataset": true }, "leaderboard|mmlu:high_school_microeconomics": { "name": "mmlu:high_school_microeconomics", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "high_school_microeconomics", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 238, "effective_num_docs": 238, "trust_dataset": true }, "leaderboard|mmlu:high_school_physics": { "name": "mmlu:high_school_physics", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "high_school_physics", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 151, "effective_num_docs": 151, "trust_dataset": true }, "leaderboard|mmlu:high_school_psychology": { "name": "mmlu:high_school_psychology", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "high_school_psychology", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 545, "effective_num_docs": 545, "trust_dataset": true }, "leaderboard|mmlu:high_school_statistics": { "name": "mmlu:high_school_statistics", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "high_school_statistics", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 216, "effective_num_docs": 216, "trust_dataset": true }, "leaderboard|mmlu:high_school_us_history": { "name": "mmlu:high_school_us_history", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "high_school_us_history", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 204, "effective_num_docs": 204, "trust_dataset": true }, "leaderboard|mmlu:high_school_world_history": { "name": "mmlu:high_school_world_history", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "high_school_world_history", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 237, "effective_num_docs": 237, "trust_dataset": true }, "leaderboard|mmlu:human_aging": { "name": "mmlu:human_aging", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "human_aging", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 223, "effective_num_docs": 223, "trust_dataset": true }, "leaderboard|mmlu:human_sexuality": { "name": "mmlu:human_sexuality", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "human_sexuality", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 131, "effective_num_docs": 131, "trust_dataset": true }, "leaderboard|mmlu:international_law": { "name": "mmlu:international_law", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "international_law", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 121, "effective_num_docs": 121, "trust_dataset": true }, "leaderboard|mmlu:jurisprudence": { "name": "mmlu:jurisprudence", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "jurisprudence", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 108, "effective_num_docs": 108, "trust_dataset": true }, "leaderboard|mmlu:logical_fallacies": { "name": "mmlu:logical_fallacies", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "logical_fallacies", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 163, "effective_num_docs": 163, "trust_dataset": true }, "leaderboard|mmlu:machine_learning": { "name": "mmlu:machine_learning", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "machine_learning", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 112, "effective_num_docs": 112, "trust_dataset": true }, "leaderboard|mmlu:management": { "name": "mmlu:management", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "management", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 103, "effective_num_docs": 103, "trust_dataset": true }, "leaderboard|mmlu:marketing": { "name": "mmlu:marketing", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "marketing", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 234, "effective_num_docs": 234, "trust_dataset": true }, "leaderboard|mmlu:medical_genetics": { "name": "mmlu:medical_genetics", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "medical_genetics", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 100, "effective_num_docs": 100, "trust_dataset": true }, "leaderboard|mmlu:miscellaneous": { "name": "mmlu:miscellaneous", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "miscellaneous", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 783, "effective_num_docs": 783, "trust_dataset": true }, "leaderboard|mmlu:moral_disputes": { "name": "mmlu:moral_disputes", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "moral_disputes", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 346, "effective_num_docs": 346, "trust_dataset": true }, "leaderboard|mmlu:moral_scenarios": { "name": "mmlu:moral_scenarios", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "moral_scenarios", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 895, "effective_num_docs": 895, "trust_dataset": true }, "leaderboard|mmlu:nutrition": { "name": "mmlu:nutrition", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "nutrition", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 306, "effective_num_docs": 306, "trust_dataset": true }, "leaderboard|mmlu:philosophy": { "name": "mmlu:philosophy", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "philosophy", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 311, "effective_num_docs": 311, "trust_dataset": true }, "leaderboard|mmlu:prehistory": { "name": "mmlu:prehistory", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "prehistory", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 324, "effective_num_docs": 324, "trust_dataset": true }, "leaderboard|mmlu:professional_accounting": { "name": "mmlu:professional_accounting", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "professional_accounting", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 282, "effective_num_docs": 282, "trust_dataset": true }, "leaderboard|mmlu:professional_law": { "name": "mmlu:professional_law", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "professional_law", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 1534, "effective_num_docs": 1534, "trust_dataset": true }, "leaderboard|mmlu:professional_medicine": { "name": "mmlu:professional_medicine", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "professional_medicine", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 272, "effective_num_docs": 272, "trust_dataset": true }, "leaderboard|mmlu:professional_psychology": { "name": "mmlu:professional_psychology", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "professional_psychology", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 612, "effective_num_docs": 612, "trust_dataset": true }, "leaderboard|mmlu:public_relations": { "name": "mmlu:public_relations", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "public_relations", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 110, "effective_num_docs": 110, "trust_dataset": true }, "leaderboard|mmlu:security_studies": { "name": "mmlu:security_studies", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "security_studies", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 245, "effective_num_docs": 245, "trust_dataset": true }, "leaderboard|mmlu:sociology": { "name": "mmlu:sociology", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "sociology", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 201, "effective_num_docs": 201, "trust_dataset": true }, "leaderboard|mmlu:us_foreign_policy": { "name": "mmlu:us_foreign_policy", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "us_foreign_policy", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 100, "effective_num_docs": 100, "trust_dataset": true }, "leaderboard|mmlu:virology": { "name": "mmlu:virology", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "virology", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 166, "effective_num_docs": 166, "trust_dataset": true }, "leaderboard|mmlu:world_religions": { "name": "mmlu:world_religions", "prompt_function": "mmlu_harness", "hf_repo": "lighteval/mmlu", "hf_subset": "world_religions", "metric": [ "loglikelihood_acc" ], "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "stop_sequence": [ "\n" ], "output_regex": null, "frozen": false, "suite": [ "leaderboard", "mmlu" ], "original_num_docs": 171, "effective_num_docs": 171, "trust_dataset": true } }, "summary_tasks": { "leaderboard|mmlu:abstract_algebra|5": { "hashes": { "hash_examples": "4c76229e00c9c0e9", "hash_full_prompts": "c3130662e7cc91d3", "hash_input_tokens": "b617a339eb3b3eb7", "hash_cont_tokens": "9e1c9ca2c51de57e" }, "truncated": 0, "non_truncated": 100, "padded": 400, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:anatomy|5": { "hashes": { "hash_examples": "6a1f8104dccbd33b", "hash_full_prompts": "05a97165c871964d", "hash_input_tokens": "14e9962d3b1706ea", "hash_cont_tokens": "025910e68cf29c3d" }, "truncated": 0, "non_truncated": 135, "padded": 540, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:astronomy|5": { "hashes": { "hash_examples": "1302effa3a76ce4c", "hash_full_prompts": "68355efd63c4de09", "hash_input_tokens": "44bd837a633de965", "hash_cont_tokens": "1a66fd04f03e0517" }, "truncated": 0, "non_truncated": 152, "padded": 608, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:business_ethics|5": { "hashes": { "hash_examples": "03cb8bce5336419a", "hash_full_prompts": "8f440e0924442390", "hash_input_tokens": "16217026443317e4", "hash_cont_tokens": "9e1c9ca2c51de57e" }, "truncated": 0, "non_truncated": 100, "padded": 400, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:clinical_knowledge|5": { "hashes": { "hash_examples": "ffbb9c7b2be257f9", "hash_full_prompts": "595feee698057167", "hash_input_tokens": "896539d33768791a", "hash_cont_tokens": "de872053260a1588" }, "truncated": 0, "non_truncated": 265, "padded": 1060, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:college_biology|5": { "hashes": { "hash_examples": "3ee77f176f38eb8e", "hash_full_prompts": "dcd354e231c805ee", "hash_input_tokens": "56c8c2aa3e63f094", "hash_cont_tokens": "9ace296b3e00bba3" }, "truncated": 0, "non_truncated": 144, "padded": 576, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:college_chemistry|5": { "hashes": { "hash_examples": "ce61a69c46d47aeb", "hash_full_prompts": "a520ca0fd7868631", "hash_input_tokens": "0049443634b997e3", "hash_cont_tokens": "9e1c9ca2c51de57e" }, "truncated": 0, "non_truncated": 100, "padded": 400, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:college_computer_science|5": { "hashes": { "hash_examples": "32805b52d7d5daab", "hash_full_prompts": "ae8f53adf4b6a6e3", "hash_input_tokens": "894bbabad16b75a1", "hash_cont_tokens": "9e1c9ca2c51de57e" }, "truncated": 0, "non_truncated": 100, "padded": 400, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:college_mathematics|5": { "hashes": { "hash_examples": "55da1a0a0bd33722", "hash_full_prompts": "39cd3169534550f3", "hash_input_tokens": "5bfda6d5c7af507c", "hash_cont_tokens": "9e1c9ca2c51de57e" }, "truncated": 0, "non_truncated": 100, "padded": 400, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:college_medicine|5": { "hashes": { "hash_examples": "c33e143163049176", "hash_full_prompts": "bca31c5d5f3a0e4a", "hash_input_tokens": "13452a8f3d9b4b3d", "hash_cont_tokens": "c80c0b5489bdbc5a" }, "truncated": 0, "non_truncated": 173, "padded": 692, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:college_physics|5": { "hashes": { "hash_examples": "ebdab1cdb7e555df", "hash_full_prompts": "f819d74029f4a018", "hash_input_tokens": "57c45bd30a378407", "hash_cont_tokens": "569fcb9ac44734ae" }, "truncated": 0, "non_truncated": 102, "padded": 408, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:computer_security|5": { "hashes": { "hash_examples": "a24fd7d08a560921", "hash_full_prompts": "d0f4d31508009cd6", "hash_input_tokens": "0af9499b3cb67d95", "hash_cont_tokens": "9e1c9ca2c51de57e" }, "truncated": 0, "non_truncated": 100, "padded": 400, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:conceptual_physics|5": { "hashes": { "hash_examples": "8300977a79386993", "hash_full_prompts": "6e2f619c2f0da087", "hash_input_tokens": "00b0c9ac0fc683e8", "hash_cont_tokens": "6e88c64c1a76752a" }, "truncated": 0, "non_truncated": 235, "padded": 940, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:econometrics|5": { "hashes": { "hash_examples": "ddde36788a04a46f", "hash_full_prompts": "3f81ad69c49e1691", "hash_input_tokens": "9314d720a35c62b6", "hash_cont_tokens": "a315e0e16c922c3c" }, "truncated": 0, "non_truncated": 114, "padded": 456, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:electrical_engineering|5": { "hashes": { "hash_examples": "acbc5def98c19b3f", "hash_full_prompts": "f5ab31c3b1d51682", "hash_input_tokens": "863125c49d60d6a4", "hash_cont_tokens": "44c72e6a7422c304" }, "truncated": 0, "non_truncated": 145, "padded": 580, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:elementary_mathematics|5": { "hashes": { "hash_examples": "146e61d07497a9bd", "hash_full_prompts": "3e6f38a631108730", "hash_input_tokens": "ed58bf384a932c74", "hash_cont_tokens": "cac0a6c304791bb7" }, "truncated": 0, "non_truncated": 378, "padded": 1512, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:formal_logic|5": { "hashes": { "hash_examples": "8635216e1909a03f", "hash_full_prompts": "2db73981fed3cf02", "hash_input_tokens": "78b4957033a990a3", "hash_cont_tokens": "8801fad3bbc72e57" }, "truncated": 0, "non_truncated": 126, "padded": 504, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:global_facts|5": { "hashes": { "hash_examples": "30b315aa6353ee47", "hash_full_prompts": "3b5eef82483c02a6", "hash_input_tokens": "65cf7f73e20e1bc1", "hash_cont_tokens": "9e1c9ca2c51de57e" }, "truncated": 0, "non_truncated": 100, "padded": 400, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:high_school_biology|5": { "hashes": { "hash_examples": "c9136373af2180de", "hash_full_prompts": "97a500550ada1104", "hash_input_tokens": "1c299ee1038cf043", "hash_cont_tokens": "2d57d9e2c5a1fd64" }, "truncated": 0, "non_truncated": 310, "padded": 1240, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:high_school_chemistry|5": { "hashes": { "hash_examples": "b0661bfa1add6404", "hash_full_prompts": "7d42623066fb1e8e", "hash_input_tokens": "38aa4f175383a891", "hash_cont_tokens": "bb0fd92673ddfb31" }, "truncated": 0, "non_truncated": 203, "padded": 812, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:high_school_computer_science|5": { "hashes": { "hash_examples": "80fc1d623a3d665f", "hash_full_prompts": "2af192ae1faf8c63", "hash_input_tokens": "5a1229c044a91023", "hash_cont_tokens": "9e1c9ca2c51de57e" }, "truncated": 0, "non_truncated": 100, "padded": 400, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:high_school_european_history|5": { "hashes": { "hash_examples": "854da6e5af0fe1a1", "hash_full_prompts": "189af6182c551e23", "hash_input_tokens": "f0e54538395a12c1", "hash_cont_tokens": "16e494cddccc4a04" }, "truncated": 0, "non_truncated": 165, "padded": 656, "non_padded": 4, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:high_school_geography|5": { "hashes": { "hash_examples": "7dc963c7acd19ad8", "hash_full_prompts": "0906f591b7f79a10", "hash_input_tokens": "40aceb5dde64fe64", "hash_cont_tokens": "16b7f65a07b3d47b" }, "truncated": 0, "non_truncated": 198, "padded": 792, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:high_school_government_and_politics|5": { "hashes": { "hash_examples": "1f675dcdebc9758f", "hash_full_prompts": "7223a4aebabcdcbd", "hash_input_tokens": "96a4444be05f5ede", "hash_cont_tokens": "476e87fd675136aa" }, "truncated": 0, "non_truncated": 193, "padded": 772, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:high_school_macroeconomics|5": { "hashes": { "hash_examples": "2fb32cf2d80f0b35", "hash_full_prompts": "9c32c005a808c453", "hash_input_tokens": "a78ba4100d84ecc5", "hash_cont_tokens": "b0c7b4c5f7bdf3e7" }, "truncated": 0, "non_truncated": 390, "padded": 1560, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:high_school_mathematics|5": { "hashes": { "hash_examples": "fd6646fdb5d58a1f", "hash_full_prompts": "61845b4e3d0eafe9", "hash_input_tokens": "72e903543d60e864", "hash_cont_tokens": "1a05d6ff49846fd1" }, "truncated": 0, "non_truncated": 270, "padded": 1080, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:high_school_microeconomics|5": { "hashes": { "hash_examples": "2118f21f71d87d84", "hash_full_prompts": "020f7f6e77a6b641", "hash_input_tokens": "8b428c95ab32cdeb", "hash_cont_tokens": "0e7f0645ffffd6cd" }, "truncated": 0, "non_truncated": 238, "padded": 949, "non_padded": 3, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:high_school_physics|5": { "hashes": { "hash_examples": "dc3ce06378548565", "hash_full_prompts": "571b28c0f53b90a0", "hash_input_tokens": "0862d9ba4184f5e6", "hash_cont_tokens": "41ca6560b8c10183" }, "truncated": 0, "non_truncated": 151, "padded": 604, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:high_school_psychology|5": { "hashes": { "hash_examples": "c8d1d98a40e11f2f", "hash_full_prompts": "896e9a19476b90ed", "hash_input_tokens": "539679e51cf0dadf", "hash_cont_tokens": "53a17ff85c607844" }, "truncated": 0, "non_truncated": 545, "padded": 2178, "non_padded": 2, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:high_school_statistics|5": { "hashes": { "hash_examples": "666c8759b98ee4ff", "hash_full_prompts": "9ca986b471235e07", "hash_input_tokens": "d2df2e9ec9cc5ff9", "hash_cont_tokens": "bc9063ad140cc941" }, "truncated": 0, "non_truncated": 216, "padded": 864, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:high_school_us_history|5": { "hashes": { "hash_examples": "95fef1c4b7d3f81e", "hash_full_prompts": "b4616b587c96945d", "hash_input_tokens": "1b9a891fe1e28335", "hash_cont_tokens": "5cf777085ba01096" }, "truncated": 0, "non_truncated": 204, "padded": 816, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:high_school_world_history|5": { "hashes": { "hash_examples": "7e5085b6184b0322", "hash_full_prompts": "e790690fb05fa0d1", "hash_input_tokens": "60fc90341eab6ac2", "hash_cont_tokens": "152af2d9e4830517" }, "truncated": 0, "non_truncated": 237, "padded": 948, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:human_aging|5": { "hashes": { "hash_examples": "c17333e7c7c10797", "hash_full_prompts": "327f9f213650f977", "hash_input_tokens": "3527cd9b1efd6b7c", "hash_cont_tokens": "da4d9eaa044021dd" }, "truncated": 0, "non_truncated": 223, "padded": 892, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:human_sexuality|5": { "hashes": { "hash_examples": "4edd1e9045df5e3d", "hash_full_prompts": "0b6a52b3d3863745", "hash_input_tokens": "7a97714c98ec3df0", "hash_cont_tokens": "1b99e384258a4eeb" }, "truncated": 0, "non_truncated": 131, "padded": 524, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:international_law|5": { "hashes": { "hash_examples": "db2fa00d771a062a", "hash_full_prompts": "429b8d84640cdf75", "hash_input_tokens": "7e572d7ea1a3e509", "hash_cont_tokens": "cbf02c30cdded208" }, "truncated": 0, "non_truncated": 121, "padded": 484, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:jurisprudence|5": { "hashes": { "hash_examples": "e956f86b124076fe", "hash_full_prompts": "571f9505d9f6fa3d", "hash_input_tokens": "e771bba2041d48e1", "hash_cont_tokens": "4b248cf879d97a50" }, "truncated": 0, "non_truncated": 108, "padded": 424, "non_padded": 8, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:logical_fallacies|5": { "hashes": { "hash_examples": "956e0e6365ab79f1", "hash_full_prompts": "abf6d18a0245c552", "hash_input_tokens": "7016f4de62d61e8f", "hash_cont_tokens": "6d9c35172b158838" }, "truncated": 0, "non_truncated": 163, "padded": 632, "non_padded": 20, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:machine_learning|5": { "hashes": { "hash_examples": "397997cc6f4d581e", "hash_full_prompts": "8b9115560a815fab", "hash_input_tokens": "a718bd4f9fb8eab0", "hash_cont_tokens": "66c3ec85fee2fc98" }, "truncated": 0, "non_truncated": 112, "padded": 448, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:management|5": { "hashes": { "hash_examples": "2bcbe6f6ca63d740", "hash_full_prompts": "f18191cecdc130be", "hash_input_tokens": "dd6a99048a822e5a", "hash_cont_tokens": "5e2470abd1fb9d10" }, "truncated": 0, "non_truncated": 103, "padded": 412, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:marketing|5": { "hashes": { "hash_examples": "8ddb20d964a1b065", "hash_full_prompts": "ad9ff50246bf7d49", "hash_input_tokens": "fb59075fb468b035", "hash_cont_tokens": "27fe68d9630f8999" }, "truncated": 0, "non_truncated": 234, "padded": 916, "non_padded": 20, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:medical_genetics|5": { "hashes": { "hash_examples": "182a71f4763d2cea", "hash_full_prompts": "e95c568978da29c1", "hash_input_tokens": "6ec76fde9dca6553", "hash_cont_tokens": "9e1c9ca2c51de57e" }, "truncated": 0, "non_truncated": 100, "padded": 400, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:miscellaneous|5": { "hashes": { "hash_examples": "4c404fdbb4ca57fc", "hash_full_prompts": "468305dc71aa217c", "hash_input_tokens": "9ab5ce7430aeeff7", "hash_cont_tokens": "dfa423a160edd337" }, "truncated": 0, "non_truncated": 783, "padded": 3128, "non_padded": 4, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:moral_disputes|5": { "hashes": { "hash_examples": "60cbd2baa3fea5c9", "hash_full_prompts": "7a24f9c6f83420f2", "hash_input_tokens": "17712020d9c38d0f", "hash_cont_tokens": "bef966e6669349be" }, "truncated": 0, "non_truncated": 346, "padded": 1380, "non_padded": 4, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:moral_scenarios|5": { "hashes": { "hash_examples": "fd8b0431fbdd75ef", "hash_full_prompts": "8723c262038898c8", "hash_input_tokens": "a4a16b58339a1b08", "hash_cont_tokens": "a7bfdd944d86bcb5" }, "truncated": 0, "non_truncated": 895, "padded": 3575, "non_padded": 5, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:nutrition|5": { "hashes": { "hash_examples": "71e55e2b829b6528", "hash_full_prompts": "cc3034694d476c82", "hash_input_tokens": "4589c74e55901b66", "hash_cont_tokens": "fcda7736026f2449" }, "truncated": 0, "non_truncated": 306, "padded": 1224, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:philosophy|5": { "hashes": { "hash_examples": "a6d489a8d208fa4b", "hash_full_prompts": "d92988a447a6ce08", "hash_input_tokens": "fa85837aaec1aef6", "hash_cont_tokens": "0f39b851342e8986" }, "truncated": 0, "non_truncated": 311, "padded": 1244, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:prehistory|5": { "hashes": { "hash_examples": "6cc50f032a19acaa", "hash_full_prompts": "0d0d33c8f9bed861", "hash_input_tokens": "735ed41425466729", "hash_cont_tokens": "b60e45d3e9856b35" }, "truncated": 0, "non_truncated": 324, "padded": 1280, "non_padded": 16, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:professional_accounting|5": { "hashes": { "hash_examples": "50f57ab32f5f6cea", "hash_full_prompts": "9c809e7b8ca8ec1f", "hash_input_tokens": "b0c851d675e5355b", "hash_cont_tokens": "a0c4e121b7293818" }, "truncated": 0, "non_truncated": 282, "padded": 1112, "non_padded": 16, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:professional_law|5": { "hashes": { "hash_examples": "a8fdc85c64f4b215", "hash_full_prompts": "246b3e8a9054a5de", "hash_input_tokens": "c27b16ef17f69218", "hash_cont_tokens": "68b662abeba54fbc" }, "truncated": 0, "non_truncated": 1534, "padded": 6136, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:professional_medicine|5": { "hashes": { "hash_examples": "c373a28a3050a73a", "hash_full_prompts": "f66dd653b5c5022b", "hash_input_tokens": "955343929a6793cb", "hash_cont_tokens": "6caeac5412bb4a09" }, "truncated": 0, "non_truncated": 272, "padded": 1088, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:professional_psychology|5": { "hashes": { "hash_examples": "bf5254fe818356af", "hash_full_prompts": "03228f18e58fb42c", "hash_input_tokens": "a18463f8187e4322", "hash_cont_tokens": "79b091252a1095a9" }, "truncated": 0, "non_truncated": 612, "padded": 2448, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:public_relations|5": { "hashes": { "hash_examples": "b66d52e28e7d14e0", "hash_full_prompts": "2717ec2f9cc3ea3f", "hash_input_tokens": "3118fb19254356b8", "hash_cont_tokens": "987115a77c8704f0" }, "truncated": 0, "non_truncated": 110, "padded": 436, "non_padded": 4, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:security_studies|5": { "hashes": { "hash_examples": "514c14feaf000ad9", "hash_full_prompts": "fd10221b4be3bf11", "hash_input_tokens": "619ae48b231f13d1", "hash_cont_tokens": "6c35bc7e96074b27" }, "truncated": 0, "non_truncated": 245, "padded": 980, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:sociology|5": { "hashes": { "hash_examples": "f6c9bc9d18c80870", "hash_full_prompts": "16bc50365bda7e74", "hash_input_tokens": "e77c9db987dfeede", "hash_cont_tokens": "32af622f73b2e657" }, "truncated": 0, "non_truncated": 201, "padded": 804, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:us_foreign_policy|5": { "hashes": { "hash_examples": "ed7b78629db6678f", "hash_full_prompts": "249ca3f4999e41ad", "hash_input_tokens": "0fa36661f20b1b58", "hash_cont_tokens": "9e1c9ca2c51de57e" }, "truncated": 0, "non_truncated": 100, "padded": 400, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:virology|5": { "hashes": { "hash_examples": "bc52ffdc3f9b994a", "hash_full_prompts": "09939d976cecacd7", "hash_input_tokens": "b8237a5fe3c03938", "hash_cont_tokens": "beded8c3660dc8f5" }, "truncated": 0, "non_truncated": 166, "padded": 664, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "leaderboard|mmlu:world_religions|5": { "hashes": { "hash_examples": "ecdb4a4f94f62930", "hash_full_prompts": "addabd4dc9734c08", "hash_input_tokens": "23943b2941071751", "hash_cont_tokens": "9b1952a4af3d6a73" }, "truncated": 0, "non_truncated": 171, "padded": 684, "non_padded": 0, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 } }, "summary_general": { "hashes": { "hash_examples": "341a076d0beb7048", "hash_full_prompts": "11973fef11ba4c9d", "hash_input_tokens": "0e9d676b8e37ef05", "hash_cont_tokens": "25e9f343d6b95644" }, "truncated": 0, "non_truncated": 14042, "padded": 56062, "non_padded": 106, "num_truncated_few_shots": 0 } }