open-r1-eval-leaderboard/eval_results/Qwen/Qwen1.5-72B-Chat/main/agieval/results_2024-03-28T17-04-06.812530.json
{
  "config_general": {
    "lighteval_sha": "?",
    "num_fewshot_seeds": 1,
    "override_batch_size": 1,
    "max_samples": null,
    "job_id": "",
    "start_time": 832268.901585241,
    "end_time": 834667.754142529,
    "total_evaluation_time_secondes": "2398.852557288017",
    "model_name": "Qwen/Qwen1.5-72B-Chat",
    "model_sha": "1a6ccc1215278f962c794b1848c710c29ef4053d",
    "model_dtype": "torch.bfloat16",
    "model_size": "135.9 GB",
    "config": null
  },
  "results": {
    "lighteval|agieval:aqua-rat|0": {
      "acc": 0.3464566929133858,
      "acc_stderr": 0.029915853851357088,
      "acc_norm": 0.32677165354330706,
      "acc_norm_stderr": 0.029487851051239324
    },
    "lighteval|agieval:gaokao-biology|0": {
      "acc": 0.8904761904761904,
      "acc_stderr": 0.021601916523074194,
      "acc_norm": 0.7714285714285715,
      "acc_norm_stderr": 0.029045956871566567
    },
    "lighteval|agieval:gaokao-chemistry|0": {
      "acc": 0.7536231884057971,
      "acc_stderr": 0.030022263446335178,
      "acc_norm": 0.5458937198067633,
      "acc_norm_stderr": 0.03468959207684281
    },
    "lighteval|agieval:gaokao-chinese|0": {
      "acc": 0.8536585365853658,
      "acc_stderr": 0.02258097804323291,
      "acc_norm": 0.8699186991869918,
      "acc_norm_stderr": 0.02149135146335956
    },
    "lighteval|agieval:gaokao-english|0": {
      "acc": 0.761437908496732,
      "acc_stderr": 0.024404394928087877,
      "acc_norm": 0.6503267973856209,
      "acc_norm_stderr": 0.027305308076274695
    },
    "lighteval|agieval:gaokao-geography|0": {
      "acc": 0.8994974874371859,
      "acc_stderr": 0.02136760475548776,
      "acc_norm": 0.8944723618090452,
      "acc_norm_stderr": 0.021834033734867263
    },
    "lighteval|agieval:gaokao-history|0": {
      "acc": 0.9446808510638298,
      "acc_stderr": 0.014944189720358486,
      "acc_norm": 0.8638297872340426,
      "acc_norm_stderr": 0.02242059930438205
    },
    "lighteval|agieval:gaokao-mathqa|0": {
      "acc": 0.5811965811965812,
      "acc_stderr": 0.026371365163318797,
      "acc_norm": 0.47863247863247865,
      "acc_norm_stderr": 0.026701708293697946
    },
    "lighteval|agieval:gaokao-physics|0": {
      "acc": 0.77,
      "acc_stderr": 0.029832025555495224,
      "acc_norm": 0.665,
      "acc_norm_stderr": 0.03345851702943582
    },
    "lighteval|agieval:logiqa-en|0": {
      "acc": 0.4731182795698925,
      "acc_stderr": 0.01958324924350953,
      "acc_norm": 0.4362519201228879,
      "acc_norm_stderr": 0.01945156439474855
    },
    "lighteval|agieval:logiqa-zh|0": {
      "acc": 0.5929339477726574,
      "acc_stderr": 0.01926987610639942,
      "acc_norm": 0.5207373271889401,
      "acc_norm_stderr": 0.019594738825317357
    },
    "lighteval|agieval:lsat-ar|0": {
      "acc": 0.26521739130434785,
      "acc_stderr": 0.02917176407847258,
      "acc_norm": 0.23043478260869565,
      "acc_norm_stderr": 0.027827807522276156
    },
    "lighteval|agieval:lsat-lr|0": {
      "acc": 0.6745098039215687,
      "acc_stderr": 0.02076845539181952,
      "acc_norm": 0.5705882352941176,
      "acc_norm_stderr": 0.02194014455513715
    },
    "lighteval|agieval:lsat-rc|0": {
      "acc": 0.7620817843866171,
      "acc_stderr": 0.02601041254562785,
      "acc_norm": 0.5762081784386617,
      "acc_norm_stderr": 0.030185515550116917
    },
    "lighteval|agieval:sat-en|0": {
      "acc": 0.8689320388349514,
      "acc_stderr": 0.023570253133680667,
      "acc_norm": 0.7427184466019418,
      "acc_norm_stderr": 0.030530892446123822
    },
    "lighteval|agieval:sat-en-without-passage|0": {
      "acc": 0.5728155339805825,
      "acc_stderr": 0.03454921537431907,
      "acc_norm": 0.48058252427184467,
      "acc_norm_stderr": 0.034895171350660135
    },
    "lighteval|agieval:sat-math|0": {
      "acc": 0.5818181818181818,
      "acc_stderr": 0.033331446416271206,
      "acc_norm": 0.4409090909090909,
      "acc_norm_stderr": 0.03355008962027993
    },
    "lighteval|agieval:_average|0": {
      "acc": 0.6819090822449333,
      "acc_stderr": 0.0251350155456969,
      "acc_norm": 0.5920414455566471,
      "acc_norm_stderr": 0.0273182848333133
    },
    "all": {
      "acc": 0.6819090822449333,
      "acc_stderr": 0.0251350155456969,
      "acc_norm": 0.5920414455566471,
      "acc_norm_stderr": 0.0273182848333133
    }
  },
  "versions": {
    "lighteval|agieval:aqua-rat|0": 0,
    "lighteval|agieval:gaokao-biology|0": 0,
    "lighteval|agieval:gaokao-chemistry|0": 0,
    "lighteval|agieval:gaokao-chinese|0": 0,
    "lighteval|agieval:gaokao-english|0": 0,
    "lighteval|agieval:gaokao-geography|0": 0,
    "lighteval|agieval:gaokao-history|0": 0,
    "lighteval|agieval:gaokao-mathqa|0": 0,
    "lighteval|agieval:gaokao-physics|0": 0,
    "lighteval|agieval:logiqa-en|0": 0,
    "lighteval|agieval:logiqa-zh|0": 0,
    "lighteval|agieval:lsat-ar|0": 0,
    "lighteval|agieval:lsat-lr|0": 0,
    "lighteval|agieval:lsat-rc|0": 0,
    "lighteval|agieval:sat-en|0": 0,
    "lighteval|agieval:sat-en-without-passage|0": 0,
    "lighteval|agieval:sat-math|0": 0
  },
  "config_tasks": {
    "lighteval|agieval:aqua-rat": {
      "name": "agieval:aqua-rat",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-aqua-rat",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 254,
      "effective_num_docs": 254,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:gaokao-biology": {
      "name": "agieval:gaokao-biology",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-gaokao-biology",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 210,
      "effective_num_docs": 210,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:gaokao-chemistry": {
      "name": "agieval:gaokao-chemistry",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-gaokao-chemistry",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 207,
      "effective_num_docs": 207,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:gaokao-chinese": {
      "name": "agieval:gaokao-chinese",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-gaokao-chinese",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 246,
      "effective_num_docs": 246,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:gaokao-english": {
      "name": "agieval:gaokao-english",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-gaokao-english",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 306,
      "effective_num_docs": 306,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:gaokao-geography": {
      "name": "agieval:gaokao-geography",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-gaokao-geography",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 199,
      "effective_num_docs": 199,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:gaokao-history": {
      "name": "agieval:gaokao-history",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-gaokao-history",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 235,
      "effective_num_docs": 235,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:gaokao-mathqa": {
      "name": "agieval:gaokao-mathqa",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-gaokao-mathqa",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 351,
      "effective_num_docs": 351,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:gaokao-physics": {
      "name": "agieval:gaokao-physics",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-gaokao-physics",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 200,
      "effective_num_docs": 200,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:logiqa-en": {
      "name": "agieval:logiqa-en",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-logiqa-en",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 651,
      "effective_num_docs": 651,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:logiqa-zh": {
      "name": "agieval:logiqa-zh",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-logiqa-zh",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 651,
      "effective_num_docs": 651,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:lsat-ar": {
      "name": "agieval:lsat-ar",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-lsat-ar",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 230,
      "effective_num_docs": 230,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:lsat-lr": {
      "name": "agieval:lsat-lr",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-lsat-lr",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 510,
      "effective_num_docs": 510,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:lsat-rc": {
      "name": "agieval:lsat-rc",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-lsat-rc",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 269,
      "effective_num_docs": 269,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:sat-en": {
      "name": "agieval:sat-en",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-sat-en",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 206,
      "effective_num_docs": 206,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:sat-en-without-passage": {
      "name": "agieval:sat-en-without-passage",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-sat-en-without-passage",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 206,
      "effective_num_docs": 206,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:sat-math": {
      "name": "agieval:sat-math",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-sat-math",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 220,
      "effective_num_docs": 220,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    }
  },
  "summary_tasks": {
    "lighteval|agieval:aqua-rat|0": {
      "hashes": {
        "hash_examples": "f09607f69e5b7525",
        "hash_full_prompts": "ab1c49d62ea014ca",
        "hash_input_tokens": "143221c522438063",
        "hash_cont_tokens": "8e124080e2ead575"
      },
      "truncated": 0,
      "non_truncated": 254,
      "padded": 1265,
      "non_padded": 5,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:gaokao-biology|0": {
      "hashes": {
        "hash_examples": "f262eaf4a72db963",
        "hash_full_prompts": "21fe3fd322fce0c3",
        "hash_input_tokens": "132d4eba908a4ed4",
        "hash_cont_tokens": "00dd4d2e2bec28ef"
      },
      "truncated": 0,
      "non_truncated": 210,
      "padded": 830,
      "non_padded": 10,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:gaokao-chemistry|0": {
      "hashes": {
        "hash_examples": "47f2e649f58d9da5",
        "hash_full_prompts": "65eb1f54d409142f",
        "hash_input_tokens": "d3217238b8a8c275",
        "hash_cont_tokens": "605322759d55d2f3"
      },
      "truncated": 0,
      "non_truncated": 207,
      "padded": 829,
      "non_padded": 2,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:gaokao-chinese|0": {
      "hashes": {
        "hash_examples": "1010b21fde4726ab",
        "hash_full_prompts": "0261d102d2b4213e",
        "hash_input_tokens": "93fd34dfdd71fe30",
        "hash_cont_tokens": "417a0311b1710ac6"
      },
      "truncated": 0,
      "non_truncated": 246,
      "padded": 983,
      "non_padded": 1,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:gaokao-english|0": {
      "hashes": {
        "hash_examples": "4864e492a350ae93",
        "hash_full_prompts": "5378c70f856b0327",
        "hash_input_tokens": "5fd1877f71786ff3",
        "hash_cont_tokens": "fa539624aef75648"
      },
      "truncated": 0,
      "non_truncated": 306,
      "padded": 1224,
      "non_padded": 0,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:gaokao-geography|0": {
      "hashes": {
        "hash_examples": "ec3a021e37650e7d",
        "hash_full_prompts": "67b040bcf10390ab",
        "hash_input_tokens": "4ddac45940e7a829",
        "hash_cont_tokens": "a808ce2fc9f8ac9e"
      },
      "truncated": 0,
      "non_truncated": 199,
      "padded": 796,
      "non_padded": 0,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:gaokao-history|0": {
      "hashes": {
        "hash_examples": "b3fad1596f1ae1f9",
        "hash_full_prompts": "147e1ca1a5d92e55",
        "hash_input_tokens": "ebc1fd047d9f3d12",
        "hash_cont_tokens": "bf187ccfab294223"
      },
      "truncated": 0,
      "non_truncated": 235,
      "padded": 934,
      "non_padded": 6,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:gaokao-mathqa|0": {
      "hashes": {
        "hash_examples": "1d1088556861b0b0",
        "hash_full_prompts": "d6f785498f2ec712",
        "hash_input_tokens": "b7e76efbbc244922",
        "hash_cont_tokens": "99fe31f3682f2d40"
      },
      "truncated": 0,
      "non_truncated": 351,
      "padded": 1392,
      "non_padded": 12,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:gaokao-physics|0": {
      "hashes": {
        "hash_examples": "eb05f035c7bfca2f",
        "hash_full_prompts": "eb323255dc83409c",
        "hash_input_tokens": "eea8422b472c2cac",
        "hash_cont_tokens": "562ad307ae3b6f26"
      },
      "truncated": 0,
      "non_truncated": 200,
      "padded": 797,
      "non_padded": 3,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:logiqa-en|0": {
      "hashes": {
        "hash_examples": "0a688a45f69c21e0",
        "hash_full_prompts": "0a29985a5d76d442",
        "hash_input_tokens": "8e43645b0dd64706",
        "hash_cont_tokens": "d6a0c7bb4b4c5331"
      },
      "truncated": 0,
      "non_truncated": 651,
      "padded": 2590,
      "non_padded": 14,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:logiqa-zh|0": {
      "hashes": {
        "hash_examples": "620d6888b6012ea5",
        "hash_full_prompts": "6240c31f1dc378f1",
        "hash_input_tokens": "c5409492635fd368",
        "hash_cont_tokens": "104492bcec28b979"
      },
      "truncated": 0,
      "non_truncated": 651,
      "padded": 2561,
      "non_padded": 43,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:lsat-ar|0": {
      "hashes": {
        "hash_examples": "627c8f5ccd5da209",
        "hash_full_prompts": "bf740466dbecb79b",
        "hash_input_tokens": "305a10135d896aac",
        "hash_cont_tokens": "91b11ac7df4e566b"
      },
      "truncated": 0,
      "non_truncated": 230,
      "padded": 1138,
      "non_padded": 12,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:lsat-lr|0": {
      "hashes": {
        "hash_examples": "794641c86de172f5",
        "hash_full_prompts": "73141717013969a1",
        "hash_input_tokens": "3ad964f7deb5f145",
        "hash_cont_tokens": "c0971bd7c68f42f5"
      },
      "truncated": 0,
      "non_truncated": 510,
      "padded": 2526,
      "non_padded": 24,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:lsat-rc|0": {
      "hashes": {
        "hash_examples": "35981ed917ea01cf",
        "hash_full_prompts": "3eda7a53b0762ee9",
        "hash_input_tokens": "0a6fc04df66f8d4d",
        "hash_cont_tokens": "904259a2682f51a0"
      },
      "truncated": 0,
      "non_truncated": 269,
      "padded": 1345,
      "non_padded": 0,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:sat-en|0": {
      "hashes": {
        "hash_examples": "041c39c646536a1e",
        "hash_full_prompts": "ca20876d35375196",
        "hash_input_tokens": "f323eb2313f835e8",
        "hash_cont_tokens": "75a67f2f9f9272ce"
      },
      "truncated": 0,
      "non_truncated": 206,
      "padded": 821,
      "non_padded": 0,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:sat-en-without-passage|0": {
      "hashes": {
        "hash_examples": "e4d9284367dff68f",
        "hash_full_prompts": "1ef1f84d5ecc98da",
        "hash_input_tokens": "66ef93779458e842",
        "hash_cont_tokens": "75a67f2f9f9272ce"
      },
      "truncated": 0,
      "non_truncated": 206,
      "padded": 812,
      "non_padded": 9,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:sat-math|0": {
      "hashes": {
        "hash_examples": "01db7291603fc1a0",
        "hash_full_prompts": "92e2efa05050a7cb",
        "hash_input_tokens": "4860b569680b9912",
        "hash_cont_tokens": "d890085c34d380ec"
      },
      "truncated": 0,
      "non_truncated": 220,
      "padded": 873,
      "non_padded": 7,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    }
  },
  "summary_general": {
    "hashes": {
      "hash_examples": "da3af66181f18ddf",
      "hash_full_prompts": "56637e9243089333",
      "hash_input_tokens": "f0430454cf0301f4",
      "hash_cont_tokens": "5db0cf23fcf8ab8d"
    },
    "truncated": 0,
    "non_truncated": 5151,
    "padded": 21716,
    "non_padded": 148,
    "num_truncated_few_shots": 0
  }
}