open-r1-eval-leaderboard
/
eval_results
/Qwen
/Qwen1.5-0.5B-Chat
/main
/agieval
/results_2024-03-28T16-38-49.297471.json
{ | |
"config_general": { | |
"lighteval_sha": "?", | |
"num_fewshot_seeds": 1, | |
"override_batch_size": 1, | |
"max_samples": null, | |
"job_id": "", | |
"start_time": 5972472.642907576, | |
"end_time": 5972669.472901344, | |
"total_evaluation_time_secondes": "196.82999376859516", | |
"model_name": "Qwen/Qwen1.5-0.5B-Chat", | |
"model_sha": "6c705984bb8b5591dd4e1a9e66e1a127965fd08d", | |
"model_dtype": "torch.bfloat16", | |
"model_size": "1.05 GB", | |
"config": null | |
}, | |
"results": { | |
"lighteval|agieval:aqua-rat|0": { | |
"acc": 0.19291338582677164, | |
"acc_stderr": 0.024807385375179295, | |
"acc_norm": 0.1889763779527559, | |
"acc_norm_stderr": 0.02461275630319305 | |
}, | |
"lighteval|agieval:gaokao-biology|0": { | |
"acc": 0.2857142857142857, | |
"acc_stderr": 0.03124847423284339, | |
"acc_norm": 0.32857142857142857, | |
"acc_norm_stderr": 0.03248939796876842 | |
}, | |
"lighteval|agieval:gaokao-chemistry|0": { | |
"acc": 0.28019323671497587, | |
"acc_stderr": 0.031289827964521094, | |
"acc_norm": 0.2318840579710145, | |
"acc_norm_stderr": 0.029404596565406532 | |
}, | |
"lighteval|agieval:gaokao-chinese|0": { | |
"acc": 0.35772357723577236, | |
"acc_stderr": 0.030623281761072305, | |
"acc_norm": 0.3617886178861789, | |
"acc_norm_stderr": 0.03069917328237318 | |
}, | |
"lighteval|agieval:gaokao-english|0": { | |
"acc": 0.3562091503267974, | |
"acc_stderr": 0.027420477662629245, | |
"acc_norm": 0.3006535947712418, | |
"acc_norm_stderr": 0.02625605383571896 | |
}, | |
"lighteval|agieval:gaokao-geography|0": { | |
"acc": 0.2914572864321608, | |
"acc_stderr": 0.03229519279811605, | |
"acc_norm": 0.27638190954773867, | |
"acc_norm_stderr": 0.031781685026817864 | |
}, | |
"lighteval|agieval:gaokao-history|0": { | |
"acc": 0.225531914893617, | |
"acc_stderr": 0.027321078417387536, | |
"acc_norm": 0.23404255319148937, | |
"acc_norm_stderr": 0.027678452578212387 | |
}, | |
"lighteval|agieval:gaokao-mathqa|0": { | |
"acc": 0.2564102564102564, | |
"acc_stderr": 0.023339974098276806, | |
"acc_norm": 0.28774928774928776, | |
"acc_norm_stderr": 0.024198561654366728 | |
}, | |
"lighteval|agieval:gaokao-physics|0": { | |
"acc": 0.29, | |
"acc_stderr": 0.03216633903375033, | |
"acc_norm": 0.29, | |
"acc_norm_stderr": 0.03216633903375033 | |
}, | |
"lighteval|agieval:logiqa-en|0": { | |
"acc": 0.2350230414746544, | |
"acc_stderr": 0.01663116682389096, | |
"acc_norm": 0.30414746543778803, | |
"acc_norm_stderr": 0.018044465791506776 | |
}, | |
"lighteval|agieval:logiqa-zh|0": { | |
"acc": 0.28417818740399386, | |
"acc_stderr": 0.017690542680190782, | |
"acc_norm": 0.3210445468509985, | |
"acc_norm_stderr": 0.018312456701476132 | |
}, | |
"lighteval|agieval:lsat-ar|0": { | |
"acc": 0.1956521739130435, | |
"acc_stderr": 0.02621479970981959, | |
"acc_norm": 0.16956521739130434, | |
"acc_norm_stderr": 0.024797243687717654 | |
}, | |
"lighteval|agieval:lsat-lr|0": { | |
"acc": 0.22156862745098038, | |
"acc_stderr": 0.018407949229981374, | |
"acc_norm": 0.2196078431372549, | |
"acc_norm_stderr": 0.01834938361142325 | |
}, | |
"lighteval|agieval:lsat-rc|0": { | |
"acc": 0.241635687732342, | |
"acc_stderr": 0.026148819366805718, | |
"acc_norm": 0.1970260223048327, | |
"acc_norm_stderr": 0.02429657927212685 | |
}, | |
"lighteval|agieval:sat-en|0": { | |
"acc": 0.25728155339805825, | |
"acc_stderr": 0.030530892446123822, | |
"acc_norm": 0.23786407766990292, | |
"acc_norm_stderr": 0.02973744934886544 | |
}, | |
"lighteval|agieval:sat-en-without-passage|0": { | |
"acc": 0.22330097087378642, | |
"acc_stderr": 0.029086720403095623, | |
"acc_norm": 0.20388349514563106, | |
"acc_norm_stderr": 0.028138595623668782 | |
}, | |
"lighteval|agieval:sat-math|0": { | |
"acc": 0.3, | |
"acc_stderr": 0.030966176864266677, | |
"acc_norm": 0.21363636363636362, | |
"acc_norm_stderr": 0.027696649960503885 | |
}, | |
"lighteval|agieval:_average|0": { | |
"acc": 0.2643996079883233, | |
"acc_stderr": 0.026834652874585335, | |
"acc_norm": 0.25687193289501253, | |
"acc_norm_stderr": 0.026391755308582128 | |
}, | |
"all": { | |
"acc": 0.2643996079883233, | |
"acc_stderr": 0.026834652874585335, | |
"acc_norm": 0.25687193289501253, | |
"acc_norm_stderr": 0.026391755308582128 | |
} | |
}, | |
"versions": { | |
"lighteval|agieval:aqua-rat|0": 0, | |
"lighteval|agieval:gaokao-biology|0": 0, | |
"lighteval|agieval:gaokao-chemistry|0": 0, | |
"lighteval|agieval:gaokao-chinese|0": 0, | |
"lighteval|agieval:gaokao-english|0": 0, | |
"lighteval|agieval:gaokao-geography|0": 0, | |
"lighteval|agieval:gaokao-history|0": 0, | |
"lighteval|agieval:gaokao-mathqa|0": 0, | |
"lighteval|agieval:gaokao-physics|0": 0, | |
"lighteval|agieval:logiqa-en|0": 0, | |
"lighteval|agieval:logiqa-zh|0": 0, | |
"lighteval|agieval:lsat-ar|0": 0, | |
"lighteval|agieval:lsat-lr|0": 0, | |
"lighteval|agieval:lsat-rc|0": 0, | |
"lighteval|agieval:sat-en|0": 0, | |
"lighteval|agieval:sat-en-without-passage|0": 0, | |
"lighteval|agieval:sat-math|0": 0 | |
}, | |
"config_tasks": { | |
"lighteval|agieval:aqua-rat": { | |
"name": "agieval:aqua-rat", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-aqua-rat", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 254, | |
"effective_num_docs": 254, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:gaokao-biology": { | |
"name": "agieval:gaokao-biology", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-gaokao-biology", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 210, | |
"effective_num_docs": 210, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:gaokao-chemistry": { | |
"name": "agieval:gaokao-chemistry", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-gaokao-chemistry", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 207, | |
"effective_num_docs": 207, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:gaokao-chinese": { | |
"name": "agieval:gaokao-chinese", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-gaokao-chinese", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 246, | |
"effective_num_docs": 246, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:gaokao-english": { | |
"name": "agieval:gaokao-english", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-gaokao-english", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 306, | |
"effective_num_docs": 306, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:gaokao-geography": { | |
"name": "agieval:gaokao-geography", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-gaokao-geography", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 199, | |
"effective_num_docs": 199, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:gaokao-history": { | |
"name": "agieval:gaokao-history", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-gaokao-history", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 235, | |
"effective_num_docs": 235, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:gaokao-mathqa": { | |
"name": "agieval:gaokao-mathqa", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-gaokao-mathqa", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 351, | |
"effective_num_docs": 351, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:gaokao-physics": { | |
"name": "agieval:gaokao-physics", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-gaokao-physics", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 200, | |
"effective_num_docs": 200, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:logiqa-en": { | |
"name": "agieval:logiqa-en", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-logiqa-en", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 651, | |
"effective_num_docs": 651, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:logiqa-zh": { | |
"name": "agieval:logiqa-zh", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-logiqa-zh", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 651, | |
"effective_num_docs": 651, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:lsat-ar": { | |
"name": "agieval:lsat-ar", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-lsat-ar", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 230, | |
"effective_num_docs": 230, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:lsat-lr": { | |
"name": "agieval:lsat-lr", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-lsat-lr", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 510, | |
"effective_num_docs": 510, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:lsat-rc": { | |
"name": "agieval:lsat-rc", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-lsat-rc", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 269, | |
"effective_num_docs": 269, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:sat-en": { | |
"name": "agieval:sat-en", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-sat-en", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 206, | |
"effective_num_docs": 206, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:sat-en-without-passage": { | |
"name": "agieval:sat-en-without-passage", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-sat-en-without-passage", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 206, | |
"effective_num_docs": 206, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:sat-math": { | |
"name": "agieval:sat-math", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-sat-math", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 220, | |
"effective_num_docs": 220, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
} | |
}, | |
"summary_tasks": { | |
"lighteval|agieval:aqua-rat|0": { | |
"hashes": { | |
"hash_examples": "f09607f69e5b7525", | |
"hash_full_prompts": "ab1c49d62ea014ca", | |
"hash_input_tokens": "143221c522438063", | |
"hash_cont_tokens": "8e124080e2ead575" | |
}, | |
"truncated": 0, | |
"non_truncated": 254, | |
"padded": 1265, | |
"non_padded": 5, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:gaokao-biology|0": { | |
"hashes": { | |
"hash_examples": "f262eaf4a72db963", | |
"hash_full_prompts": "21fe3fd322fce0c3", | |
"hash_input_tokens": "132d4eba908a4ed4", | |
"hash_cont_tokens": "00dd4d2e2bec28ef" | |
}, | |
"truncated": 0, | |
"non_truncated": 210, | |
"padded": 830, | |
"non_padded": 10, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:gaokao-chemistry|0": { | |
"hashes": { | |
"hash_examples": "47f2e649f58d9da5", | |
"hash_full_prompts": "65eb1f54d409142f", | |
"hash_input_tokens": "d3217238b8a8c275", | |
"hash_cont_tokens": "605322759d55d2f3" | |
}, | |
"truncated": 0, | |
"non_truncated": 207, | |
"padded": 829, | |
"non_padded": 2, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:gaokao-chinese|0": { | |
"hashes": { | |
"hash_examples": "1010b21fde4726ab", | |
"hash_full_prompts": "0261d102d2b4213e", | |
"hash_input_tokens": "93fd34dfdd71fe30", | |
"hash_cont_tokens": "417a0311b1710ac6" | |
}, | |
"truncated": 0, | |
"non_truncated": 246, | |
"padded": 983, | |
"non_padded": 1, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:gaokao-english|0": { | |
"hashes": { | |
"hash_examples": "4864e492a350ae93", | |
"hash_full_prompts": "5378c70f856b0327", | |
"hash_input_tokens": "5fd1877f71786ff3", | |
"hash_cont_tokens": "fa539624aef75648" | |
}, | |
"truncated": 0, | |
"non_truncated": 306, | |
"padded": 1224, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:gaokao-geography|0": { | |
"hashes": { | |
"hash_examples": "ec3a021e37650e7d", | |
"hash_full_prompts": "67b040bcf10390ab", | |
"hash_input_tokens": "4ddac45940e7a829", | |
"hash_cont_tokens": "a808ce2fc9f8ac9e" | |
}, | |
"truncated": 0, | |
"non_truncated": 199, | |
"padded": 796, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:gaokao-history|0": { | |
"hashes": { | |
"hash_examples": "b3fad1596f1ae1f9", | |
"hash_full_prompts": "147e1ca1a5d92e55", | |
"hash_input_tokens": "ebc1fd047d9f3d12", | |
"hash_cont_tokens": "bf187ccfab294223" | |
}, | |
"truncated": 0, | |
"non_truncated": 235, | |
"padded": 934, | |
"non_padded": 6, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:gaokao-mathqa|0": { | |
"hashes": { | |
"hash_examples": "1d1088556861b0b0", | |
"hash_full_prompts": "d6f785498f2ec712", | |
"hash_input_tokens": "b7e76efbbc244922", | |
"hash_cont_tokens": "99fe31f3682f2d40" | |
}, | |
"truncated": 0, | |
"non_truncated": 351, | |
"padded": 1392, | |
"non_padded": 12, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:gaokao-physics|0": { | |
"hashes": { | |
"hash_examples": "eb05f035c7bfca2f", | |
"hash_full_prompts": "eb323255dc83409c", | |
"hash_input_tokens": "eea8422b472c2cac", | |
"hash_cont_tokens": "562ad307ae3b6f26" | |
}, | |
"truncated": 0, | |
"non_truncated": 200, | |
"padded": 797, | |
"non_padded": 3, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:logiqa-en|0": { | |
"hashes": { | |
"hash_examples": "0a688a45f69c21e0", | |
"hash_full_prompts": "0a29985a5d76d442", | |
"hash_input_tokens": "8e43645b0dd64706", | |
"hash_cont_tokens": "d6a0c7bb4b4c5331" | |
}, | |
"truncated": 0, | |
"non_truncated": 651, | |
"padded": 2590, | |
"non_padded": 14, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:logiqa-zh|0": { | |
"hashes": { | |
"hash_examples": "620d6888b6012ea5", | |
"hash_full_prompts": "6240c31f1dc378f1", | |
"hash_input_tokens": "c5409492635fd368", | |
"hash_cont_tokens": "104492bcec28b979" | |
}, | |
"truncated": 0, | |
"non_truncated": 651, | |
"padded": 2561, | |
"non_padded": 43, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:lsat-ar|0": { | |
"hashes": { | |
"hash_examples": "627c8f5ccd5da209", | |
"hash_full_prompts": "bf740466dbecb79b", | |
"hash_input_tokens": "305a10135d896aac", | |
"hash_cont_tokens": "91b11ac7df4e566b" | |
}, | |
"truncated": 0, | |
"non_truncated": 230, | |
"padded": 1138, | |
"non_padded": 12, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:lsat-lr|0": { | |
"hashes": { | |
"hash_examples": "794641c86de172f5", | |
"hash_full_prompts": "73141717013969a1", | |
"hash_input_tokens": "3ad964f7deb5f145", | |
"hash_cont_tokens": "c0971bd7c68f42f5" | |
}, | |
"truncated": 0, | |
"non_truncated": 510, | |
"padded": 2526, | |
"non_padded": 24, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:lsat-rc|0": { | |
"hashes": { | |
"hash_examples": "35981ed917ea01cf", | |
"hash_full_prompts": "3eda7a53b0762ee9", | |
"hash_input_tokens": "0a6fc04df66f8d4d", | |
"hash_cont_tokens": "904259a2682f51a0" | |
}, | |
"truncated": 0, | |
"non_truncated": 269, | |
"padded": 1345, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:sat-en|0": { | |
"hashes": { | |
"hash_examples": "041c39c646536a1e", | |
"hash_full_prompts": "ca20876d35375196", | |
"hash_input_tokens": "f323eb2313f835e8", | |
"hash_cont_tokens": "75a67f2f9f9272ce" | |
}, | |
"truncated": 0, | |
"non_truncated": 206, | |
"padded": 821, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:sat-en-without-passage|0": { | |
"hashes": { | |
"hash_examples": "e4d9284367dff68f", | |
"hash_full_prompts": "1ef1f84d5ecc98da", | |
"hash_input_tokens": "66ef93779458e842", | |
"hash_cont_tokens": "75a67f2f9f9272ce" | |
}, | |
"truncated": 0, | |
"non_truncated": 206, | |
"padded": 812, | |
"non_padded": 9, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:sat-math|0": { | |
"hashes": { | |
"hash_examples": "01db7291603fc1a0", | |
"hash_full_prompts": "92e2efa05050a7cb", | |
"hash_input_tokens": "4860b569680b9912", | |
"hash_cont_tokens": "d890085c34d380ec" | |
}, | |
"truncated": 0, | |
"non_truncated": 220, | |
"padded": 873, | |
"non_padded": 7, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
} | |
}, | |
"summary_general": { | |
"hashes": { | |
"hash_examples": "da3af66181f18ddf", | |
"hash_full_prompts": "56637e9243089333", | |
"hash_input_tokens": "f0430454cf0301f4", | |
"hash_cont_tokens": "5db0cf23fcf8ab8d" | |
}, | |
"truncated": 0, | |
"non_truncated": 5151, | |
"padded": 21716, | |
"non_padded": 148, | |
"num_truncated_few_shots": 0 | |
} | |
} |