{
  "config_general": {
    "lighteval_sha": "?",
    "num_fewshot_seeds": 1,
    "override_batch_size": 4,
    "max_samples": null,
    "job_id": "",
    "start_time": 4162389.286794406,
    "end_time": 4162510.151446024,
    "total_evaluation_time_secondes": "120.86465161805972",
    "model_name": "Qwen/Qwen1.5-0.5B-Chat",
    "model_sha": "f82bd3692de0283f4a4b31e06d164dd8467fb52e",
    "model_dtype": "torch.bfloat16",
    "model_size": "1.05 GB",
    "config": null
  },
  "results": {
    "lighteval|agieval:aqua-rat|0": {
      "acc": 0.2047244094488189,
      "acc_stderr": 0.025367833544738528,
      "acc_norm": 0.1968503937007874,
      "acc_norm_stderr": 0.024998048635355926
    },
    "lighteval|agieval:gaokao-biology|0": {
      "acc": 0.2571428571428571,
      "acc_stderr": 0.030231990420749873,
      "acc_norm": 0.3238095238095238,
      "acc_norm_stderr": 0.03236727895404352
    },
    "lighteval|agieval:gaokao-chemistry|0": {
      "acc": 0.2753623188405797,
      "acc_stderr": 0.03112283151905817,
      "acc_norm": 0.22705314009661837,
      "acc_norm_stderr": 0.029188042144307678
    },
    "lighteval|agieval:gaokao-chinese|0": {
      "acc": 0.34959349593495936,
      "acc_stderr": 0.030464290472947703,
      "acc_norm": 0.37398373983739835,
      "acc_norm_stderr": 0.030912642032790927
    },
    "lighteval|agieval:gaokao-english|0": {
      "acc": 0.35294117647058826,
      "acc_stderr": 0.027363593284684944,
      "acc_norm": 0.30718954248366015,
      "acc_norm_stderr": 0.026415601914388995
    },
    "lighteval|agieval:gaokao-geography|0": {
      "acc": 0.2914572864321608,
      "acc_stderr": 0.03229519279811605,
      "acc_norm": 0.27638190954773867,
      "acc_norm_stderr": 0.031781685026817864
    },
    "lighteval|agieval:gaokao-history|0": {
      "acc": 0.2297872340425532,
      "acc_stderr": 0.02750175294441242,
      "acc_norm": 0.23404255319148937,
      "acc_norm_stderr": 0.02767845257821239
    },
    "lighteval|agieval:gaokao-mathqa|0": {
      "acc": 0.25925925925925924,
      "acc_stderr": 0.02342427896421016,
      "acc_norm": 0.2849002849002849,
      "acc_norm_stderr": 0.02412657767241174
    },
    "lighteval|agieval:gaokao-physics|0": {
      "acc": 0.29,
      "acc_stderr": 0.03216633903375033,
      "acc_norm": 0.29,
      "acc_norm_stderr": 0.03216633903375033
    },
    "lighteval|agieval:logiqa-en|0": {
      "acc": 0.24270353302611367,
      "acc_stderr": 0.01681567620647953,
      "acc_norm": 0.3010752688172043,
      "acc_norm_stderr": 0.01799268874266824
    },
    "lighteval|agieval:logiqa-zh|0": {
      "acc": 0.28110599078341014,
      "acc_stderr": 0.017632374626459998,
      "acc_norm": 0.3118279569892473,
      "acc_norm_stderr": 0.018169767037546324
    },
    "lighteval|agieval:lsat-ar|0": {
      "acc": 0.19130434782608696,
      "acc_stderr": 0.025991852462828483,
      "acc_norm": 0.1608695652173913,
      "acc_norm_stderr": 0.024279175777554177
    },
    "lighteval|agieval:lsat-lr|0": {
      "acc": 0.21568627450980393,
      "acc_stderr": 0.018230445049830818,
      "acc_norm": 0.21764705882352942,
      "acc_norm_stderr": 0.018290217500245287
    },
    "lighteval|agieval:lsat-rc|0": {
      "acc": 0.23048327137546468,
      "acc_stderr": 0.02572535286092568,
      "acc_norm": 0.20074349442379183,
      "acc_norm_stderr": 0.024467885125224513
    },
    "lighteval|agieval:sat-en|0": {
      "acc": 0.25728155339805825,
      "acc_stderr": 0.030530892446123822,
      "acc_norm": 0.23300970873786409,
      "acc_norm_stderr": 0.029526026912337834
    },
    "lighteval|agieval:sat-en-without-passage|0": {
      "acc": 0.21844660194174756,
      "acc_stderr": 0.028858585740397263,
      "acc_norm": 0.20388349514563106,
      "acc_norm_stderr": 0.028138595623668782
    },
    "lighteval|agieval:sat-math|0": {
      "acc": 0.2909090909090909,
      "acc_stderr": 0.03069075327671109,
      "acc_norm": 0.22272727272727272,
      "acc_norm_stderr": 0.02811585901870265
    },
    "lighteval|agieval:_average|0": {
      "acc": 0.26106992360832665,
      "acc_stderr": 0.02673023739131911,
      "acc_norm": 0.2568232299087902,
      "acc_norm_stderr": 0.026389110807648657
    },
    "all": {
      "acc": 0.26106992360832665,
      "acc_stderr": 0.02673023739131911,
      "acc_norm": 0.2568232299087902,
      "acc_norm_stderr": 0.026389110807648657
    }
  },
  "versions": {
    "lighteval|agieval:aqua-rat|0": 0,
    "lighteval|agieval:gaokao-biology|0": 0,
    "lighteval|agieval:gaokao-chemistry|0": 0,
    "lighteval|agieval:gaokao-chinese|0": 0,
    "lighteval|agieval:gaokao-english|0": 0,
    "lighteval|agieval:gaokao-geography|0": 0,
    "lighteval|agieval:gaokao-history|0": 0,
    "lighteval|agieval:gaokao-mathqa|0": 0,
    "lighteval|agieval:gaokao-physics|0": 0,
    "lighteval|agieval:logiqa-en|0": 0,
    "lighteval|agieval:logiqa-zh|0": 0,
    "lighteval|agieval:lsat-ar|0": 0,
    "lighteval|agieval:lsat-lr|0": 0,
    "lighteval|agieval:lsat-rc|0": 0,
    "lighteval|agieval:sat-en|0": 0,
    "lighteval|agieval:sat-en-without-passage|0": 0,
    "lighteval|agieval:sat-math|0": 0
  },
  "config_tasks": {
    "lighteval|agieval:aqua-rat": {
      "name": "agieval:aqua-rat",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-aqua-rat",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 254,
      "effective_num_docs": 254,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:gaokao-biology": {
      "name": "agieval:gaokao-biology",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-gaokao-biology",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 210,
      "effective_num_docs": 210,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:gaokao-chemistry": {
      "name": "agieval:gaokao-chemistry",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-gaokao-chemistry",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 207,
      "effective_num_docs": 207,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:gaokao-chinese": {
      "name": "agieval:gaokao-chinese",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-gaokao-chinese",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 246,
      "effective_num_docs": 246,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:gaokao-english": {
      "name": "agieval:gaokao-english",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-gaokao-english",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 306,
      "effective_num_docs": 306,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:gaokao-geography": {
      "name": "agieval:gaokao-geography",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-gaokao-geography",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 199,
      "effective_num_docs": 199,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:gaokao-history": {
      "name": "agieval:gaokao-history",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-gaokao-history",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 235,
      "effective_num_docs": 235,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:gaokao-mathqa": {
      "name": "agieval:gaokao-mathqa",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-gaokao-mathqa",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 351,
      "effective_num_docs": 351,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:gaokao-physics": {
      "name": "agieval:gaokao-physics",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-gaokao-physics",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 200,
      "effective_num_docs": 200,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:logiqa-en": {
      "name": "agieval:logiqa-en",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-logiqa-en",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 651,
      "effective_num_docs": 651,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:logiqa-zh": {
      "name": "agieval:logiqa-zh",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-logiqa-zh",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 651,
      "effective_num_docs": 651,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:lsat-ar": {
      "name": "agieval:lsat-ar",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-lsat-ar",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 230,
      "effective_num_docs": 230,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:lsat-lr": {
      "name": "agieval:lsat-lr",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-lsat-lr",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 510,
      "effective_num_docs": 510,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:lsat-rc": {
      "name": "agieval:lsat-rc",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-lsat-rc",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 269,
      "effective_num_docs": 269,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:sat-en": {
      "name": "agieval:sat-en",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-sat-en",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 206,
      "effective_num_docs": 206,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:sat-en-without-passage": {
      "name": "agieval:sat-en-without-passage",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-sat-en-without-passage",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 206,
      "effective_num_docs": 206,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|agieval:sat-math": {
      "name": "agieval:sat-math",
      "prompt_function": "agieval",
      "hf_repo": "dmayhem93/agieval-sat-math",
      "hf_subset": "default",
      "metric": [
        "loglikelihood_acc",
        "loglikelihood_acc_norm_nospace"
      ],
      "hf_avail_splits": [
        "test"
      ],
      "evaluation_splits": [
        "test"
      ],
      "few_shots_split": null,
      "few_shots_select": "random_sampling",
      "generation_size": 1,
      "stop_sequence": null,
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 220,
      "effective_num_docs": 220,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    }
  },
  "summary_tasks": {
    "lighteval|agieval:aqua-rat|0": {
      "hashes": {
        "hash_examples": "f09607f69e5b7525",
        "hash_full_prompts": "ab1c49d62ea014ca",
        "hash_input_tokens": "143221c522438063",
        "hash_cont_tokens": "b57d655fc6823b1d"
      },
      "truncated": 0,
      "non_truncated": 254,
      "padded": 1265,
      "non_padded": 5,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:gaokao-biology|0": {
      "hashes": {
        "hash_examples": "f262eaf4a72db963",
        "hash_full_prompts": "21fe3fd322fce0c3",
        "hash_input_tokens": "132d4eba908a4ed4",
        "hash_cont_tokens": "05fa3ff0a7509d37"
      },
      "truncated": 0,
      "non_truncated": 210,
      "padded": 830,
      "non_padded": 10,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:gaokao-chemistry|0": {
      "hashes": {
        "hash_examples": "47f2e649f58d9da5",
        "hash_full_prompts": "65eb1f54d409142f",
        "hash_input_tokens": "d3217238b8a8c275",
        "hash_cont_tokens": "f4fa0920de20952b"
      },
      "truncated": 0,
      "non_truncated": 207,
      "padded": 829,
      "non_padded": 2,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:gaokao-chinese|0": {
      "hashes": {
        "hash_examples": "1010b21fde4726ab",
        "hash_full_prompts": "0261d102d2b4213e",
        "hash_input_tokens": "93fd34dfdd71fe30",
        "hash_cont_tokens": "4542f0eface8c9c6"
      },
      "truncated": 0,
      "non_truncated": 246,
      "padded": 983,
      "non_padded": 1,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:gaokao-english|0": {
      "hashes": {
        "hash_examples": "4864e492a350ae93",
        "hash_full_prompts": "5378c70f856b0327",
        "hash_input_tokens": "5fd1877f71786ff3",
        "hash_cont_tokens": "c645b984d571ad8d"
      },
      "truncated": 0,
      "non_truncated": 306,
      "padded": 1224,
      "non_padded": 0,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:gaokao-geography|0": {
      "hashes": {
        "hash_examples": "ec3a021e37650e7d",
        "hash_full_prompts": "67b040bcf10390ab",
        "hash_input_tokens": "4ddac45940e7a829",
        "hash_cont_tokens": "121f5450390bfe43"
      },
      "truncated": 0,
      "non_truncated": 199,
      "padded": 796,
      "non_padded": 0,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:gaokao-history|0": {
      "hashes": {
        "hash_examples": "b3fad1596f1ae1f9",
        "hash_full_prompts": "147e1ca1a5d92e55",
        "hash_input_tokens": "ebc1fd047d9f3d12",
        "hash_cont_tokens": "102b33510063081d"
      },
      "truncated": 0,
      "non_truncated": 235,
      "padded": 934,
      "non_padded": 6,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:gaokao-mathqa|0": {
      "hashes": {
        "hash_examples": "1d1088556861b0b0",
        "hash_full_prompts": "d6f785498f2ec712",
        "hash_input_tokens": "b7e76efbbc244922",
        "hash_cont_tokens": "564a0e08facc7404"
      },
      "truncated": 0,
      "non_truncated": 351,
      "padded": 1392,
      "non_padded": 12,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:gaokao-physics|0": {
      "hashes": {
        "hash_examples": "eb05f035c7bfca2f",
        "hash_full_prompts": "eb323255dc83409c",
        "hash_input_tokens": "eea8422b472c2cac",
        "hash_cont_tokens": "387c2f1b5a18b09e"
      },
      "truncated": 0,
      "non_truncated": 200,
      "padded": 797,
      "non_padded": 3,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:logiqa-en|0": {
      "hashes": {
        "hash_examples": "0a688a45f69c21e0",
        "hash_full_prompts": "0a29985a5d76d442",
        "hash_input_tokens": "8e43645b0dd64706",
        "hash_cont_tokens": "51d19cd9cac1bc5b"
      },
      "truncated": 0,
      "non_truncated": 651,
      "padded": 2590,
      "non_padded": 14,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:logiqa-zh|0": {
      "hashes": {
        "hash_examples": "620d6888b6012ea5",
        "hash_full_prompts": "6240c31f1dc378f1",
        "hash_input_tokens": "c5409492635fd368",
        "hash_cont_tokens": "fcfe9810b1405591"
      },
      "truncated": 0,
      "non_truncated": 651,
      "padded": 2561,
      "non_padded": 43,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:lsat-ar|0": {
      "hashes": {
        "hash_examples": "627c8f5ccd5da209",
        "hash_full_prompts": "bf740466dbecb79b",
        "hash_input_tokens": "305a10135d896aac",
        "hash_cont_tokens": "63bf2d451944ea4d"
      },
      "truncated": 0,
      "non_truncated": 230,
      "padded": 1138,
      "non_padded": 12,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:lsat-lr|0": {
      "hashes": {
        "hash_examples": "794641c86de172f5",
        "hash_full_prompts": "73141717013969a1",
        "hash_input_tokens": "3ad964f7deb5f145",
        "hash_cont_tokens": "11408023e27186ba"
      },
      "truncated": 0,
      "non_truncated": 510,
      "padded": 2526,
      "non_padded": 24,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:lsat-rc|0": {
      "hashes": {
        "hash_examples": "35981ed917ea01cf",
        "hash_full_prompts": "3eda7a53b0762ee9",
        "hash_input_tokens": "0a6fc04df66f8d4d",
        "hash_cont_tokens": "31095e24bc2090ce"
      },
      "truncated": 0,
      "non_truncated": 269,
      "padded": 1345,
      "non_padded": 0,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:sat-en|0": {
      "hashes": {
        "hash_examples": "041c39c646536a1e",
        "hash_full_prompts": "ca20876d35375196",
        "hash_input_tokens": "f323eb2313f835e8",
        "hash_cont_tokens": "04c12d6062a7f14f"
      },
      "truncated": 0,
      "non_truncated": 206,
      "padded": 821,
      "non_padded": 0,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:sat-en-without-passage|0": {
      "hashes": {
        "hash_examples": "e4d9284367dff68f",
        "hash_full_prompts": "1ef1f84d5ecc98da",
        "hash_input_tokens": "66ef93779458e842",
        "hash_cont_tokens": "021d545663c11402"
      },
      "truncated": 0,
      "non_truncated": 206,
      "padded": 812,
      "non_padded": 9,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|agieval:sat-math|0": {
      "hashes": {
        "hash_examples": "01db7291603fc1a0",
        "hash_full_prompts": "92e2efa05050a7cb",
        "hash_input_tokens": "4860b569680b9912",
        "hash_cont_tokens": "ca46cf092f95eea4"
      },
      "truncated": 0,
      "non_truncated": 220,
      "padded": 873,
      "non_padded": 7,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    }
  },
  "summary_general": {
    "hashes": {
      "hash_examples": "da3af66181f18ddf",
      "hash_full_prompts": "56637e9243089333",
      "hash_input_tokens": "f0430454cf0301f4",
      "hash_cont_tokens": "bde521647a33bf84"
    },
    "truncated": 0,
    "non_truncated": 5151,
    "padded": 21716,
    "non_padded": 148,
    "num_truncated_few_shots": 0
  }
}