open-r1-eval-leaderboard
/
eval_results
/HuggingFaceH4
/zephyr-7b-alpha
/main
/agieval
/results_2024-03-28T16-41-57.836994.json
{ | |
"config_general": { | |
"lighteval_sha": "?", | |
"num_fewshot_seeds": 1, | |
"override_batch_size": 1, | |
"max_samples": null, | |
"job_id": "", | |
"start_time": 678461.123273277, | |
"end_time": 678732.855398687, | |
"total_evaluation_time_secondes": "271.7321254099952", | |
"model_name": "HuggingFaceH4/zephyr-7b-alpha", | |
"model_sha": "2ce2d025864af849b3e5029e2ec9d568eeda892d", | |
"model_dtype": "torch.bfloat16", | |
"model_size": "13.99 GB", | |
"config": null | |
}, | |
"results": { | |
"lighteval|agieval:aqua-rat|0": { | |
"acc": 0.23622047244094488, | |
"acc_stderr": 0.026704380837086997, | |
"acc_norm": 0.23622047244094488, | |
"acc_norm_stderr": 0.02670438083708699 | |
}, | |
"lighteval|agieval:gaokao-biology|0": { | |
"acc": 0.34285714285714286, | |
"acc_stderr": 0.032833210696431546, | |
"acc_norm": 0.3619047619047619, | |
"acc_norm_stderr": 0.03324043951593505 | |
}, | |
"lighteval|agieval:gaokao-chemistry|0": { | |
"acc": 0.23671497584541062, | |
"acc_stderr": 0.029615742669460064, | |
"acc_norm": 0.28019323671497587, | |
"acc_norm_stderr": 0.031289827964521094 | |
}, | |
"lighteval|agieval:gaokao-chinese|0": { | |
"acc": 0.3617886178861789, | |
"acc_stderr": 0.030699173282373184, | |
"acc_norm": 0.35772357723577236, | |
"acc_norm_stderr": 0.030623281761072298 | |
}, | |
"lighteval|agieval:gaokao-english|0": { | |
"acc": 0.6993464052287581, | |
"acc_stderr": 0.026256053835718968, | |
"acc_norm": 0.6666666666666666, | |
"acc_norm_stderr": 0.026992544339297243 | |
}, | |
"lighteval|agieval:gaokao-geography|0": { | |
"acc": 0.3065326633165829, | |
"acc_stderr": 0.03276565009957228, | |
"acc_norm": 0.33668341708542715, | |
"acc_norm_stderr": 0.03358448518553335 | |
}, | |
"lighteval|agieval:gaokao-history|0": { | |
"acc": 0.3148936170212766, | |
"acc_stderr": 0.03036358219723817, | |
"acc_norm": 0.28085106382978725, | |
"acc_norm_stderr": 0.02937917046412482 | |
}, | |
"lighteval|agieval:gaokao-mathqa|0": { | |
"acc": 0.24216524216524216, | |
"acc_stderr": 0.02289861116513968, | |
"acc_norm": 0.245014245014245, | |
"acc_norm_stderr": 0.02298957930108734 | |
}, | |
"lighteval|agieval:gaokao-physics|0": { | |
"acc": 0.3, | |
"acc_stderr": 0.03248501780682212, | |
"acc_norm": 0.36, | |
"acc_norm_stderr": 0.03402629784040015 | |
}, | |
"lighteval|agieval:logiqa-en|0": { | |
"acc": 0.36098310291858676, | |
"acc_stderr": 0.018838352954538687, | |
"acc_norm": 0.3548387096774194, | |
"acc_norm_stderr": 0.01876691851234651 | |
}, | |
"lighteval|agieval:logiqa-zh|0": { | |
"acc": 0.33640552995391704, | |
"acc_stderr": 0.018532169509358595, | |
"acc_norm": 0.3317972350230415, | |
"acc_norm_stderr": 0.0184685941264168 | |
}, | |
"lighteval|agieval:lsat-ar|0": { | |
"acc": 0.28695652173913044, | |
"acc_stderr": 0.029891541673635454, | |
"acc_norm": 0.2217391304347826, | |
"acc_norm_stderr": 0.027451496604058923 | |
}, | |
"lighteval|agieval:lsat-lr|0": { | |
"acc": 0.4, | |
"acc_stderr": 0.021714344801018554, | |
"acc_norm": 0.37254901960784315, | |
"acc_norm_stderr": 0.021430027204976793 | |
}, | |
"lighteval|agieval:lsat-rc|0": { | |
"acc": 0.5241635687732342, | |
"acc_stderr": 0.03050667421128308, | |
"acc_norm": 0.45353159851301117, | |
"acc_norm_stderr": 0.030410174042754434 | |
}, | |
"lighteval|agieval:sat-en|0": { | |
"acc": 0.6699029126213593, | |
"acc_stderr": 0.03284353151466849, | |
"acc_norm": 0.6407766990291263, | |
"acc_norm_stderr": 0.0335087845060878 | |
}, | |
"lighteval|agieval:sat-en-without-passage|0": { | |
"acc": 0.39805825242718446, | |
"acc_stderr": 0.03418799390613399, | |
"acc_norm": 0.38349514563106796, | |
"acc_norm_stderr": 0.033960279445866395 | |
}, | |
"lighteval|agieval:sat-math|0": { | |
"acc": 0.37272727272727274, | |
"acc_stderr": 0.0326739568483895, | |
"acc_norm": 0.33636363636363636, | |
"acc_norm_stderr": 0.03192622349349311 | |
}, | |
"lighteval|agieval:_average|0": { | |
"acc": 0.3758656645836601, | |
"acc_stderr": 0.028459411059345255, | |
"acc_norm": 0.3659028597160299, | |
"acc_norm_stderr": 0.028514853243827007 | |
}, | |
"all": { | |
"acc": 0.3758656645836601, | |
"acc_stderr": 0.028459411059345255, | |
"acc_norm": 0.3659028597160299, | |
"acc_norm_stderr": 0.028514853243827007 | |
} | |
}, | |
"versions": { | |
"lighteval|agieval:aqua-rat|0": 0, | |
"lighteval|agieval:gaokao-biology|0": 0, | |
"lighteval|agieval:gaokao-chemistry|0": 0, | |
"lighteval|agieval:gaokao-chinese|0": 0, | |
"lighteval|agieval:gaokao-english|0": 0, | |
"lighteval|agieval:gaokao-geography|0": 0, | |
"lighteval|agieval:gaokao-history|0": 0, | |
"lighteval|agieval:gaokao-mathqa|0": 0, | |
"lighteval|agieval:gaokao-physics|0": 0, | |
"lighteval|agieval:logiqa-en|0": 0, | |
"lighteval|agieval:logiqa-zh|0": 0, | |
"lighteval|agieval:lsat-ar|0": 0, | |
"lighteval|agieval:lsat-lr|0": 0, | |
"lighteval|agieval:lsat-rc|0": 0, | |
"lighteval|agieval:sat-en|0": 0, | |
"lighteval|agieval:sat-en-without-passage|0": 0, | |
"lighteval|agieval:sat-math|0": 0 | |
}, | |
"config_tasks": { | |
"lighteval|agieval:aqua-rat": { | |
"name": "agieval:aqua-rat", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-aqua-rat", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 254, | |
"effective_num_docs": 254, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:gaokao-biology": { | |
"name": "agieval:gaokao-biology", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-gaokao-biology", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 210, | |
"effective_num_docs": 210, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:gaokao-chemistry": { | |
"name": "agieval:gaokao-chemistry", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-gaokao-chemistry", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 207, | |
"effective_num_docs": 207, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:gaokao-chinese": { | |
"name": "agieval:gaokao-chinese", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-gaokao-chinese", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 246, | |
"effective_num_docs": 246, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:gaokao-english": { | |
"name": "agieval:gaokao-english", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-gaokao-english", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 306, | |
"effective_num_docs": 306, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:gaokao-geography": { | |
"name": "agieval:gaokao-geography", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-gaokao-geography", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 199, | |
"effective_num_docs": 199, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:gaokao-history": { | |
"name": "agieval:gaokao-history", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-gaokao-history", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 235, | |
"effective_num_docs": 235, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:gaokao-mathqa": { | |
"name": "agieval:gaokao-mathqa", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-gaokao-mathqa", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 351, | |
"effective_num_docs": 351, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:gaokao-physics": { | |
"name": "agieval:gaokao-physics", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-gaokao-physics", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 200, | |
"effective_num_docs": 200, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:logiqa-en": { | |
"name": "agieval:logiqa-en", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-logiqa-en", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 651, | |
"effective_num_docs": 651, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:logiqa-zh": { | |
"name": "agieval:logiqa-zh", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-logiqa-zh", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 651, | |
"effective_num_docs": 651, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:lsat-ar": { | |
"name": "agieval:lsat-ar", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-lsat-ar", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 230, | |
"effective_num_docs": 230, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:lsat-lr": { | |
"name": "agieval:lsat-lr", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-lsat-lr", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 510, | |
"effective_num_docs": 510, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:lsat-rc": { | |
"name": "agieval:lsat-rc", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-lsat-rc", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 269, | |
"effective_num_docs": 269, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:sat-en": { | |
"name": "agieval:sat-en", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-sat-en", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 206, | |
"effective_num_docs": 206, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:sat-en-without-passage": { | |
"name": "agieval:sat-en-without-passage", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-sat-en-without-passage", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 206, | |
"effective_num_docs": 206, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|agieval:sat-math": { | |
"name": "agieval:sat-math", | |
"prompt_function": "agieval", | |
"hf_repo": "dmayhem93/agieval-sat-math", | |
"hf_subset": "default", | |
"metric": [ | |
"loglikelihood_acc", | |
"loglikelihood_acc_norm_nospace" | |
], | |
"hf_avail_splits": [ | |
"test" | |
], | |
"evaluation_splits": [ | |
"test" | |
], | |
"few_shots_split": null, | |
"few_shots_select": "random_sampling", | |
"generation_size": 1, | |
"stop_sequence": null, | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 220, | |
"effective_num_docs": 220, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
} | |
}, | |
"summary_tasks": { | |
"lighteval|agieval:aqua-rat|0": { | |
"hashes": { | |
"hash_examples": "f09607f69e5b7525", | |
"hash_full_prompts": "8b913655a6fea4ab", | |
"hash_input_tokens": "293a61e5163aa27e", | |
"hash_cont_tokens": "a12c4ac8996ba11d" | |
}, | |
"truncated": 0, | |
"non_truncated": 254, | |
"padded": 1270, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:gaokao-biology|0": { | |
"hashes": { | |
"hash_examples": "f262eaf4a72db963", | |
"hash_full_prompts": "c7078ace868f7ee8", | |
"hash_input_tokens": "4abbc3b98bca27c3", | |
"hash_cont_tokens": "22b786cf7aa6d1a9" | |
}, | |
"truncated": 0, | |
"non_truncated": 210, | |
"padded": 840, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:gaokao-chemistry|0": { | |
"hashes": { | |
"hash_examples": "47f2e649f58d9da5", | |
"hash_full_prompts": "bd066d6d8c807f39", | |
"hash_input_tokens": "3d9728247bf04ac3", | |
"hash_cont_tokens": "318562bcb4103fc4" | |
}, | |
"truncated": 0, | |
"non_truncated": 207, | |
"padded": 831, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:gaokao-chinese|0": { | |
"hashes": { | |
"hash_examples": "1010b21fde4726ab", | |
"hash_full_prompts": "3f53e9dd34c43d52", | |
"hash_input_tokens": "f2a1d4c848527f86", | |
"hash_cont_tokens": "7b177add04591cdb" | |
}, | |
"truncated": 0, | |
"non_truncated": 246, | |
"padded": 982, | |
"non_padded": 2, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:gaokao-english|0": { | |
"hashes": { | |
"hash_examples": "4864e492a350ae93", | |
"hash_full_prompts": "59104cb8623f69e5", | |
"hash_input_tokens": "ff82bcaabb6cde43", | |
"hash_cont_tokens": "c9ca0addab2a9327" | |
}, | |
"truncated": 0, | |
"non_truncated": 306, | |
"padded": 1224, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:gaokao-geography|0": { | |
"hashes": { | |
"hash_examples": "ec3a021e37650e7d", | |
"hash_full_prompts": "d2456e0377df1973", | |
"hash_input_tokens": "b6593d42d60f9e65", | |
"hash_cont_tokens": "e1bc87e81807da78" | |
}, | |
"truncated": 0, | |
"non_truncated": 199, | |
"padded": 796, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:gaokao-history|0": { | |
"hashes": { | |
"hash_examples": "b3fad1596f1ae1f9", | |
"hash_full_prompts": "faea8f291d9a0cd5", | |
"hash_input_tokens": "ffa388f05b4bfadf", | |
"hash_cont_tokens": "b3c6c60f59b08db4" | |
}, | |
"truncated": 0, | |
"non_truncated": 235, | |
"padded": 940, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:gaokao-mathqa|0": { | |
"hashes": { | |
"hash_examples": "1d1088556861b0b0", | |
"hash_full_prompts": "de899bfeaaa61154", | |
"hash_input_tokens": "1e826e8ae60c9cf4", | |
"hash_cont_tokens": "5d69ebf8391bf298" | |
}, | |
"truncated": 0, | |
"non_truncated": 351, | |
"padded": 1404, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:gaokao-physics|0": { | |
"hashes": { | |
"hash_examples": "eb05f035c7bfca2f", | |
"hash_full_prompts": "08008e0300283edc", | |
"hash_input_tokens": "0a4cddbeea3c31fa", | |
"hash_cont_tokens": "93b4c52fa838ace2" | |
}, | |
"truncated": 0, | |
"non_truncated": 200, | |
"padded": 800, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:logiqa-en|0": { | |
"hashes": { | |
"hash_examples": "0a688a45f69c21e0", | |
"hash_full_prompts": "3405fd262d4b2d28", | |
"hash_input_tokens": "8672a68d080ba1fb", | |
"hash_cont_tokens": "2624c1243afac3f2" | |
}, | |
"truncated": 0, | |
"non_truncated": 651, | |
"padded": 2604, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:logiqa-zh|0": { | |
"hashes": { | |
"hash_examples": "620d6888b6012ea5", | |
"hash_full_prompts": "ac19dc4eaa56f5e0", | |
"hash_input_tokens": "24c3bd34df395ee2", | |
"hash_cont_tokens": "725ca2b921b6f8fe" | |
}, | |
"truncated": 0, | |
"non_truncated": 651, | |
"padded": 2603, | |
"non_padded": 1, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:lsat-ar|0": { | |
"hashes": { | |
"hash_examples": "627c8f5ccd5da209", | |
"hash_full_prompts": "9aed992c4bfa8dd7", | |
"hash_input_tokens": "042173fcbbb85776", | |
"hash_cont_tokens": "23c097e1d431f2b8" | |
}, | |
"truncated": 0, | |
"non_truncated": 230, | |
"padded": 1137, | |
"non_padded": 13, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:lsat-lr|0": { | |
"hashes": { | |
"hash_examples": "794641c86de172f5", | |
"hash_full_prompts": "6a36e90325996129", | |
"hash_input_tokens": "58c268b8b0f3c5c4", | |
"hash_cont_tokens": "b555f4319746d815" | |
}, | |
"truncated": 0, | |
"non_truncated": 510, | |
"padded": 2532, | |
"non_padded": 18, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:lsat-rc|0": { | |
"hashes": { | |
"hash_examples": "35981ed917ea01cf", | |
"hash_full_prompts": "15f0f342f9572c41", | |
"hash_input_tokens": "1eaa7331cf1a555e", | |
"hash_cont_tokens": "8c1c4fc8c9cabd97" | |
}, | |
"truncated": 0, | |
"non_truncated": 269, | |
"padded": 1345, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:sat-en|0": { | |
"hashes": { | |
"hash_examples": "041c39c646536a1e", | |
"hash_full_prompts": "163217fd603b9352", | |
"hash_input_tokens": "e1e9417c28fa0db0", | |
"hash_cont_tokens": "4837f17aae6c95e0" | |
}, | |
"truncated": 0, | |
"non_truncated": 206, | |
"padded": 821, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:sat-en-without-passage|0": { | |
"hashes": { | |
"hash_examples": "e4d9284367dff68f", | |
"hash_full_prompts": "bdd4c7065b87de8a", | |
"hash_input_tokens": "70bcab139ba874e5", | |
"hash_cont_tokens": "4837f17aae6c95e0" | |
}, | |
"truncated": 0, | |
"non_truncated": 206, | |
"padded": 817, | |
"non_padded": 4, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|agieval:sat-math|0": { | |
"hashes": { | |
"hash_examples": "01db7291603fc1a0", | |
"hash_full_prompts": "63ca65b2f0baebb5", | |
"hash_input_tokens": "e242b0a44e256fbb", | |
"hash_cont_tokens": "d959ef83452da9fe" | |
}, | |
"truncated": 0, | |
"non_truncated": 220, | |
"padded": 877, | |
"non_padded": 3, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
} | |
}, | |
"summary_general": { | |
"hashes": { | |
"hash_examples": "da3af66181f18ddf", | |
"hash_full_prompts": "f7c298d03686fa0e", | |
"hash_input_tokens": "84e02a40c0a714da", | |
"hash_cont_tokens": "b3bace8c3199f6d8" | |
}, | |
"truncated": 0, | |
"non_truncated": 5151, | |
"padded": 21823, | |
"non_padded": 41, | |
"num_truncated_few_shots": 0 | |
} | |
} |