open-r1-eval-leaderboard
/
eval_results
/NousResearch
/Nous-Hermes-2-Mixtral-8x7B-DPO
/main
/bbh
/results_2024-03-28T16-33-31.664648.json
{ | |
"config_general": { | |
"lighteval_sha": "?", | |
"num_fewshot_seeds": 1, | |
"override_batch_size": 1, | |
"max_samples": null, | |
"job_id": "", | |
"start_time": 1861136.960443986, | |
"end_time": 1861873.643255575, | |
"total_evaluation_time_secondes": "736.6828115889803", | |
"model_name": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", | |
"model_sha": "707b6e3251d114cc3d326e6a2bcff1449110aedf", | |
"model_dtype": "torch.bfloat16", | |
"model_size": "87.49 GB", | |
"config": null | |
}, | |
"results": { | |
"lighteval|bigbench:causal_judgment|0": { | |
"acc": 0.6052631578947368, | |
"acc_stderr": 0.03555453874463932 | |
}, | |
"lighteval|bigbench:date_understanding|0": { | |
"acc": 0.33875338753387535, | |
"acc_stderr": 0.024671732304675786 | |
}, | |
"lighteval|bigbench:disambiguation_qa|0": { | |
"acc": 0.5658914728682171, | |
"acc_stderr": 0.030917129965514243 | |
}, | |
"lighteval|bigbench:geometric_shapes|0": { | |
"acc": 0.21666666666666667, | |
"acc_stderr": 0.02174313340347186 | |
}, | |
"lighteval|bigbench:logical_deduction_five_objects|0": { | |
"acc": 0.448, | |
"acc_stderr": 0.02226169729227014 | |
}, | |
"lighteval|bigbench:logical_deduction_seven_objects|0": { | |
"acc": 0.4042857142857143, | |
"acc_stderr": 0.018561993547286486 | |
}, | |
"lighteval|bigbench:logical_deduction_three_objects|0": { | |
"acc": 0.51, | |
"acc_stderr": 0.02890996287056168 | |
}, | |
"lighteval|bigbench:movie_recommendation|0": { | |
"acc": 0.766, | |
"acc_stderr": 0.01895274156489368 | |
}, | |
"lighteval|bigbench:navigate|0": { | |
"acc": 0.5, | |
"acc_stderr": 0.015819299929208316 | |
}, | |
"lighteval|bigbench:reasoning_about_colored_objects|0": { | |
"acc": 0.5845, | |
"acc_stderr": 0.0110222783629408 | |
}, | |
"lighteval|bigbench:ruin_names|0": { | |
"acc": 0.6607142857142857, | |
"acc_stderr": 0.02239421657358994 | |
}, | |
"lighteval|bigbench:salient_translation_error_detection|0": { | |
"acc": 0.48096192384769537, | |
"acc_stderr": 0.015823675862764886 | |
}, | |
"lighteval|bigbench:snarks|0": { | |
"acc": 0.5966850828729282, | |
"acc_stderr": 0.03656440244811993 | |
}, | |
"lighteval|bigbench:sports_understanding|0": { | |
"acc": 0.747, | |
"acc_stderr": 0.01375427861358708 | |
}, | |
"lighteval|bigbench:temporal_sequences|0": { | |
"acc": 0.773, | |
"acc_stderr": 0.01325317496476391 | |
}, | |
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": { | |
"acc": 0.2, | |
"acc_stderr": 0.011318236699485796 | |
}, | |
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": { | |
"acc": 0.12914285714285714, | |
"acc_stderr": 0.00801888065002003 | |
}, | |
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": { | |
"acc": 0.5133333333333333, | |
"acc_stderr": 0.028905463615555044 | |
}, | |
"lighteval|bigbench:_average|0": { | |
"acc": 0.5022332156755728, | |
"acc_stderr": 0.02102482430074161 | |
}, | |
"all": { | |
"acc": 0.5022332156755728, | |
"acc_stderr": 0.02102482430074161 | |
} | |
}, | |
"versions": { | |
"lighteval|bigbench:causal_judgment|0": 0, | |
"lighteval|bigbench:date_understanding|0": 0, | |
"lighteval|bigbench:disambiguation_qa|0": 0, | |
"lighteval|bigbench:geometric_shapes|0": 0, | |
"lighteval|bigbench:logical_deduction_five_objects|0": 0, | |
"lighteval|bigbench:logical_deduction_seven_objects|0": 0, | |
"lighteval|bigbench:logical_deduction_three_objects|0": 0, | |
"lighteval|bigbench:movie_recommendation|0": 0, | |
"lighteval|bigbench:navigate|0": 0, | |
"lighteval|bigbench:reasoning_about_colored_objects|0": 0, | |
"lighteval|bigbench:ruin_names|0": 0, | |
"lighteval|bigbench:salient_translation_error_detection|0": 0, | |
"lighteval|bigbench:snarks|0": 0, | |
"lighteval|bigbench:sports_understanding|0": 0, | |
"lighteval|bigbench:temporal_sequences|0": 0, | |
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": 0, | |
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": 0, | |
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": 0 | |
}, | |
"config_tasks": { | |
"lighteval|bigbench:causal_judgment": { | |
"name": "bigbench:causal_judgment", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "causal_judgement", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 190, | |
"effective_num_docs": 190, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|bigbench:date_understanding": { | |
"name": "bigbench:date_understanding", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "date_understanding", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 369, | |
"effective_num_docs": 369, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|bigbench:disambiguation_qa": { | |
"name": "bigbench:disambiguation_qa", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "disambiguation_qa", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 258, | |
"effective_num_docs": 258, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|bigbench:geometric_shapes": { | |
"name": "bigbench:geometric_shapes", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "geometric_shapes", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 360, | |
"effective_num_docs": 360, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|bigbench:logical_deduction_five_objects": { | |
"name": "bigbench:logical_deduction_five_objects", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "logical_deduction_five_objects", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 500, | |
"effective_num_docs": 500, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|bigbench:logical_deduction_seven_objects": { | |
"name": "bigbench:logical_deduction_seven_objects", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "logical_deduction_seven_objects", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 700, | |
"effective_num_docs": 700, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|bigbench:logical_deduction_three_objects": { | |
"name": "bigbench:logical_deduction_three_objects", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "logical_deduction_three_objects", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 300, | |
"effective_num_docs": 300, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|bigbench:movie_recommendation": { | |
"name": "bigbench:movie_recommendation", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "movie_recommendation", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 500, | |
"effective_num_docs": 500, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|bigbench:navigate": { | |
"name": "bigbench:navigate", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "navigate", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 1000, | |
"effective_num_docs": 1000, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|bigbench:reasoning_about_colored_objects": { | |
"name": "bigbench:reasoning_about_colored_objects", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "reasoning_about_colored_objects", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 2000, | |
"effective_num_docs": 2000, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|bigbench:ruin_names": { | |
"name": "bigbench:ruin_names", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "ruin_names", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 448, | |
"effective_num_docs": 448, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|bigbench:salient_translation_error_detection": { | |
"name": "bigbench:salient_translation_error_detection", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "salient_translation_error_detection", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 998, | |
"effective_num_docs": 998, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|bigbench:snarks": { | |
"name": "bigbench:snarks", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "snarks", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 181, | |
"effective_num_docs": 181, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|bigbench:sports_understanding": { | |
"name": "bigbench:sports_understanding", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "sports_understanding", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 1000, | |
"effective_num_docs": 1000, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|bigbench:temporal_sequences": { | |
"name": "bigbench:temporal_sequences", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "temporal_sequences", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 1000, | |
"effective_num_docs": 1000, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|bigbench:tracking_shuffled_objects_five_objects": { | |
"name": "bigbench:tracking_shuffled_objects_five_objects", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "tracking_shuffled_objects_five_objects", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 1250, | |
"effective_num_docs": 1250, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|bigbench:tracking_shuffled_objects_seven_objects": { | |
"name": "bigbench:tracking_shuffled_objects_seven_objects", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "tracking_shuffled_objects_seven_objects", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 1750, | |
"effective_num_docs": 1750, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
}, | |
"lighteval|bigbench:tracking_shuffled_objects_three_objects": { | |
"name": "bigbench:tracking_shuffled_objects_three_objects", | |
"prompt_function": "bbh_lighteval", | |
"hf_repo": "lighteval/bbh", | |
"hf_subset": "tracking_shuffled_objects_three_objects", | |
"metric": [ | |
"loglikelihood_acc_single_token" | |
], | |
"hf_avail_splits": [ | |
"train" | |
], | |
"evaluation_splits": [ | |
"train" | |
], | |
"few_shots_split": null, | |
"few_shots_select": null, | |
"generation_size": -1, | |
"stop_sequence": [ | |
"</s>", | |
"Q:", | |
"\n\n" | |
], | |
"output_regex": null, | |
"frozen": false, | |
"suite": [ | |
"lighteval" | |
], | |
"original_num_docs": 300, | |
"effective_num_docs": 300, | |
"trust_dataset": true, | |
"must_remove_duplicate_docs": null | |
} | |
}, | |
"summary_tasks": { | |
"lighteval|bigbench:causal_judgment|0": { | |
"hashes": { | |
"hash_examples": "dfb1ae47218f2850", | |
"hash_full_prompts": "7292c47f5bf2ba48", | |
"hash_input_tokens": "38a7f6bf61e002d4", | |
"hash_cont_tokens": "ac670c3ea513a639" | |
}, | |
"truncated": 0, | |
"non_truncated": 190, | |
"padded": 189, | |
"non_padded": 1, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|bigbench:date_understanding|0": { | |
"hashes": { | |
"hash_examples": "2b823c41500a6ec2", | |
"hash_full_prompts": "4db646afa4176c07", | |
"hash_input_tokens": "e1b94d842c1694dc", | |
"hash_cont_tokens": "e7711b87d7f90d38" | |
}, | |
"truncated": 0, | |
"non_truncated": 369, | |
"padded": 369, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|bigbench:disambiguation_qa|0": { | |
"hashes": { | |
"hash_examples": "2a4c3d41db198cea", | |
"hash_full_prompts": "12d668cf5edc9542", | |
"hash_input_tokens": "34e9783f4eab459c", | |
"hash_cont_tokens": "de89f8a6e5dac00c" | |
}, | |
"truncated": 0, | |
"non_truncated": 258, | |
"padded": 258, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|bigbench:geometric_shapes|0": { | |
"hashes": { | |
"hash_examples": "24aa261103911b72", | |
"hash_full_prompts": "51dfb12a121e7a69", | |
"hash_input_tokens": "23351f6852f46202", | |
"hash_cont_tokens": "e51eec73c3eb26c9" | |
}, | |
"truncated": 0, | |
"non_truncated": 360, | |
"padded": 360, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|bigbench:logical_deduction_five_objects|0": { | |
"hashes": { | |
"hash_examples": "cb5bdc92afc41f83", | |
"hash_full_prompts": "b6e4a71663bc3e1c", | |
"hash_input_tokens": "c4cda2e708514477", | |
"hash_cont_tokens": "4c9e9d2d14981c58" | |
}, | |
"truncated": 0, | |
"non_truncated": 500, | |
"padded": 500, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|bigbench:logical_deduction_seven_objects|0": { | |
"hashes": { | |
"hash_examples": "b6805ea696739f9f", | |
"hash_full_prompts": "d0c82c066345c294", | |
"hash_input_tokens": "9002a6d061bf23f0", | |
"hash_cont_tokens": "1745fa6fd92f0e0d" | |
}, | |
"truncated": 0, | |
"non_truncated": 700, | |
"padded": 700, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|bigbench:logical_deduction_three_objects|0": { | |
"hashes": { | |
"hash_examples": "0509e5712ab9bcdb", | |
"hash_full_prompts": "396c1e56901b46ed", | |
"hash_input_tokens": "b4e9bf04c25db80f", | |
"hash_cont_tokens": "2b5b679169d7bcf1" | |
}, | |
"truncated": 0, | |
"non_truncated": 300, | |
"padded": 300, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|bigbench:movie_recommendation|0": { | |
"hashes": { | |
"hash_examples": "530cc6f737830f45", | |
"hash_full_prompts": "e821384b2a44e36b", | |
"hash_input_tokens": "7b15e974f776a46b", | |
"hash_cont_tokens": "be520838bf2427bc" | |
}, | |
"truncated": 0, | |
"non_truncated": 500, | |
"padded": 500, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|bigbench:navigate|0": { | |
"hashes": { | |
"hash_examples": "7962ef85d0058b9a", | |
"hash_full_prompts": "43248e6945903d81", | |
"hash_input_tokens": "9b3688a5804bdb72", | |
"hash_cont_tokens": "04e3a57b821a3dd8" | |
}, | |
"truncated": 0, | |
"non_truncated": 1000, | |
"padded": 988, | |
"non_padded": 12, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|bigbench:reasoning_about_colored_objects|0": { | |
"hashes": { | |
"hash_examples": "39be1ab1677a651d", | |
"hash_full_prompts": "7f7a503aaa70068f", | |
"hash_input_tokens": "e4a82d21a6a5158a", | |
"hash_cont_tokens": "3fe982d2154a001a" | |
}, | |
"truncated": 0, | |
"non_truncated": 2000, | |
"padded": 1969, | |
"non_padded": 31, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|bigbench:ruin_names|0": { | |
"hashes": { | |
"hash_examples": "e9b96b31d2154941", | |
"hash_full_prompts": "ae8931c806192844", | |
"hash_input_tokens": "f0d9c874f996ad1a", | |
"hash_cont_tokens": "046bbbbddb05b429" | |
}, | |
"truncated": 0, | |
"non_truncated": 448, | |
"padded": 442, | |
"non_padded": 6, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|bigbench:salient_translation_error_detection|0": { | |
"hashes": { | |
"hash_examples": "951ac59f7ad0427d", | |
"hash_full_prompts": "643d82c4ce3fab01", | |
"hash_input_tokens": "d9f6fda316ec9cc3", | |
"hash_cont_tokens": "e78fb6d09071e0f6" | |
}, | |
"truncated": 0, | |
"non_truncated": 998, | |
"padded": 998, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|bigbench:snarks|0": { | |
"hashes": { | |
"hash_examples": "3a53eb9b9d758534", | |
"hash_full_prompts": "b12bcea4b9bc9027", | |
"hash_input_tokens": "4bd420484ded7d23", | |
"hash_cont_tokens": "f5cb71a436613293" | |
}, | |
"truncated": 0, | |
"non_truncated": 181, | |
"padded": 178, | |
"non_padded": 3, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|bigbench:sports_understanding|0": { | |
"hashes": { | |
"hash_examples": "bd65741f00770373", | |
"hash_full_prompts": "39d7688aa2d209e1", | |
"hash_input_tokens": "8895f91670520bad", | |
"hash_cont_tokens": "02230fac16464d15" | |
}, | |
"truncated": 0, | |
"non_truncated": 1000, | |
"padded": 1000, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|bigbench:temporal_sequences|0": { | |
"hashes": { | |
"hash_examples": "1d13139f47cb2df7", | |
"hash_full_prompts": "1a874610f00343dc", | |
"hash_input_tokens": "1856c82407266cde", | |
"hash_cont_tokens": "88c86d8bfb960c7d" | |
}, | |
"truncated": 0, | |
"non_truncated": 1000, | |
"padded": 1000, | |
"non_padded": 0, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": { | |
"hashes": { | |
"hash_examples": "8770a702a9646648", | |
"hash_full_prompts": "392b486c4039dca8", | |
"hash_input_tokens": "ca5f693edb7e6412", | |
"hash_cont_tokens": "7cf11d867348e0b1" | |
}, | |
"truncated": 0, | |
"non_truncated": 1250, | |
"padded": 1180, | |
"non_padded": 70, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": { | |
"hashes": { | |
"hash_examples": "b469b7d073824a59", | |
"hash_full_prompts": "1bad8a693cc74da1", | |
"hash_input_tokens": "bf2c8544db87959f", | |
"hash_cont_tokens": "f76ba63a583d749e" | |
}, | |
"truncated": 0, | |
"non_truncated": 1750, | |
"padded": 1701, | |
"non_padded": 49, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": { | |
"hashes": { | |
"hash_examples": "0509e5712ab9bcdb", | |
"hash_full_prompts": "396c1e56901b46ed", | |
"hash_input_tokens": "067226cc5c02942d", | |
"hash_cont_tokens": "b2cce0a4a2edc859" | |
}, | |
"truncated": 0, | |
"non_truncated": 300, | |
"padded": 294, | |
"non_padded": 6, | |
"effective_few_shots": 0.0, | |
"num_truncated_few_shots": 0 | |
} | |
}, | |
"summary_general": { | |
"hashes": { | |
"hash_examples": "51a30c4501ba4586", | |
"hash_full_prompts": "96a511cab844bc38", | |
"hash_input_tokens": "9b82a3a567ed4879", | |
"hash_cont_tokens": "2f0ff7c19ccc0d8e" | |
}, | |
"truncated": 0, | |
"non_truncated": 13104, | |
"padded": 12926, | |
"non_padded": 178, | |
"num_truncated_few_shots": 0 | |
} | |
} |