{
"config_general": {
"lighteval_sha": "?",
"num_fewshot_seeds": 1,
"override_batch_size": 1,
"max_samples": null,
"job_id": "",
"start_time": 1795532.004019956,
"end_time": 1795695.48919369,
"total_evaluation_time_secondes": "163.48517373390496",
"model_name": "Qwen/Qwen1.5-0.5B-Chat",
"model_sha": "6c705984bb8b5591dd4e1a9e66e1a127965fd08d",
"model_dtype": "torch.bfloat16",
"model_size": "1.05 GB",
"config": null
},
"results": {
"lighteval|bigbench:causal_judgment|0": {
"acc": 0.4789473684210526,
"acc_stderr": 0.03633739504773335
},
"lighteval|bigbench:date_understanding|0": {
"acc": 0.3794037940379404,
"acc_stderr": 0.025294813606764693
},
"lighteval|bigbench:disambiguation_qa|0": {
"acc": 0.3875968992248062,
"acc_stderr": 0.030390810031682234
},
"lighteval|bigbench:geometric_shapes|0": {
"acc": 0.013888888888888888,
"acc_stderr": 0.006176599666339092
},
"lighteval|bigbench:logical_deduction_five_objects|0": {
"acc": 0.2,
"acc_stderr": 0.017906459241433827
},
"lighteval|bigbench:logical_deduction_seven_objects|0": {
"acc": 0.14285714285714285,
"acc_stderr": 0.013235458703202271
},
"lighteval|bigbench:logical_deduction_three_objects|0": {
"acc": 0.3333333333333333,
"acc_stderr": 0.027262027336984386
},
"lighteval|bigbench:movie_recommendation|0": {
"acc": 0.326,
"acc_stderr": 0.020984009562393567
},
"lighteval|bigbench:navigate|0": {
"acc": 0.5,
"acc_stderr": 0.015819299929208316
},
"lighteval|bigbench:reasoning_about_colored_objects|0": {
"acc": 0.099,
"acc_stderr": 0.0066799559059513266
},
"lighteval|bigbench:ruin_names|0": {
"acc": 0.17857142857142858,
"acc_stderr": 0.018114949483399163
},
"lighteval|bigbench:salient_translation_error_detection|0": {
"acc": 0.12424849699398798,
"acc_stderr": 0.010446924076334348
},
"lighteval|bigbench:snarks|0": {
"acc": 0.47513812154696133,
"acc_stderr": 0.03722169968951702
},
"lighteval|bigbench:sports_understanding|0": {
"acc": 0.499,
"acc_stderr": 0.015819268290576817
},
"lighteval|bigbench:temporal_sequences|0": {
"acc": 0.018,
"acc_stderr": 0.004206387249611469
},
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": {
"acc": 0.1744,
"acc_stderr": 0.010736849442399807
},
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": {
"acc": 0.12114285714285715,
"acc_stderr": 0.007802129325772281
},
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": {
"acc": 0.3333333333333333,
"acc_stderr": 0.027262027336984386
},
"lighteval|bigbench:_average|0": {
"acc": 0.2658256480195407,
"acc_stderr": 0.01842761466257158
},
"all": {
"acc": 0.2658256480195407,
"acc_stderr": 0.01842761466257158
}
},
"versions": {
"lighteval|bigbench:causal_judgment|0": 0,
"lighteval|bigbench:date_understanding|0": 0,
"lighteval|bigbench:disambiguation_qa|0": 0,
"lighteval|bigbench:geometric_shapes|0": 0,
"lighteval|bigbench:logical_deduction_five_objects|0": 0,
"lighteval|bigbench:logical_deduction_seven_objects|0": 0,
"lighteval|bigbench:logical_deduction_three_objects|0": 0,
"lighteval|bigbench:movie_recommendation|0": 0,
"lighteval|bigbench:navigate|0": 0,
"lighteval|bigbench:reasoning_about_colored_objects|0": 0,
"lighteval|bigbench:ruin_names|0": 0,
"lighteval|bigbench:salient_translation_error_detection|0": 0,
"lighteval|bigbench:snarks|0": 0,
"lighteval|bigbench:sports_understanding|0": 0,
"lighteval|bigbench:temporal_sequences|0": 0,
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": 0,
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": 0,
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": 0
},
"config_tasks": {
"lighteval|bigbench:causal_judgment": {
"name": "bigbench:causal_judgment",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "causal_judgement",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 190,
"effective_num_docs": 190,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:date_understanding": {
"name": "bigbench:date_understanding",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "date_understanding",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 369,
"effective_num_docs": 369,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:disambiguation_qa": {
"name": "bigbench:disambiguation_qa",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "disambiguation_qa",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 258,
"effective_num_docs": 258,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:geometric_shapes": {
"name": "bigbench:geometric_shapes",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "geometric_shapes",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 360,
"effective_num_docs": 360,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:logical_deduction_five_objects": {
"name": "bigbench:logical_deduction_five_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "logical_deduction_five_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 500,
"effective_num_docs": 500,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:logical_deduction_seven_objects": {
"name": "bigbench:logical_deduction_seven_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "logical_deduction_seven_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 700,
"effective_num_docs": 700,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:logical_deduction_three_objects": {
"name": "bigbench:logical_deduction_three_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "logical_deduction_three_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 300,
"effective_num_docs": 300,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:movie_recommendation": {
"name": "bigbench:movie_recommendation",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "movie_recommendation",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 500,
"effective_num_docs": 500,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:navigate": {
"name": "bigbench:navigate",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "navigate",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1000,
"effective_num_docs": 1000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:reasoning_about_colored_objects": {
"name": "bigbench:reasoning_about_colored_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "reasoning_about_colored_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 2000,
"effective_num_docs": 2000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:ruin_names": {
"name": "bigbench:ruin_names",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "ruin_names",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 448,
"effective_num_docs": 448,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:salient_translation_error_detection": {
"name": "bigbench:salient_translation_error_detection",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "salient_translation_error_detection",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 998,
"effective_num_docs": 998,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:snarks": {
"name": "bigbench:snarks",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "snarks",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 181,
"effective_num_docs": 181,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:sports_understanding": {
"name": "bigbench:sports_understanding",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "sports_understanding",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1000,
"effective_num_docs": 1000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:temporal_sequences": {
"name": "bigbench:temporal_sequences",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "temporal_sequences",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1000,
"effective_num_docs": 1000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:tracking_shuffled_objects_five_objects": {
"name": "bigbench:tracking_shuffled_objects_five_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "tracking_shuffled_objects_five_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1250,
"effective_num_docs": 1250,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:tracking_shuffled_objects_seven_objects": {
"name": "bigbench:tracking_shuffled_objects_seven_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "tracking_shuffled_objects_seven_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1750,
"effective_num_docs": 1750,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:tracking_shuffled_objects_three_objects": {
"name": "bigbench:tracking_shuffled_objects_three_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "tracking_shuffled_objects_three_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 300,
"effective_num_docs": 300,
"trust_dataset": true,
"must_remove_duplicate_docs": null
}
},
"summary_tasks": {
"lighteval|bigbench:causal_judgment|0": {
"hashes": {
"hash_examples": "dfb1ae47218f2850",
"hash_full_prompts": "e04f51fae9168ed3",
"hash_input_tokens": "43fd09ab4f79bab4",
"hash_cont_tokens": "0bd2e09b9b2063d5"
},
"truncated": 0,
"non_truncated": 190,
"padded": 189,
"non_padded": 1,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:date_understanding|0": {
"hashes": {
"hash_examples": "2b823c41500a6ec2",
"hash_full_prompts": "579e435e125ebfc9",
"hash_input_tokens": "8a34a6cd2498e112",
"hash_cont_tokens": "d37f82f02dc27ad5"
},
"truncated": 0,
"non_truncated": 369,
"padded": 369,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:disambiguation_qa|0": {
"hashes": {
"hash_examples": "2a4c3d41db198cea",
"hash_full_prompts": "017dab7775146995",
"hash_input_tokens": "479b145ad1a36326",
"hash_cont_tokens": "7ef2f8d13014d861"
},
"truncated": 0,
"non_truncated": 258,
"padded": 245,
"non_padded": 13,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:geometric_shapes|0": {
"hashes": {
"hash_examples": "24aa261103911b72",
"hash_full_prompts": "a67d2ac0fff49c64",
"hash_input_tokens": "b5db21be86c73429",
"hash_cont_tokens": "57b55833bf76ee1f"
},
"truncated": 0,
"non_truncated": 360,
"padded": 360,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:logical_deduction_five_objects|0": {
"hashes": {
"hash_examples": "cb5bdc92afc41f83",
"hash_full_prompts": "df9c09d184a17286",
"hash_input_tokens": "43544cec1fc3369b",
"hash_cont_tokens": "d2690912d35d2c9d"
},
"truncated": 0,
"non_truncated": 500,
"padded": 500,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:logical_deduction_seven_objects|0": {
"hashes": {
"hash_examples": "b6805ea696739f9f",
"hash_full_prompts": "422b4d9404690780",
"hash_input_tokens": "f9991231f7b1c997",
"hash_cont_tokens": "633f6f1127975a81"
},
"truncated": 0,
"non_truncated": 700,
"padded": 700,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:logical_deduction_three_objects|0": {
"hashes": {
"hash_examples": "0509e5712ab9bcdb",
"hash_full_prompts": "82d86c2a6554ad99",
"hash_input_tokens": "6574216b7f933fa9",
"hash_cont_tokens": "6a0e6e1c01e1dcdd"
},
"truncated": 0,
"non_truncated": 300,
"padded": 264,
"non_padded": 36,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:movie_recommendation|0": {
"hashes": {
"hash_examples": "530cc6f737830f45",
"hash_full_prompts": "51f43c421591e388",
"hash_input_tokens": "0b4cd816a5128ea2",
"hash_cont_tokens": "337988ba0f6b6159"
},
"truncated": 0,
"non_truncated": 500,
"padded": 500,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:navigate|0": {
"hashes": {
"hash_examples": "7962ef85d0058b9a",
"hash_full_prompts": "37ba35cc6185edd5",
"hash_input_tokens": "16a08228780c2aa4",
"hash_cont_tokens": "a1d752ca90a360bc"
},
"truncated": 0,
"non_truncated": 1000,
"padded": 978,
"non_padded": 22,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:reasoning_about_colored_objects|0": {
"hashes": {
"hash_examples": "39be1ab1677a651d",
"hash_full_prompts": "138c2847b9901aa6",
"hash_input_tokens": "aa6c18c996a7ca99",
"hash_cont_tokens": "dbb3876c36a97a0e"
},
"truncated": 0,
"non_truncated": 2000,
"padded": 2000,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:ruin_names|0": {
"hashes": {
"hash_examples": "e9b96b31d2154941",
"hash_full_prompts": "2e9bf9b689fa2488",
"hash_input_tokens": "9ab3210f8e62de62",
"hash_cont_tokens": "19d0b304bf4664c1"
},
"truncated": 0,
"non_truncated": 448,
"padded": 443,
"non_padded": 5,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:salient_translation_error_detection|0": {
"hashes": {
"hash_examples": "951ac59f7ad0427d",
"hash_full_prompts": "04e26eb17dfdab37",
"hash_input_tokens": "fe2b82feff0ab97d",
"hash_cont_tokens": "5630f19f7027ea3f"
},
"truncated": 0,
"non_truncated": 998,
"padded": 998,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:snarks|0": {
"hashes": {
"hash_examples": "3a53eb9b9d758534",
"hash_full_prompts": "2815ce0f51def951",
"hash_input_tokens": "8bfd163249dadc53",
"hash_cont_tokens": "976b8fb97db1c063"
},
"truncated": 0,
"non_truncated": 181,
"padded": 180,
"non_padded": 1,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:sports_understanding|0": {
"hashes": {
"hash_examples": "bd65741f00770373",
"hash_full_prompts": "1a09e8a806f1368d",
"hash_input_tokens": "906d6f19d23cd20c",
"hash_cont_tokens": "dc942e2d909c04d4"
},
"truncated": 0,
"non_truncated": 1000,
"padded": 1000,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:temporal_sequences|0": {
"hashes": {
"hash_examples": "1d13139f47cb2df7",
"hash_full_prompts": "27acc8c08f1b38c3",
"hash_input_tokens": "be78efa87d861ab3",
"hash_cont_tokens": "dc1efb01cf03259d"
},
"truncated": 0,
"non_truncated": 1000,
"padded": 1000,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": {
"hashes": {
"hash_examples": "8770a702a9646648",
"hash_full_prompts": "6eb76afebebe1c95",
"hash_input_tokens": "e6d941f6d05ff401",
"hash_cont_tokens": "f0094a1c3f5ac484"
},
"truncated": 0,
"non_truncated": 1250,
"padded": 1180,
"non_padded": 70,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": {
"hashes": {
"hash_examples": "b469b7d073824a59",
"hash_full_prompts": "c743363dcab71b60",
"hash_input_tokens": "216efa2da61c0c31",
"hash_cont_tokens": "41a2680a95dc69ec"
},
"truncated": 0,
"non_truncated": 1750,
"padded": 1629,
"non_padded": 121,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": {
"hashes": {
"hash_examples": "0509e5712ab9bcdb",
"hash_full_prompts": "82d86c2a6554ad99",
"hash_input_tokens": "6574216b7f933fa9",
"hash_cont_tokens": "f3c3d2b939accd8f"
},
"truncated": 0,
"non_truncated": 300,
"padded": 264,
"non_padded": 36,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
}
},
"summary_general": {
"hashes": {
"hash_examples": "51a30c4501ba4586",
"hash_full_prompts": "1a7e5492d275c843",
"hash_input_tokens": "540f333b3bc520cd",
"hash_cont_tokens": "baf520770a353964"
},
"truncated": 0,
"non_truncated": 13104,
"padded": 12799,
"non_padded": 305,
"num_truncated_few_shots": 0
}
}