{
"config_general": {
"lighteval_sha": "?",
"num_fewshot_seeds": 1,
"override_batch_size": 1,
"max_samples": null,
"job_id": "",
"start_time": 8645407.928935125,
"end_time": 8645891.4652991,
"total_evaluation_time_secondes": "483.5363639742136",
"model_name": "NousResearch/Nous-Hermes-2-Yi-34B",
"model_sha": "fcb0a8847e76aea14aba9aa44009d4418ad7c18f",
"model_dtype": "torch.bfloat16",
"model_size": "64.17 GB",
"config": null
},
"results": {
"lighteval|bigbench:causal_judgment|0": {
"acc": 0.6263157894736842,
"acc_stderr": 0.03518990966860906
},
"lighteval|bigbench:date_understanding|0": {
"acc": 0.4986449864498645,
"acc_stderr": 0.026064206045625166
},
"lighteval|bigbench:disambiguation_qa|0": {
"acc": 0.6201550387596899,
"acc_stderr": 0.030275181926834843
},
"lighteval|bigbench:geometric_shapes|0": {
"acc": 0.2111111111111111,
"acc_stderr": 0.0215385390082232
},
"lighteval|bigbench:logical_deduction_five_objects|0": {
"acc": 0.474,
"acc_stderr": 0.022352791650914156
},
"lighteval|bigbench:logical_deduction_seven_objects|0": {
"acc": 0.4757142857142857,
"acc_stderr": 0.018889415558597866
},
"lighteval|bigbench:logical_deduction_three_objects|0": {
"acc": 0.6533333333333333,
"acc_stderr": 0.027522498482247405
},
"lighteval|bigbench:movie_recommendation|0": {
"acc": 0.622,
"acc_stderr": 0.021706550824518184
},
"lighteval|bigbench:navigate|0": {
"acc": 0.563,
"acc_stderr": 0.015693223928730377
},
"lighteval|bigbench:reasoning_about_colored_objects|0": {
"acc": 0.652,
"acc_stderr": 0.01065386091406244
},
"lighteval|bigbench:ruin_names|0": {
"acc": 0.6004464285714286,
"acc_stderr": 0.02316705767879724
},
"lighteval|bigbench:salient_translation_error_detection|0": {
"acc": 0.45490981963927857,
"acc_stderr": 0.015770637738502503
},
"lighteval|bigbench:snarks|0": {
"acc": 0.7403314917127072,
"acc_stderr": 0.03268033507334897
},
"lighteval|bigbench:sports_understanding|0": {
"acc": 0.753,
"acc_stderr": 0.013644675781314128
},
"lighteval|bigbench:temporal_sequences|0": {
"acc": 0.98,
"acc_stderr": 0.004429403980178343
},
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": {
"acc": 0.2136,
"acc_stderr": 0.011596879843202517
},
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": {
"acc": 0.156,
"acc_stderr": 0.00867638106218217
},
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": {
"acc": 0.6533333333333333,
"acc_stderr": 0.027522498482247405
},
"lighteval|bigbench:_average|0": {
"acc": 0.552660867672151,
"acc_stderr": 0.020409669313785336
},
"all": {
"acc": 0.552660867672151,
"acc_stderr": 0.020409669313785336
}
},
"versions": {
"lighteval|bigbench:causal_judgment|0": 0,
"lighteval|bigbench:date_understanding|0": 0,
"lighteval|bigbench:disambiguation_qa|0": 0,
"lighteval|bigbench:geometric_shapes|0": 0,
"lighteval|bigbench:logical_deduction_five_objects|0": 0,
"lighteval|bigbench:logical_deduction_seven_objects|0": 0,
"lighteval|bigbench:logical_deduction_three_objects|0": 0,
"lighteval|bigbench:movie_recommendation|0": 0,
"lighteval|bigbench:navigate|0": 0,
"lighteval|bigbench:reasoning_about_colored_objects|0": 0,
"lighteval|bigbench:ruin_names|0": 0,
"lighteval|bigbench:salient_translation_error_detection|0": 0,
"lighteval|bigbench:snarks|0": 0,
"lighteval|bigbench:sports_understanding|0": 0,
"lighteval|bigbench:temporal_sequences|0": 0,
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": 0,
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": 0,
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": 0
},
"config_tasks": {
"lighteval|bigbench:causal_judgment": {
"name": "bigbench:causal_judgment",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "causal_judgement",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 190,
"effective_num_docs": 190,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:date_understanding": {
"name": "bigbench:date_understanding",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "date_understanding",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 369,
"effective_num_docs": 369,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:disambiguation_qa": {
"name": "bigbench:disambiguation_qa",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "disambiguation_qa",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 258,
"effective_num_docs": 258,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:geometric_shapes": {
"name": "bigbench:geometric_shapes",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "geometric_shapes",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 360,
"effective_num_docs": 360,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:logical_deduction_five_objects": {
"name": "bigbench:logical_deduction_five_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "logical_deduction_five_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 500,
"effective_num_docs": 500,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:logical_deduction_seven_objects": {
"name": "bigbench:logical_deduction_seven_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "logical_deduction_seven_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 700,
"effective_num_docs": 700,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:logical_deduction_three_objects": {
"name": "bigbench:logical_deduction_three_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "logical_deduction_three_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 300,
"effective_num_docs": 300,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:movie_recommendation": {
"name": "bigbench:movie_recommendation",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "movie_recommendation",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 500,
"effective_num_docs": 500,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:navigate": {
"name": "bigbench:navigate",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "navigate",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1000,
"effective_num_docs": 1000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:reasoning_about_colored_objects": {
"name": "bigbench:reasoning_about_colored_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "reasoning_about_colored_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 2000,
"effective_num_docs": 2000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:ruin_names": {
"name": "bigbench:ruin_names",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "ruin_names",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 448,
"effective_num_docs": 448,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:salient_translation_error_detection": {
"name": "bigbench:salient_translation_error_detection",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "salient_translation_error_detection",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 998,
"effective_num_docs": 998,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:snarks": {
"name": "bigbench:snarks",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "snarks",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 181,
"effective_num_docs": 181,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:sports_understanding": {
"name": "bigbench:sports_understanding",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "sports_understanding",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1000,
"effective_num_docs": 1000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:temporal_sequences": {
"name": "bigbench:temporal_sequences",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "temporal_sequences",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1000,
"effective_num_docs": 1000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:tracking_shuffled_objects_five_objects": {
"name": "bigbench:tracking_shuffled_objects_five_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "tracking_shuffled_objects_five_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1250,
"effective_num_docs": 1250,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:tracking_shuffled_objects_seven_objects": {
"name": "bigbench:tracking_shuffled_objects_seven_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "tracking_shuffled_objects_seven_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1750,
"effective_num_docs": 1750,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:tracking_shuffled_objects_three_objects": {
"name": "bigbench:tracking_shuffled_objects_three_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "tracking_shuffled_objects_three_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 300,
"effective_num_docs": 300,
"trust_dataset": true,
"must_remove_duplicate_docs": null
}
},
"summary_tasks": {
"lighteval|bigbench:causal_judgment|0": {
"hashes": {
"hash_examples": "dfb1ae47218f2850",
"hash_full_prompts": "7292c47f5bf2ba48",
"hash_input_tokens": "86ac77a6ea463b36",
"hash_cont_tokens": "f91ddf2bf5932965"
},
"truncated": 0,
"non_truncated": 190,
"padded": 189,
"non_padded": 1,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:date_understanding|0": {
"hashes": {
"hash_examples": "2b823c41500a6ec2",
"hash_full_prompts": "4db646afa4176c07",
"hash_input_tokens": "0b4ea785c04e97c2",
"hash_cont_tokens": "19581718ef40717a"
},
"truncated": 0,
"non_truncated": 369,
"padded": 369,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:disambiguation_qa|0": {
"hashes": {
"hash_examples": "2a4c3d41db198cea",
"hash_full_prompts": "12d668cf5edc9542",
"hash_input_tokens": "1859a0251fb45921",
"hash_cont_tokens": "6dc5c9e29baf7b40"
},
"truncated": 0,
"non_truncated": 258,
"padded": 258,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:geometric_shapes|0": {
"hashes": {
"hash_examples": "24aa261103911b72",
"hash_full_prompts": "51dfb12a121e7a69",
"hash_input_tokens": "608abf0cf805fd91",
"hash_cont_tokens": "cf60b306d8cbb91a"
},
"truncated": 0,
"non_truncated": 360,
"padded": 360,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:logical_deduction_five_objects|0": {
"hashes": {
"hash_examples": "cb5bdc92afc41f83",
"hash_full_prompts": "b6e4a71663bc3e1c",
"hash_input_tokens": "de90ebf55f2dc9b9",
"hash_cont_tokens": "18541cce2c29007a"
},
"truncated": 0,
"non_truncated": 500,
"padded": 500,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:logical_deduction_seven_objects|0": {
"hashes": {
"hash_examples": "b6805ea696739f9f",
"hash_full_prompts": "d0c82c066345c294",
"hash_input_tokens": "39f536652be39acc",
"hash_cont_tokens": "d91ea3d608b2b021"
},
"truncated": 0,
"non_truncated": 700,
"padded": 700,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:logical_deduction_three_objects|0": {
"hashes": {
"hash_examples": "0509e5712ab9bcdb",
"hash_full_prompts": "396c1e56901b46ed",
"hash_input_tokens": "49b2bd097da6ff11",
"hash_cont_tokens": "317e4f1359f93f31"
},
"truncated": 0,
"non_truncated": 300,
"padded": 300,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:movie_recommendation|0": {
"hashes": {
"hash_examples": "530cc6f737830f45",
"hash_full_prompts": "e821384b2a44e36b",
"hash_input_tokens": "96d2b7ca40ab4f59",
"hash_cont_tokens": "addccef657a7ca90"
},
"truncated": 0,
"non_truncated": 500,
"padded": 499,
"non_padded": 1,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:navigate|0": {
"hashes": {
"hash_examples": "7962ef85d0058b9a",
"hash_full_prompts": "43248e6945903d81",
"hash_input_tokens": "5dddf91b1bfa4edb",
"hash_cont_tokens": "4deb4dd8d35a3ed1"
},
"truncated": 0,
"non_truncated": 1000,
"padded": 998,
"non_padded": 2,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:reasoning_about_colored_objects|0": {
"hashes": {
"hash_examples": "39be1ab1677a651d",
"hash_full_prompts": "7f7a503aaa70068f",
"hash_input_tokens": "a836d4c12e4a99fa",
"hash_cont_tokens": "08c596b81f5a22ed"
},
"truncated": 0,
"non_truncated": 2000,
"padded": 2000,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:ruin_names|0": {
"hashes": {
"hash_examples": "e9b96b31d2154941",
"hash_full_prompts": "ae8931c806192844",
"hash_input_tokens": "f722cc7fb56e64a4",
"hash_cont_tokens": "b77a4f12c2829981"
},
"truncated": 0,
"non_truncated": 448,
"padded": 444,
"non_padded": 4,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:salient_translation_error_detection|0": {
"hashes": {
"hash_examples": "951ac59f7ad0427d",
"hash_full_prompts": "643d82c4ce3fab01",
"hash_input_tokens": "51f222c4cac88c48",
"hash_cont_tokens": "5f912d065084cbaf"
},
"truncated": 0,
"non_truncated": 998,
"padded": 998,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:snarks|0": {
"hashes": {
"hash_examples": "3a53eb9b9d758534",
"hash_full_prompts": "b12bcea4b9bc9027",
"hash_input_tokens": "1d36d0b9f6857dfc",
"hash_cont_tokens": "52f7ab62e113f30c"
},
"truncated": 0,
"non_truncated": 181,
"padded": 179,
"non_padded": 2,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:sports_understanding|0": {
"hashes": {
"hash_examples": "bd65741f00770373",
"hash_full_prompts": "39d7688aa2d209e1",
"hash_input_tokens": "3e2bab10213b0dcc",
"hash_cont_tokens": "3d0c373055bf8d31"
},
"truncated": 0,
"non_truncated": 1000,
"padded": 1000,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:temporal_sequences|0": {
"hashes": {
"hash_examples": "1d13139f47cb2df7",
"hash_full_prompts": "1a874610f00343dc",
"hash_input_tokens": "ee25d02df18c5e7c",
"hash_cont_tokens": "1f75870c8e02c316"
},
"truncated": 0,
"non_truncated": 1000,
"padded": 1000,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": {
"hashes": {
"hash_examples": "8770a702a9646648",
"hash_full_prompts": "392b486c4039dca8",
"hash_input_tokens": "a9bbabe45246306b",
"hash_cont_tokens": "bb007bb19f979310"
},
"truncated": 0,
"non_truncated": 1250,
"padded": 1018,
"non_padded": 232,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": {
"hashes": {
"hash_examples": "b469b7d073824a59",
"hash_full_prompts": "1bad8a693cc74da1",
"hash_input_tokens": "0f2c928df0aecd99",
"hash_cont_tokens": "bbedf4197474aac4"
},
"truncated": 0,
"non_truncated": 1750,
"padded": 1654,
"non_padded": 96,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": {
"hashes": {
"hash_examples": "0509e5712ab9bcdb",
"hash_full_prompts": "396c1e56901b46ed",
"hash_input_tokens": "5f6c833800ad0ef7",
"hash_cont_tokens": "d69a5277d7675acc"
},
"truncated": 0,
"non_truncated": 300,
"padded": 294,
"non_padded": 6,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
}
},
"summary_general": {
"hashes": {
"hash_examples": "51a30c4501ba4586",
"hash_full_prompts": "96a511cab844bc38",
"hash_input_tokens": "39265efacd2b20fd",
"hash_cont_tokens": "138d6f75ff2c56d1"
},
"truncated": 0,
"non_truncated": 13104,
"padded": 12760,
"non_padded": 344,
"num_truncated_few_shots": 0
}
}