abhishek's picture
Upload eval_results/abhishek/autotrain-llama3-70b-orpo-v1/main/bbh/results_2024-05-03T07-51-19.812871.json with huggingface_hub
7ded333 verified
raw
history blame
27 kB
{
"config_general": {
"lighteval_sha": "?",
"num_fewshot_seeds": 1,
"override_batch_size": 4,
"max_samples": null,
"job_id": "",
"start_time": 21489.642559391,
"end_time": 24860.669378341,
"total_evaluation_time_secondes": "3371.026818950002",
"model_name": "abhishek/autotrain-llama3-70b-orpo-v1",
"model_sha": "053236c6846cc561c1503ba05e2b28c94855a432",
"model_dtype": "torch.float16",
"model_size": "131.73 GB",
"config": null
},
"results": {
"lighteval|bigbench:causal_judgment|0": {
"acc": 0.5473684210526316,
"acc_stderr": 0.03620607045823047
},
"lighteval|bigbench:date_understanding|0": {
"acc": 0.8699186991869918,
"acc_stderr": 0.017535690003269638
},
"lighteval|bigbench:disambiguation_qa|0": {
"acc": 0.2713178294573643,
"acc_stderr": 0.027735862587039094
},
"lighteval|bigbench:geometric_shapes|0": {
"acc": 0.11944444444444445,
"acc_stderr": 0.01711646726494003
},
"lighteval|bigbench:logical_deduction_five_objects|0": {
"acc": 0.2,
"acc_stderr": 0.01790645924143384
},
"lighteval|bigbench:logical_deduction_seven_objects|0": {
"acc": 0.17285714285714285,
"acc_stderr": 0.014301952757432554
},
"lighteval|bigbench:logical_deduction_three_objects|0": {
"acc": 0.42333333333333334,
"acc_stderr": 0.02857380411635232
},
"lighteval|bigbench:movie_recommendation|0": {
"acc": 0.562,
"acc_stderr": 0.02221032636397741
},
"lighteval|bigbench:navigate|0": {
"acc": 0.504,
"acc_stderr": 0.015818793703510883
},
"lighteval|bigbench:reasoning_about_colored_objects|0": {
"acc": 0.2935,
"acc_stderr": 0.010184828931807666
},
"lighteval|bigbench:ruin_names|0": {
"acc": 0.5290178571428571,
"acc_stderr": 0.02360932796197701
},
"lighteval|bigbench:salient_translation_error_detection|0": {
"acc": 0.18937875751503006,
"acc_stderr": 0.012408721643892128
},
"lighteval|bigbench:snarks|0": {
"acc": 0.4696132596685083,
"acc_stderr": 0.03719891321680328
},
"lighteval|bigbench:sports_understanding|0": {
"acc": 0.649,
"acc_stderr": 0.015100563798316405
},
"lighteval|bigbench:temporal_sequences|0": {
"acc": 1.0,
"acc_stderr": 0.0
},
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": {
"acc": 0.2048,
"acc_stderr": 0.011418838815918432
},
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": {
"acc": 0.14285714285714285,
"acc_stderr": 0.008367248752248816
},
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": {
"acc": 0.42333333333333334,
"acc_stderr": 0.02857380411635232
},
"lighteval|bigbench:_average|0": {
"acc": 0.42065223449159883,
"acc_stderr": 0.019125981874083462
},
"all": {
"acc": 0.42065223449159883,
"acc_stderr": 0.019125981874083462
}
},
"versions": {
"lighteval|bigbench:causal_judgment|0": 0,
"lighteval|bigbench:date_understanding|0": 0,
"lighteval|bigbench:disambiguation_qa|0": 0,
"lighteval|bigbench:geometric_shapes|0": 0,
"lighteval|bigbench:logical_deduction_five_objects|0": 0,
"lighteval|bigbench:logical_deduction_seven_objects|0": 0,
"lighteval|bigbench:logical_deduction_three_objects|0": 0,
"lighteval|bigbench:movie_recommendation|0": 0,
"lighteval|bigbench:navigate|0": 0,
"lighteval|bigbench:reasoning_about_colored_objects|0": 0,
"lighteval|bigbench:ruin_names|0": 0,
"lighteval|bigbench:salient_translation_error_detection|0": 0,
"lighteval|bigbench:snarks|0": 0,
"lighteval|bigbench:sports_understanding|0": 0,
"lighteval|bigbench:temporal_sequences|0": 0,
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": 0,
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": 0,
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": 0
},
"config_tasks": {
"lighteval|bigbench:causal_judgment": {
"name": "bigbench:causal_judgment",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "causal_judgement",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 190,
"effective_num_docs": 190,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:date_understanding": {
"name": "bigbench:date_understanding",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "date_understanding",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 369,
"effective_num_docs": 369,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:disambiguation_qa": {
"name": "bigbench:disambiguation_qa",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "disambiguation_qa",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 258,
"effective_num_docs": 258,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:geometric_shapes": {
"name": "bigbench:geometric_shapes",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "geometric_shapes",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 360,
"effective_num_docs": 360,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:logical_deduction_five_objects": {
"name": "bigbench:logical_deduction_five_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "logical_deduction_five_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 500,
"effective_num_docs": 500,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:logical_deduction_seven_objects": {
"name": "bigbench:logical_deduction_seven_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "logical_deduction_seven_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 700,
"effective_num_docs": 700,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:logical_deduction_three_objects": {
"name": "bigbench:logical_deduction_three_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "logical_deduction_three_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 300,
"effective_num_docs": 300,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:movie_recommendation": {
"name": "bigbench:movie_recommendation",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "movie_recommendation",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 500,
"effective_num_docs": 500,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:navigate": {
"name": "bigbench:navigate",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "navigate",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1000,
"effective_num_docs": 1000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:reasoning_about_colored_objects": {
"name": "bigbench:reasoning_about_colored_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "reasoning_about_colored_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 2000,
"effective_num_docs": 2000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:ruin_names": {
"name": "bigbench:ruin_names",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "ruin_names",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 448,
"effective_num_docs": 448,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:salient_translation_error_detection": {
"name": "bigbench:salient_translation_error_detection",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "salient_translation_error_detection",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 998,
"effective_num_docs": 998,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:snarks": {
"name": "bigbench:snarks",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "snarks",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 181,
"effective_num_docs": 181,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:sports_understanding": {
"name": "bigbench:sports_understanding",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "sports_understanding",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1000,
"effective_num_docs": 1000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:temporal_sequences": {
"name": "bigbench:temporal_sequences",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "temporal_sequences",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1000,
"effective_num_docs": 1000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:tracking_shuffled_objects_five_objects": {
"name": "bigbench:tracking_shuffled_objects_five_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "tracking_shuffled_objects_five_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1250,
"effective_num_docs": 1250,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:tracking_shuffled_objects_seven_objects": {
"name": "bigbench:tracking_shuffled_objects_seven_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "tracking_shuffled_objects_seven_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1750,
"effective_num_docs": 1750,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:tracking_shuffled_objects_three_objects": {
"name": "bigbench:tracking_shuffled_objects_three_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "tracking_shuffled_objects_three_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 300,
"effective_num_docs": 300,
"trust_dataset": true,
"must_remove_duplicate_docs": null
}
},
"summary_tasks": {
"lighteval|bigbench:causal_judgment|0": {
"hashes": {
"hash_examples": "dfb1ae47218f2850",
"hash_full_prompts": "7292c47f5bf2ba48",
"hash_input_tokens": "61e1aa83b8b300f3",
"hash_cont_tokens": "30dfa9354bfa4ea7"
},
"truncated": 0,
"non_truncated": 190,
"padded": 189,
"non_padded": 1,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:date_understanding|0": {
"hashes": {
"hash_examples": "2b823c41500a6ec2",
"hash_full_prompts": "4db646afa4176c07",
"hash_input_tokens": "0d36e29d28932b63",
"hash_cont_tokens": "2200d35a17fcfadf"
},
"truncated": 0,
"non_truncated": 369,
"padded": 369,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:disambiguation_qa|0": {
"hashes": {
"hash_examples": "2a4c3d41db198cea",
"hash_full_prompts": "12d668cf5edc9542",
"hash_input_tokens": "90406b7804e5be46",
"hash_cont_tokens": "b6d47efad4db3840"
},
"truncated": 0,
"non_truncated": 258,
"padded": 258,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:geometric_shapes|0": {
"hashes": {
"hash_examples": "24aa261103911b72",
"hash_full_prompts": "51dfb12a121e7a69",
"hash_input_tokens": "a559dad9547ba0b1",
"hash_cont_tokens": "fcc0ea2904589c56"
},
"truncated": 0,
"non_truncated": 360,
"padded": 360,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:logical_deduction_five_objects|0": {
"hashes": {
"hash_examples": "cb5bdc92afc41f83",
"hash_full_prompts": "b6e4a71663bc3e1c",
"hash_input_tokens": "148122705efe633b",
"hash_cont_tokens": "f76e64ee34e05e86"
},
"truncated": 0,
"non_truncated": 500,
"padded": 500,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:logical_deduction_seven_objects|0": {
"hashes": {
"hash_examples": "b6805ea696739f9f",
"hash_full_prompts": "d0c82c066345c294",
"hash_input_tokens": "2a5a8fa143a3e366",
"hash_cont_tokens": "e3be0b55506ef44c"
},
"truncated": 0,
"non_truncated": 700,
"padded": 700,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:logical_deduction_three_objects|0": {
"hashes": {
"hash_examples": "0509e5712ab9bcdb",
"hash_full_prompts": "396c1e56901b46ed",
"hash_input_tokens": "d283f5bdd42808c8",
"hash_cont_tokens": "a220b50db36ea137"
},
"truncated": 0,
"non_truncated": 300,
"padded": 267,
"non_padded": 33,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:movie_recommendation|0": {
"hashes": {
"hash_examples": "530cc6f737830f45",
"hash_full_prompts": "e821384b2a44e36b",
"hash_input_tokens": "56a10c1f996222e1",
"hash_cont_tokens": "d0089dce2f90542e"
},
"truncated": 0,
"non_truncated": 500,
"padded": 500,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:navigate|0": {
"hashes": {
"hash_examples": "7962ef85d0058b9a",
"hash_full_prompts": "43248e6945903d81",
"hash_input_tokens": "2aaae2464dc9607f",
"hash_cont_tokens": "16ef089e27fd4ecd"
},
"truncated": 0,
"non_truncated": 1000,
"padded": 990,
"non_padded": 10,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:reasoning_about_colored_objects|0": {
"hashes": {
"hash_examples": "39be1ab1677a651d",
"hash_full_prompts": "7f7a503aaa70068f",
"hash_input_tokens": "5a80e15afd068078",
"hash_cont_tokens": "ee24e9acc1ef15a4"
},
"truncated": 0,
"non_truncated": 2000,
"padded": 1967,
"non_padded": 33,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:ruin_names|0": {
"hashes": {
"hash_examples": "e9b96b31d2154941",
"hash_full_prompts": "ae8931c806192844",
"hash_input_tokens": "77d76e99dbf7bb0a",
"hash_cont_tokens": "67949635d7b35df4"
},
"truncated": 0,
"non_truncated": 448,
"padded": 443,
"non_padded": 5,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:salient_translation_error_detection|0": {
"hashes": {
"hash_examples": "951ac59f7ad0427d",
"hash_full_prompts": "643d82c4ce3fab01",
"hash_input_tokens": "271f1700c9e45a53",
"hash_cont_tokens": "cb14b4ccac044cae"
},
"truncated": 0,
"non_truncated": 998,
"padded": 998,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:snarks|0": {
"hashes": {
"hash_examples": "3a53eb9b9d758534",
"hash_full_prompts": "b12bcea4b9bc9027",
"hash_input_tokens": "a86888a2c12977ba",
"hash_cont_tokens": "9f478582040dafd7"
},
"truncated": 0,
"non_truncated": 181,
"padded": 180,
"non_padded": 1,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:sports_understanding|0": {
"hashes": {
"hash_examples": "bd65741f00770373",
"hash_full_prompts": "39d7688aa2d209e1",
"hash_input_tokens": "832bb3d08dbb24da",
"hash_cont_tokens": "b18e483b3ae0b782"
},
"truncated": 0,
"non_truncated": 1000,
"padded": 1000,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:temporal_sequences|0": {
"hashes": {
"hash_examples": "1d13139f47cb2df7",
"hash_full_prompts": "1a874610f00343dc",
"hash_input_tokens": "c1b026d77ea90744",
"hash_cont_tokens": "17d2fa5ae6fa0ccc"
},
"truncated": 0,
"non_truncated": 1000,
"padded": 984,
"non_padded": 16,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": {
"hashes": {
"hash_examples": "8770a702a9646648",
"hash_full_prompts": "392b486c4039dca8",
"hash_input_tokens": "da593867fd57e3f0",
"hash_cont_tokens": "40bfd5cb8f4b187c"
},
"truncated": 0,
"non_truncated": 1250,
"padded": 1220,
"non_padded": 30,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": {
"hashes": {
"hash_examples": "b469b7d073824a59",
"hash_full_prompts": "1bad8a693cc74da1",
"hash_input_tokens": "aed2cf5daeb3e601",
"hash_cont_tokens": "2119f406f708fb05"
},
"truncated": 0,
"non_truncated": 1750,
"padded": 1693,
"non_padded": 57,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": {
"hashes": {
"hash_examples": "0509e5712ab9bcdb",
"hash_full_prompts": "396c1e56901b46ed",
"hash_input_tokens": "5c8605826bf7422b",
"hash_cont_tokens": "419f5cf4c2029190"
},
"truncated": 0,
"non_truncated": 300,
"padded": 264,
"non_padded": 36,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
}
},
"summary_general": {
"hashes": {
"hash_examples": "51a30c4501ba4586",
"hash_full_prompts": "96a511cab844bc38",
"hash_input_tokens": "f89badb832747d24",
"hash_cont_tokens": "50762c06a4e8bea3"
},
"truncated": 0,
"non_truncated": 13104,
"padded": 12882,
"non_padded": 222,
"num_truncated_few_shots": 0
}
}