edbeeching HF Staff commited on
Commit
6fb6254
·
verified ·
1 Parent(s): 0474016

Delete eval_results/HuggingFaceTB

Browse files
eval_results/HuggingFaceTB/SmolLM2-1.7B-Instruct/main/gsm8k/results_2025-02-12T14-56-55.504908.json DELETED
@@ -1,113 +0,0 @@
1
- {
2
- "config_general": {
3
- "lighteval_sha": "?",
4
- "num_fewshot_seeds": 1,
5
- "override_batch_size": -1,
6
- "max_samples": null,
7
- "job_id": 0,
8
- "start_time": 662980.141519316,
9
- "end_time": 663183.167246989,
10
- "total_evaluation_time_secondes": "203.02572767296806",
11
- "model_name": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
12
- "model_sha": "",
13
- "model_dtype": null,
14
- "model_size": null
15
- },
16
- "results": {
17
- "lighteval|gsm8k|0": {
18
- "maj@8": 0.04700530705079606,
19
- "maj@8_stderr": 0.005829898355937189,
20
- "qem": 0.09097801364670205,
21
- "qem_stderr": 0.007921322844013654
22
- },
23
- "all": {
24
- "maj@8": 0.04700530705079606,
25
- "maj@8_stderr": 0.005829898355937189,
26
- "qem": 0.09097801364670205,
27
- "qem_stderr": 0.007921322844013654
28
- }
29
- },
30
- "versions": {
31
- "lighteval|gsm8k|0": 0
32
- },
33
- "config_tasks": {
34
- "lighteval|gsm8k": {
35
- "name": "gsm8k",
36
- "prompt_function": "gsm8k",
37
- "hf_repo": "gsm8k",
38
- "hf_subset": "main",
39
- "metric": [
40
- {
41
- "metric_name": "qem",
42
- "higher_is_better": true,
43
- "category": "3",
44
- "use_case": "5",
45
- "sample_level_fn": "compute",
46
- "corpus_level_fn": "mean"
47
- },
48
- {
49
- "metric_name": "maj@8",
50
- "higher_is_better": true,
51
- "category": "5",
52
- "use_case": "5",
53
- "sample_level_fn": "compute",
54
- "corpus_level_fn": "mean"
55
- }
56
- ],
57
- "hf_revision": null,
58
- "hf_filter": null,
59
- "hf_avail_splits": [
60
- "train",
61
- "test"
62
- ],
63
- "trust_dataset": true,
64
- "evaluation_splits": [
65
- "test"
66
- ],
67
- "few_shots_split": null,
68
- "few_shots_select": "random_sampling_from_train",
69
- "generation_size": 256,
70
- "generation_grammar": null,
71
- "stop_sequence": [
72
- "Question="
73
- ],
74
- "num_samples": null,
75
- "suite": [
76
- "lighteval"
77
- ],
78
- "original_num_docs": 1319,
79
- "effective_num_docs": 1319,
80
- "must_remove_duplicate_docs": false,
81
- "version": 0
82
- }
83
- },
84
- "summary_tasks": {
85
- "lighteval|gsm8k|0": {
86
- "hashes": {
87
- "hash_examples": "7957ba36acab8574",
88
- "hash_full_prompts": "1dad1de90e245444",
89
- "hash_input_tokens": "acac8977121c9ea9",
90
- "hash_cont_tokens": "bf643cf9dc530426"
91
- },
92
- "truncated": 0,
93
- "non_truncated": 2638,
94
- "padded": 0,
95
- "non_padded": 2638,
96
- "effective_few_shots": 0.0,
97
- "num_truncated_few_shots": 0
98
- }
99
- },
100
- "summary_general": {
101
- "hashes": {
102
- "hash_examples": "e99e29b2d4ab55c1",
103
- "hash_full_prompts": "04c6169702a9369e",
104
- "hash_input_tokens": "39f93cc19f3f49c9",
105
- "hash_cont_tokens": "1d791bf43ab6f002"
106
- },
107
- "truncated": 0,
108
- "non_truncated": 2638,
109
- "padded": 0,
110
- "non_padded": 2638,
111
- "num_truncated_few_shots": 0
112
- }
113
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval_results/HuggingFaceTB/SmolLM2-1.7B-Instruct/main/gsm8k/results_2025-02-12T15-02-58.450524.json DELETED
@@ -1,113 +0,0 @@
1
- {
2
- "config_general": {
3
- "lighteval_sha": "?",
4
- "num_fewshot_seeds": 1,
5
- "override_batch_size": -1,
6
- "max_samples": null,
7
- "job_id": 0,
8
- "start_time": 343146.31067292,
9
- "end_time": 343349.693702219,
10
- "total_evaluation_time_secondes": "203.38302929897327",
11
- "model_name": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
12
- "model_sha": "",
13
- "model_dtype": null,
14
- "model_size": null
15
- },
16
- "results": {
17
- "lighteval|gsm8k|0": {
18
- "maj@8": 0.000758150113722517,
19
- "maj@8_stderr": 0.0007581501137225375,
20
- "qem": 0.000758150113722517,
21
- "qem_stderr": 0.0007581501137225247
22
- },
23
- "all": {
24
- "maj@8": 0.000758150113722517,
25
- "maj@8_stderr": 0.0007581501137225375,
26
- "qem": 0.000758150113722517,
27
- "qem_stderr": 0.0007581501137225247
28
- }
29
- },
30
- "versions": {
31
- "lighteval|gsm8k|0": 0
32
- },
33
- "config_tasks": {
34
- "lighteval|gsm8k": {
35
- "name": "gsm8k",
36
- "prompt_function": "gsm8k",
37
- "hf_repo": "gsm8k",
38
- "hf_subset": "main",
39
- "metric": [
40
- {
41
- "metric_name": "qem",
42
- "higher_is_better": true,
43
- "category": "3",
44
- "use_case": "5",
45
- "sample_level_fn": "compute",
46
- "corpus_level_fn": "mean"
47
- },
48
- {
49
- "metric_name": "maj@8",
50
- "higher_is_better": true,
51
- "category": "5",
52
- "use_case": "5",
53
- "sample_level_fn": "compute",
54
- "corpus_level_fn": "mean"
55
- }
56
- ],
57
- "hf_revision": null,
58
- "hf_filter": null,
59
- "hf_avail_splits": [
60
- "train",
61
- "test"
62
- ],
63
- "trust_dataset": true,
64
- "evaluation_splits": [
65
- "test"
66
- ],
67
- "few_shots_split": null,
68
- "few_shots_select": "random_sampling_from_train",
69
- "generation_size": 256,
70
- "generation_grammar": null,
71
- "stop_sequence": [
72
- "Question="
73
- ],
74
- "num_samples": null,
75
- "suite": [
76
- "lighteval"
77
- ],
78
- "original_num_docs": 1319,
79
- "effective_num_docs": 1319,
80
- "must_remove_duplicate_docs": false,
81
- "version": 0
82
- }
83
- },
84
- "summary_tasks": {
85
- "lighteval|gsm8k|0": {
86
- "hashes": {
87
- "hash_examples": "7957ba36acab8574",
88
- "hash_full_prompts": "10c92cb4d0869dd2",
89
- "hash_input_tokens": "bb685ca93dc87ad9",
90
- "hash_cont_tokens": "dd1c8df7717b0659"
91
- },
92
- "truncated": 0,
93
- "non_truncated": 2638,
94
- "padded": 0,
95
- "non_padded": 2638,
96
- "effective_few_shots": 0.0,
97
- "num_truncated_few_shots": 0
98
- }
99
- },
100
- "summary_general": {
101
- "hashes": {
102
- "hash_examples": "e99e29b2d4ab55c1",
103
- "hash_full_prompts": "087dc6ae8b324c9a",
104
- "hash_input_tokens": "0f08978b7c932878",
105
- "hash_cont_tokens": "968d36161d6c64cd"
106
- },
107
- "truncated": 0,
108
- "non_truncated": 2638,
109
- "padded": 0,
110
- "non_padded": 2638,
111
- "num_truncated_few_shots": 0
112
- }
113
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval_results/HuggingFaceTB/SmolLM2-1.7B-Instruct/main/gsm8k/results_2025-02-12T15-44-04.626650.json DELETED
@@ -1,99 +0,0 @@
1
- {
2
- "config_general": {
3
- "lighteval_sha": "?",
4
- "num_fewshot_seeds": 1,
5
- "override_batch_size": -1,
6
- "max_samples": null,
7
- "job_id": 0,
8
- "start_time": 665894.934985964,
9
- "end_time": 666012.292897578,
10
- "total_evaluation_time_secondes": "117.35791161400266",
11
- "model_name": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
12
- "model_sha": "",
13
- "model_dtype": null,
14
- "model_size": null
15
- },
16
- "results": {
17
- "custom|gsm8k|0": {
18
- "extractive_match": 0.3169067475360121,
19
- "extractive_match_stderr": 0.012815868296721378
20
- },
21
- "all": {
22
- "extractive_match": 0.3169067475360121,
23
- "extractive_match_stderr": 0.012815868296721378
24
- }
25
- },
26
- "versions": {
27
- "custom|gsm8k|0": 0
28
- },
29
- "config_tasks": {
30
- "custom|gsm8k": {
31
- "name": "gsm8k",
32
- "prompt_function": "gsm8k_prompt_fn",
33
- "hf_repo": "gsm8k",
34
- "hf_subset": "main",
35
- "metric": [
36
- {
37
- "metric_name": "extractive_match",
38
- "higher_is_better": true,
39
- "category": "3",
40
- "use_case": "1",
41
- "sample_level_fn": "sample_level_fn",
42
- "corpus_level_fn": "mean"
43
- }
44
- ],
45
- "hf_revision": null,
46
- "hf_filter": null,
47
- "hf_avail_splits": [
48
- "train",
49
- "test"
50
- ],
51
- "trust_dataset": false,
52
- "evaluation_splits": [
53
- "test"
54
- ],
55
- "few_shots_split": null,
56
- "few_shots_select": null,
57
- "generation_size": 8192,
58
- "generation_grammar": null,
59
- "stop_sequence": [],
60
- "num_samples": null,
61
- "suite": [
62
- "custom"
63
- ],
64
- "original_num_docs": 1319,
65
- "effective_num_docs": 1319,
66
- "must_remove_duplicate_docs": false,
67
- "version": 0
68
- }
69
- },
70
- "summary_tasks": {
71
- "custom|gsm8k|0": {
72
- "hashes": {
73
- "hash_examples": "4c0843a5d99bcfdc",
74
- "hash_full_prompts": "c53d40c3dafc71d9",
75
- "hash_input_tokens": "df9416030feff162",
76
- "hash_cont_tokens": "569fe4ad83cf9673"
77
- },
78
- "truncated": 0,
79
- "non_truncated": 1319,
80
- "padded": 0,
81
- "non_padded": 1319,
82
- "effective_few_shots": 0.0,
83
- "num_truncated_few_shots": 0
84
- }
85
- },
86
- "summary_general": {
87
- "hashes": {
88
- "hash_examples": "18b756b7813d1bdf",
89
- "hash_full_prompts": "bbae0e5faff2dc56",
90
- "hash_input_tokens": "6d816743a921fc1e",
91
- "hash_cont_tokens": "4fe00ed950244f12"
92
- },
93
- "truncated": 0,
94
- "non_truncated": 1319,
95
- "padded": 0,
96
- "non_padded": 1319,
97
- "num_truncated_few_shots": 0
98
- }
99
- }