lewtun HF Staff committed on
Commit
fc33791
·
verified ·
1 Parent(s): 820f131

Upload eval_results/AI-MO/deepseek-math-7b-sft/aimo_v03.00/aimo_kaggle/results_2024-05-07T12-15-37.241134.json with huggingface_hub

Browse files
eval_results/AI-MO/deepseek-math-7b-sft/aimo_v03.00/aimo_kaggle/results_2024-05-07T12-15-37.241134.json ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config_general": {
3
+ "lighteval_sha": "?",
4
+ "num_fewshot_seeds": 1,
5
+ "override_batch_size": 4,
6
+ "max_samples": null,
7
+ "job_id": "",
8
+ "start_time": 437176.178939583,
9
+ "end_time": 437485.420318817,
10
+ "total_evaluation_time_secondes": "309.2413792340085",
11
+ "model_name": "AI-MO/deepseek-math-7b-sft",
12
+ "model_sha": "f500a523198914c8fa81f759aed450551064ab3a",
13
+ "model_dtype": "torch.bfloat16",
14
+ "model_size": "12.93 GB",
15
+ "config": null
16
+ },
17
+ "results": {
18
+ "custom|aimo_kaggle:train|0": {
19
+ "qem": 0.0,
20
+ "qem_stderr": 0.0
21
+ },
22
+ "custom|aimo_kaggle:valid|0": {
23
+ "qem": 0.28,
24
+ "qem_stderr": 0.06414269805898185
25
+ },
26
+ "custom|aimo_kaggle:_average|0": {
27
+ "qem": 0.14,
28
+ "qem_stderr": 0.03207134902949092
29
+ },
30
+ "all": {
31
+ "qem": 0.14,
32
+ "qem_stderr": 0.03207134902949092
33
+ }
34
+ },
35
+ "versions": {
36
+ "custom|aimo_kaggle:train|0": 0,
37
+ "custom|aimo_kaggle:valid|0": 0
38
+ },
39
+ "config_tasks": {
40
+ "custom|aimo_kaggle:train": {
41
+ "name": "aimo_kaggle:train",
42
+ "prompt_function": "kaggle_prompt_fn",
43
+ "hf_repo": "AI-MO/kaggle-train-set",
44
+ "hf_subset": "",
45
+ "metric": [
46
+ "quasi_exact_match_math"
47
+ ],
48
+ "hf_avail_splits": [
49
+ "train"
50
+ ],
51
+ "evaluation_splits": [
52
+ "train"
53
+ ],
54
+ "few_shots_split": null,
55
+ "few_shots_select": null,
56
+ "generation_size": 2048,
57
+ "stop_sequence": null,
58
+ "output_regex": null,
59
+ "num_samples": null,
60
+ "frozen": false,
61
+ "suite": [
62
+ "custom"
63
+ ],
64
+ "original_num_docs": 10,
65
+ "effective_num_docs": 10,
66
+ "trust_dataset": null,
67
+ "must_remove_duplicate_docs": null,
68
+ "version": 0
69
+ },
70
+ "custom|aimo_kaggle:valid": {
71
+ "name": "aimo_kaggle:valid",
72
+ "prompt_function": "kaggle_prompt_fn",
73
+ "hf_repo": "AI-MO/kaggle-validation-set",
74
+ "hf_subset": "v0",
75
+ "metric": [
76
+ "quasi_exact_match_math"
77
+ ],
78
+ "hf_avail_splits": [
79
+ "train"
80
+ ],
81
+ "evaluation_splits": [
82
+ "train"
83
+ ],
84
+ "few_shots_split": null,
85
+ "few_shots_select": null,
86
+ "generation_size": 2048,
87
+ "stop_sequence": null,
88
+ "output_regex": null,
89
+ "num_samples": null,
90
+ "frozen": false,
91
+ "suite": [
92
+ "custom"
93
+ ],
94
+ "original_num_docs": 50,
95
+ "effective_num_docs": 50,
96
+ "trust_dataset": null,
97
+ "must_remove_duplicate_docs": null,
98
+ "version": 0
99
+ }
100
+ },
101
+ "summary_tasks": {
102
+ "custom|aimo_kaggle:train|0": {
103
+ "hashes": {
104
+ "hash_examples": "3ef01feac22949ce",
105
+ "hash_full_prompts": "74c9baf89033f76a",
106
+ "hash_input_tokens": "2f2b429412d88494",
107
+ "hash_cont_tokens": "28f1d3fdc9ea3afd"
108
+ },
109
+ "truncated": 10,
110
+ "non_truncated": 0,
111
+ "padded": 6,
112
+ "non_padded": 4,
113
+ "effective_few_shots": 0.0,
114
+ "num_truncated_few_shots": 0
115
+ },
116
+ "custom|aimo_kaggle:valid|0": {
117
+ "hashes": {
118
+ "hash_examples": "2a919e6b839e921a",
119
+ "hash_full_prompts": "e0424ade9e31a0fe",
120
+ "hash_input_tokens": "7da1857b6831dd82",
121
+ "hash_cont_tokens": "9139787617b042e2"
122
+ },
123
+ "truncated": 48,
124
+ "non_truncated": 2,
125
+ "padded": 34,
126
+ "non_padded": 16,
127
+ "effective_few_shots": 0.0,
128
+ "num_truncated_few_shots": 0
129
+ }
130
+ },
131
+ "summary_general": {
132
+ "hashes": {
133
+ "hash_examples": "42ad458c3603ccb6",
134
+ "hash_full_prompts": "51040dc54a721b0c",
135
+ "hash_input_tokens": "1337f29495dfd28d",
136
+ "hash_cont_tokens": "a4f1caa708b78677"
137
+ },
138
+ "truncated": 58,
139
+ "non_truncated": 2,
140
+ "padded": 40,
141
+ "non_padded": 20,
142
+ "num_truncated_few_shots": 0
143
+ }
144
+ }