Spaces:
Running
Running
wenhuchen
committed on
Commit
·
236a68e
1
Parent(s):
4abf394
update leaderboard
Browse files
utils.py
CHANGED
@@ -14,9 +14,10 @@ MODEL_INFO = [
|
|
14 |
"TheoremQA",
|
15 |
"MATH",
|
16 |
"GSM",
|
|
|
17 |
]
|
18 |
|
19 |
-
DATA_TITILE_TYPE = ['markdown', 'number', 'number', 'number', 'number']
|
20 |
|
21 |
SUBMISSION_NAME = "science_leaderboard_submission"
|
22 |
SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/TIGER-Lab/", SUBMISSION_NAME)
|
@@ -64,13 +65,11 @@ GPQA: A Graduate-Level Google-Proof Q&A Benchmark<br>
|
|
64 |
"""
|
65 |
|
66 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
67 |
-
CITATION_BUTTON_TEXT = r"""@
|
68 |
title={Measuring Mathematical Problem Solving With the MATH Dataset},
|
69 |
author={Hendrycks, Dan and Burns, Collin and Kadavath, Saurav and Arora, Akul and Basart, Steven and Tang, Eric and Song, Dawn and Steinhardt, Jacob},
|
70 |
-
|
71 |
-
|
72 |
-
number={4},
|
73 |
-
pages={0--6}
|
74 |
}
|
75 |
@article{cobbe2021training,
|
76 |
title={Training verifiers to solve math word problems},
|
@@ -111,7 +110,7 @@ def get_df():
|
|
111 |
repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN)
|
112 |
repo.git_pull()
|
113 |
df = pd.read_csv(CSV_DIR)
|
114 |
-
df['Avg'] = df[['TheoremQA', 'MATH', 'GSM']].mean(axis=1).round(1)
|
115 |
df = df.sort_values(by=['Avg'], ascending=False)
|
116 |
return df[COLUMN_NAMES]
|
117 |
|
@@ -122,7 +121,7 @@ def add_new_eval(
|
|
122 |
return "Error! Empty file!"
|
123 |
|
124 |
upload_data=json.loads(input_file)
|
125 |
-
data_row = [upload_data['ModelName'], upload_data['TheoremQA'], upload_data['MATH'], upload_data['GSM']]
|
126 |
|
127 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
|
128 |
submission_repo.git_pull()
|
|
|
14 |
"TheoremQA",
|
15 |
"MATH",
|
16 |
"GSM",
|
17 |
+
"GPQA",
|
18 |
]
|
19 |
|
20 |
+
DATA_TITILE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number']
|
21 |
|
22 |
SUBMISSION_NAME = "science_leaderboard_submission"
|
23 |
SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/TIGER-Lab/", SUBMISSION_NAME)
|
|
|
65 |
"""
|
66 |
|
67 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
68 |
+
CITATION_BUTTON_TEXT = r"""@inproceedings{hendrycks2021measuring,
|
69 |
title={Measuring Mathematical Problem Solving With the MATH Dataset},
|
70 |
author={Hendrycks, Dan and Burns, Collin and Kadavath, Saurav and Arora, Akul and Basart, Steven and Tang, Eric and Song, Dawn and Steinhardt, Jacob},
|
71 |
+
booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)},
|
72 |
+
year={2021}
|
|
|
|
|
73 |
}
|
74 |
@article{cobbe2021training,
|
75 |
title={Training verifiers to solve math word problems},
|
|
|
110 |
repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN)
|
111 |
repo.git_pull()
|
112 |
df = pd.read_csv(CSV_DIR)
|
113 |
+
df['Avg'] = df[['TheoremQA', 'MATH', 'GSM', 'GPQA']].mean(axis=1).round(1)
|
114 |
df = df.sort_values(by=['Avg'], ascending=False)
|
115 |
return df[COLUMN_NAMES]
|
116 |
|
|
|
121 |
return "Error! Empty file!"
|
122 |
|
123 |
upload_data=json.loads(input_file)
|
124 |
+
data_row = [upload_data['ModelName'], upload_data['TheoremQA'], upload_data['MATH'], upload_data['GSM'], upload_data['GPQA']]
|
125 |
|
126 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
|
127 |
submission_repo.git_pull()
|