wenhuchen committed on
Commit
236a68e
·
1 Parent(s): 4abf394

update leaderboard

Browse files
Files changed (1) hide show
  1. utils.py +7 -8
utils.py CHANGED
@@ -14,9 +14,10 @@ MODEL_INFO = [
14
  "TheoremQA",
15
  "MATH",
16
  "GSM",
 
17
  ]
18
 
19
- DATA_TITILE_TYPE = ['markdown', 'number', 'number', 'number', 'number']
20
 
21
  SUBMISSION_NAME = "science_leaderboard_submission"
22
  SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/TIGER-Lab/", SUBMISSION_NAME)
@@ -64,13 +65,11 @@ GPQA: A Graduate-Level Google-Proof Q&A Benchmark<br>
64
  """
65
 
66
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
67
- CITATION_BUTTON_TEXT = r"""@article{hendrycks2measuring,
68
  title={Measuring Mathematical Problem Solving With the MATH Dataset},
69
  author={Hendrycks, Dan and Burns, Collin and Kadavath, Saurav and Arora, Akul and Basart, Steven and Tang, Eric and Song, Dawn and Steinhardt, Jacob},
70
- journal={Sort},
71
- volume={2},
72
- number={4},
73
- pages={0--6}
74
  }
75
  @article{cobbe2021training,
76
  title={Training verifiers to solve math word problems},
@@ -111,7 +110,7 @@ def get_df():
111
  repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN)
112
  repo.git_pull()
113
  df = pd.read_csv(CSV_DIR)
114
- df['Avg'] = df[['TheoremQA', 'MATH', 'GSM']].mean(axis=1).round(1)
115
  df = df.sort_values(by=['Avg'], ascending=False)
116
  return df[COLUMN_NAMES]
117
 
@@ -122,7 +121,7 @@ def add_new_eval(
122
  return "Error! Empty file!"
123
 
124
  upload_data=json.loads(input_file)
125
- data_row = [upload_data['ModelName'], upload_data['TheoremQA'], upload_data['MATH'], upload_data['GSM']]
126
 
127
  submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
128
  submission_repo.git_pull()
 
14
  "TheoremQA",
15
  "MATH",
16
  "GSM",
17
+ "GPQA",
18
  ]
19
 
20
+ DATA_TITILE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number']
21
 
22
  SUBMISSION_NAME = "science_leaderboard_submission"
23
  SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/TIGER-Lab/", SUBMISSION_NAME)
 
65
  """
66
 
67
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
68
+ CITATION_BUTTON_TEXT = r"""@inproceedings{hendrycks2021measuring,
69
  title={Measuring Mathematical Problem Solving With the MATH Dataset},
70
  author={Hendrycks, Dan and Burns, Collin and Kadavath, Saurav and Arora, Akul and Basart, Steven and Tang, Eric and Song, Dawn and Steinhardt, Jacob},
71
+ booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)},
72
+ year={2021}
 
 
73
  }
74
  @article{cobbe2021training,
75
  title={Training verifiers to solve math word problems},
 
110
  repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN)
111
  repo.git_pull()
112
  df = pd.read_csv(CSV_DIR)
113
+ df['Avg'] = df[['TheoremQA', 'MATH', 'GSM', 'GPQA']].mean(axis=1).round(1)
114
  df = df.sort_values(by=['Avg'], ascending=False)
115
  return df[COLUMN_NAMES]
116
 
 
121
  return "Error! Empty file!"
122
 
123
  upload_data=json.loads(input_file)
124
+ data_row = [upload_data['ModelName'], upload_data['TheoremQA'], upload_data['MATH'], upload_data['GSM'], upload_data['GPQA']]
125
 
126
  submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
127
  submission_repo.git_pull()