import pandas as pd
import gradio as gr
import csv
import json
import os
import shutil
from huggingface_hub import Repository
HF_TOKEN = os.environ.get("HF_TOKEN")
SUBJECTS = ["Biology", "Business", "Chemistry", "Computer Science", "Economics", "Engineering",
"Health", "History", "Law", "Math", "Philosophy", "Physics", "Psychology", "Other"]
MODEL_INFO = [
    "Models",
    "Overall",
    "Biology", "Business", "Chemistry", "Computer Science", "Economics", "Engineering",
    "Health", "History", "Law", "Math", "Philosophy", "Physics", "Psychology", "Other"]
DATA_TITLE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number', 'number',
                   'number', 'number', 'number', 'number', 'number', 'number', 'number',
                   'number', 'number']
SUBMISSION_NAME = "mmlu_pro_leaderboard_submission"
SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/TIGER-Lab/", SUBMISSION_NAME)
CSV_DIR = "./mmlu_pro_leaderboard_submission/results.csv"
COLUMN_NAMES = MODEL_INFO
LEADERBOARD_INTRODUCTION = """# MMLU-Pro Leaderboard
Welcome to the MMLU-Pro leaderboard, showcasing the performance of various advanced language models on the MMLU-Pro dataset. The MMLU-Pro dataset is an enhanced version of the original MMLU, specifically engineered to offer a more rigorous and realistic evaluation environment.
The MMLU-Pro dataset consists of approximately 12,000 intricate questions that challenge the comprehension and reasoning abilities of LLMs. Below you can find the accuracies of different models tested on this dataset.
For detailed information about the dataset, visit our page on Hugging Face: MMLU-Pro at Hugging Face. If you are interested in replicating these results or wish to evaluate your models using our dataset, access our evaluation scripts available on GitHub: TIGER-AI-Lab/MMLU-Pro.
"""
TABLE_INTRODUCTION = """
"""
LEADERBOARD_INFO = """
Information about the datasets used is listed below:<br>
"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r""""""
SUBMIT_INTRODUCTION = """# Submit on Science Leaderboard Introduction
## ⚠ Please note that you need to submit a json file with the following format:
```json
{
    "Model": "[MODEL_NAME]",
    "Overall": 0.5678,
    "Biology": 0.1234,
    "Business": 0.4567,
    ...,
    "Other": 0.3456
}
```
After submitting, you can click the "Refresh" button to see the updated leaderboard (it may take a few seconds).
"""
def get_df():
    # Pull the latest results CSV from the submission repo and return it sorted by
    # overall accuracy, restricted to the leaderboard columns.
    repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN)
    repo.git_pull()
    df = pd.read_csv(CSV_DIR)
    df = df.sort_values(by=['Overall'], ascending=False)
    return df[COLUMN_NAMES]
def add_new_eval(
    input_file,
):
    # Reject empty uploads before attempting to parse them.
    if input_file is None:
        return "Error! Empty file!"
    upload_data = json.loads(input_file)
    print("upload_data:\n", upload_data)
    # Build a CSV row: model name, overall score, then one score per subject.
    data_row = [f'{upload_data["Model"]}', upload_data['Overall']]
    for subject in SUBJECTS:
        data_row += [upload_data[subject]]
    print("data_row:\n", data_row)
    # Sync the submission repo before appending the new entry.
    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
                                 use_auth_token=HF_TOKEN, repo_type="dataset")
    submission_repo.git_pull()
    # Only add models that have not already been submitted.
    already_submitted = []
    with open(CSV_DIR, mode='r') as file:
        reader = csv.reader(file, delimiter=',')
        for row in reader:
            already_submitted.append(row[0])
    if data_row[0] not in already_submitted:
        with open(CSV_DIR, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(data_row)
        submission_repo.push_to_hub()
        print('Submission Successful')
    else:
        print('The entry already exists')
def refresh_data():
    # Re-read the leaderboard so the table reflects newly pushed submissions.
    return get_df()
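
# A minimal sketch (assumed wiring, not shown in this section) of how these helpers
# could be hooked into a Gradio Blocks UI: the table is rendered from get_df(), the
# Refresh button re-runs refresh_data(), and the uploaded JSON is passed as raw bytes
# to add_new_eval(). Component names and layout here are illustrative only.
#
#     with gr.Blocks() as demo:
#         gr.Markdown(LEADERBOARD_INTRODUCTION)
#         leaderboard = gr.Dataframe(value=get_df(), headers=COLUMN_NAMES,
#                                    datatype=DATA_TITLE_TYPE, interactive=False)
#         refresh_button = gr.Button("Refresh")
#         refresh_button.click(fn=refresh_data, inputs=[], outputs=leaderboard)
#         gr.Markdown(SUBMIT_INTRODUCTION)
#         submission_file = gr.File(label="results.json", type="binary")
#         submission_result = gr.Markdown()
#         submit_button = gr.Button("Submit")
#         submit_button.click(fn=add_new_eval, inputs=[submission_file],
#                             outputs=submission_result)
#     demo.launch()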