Spaces:
Runtime error
Runtime error
File size: 3,300 Bytes
212696e be48d91 212696e be48d91 212696e b727941 212696e b727941 be48d91 85fb5e3 b727941 212696e b727941 212696e cc17f27 b727941 212696e b727941 be48d91 b727941 cc17f27 212696e b727941 be48d91 b727941 be48d91 b727941 be48d91 85fb5e3 b727941 212696e b727941 212696e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import os
from datetime import datetime
from pathlib import Path
from re import sub
import pandas as pd
import requests
import streamlit as st
from datasets import get_dataset_config_names
from dotenv import load_dotenv
# Load local development secrets from a .env file when one is present.
if Path(".env").is_file():
    load_dotenv(".env")

auth_token = os.getenv("HF_HUB_TOKEN")
# Only attach an Authorization header when a token is configured: concatenating
# "Bearer " with None would raise a TypeError at import time. Without a token
# the requests are sent unauthenticated.
header = {"Authorization": f"Bearer {auth_token}"} if auth_token else {}

# TODO(lewtun): Evaluate with new subtasks and remove this filter
TASKS_TO_EXCLUDE = ["gpai_initiatives", "ade_corpus_v2", "tweet_eval_hate"]
# Config names of the "ought/raft" dataset, minus the excluded ones.
TASKS = [t for t in get_dataset_config_names("ought/raft") if t not in TASKS_TO_EXCLUDE]
# Split and capitalize the task names, e.g. banking_77 => Banking 77
FORMATTED_TASK_NAMES = [" ".join(word.capitalize() for word in task.split("_")) for task in TASKS]
def extract_tags(dataset):
    """Parse the ``key:value`` tags of a dataset record into a dict.

    Args:
        dataset: Mapping with an optional ``"tags"`` entry holding a list of
            tag strings.

    Returns:
        Dict mapping each tag key (text before the first ``":"``) to its value
        (everything after it). Tags without a ``":"`` are skipped, and a
        missing or ``None`` tag list yields an empty dict. When a key occurs
        more than once, the last occurrence wins.
    """
    tags = {}
    for tag in dataset.get("tags") or ():
        key, separator, value = tag.partition(":")
        if not separator:
            # Plain tags carry no key/value pair; unpacking them used to raise ValueError.
            continue
        tags[key] = value
    return tags
def download_submissions():
    """Fetch the dataset repos that are RAFT evaluation submissions.

    Lists the datasets from the Hugging Face Hub API and keeps those tagged
    with ``benchmark:ought/raft`` and ``type:evaluation``.

    Returns:
        List of dataset records (as returned by the API), sorted in ascending
        order by the integer that follows the last ``"-"`` in each record's
        ``"id"``.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    # Use HTTPS so the bearer token in `header` is never sent in clear text.
    response = requests.get("https://huggingface.co/api/datasets", headers=header)
    # Fail loudly on 4xx/5xx instead of trying to parse an error payload below.
    response.raise_for_status()

    submissions = []
    for dataset in response.json():
        tags = extract_tags(dataset)
        if tags.get("benchmark") == "ought/raft" and tags.get("type") == "evaluation":
            submissions.append(dataset)

    submissions.sort(key=lambda dataset: int(dataset["id"].split("-")[-1]))
    return submissions
def format_submissions(submissions):
    """Build the leaderboard table from the most recent submissions.

    For each of the last two entries of ``submissions`` the full dataset
    record is fetched from the Hugging Face Hub API and its ``card_data`` is
    read for the submission name, submission id and per-task scores.

    Args:
        submissions: List of dataset records, each with an ``"id"`` key, as
            returned by ``download_submissions``.

    Returns:
        A ``pandas.DataFrame`` with columns ``Rank``, ``Submission``, ``Date``,
        ``Overall`` (row-wise mean of the task scores) and one column per task
        (named with the formatted task names), sorted by ``Overall`` in
        descending order. ``Rank`` is the 1-based position in that order.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    submission_data = {"Submission": [], "Date": [], **{t: [] for t in TASKS}}

    # TODO(lewtun): delete / filter all the junk repos from development
    # The following picks the latest submissions which adhere to the model card schema
    for submission in submissions[-2:]:
        dataset_id = submission["id"]
        # Use HTTPS so the bearer token in `header` is never sent in clear text.
        response = requests.get(
            f"https://huggingface.co/api/datasets/{dataset_id}?full=true",
            headers=header,
        )
        # Fail loudly on 4xx/5xx instead of raising a confusing KeyError below.
        response.raise_for_status()
        card_data = response.json()["card_data"]

        submission_data["Submission"].append(card_data["submission_dataset"])

        # The integer after the last "-" of the submission id is handed to
        # pd.to_datetime, which reads a bare integer as nanoseconds since the
        # epoch by default.
        # NOTE(review): confirm the id suffix really is in nanoseconds.
        timestamp = pd.to_datetime(int(card_data["submission_id"].split("-")[-1]))
        submission_data["Date"].append(timestamp.date())

        for task in card_data["results"]:
            task_data = task["task"]
            task_name = task_data["name"]
            # TODO(lewtun): Evaluate with new subtasks and remove this filter
            if task_name in TASKS_TO_EXCLUDE:
                continue
            # Only the first reported metric of each task is used as its score.
            submission_data[task_name].append(task_data["metrics"][0]["value"])

    df = pd.DataFrame(submission_data)
    df.insert(2, "Overall", df[TASKS].mean(axis=1))

    # Drop the pre-sort index: keeping it (plain reset_index()) would label each
    # row with its original position rather than its place in the ranking.
    df = df.sort_values("Overall", ascending=False).reset_index(drop=True)
    df.insert(0, "Rank", df.index + 1)

    return df.rename(columns=dict(zip(TASKS, FORMATTED_TASK_NAMES)))
###########
### APP ###
###########
st.set_page_config(layout="wide")
st.title("RAFT Leaderboard")

# Fetch the tagged submission repos, then turn them into the leaderboard table.
submissions = download_submissions()
df = format_submissions(submissions)

# hack to remove index column from https://github.com/streamlit/streamlit/issues/641
# The frame's index is replaced by a column of empty strings before rendering.
table_without_index = df.assign(hack="").set_index("hack")
st.table(table_without_index)
|