File size: 2,953 Bytes
212696e
be48d91
212696e
be48d91
212696e
 
 
 
b727941
212696e
 
 
 
 
b727941
 
 
 
85fb5e3
 
b727941
 
 
 
 
 
 
 
 
212696e
 
 
 
 
 
 
 
b727941
 
212696e
cc17f27
b727941
212696e
b727941
 
be48d91
b727941
 
 
cc17f27
212696e
 
 
 
 
 
b727941
 
 
be48d91
 
 
 
b727941
 
 
 
 
 
 
 
be48d91
85fb5e3
 
b727941
212696e
 
 
 
 
 
 
b727941
 
212696e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
from datetime import datetime
from pathlib import Path
from re import sub

import pandas as pd
import requests
import streamlit as st
from datasets import get_dataset_config_names
from dotenv import load_dotenv

# Load local development secrets when a .env file sits in the working directory.
if Path(".env").is_file():
    load_dotenv(".env")

# The token is optional: concatenating "Bearer " with a missing (None) token
# would raise TypeError at import time, so the Authorization header is only
# built when HF_HUB_TOKEN is actually set. Without it the header dict is empty
# and requests made with it carry no credentials.
auth_token = os.getenv("HF_HUB_TOKEN")
header = {"Authorization": "Bearer " + auth_token} if auth_token else {}

# The configuration names of the "ought/raft" dataset double as the task list.
TASKS = get_dataset_config_names("ought/raft")
# Display titles: break each task name on underscores and capitalize every
# piece, e.g. banking_77 => Banking 77
FORMATTED_TASK_NAMES = [" ".join(map(str.capitalize, name.split("_"))) for name in TASKS]


def extract_tags(dataset):
    """Parse a dataset's ``key:value`` tags into a dictionary.

    Args:
        dataset: Mapping describing a dataset; its ``"tags"`` entry, when
            present, is an iterable of tag strings.

    Returns:
        A dict mapping the text before the first colon of each tag to the
        text after it. Tags without a colon carry no key/value pair and are
        skipped; a missing or empty ``"tags"`` entry yields an empty dict.
        When a key occurs more than once, the last value wins.
    """
    tags = {}
    for tag in dataset.get("tags") or ():
        # partition() splits on the first colon only, so values may contain colons
        key, sep, value = tag.partition(":")
        if not sep:
            # Free-form tag such as "croissant": nothing to record
            continue
        tags[key] = value
    return tags


def download_submissions():
    """Fetch the RAFT evaluation datasets listed by the huggingface.co datasets API.

    Returns:
        The dataset entries tagged ``benchmark:ought/raft`` and
        ``type:evaluation``, sorted in ascending order of the integer that
        follows the last ``-`` in each entry's ``id``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    # HTTPS so that any bearer token in `header` is never sent in clear text;
    # the timeout keeps the app from hanging forever on a stalled connection.
    response = requests.get("https://huggingface.co/api/datasets", headers=header, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON
    response.raise_for_status()

    submissions = []
    for dataset in response.json():
        tags = extract_tags(dataset)
        if tags.get("benchmark") == "ought/raft" and tags.get("type") == "evaluation":
            submissions.append(dataset)

    # Oldest first: the numeric id suffix is used as the ordering key
    return sorted(submissions, key=lambda x: int(x["id"].split("-")[-1]))


def format_submissions(submissions):
    """Build the leaderboard table from the submissions' dataset cards.

    Args:
        submissions: Dataset entries, each with an ``"id"`` key. Only the
            last two entries are used (see the TODO in the body).

    Returns:
        A DataFrame with the columns ``Rank``, ``Submission``, ``Date`` and
        ``Overall`` (row-wise mean over the ``TASKS`` columns), followed by
        one column per task titled with ``FORMATTED_TASK_NAMES``. Rows are
        sorted by ``Overall`` in descending order and ``Rank`` counts up from
        1 for the best row.

    Raises:
        requests.HTTPError: If a dataset card request returns an error status.
    """
    submission_data = {"Submission": [], "Date": [], **{t: [] for t in TASKS}}

    # TODO(lewtun): delete / filter all the junk repos from development
    # The following picks the latest submissions which adhere to the model card schema
    for submission in submissions[-2:]:
        dataset_id = submission["id"]
        # HTTPS so that any bearer token in `header` is never sent in clear text
        response = requests.get(
            f"https://huggingface.co/api/datasets/{dataset_id}?full=true",
            headers=header,
            timeout=30,
        )
        # Fail loudly on 4xx/5xx instead of indexing into an error payload
        response.raise_for_status()
        card_data = response.json()["card_data"]
        submission_data["Submission"].append(card_data["submission_dataset"])

        # The card's submission id ends in "-<integer timestamp>".
        # NOTE(review): pd.to_datetime reads a bare int as nanoseconds since
        # the epoch by default -- confirm that matches how ids are generated.
        raw_timestamp = card_data["submission_id"].split("-")[-1]
        timestamp = pd.to_datetime(int(raw_timestamp))
        submission_data["Date"].append(timestamp.date())

        for task in card_data["results"]:
            task_data = task["task"]
            # Only the first listed metric of each task is displayed
            score = task_data["metrics"][0]["value"]
            submission_data[task_data["name"]].append(score)

    df = pd.DataFrame(submission_data)
    df.insert(2, "Overall", df[TASKS].mean(axis=1))
    # Drop the pre-sort index: keeping it as "Rank" would show insertion
    # order rather than leaderboard position.
    df = df.sort_values("Overall", ascending=False).reset_index(drop=True)
    df.insert(0, "Rank", list(range(1, len(df) + 1)))
    df.rename(columns=dict(zip(TASKS, FORMATTED_TASK_NAMES)), inplace=True)
    return df


###########
### APP ###
###########
st.set_page_config(layout="wide")
st.title("RAFT Leaderboard")

# Pull the submissions and turn them into the leaderboard table
submissions = download_submissions()
df = format_submissions(submissions)

# hack to remove index column from https://github.com/streamlit/streamlit/issues/641
# (the index is replaced by a column of empty strings before rendering)
blank_index_df = df.assign(hack="").set_index("hack")
st.table(blank_index_df)