Spaces:

GEM
/

submission-form

Runtime error

File size: 5,672 Bytes

import json
import os
import re
import shutil
import subprocess
from datetime import datetime
from pathlib import Path

import requests
import streamlit as st
from dotenv import load_dotenv
from huggingface_hub import HfApi, Repository

from validate import validate_submission

# Pick up local overrides from a .env file when one sits next to the app.
if Path(".env").is_file():
    load_dotenv(".env")

# Runtime configuration, read from the environment (each is None when unset).
HF_TOKEN = os.environ.get("HF_TOKEN")
AUTONLP_USERNAME = os.environ.get("AUTONLP_USERNAME")
HF_AUTONLP_BACKEND_API = os.environ.get("HF_AUTONLP_BACKEND_API")
# Directory used as the local working copy of the submission repository.
LOCAL_REPO = "submission_repo"


def get_auth_headers(token: str, prefix: str = "autonlp"):
    """Return the HTTP headers carrying the ``Authorization`` credential.

    Args:
        token: The access token to send.
        prefix: The scheme placed before the token, separated by one space.

    Returns:
        A single-entry dict mapping ``"Authorization"`` to ``"<prefix> <token>"``.
    """
    credential = "{} {}".format(prefix, token)
    return dict(Authorization=credential)


def http_post(
    path: str,
    token: str,
    payload=None,
    domain: str = HF_AUTONLP_BACKEND_API,
    suppress_logs: bool = False,
    **kwargs,
) -> requests.Response:
    """Send an authenticated JSON POST request to the AutoNLP API.

    Args:
        path: Endpoint path, appended verbatim to ``domain``.
        token: Access token placed in the ``Authorization`` header.
        payload: Request body, passed to ``requests.post`` as ``json=``.
        domain: Base URL of the API. Defaults to the value of
            ``HF_AUTONLP_BACKEND_API`` at import time.
        suppress_logs: When true, the connection-failure message is not printed.
        **kwargs: Extra keyword arguments forwarded to ``requests.post``.

    Returns:
        The response object of a successful request.

    Raises:
        requests.exceptions.ConnectionError: If the API cannot be reached.
        requests.exceptions.HTTPError: If the API answers with an error status.
    """
    try:
        response = requests.post(
            url=domain + path, json=payload, headers=get_auth_headers(token=token), allow_redirects=True, **kwargs
        )
    except requests.exceptions.ConnectionError:
        if not suppress_logs:
            print("❌ Failed to reach AutoNLP API, check your internet connection")
        # Re-raise: there is no response to inspect, and falling through would
        # hide the real failure behind an UnboundLocalError on `response`.
        raise
    response.raise_for_status()
    return response


###########
### APP ###
###########
# Page header and introductory text shown above the submission form.
st.title("GEM Submissions")
st.markdown(
    """
Welcome to the [GEM benchmark](https://gem-benchmark.com/)! GEM is a benchmark environment for Natural Language Generation with a focus on its Evaluation, both through human annotations and automated Metrics.

GEM aims to:

- measure NLG progress across many NLG tasks across languages.
- audit data and models and present results via data cards and model robustness reports.
- develop standards for evaluation of generated text using both automated and human metrics.

Use this page to submit your predictions to the benchmark.
"""
)

# Submission form: file upload, format help, access token and submit button.
with st.form(key="form"):
    # Flush local repo
    shutil.rmtree(LOCAL_REPO, ignore_errors=True)
    uploaded_file = st.file_uploader("Upload submission.json file", type=["json"])

    # Always bind json_data so it can be tested after the form; it stays None
    # when nothing was uploaded or the upload could not be parsed.
    json_data = None
    if uploaded_file is not None:
        try:
            data = str(uploaded_file.read(), "utf-8")
            json_data = json.loads(data)
        except (UnicodeDecodeError, json.JSONDecodeError) as err:
            # Report a bad upload in the page rather than crashing the script.
            st.error(f"Could not read the uploaded file as UTF-8 encoded JSON: {err}")

    with st.expander("Submission format"):
        st.markdown(
            """
        Please follow this JSON format for your `submission.json` file:

        ```json
        {
        "submission_name": "An identifying name of your system",
        "param_count": 123, # The number of parameters your system has.
        "description": "An optional brief description of the system that will be shown on the results page",
        "tasks":
            {
            "dataset_identifier": {
                "values": ["output1", "output2", "..."], # A list of system outputs.
                # Optionally, you can add the keys which are part of an example to ensure that there is no shuffling mistakes.
                "keys": ["key-0", "key-1", ...]
                }
            }
        }
        ```
        In this case, `dataset_identifier` is the identifier of the dataset
        followed by an identifier of the set the outputs were created from, for
        example `_validation` or `_test`. For example, the `mlsum_de` test set
        would have the identifier `mlsum_de_test`. The `keys` field can be set
        to avoid accidental shuffling to impact your metrics. Simply add a list
        of the `gem_id` for each output example in the same order as your
        values. Please see the sample submission below:
        """
        )
        # Render the bundled example so users can compare it with their file.
        with open("sample-submission.json", "r") as f:
            example_submission = json.load(f)
            st.json(example_submission)

    token = st.text_input(
        "Enter 🤗 Hub access token",
        type="password",
        help="You can generate an access token via your 🤗 Hub settings. See the [docs](https://huggingface.co/docs/hub/security#user-access-tokens) for more details",
    )

    submit_button = st.form_submit_button("Make Submission")

# Handle a submitted form: validate the upload, push it to the user's
# submission dataset on the Hub, then request an evaluation from the backend.
if submit_button:
    # Without a usable upload there is nothing to submit. `uploaded_file` is
    # tested first so `json_data` is only read when it is guaranteed to be bound.
    if uploaded_file is None or json_data is None:
        st.error("Please upload a valid submission.json file before submitting.")
        st.stop()
    # An empty text input yields "", which cannot identify a Hub user.
    if not token:
        st.error("Please enter your 🤗 Hub access token before submitting.")
        st.stop()

    validate_submission(json_data)
    user_info = HfApi().whoami(token)
    user_name = user_info["name"]
    submission_name = json_data["submission_name"]

    try:
        # Create submission dataset under benchmarks ORG
        dataset_repo_url = f"https://huggingface.co/datasets/benchmarks/gem-{user_name}"
        repo = Repository(
            local_dir=LOCAL_REPO,
            clone_from=dataset_repo_url,
            repo_type="dataset",
            private=True,
            use_auth_token=HF_TOKEN,
        )
        submission_metadata = {"benchmark": "gem", "type": "prediction", "submission_name": submission_name}
        repo.repocard_metadata_save(submission_metadata)

        with open(f"{LOCAL_REPO}/submission.json", "w", encoding="utf-8") as f:
            json.dump(json_data, f)

        # TODO: add informative commit msg
        commit_url = repo.push_to_hub()
        # Take the last URL segment as the commit SHA; fall back to the HEAD
        # commit URL when the push returned no URL.
        if commit_url is not None:
            commit_sha = commit_url.split("/")[-1]
        else:
            commit_sha = repo.git_head_commit_url().split("/")[-1]

        # The ID joins name, commit and Unix timestamp (whole seconds).
        submission_time = str(int(datetime.now().timestamp()))
        submission_id = submission_name + "__" + commit_sha + "__" + submission_time

        payload = {
            "username": AUTONLP_USERNAME,
            "dataset": "GEM/references",
            "task": 1,
            "model": "gem",
            "submission_dataset": f"benchmarks/gem-{user_name}",
            "submission_id": submission_id,
            "col_mapping": {},
            "split": "test",
            "config": None,
        }
        json_resp = http_post(path="/evaluate/create", payload=payload, token=HF_TOKEN).json()
        st.write(json_data["submission_name"])
        st.write(commit_sha)
        st.write(json_resp)
    finally:
        # Flush local repo, even when cloning, pushing or the API call failed
        shutil.rmtree(LOCAL_REPO, ignore_errors=True)