File size: 4,828 Bytes
b4851e0
 
87d617d
b4851e0
 
87d617d
 
 
 
 
 
 
 
 
 
 
b4851e0
87d617d
b4851e0
 
 
 
 
 
 
 
87d617d
5a15668
 
 
 
b4851e0
87d617d
 
 
 
 
 
 
 
 
 
b4851e0
87d617d
 
 
 
 
 
 
 
 
b4851e0
87d617d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4851e0
87d617d
b4851e0
87d617d
 
 
 
 
 
b4851e0
87d617d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4851e0
87d617d
 
 
 
 
 
 
 
 
 
 
 
b4851e0
87d617d
 
 
b4851e0
87d617d
b4851e0
 
87d617d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import json
import os
import re
from datetime import datetime, timezone

from src.challenges.result_parsers import parse_challenge_result_dict

# email validity checker
from email.utils import parseaddr

# url validity checker
from urllib.parse import urlparse

# json parser
from json.decoder import JSONDecodeError

from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, DATA_REPO
from src.submission.check_validity import (
    already_submitted_models,
    check_model_card,
    get_model_size,
    is_model_on_hub,
)

def add_new_eval(
    submission_file,
    algo_name: str,
    algo_info: str,
    algo_link: str,
    submitter_email: str,
):
    """Validate a submission, archive it, and merge its scores into the master table.

    The submission file must contain a JSON object with exactly one top-level
    key whose value maps challenge names to per-challenge result objects.
    The raw file is uploaded to ``upload_history/`` in ``DATA_REPO``; each
    challenge result is converted to a score with
    ``parse_challenge_result_dict`` and the resulting entry is written into
    ``master_table.json`` in the same repository (an entry with the same id
    is updated in place).

    Args:
        submission_file: Either a path string or an object whose ``name``
            attribute is the path of the uploaded JSON file.
        algo_name: Display name of the algorithm. The master-table id and the
            upload directory are derived from it.
        algo_info: Free-text description stored in the entry.
        algo_link: Optional URL; when non-empty it must include a scheme.
        submitter_email: Contact address of the submitter.

    Returns:
        The result of ``styled_message`` on success, or of ``styled_error``
        describing the first validation or retrieval failure.
    """
    return_str = 'Success! Your submission will soon be added to the leaderboard.'

    # validate email and url
    # parseaddr() always returns a (realname, address) tuple, which is truthy
    # even for garbage input, so the address part has to be inspected itself.
    _, email_address = parseaddr(submitter_email or "")
    if "@" not in email_address:
        return styled_error("Please enter a valid email address.")

    algo_link = (algo_link or "").strip()
    if algo_link and not urlparse(algo_link).scheme:
        return styled_error("Please enter a valid URL.")

    # get file path: file-like objects expose it as `.name`, plain strings
    # have no such attribute and are used as the path directly
    file_path = getattr(submission_file, "name", submission_file)
    if not isinstance(file_path, str):
        return styled_error("Invalid submission file: File path not found.")

    # parse the submission file (the file's contents, not the path string)
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            submission_data = json.load(f)
    except OSError:
        return styled_error("Invalid submission file: File path not found.")
    except (JSONDecodeError, UnicodeDecodeError):
        return styled_error("Invalid submission file: JSON parsing failed.")

    # Expected layout: {<single key>: {<challenge>: {<result fields>}, ...}}
    structure_error = styled_error("Invalid submission file: Incorrect organization of the JSON file.")
    if not isinstance(submission_data, dict) or len(submission_data) != 1:
        return structure_error
    (results_per_challenge,) = submission_data.values()
    if not isinstance(results_per_challenge, dict):
        return structure_error
    # Keys of a parsed JSON object are always strings; only values need checking.
    if not all(isinstance(result, dict) for result in results_per_challenge.values()):
        return structure_error

    # format the algo name
    algo_name = (algo_name or "").strip()
    algo_name_filename = re.sub(r"[^a-zA-Z0-9]+", "-", algo_name).lower()
    if not algo_name_filename.strip("-"):
        # Without any alphanumeric character every such name would collapse
        # to the same id ("" or "-") and overwrite unrelated entries.
        return styled_error("Please enter an algorithm name containing at least one letter or digit.")
    timestamp_filename = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S")

    # NOTE(review): the raw file is archived before the scores are parsed, so
    # a submission that later fails score parsing is still kept in the upload
    # history. Order preserved from the original; confirm this is intended.
    print("Uploading submission file")
    API.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=f'upload_history/{algo_name_filename}/{timestamp_filename}.json',
        repo_id=DATA_REPO,
        repo_type="dataset",
        commit_message=f"Add {algo_name} to eval queue by {submitter_email} at {timestamp_filename}",
    )

    # Construct entry in the master table
    eval_entry = {
        "name": algo_name,
        "id": algo_name_filename,
        "info": algo_info,
        "link": algo_link,
        "email": submitter_email,
        "update_timestamp": timestamp_filename,
    }

    for challenge, result in results_per_challenge.items():
        try:
            parsed_result = parse_challenge_result_dict(challenge, result)
        except Exception:
            return styled_error(f"Could not parse the score for {challenge}.")
        if not isinstance(parsed_result, float):
            return styled_error(f"Could not parse the score for {challenge}.")

        eval_entry[challenge] = parsed_result

    # Get content of the master table from DATA_REPO.
    # DATA_REPO is a dataset repository (see the uploads), so repo_type must
    # be passed explicitly; the hub API otherwise looks for a model repo.
    master_table = {}
    try:
        if API.file_exists(DATA_REPO, "master_table.json", repo_type="dataset"):
            local_master_path = API.hf_hub_download(
                repo_id=DATA_REPO,
                filename="master_table.json",
                repo_type="dataset",
                local_dir=EVAL_REQUESTS_PATH,
                force_download=True,
            )
            with open(local_master_path, "r", encoding="utf-8") as f:
                master_table = json.load(f)
        else:
            print("No master table found. Will create a new one.")
    except Exception:
        return styled_error("Could not get the master table from the data repository.")
    if not isinstance(master_table, dict):
        return styled_error("Could not get the master table from the data repository.")

    # Check for duplicate submission
    if algo_name_filename in master_table:
        return_str += ' An existing submission with the same name has been found. Your submission will be used to update the existing one.'
        master_table[algo_name_filename].update(eval_entry)
    else:
        print("Creating eval entry")
        master_table[algo_name_filename] = eval_entry

    # Save the updated master table
    with open("./master_table.json", "w", encoding="utf-8") as f:
        json.dump(master_table, f)

    print("Uploading master table")
    API.upload_file(
        path_or_fileobj="./master_table.json",
        path_in_repo="master_table.json",
        repo_id=DATA_REPO,
        repo_type="dataset",
        commit_message=f"Update master table with {algo_name} by {submitter_email} at {timestamp_filename}",
    )

    return styled_message(return_str)