Adam Jirkovsky
committed on
Commit
·
0e88066
1
Parent(s):
ae3034b
Submit error handling
Browse files
- src/submission/submit.py +171 -168
src/submission/submit.py
CHANGED
@@ -29,176 +29,179 @@ def add_new_eval(
|
|
29 |
hf_model_id: str,
|
30 |
contact_email: str
|
31 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
}
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
#for input_col in results.keys():
|
67 |
-
# if input_col not in BENCHMARK_COLS:
|
68 |
-
# print(input_col)
|
69 |
-
# return styled_error(f'Missing: {input_col}')
|
70 |
-
#ret.update({i:j['acc,none'] for i,j in results.items()})
|
71 |
-
# fake data for testing...
|
72 |
-
#ret.update({i:round(np.random.normal(1, 0.5, 1)[0], 2) for i,j in results.items()})
|
73 |
-
|
74 |
-
user_name = "czechbench_leaderboard"
|
75 |
-
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
|
76 |
-
|
77 |
-
existing_eval_names = []
|
78 |
-
for fname in glob(f"{OUT_DIR}/*.json"):
|
79 |
-
with open(fname, mode="r") as f:
|
80 |
-
existing_eval = json.load(f)
|
81 |
-
existing_eval_names.append(existing_eval['eval_name'])
|
82 |
-
|
83 |
-
if ret['eval_name'] in existing_eval_names:
|
84 |
-
print(f"Model name {ret['eval_name']} is used!")
|
85 |
-
return styled_error(f"Model name {ret['eval_name']} is used!")
|
86 |
-
|
87 |
-
out_path = f"{OUT_DIR}/{eval_name}_eval_request.json"
|
88 |
-
|
89 |
-
with open(out_path, "w") as f:
|
90 |
-
f.write(json.dumps(ret))
|
91 |
-
|
92 |
-
|
93 |
-
print("Uploading eval file")
|
94 |
-
|
95 |
-
print("path_or_fileobj: ", out_path)
|
96 |
-
print("path_in_repo: ",out_path.split("eval-queue/")[1])
|
97 |
-
print("repo_id: ", RESULTS_REPO)
|
98 |
-
print("repo_type: ", "dataset")
|
99 |
-
|
100 |
-
response = API.upload_file(
|
101 |
-
path_or_fileobj=out_path,
|
102 |
-
path_in_repo=out_path.split("eval-queue/")[1],
|
103 |
-
repo_id=RESULTS_REPO,
|
104 |
-
repo_type="dataset",
|
105 |
-
commit_message=f"Add {eval_name} to eval queue",
|
106 |
-
)
|
107 |
-
|
108 |
-
"""
|
109 |
-
global REQUESTED_MODELS
|
110 |
-
global USERS_TO_SUBMISSION_DATES
|
111 |
-
if not REQUESTED_MODELS:
|
112 |
-
REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
|
113 |
-
|
114 |
-
user_name = ""
|
115 |
-
model_path = model
|
116 |
-
if "/" in model:
|
117 |
-
user_name = model.split("/")[0]
|
118 |
-
model_path = model.split("/")[1]
|
119 |
-
|
120 |
-
precision = precision.split(" ")[0]
|
121 |
-
current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
122 |
-
|
123 |
-
if model_type is None or model_type == "":
|
124 |
-
return styled_error("Please select a model type.")
|
125 |
-
|
126 |
-
# Does the model actually exist?
|
127 |
-
if revision == "":
|
128 |
-
revision = "main"
|
129 |
-
|
130 |
-
# Is the model on the hub?
|
131 |
-
if weight_type in ["Delta", "Adapter"]:
|
132 |
-
base_model_on_hub, error, _ = is_model_on_hub(
|
133 |
-
model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True
|
134 |
)
|
135 |
-
if not base_model_on_hub:
|
136 |
-
return styled_error(f'Base model "{base_model}" {error}')
|
137 |
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
return styled_error("Could not get your model information. Please fill it up properly.")
|
148 |
-
|
149 |
-
model_size = get_model_size(model_info=model_info, precision=precision)
|
150 |
-
|
151 |
-
# Were the model card and license filled?
|
152 |
-
try:
|
153 |
-
license = model_info.cardData["license"]
|
154 |
-
except Exception:
|
155 |
-
return styled_error("Please select a license for your model")
|
156 |
-
|
157 |
-
modelcard_OK, error_msg = check_model_card(model)
|
158 |
-
if not modelcard_OK:
|
159 |
-
return styled_error(error_msg)
|
160 |
-
|
161 |
-
# Seems good, creating the eval
|
162 |
-
print("Adding new eval")
|
163 |
-
|
164 |
-
eval_entry = {
|
165 |
-
"model": model,
|
166 |
-
"base_model": base_model,
|
167 |
-
"revision": revision,
|
168 |
-
"precision": precision,
|
169 |
-
"weight_type": weight_type,
|
170 |
-
"status": "PENDING",
|
171 |
-
"submitted_time": current_time,
|
172 |
-
"model_type": model_type,
|
173 |
-
"likes": model_info.likes,
|
174 |
-
"params": model_size,
|
175 |
-
"license": license,
|
176 |
-
}
|
177 |
-
|
178 |
-
# Check for duplicate submission
|
179 |
-
if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
|
180 |
-
return styled_warning("This model has been already submitted.")
|
181 |
-
|
182 |
-
print("Creating eval file")
|
183 |
-
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
|
184 |
-
os.makedirs(OUT_DIR, exist_ok=True)
|
185 |
-
out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
|
186 |
-
|
187 |
-
with open(out_path, "w") as f:
|
188 |
-
f.write(json.dumps(eval_entry))
|
189 |
-
|
190 |
-
print("Uploading eval file")
|
191 |
-
API.upload_file(
|
192 |
-
path_or_fileobj=out_path,
|
193 |
-
path_in_repo=out_path.split("eval-queue/")[1],
|
194 |
-
repo_id=QUEUE_REPO,
|
195 |
-
repo_type="dataset",
|
196 |
-
commit_message=f"Add {model} to eval queue",
|
197 |
-
)
|
198 |
-
|
199 |
-
# Remove the local file
|
200 |
-
os.remove(out_path)
|
201 |
-
"""
|
202 |
-
return styled_message(
|
203 |
-
"Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
|
204 |
-
), "", "", "", ""
|
|
|
29 |
hf_model_id: str,
|
30 |
contact_email: str
|
31 |
):
|
32 |
+
try:
|
33 |
+
with open(upload, mode="r") as f:
|
34 |
+
data = json.load(f)
|
35 |
+
|
36 |
+
results = data['results']
|
37 |
+
|
38 |
+
acc_keys = ['exact_match,none', 'exact_match,flexible-extract', 'exact_match,strict-match']
|
39 |
+
|
40 |
+
ret = {
|
41 |
+
'eval_name': eval_name,
|
42 |
+
'precision': precision,
|
43 |
+
'hf_model_id': hf_model_id,
|
44 |
+
'contact_email': contact_email
|
45 |
+
}
|
46 |
+
|
47 |
+
for k, v in results.items():
|
48 |
+
for acc_k in acc_keys:
|
49 |
+
if acc_k in v and k in BENCHMARK_COLS:
|
50 |
+
ret[k] = v[acc_k]
|
51 |
+
#validation
|
52 |
+
for k,v in ret.items():
|
53 |
+
if k in ['eval_name', 'precision', 'hf_model_id', 'contact_email']:
|
54 |
+
continue
|
55 |
+
if k not in BENCHMARK_COLS:
|
56 |
+
print(f"Missing: {k}")
|
57 |
+
return styled_error(f'Missing: {k}')
|
58 |
+
|
59 |
+
if len(BENCHMARK_COLS) != len(ret) - 4:
|
60 |
+
print(f"Missing columns")
|
61 |
+
return styled_error(f'Missing columns')
|
62 |
+
|
63 |
+
# TODO add complex validation
|
64 |
+
#print(results.keys())
|
65 |
+
#print(BENCHMARK_COLS)
|
66 |
+
#for input_col in results.keys():
|
67 |
+
# if input_col not in BENCHMARK_COLS:
|
68 |
+
# print(input_col)
|
69 |
+
# return styled_error(f'Missing: {input_col}')
|
70 |
+
#ret.update({i:j['acc,none'] for i,j in results.items()})
|
71 |
+
# fake data for testing...
|
72 |
+
#ret.update({i:round(np.random.normal(1, 0.5, 1)[0], 2) for i,j in results.items()})
|
73 |
+
|
74 |
+
user_name = "czechbench_leaderboard"
|
75 |
+
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
|
76 |
+
|
77 |
+
existing_eval_names = []
|
78 |
+
for fname in glob(f"{OUT_DIR}/*.json"):
|
79 |
+
with open(fname, mode="r") as f:
|
80 |
+
existing_eval = json.load(f)
|
81 |
+
existing_eval_names.append(existing_eval['eval_name'])
|
82 |
+
|
83 |
+
if ret['eval_name'] in existing_eval_names:
|
84 |
+
print(f"Model name {ret['eval_name']} is used!")
|
85 |
+
return styled_error(f"Model name {ret['eval_name']} is used!")
|
86 |
+
|
87 |
+
out_path = f"{OUT_DIR}/{eval_name}_eval_request.json"
|
88 |
+
|
89 |
+
with open(out_path, "w") as f:
|
90 |
+
f.write(json.dumps(ret))
|
91 |
+
|
92 |
+
|
93 |
+
print("Uploading eval file")
|
94 |
+
|
95 |
+
print("path_or_fileobj: ", out_path)
|
96 |
+
print("path_in_repo: ",out_path.split("eval-queue/")[1])
|
97 |
+
print("repo_id: ", RESULTS_REPO)
|
98 |
+
print("repo_type: ", "dataset")
|
99 |
+
|
100 |
+
response = API.upload_file(
|
101 |
+
path_or_fileobj=out_path,
|
102 |
+
path_in_repo=out_path.split("eval-queue/")[1],
|
103 |
+
repo_id=RESULTS_REPO,
|
104 |
+
repo_type="dataset",
|
105 |
+
commit_message=f"Add {eval_name} to eval queue",
|
106 |
+
)
|
107 |
|
108 |
+
"""
|
109 |
+
global REQUESTED_MODELS
|
110 |
+
global USERS_TO_SUBMISSION_DATES
|
111 |
+
if not REQUESTED_MODELS:
|
112 |
+
REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
|
113 |
+
|
114 |
+
user_name = ""
|
115 |
+
model_path = model
|
116 |
+
if "/" in model:
|
117 |
+
user_name = model.split("/")[0]
|
118 |
+
model_path = model.split("/")[1]
|
119 |
+
|
120 |
+
precision = precision.split(" ")[0]
|
121 |
+
current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
122 |
+
|
123 |
+
if model_type is None or model_type == "":
|
124 |
+
return styled_error("Please select a model type.")
|
125 |
+
|
126 |
+
# Does the model actually exist?
|
127 |
+
if revision == "":
|
128 |
+
revision = "main"
|
129 |
+
|
130 |
+
# Is the model on the hub?
|
131 |
+
if weight_type in ["Delta", "Adapter"]:
|
132 |
+
base_model_on_hub, error, _ = is_model_on_hub(
|
133 |
+
model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True
|
134 |
+
)
|
135 |
+
if not base_model_on_hub:
|
136 |
+
return styled_error(f'Base model "{base_model}" {error}')
|
137 |
+
|
138 |
+
if not weight_type == "Adapter":
|
139 |
+
model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
|
140 |
+
if not model_on_hub:
|
141 |
+
return styled_error(f'Model "{model}" {error}')
|
142 |
+
|
143 |
+
# Is the model info correctly filled?
|
144 |
+
try:
|
145 |
+
model_info = API.model_info(repo_id=model, revision=revision)
|
146 |
+
except Exception:
|
147 |
+
return styled_error("Could not get your model information. Please fill it up properly.")
|
148 |
+
|
149 |
+
model_size = get_model_size(model_info=model_info, precision=precision)
|
150 |
+
|
151 |
+
# Were the model card and license filled?
|
152 |
+
try:
|
153 |
+
license = model_info.cardData["license"]
|
154 |
+
except Exception:
|
155 |
+
return styled_error("Please select a license for your model")
|
156 |
+
|
157 |
+
modelcard_OK, error_msg = check_model_card(model)
|
158 |
+
if not modelcard_OK:
|
159 |
+
return styled_error(error_msg)
|
160 |
+
|
161 |
+
# Seems good, creating the eval
|
162 |
+
print("Adding new eval")
|
163 |
+
|
164 |
+
eval_entry = {
|
165 |
+
"model": model,
|
166 |
+
"base_model": base_model,
|
167 |
+
"revision": revision,
|
168 |
+
"precision": precision,
|
169 |
+
"weight_type": weight_type,
|
170 |
+
"status": "PENDING",
|
171 |
+
"submitted_time": current_time,
|
172 |
+
"model_type": model_type,
|
173 |
+
"likes": model_info.likes,
|
174 |
+
"params": model_size,
|
175 |
+
"license": license,
|
176 |
}
|
177 |
|
178 |
+
# Check for duplicate submission
|
179 |
+
if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
|
180 |
+
return styled_warning("This model has been already submitted.")
|
181 |
+
|
182 |
+
print("Creating eval file")
|
183 |
+
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
|
184 |
+
os.makedirs(OUT_DIR, exist_ok=True)
|
185 |
+
out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
|
186 |
+
|
187 |
+
with open(out_path, "w") as f:
|
188 |
+
f.write(json.dumps(eval_entry))
|
189 |
+
|
190 |
+
print("Uploading eval file")
|
191 |
+
API.upload_file(
|
192 |
+
path_or_fileobj=out_path,
|
193 |
+
path_in_repo=out_path.split("eval-queue/")[1],
|
194 |
+
repo_id=QUEUE_REPO,
|
195 |
+
repo_type="dataset",
|
196 |
+
commit_message=f"Add {model} to eval queue",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
)
|
|
|
|
|
198 |
|
199 |
+
# Remove the local file
|
200 |
+
os.remove(out_path)
|
201 |
+
"""
|
202 |
+
return styled_message(
|
203 |
+
"Your results have been successfully submitted. They will be added to the leaderboard upon verification."
|
204 |
+
), "", "", "", ""
|
205 |
+
|
206 |
+
except Exception as e:
|
207 |
+
return styled_error(f"An error occurred: {e}"), "", "", "", ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|