Commit b3de191 · Parent: 887de83

Update requirements and refactor model submission logic to improve error handling and data loading

Files changed:
- requirements.txt (+1 -1)
- utils.py (+50 -109)
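Both the old and the new submission paths move the same per-model record between the pending, finished, and failed queues; the field names can be read off the DataFrame columns and the JSON payload in the utils.py diff below. A hypothetical record, with purely illustrative values, looks like this:

# Hypothetical eval-request record; field names follow the utils.py diff below,
# every value here is illustrative rather than taken from the repository.
eval_request = {
    "model_name": "intfloat/multilingual-e5-large-instruct",  # org/model id
    "revision": "main",
    "precision": "float16",
    "license": "mit",
    "params": 560,            # size in millions of parameters
    "status": "PENDING",      # queue folder upper-cased: PENDING / FINISHED / FAILED
    "task": "Retriever",      # illustrative task label
}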
requirements.txt CHANGED

@@ -1,3 +1,3 @@
 fuzzywuzzy
-Levenshtein
+python-Levenshtein
 python-dotenv
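The rename matters because fuzzywuzzy only uses its fast C backend when a package providing the Levenshtein module is installed; otherwise it falls back to difflib's pure-Python SequenceMatcher and warns, recommending python-Levenshtein, which is presumably why the requirement is pinned under that distribution name. A minimal sketch (the strings are illustrative):

# Sketch: with python-Levenshtein installed alongside fuzzywuzzy, importing
# fuzz no longer warns about the slow pure-Python SequenceMatcher fallback.
from fuzzywuzzy import fuzz

score = fuzz.ratio("multilingual-e5-large", "multilingual-e5-large-instruct")
print(score)  # integer similarity score in [0, 100]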
utils.py CHANGED

@@ -3,7 +3,8 @@ import pandas as pd
 import json
 import os
 from pathlib import Path
-from huggingface_hub import HfApi
+from huggingface_hub import HfApi
+from datasets import load_dataset

 api = HfApi()

@@ -48,7 +49,7 @@ def get_model_info(model_id, verbose=False):
         return num_downloads, num_likes, license, num_parameters, supported_precisions
     except Exception as e:
         print(f"Error: Could not fetch model information. {str(e)}")
-        return 0, 0, "Unknown", 0, []
+        return 0, 0, "Unknown", 0, ["Missing"]

 def fetch_model_information(model_name):
     try:
@@ -60,61 +61,65 @@ def fetch_model_information(model_name):
         return
     return gr.update(choices=supported_precisions, value=supported_precisions[0]), license, num_parameters, num_downloads, num_likes

-def submit_model(model_name, revision, precision, params, license, task, pending
-    ...
+def load_requests(status_folder, task_type=None):
+    # Load the dataset from the HuggingFace Hub
+    ds = load_dataset(DATASET_REPO_ID, split="test")
+    df = ds.to_pandas()
+
+    # Filter the dataframe based on the status folder and task type
+    df = df[df['status'] == status_folder.upper()]
+    df = df[df['task'] == task_type] if task_type else df
+    df.drop(columns=['status', 'task'], inplace=True)
+
+    return df

+def submit_model(model_name, revision, precision, params, license, task):
+    # Load pending, finished, and failed requests from the dataset repository
+    df_pending = load_requests('pending', task_type=task)
+    df_finished = load_requests('finished', task_type=task)
+    df_failed = load_requests('failed', task_type=task)
+
+    # Check whether the auto-fetch feature couldn't fetch model info
+    if float(params) == 0 and precision == 'Missing':
+        return "It looks like the auto-fetch feature couldn't fetch the model info. If your model is not suitable for this task's evaluation, this is expected; if it is suitable and you still see this message, please open a community discussion in the leaderboard discussion section and we will fix it ASAP.", df_pending

+    # Check that the model size is in the valid range
     if float(params) > 5000:
-        return "Model size should be less than 5000 million parameters (5 billion)",
+        return "Model size should be less than 5000 million parameters (5 billion)", df_pending

     # Handle 'Missing' precision
     if precision == 'Missing':
         precision = None
     else:
         precision = precision.strip().lower()
-    ...
-        if model_exists_in_pending:
-            return f"Model {model_name} is already in the evaluation queue as a {task}", pending_gradio_df
+
+    # Helper function to check whether the model exists in a dataframe
+    def model_exists_in_df(df):
+        if df.empty:
+            return False
+        return ((df['model_name'] == model_name) &
+                (df['revision'] == revision) &
+                (df['precision'] == precision)).any()
+
+    # Check if model is already in pending requests
+    if model_exists_in_df(df_pending):
+        return f"Model {model_name} is already in the evaluation queue as a {task}", df_pending

     # Check if model is in finished requests
-    ...
-        model_exists_in_finished = ((existing_models_finished['model_name'] == model_name) &
-                                    (existing_models_finished['revision'] == revision) &
-                                    (existing_models_finished['precision'] == precision.capitalize()) &
-                                    (existing_models_finished['task'] == task)).any()
-        if model_exists_in_finished:
-            return f"Model {model_name} has already been evaluated as a {task}", pending_gradio_df
+    if model_exists_in_df(df_finished):
+        return f"Model {model_name} has already been evaluated as a {task}", df_pending

     # Check if model is in failed requests
-    ...
-        model_exists_in_failed = ((existing_models_failed['model_name'] == model_name) &
-                                  (existing_models_failed['revision'] == revision) &
-                                  (existing_models_failed['precision'] == precision.capitalize()) &
-                                  (existing_models_failed['task'] == task)).any()
-        if model_exists_in_failed:
-            return f"Model {model_name} has previously failed evaluation as a {task}", pending_gradio_df
+    if model_exists_in_df(df_failed):
+        return f"Model {model_name} has previously failed evaluation as a {task}", df_pending

     # Check if model exists on HuggingFace Hub
     try:
         api.model_info(model_name)
     except Exception as e:
         print(f"Error fetching model info: {e}")
-        return f"Model {model_name} not found on HuggingFace Hub",
+        return f"Model {model_name} not found on HuggingFace Hub", df_pending

     # Proceed with submission
     status = "PENDING"
@@ -136,7 +141,7 @@ def submit_model(model_name, revision, precision, params, license, task, pending
     # Define the file path in the repository
     org_model = model_name.split('/')
     if len(org_model) != 2:
-        return "Please enter the full model name including the organization or username, e.g., 'intfloat/multilingual-e5-large-instruct'",
+        return "Please enter the full model name including the organization or username, e.g., 'intfloat/multilingual-e5-large-instruct'", df_pending
     org, model_id = org_model
     precision_str = precision if precision else 'Missing'
     file_path_in_repo = f"pending/{org}/{model_id}_eval_request_{revision}_{precision_str}_{task.lower()}.json"
@@ -152,71 +157,10 @@ def submit_model(model_name, revision, precision, params, license, task, pending
         )
     except Exception as e:
         print(f"Error uploading file: {e}")
-        return f"Error: Could not submit model '{model_name}' for evaluation.",
+        return f"Error: Could not submit model '{model_name}' for evaluation.", df_pending

-    ...
-        pending_gradio_df = pd.DataFrame(columns=["model_name", "license", "revision", "precision", "status", "params", "task"])
-    elif isinstance(pending_gradio_df, dict):
-        pending_gradio_df = pd.DataFrame(pending_gradio_df)
-
-    pending_gradio_df = pending_gradio_df._append(
-        {
-            "model_name": model_name,
-            "license": license,
-            "revision": revision,
-            "precision": precision.capitalize(),
-            "status": status,
-            "params": params,
-            "task": task
-        },
-        ignore_index=True
-    )
-    return f"Model {model_name} has been submitted successfully as a {task}", pending_gradio_df
-
-def load_requests(status_folder, task_type=None):
-    api = HfApi()
-    requests_data = []
-    folder_path_in_repo = status_folder  # 'pending', 'finished', or 'failed'
-
-    try:
-        # Use the cached token
-        files_info = api.list_repo_files(
-            repo_id=DATASET_REPO_ID,
-            repo_type="dataset",
-            token=HF_TOKEN
-        )
-    except Exception as e:
-        print(f"Error accessing dataset repository: {e}")
-        return pd.DataFrame()  # Return empty DataFrame if repository not found or inaccessible
-
-    # Filter files in the desired folder
-    files_in_folder = [f for f in files_info if f.startswith(f"{folder_path_in_repo}/") and f.endswith('.json')]
-
-    for file_path in files_in_folder:
-        try:
-            # Download the JSON file
-            local_file_path = hf_hub_download(
-                repo_id=DATASET_REPO_ID,
-                filename=file_path,
-                repo_type="dataset",
-                token=HF_TOKEN
-            )
-            # Load JSON data
-            with open(local_file_path, 'r') as f:
-                request = json.load(f)
-            requests_data.append(request)
-        except Exception as e:
-            print(f"Error loading file {file_path}: {e}")
-            continue  # Skip files that can't be loaded
-
-    df = pd.DataFrame(requests_data)
-
-    # Filter by task type
-    if task_type and not df.empty:
-        df = df[df['task'] == task_type]
-
-    return df
+    df_pending = load_requests('pending', task_type=task)
+    return f"Model {model_name} has been submitted successfully as a {task}", df_pending


 def submit_gradio_module(task_type):
@@ -282,11 +226,8 @@ def submit_gradio_module(task_type):

         # Display the tables
         gr.Markdown("## Evaluation Status")
-        with gr.Accordion(f"Pending Evaluations ({len(df_pending)})", open=
-            if not df_pending.empty:
-                pending_gradio_df = gr.Dataframe(df_pending)
-            else:
-                pending_gradio_df = gr.Markdown("No pending evaluations.")
+        with gr.Accordion(f"Pending Evaluations ({len(df_pending)})", open=True):
+            pending_gradio_df = gr.Dataframe(df_pending)
         with gr.Accordion(f"Finished Evaluations ({len(df_finished)})", open=False):
             if not df_finished.empty:
                 gr.Dataframe(df_finished)
@@ -300,6 +241,6 @@ def submit_gradio_module(task_type):

         submit_button.click(
             submit_model,
-            inputs=[model_name_input, revision_input, precision_input, params_input, license_input, var
+            inputs=[model_name_input, revision_input, precision_input, params_input, license_input, var],
             outputs=[submission_result, pending_gradio_df],
         )
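For readers following the data-loading change: the rewritten load_requests no longer walks the pending/, finished/, and failed/ JSON files one hf_hub_download at a time, but loads the whole requests dataset once and filters it in pandas. A minimal usage sketch, assuming DATASET_REPO_ID points at a dataset whose test split carries the model_name, revision, precision, status, and task columns used above (the repo id and task label below are placeholders):

# Usage sketch for the refactored queue loading; the repo id and task label are
# placeholders, and the column names come from the diff above.
from datasets import load_dataset

DATASET_REPO_ID = "my-org/requests"   # placeholder dataset repo id

def load_requests(status_folder, task_type=None):
    df = load_dataset(DATASET_REPO_ID, split="test").to_pandas()
    df = df[df["status"] == status_folder.upper()]   # 'pending' -> 'PENDING'
    if task_type:
        df = df[df["task"] == task_type]
    return df.drop(columns=["status", "task"])

pending = load_requests("pending", task_type="Retriever")   # placeholder task label
print(len(pending), "pending evaluation requests")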