MohamedRashad committed on
Commit b3de191 · 1 Parent(s): 887de83

Update requirements and refactor model submission logic to improve error handling and data loading

Files changed (2)
  1. requirements.txt +1 -1
  2. utils.py +50 -109
requirements.txt CHANGED
@@ -1,3 +1,3 @@
  fuzzywuzzy
- Levenshtein
+ python-Levenshtein
  python-dotenv
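
(Not part of the commit, just a quick sanity check for the dependency swap: fuzzywuzzy falls back to a slow pure-Python matcher and emits a UserWarning when the python-Levenshtein speedup is absent, so importing it with the updated requirements installed should stay quiet. The ratio call below is an arbitrary example.)

import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    from fuzzywuzzy import fuzz  # warns about the slow matcher if python-Levenshtein is missing

print(fuzz.ratio("model", "models"))  # arbitrary comparison, just to exercise the import
print("speedup missing" if any("Levenshtein" in str(w.message) for w in caught) else "python-Levenshtein active")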
utils.py CHANGED
@@ -3,7 +3,8 @@ import pandas as pd
  import json
  import os
  from pathlib import Path
- from huggingface_hub import HfApi, hf_hub_download
+ from huggingface_hub import HfApi
+ from datasets import load_dataset

  api = HfApi()

@@ -48,7 +49,7 @@ def get_model_info(model_id, verbose=False):
          return num_downloads, num_likes, license, num_parameters, supported_precisions
      except Exception as e:
          print(f"Error: Could not fetch model information. {str(e)}")
-         return 0, 0, "Unknown", 0, []
+         return 0, 0, "Unknown", 0, ["Missing"]

  def fetch_model_information(model_name):
      try:
@@ -60,61 +61,65 @@ def fetch_model_information(model_name):
          return
      return gr.update(choices=supported_precisions, value=supported_precisions[0]), license, num_parameters, num_downloads, num_likes

- def submit_model(model_name, revision, precision, params, license, task, pending_gradio_df):
+ def load_requests(status_folder, task_type=None):
+     # Load the dataset from the HuggingFace Hub
+     ds = load_dataset(DATASET_REPO_ID, split="test")
+     df = ds.to_pandas()
+
+     # Filter the dataframe based on the status folder and task type
+     df = df[df['status'] == status_folder.upper()]
+     df = df[df['task'] == task_type] if task_type else df
+     df.drop(columns=['status', 'task'], inplace=True)
+
+     return df

-     if float(params) == 0:
-         return "Model size cannot be zero. If the Auto Fetch feature failed and your model is suitable for evaluation, please open a community discussion for help.", pending_gradio_df
+ def submit_model(model_name, revision, precision, params, license, task):
+     # Load pending and finished requests from the dataset repository
+     df_pending = load_requests('pending', task_type=task)
+     df_finished = load_requests('finished', task_type=task)
+     df_failed = load_requests('failed', task_type=task)
+
+     # Check if Auto Fetch feature couldn't fetch model info
+     if float(params) == 0 and precision == 'Missing':
+         return "I think the auto-fetch feature couldn't fetch model info. If your model is not suitable for this task evaluation then this is expected, but if it's suitable and this behavior happened with you then please open a community discussion in the leaderboard discussion section and we will fix it ASAP.", df_pending

+     # Check if model size is in valid range
      if float(params) > 5000:
-         return "Model size should be less than 5000 million parameters (5 billion) 👀", pending_gradio_df
+         return "Model size should be less than 5000 million parameters (5 billion) 👀", df_pending

      # Handle 'Missing' precision
      if precision == 'Missing':
          precision = None
      else:
          precision = precision.strip().lower()
-
-     # Load pending and finished requests from the dataset repository
-     df_pending = load_requests('pending')
-     df_finished = load_requests('finished')
-     df_failed = load_requests('failed')
-
-     # Check if model is in pending requests
-     if not df_pending.empty:
-         existing_models_pending = df_pending[['model_name', 'revision', 'precision', 'task']]
-         model_exists_in_pending = ((existing_models_pending['model_name'] == model_name) &
-                                    (existing_models_pending['revision'] == revision) &
-                                    (existing_models_pending['precision'] == precision.capitalize()) &
-                                    (existing_models_pending['task'] == task)).any()
-         if model_exists_in_pending:
-             return f"Model {model_name} is already in the evaluation queue as a {task} 🚀", pending_gradio_df
+
+     # Helper function to check if model exists in a dataframe
+     def model_exists_in_df(df):
+
+         if df.empty:
+             return False
+         return ((df['model_name'] == model_name) &
+                 (df['revision'] == revision) &
+                 (df['precision'] == precision)).any()
+
+     # Check if model is already in pending requests
+     if model_exists_in_df(df_pending):
+         return f"Model {model_name} is already in the evaluation queue as a {task} 👍", df_pending

      # Check if model is in finished requests
-     if not df_finished.empty:
-         existing_models_finished = df_finished[['model_name', 'revision', 'precision', 'task']]
-         model_exists_in_finished = ((existing_models_finished['model_name'] == model_name) &
-                                     (existing_models_finished['revision'] == revision) &
-                                     (existing_models_finished['precision'] == precision.capitalize()) &
-                                     (existing_models_finished['task'] == task)).any()
-         if model_exists_in_finished:
-             return f"Model {model_name} has already been evaluated as a {task} 🎉", pending_gradio_df
+     if model_exists_in_df(df_finished):
+         return f"Model {model_name} has already been evaluated as a {task} 🎉", df_pending

      # Check if model is in failed requests
-     if not df_failed.empty:
-         existing_models_failed = df_failed[['model_name', 'revision', 'precision', 'task']]
-         model_exists_in_failed = ((existing_models_failed['model_name'] == model_name) &
-                                   (existing_models_failed['revision'] == revision) &
-                                   (existing_models_failed['precision'] == precision.capitalize()) &
-                                   (existing_models_failed['task'] == task)).any()
-         if model_exists_in_failed:
-             return f"Model {model_name} has previously failed evaluation as a {task} ❌", pending_gradio_df
+     if model_exists_in_df(df_failed):
+         return f"Model {model_name} has previously failed evaluation as a {task} ❌", df_pending

      # Check if model exists on HuggingFace Hub
      try:
          api.model_info(model_name)
      except Exception as e:
          print(f"Error fetching model info: {e}")
-         return f"Model {model_name} not found on HuggingFace Hub 🤷‍♂️", pending_gradio_df
+         return f"Model {model_name} not found on HuggingFace Hub 🤷‍♂️", df_pending

      # Proceed with submission
      status = "PENDING"
@@ -136,7 +141,7 @@ def submit_model(model_name, revision, precision, params, license, task, pending
      # Define the file path in the repository
      org_model = model_name.split('/')
      if len(org_model) != 2:
-         return "Please enter the full model name including the organization or username, e.g., 'intfloat/multilingual-e5-large-instruct' 🤷‍♂️", pending_gradio_df
+         return "Please enter the full model name including the organization or username, e.g., 'intfloat/multilingual-e5-large-instruct' 🤷‍♂️", df_pending
      org, model_id = org_model
      precision_str = precision if precision else 'Missing'
      file_path_in_repo = f"pending/{org}/{model_id}_eval_request_{revision}_{precision_str}_{task.lower()}.json"
@@ -152,71 +157,10 @@ def submit_model(model_name, revision, precision, params, license, task, pending
          )
      except Exception as e:
          print(f"Error uploading file: {e}")
-         return f"Error: Could not submit model '{model_name}' for evaluation.", pending_gradio_df
+         return f"Error: Could not submit model '{model_name}' for evaluation.", df_pending

-     # Update the pending requests DataFrame
-     if isinstance(pending_gradio_df, str):
-         pending_gradio_df = pd.DataFrame(columns=["model_name", "license", "revision", "precision", "status", "params", "task"])
-     elif isinstance(pending_gradio_df, dict):
-         pending_gradio_df = pd.DataFrame(pending_gradio_df)
-
-     pending_gradio_df = pending_gradio_df._append(
-         {
-             "model_name": model_name,
-             "license": license,
-             "revision": revision,
-             "precision": precision.capitalize(),
-             "status": status,
-             "params": params,
-             "task": task
-         },
-         ignore_index=True
-     )
-     return f"Model {model_name} has been submitted successfully as a {task} 🚀", pending_gradio_df
-
- def load_requests(status_folder, task_type=None):
-     api = HfApi()
-     requests_data = []
-     folder_path_in_repo = status_folder # 'pending', 'finished', or 'failed'
-
-     try:
-         # Use the cached token
-         files_info = api.list_repo_files(
-             repo_id=DATASET_REPO_ID,
-             repo_type="dataset",
-             token=HF_TOKEN
-         )
-     except Exception as e:
-         print(f"Error accessing dataset repository: {e}")
-         return pd.DataFrame() # Return empty DataFrame if repository not found or inaccessible
-
-     # Filter files in the desired folder
-     files_in_folder = [f for f in files_info if f.startswith(f"{folder_path_in_repo}/") and f.endswith('.json')]
-
-     for file_path in files_in_folder:
-         try:
-             # Download the JSON file
-             local_file_path = hf_hub_download(
-                 repo_id=DATASET_REPO_ID,
-                 filename=file_path,
-                 repo_type="dataset",
-                 token=HF_TOKEN
-             )
-             # Load JSON data
-             with open(local_file_path, 'r') as f:
-                 request = json.load(f)
-             requests_data.append(request)
-         except Exception as e:
-             print(f"Error loading file {file_path}: {e}")
-             continue # Skip files that can't be loaded
-
-     df = pd.DataFrame(requests_data)
-
-     # Filter by task type
-     if task_type and not df.empty:
-         df = df[df['task'] == task_type]
-
-     return df
+     df_pending = load_requests('pending', task_type=task)
+     return f"Model {model_name} has been submitted successfully as a {task} 🚀", df_pending


  def submit_gradio_module(task_type):
@@ -282,11 +226,8 @@ def submit_gradio_module(task_type):

      # Display the tables
      gr.Markdown("## Evaluation Status")
-     with gr.Accordion(f"Pending Evaluations ({len(df_pending)})", open=False):
-         if not df_pending.empty:
-             pending_gradio_df = gr.Dataframe(df_pending)
-         else:
-             pending_gradio_df = gr.Markdown("No pending evaluations.")
+     with gr.Accordion(f"Pending Evaluations ({len(df_pending)})", open=True):
+         pending_gradio_df = gr.Dataframe(df_pending)
      with gr.Accordion(f"Finished Evaluations ({len(df_finished)})", open=False):
          if not df_finished.empty:
              gr.Dataframe(df_finished)
@@ -300,6 +241,6 @@ def submit_gradio_module(task_type):

      submit_button.click(
          submit_model,
-         inputs=[model_name_input, revision_input, precision_input, params_input, license_input, var, pending_gradio_df],
+         inputs=[model_name_input, revision_input, precision_input, params_input, license_input, var],
          outputs=[submission_result, pending_gradio_df],
      )
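
For context on the refactor (this sketch is not part of the diff): load_requests now reads the whole requests dataset once with datasets.load_dataset and filters it in pandas, instead of listing and downloading individual JSON request files. Assuming DATASET_REPO_ID names that dataset and its test split carries status and task columns, as the new code implies, usage looks roughly like this:

from datasets import load_dataset

DATASET_REPO_ID = "org/requests-dataset"  # placeholder; the real id is defined elsewhere in utils.py

def load_requests(status_folder, task_type=None):
    # Single dataset read instead of per-file hf_hub_download calls
    df = load_dataset(DATASET_REPO_ID, split="test").to_pandas()
    df = df[df["status"] == status_folder.upper()]  # e.g. 'pending' -> 'PENDING'
    if task_type:
        df = df[df["task"] == task_type]
    return df.drop(columns=["status", "task"])

pending = load_requests("pending", task_type="Retriever")  # task name is illustrative
print(f"{len(pending)} pending evaluation requests")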