SondosMB committed on
Commit 13e4c4d · verified · 1 Parent(s): 0ddd3ea

Update app.py

Files changed (1)
  1. app.py +16 -37
app.py CHANGED
@@ -170,47 +170,39 @@ import re
 from datetime import datetime
 from huggingface_hub import hf_hub_download
 
-LEADERBOARD_FILE = "leaderboard.csv"  # File to store all submissions persistently
-GROUND_TRUTH_FILE = "ground_truth.csv"  # File for ground truth data
+LEADERBOARD_FILE = "leaderboard.csv"
+GROUND_TRUTH_FILE = "ground_truth.csv"
 LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
 
-# Disable symlink warnings
+# Ensure authentication and suppress warnings
 os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
+HF_TOKEN = os.getenv("HF_TOKEN")
+if not HF_TOKEN:
+    raise ValueError("HF_TOKEN environment variable is not set or invalid.")
 
 def initialize_leaderboard_file():
     """
     Ensure the leaderboard file exists and has the correct headers.
     """
     if not os.path.exists(LEADERBOARD_FILE):
-        # Create the file with headers
         pd.DataFrame(columns=[
             "Model Name", "Overall Accuracy", "Valid Accuracy",
             "Correct Predictions", "Total Questions", "Timestamp"
         ]).to_csv(LEADERBOARD_FILE, index=False)
-    else:
-        # Check if the file is empty and write headers if needed
-        if os.stat(LEADERBOARD_FILE).st_size == 0:
-            pd.DataFrame(columns=[
-                "Model Name", "Overall Accuracy", "Valid Accuracy",
-                "Correct Predictions", "Total Questions", "Timestamp"
-            ]).to_csv(LEADERBOARD_FILE, index=False)
+    elif os.stat(LEADERBOARD_FILE).st_size == 0:
+        pd.DataFrame(columns=[
+            "Model Name", "Overall Accuracy", "Valid Accuracy",
+            "Correct Predictions", "Total Questions", "Timestamp"
+        ]).to_csv(LEADERBOARD_FILE, index=False)
 
 def clean_answer(answer):
-    """
-    Clean and normalize the predicted answers.
-    """
     if pd.isna(answer):
         return None
     answer = str(answer)
     clean = re.sub(r'[^A-Da-d]', '', answer)
-    if clean:
-        return clean[0].upper()
-    return None
+    return clean[0].upper() if clean else None
 
 def update_leaderboard(results):
-    """
-    Append new submission results to the leaderboard file.
-    """
     new_entry = {
         "Model Name": results['model_name'],
         "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
@@ -219,14 +211,10 @@ def update_leaderboard(results):
         "Total Questions": results['total_questions'],
         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
     }
-
     new_entry_df = pd.DataFrame([new_entry])
     new_entry_df.to_csv(LEADERBOARD_FILE, mode='a', index=False, header=False)
 
 def load_leaderboard():
-    """
-    Load all submissions from the leaderboard file.
-    """
     if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
         return pd.DataFrame({
             "Model Name": [],
@@ -239,17 +227,16 @@ def load_leaderboard():
     return pd.read_csv(LEADERBOARD_FILE)
 
 def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
-    """
-    Evaluate predictions and optionally add results to the leaderboard.
-    """
     try:
-        # Load ground truth data
         ground_truth_path = hf_hub_download(
             repo_id="SondosMB/ground-truth-dataset",
-            filename=GROUND_TRUTH_FILE,
+            filename="ground_truth.csv",
+            repo_type="dataset",
             use_auth_token=True
         )
         ground_truth_df = pd.read_csv(ground_truth_path)
+    except FileNotFoundError:
+        return "Ground truth file not found in the dataset repository.", load_leaderboard()
     except Exception as e:
         return f"Error loading ground truth: {e}", load_leaderboard()
 
@@ -257,18 +244,15 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
         return "Prediction file not uploaded.", load_leaderboard()
 
     try:
-        # Load predictions and merge with ground truth
        predictions_df = pd.read_csv(prediction_file.name)
        merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
        merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
 
-        # Evaluate predictions
        valid_predictions = merged_df.dropna(subset=['pred_answer'])
        correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
        total_predictions = len(merged_df)
        total_valid_predictions = len(valid_predictions)
 
-        # Calculate accuracy
        overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
        valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
 
@@ -280,7 +264,6 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
             'total_questions': total_predictions,
         }
 
-        # Update leaderboard only if opted in
         if add_to_leaderboard:
             update_leaderboard(results)
             return "Evaluation completed and added to leaderboard.", load_leaderboard()
@@ -289,15 +272,12 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
     except Exception as e:
         return f"Error during evaluation: {str(e)}", load_leaderboard()
 
-# Initialize leaderboard file
 initialize_leaderboard_file()
 
-# Gradio Interface
 with gr.Blocks() as demo:
     gr.Markdown("# Prediction Evaluation Tool with Leaderboard")
 
     with gr.Tabs():
-        # Submission Tab
         with gr.TabItem("🏅 Submission"):
             file_input = gr.File(label="Upload Prediction CSV")
             model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
@@ -316,7 +296,6 @@ with gr.Blocks() as demo:
                 outputs=[eval_status, leaderboard_table_preview],
             )
 
-        # Leaderboard Tab
         with gr.TabItem("🏅 Leaderboard"):
             leaderboard_table = gr.Dataframe(
                 value=load_leaderboard(),
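
Reviewer note: the sketch below is not part of the commit. It is a minimal, standalone way to exercise the changed download path in isolation, assuming an HF_TOKEN environment variable whose token has read access to the SondosMB/ground-truth-dataset dataset repo; it passes token= instead of the use_auth_token=True kept in app.py, since newer huggingface_hub releases prefer that keyword.

    # Standalone sketch of the updated ground-truth download (assumptions noted above).
    import os

    import pandas as pd
    from huggingface_hub import hf_hub_download

    token = os.getenv("HF_TOKEN")
    if not token:
        raise ValueError("HF_TOKEN environment variable is not set or invalid.")

    # repo_type="dataset" is the key addition in this commit: without it,
    # hf_hub_download looks for a model repo named SondosMB/ground-truth-dataset.
    path = hf_hub_download(
        repo_id="SondosMB/ground-truth-dataset",
        filename="ground_truth.csv",
        repo_type="dataset",
        token=token,
    )
    print(pd.read_csv(path).head())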