SondosMB committed on
Commit
8f89713
·
verified ·
1 Parent(s): ca6bd07

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -5
app.py CHANGED
@@ -258,25 +258,68 @@ def load_leaderboard():
258
  print("Loading leaderboard data...")
259
  return pd.read_csv(LEADERBOARD_FILE)
260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
  # Build Gradio App
262
  with gr.Blocks() as demo:
263
  gr.Markdown("# Prediction Evaluation Tool with Leaderboard")
 
264
  with gr.Tabs():
 
265
  with gr.TabItem("πŸ… Submission"):
266
  file_input = gr.File(label="Upload Prediction CSV")
267
  eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
268
- eval_button = gr.Button("Evaluate and Update Leaderboard")
269
- leaderboard_table = gr.Dataframe(
270
  value=load_leaderboard(),
271
- label="Leaderboard",
272
  interactive=False,
273
  wrap=True,
274
  )
 
275
  eval_button.click(
276
- lambda file: (evaluate_predictions(file), load_leaderboard()),
277
  inputs=[file_input],
278
- outputs=[eval_status, leaderboard_table],
279
  )
 
 
280
  with gr.TabItem("πŸ… Leaderboard"):
281
  leaderboard_table = gr.Dataframe(
282
  value=load_leaderboard(),
@@ -284,8 +327,15 @@ with gr.Blocks() as demo:
284
  interactive=False,
285
  wrap=True,
286
  )
 
 
 
 
 
 
287
 
288
  gr.Markdown(f"Last updated on **{LAST_UPDATED}**")
289
 
290
  demo.launch()
291
 
 
 
258
  print("Loading leaderboard data...")
259
  return pd.read_csv(LEADERBOARD_FILE)
260
 
261
def evaluate_predictions_and_update_leaderboard(prediction_file):
    """
    Evaluate an uploaded prediction CSV and update the leaderboard.

    The predictions are inner-joined with ``ground_truth.csv`` on
    ``question_id``. Each ``predicted_answer`` is passed through
    ``clean_answer``; rows for which that yields a missing value are treated
    as invalid. The remaining answers are compared with the ground-truth
    ``Answer`` column and the resulting metrics are handed to
    ``update_leaderboard``.

    Args:
        prediction_file: The uploaded file, either an object exposing its
            path as ``.name`` or a plain path string. The base name must
            look like ``<prefix>_<model_name>.csv``: the model name is the
            second underscore-separated token, cut at its first dot.

    Returns:
        A ``(status_message, leaderboard)`` tuple. ``leaderboard`` is always
        the result of ``load_leaderboard()``, on success and on every
        failure path, so the table bound to this output is never blanked.
    """
    ground_truth_file = "ground_truth.csv"
    # Return the current leaderboard (not None) on early exits: a None output
    # would clear the table shown next to the status message.
    if not os.path.exists(ground_truth_file):
        return "Ground truth file not found.", load_leaderboard()
    if not prediction_file:
        return "Prediction file not uploaded.", load_leaderboard()

    try:
        # Accept both file-like upload objects (path in .name) and raw paths.
        prediction_path = getattr(prediction_file, "name", prediction_file)

        predictions_df = pd.read_csv(prediction_path)
        ground_truth_df = pd.read_csv(ground_truth_file)

        # Derive the model name from "<prefix>_<model_name>.csv" and fail with
        # a readable message instead of a bare "list index out of range".
        name_parts = os.path.basename(prediction_path).split('_')
        model_name = name_parts[1].split('.')[0] if len(name_parts) > 1 else ""
        if not model_name:
            raise ValueError(
                "prediction file name must look like '<prefix>_<model_name>.csv'"
            )

        merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
        merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)

        # Rows whose cleaned answer is missing count against overall accuracy
        # but are excluded from the "valid" accuracy denominator.
        valid_predictions = merged_df.dropna(subset=['pred_answer'])
        correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
        total_predictions = len(merged_df)
        total_valid_predictions = len(valid_predictions)

        overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
        valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0

        results = {
            'model_name': model_name,
            'overall_accuracy': overall_accuracy,
            'valid_accuracy': valid_accuracy,
            'correct_predictions': correct_predictions,
            'total_questions': total_predictions,
        }

        update_leaderboard(results)
        return "Evaluation completed successfully! Leaderboard updated.", load_leaderboard()
    except Exception as e:
        # UI boundary: report any failure as a status string and keep the
        # leaderboard table populated rather than propagating the exception.
        return f"Error during evaluation: {str(e)}", load_leaderboard()
299
+
300
  # Build Gradio App
301
  with gr.Blocks() as demo:
302
  gr.Markdown("# Prediction Evaluation Tool with Leaderboard")
303
+
304
  with gr.Tabs():
305
+ # Submission Tab
306
  with gr.TabItem("πŸ… Submission"):
307
  file_input = gr.File(label="Upload Prediction CSV")
308
  eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
309
+ leaderboard_table_submission = gr.Dataframe(
 
310
  value=load_leaderboard(),
311
+ label="Leaderboard (Preview)",
312
  interactive=False,
313
  wrap=True,
314
  )
315
+ eval_button = gr.Button("Evaluate and Update Leaderboard")
316
  eval_button.click(
317
+ evaluate_predictions_and_update_leaderboard,
318
  inputs=[file_input],
319
+ outputs=[eval_status, leaderboard_table_submission],
320
  )
321
+
322
+ # Leaderboard Tab
323
  with gr.TabItem("πŸ… Leaderboard"):
324
  leaderboard_table = gr.Dataframe(
325
  value=load_leaderboard(),
 
327
  interactive=False,
328
  wrap=True,
329
  )
330
+ refresh_button = gr.Button("Refresh Leaderboard")
331
+ refresh_button.click(
332
+ lambda: load_leaderboard(),
333
+ inputs=[],
334
+ outputs=[leaderboard_table],
335
+ )
336
 
337
  gr.Markdown(f"Last updated on **{LAST_UPDATED}**")
338
 
339
  demo.launch()
340
 
341
+