Update app.py
app.py
CHANGED
@@ -365,13 +365,21 @@ LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
 
 def initialize_leaderboard_file():
     """
-
+    Ensure the leaderboard file exists and has the correct headers.
     """
     if not os.path.exists(LEADERBOARD_FILE):
+        # Create the file with headers
         pd.DataFrame(columns=[
             "Model Name", "Overall Accuracy", "Valid Accuracy",
             "Correct Predictions", "Total Questions", "Timestamp"
         ]).to_csv(LEADERBOARD_FILE, index=False)
+    else:
+        # Check if the file is empty and write headers if needed
+        if os.stat(LEADERBOARD_FILE).st_size == 0:
+            pd.DataFrame(columns=[
+                "Model Name", "Overall Accuracy", "Valid Accuracy",
+                "Correct Predictions", "Total Questions", "Timestamp"
+            ]).to_csv(LEADERBOARD_FILE, index=False)
 
 def clean_answer(answer):
     """
@@ -405,7 +413,7 @@ def load_leaderboard():
     """
     Load all submissions from the leaderboard file.
     """
-    if not os.path.exists(LEADERBOARD_FILE):
+    if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
         return pd.DataFrame({
             "Model Name": [],
             "Overall Accuracy": [],
@@ -416,9 +424,9 @@ def load_leaderboard():
         })
     return pd.read_csv(LEADERBOARD_FILE)
 
-def evaluate_predictions_and_update_leaderboard(prediction_file):
+def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
     """
-    Evaluate predictions
+    Evaluate predictions and optionally add results to the leaderboard.
     """
     ground_truth_file = "ground_truth.csv"
     if not os.path.exists(ground_truth_file):
@@ -430,7 +438,6 @@ def evaluate_predictions_and_update_leaderboard(prediction_file):
         # Load predictions and ground truth
         predictions_df = pd.read_csv(prediction_file.name)
         ground_truth_df = pd.read_csv(ground_truth_file)
-        model_name = os.path.basename(prediction_file.name).split('_')[1].split('.')[0]
 
         # Merge predictions with ground truth
         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
@@ -447,16 +454,19 @@ def evaluate_predictions_and_update_leaderboard(prediction_file):
         valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
 
         results = {
-            'model_name': model_name,
+            'model_name': model_name if model_name else "Unknown Model",
             'overall_accuracy': overall_accuracy,
             'valid_accuracy': valid_accuracy,
             'correct_predictions': correct_predictions,
             'total_questions': total_predictions,
         }
 
-        # Update leaderboard
-
-
+        # Update leaderboard only if opted in
+        if add_to_leaderboard:
+            update_leaderboard(results)
+            return "Evaluation completed and added to leaderboard.", load_leaderboard()
+        else:
+            return "Evaluation completed but not added to leaderboard.", load_leaderboard()
     except Exception as e:
         return f"Error during evaluation: {str(e)}", load_leaderboard()
 
@@ -471,6 +481,8 @@ with gr.Blocks() as demo:
         # Submission Tab
         with gr.TabItem("🏅 Submission"):
             file_input = gr.File(label="Upload Prediction CSV")
+            model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
+            add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)
             eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
             leaderboard_table_preview = gr.Dataframe(
                 value=load_leaderboard(),
@@ -480,8 +492,8 @@
             )
             eval_button = gr.Button("Evaluate and Update Leaderboard")
             eval_button.click(
-                evaluate_predictions_and_update_leaderboard,
-                inputs=[file_input],
+                evaluate_predictions,
+                inputs=[file_input, model_name_input, add_to_leaderboard_checkbox],
                 outputs=[eval_status, leaderboard_table_preview],
             )
 
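The refactored evaluate_predictions path above calls update_leaderboard(results), which lives elsewhere in app.py and is not part of this diff. Below is a minimal sketch of what such a helper could look like, assuming only the column names written by initialize_leaderboard_file() and the results keys built in the diff; the LEADERBOARD_FILE value and the timestamp format are illustrative assumptions, not code from the commit.

# Hypothetical sketch -- update_leaderboard() is defined elsewhere in app.py and
# not shown in this commit. Column names and result keys come from the diff above;
# the file name and timestamp format are assumptions.
import os
from datetime import datetime

import pandas as pd

LEADERBOARD_FILE = "leaderboard.csv"  # assumed value; the diff only references the constant

def update_leaderboard(results):
    """Append one evaluation result as a new row of the leaderboard CSV."""
    new_row = pd.DataFrame([{
        "Model Name": results['model_name'],
        "Overall Accuracy": results['overall_accuracy'],
        "Valid Accuracy": results['valid_accuracy'],
        "Correct Predictions": results['correct_predictions'],
        "Total Questions": results['total_questions'],
        "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }])
    # Append so earlier submissions are kept; write headers only if the file is new or empty
    write_header = not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0
    new_row.to_csv(LEADERBOARD_FILE, mode="a", header=write_header, index=False)

With a helper of this shape, the load_leaderboard() change in the diff (which now also treats an empty file as "no submissions") would pick up appended rows the next time the leaderboard table is refreshed.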