import gradio as gr
import pandas as pd
import os
import re
from datetime import datetime
LEADERBOARD_FILE = "leaderboard.csv" # File to store all submissions persistently
LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
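
# Expected inputs (inferred from the evaluation logic below): the uploaded prediction CSV
# should contain "question_id" and "predicted_answer" columns, and ground_truth.csv should
# contain "question_id" and "Answer" columns.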
def initialize_leaderboard_file():
    """
    Ensure the leaderboard file exists and has the correct headers.
    """
    # Create the file with headers if it is missing or empty.
    if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
        pd.DataFrame(columns=[
            "Model Name", "Overall Accuracy", "Valid Accuracy",
            "Correct Predictions", "Total Questions", "Timestamp"
        ]).to_csv(LEADERBOARD_FILE, index=False)

def clean_answer(answer):
    """
    Clean and normalize the predicted answers.
    """
    if pd.isna(answer):
        return None
    answer = str(answer)
    clean = re.sub(r'[^A-Da-d]', '', answer)
    if clean:
        return clean[0].upper()
    return None
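
# For reference, clean_answer keeps only the first A-D letter it finds, e.g.
# clean_answer(" (b) ") -> "B", clean_answer("D.") -> "D", clean_answer("E") -> None.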
def update_leaderboard(results):
    """
    Append new submission results to the leaderboard file.
    """
    new_entry = {
        "Model Name": results['model_name'],
        "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
        "Valid Accuracy": round(results['valid_accuracy'] * 100, 2),
        "Correct Predictions": results['correct_predictions'],
        "Total Questions": results['total_questions'],
        "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    new_entry_df = pd.DataFrame([new_entry])
    new_entry_df.to_csv(LEADERBOARD_FILE, mode='a', index=False, header=False)
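
# Each submission is appended as a single CSV row in the column order above, e.g.
# (illustrative values only): my-model,75.0,93.75,150,200,2025-01-01 12:00:00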
def load_leaderboard():
    """
    Load all submissions from the leaderboard file.
    """
    if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
        return pd.DataFrame({
            "Model Name": [],
            "Overall Accuracy": [],
            "Valid Accuracy": [],
            "Correct Predictions": [],
            "Total Questions": [],
            "Timestamp": [],
        })
    return pd.read_csv(LEADERBOARD_FILE)

def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
    """
    Evaluate predictions against the ground truth and optionally add the results to the leaderboard.
    """
    ground_truth_file = "ground_truth.csv"
    if not os.path.exists(ground_truth_file):
        return "Ground truth file not found.", load_leaderboard()
    if not prediction_file:
        return "Prediction file not uploaded.", load_leaderboard()
    try:
        # gr.File may pass either a file-like object (with a .name attribute) or a
        # plain path string, depending on the Gradio version; accept both.
        prediction_path = getattr(prediction_file, "name", prediction_file)

        # Load predictions and ground truth
        predictions_df = pd.read_csv(prediction_path)
        ground_truth_df = pd.read_csv(ground_truth_file)

        # Merge predictions with ground truth and normalize the predicted answers
        merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
        merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)

        # Evaluate predictions
        valid_predictions = merged_df.dropna(subset=['pred_answer'])
        correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
        total_predictions = len(merged_df)
        total_valid_predictions = len(valid_predictions)

        # Calculate accuracy
        overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
        valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0

        results = {
            'model_name': model_name if model_name else "Unknown Model",
            'overall_accuracy': overall_accuracy,
            'valid_accuracy': valid_accuracy,
            'correct_predictions': correct_predictions,
            'total_questions': total_predictions,
        }

        # Update the leaderboard only if the submitter opted in
        if add_to_leaderboard:
            update_leaderboard(results)
            return "Evaluation completed and added to leaderboard.", load_leaderboard()
        else:
            return "Evaluation completed but not added to leaderboard.", load_leaderboard()
    except Exception as e:
        return f"Error during evaluation: {str(e)}", load_leaderboard()
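
# Minimal local sanity check, separate from the Gradio app (assumes a predictions.csv
# file with the columns described at the top of this module):
#   status, board = evaluate_predictions(open("predictions.csv"), "baseline-model", add_to_leaderboard=False)
#   print(status)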
# Initialize leaderboard file
initialize_leaderboard_file()
# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# Prediction Evaluation Tool with Leaderboard")

    with gr.Tabs():
        # Submission Tab
        with gr.TabItem("🏅 Submission"):
            file_input = gr.File(label="Upload Prediction CSV")
            model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
            add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)
            eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
            leaderboard_table_preview = gr.Dataframe(
                value=load_leaderboard(),
                label="Leaderboard (Preview)",
                interactive=False,
                wrap=True,
            )
            eval_button = gr.Button("Evaluate and Update Leaderboard")
            eval_button.click(
                evaluate_predictions,
                inputs=[file_input, model_name_input, add_to_leaderboard_checkbox],
                outputs=[eval_status, leaderboard_table_preview],
            )

        # Leaderboard Tab
        with gr.TabItem("🏅 Leaderboard"):
            leaderboard_table = gr.Dataframe(
                value=load_leaderboard(),
                label="Leaderboard",
                interactive=False,
                wrap=True,
            )
            refresh_button = gr.Button("Refresh Leaderboard")
            refresh_button.click(
                load_leaderboard,
                inputs=[],
                outputs=[leaderboard_table],
            )

    gr.Markdown(f"Last updated on **{LAST_UPDATED}**")
demo.launch()