Spaces:

MBZUAI-LLM
/

Mobile-MMLU-Challenge

Runtime error

App Files Files Community

SondosMB committed on Dec 20, 2024

Commit

8f89713

verified ·

1 Parent(s): ca6bd07

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -5

app.py CHANGED Viewed

@@ -258,25 +258,68 @@ def load_leaderboard():
     print("Loading leaderboard data...")
     return pd.read_csv(LEADERBOARD_FILE)
 # Build Gradio App
 with gr.Blocks() as demo:
     gr.Markdown("# Prediction Evaluation Tool with Leaderboard")
     with gr.Tabs():
         with gr.TabItem("🏅 Submission"):
             file_input = gr.File(label="Upload Prediction CSV")
             eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
-            eval_button = gr.Button("Evaluate and Update Leaderboard")
-            leaderboard_table = gr.Dataframe(
                 value=load_leaderboard(),
-                label="Leaderboard",
                 interactive=False,
                 wrap=True,
             )
             eval_button.click(
-                lambda file: (evaluate_predictions(file), load_leaderboard()),
                 inputs=[file_input],
-                outputs=[eval_status, leaderboard_table],
             )
         with gr.TabItem("🏅 Leaderboard"):
             leaderboard_table = gr.Dataframe(
                 value=load_leaderboard(),
@@ -284,8 +327,15 @@ with gr.Blocks() as demo:
                 interactive=False,
                 wrap=True,
             )
     gr.Markdown(f"Last updated on **{LAST_UPDATED}**")
 demo.launch()

     print("Loading leaderboard data...")
     return pd.read_csv(LEADERBOARD_FILE)
+def evaluate_predictions_and_update_leaderboard(prediction_file):
+    """
+    Evaluate predictions and update the leaderboard.
+    """
+    ground_truth_file = "ground_truth.csv"
+    if not os.path.exists(ground_truth_file):
+        return "Ground truth file not found.", None
+    if not prediction_file:
+        return "Prediction file not uploaded.", None
+    try:
+        predictions_df = pd.read_csv(prediction_file.name)
+        ground_truth_df = pd.read_csv(ground_truth_file)
+        model_name = os.path.basename(prediction_file.name).split('_')[1].split('.')[0]
+        merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
+        merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
+        valid_predictions = merged_df.dropna(subset=['pred_answer'])
+        correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
+        total_predictions = len(merged_df)
+        total_valid_predictions = len(valid_predictions)
+        overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
+        valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
+        results = {
+            'model_name': model_name,
+            'overall_accuracy': overall_accuracy,
+            'valid_accuracy': valid_accuracy,
+            'correct_predictions': correct_predictions,
+            'total_questions': total_predictions,
+        }
+        update_leaderboard(results)
+        return "Evaluation completed successfully! Leaderboard updated.", load_leaderboard()
+    except Exception as e:
+        return f"Error during evaluation: {str(e)}", load_leaderboard()
 # Build Gradio App
 # Layout: a title, two tabs (submission and leaderboard) and a footer line.
 # Components are created in statement order inside the nested `with` blocks.
 with gr.Blocks() as demo:
     gr.Markdown("# Prediction Evaluation Tool with Leaderboard")
     with gr.Tabs():
+        # Submission Tab: upload a prediction CSV, show the status text and a leaderboard preview
         with gr.TabItem("🏅 Submission"):
             file_input = gr.File(label="Upload Prediction CSV")
             eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
+            leaderboard_table_submission = gr.Dataframe(
                 value=load_leaderboard(),  # called once, while the layout is being built
+                label="Leaderboard (Preview)",
                 interactive=False,
                 wrap=True,
             )
+            eval_button = gr.Button("Evaluate and Update Leaderboard")
             eval_button.click(
+                evaluate_predictions_and_update_leaderboard,  # returns a (status, leaderboard) pair
                 inputs=[file_input],
+                outputs=[eval_status, leaderboard_table_submission],  # only this tab's table is an output here
             )
+        # Leaderboard Tab: leaderboard table plus a button that reloads it on demand
         with gr.TabItem("🏅 Leaderboard"):
             leaderboard_table = gr.Dataframe(
                 value=load_leaderboard(),  # called once, while the layout is being built
                 interactive=False,
                 wrap=True,
             )
+            refresh_button = gr.Button("Refresh Leaderboard")
+            refresh_button.click(
+                lambda: load_leaderboard(),  # no inputs; the result replaces leaderboard_table
+                inputs=[],
+                outputs=[leaderboard_table],
+            )
     gr.Markdown(f"Last updated on **{LAST_UPDATED}**")  # LAST_UPDATED is defined outside this excerpt
 demo.launch()