Update app.py
app.py
CHANGED
@@ -365,13 +365,21 @@ LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
 
 def initialize_leaderboard_file():
     """
-
+    Ensure the leaderboard file exists and has the correct headers.
     """
     if not os.path.exists(LEADERBOARD_FILE):
+        # Create the file with headers
         pd.DataFrame(columns=[
             "Model Name", "Overall Accuracy", "Valid Accuracy",
             "Correct Predictions", "Total Questions", "Timestamp"
         ]).to_csv(LEADERBOARD_FILE, index=False)
+    else:
+        # Check if the file is empty and write headers if needed
+        if os.stat(LEADERBOARD_FILE).st_size == 0:
+            pd.DataFrame(columns=[
+                "Model Name", "Overall Accuracy", "Valid Accuracy",
+                "Correct Predictions", "Total Questions", "Timestamp"
+            ]).to_csv(LEADERBOARD_FILE, index=False)
 
 def clean_answer(answer):
     """
@@ -405,7 +413,7 @@ def load_leaderboard():
     """
     Load all submissions from the leaderboard file.
     """
-    if not os.path.exists(LEADERBOARD_FILE):
+    if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
         return pd.DataFrame({
             "Model Name": [],
             "Overall Accuracy": [],
@@ -416,9 +424,9 @@ def load_leaderboard():
         })
     return pd.read_csv(LEADERBOARD_FILE)
 
-def evaluate_predictions_and_update_leaderboard(prediction_file):
+def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
     """
-    Evaluate predictions
+    Evaluate predictions and optionally add results to the leaderboard.
     """
     ground_truth_file = "ground_truth.csv"
     if not os.path.exists(ground_truth_file):
@@ -430,7 +438,6 @@ def evaluate_predictions_and_update_leaderboard(prediction_file):
         # Load predictions and ground truth
         predictions_df = pd.read_csv(prediction_file.name)
         ground_truth_df = pd.read_csv(ground_truth_file)
-        model_name = os.path.basename(prediction_file.name).split('_')[1].split('.')[0]
 
         # Merge predictions with ground truth
         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
@@ -447,16 +454,19 @@ def evaluate_predictions_and_update_leaderboard(prediction_file):
         valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
 
         results = {
-            'model_name': model_name,
+            'model_name': model_name if model_name else "Unknown Model",
             'overall_accuracy': overall_accuracy,
             'valid_accuracy': valid_accuracy,
             'correct_predictions': correct_predictions,
             'total_questions': total_predictions,
         }
 
-        # Update leaderboard
-
-
+        # Update leaderboard only if opted in
+        if add_to_leaderboard:
+            update_leaderboard(results)
+            return "Evaluation completed and added to leaderboard.", load_leaderboard()
+        else:
+            return "Evaluation completed but not added to leaderboard.", load_leaderboard()
     except Exception as e:
         return f"Error during evaluation: {str(e)}", load_leaderboard()
 
@@ -471,6 +481,8 @@ with gr.Blocks() as demo:
         # Submission Tab
         with gr.TabItem("🏅 Submission"):
             file_input = gr.File(label="Upload Prediction CSV")
+            model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
+            add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)
             eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
             leaderboard_table_preview = gr.Dataframe(
                 value=load_leaderboard(),
@@ -480,8 +492,8 @@
             )
             eval_button = gr.Button("Evaluate and Update Leaderboard")
             eval_button.click(
-                evaluate_predictions_and_update_leaderboard,
-                inputs=[file_input],
+                evaluate_predictions,
+                inputs=[file_input, model_name_input, add_to_leaderboard_checkbox],
                 outputs=[eval_status, leaderboard_table_preview],
             )
 
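The refactored evaluate_predictions path above calls update_leaderboard(results), which lives elsewhere in app.py and is not part of this diff. Below is a minimal sketch of what such a helper could look like, assuming only the column names written by initialize_leaderboard_file() and the results keys built in the diff; the LEADERBOARD_FILE value and the timestamp format are illustrative assumptions, not code from the commit.

# Hypothetical sketch -- update_leaderboard() is defined elsewhere in app.py and
# not shown in this commit. Column names and result keys come from the diff above;
# the file name and timestamp format are assumptions.
import os
from datetime import datetime

import pandas as pd

LEADERBOARD_FILE = "leaderboard.csv"  # assumed value; the diff only references the constant

def update_leaderboard(results):
    """Append one evaluation result as a new row of the leaderboard CSV."""
    new_row = pd.DataFrame([{
        "Model Name": results['model_name'],
        "Overall Accuracy": results['overall_accuracy'],
        "Valid Accuracy": results['valid_accuracy'],
        "Correct Predictions": results['correct_predictions'],
        "Total Questions": results['total_questions'],
        "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }])
    # Append so earlier submissions are kept; write headers only if the file is new or empty
    write_header = not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0
    new_row.to_csv(LEADERBOARD_FILE, mode="a", header=write_header, index=False)

With a helper of this shape, the load_leaderboard() change in the diff (which now also treats an empty file as "no submissions") would pick up appended rows the next time the leaderboard table is refreshed.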