Update app.py
app.py
CHANGED
@@ -274,7 +274,47 @@ def update_leaderboard(results):
     except Exception as e:
         print(f"Error updating leaderboard file: {e}")
 
+def update_leaderboard_pro(results):
+    """
+    Append new submission results to the leaderboard file and push updates to the Hugging Face repository.
+    """
+    new_entry = {
+        "Model Name": results['model_name'],
+        "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
+        "Correct Predictions": results['correct_predictions'],
+        "Total Questions": results['total_questions'],
+        "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+        "Team Name": results['Team_name']
+    }
 
+    try:
+        # Update the local leaderboard file
+        new_entry_df = pd.DataFrame([new_entry])
+        file_exists = os.path.exists(LEADERBOARD_FILE)
+
+        new_entry_df.to_csv(
+            LEADERBOARD_FILE,
+            mode='a',  # Append mode
+            index=False,
+            header=not file_exists  # Write header only if the file is new
+        )
+        print(f"Leaderboard updated successfully at {LEADERBOARD_FILE}")
+
+        # Push the updated file to the Hugging Face repository using HTTP API
+        api = HfApi()
+        token = HfFolder.get_token()
+
+        api.upload_file(
+            path_or_fileobj=LEADERBOARD_FILE,
+            path_in_repo="leaderboardPro.csv",
+            repo_id="SondosMB/Mobile-MMLU",  # Your Space repository
+            repo_type="space",
+            token=token
+        )
+        print("Leaderboard changes pushed to Hugging Face repository.")
+
+    except Exception as e:
+        print(f"Error updating leaderboard file: {e}")
 
 
 # def load_leaderboard():
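For reference, update_leaderboard_pro only reads the keys that evaluate_predictions_pro (added further down in this diff) builds into its results dict. A minimal illustrative call, with hypothetical values, would look like this; overall_accuracy is passed as a fraction and converted to a percentage before the row is appended and leaderboardPro.csv is pushed to the Space:

sample_results = {
    "model_name": "example-model",      # hypothetical value
    "overall_accuracy": 0.731,          # fraction in [0, 1]
    "correct_predictions": 731,
    "total_questions": 1000,
    "Team_name": "example-team",        # hypothetical value
}
update_leaderboard_pro(sample_results)  # appends to LEADERBOARD_FILE, then pushes via HfApi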
@@ -419,6 +459,65 @@ def evaluate_predictions(prediction_file, model_name,Team_name ,add_to_leaderboard):
 initialize_leaderboard_file()
 
 
+
+def evaluate_predictions_pro(prediction_file, model_name,Team_name ,add_to_leaderboard):
+    try:
+        ground_truth_path = hf_hub_download(
+            repo_id="SondosMB/ground-truth-dataset",
+            filename="ground_truth.csv",
+            repo_type="dataset",
+            use_auth_token=True
+        )
+        ground_truth_df = pd.read_csv(ground_truth_path)
+    except FileNotFoundError:
+        return "Ground truth file not found in the dataset repository.", load_leaderboard_pro()
+    except Exception as e:
+        return f"Error loading ground truth: {e}", load_leaderboard_pro()
+
+    if not prediction_file:
+        return "Prediction file not uploaded.", load_leaderboard_pro()
+
+    try:
+        #load prediction file
+        predictions_df = pd.read_csv(prediction_file.name)
+        # Validate required columns in prediction file
+        required_columns = ['question_id', 'predicted_answer']
+        missing_columns = [col for col in required_columns if col not in predictions_df.columns]
+        if missing_columns:
+            return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
+                    load_leaderboard())
+
+        # Validate 'Answer' column in ground truth file
+        if 'Answer' not in ground_truth_df.columns:
+            return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard_pro()
+        merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
+        merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
+
+        valid_predictions = merged_df.dropna(subset=['pred_answer'])
+        correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
+        total_predictions = len(merged_df)
+
+        overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
+
+        results = {
+            'model_name': model_name if model_name else "Unknown Model",
+            'overall_accuracy': overall_accuracy,
+            'correct_predictions': correct_predictions,
+            'total_questions': total_predictions,
+            'Team_name': Team_name if Team_name else "Unknown Team",
+        }
+
+        if add_to_leaderboard:
+            update_leaderboard_pro(results)
+            return "Evaluation completed and added to leaderboard.", load_leaderboard_pro()
+        else:
+            return "Evaluation completed but not added to leaderboard.", load_leaderboard_pro()
+
+    except Exception as e:
+        return f"Error during evaluation: {str(e)}", load_leaderboard_pro()
+initialize_leaderboard_file()
+
+
 # Function to set default mode
 # Function to set default mode
 import gradio as gr
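The scoring step in evaluate_predictions_pro is an inner join on question_id followed by an element-wise comparison. A self-contained toy version of the same computation, omitting the clean_answer normalization used above:

import pandas as pd

predictions_df = pd.DataFrame({"question_id": [1, 2, 3], "predicted_answer": ["A", "C", "B"]})
ground_truth_df = pd.DataFrame({"question_id": [1, 2, 3], "Answer": ["A", "B", "B"]})

merged_df = pd.merge(predictions_df, ground_truth_df, on="question_id", how="inner")
correct = (merged_df["predicted_answer"] == merged_df["Answer"]).sum()
accuracy = correct / len(merged_df) if len(merged_df) > 0 else 0
print(round(accuracy * 100, 2))  # 66.67 for this toy data

Because the join is an inner join, predictions whose question_id has no match in the ground truth are dropped, so total_predictions in the function above counts only matched rows.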
@@ -803,8 +902,8 @@ with gr.Blocks(css=css_tech_theme) as demo:
             overall_accuracy_display = gr.Number(label="π Overall Accuracy (%)", interactive=False,scale=1,min_width=1200)
 
             with gr.Row(elem_id="submission-buttons"):
-
-
+                eval_button_pro = gr.Button("π Evaluate",scale=1,min_width=1200)
+                submit_button_pro = gr.Button("π€ Prove and Submit to Leaderboard", elem_id="evaluation-status", visible=False,scale=1,min_width=1200)
                 eval_status = gr.Textbox(label="π οΈ Evaluation Status", interactive=False,scale=1,min_width=1200)
 
 
@@ -855,12 +954,64 @@ with gr.Blocks(css=css_tech_theme) as demo:
         except Exception as e:
             return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
 
+    def handle_evaluation_pro(file, model_name, Team_name):
+        if not file:
+            return "Error: Please upload a prediction file.", 0, gr.update(visible=False)
+        if not model_name or model_name.strip() == "":
+            return "Error: Please enter a model name.", 0, gr.update(visible=False)
+        if not Team_name or Team_name.strip() == "":
+            return "Error: Please enter a Team name.", 0, gr.update(visible=False)
+
+        try:
+            # Load predictions file
+            predictions_df = pd.read_csv(file.name)
+
+            # Validate required columns
+            required_columns = ['question_id', 'predicted_answer']
+            missing_columns = [col for col in required_columns if col not in predictions_df.columns]
+            if missing_columns:
+                return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
+                        0, gr.update(visible=False))
+
+            # Load ground truth
+            try:
+                ground_truth_path = hf_hub_download(
+                    repo_id="SondosMB/ground-truth-dataset",
+                    filename="ground_truth.csv",
+                    repo_type="dataset",
+                    use_auth_token=True
+                )
+                ground_truth_df = pd.read_csv(ground_truth_path)
+            except Exception as e:
+                return f"Error loading ground truth: {e}", 0, gr.update(visible=False)
+
+            # Perform evaluation calculations
+            merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
+            merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
+
+            valid_predictions = merged_df.dropna(subset=['pred_answer'])
+            correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
+            total_predictions = len(merged_df)
+
+            overall_accuracy = (correct_predictions / total_predictions * 100) if total_predictions > 0 else 0
+
+            return "Evaluation completed successfully.", overall_accuracy, gr.update(visible=True)
+
+        except Exception as e:
+            return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
+
 
 
     def handle_submission(file, model_name,Team_name):
         # Handle leaderboard submission
         status, _ = evaluate_predictions(file, model_name,Team_name, add_to_leaderboard=True)
         return f"Submission to leaderboard completed: {status}"
+
+    def handle_submission_pro(file, model_name,Team_name):
+        # Handle leaderboard submission
+        status, _ = evaluate_predictions_pro(file, model_name,Team_name, add_to_leaderboard=True)
+        return f"Submission to leaderboard completed: {status}"
+
 
     # Connect button clicks to the functions
     eval_button.click(
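Note that the two new evaluation paths report accuracy at different scales: evaluate_predictions_pro keeps a fraction and relies on update_leaderboard_pro to convert it for the CSV, while handle_evaluation_pro multiplies by 100 itself because its second return value feeds the gr.Number display directly. With a hypothetical run of 731 correct out of 1000:

fraction = 731 / 1000                 # evaluate_predictions_pro -> results['overall_accuracy']
csv_value = round(fraction * 100, 2)  # 73.1, written by update_leaderboard_pro
display_value = 731 / 1000 * 100      # 73.1, returned by handle_evaluation_pro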
@@ -868,6 +1019,18 @@ with gr.Blocks(css=css_tech_theme) as demo:
         inputs=[file_input, model_name_input,Team_name_input],
         outputs=[eval_status, overall_accuracy_display, submit_button],
     )
+
+    eval_button_pro.click(
+        handle_evaluation_pro,
+        inputs=[file_input, model_name_input,Team_name_input],
+        outputs=[eval_status, overall_accuracy_display, submit_button_pro],
+    )
+
+    submit_button_pro.click(
+        handle_submission_pro,
+        inputs=[file_input, model_name_input,Team_name_input],
+        outputs=[eval_status],
+    )
 
     submit_button.click(
         handle_submission,
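The wiring added here follows the usual Gradio pattern of click(fn, inputs, outputs), with the third return value of handle_evaluation_pro being a gr.update that toggles the visibility of submit_button_pro. A stripped-down sketch of the same pattern, using simplified component names and a stand-in handler rather than anything from app.py:

import gradio as gr

def fake_evaluate(file):
    # Mirrors handle_evaluation_pro's return shape: status text, accuracy number, visibility update.
    if file is None:
        return "Please upload a file.", 0, gr.update(visible=False)
    return "Evaluation completed successfully.", 73.1, gr.update(visible=True)

with gr.Blocks() as sketch:
    file_input = gr.File(label="Predictions CSV")
    eval_button = gr.Button("Evaluate")
    status = gr.Textbox(label="Evaluation Status", interactive=False)
    accuracy = gr.Number(label="Overall Accuracy (%)", interactive=False)
    submit = gr.Button("Submit to Leaderboard", visible=False)

    eval_button.click(fake_evaluate, inputs=[file_input], outputs=[status, accuracy, submit])

# sketch.launch()  # run locally to try the flow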
@@ -890,6 +1053,19 @@ with gr.Blocks(css=css_tech_theme) as demo:
                 inputs=[],
                 outputs=[leaderboard_table],
             )
+        with gr.TabItem("π Leaderboard-pro"):
+            leaderboard_table = gr.Dataframe(
+                value=load_leaderboard_pro(),
+                label="Leaderboard",
+                interactive=False,
+                wrap=True,
+            )
+            refresh_button = gr.Button("Refresh Leaderboard")
+            refresh_button.click(
+                lambda: load_leaderboard_pro(),
+                inputs=[],
+                outputs=[leaderboard_table],
+            )
 
     # Post-Tabs Section
     # gr.Markdown("""
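The new Leaderboard-pro tab fills its table through load_leaderboard_pro, which is not part of this diff. Assuming it mirrors the existing load_leaderboard helper, a hypothetical sketch consistent with the columns written by update_leaderboard_pro could look like this (the file name is a guess):

import os
import pandas as pd

def load_leaderboard_pro():
    # Hypothetical sketch; the real helper is defined elsewhere in app.py.
    columns = ["Model Name", "Overall Accuracy", "Correct Predictions",
               "Total Questions", "Timestamp", "Team Name"]
    if not os.path.exists("leaderboardPro.csv"):
        return pd.DataFrame(columns=columns)  # empty board before the first submission
    return pd.read_csv("leaderboardPro.csv")

Since refresh_button.click re-invokes load_leaderboard_pro, pressing Refresh Leaderboard re-reads the underlying CSV without restarting the Space.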