SondosMB committed on
Commit 5b78632 · verified · 1 Parent(s): 5b92bc4

Update app.py

Files changed (1):
  1. app.py +51 -233

app.py CHANGED
@@ -1,236 +1,48 @@
 
-# # demo.launch()
-# import gradio as gr
-# import pandas as pd
-# import os
-# import re
-# from datetime import datetime
-
-# LEADERBOARD_FILE = "leaderboard.csv" # File to store all submissions persistently
-# LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
-
-# def initialize_leaderboard_file():
-#     """
-#     Ensure the leaderboard file exists and has the correct headers.
-#     """
-#     if not os.path.exists(LEADERBOARD_FILE):
-#         # Create the file with headers
-#         pd.DataFrame(columns=[
-#             "Model Name", "Overall Accuracy", "Valid Accuracy",
-#             "Correct Predictions", "Total Questions", "Timestamp"
-#         ]).to_csv(LEADERBOARD_FILE, index=False)
-#     else:
-#         # Check if the file is empty and write headers if needed
-#         if os.stat(LEADERBOARD_FILE).st_size == 0:
-#             pd.DataFrame(columns=[
-#                 "Model Name", "Overall Accuracy", "Valid Accuracy",
-#                 "Correct Predictions", "Total Questions", "Timestamp"
-#             ]).to_csv(LEADERBOARD_FILE, index=False)
-
-# def clean_answer(answer):
-#     """
-#     Clean and normalize the predicted answers.
-#     """
-#     if pd.isna(answer):
-#         return None
-#     answer = str(answer)
-#     clean = re.sub(r'[^A-Da-d]', '', answer)
-#     if clean:
-#         return clean[0].upper()
-#     return None
-
-# def update_leaderboard(results):
-#     """
-#     Append new submission results to the leaderboard file.
-#     """
-#     new_entry = {
-#         "Model Name": results['model_name'],
-#         "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
-#         "Valid Accuracy": round(results['valid_accuracy'] * 100, 2),
-#         "Correct Predictions": results['correct_predictions'],
-#         "Total Questions": results['total_questions'],
-#         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-#     }
-
-#     new_entry_df = pd.DataFrame([new_entry])
-#     new_entry_df.to_csv(LEADERBOARD_FILE, mode='a', index=False, header=False)
-
-# def load_leaderboard():
-#     """
-#     Load all submissions from the leaderboard file.
-#     """
-#     if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
-#         return pd.DataFrame({
-#             "Model Name": [],
-#             "Overall Accuracy": [],
-#             "Valid Accuracy": [],
-#             "Correct Predictions": [],
-#             "Total Questions": [],
-#             "Timestamp": [],
-#         })
-#     return pd.read_csv(LEADERBOARD_FILE)
-
-# def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
-#     """
-#     Evaluate predictions and optionally add results to the leaderboard.
-#     """
-#     ground_truth_file = "ground_truth.csv"
-#     if not os.path.exists(ground_truth_file):
-#         return "Ground truth file not found.", load_leaderboard()
-#     if not prediction_file:
-#         return "Prediction file not uploaded.", load_leaderboard()
-
-#     try:
-#         # Load predictions and ground truth
-#         predictions_df = pd.read_csv(prediction_file.name)
-#         ground_truth_df = pd.read_csv(ground_truth_file)
-
-#         # Merge predictions with ground truth
-#         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
-#         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
-
-#         # Evaluate predictions
-#         valid_predictions = merged_df.dropna(subset=['pred_answer'])
-#         correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
-#         total_predictions = len(merged_df)
-#         total_valid_predictions = len(valid_predictions)
-
-#         # Calculate accuracy
-#         overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
-#         valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
-
-#         results = {
-#             'model_name': model_name if model_name else "Unknown Model",
-#             'overall_accuracy': overall_accuracy,
-#             'valid_accuracy': valid_accuracy,
-#             'correct_predictions': correct_predictions,
-#             'total_questions': total_predictions,
-#         }
-
-#         # Update leaderboard only if opted in
-#         if add_to_leaderboard:
-#             update_leaderboard(results)
-#             return "Evaluation completed and added to leaderboard.", load_leaderboard()
-#         else:
-#             return "Evaluation completed but not added to leaderboard.", load_leaderboard()
-#     except Exception as e:
-#         return f"Error during evaluation: {str(e)}", load_leaderboard()
-
-# # Initialize leaderboard file
-# initialize_leaderboard_file()
-
-# # Gradio Interface
-# with gr.Blocks() as demo:
-#     gr.Markdown("# Prediction Evaluation Tool with Leaderboard")
-
-#     with gr.Tabs():
-#         # Submission Tab
-#         with gr.TabItem("🏅 Submission"):
-#             file_input = gr.File(label="Upload Prediction CSV")
-#             model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
-#             add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)
-#             eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
-#             leaderboard_table_preview = gr.Dataframe(
-#                 value=load_leaderboard(),
-#                 label="Leaderboard (Preview)",
-#                 interactive=False,
-#                 wrap=True,
-#             )
-#             eval_button = gr.Button("Evaluate and Update Leaderboard")
-#             eval_button.click(
-#                 evaluate_predictions,
-#                 inputs=[file_input, model_name_input, add_to_leaderboard_checkbox],
-#                 outputs=[eval_status, leaderboard_table_preview],
-#             )
-
-#         # Leaderboard Tab
-#         with gr.TabItem("🏅 Leaderboard"):
-#             leaderboard_table = gr.Dataframe(
-#                 value=load_leaderboard(),
-#                 label="Leaderboard",
-#                 interactive=False,
-#                 wrap=True,
-#             )
-#             refresh_button = gr.Button("Refresh Leaderboard")
-#             refresh_button.click(
-#                 lambda: load_leaderboard(),
-#                 inputs=[],
-#                 outputs=[leaderboard_table],
-#             )
-
-#     gr.Markdown(f"Last updated on **{LAST_UPDATED}**")
-
 # demo.launch()
-
 import gradio as gr
 import pandas as pd
+import os
 import re
 from datetime import datetime
-from huggingface_hub import hf_hub_download
-from datasets import Dataset
-import os
 
-HF_TOKEN = os.getenv("HF_TOKEN") # Hugging Face token stored as an environment variable
-LEADERBOARD_REPO = "username/leaderboard-dataset" # Replace with your leaderboard dataset name
-GROUND_TRUTH_REPO = "username/ground-truth-dataset" # Replace with your ground truth dataset name
+LEADERBOARD_FILE = "leaderboard.csv" # File to store all submissions persistently
 LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
 
-def load_ground_truth():
+def initialize_leaderboard_file():
     """
-    Load the ground truth file from a private Hugging Face dataset.
+    Ensure the leaderboard file exists and has the correct headers.
     """
-    try:
-        ground_truth_path = hf_hub_download(
-            repo_id=GROUND_TRUTH_REPO,
-            filename="ground_truth.csv",
-            use_auth_token=HF_TOKEN
-        )
-        return pd.read_csv(ground_truth_path)
-    except Exception as e:
-        print(f"Error loading ground truth: {e}")
-        return None
+    if not os.path.exists(LEADERBOARD_FILE):
+        # Create the file with headers
+        pd.DataFrame(columns=[
+            "Model Name", "Overall Accuracy", "Valid Accuracy",
+            "Correct Predictions", "Total Questions", "Timestamp"
+        ]).to_csv(LEADERBOARD_FILE, index=False)
+    else:
+        # Check if the file is empty and write headers if needed
+        if os.stat(LEADERBOARD_FILE).st_size == 0:
+            pd.DataFrame(columns=[
+                "Model Name", "Overall Accuracy", "Valid Accuracy",
+                "Correct Predictions", "Total Questions", "Timestamp"
+            ]).to_csv(LEADERBOARD_FILE, index=False)
 
-def load_leaderboard():
+def clean_answer(answer):
     """
-    Load the leaderboard from a private Hugging Face dataset.
+    Clean and normalize the predicted answers.
     """
-    try:
-        leaderboard_path = hf_hub_download(
-            repo_id=LEADERBOARD_REPO,
-            filename="leaderboard.csv",
-            use_auth_token=HF_TOKEN
-        )
-        return pd.read_csv(leaderboard_path)
-    except Exception as e:
-        print(f"Error loading leaderboard: {e}")
-        return pd.DataFrame({
-            "Model Name": [],
-            "Overall Accuracy": [],
-            "Valid Accuracy": [],
-            "Correct Predictions": [],
-            "Total Questions": [],
-            "Timestamp": [],
-        })
+    if pd.isna(answer):
+        return None
+    answer = str(answer)
+    clean = re.sub(r'[^A-Da-d]', '', answer)
+    if clean:
+        return clean[0].upper()
+    return None
 
 def update_leaderboard(results):
     """
-    Append new submission results to the private leaderboard dataset.
+    Append new submission results to the leaderboard file.
     """
-    try:
-        # Load existing leaderboard or create a new one
-        leaderboard_path = hf_hub_download(
-            repo_id=LEADERBOARD_REPO,
-            filename="leaderboard.csv",
-            use_auth_token=HF_TOKEN
-        )
-        df = pd.read_csv(leaderboard_path)
-    except:
-        df = pd.DataFrame(columns=[
-            "Model Name", "Overall Accuracy", "Valid Accuracy",
-            "Correct Predictions", "Total Questions", "Timestamp"
-        ])
-
-    # Add new entry
     new_entry = {
         "Model Name": results['model_name'],
         "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
@@ -239,38 +51,41 @@ def update_leaderboard(results):
         "Total Questions": results['total_questions'],
         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
     }
-    df = pd.concat([df, pd.DataFrame([new_entry])], ignore_index=True)
 
-    # Save locally and push updated dataset to Hugging Face
-    df.to_csv("leaderboard.csv", index=False)
-    dataset = Dataset.from_pandas(df)
-    dataset.push_to_hub(LEADERBOARD_REPO, split="train", private=True)
+    new_entry_df = pd.DataFrame([new_entry])
+    new_entry_df.to_csv(LEADERBOARD_FILE, mode='a', index=False, header=False)
 
-def clean_answer(answer):
+def load_leaderboard():
     """
-    Clean and normalize the predicted answers.
+    Load all submissions from the leaderboard file.
     """
-    if pd.isna(answer):
-        return None
-    answer = str(answer)
-    clean = re.sub(r'[^A-Da-d]', '', answer)
-    if clean:
-        return clean[0].upper()
-    return None
+    if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
+        return pd.DataFrame({
+            "Model Name": [],
+            "Overall Accuracy": [],
+            "Valid Accuracy": [],
+            "Correct Predictions": [],
+            "Total Questions": [],
+            "Timestamp": [],
+        })
+    return pd.read_csv(LEADERBOARD_FILE)
 
 def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
     """
     Evaluate predictions and optionally add results to the leaderboard.
     """
-    ground_truth_df = load_ground_truth()
-    if ground_truth_df is None:
+    ground_truth_file = "ground_truth.csv"
+    if not os.path.exists(ground_truth_file):
         return "Ground truth file not found.", load_leaderboard()
     if not prediction_file:
         return "Prediction file not uploaded.", load_leaderboard()
 
     try:
-        # Load predictions and merge with ground truth
+        # Load predictions and ground truth
         predictions_df = pd.read_csv(prediction_file.name)
+        ground_truth_df = pd.read_csv(ground_truth_file)
+
+        # Merge predictions with ground truth
         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
 
@@ -301,9 +116,12 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
     except Exception as e:
         return f"Error during evaluation: {str(e)}", load_leaderboard()
 
+# Initialize leaderboard file
+initialize_leaderboard_file()
+
 # Gradio Interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Secure Prediction Evaluation Tool with Private Leaderboard")
+    gr.Markdown("# Prediction Evaluation Tool with Leaderboard")
 
     with gr.Tabs():
         # Submission Tab
 
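For reference, a minimal sketch of the scoring path that evaluate_predictions implements after this commit. The sample rows below are hypothetical; the column names are the ones app.py expects (question_id and predicted_answer in the uploaded prediction CSV, question_id and Answer in ground_truth.csv).

import re
import pandas as pd

def clean_answer(answer):
    # As in app.py: drop everything except A-D characters and take the first one, uppercased.
    if pd.isna(answer):
        return None
    clean = re.sub(r'[^A-Da-d]', '', str(answer))
    return clean[0].upper() if clean else None

# Hypothetical submission and ground-truth rows.
predictions_df = pd.DataFrame({
    "question_id": [1, 2, 3],
    "predicted_answer": ["A", "(C)", None],
})
ground_truth_df = pd.DataFrame({
    "question_id": [1, 2, 3],
    "Answer": ["A", "C", "B"],
})

merged_df = pd.merge(predictions_df, ground_truth_df, on="question_id", how="inner")
merged_df["pred_answer"] = merged_df["predicted_answer"].apply(clean_answer)

valid = merged_df.dropna(subset=["pred_answer"])
correct = (valid["pred_answer"] == valid["Answer"]).sum()
overall_accuracy = correct / len(merged_df) if len(merged_df) > 0 else 0  # 2 of 3 rows correct
valid_accuracy = correct / len(valid) if len(valid) > 0 else 0            # 2 of 2 valid rows correct
print(round(overall_accuracy * 100, 2), round(valid_accuracy * 100, 2))   # 66.67 100.0

Because clean_answer keeps only the first A-D character it finds, a free-text prediction such as "The answer is C" normalizes to "A" (the 'a' in "answer" comes first); submissions score most reliably when predicted_answer is a bare letter.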