Spaces:

MBZUAI-LLM
/

Mobile-MMLU-Challenge

Runtime error

App Files Files Community

SondosMB committed on Dec 23, 2024

Commit

9803b0e

verified ·

1 Parent(s): 5a42658

Update app.py

Browse files

Files changed (1) hide show

app.py +136 -13

app.py CHANGED Viewed

@@ -151,19 +151,34 @@ if not HF_TOKEN:
 #         return f"Error during evaluation: {str(e)}", load_leaderboard()
 # initialize_leaderboard_file()
 def initialize_leaderboard_file():
     """
     Ensure the leaderboard file exists and has the correct headers.
     """
     if not os.path.exists(LEADERBOARD_FILE):
         pd.DataFrame(columns=[
-            "Model Name", "Overall Accuracy", "Valid Accuracy",
-            "Correct Predictions", "Total Questions", "Timestamp"
         ]).to_csv(LEADERBOARD_FILE, index=False)
     elif os.stat(LEADERBOARD_FILE).st_size == 0:
         pd.DataFrame(columns=[
-            "Model Name", "Overall Accuracy", "Valid Accuracy",
-            "Correct Predictions", "Total Questions", "Timestamp"
         ]).to_csv(LEADERBOARD_FILE, index=False)
 def clean_answer(answer):
@@ -174,6 +189,48 @@ def clean_answer(answer):
     return clean[0].upper() if clean else None
 def update_leaderboard(results):
     """
     Append new submission results to the leaderboard file and push updates to the Hugging Face repository.
@@ -181,7 +238,6 @@ def update_leaderboard(results):
     new_entry = {
         "Model Name": results['model_name'],
         "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
-        "Valid Accuracy": round(results['valid_accuracy'] * 100, 2),
         "Correct Predictions": results['correct_predictions'],
         "Total Questions": results['total_questions'],
         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
@@ -207,7 +263,7 @@ def update_leaderboard(results):
         api.upload_file(
             path_or_fileobj=LEADERBOARD_FILE,
             path_in_repo="leaderboard.csv",
-            repo_id="SondosMB/ss",  # Your Space repository
             repo_type="space",
             token=token
         )
@@ -218,17 +274,88 @@ def update_leaderboard(results):
 def load_leaderboard():
     if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
         return pd.DataFrame({
             "Model Name": [],
             "Overall Accuracy": [],
-            "Valid Accuracy": [],
             "Correct Predictions": [],
             "Total Questions": [],
             "Timestamp": [],
         })
     return pd.read_csv(LEADERBOARD_FILE)
 def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
     try:
@@ -248,9 +375,9 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
         return "Prediction file not uploaded.", load_leaderboard()
     try:
-        #load predition file
         predictions_df = pd.read_csv(prediction_file.name)
-         # Validate required columns in prediction file
         required_columns = ['question_id', 'predicted_answer']
         missing_columns = [col for col in required_columns if col not in predictions_df.columns]
         if missing_columns:
@@ -266,15 +393,12 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
         valid_predictions = merged_df.dropna(subset=['pred_answer'])
         correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
         total_predictions = len(merged_df)
-        total_valid_predictions = len(valid_predictions)
         overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
-        valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
         results = {
             'model_name': model_name if model_name else "Unknown Model",
             'overall_accuracy': overall_accuracy,
-            'valid_accuracy': valid_accuracy,
             'correct_predictions': correct_predictions,
             'total_questions': total_predictions,
         }
@@ -287,7 +411,6 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
     except Exception as e:
         return f"Error during evaluation: {str(e)}", load_leaderboard()
 initialize_leaderboard_file()

 #         return f"Error during evaluation: {str(e)}", load_leaderboard()
 # initialize_leaderboard_file()
+# def initialize_leaderboard_file():
+#     """
+#     Ensure the leaderboard file exists and has the correct headers.
+#     """
+#     if not os.path.exists(LEADERBOARD_FILE):
+#         pd.DataFrame(columns=[
+#             "Model Name", "Overall Accuracy", "Valid Accuracy",
+#             "Correct Predictions", "Total Questions", "Timestamp"
+#         ]).to_csv(LEADERBOARD_FILE, index=False)
+#     elif os.stat(LEADERBOARD_FILE).st_size == 0:
+#         pd.DataFrame(columns=[
+#             "Model Name", "Overall Accuracy", "Valid Accuracy",
+#             "Correct Predictions", "Total Questions", "Timestamp"
+#         ]).to_csv(LEADERBOARD_FILE, index=False)
 def initialize_leaderboard_file():
     """
     Ensure the leaderboard file exists and has the correct headers.
     """
     if not os.path.exists(LEADERBOARD_FILE):
         pd.DataFrame(columns=[
+            "Model Name", "Overall Accuracy", "Correct Predictions",
+            "Total Questions", "Timestamp"
         ]).to_csv(LEADERBOARD_FILE, index=False)
     elif os.stat(LEADERBOARD_FILE).st_size == 0:
         pd.DataFrame(columns=[
+            "Model Name", "Overall Accuracy", "Correct Predictions",
+            "Total Questions", "Timestamp"
         ]).to_csv(LEADERBOARD_FILE, index=False)
 def clean_answer(answer):
     return clean[0].upper() if clean else None
+# def update_leaderboard(results):
+#     """
+#     Append new submission results to the leaderboard file and push updates to the Hugging Face repository.
+#     """
+#     new_entry = {
+#         "Model Name": results['model_name'],
+#         "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
+#         "Valid Accuracy": round(results['valid_accuracy'] * 100, 2),
+#         "Correct Predictions": results['correct_predictions'],
+#         "Total Questions": results['total_questions'],
+#         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+#     }
+#     try:
+#         # Update the local leaderboard file
+#         new_entry_df = pd.DataFrame([new_entry])
+#         file_exists = os.path.exists(LEADERBOARD_FILE)
+#         new_entry_df.to_csv(
+#             LEADERBOARD_FILE,
+#             mode='a',  # Append mode
+#             index=False,
+#             header=not file_exists  # Write header only if the file is new
+#         )
+#         print(f"Leaderboard updated successfully at {LEADERBOARD_FILE}")
+#         # Push the updated file to the Hugging Face repository using HTTP API
+#         api = HfApi()
+#         token = HfFolder.get_token()
+#         api.upload_file(
+#             path_or_fileobj=LEADERBOARD_FILE,
+#             path_in_repo="leaderboard.csv",
+#             repo_id="SondosMB/ss",  # Your Space repository
+#             repo_type="space",
+#             token=token
+#         )
+#         print("Leaderboard changes pushed to Hugging Face repository.")
+#     except Exception as e:
+#         print(f"Error updating leaderboard file: {e}")
 def update_leaderboard(results):
     """
     Append new submission results to the leaderboard file and push updates to the Hugging Face repository.
     new_entry = {
         "Model Name": results['model_name'],
         "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
         "Correct Predictions": results['correct_predictions'],
         "Total Questions": results['total_questions'],
         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
         api.upload_file(
             path_or_fileobj=LEADERBOARD_FILE,
             path_in_repo="leaderboard.csv",
+            repo_id="SondosMB/Mobile-MMLU",  # Your Space repository
             repo_type="space",
             token=token
         )
+# def load_leaderboard():
+#     if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
+#         return pd.DataFrame({
+#             "Model Name": [],
+#             "Overall Accuracy": [],
+#             "Valid Accuracy": [],
+#             "Correct Predictions": [],
+#             "Total Questions": [],
+#             "Timestamp": [],
+#         })
+#     return pd.read_csv(LEADERBOARD_FILE)
 def load_leaderboard():
     if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
         return pd.DataFrame({
             "Model Name": [],
             "Overall Accuracy": [],
             "Correct Predictions": [],
             "Total Questions": [],
             "Timestamp": [],
         })
     return pd.read_csv(LEADERBOARD_FILE)
+# def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
+#     try:
+#         ground_truth_path = hf_hub_download(
+#             repo_id="SondosMB/ground-truth-dataset",
+#             filename="ground_truth.csv",
+#             repo_type="dataset",
+#             use_auth_token=True
+#         )
+#         ground_truth_df = pd.read_csv(ground_truth_path)
+#     except FileNotFoundError:
+#         return "Ground truth file not found in the dataset repository.", load_leaderboard()
+#     except Exception as e:
+#         return f"Error loading ground truth: {e}", load_leaderboard()
+#     if not prediction_file:
+#         return "Prediction file not uploaded.", load_leaderboard()
+#     try:
+#         #load predition file
+#         predictions_df = pd.read_csv(prediction_file.name)
+#          # Validate required columns in prediction file
+#         required_columns = ['question_id', 'predicted_answer']
+#         missing_columns = [col for col in required_columns if col not in predictions_df.columns]
+#         if missing_columns:
+#             return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
+#                     load_leaderboard())
+#         # Validate 'Answer' column in ground truth file
+#         if 'Answer' not in ground_truth_df.columns:
+#             return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard()
+#         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
+#         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
+#         valid_predictions = merged_df.dropna(subset=['pred_answer'])
+#         correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
+#         total_predictions = len(merged_df)
+#         total_valid_predictions = len(valid_predictions)
+#         overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
+#         valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
+#         results = {
+#             'model_name': model_name if model_name else "Unknown Model",
+#             'overall_accuracy': overall_accuracy,
+#             'valid_accuracy': valid_accuracy,
+#             'correct_predictions': correct_predictions,
+#             'total_questions': total_predictions,
+#         }
+#         if add_to_leaderboard:
+#             update_leaderboard(results)
+#             return "Evaluation completed and added to leaderboard.", load_leaderboard()
+#         else:
+#             return "Evaluation completed but not added to leaderboard.", load_leaderboard()
+#     except Exception as e:
+#         return f"Error during evaluation: {str(e)}", load_leaderboard()
 def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
     try:
         return "Prediction file not uploaded.", load_leaderboard()
     try:
+        #load prediction file
         predictions_df = pd.read_csv(prediction_file.name)
+        # Validate required columns in prediction file
         required_columns = ['question_id', 'predicted_answer']
         missing_columns = [col for col in required_columns if col not in predictions_df.columns]
         if missing_columns:
         valid_predictions = merged_df.dropna(subset=['pred_answer'])
         correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
         total_predictions = len(merged_df)
         overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
         results = {
             'model_name': model_name if model_name else "Unknown Model",
             'overall_accuracy': overall_accuracy,
             'correct_predictions': correct_predictions,
             'total_questions': total_predictions,
         }
     except Exception as e:
         return f"Error during evaluation: {str(e)}", load_leaderboard()
 initialize_leaderboard_file()