SondosMB committed on
Commit 9e3da0c · verified · 1 Parent(s): 8e8d6c7

Update app.py

Files changed (1)
app.py +178 -2
app.py CHANGED
 
@@ -274,7 +274,47 @@ def update_leaderboard(results):
     except Exception as e:
         print(f"Error updating leaderboard file: {e}")

+def update_leaderboard_pro(results):
+    """
+    Append new submission results to the leaderboard file and push updates to the Hugging Face repository.
+    """
+    new_entry = {
+        "Model Name": results['model_name'],
+        "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
+        "Correct Predictions": results['correct_predictions'],
+        "Total Questions": results['total_questions'],
+        "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+        "Team Name": results['Team_name']
+    }

+    try:
+        # Update the local leaderboard file
+        new_entry_df = pd.DataFrame([new_entry])
+        file_exists = os.path.exists(LEADERBOARD_FILE)
+
+        new_entry_df.to_csv(
+            LEADERBOARD_FILE,
+            mode='a',  # Append mode
+            index=False,
+            header=not file_exists  # Write header only if the file is new
+        )
+        print(f"Leaderboard updated successfully at {LEADERBOARD_FILE}")
+
+        # Push the updated file to the Hugging Face repository using HTTP API
+        api = HfApi()
+        token = HfFolder.get_token()
+
+        api.upload_file(
+            path_or_fileobj=LEADERBOARD_FILE,
+            path_in_repo="leaderboardPro.csv",
+            repo_id="SondosMB/Mobile-MMLU",  # Your Space repository
+            repo_type="space",
+            token=token
+        )
+        print("Leaderboard changes pushed to Hugging Face repository.")
+
+    except Exception as e:
+        print(f"Error updating leaderboard file: {e}")


 # def load_leaderboard():
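For orientation, here is a self-contained sketch of the append-and-push pattern update_leaderboard_pro uses, with placeholder values and a throwaway file name; it is not the app's code, and the (commented-out) upload step assumes a valid Hugging Face token is available.

import os
from datetime import datetime

import pandas as pd

LEADERBOARD_FILE = "leaderboard_pro_demo.csv"  # placeholder path for this sketch only

new_entry = {
    "Model Name": "example-model",               # placeholder value
    "Overall Accuracy": round(0.8123 * 100, 2),  # stored as a percentage
    "Correct Predictions": 812,
    "Total Questions": 1000,
    "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    "Team Name": "example-team",
}

# Append one row; write the header only when the file does not exist yet.
file_exists = os.path.exists(LEADERBOARD_FILE)
pd.DataFrame([new_entry]).to_csv(
    LEADERBOARD_FILE,
    mode="a",
    index=False,
    header=not file_exists,
)

# Pushing the refreshed CSV to the Space would then use huggingface_hub, e.g.:
# HfApi().upload_file(path_or_fileobj=LEADERBOARD_FILE, path_in_repo="leaderboardPro.csv",
#                     repo_id="SondosMB/Mobile-MMLU", repo_type="space", token=token)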
 
@@ -419,6 +459,65 @@ def evaluate_predictions(prediction_file, model_name,Team_name ,add_to_leaderboa
 initialize_leaderboard_file()


+
+def evaluate_predictions_pro(prediction_file, model_name,Team_name ,add_to_leaderboard):
+    try:
+        ground_truth_path = hf_hub_download(
+            repo_id="SondosMB/ground-truth-dataset",
+            filename="ground_truth.csv",
+            repo_type="dataset",
+            use_auth_token=True
+        )
+        ground_truth_df = pd.read_csv(ground_truth_path)
+    except FileNotFoundError:
+        return "Ground truth file not found in the dataset repository.", load_leaderboard_pro()
+    except Exception as e:
+        return f"Error loading ground truth: {e}", load_leaderboard_pro()
+
+    if not prediction_file:
+        return "Prediction file not uploaded.", load_leaderboard_pro()
+
+    try:
+        #load prediction file
+        predictions_df = pd.read_csv(prediction_file.name)
+        # Validate required columns in prediction file
+        required_columns = ['question_id', 'predicted_answer']
+        missing_columns = [col for col in required_columns if col not in predictions_df.columns]
+        if missing_columns:
+            return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
+                    load_leaderboard())
+
+        # Validate 'Answer' column in ground truth file
+        if 'Answer' not in ground_truth_df.columns:
+            return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard_pro()
+        merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
+        merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
+
+        valid_predictions = merged_df.dropna(subset=['pred_answer'])
+        correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
+        total_predictions = len(merged_df)
+
+        overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
+
+        results = {
+            'model_name': model_name if model_name else "Unknown Model",
+            'overall_accuracy': overall_accuracy,
+            'correct_predictions': correct_predictions,
+            'total_questions': total_predictions,
+            'Team_name': Team_name if Team_name else "Unknown Team",
+        }
+
+        if add_to_leaderboard:
+            update_leaderboard_pro(results)
+            return "Evaluation completed and added to leaderboard.", load_leaderboard_pro()
+        else:
+            return "Evaluation completed but not added to leaderboard.", load_leaderboard_pro()
+
+    except Exception as e:
+        return f"Error during evaluation: {str(e)}", load_leaderboard_pro()
+    initialize_leaderboard_file()
+
+
 # Function to set default mode
 # Function to set default mode
 import gradio as gr
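For anyone preparing a submission against this path: evaluate_predictions_pro only requires the columns question_id and predicted_answer. A minimal sketch of building such a file follows (the rows and file name are illustrative; real question_id values must match the ground-truth dataset).

import pandas as pd

predictions = pd.DataFrame(
    {
        "question_id": ["q-0001", "q-0002", "q-0003"],  # illustrative IDs only
        "predicted_answer": ["A", "C", "B"],
    }
)
predictions.to_csv("predictions.csv", index=False)  # upload this file through the Space UI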
 
@@ -803,8 +902,8 @@ with gr.Blocks(css=css_tech_theme) as demo:
             overall_accuracy_display = gr.Number(label="📊 Overall Accuracy (%)", interactive=False,scale=1,min_width=1200)

             with gr.Row(elem_id="submission-buttons"):
-                eval_button = gr.Button("📈 Evaluate",scale=1,min_width=1200)
-                submit_button = gr.Button("📀 Prove and Submit to Leaderboard", elem_id="evaluation-status", visible=False,scale=1,min_width=1200)
+                eval_button_pro = gr.Button("📈 Evaluate",scale=1,min_width=1200)
+                submit_button_pro = gr.Button("📀 Prove and Submit to Leaderboard", elem_id="evaluation-status", visible=False,scale=1,min_width=1200)
             eval_status = gr.Textbox(label="🛠️ Evaluation Status", interactive=False,scale=1,min_width=1200)


 
@@ -855,12 +954,64 @@ with gr.Blocks(css=css_tech_theme) as demo:
         except Exception as e:
             return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)

+    def handle_evaluation_pro(file, model_name, Team_name):
+        if not file:
+            return "Error: Please upload a prediction file.", 0, gr.update(visible=False)
+        if not model_name or model_name.strip() == "":
+            return "Error: Please enter a model name.", 0, gr.update(visible=False)
+        if not Team_name or Team_name.strip() == "":
+            return "Error: Please enter a Team name.", 0, gr.update(visible=False)
+
+        try:
+            # Load predictions file
+            predictions_df = pd.read_csv(file.name)
+
+            # Validate required columns
+            required_columns = ['question_id', 'predicted_answer']
+            missing_columns = [col for col in required_columns if col not in predictions_df.columns]
+            if missing_columns:
+                return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
+                        0, gr.update(visible=False))
+
+            # Load ground truth
+            try:
+                ground_truth_path = hf_hub_download(
+                    repo_id="SondosMB/ground-truth-dataset",
+                    filename="ground_truth.csv",
+                    repo_type="dataset",
+                    use_auth_token=True
+                )
+                ground_truth_df = pd.read_csv(ground_truth_path)
+            except Exception as e:
+                return f"Error loading ground truth: {e}", 0, gr.update(visible=False)
+
+            # Perform evaluation calculations
+            merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
+            merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
+
+            valid_predictions = merged_df.dropna(subset=['pred_answer'])
+            correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
+            total_predictions = len(merged_df)
+
+            overall_accuracy = (correct_predictions / total_predictions * 100) if total_predictions > 0 else 0
+
+            return "Evaluation completed successfully.", overall_accuracy, gr.update(visible=True)
+
+        except Exception as e:
+            return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
+


     def handle_submission(file, model_name,Team_name):
         # Handle leaderboard submission
         status, _ = evaluate_predictions(file, model_name,Team_name, add_to_leaderboard=True)
         return f"Submission to leaderboard completed: {status}"
+
+    def handle_submission_pro(file, model_name,Team_name):
+        # Handle leaderboard submission
+        status, _ = evaluate_predictions_pro(file, model_name,Team_name, add_to_leaderboard=True)
+        return f"Submission to leaderboard completed: {status}"
+

     # Connect button clicks to the functions
     eval_button.click(
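As a self-contained illustration of the scoring done in handle_evaluation_pro, here is the same merge-and-compare logic on toy data; clean_answer is defined elsewhere in app.py, so a trivial stand-in is used here.

import pandas as pd

def clean_answer_stub(ans):
    # Stand-in for app.py's clean_answer: keep the first character, upper-cased.
    cleaned = str(ans).strip().upper()[:1]
    return cleaned or None

predictions_df = pd.DataFrame({"question_id": [1, 2, 3], "predicted_answer": ["a", "B", "d"]})
ground_truth_df = pd.DataFrame({"question_id": [1, 2, 3], "Answer": ["A", "B", "C"]})

merged_df = pd.merge(predictions_df, ground_truth_df, on="question_id", how="inner")
merged_df["pred_answer"] = merged_df["predicted_answer"].apply(clean_answer_stub)

valid = merged_df.dropna(subset=["pred_answer"])
correct = (valid["pred_answer"] == valid["Answer"]).sum()
total = len(merged_df)
overall_accuracy = (correct / total * 100) if total > 0 else 0
print(round(overall_accuracy, 2))  # 66.67: two of the three toy predictions match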
 
@@ -868,6 +1019,18 @@ with gr.Blocks(css=css_tech_theme) as demo:
         inputs=[file_input, model_name_input,Team_name_input],
         outputs=[eval_status, overall_accuracy_display, submit_button],
     )
+
+    eval_button_pro.click(
+        handle_evaluation_pro,
+        inputs=[file_input, model_name_input,Team_name_input],
+        outputs=[eval_status, overall_accuracy_display, submit_button_pro],
+    )
+
+
+    submit_button_pro.click(
+        handle_submission_pro,
+        inputs=[file_input, model_name_input,Team_name_input],
+        outputs=[eval_status],
+    )

     submit_button.click(
         handle_submission,
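The wiring pattern above (an evaluate button fills the status box and reveals a hidden submit button via gr.update) can be seen in isolation in the following sketch; it assumes a recent Gradio Blocks API and uses made-up component names.

import gradio as gr

def fake_evaluate(text):
    # Toy handler: return a status string, a score, and make the hidden button visible.
    return "Evaluation completed.", 100.0, gr.update(visible=True)

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    status = gr.Textbox(label="Status", interactive=False)
    score = gr.Number(label="Score", interactive=False)
    evaluate_btn = gr.Button("Evaluate")
    submit_btn = gr.Button("Submit", visible=False)

    # Outputs map positionally onto the handler's return values.
    evaluate_btn.click(fake_evaluate, inputs=[inp], outputs=[status, score, submit_btn])

if __name__ == "__main__":
    demo.launch()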
 
@@ -890,6 +1053,19 @@ with gr.Blocks(css=css_tech_theme) as demo:
             inputs=[],
             outputs=[leaderboard_table],
         )
+        with gr.TabItem("🏅 Leaderboard-pro"):
+            leaderboard_table = gr.Dataframe(
+                value=load_leaderboard_pro(),
+                label="Leaderboard",
+                interactive=False,
+                wrap=True,
+            )
+            refresh_button = gr.Button("Refresh Leaderboard")
+            refresh_button.click(
+                lambda: load_leaderboard_pro(),
+                inputs=[],
+                outputs=[leaderboard_table],
+            )

     # Post-Tabs Section
     # gr.Markdown("""
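Note that load_leaderboard_pro is called here and in evaluate_predictions_pro but is not part of this diff. Assuming it mirrors the existing load_leaderboard and reads the same file that update_leaderboard_pro writes, a plausible minimal version might look like the sketch below; the file name, column list, and empty-frame fallback are assumptions, not code from app.py.

import os
import pandas as pd

LEADERBOARD_FILE = "leaderboard_pro.csv"  # assumed path; app.py defines its own constant

def load_leaderboard_pro():
    # Assumed behaviour: return the stored leaderboard, or an empty frame with the
    # columns that update_leaderboard_pro writes when nothing has been submitted yet.
    columns = ["Model Name", "Overall Accuracy", "Correct Predictions",
               "Total Questions", "Timestamp", "Team Name"]
    if not os.path.exists(LEADERBOARD_FILE):
        return pd.DataFrame(columns=columns)
    return pd.read_csv(LEADERBOARD_FILE)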