SondosMB committed
Commit aa88144 · verified · 1 Parent(s): 956b788

Update app.py

Files changed (1)
  1. app.py +59 -15
app.py CHANGED
@@ -641,8 +641,40 @@ with gr.Blocks(css=css_tech_theme) as demo:
     eval_status = gr.Textbox(label="🛠️ Evaluation Status", interactive=False,scale=1,min_width=1200)

     # Define the functions outside the `with` block
+    # def handle_evaluation(file, model_name):
+    #     # Check if required inputs are provided
+    #     if not file:
+    #         return "Error: Please upload a prediction file.", 0, gr.update(visible=False)
+    #     if not model_name or model_name.strip() == "":
+    #         return "Error: Please enter a model name.", 0, gr.update(visible=False)
+
+    #     try:
+    #         # Load predictions file
+    #         predictions_df = pd.read_csv(file.name)
+
+    #         # Validate required columns in the prediction file
+    #         required_columns = ['question_id', 'predicted_answer']
+    #         missing_columns = [col for col in required_columns if col not in predictions_df.columns]
+    #         if missing_columns:
+    #             return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
+    #                     0, gr.update(visible=False))
+
+    #         # Perform evaluation
+    #         status, leaderboard = evaluate_predictions(file, model_name, add_to_leaderboard=False)
+    #         if leaderboard.empty:
+    #             overall_accuracy = 0
+    #         else:
+    #             overall_accuracy = leaderboard.iloc[-1]["Overall Accuracy"]
+
+    #         # Show the submit button after successful evaluation
+    #         return status, overall_accuracy, gr.update(visible=True)
+
+    #     except Exception as e:
+    #         # Handle unexpected errors
+    #         return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
+
     def handle_evaluation(file, model_name):
-        # Check if required inputs are provided
         if not file:
             return "Error: Please upload a prediction file.", 0, gr.update(visible=False)
         if not model_name or model_name.strip() == "":
@@ -652,27 +684,39 @@
             # Load predictions file
             predictions_df = pd.read_csv(file.name)

-            # Validate required columns in the prediction file
+            # Validate required columns
             required_columns = ['question_id', 'predicted_answer']
             missing_columns = [col for col in required_columns if col not in predictions_df.columns]
             if missing_columns:
                 return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
                         0, gr.update(visible=False))

-            # Perform evaluation
-            status, leaderboard = evaluate_predictions(file, model_name, add_to_leaderboard=False)
-            if leaderboard.empty:
-                overall_accuracy = 0
-            else:
-                overall_accuracy = leaderboard.iloc[-1]["Overall Accuracy"]
-
-            # Show the submit button after successful evaluation
-            return status, overall_accuracy, gr.update(visible=True)
-
-        except Exception as e:
-            # Handle unexpected errors
-            return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
+            # Load ground truth
+            try:
+                ground_truth_path = hf_hub_download(
+                    repo_id="SondosMB/ground-truth-dataset",
+                    filename="ground_truth.csv",
+                    repo_type="dataset",
+                    use_auth_token=True
+                )
+                ground_truth_df = pd.read_csv(ground_truth_path)
+            except Exception as e:
+                return f"Error loading ground truth: {e}", 0, gr.update(visible=False)
+
+            # Perform evaluation calculations
+            merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
+            merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)

+            valid_predictions = merged_df.dropna(subset=['pred_answer'])
+            correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
+            total_predictions = len(merged_df)
+
+            overall_accuracy = (correct_predictions / total_predictions * 100) if total_predictions > 0 else 0
+
+            return "Evaluation completed successfully.", overall_accuracy, gr.update(visible=True)
+
+        except Exception as e:
+            return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
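For context, the scoring path this commit introduces reduces to an inner join on question_id followed by a normalize-and-compare pass. The sketch below reproduces that step standalone on toy data; the clean_answer helper here is a hypothetical stand-in for the one defined elsewhere in app.py, and the column names (question_id, predicted_answer, Answer) are taken from the diff above.

import pandas as pd


def clean_answer(ans):
    # Hypothetical stand-in for app.py's clean_answer helper: keep the first
    # non-space character, upper-cased, or return None when nothing usable is present.
    if not isinstance(ans, str) or not ans.strip():
        return None
    return ans.strip()[0].upper()


# Toy stand-ins for the uploaded prediction file and the ground-truth CSV,
# using the column names the diff expects.
predictions_df = pd.DataFrame({
    "question_id": [1, 2, 3],
    "predicted_answer": ["A", "b) because of the context", ""],
})
ground_truth_df = pd.DataFrame({
    "question_id": [1, 2, 3],
    "Answer": ["A", "B", "C"],
})

# Same steps as the new handle_evaluation: inner-join on question_id,
# normalize the predictions, then score valid rows against the full merged count.
merged_df = pd.merge(predictions_df, ground_truth_df, on="question_id", how="inner")
merged_df["pred_answer"] = merged_df["predicted_answer"].apply(clean_answer)

valid_predictions = merged_df.dropna(subset=["pred_answer"])
correct_predictions = (valid_predictions["pred_answer"] == valid_predictions["Answer"]).sum()
total_predictions = len(merged_df)

overall_accuracy = (correct_predictions / total_predictions * 100) if total_predictions > 0 else 0
print(f"Overall accuracy: {overall_accuracy:.1f}%")  # 66.7% on this toy data

Two observations on the committed version: the denominator is the full merged set, so rows whose prediction cannot be normalized count as incorrect rather than being dropped; and newer huggingface_hub releases favor the token= parameter over use_auth_token= for hf_hub_download, so the committed call may emit a deprecation warning depending on the installed version.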