Update app.py
Browse files
app.py
CHANGED
@@ -703,20 +703,20 @@ with gr.Blocks(css=css_tech_theme) as demo:
|
|
703 |
except Exception as e:
|
704 |
return f"Error loading ground truth: {e}", 0, gr.update(visible=False)
|
705 |
|
706 |
-
|
707 |
-
|
708 |
-
|
709 |
-
|
710 |
-
|
711 |
-
|
712 |
-
|
713 |
-
|
714 |
-
|
715 |
-
|
716 |
-
|
717 |
-
|
718 |
-
|
719 |
-
|
720 |
|
721 |
|
722 |
|
|
|
703 |
except Exception as e:
|
704 |
return f"Error loading ground truth: {e}", 0, gr.update(visible=False)
|
705 |
|
706 |
+
# Perform evaluation calculations
|
707 |
+
merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
|
708 |
+
merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
|
709 |
+
|
710 |
+
valid_predictions = merged_df.dropna(subset=['pred_answer'])
|
711 |
+
correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
|
712 |
+
total_predictions = len(merged_df)
|
713 |
+
|
714 |
+
overall_accuracy = (correct_predictions / total_predictions * 100) if total_predictions > 0 else 0
|
715 |
+
|
716 |
+
return "Evaluation completed successfully.", overall_accuracy, gr.update(visible=True)
|
717 |
+
|
718 |
+
except Exception as e:
|
719 |
+
return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
|
720 |
|
721 |
|
722 |
|