Update app.py
Browse files
app.py
CHANGED
|
@@ -703,20 +703,20 @@ with gr.Blocks(css=css_tech_theme) as demo:
|
|
| 703 |
except Exception as e:
|
| 704 |
return f"Error loading ground truth: {e}", 0, gr.update(visible=False)
|
| 705 |
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
|
| 721 |
|
| 722 |
|
|
|
|
| 703 |
except Exception as e:
|
| 704 |
return f"Error loading ground truth: {e}", 0, gr.update(visible=False)
|
| 705 |
|
| 706 |
+
# Perform evaluation calculations
|
| 707 |
+
merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
|
| 708 |
+
merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
|
| 709 |
+
|
| 710 |
+
valid_predictions = merged_df.dropna(subset=['pred_answer'])
|
| 711 |
+
correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
|
| 712 |
+
total_predictions = len(merged_df)
|
| 713 |
+
|
| 714 |
+
overall_accuracy = (correct_predictions / total_predictions * 100) if total_predictions > 0 else 0
|
| 715 |
+
|
| 716 |
+
return "Evaluation completed successfully.", overall_accuracy, gr.update(visible=True)
|
| 717 |
+
|
| 718 |
+
except Exception as e:
|
| 719 |
+
return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
|
| 720 |
|
| 721 |
|
| 722 |
|