SondosMB committed · verified
Commit d24563e · 1 Parent(s): e109361

Update app.py

Files changed (1)
  1. app.py +183 -9
app.py CHANGED
@@ -1,14 +1,182 @@
 
+# # demo.launch()
+# import gradio as gr
+# import pandas as pd
+# import os
+# import re
+# from datetime import datetime
+
+# LEADERBOARD_FILE = "leaderboard.csv" # File to store all submissions persistently
+# LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
+
+# def initialize_leaderboard_file():
+#     """
+#     Ensure the leaderboard file exists and has the correct headers.
+#     """
+#     if not os.path.exists(LEADERBOARD_FILE):
+#         # Create the file with headers
+#         pd.DataFrame(columns=[
+#             "Model Name", "Overall Accuracy", "Valid Accuracy",
+#             "Correct Predictions", "Total Questions", "Timestamp"
+#         ]).to_csv(LEADERBOARD_FILE, index=False)
+#     else:
+#         # Check if the file is empty and write headers if needed
+#         if os.stat(LEADERBOARD_FILE).st_size == 0:
+#             pd.DataFrame(columns=[
+#                 "Model Name", "Overall Accuracy", "Valid Accuracy",
+#                 "Correct Predictions", "Total Questions", "Timestamp"
+#             ]).to_csv(LEADERBOARD_FILE, index=False)
+
+# def clean_answer(answer):
+#     """
+#     Clean and normalize the predicted answers.
+#     """
+#     if pd.isna(answer):
+#         return None
+#     answer = str(answer)
+#     clean = re.sub(r'[^A-Da-d]', '', answer)
+#     if clean:
+#         return clean[0].upper()
+#     return None
+
+# def update_leaderboard(results):
+#     """
+#     Append new submission results to the leaderboard file.
+#     """
+#     new_entry = {
+#         "Model Name": results['model_name'],
+#         "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
+#         "Valid Accuracy": round(results['valid_accuracy'] * 100, 2),
+#         "Correct Predictions": results['correct_predictions'],
+#         "Total Questions": results['total_questions'],
+#         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+#     }
+
+#     new_entry_df = pd.DataFrame([new_entry])
+#     new_entry_df.to_csv(LEADERBOARD_FILE, mode='a', index=False, header=False)
+
+# def load_leaderboard():
+#     """
+#     Load all submissions from the leaderboard file.
+#     """
+#     if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
+#         return pd.DataFrame({
+#             "Model Name": [],
+#             "Overall Accuracy": [],
+#             "Valid Accuracy": [],
+#             "Correct Predictions": [],
+#             "Total Questions": [],
+#             "Timestamp": [],
+#         })
+#     return pd.read_csv(LEADERBOARD_FILE)
+
+# def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
+#     """
+#     Evaluate predictions and optionally add results to the leaderboard.
+#     """
+#     ground_truth_file = "ground_truth.csv"
+#     if not os.path.exists(ground_truth_file):
+#         return "Ground truth file not found.", load_leaderboard()
+#     if not prediction_file:
+#         return "Prediction file not uploaded.", load_leaderboard()
+
+#     try:
+#         # Load predictions and ground truth
+#         predictions_df = pd.read_csv(prediction_file.name)
+#         ground_truth_df = pd.read_csv(ground_truth_file)
+
+#         # Merge predictions with ground truth
+#         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
+#         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
+
+#         # Evaluate predictions
+#         valid_predictions = merged_df.dropna(subset=['pred_answer'])
+#         correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
+#         total_predictions = len(merged_df)
+#         total_valid_predictions = len(valid_predictions)
+
+#         # Calculate accuracy
+#         overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
+#         valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
+
+#         results = {
+#             'model_name': model_name if model_name else "Unknown Model",
+#             'overall_accuracy': overall_accuracy,
+#             'valid_accuracy': valid_accuracy,
+#             'correct_predictions': correct_predictions,
+#             'total_questions': total_predictions,
+#         }
+
+#         # Update leaderboard only if opted in
+#         if add_to_leaderboard:
+#             update_leaderboard(results)
+#             return "Evaluation completed and added to leaderboard.", load_leaderboard()
+#         else:
+#             return "Evaluation completed but not added to leaderboard.", load_leaderboard()
+#     except Exception as e:
+#         return f"Error during evaluation: {str(e)}", load_leaderboard()
+
+# # Initialize leaderboard file
+# initialize_leaderboard_file()
+
+# # Gradio Interface
+# with gr.Blocks() as demo:
+#     gr.Markdown("# Prediction Evaluation Tool with Leaderboard")
+
+#     with gr.Tabs():
+#         # Submission Tab
+#         with gr.TabItem("🏅 Submission"):
+#             file_input = gr.File(label="Upload Prediction CSV")
+#             model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
+#             add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)
+#             eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
+#             leaderboard_table_preview = gr.Dataframe(
+#                 value=load_leaderboard(),
+#                 label="Leaderboard (Preview)",
+#                 interactive=False,
+#                 wrap=True,
+#             )
+#             eval_button = gr.Button("Evaluate and Update Leaderboard")
+#             eval_button.click(
+#                 evaluate_predictions,
+#                 inputs=[file_input, model_name_input, add_to_leaderboard_checkbox],
+#                 outputs=[eval_status, leaderboard_table_preview],
+#             )
+
+#         # Leaderboard Tab
+#         with gr.TabItem("🏅 Leaderboard"):
+#             leaderboard_table = gr.Dataframe(
+#                 value=load_leaderboard(),
+#                 label="Leaderboard",
+#                 interactive=False,
+#                 wrap=True,
+#             )
+#             refresh_button = gr.Button("Refresh Leaderboard")
+#             refresh_button.click(
+#                 lambda: load_leaderboard(),
+#                 inputs=[],
+#                 outputs=[leaderboard_table],
+#             )
+
+#     gr.Markdown(f"Last updated on **{LAST_UPDATED}**")
+
 # demo.launch()
+
+
 import gradio as gr
 import pandas as pd
 import os
 import re
 from datetime import datetime
+from huggingface_hub import hf_hub_download
 
 LEADERBOARD_FILE = "leaderboard.csv" # File to store all submissions persistently
+GROUND_TRUTH_FILE = "ground_truth.csv" # File for ground truth data
 LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
 
+# Disable symlink warnings
+os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
+
 def initialize_leaderboard_file():
     """
     Ensure the leaderboard file exists and has the correct headers.
@@ -74,18 +242,23 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
     """
     Evaluate predictions and optionally add results to the leaderboard.
     """
-    ground_truth_file = "ground_truth.csv"
-    if not os.path.exists(ground_truth_file):
-        return "Ground truth file not found.", load_leaderboard()
+    try:
+        # Load ground truth data
+        ground_truth_path = hf_hub_download(
+            repo_id="SondosMB/ground-truth-dataset",
+            filename=GROUND_TRUTH_FILE,
+            use_auth_token=True
+        )
+        ground_truth_df = pd.read_csv(ground_truth_path)
+    except Exception as e:
+        return f"Error loading ground truth: {e}", load_leaderboard()
+
     if not prediction_file:
         return "Prediction file not uploaded.", load_leaderboard()
 
     try:
-        # Load predictions and ground truth
+        # Load predictions and merge with ground truth
         predictions_df = pd.read_csv(prediction_file.name)
-        ground_truth_df = pd.read_csv(ground_truth_file)
-
-        # Merge predictions with ground truth
         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
 
@@ -122,7 +295,7 @@ initialize_leaderboard_file()
 # Gradio Interface
 with gr.Blocks() as demo:
     gr.Markdown("# Prediction Evaluation Tool with Leaderboard")
-
+
     with gr.Tabs():
         # Submission Tab
        with gr.TabItem("🏅 Submission"):
@@ -142,7 +315,7 @@ with gr.Blocks() as demo:
                 inputs=[file_input, model_name_input, add_to_leaderboard_checkbox],
                 outputs=[eval_status, leaderboard_table_preview],
             )
-
+
         # Leaderboard Tab
         with gr.TabItem("🏅 Leaderboard"):
             leaderboard_table = gr.Dataframe(
@@ -161,3 +334,4 @@ with gr.Blocks() as demo:
     gr.Markdown(f"Last updated on **{LAST_UPDATED}**")
 
 demo.launch()
+
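
A minimal standalone sketch of the Hub-based ground-truth loading shown above, usable for scoring a submission locally before uploading it. The repo_type="dataset" argument, the HF_TOKEN environment variable, and the local predictions.csv path are illustrative assumptions, not part of the committed app.py (which calls hf_hub_download(..., use_auth_token=True)).

# Sketch only: download the gated ground-truth CSV and score a local predictions file.
import os
import re

import pandas as pd
from huggingface_hub import hf_hub_download


def clean_answer(answer):
    # Keep the first A-D character of a predicted answer, uppercased; otherwise None.
    if pd.isna(answer):
        return None
    letters = re.sub(r"[^A-Da-d]", "", str(answer))
    return letters[0].upper() if letters else None


# Assumption: the ground-truth repo is a dataset repo and a token is provided via HF_TOKEN.
ground_truth_path = hf_hub_download(
    repo_id="SondosMB/ground-truth-dataset",
    filename="ground_truth.csv",
    repo_type="dataset",
    token=os.environ.get("HF_TOKEN"),
)

ground_truth_df = pd.read_csv(ground_truth_path)
predictions_df = pd.read_csv("predictions.csv")  # hypothetical local submission file

merged = pd.merge(predictions_df, ground_truth_df, on="question_id", how="inner")
merged["pred_answer"] = merged["predicted_answer"].apply(clean_answer)
valid = merged.dropna(subset=["pred_answer"])
correct = (valid["pred_answer"] == valid["Answer"]).sum()
overall_accuracy = correct / len(merged) if len(merged) else 0.0
print(f"Overall accuracy: {overall_accuracy:.2%} ({correct}/{len(merged)})")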