SondosMB committed on
Commit e109361 · verified · 1 Parent(s): 2810005

Update app.py

Files changed (1)
  1. app.py +51 -245
app.py CHANGED
@@ -1,245 +1,48 @@
 
-# # demo.launch()
-# import gradio as gr
-# import pandas as pd
-# import os
-# import re
-# from datetime import datetime
-
-# LEADERBOARD_FILE = "leaderboard.csv" # File to store all submissions persistently
-# LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
-
-# def initialize_leaderboard_file():
-#     """
-#     Ensure the leaderboard file exists and has the correct headers.
-#     """
-#     if not os.path.exists(LEADERBOARD_FILE):
-#         # Create the file with headers
-#         pd.DataFrame(columns=[
-#             "Model Name", "Overall Accuracy", "Valid Accuracy",
-#             "Correct Predictions", "Total Questions", "Timestamp"
-#         ]).to_csv(LEADERBOARD_FILE, index=False)
-#     else:
-#         # Check if the file is empty and write headers if needed
-#         if os.stat(LEADERBOARD_FILE).st_size == 0:
-#             pd.DataFrame(columns=[
-#                 "Model Name", "Overall Accuracy", "Valid Accuracy",
-#                 "Correct Predictions", "Total Questions", "Timestamp"
-#             ]).to_csv(LEADERBOARD_FILE, index=False)
-
-# def clean_answer(answer):
-#     """
-#     Clean and normalize the predicted answers.
-#     """
-#     if pd.isna(answer):
-#         return None
-#     answer = str(answer)
-#     clean = re.sub(r'[^A-Da-d]', '', answer)
-#     if clean:
-#         return clean[0].upper()
-#     return None
-
-# def update_leaderboard(results):
-#     """
-#     Append new submission results to the leaderboard file.
-#     """
-#     new_entry = {
-#         "Model Name": results['model_name'],
-#         "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
-#         "Valid Accuracy": round(results['valid_accuracy'] * 100, 2),
-#         "Correct Predictions": results['correct_predictions'],
-#         "Total Questions": results['total_questions'],
-#         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-#     }
-
-#     new_entry_df = pd.DataFrame([new_entry])
-#     new_entry_df.to_csv(LEADERBOARD_FILE, mode='a', index=False, header=False)
-
-# def load_leaderboard():
-#     """
-#     Load all submissions from the leaderboard file.
-#     """
-#     if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
-#         return pd.DataFrame({
-#             "Model Name": [],
-#             "Overall Accuracy": [],
-#             "Valid Accuracy": [],
-#             "Correct Predictions": [],
-#             "Total Questions": [],
-#             "Timestamp": [],
-#         })
-#     return pd.read_csv(LEADERBOARD_FILE)
-
-# def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
-#     """
-#     Evaluate predictions and optionally add results to the leaderboard.
-#     """
-#     ground_truth_file = "ground_truth.csv"
-#     if not os.path.exists(ground_truth_file):
-#         return "Ground truth file not found.", load_leaderboard()
-#     if not prediction_file:
-#         return "Prediction file not uploaded.", load_leaderboard()
-
-#     try:
-#         # Load predictions and ground truth
-#         predictions_df = pd.read_csv(prediction_file.name)
-#         ground_truth_df = pd.read_csv(ground_truth_file)
-
-#         # Merge predictions with ground truth
-#         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
-#         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
-
-#         # Evaluate predictions
-#         valid_predictions = merged_df.dropna(subset=['pred_answer'])
-#         correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
-#         total_predictions = len(merged_df)
-#         total_valid_predictions = len(valid_predictions)
-
-#         # Calculate accuracy
-#         overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
-#         valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
-
-#         results = {
-#             'model_name': model_name if model_name else "Unknown Model",
-#             'overall_accuracy': overall_accuracy,
-#             'valid_accuracy': valid_accuracy,
-#             'correct_predictions': correct_predictions,
-#             'total_questions': total_predictions,
-#         }
-
-#         # Update leaderboard only if opted in
-#         if add_to_leaderboard:
-#             update_leaderboard(results)
-#             return "Evaluation completed and added to leaderboard.", load_leaderboard()
-#         else:
-#             return "Evaluation completed but not added to leaderboard.", load_leaderboard()
-#     except Exception as e:
-#         return f"Error during evaluation: {str(e)}", load_leaderboard()
-
-# # Initialize leaderboard file
-# initialize_leaderboard_file()
-
-# # Gradio Interface
-# with gr.Blocks() as demo:
-#     gr.Markdown("# Prediction Evaluation Tool with Leaderboard")
-
-#     with gr.Tabs():
-#         # Submission Tab
-#         with gr.TabItem("🏅 Submission"):
-#             file_input = gr.File(label="Upload Prediction CSV")
-#             model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
-#             add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)
-#             eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
-#             leaderboard_table_preview = gr.Dataframe(
-#                 value=load_leaderboard(),
-#                 label="Leaderboard (Preview)",
-#                 interactive=False,
-#                 wrap=True,
-#             )
-#             eval_button = gr.Button("Evaluate and Update Leaderboard")
-#             eval_button.click(
-#                 evaluate_predictions,
-#                 inputs=[file_input, model_name_input, add_to_leaderboard_checkbox],
-#                 outputs=[eval_status, leaderboard_table_preview],
-#             )
-
-#         # Leaderboard Tab
-#         with gr.TabItem("🏅 Leaderboard"):
-#             leaderboard_table = gr.Dataframe(
-#                 value=load_leaderboard(),
-#                 label="Leaderboard",
-#                 interactive=False,
-#                 wrap=True,
-#             )
-#             refresh_button = gr.Button("Refresh Leaderboard")
-#             refresh_button.click(
-#                 lambda: load_leaderboard(),
-#                 inputs=[],
-#                 outputs=[leaderboard_table],
-#             )
-
-#     gr.Markdown(f"Last updated on **{LAST_UPDATED}**")
-
 # demo.launch()
-
 import gradio as gr
 import pandas as pd
+import os
 import re
 from datetime import datetime
-from huggingface_hub import hf_hub_download
-from datasets import Dataset
-import os
 
-# Constants for Hugging Face repositories
-HF_TOKEN = os.getenv("HF_TOKEN") # Hugging Face token stored as an environment variable
-if not HF_TOKEN:
-    raise ValueError("HF_TOKEN is not set. Please add it as a secret in your Hugging Face Space.")
-
-LEADERBOARD_REPO = "SondosMB/leaderboard-dataset" # Replace with your leaderboard dataset name
-GROUND_TRUTH_REPO = "SondosMB/ground-truth-dataset" # Replace with your ground truth dataset name
+LEADERBOARD_FILE = "leaderboard.csv" # File to store all submissions persistently
 LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
 
-def load_ground_truth():
+def initialize_leaderboard_file():
     """
-    Load the ground truth file from a gated Hugging Face dataset.
+    Ensure the leaderboard file exists and has the correct headers.
     """
-    try:
-        print("Fetching ground truth file...")
-        ground_truth_path = hf_hub_download(
-            repo_id=GROUND_TRUTH_REPO,
-            filename="ground_truth.csv",
-            use_auth_token=HF_TOKEN
-        )
-        print(f"Ground truth file downloaded: {ground_truth_path}")
-        return pd.read_csv(ground_truth_path)
-    except Exception as e:
-        print(f"Error loading ground truth file: {e}")
-        return None
+    if not os.path.exists(LEADERBOARD_FILE):
+        # Create the file with headers
+        pd.DataFrame(columns=[
+            "Model Name", "Overall Accuracy", "Valid Accuracy",
+            "Correct Predictions", "Total Questions", "Timestamp"
+        ]).to_csv(LEADERBOARD_FILE, index=False)
+    else:
+        # Check if the file is empty and write headers if needed
+        if os.stat(LEADERBOARD_FILE).st_size == 0:
+            pd.DataFrame(columns=[
+                "Model Name", "Overall Accuracy", "Valid Accuracy",
+                "Correct Predictions", "Total Questions", "Timestamp"
+            ]).to_csv(LEADERBOARD_FILE, index=False)
 
-def load_leaderboard():
+def clean_answer(answer):
     """
-    Load the leaderboard from a gated Hugging Face dataset.
+    Clean and normalize the predicted answers.
     """
-    try:
-        print("Fetching leaderboard file...")
-        leaderboard_path = hf_hub_download(
-            repo_id=LEADERBOARD_REPO,
-            filename="leaderboard.csv",
-            use_auth_token=HF_TOKEN
-        )
-        print(f"Leaderboard file downloaded: {leaderboard_path}")
-        return pd.read_csv(leaderboard_path)
-    except Exception as e:
-        print(f"Error loading leaderboard: {e}")
-        return pd.DataFrame({
-            "Model Name": [],
-            "Overall Accuracy": [],
-            "Valid Accuracy": [],
-            "Correct Predictions": [],
-            "Total Questions": [],
-            "Timestamp": [],
-        })
+    if pd.isna(answer):
+        return None
+    answer = str(answer)
+    clean = re.sub(r'[^A-Da-d]', '', answer)
+    if clean:
+        return clean[0].upper()
+    return None
 
 def update_leaderboard(results):
     """
-    Append new submission results to the gated leaderboard dataset.
+    Append new submission results to the leaderboard file.
     """
-    try:
-        # Load existing leaderboard or create a new one
-        leaderboard_path = hf_hub_download(
-            repo_id=LEADERBOARD_REPO,
-            filename="leaderboard.csv",
-            use_auth_token=HF_TOKEN
-        )
-        df = pd.read_csv(leaderboard_path)
-    except Exception as e:
-        print(f"Error loading leaderboard: {e}")
-        df = pd.DataFrame(columns=[
-            "Model Name", "Overall Accuracy", "Valid Accuracy",
-            "Correct Predictions", "Total Questions", "Timestamp"
-        ])
-
-    # Add new entry
     new_entry = {
         "Model Name": results['model_name'],
         "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
@@ -248,38 +51,41 @@ def update_leaderboard(results):
         "Total Questions": results['total_questions'],
         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
     }
-    df = pd.concat([df, pd.DataFrame([new_entry])], ignore_index=True)
 
-    # Save locally and push updated dataset to Hugging Face
-    df.to_csv("leaderboard.csv", index=False)
-    dataset = Dataset.from_pandas(df)
-    dataset.push_to_hub(LEADERBOARD_REPO, split="train", private=True)
+    new_entry_df = pd.DataFrame([new_entry])
+    new_entry_df.to_csv(LEADERBOARD_FILE, mode='a', index=False, header=False)
 
-def clean_answer(answer):
+def load_leaderboard():
     """
-    Clean and normalize the predicted answers.
+    Load all submissions from the leaderboard file.
     """
-    if pd.isna(answer):
-        return None
-    answer = str(answer)
-    clean = re.sub(r'[^A-Da-d]', '', answer)
-    if clean:
-        return clean[0].upper()
-    return None
+    if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
+        return pd.DataFrame({
+            "Model Name": [],
+            "Overall Accuracy": [],
+            "Valid Accuracy": [],
+            "Correct Predictions": [],
+            "Total Questions": [],
+            "Timestamp": [],
+        })
+    return pd.read_csv(LEADERBOARD_FILE)
 
 def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
     """
     Evaluate predictions and optionally add results to the leaderboard.
     """
-    ground_truth_df = load_ground_truth()
-    if ground_truth_df is None:
+    ground_truth_file = "ground_truth.csv"
+    if not os.path.exists(ground_truth_file):
        return "Ground truth file not found.", load_leaderboard()
    if not prediction_file:
        return "Prediction file not uploaded.", load_leaderboard()

    try:
-        # Load predictions and merge with ground truth
+        # Load predictions and ground truth
        predictions_df = pd.read_csv(prediction_file.name)
+        ground_truth_df = pd.read_csv(ground_truth_file)
+
+        # Merge predictions with ground truth
        merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
        merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)

@@ -310,9 +116,12 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
    except Exception as e:
        return f"Error during evaluation: {str(e)}", load_leaderboard()

+# Initialize leaderboard file
+initialize_leaderboard_file()
+
 # Gradio Interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Secure Prediction Evaluation Tool with Gated Leaderboard")
+    gr.Markdown("# Prediction Evaluation Tool with Leaderboard")

    with gr.Tabs():
        # Submission Tab
@@ -352,6 +161,3 @@ with gr.Blocks() as demo:
    gr.Markdown(f"Last updated on **{LAST_UPDATED}**")

 demo.launch()
-
-
-
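
For reference, a minimal standalone sketch of the scoring logic introduced above. The column names (question_id, predicted_answer, Answer) come from the diff; the sample rows and this script are hypothetical and not part of the commit.

import re
import pandas as pd

def clean_answer(answer):
    # Same normalization as app.py: keep only A-D letters, take the first one, uppercase it.
    if pd.isna(answer):
        return None
    clean = re.sub(r'[^A-Da-d]', '', str(answer))
    return clean[0].upper() if clean else None

# Hypothetical submission and ground-truth rows (only the column names are taken from app.py).
predictions_df = pd.DataFrame({
    "question_id": [1, 2, 3],
    "predicted_answer": ["(a)", "B.", "?"],   # "?" contains no A-D letter, so it is dropped as invalid
})
ground_truth_df = pd.DataFrame({
    "question_id": [1, 2, 3],
    "Answer": ["A", "C", "D"],
})

merged_df = pd.merge(predictions_df, ground_truth_df, on="question_id", how="inner")
merged_df["pred_answer"] = merged_df["predicted_answer"].apply(clean_answer)

valid = merged_df.dropna(subset=["pred_answer"])
correct = (valid["pred_answer"] == valid["Answer"]).sum()
overall_accuracy = correct / len(merged_df) if len(merged_df) else 0   # 1 of 3 -> 0.33
valid_accuracy = correct / len(valid) if len(valid) else 0             # 1 of 2 -> 0.50
print(round(overall_accuracy, 2), round(valid_accuracy, 2))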