SondosMB committed on
Commit 747c6f5 · verified · 1 Parent(s): 5b78632

Update app.py

Files changed (1)
  1. app.py +236 -51
app.py CHANGED
@@ -1,48 +1,238 @@
 
+# # demo.launch()
+# import gradio as gr
+# import pandas as pd
+# import os
+# import re
+# from datetime import datetime
+
+# LEADERBOARD_FILE = "leaderboard.csv" # File to store all submissions persistently
+# LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
+
+# def initialize_leaderboard_file():
+#     """
+#     Ensure the leaderboard file exists and has the correct headers.
+#     """
+#     if not os.path.exists(LEADERBOARD_FILE):
+#         # Create the file with headers
+#         pd.DataFrame(columns=[
+#             "Model Name", "Overall Accuracy", "Valid Accuracy",
+#             "Correct Predictions", "Total Questions", "Timestamp"
+#         ]).to_csv(LEADERBOARD_FILE, index=False)
+#     else:
+#         # Check if the file is empty and write headers if needed
+#         if os.stat(LEADERBOARD_FILE).st_size == 0:
+#             pd.DataFrame(columns=[
+#                 "Model Name", "Overall Accuracy", "Valid Accuracy",
+#                 "Correct Predictions", "Total Questions", "Timestamp"
+#             ]).to_csv(LEADERBOARD_FILE, index=False)
+
+# def clean_answer(answer):
+#     """
+#     Clean and normalize the predicted answers.
+#     """
+#     if pd.isna(answer):
+#         return None
+#     answer = str(answer)
+#     clean = re.sub(r'[^A-Da-d]', '', answer)
+#     if clean:
+#         return clean[0].upper()
+#     return None
+
+# def update_leaderboard(results):
+#     """
+#     Append new submission results to the leaderboard file.
+#     """
+#     new_entry = {
+#         "Model Name": results['model_name'],
+#         "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
+#         "Valid Accuracy": round(results['valid_accuracy'] * 100, 2),
+#         "Correct Predictions": results['correct_predictions'],
+#         "Total Questions": results['total_questions'],
+#         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+#     }
+
+#     new_entry_df = pd.DataFrame([new_entry])
+#     new_entry_df.to_csv(LEADERBOARD_FILE, mode='a', index=False, header=False)
+
+# def load_leaderboard():
+#     """
+#     Load all submissions from the leaderboard file.
+#     """
+#     if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
+#         return pd.DataFrame({
+#             "Model Name": [],
+#             "Overall Accuracy": [],
+#             "Valid Accuracy": [],
+#             "Correct Predictions": [],
+#             "Total Questions": [],
+#             "Timestamp": [],
+#         })
+#     return pd.read_csv(LEADERBOARD_FILE)
+
+# def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
+#     """
+#     Evaluate predictions and optionally add results to the leaderboard.
+#     """
+#     ground_truth_file = "ground_truth.csv"
+#     if not os.path.exists(ground_truth_file):
+#         return "Ground truth file not found.", load_leaderboard()
+#     if not prediction_file:
+#         return "Prediction file not uploaded.", load_leaderboard()
+
+#     try:
+#         # Load predictions and ground truth
+#         predictions_df = pd.read_csv(prediction_file.name)
+#         ground_truth_df = pd.read_csv(ground_truth_file)
+
+#         # Merge predictions with ground truth
+#         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
+#         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
+
+#         # Evaluate predictions
+#         valid_predictions = merged_df.dropna(subset=['pred_answer'])
+#         correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
+#         total_predictions = len(merged_df)
+#         total_valid_predictions = len(valid_predictions)
+
+#         # Calculate accuracy
+#         overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
+#         valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
+
+#         results = {
+#             'model_name': model_name if model_name else "Unknown Model",
+#             'overall_accuracy': overall_accuracy,
+#             'valid_accuracy': valid_accuracy,
+#             'correct_predictions': correct_predictions,
+#             'total_questions': total_predictions,
+#         }
+
+#         # Update leaderboard only if opted in
+#         if add_to_leaderboard:
+#             update_leaderboard(results)
+#             return "Evaluation completed and added to leaderboard.", load_leaderboard()
+#         else:
+#             return "Evaluation completed but not added to leaderboard.", load_leaderboard()
+#     except Exception as e:
+#         return f"Error during evaluation: {str(e)}", load_leaderboard()
+
+# # Initialize leaderboard file
+# initialize_leaderboard_file()
+
+# # Gradio Interface
+# with gr.Blocks() as demo:
+#     gr.Markdown("# Prediction Evaluation Tool with Leaderboard")
+
+#     with gr.Tabs():
+#         # Submission Tab
+#         with gr.TabItem("🏅 Submission"):
+#             file_input = gr.File(label="Upload Prediction CSV")
+#             model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
+#             add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)
+#             eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
+#             leaderboard_table_preview = gr.Dataframe(
+#                 value=load_leaderboard(),
+#                 label="Leaderboard (Preview)",
+#                 interactive=False,
+#                 wrap=True,
+#             )
+#             eval_button = gr.Button("Evaluate and Update Leaderboard")
+#             eval_button.click(
+#                 evaluate_predictions,
+#                 inputs=[file_input, model_name_input, add_to_leaderboard_checkbox],
+#                 outputs=[eval_status, leaderboard_table_preview],
+#             )
+
+#         # Leaderboard Tab
+#         with gr.TabItem("🏅 Leaderboard"):
+#             leaderboard_table = gr.Dataframe(
+#                 value=load_leaderboard(),
+#                 label="Leaderboard",
+#                 interactive=False,
+#                 wrap=True,
+#             )
+#             refresh_button = gr.Button("Refresh Leaderboard")
+#             refresh_button.click(
+#                 lambda: load_leaderboard(),
+#                 inputs=[],
+#                 outputs=[leaderboard_table],
+#             )
+
+#     gr.Markdown(f"Last updated on **{LAST_UPDATED}**")
+
 # demo.launch()
+
+
 import gradio as gr
 import pandas as pd
-import os
 import re
 from datetime import datetime
+from huggingface_hub import hf_hub_download
+from datasets import Dataset
+import os
 
-LEADERBOARD_FILE = "leaderboard.csv" # File to store all submissions persistently
+# Constants for Hugging Face repositories
+HF_TOKEN = os.getenv("HF_TOKEN") # Hugging Face token stored as an environment variable
+LEADERBOARD_REPO = "SondosMB/leaderboard-dataset" # Replace with your leaderboard dataset name
+GROUND_TRUTH_REPO = "SondosMB/ground-truth-dataset" # Replace with your ground truth dataset name
 LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
 
-def initialize_leaderboard_file():
+def load_ground_truth():
     """
-    Ensure the leaderboard file exists and has the correct headers.
+    Load the ground truth file from a private Hugging Face dataset.
     """
-    if not os.path.exists(LEADERBOARD_FILE):
-        # Create the file with headers
-        pd.DataFrame(columns=[
-            "Model Name", "Overall Accuracy", "Valid Accuracy",
-            "Correct Predictions", "Total Questions", "Timestamp"
-        ]).to_csv(LEADERBOARD_FILE, index=False)
-    else:
-        # Check if the file is empty and write headers if needed
-        if os.stat(LEADERBOARD_FILE).st_size == 0:
-            pd.DataFrame(columns=[
-                "Model Name", "Overall Accuracy", "Valid Accuracy",
-                "Correct Predictions", "Total Questions", "Timestamp"
-            ]).to_csv(LEADERBOARD_FILE, index=False)
+    try:
+        ground_truth_path = hf_hub_download(
+            repo_id=GROUND_TRUTH_REPO,
+            filename="ground_truth.csv",
+            use_auth_token=HF_TOKEN
+        )
+        return pd.read_csv(ground_truth_path)
+    except Exception as e:
+        print(f"Error loading ground truth: {e}")
+        return None
 
-def clean_answer(answer):
+def load_leaderboard():
     """
-    Clean and normalize the predicted answers.
+    Load the leaderboard from a private Hugging Face dataset.
     """
-    if pd.isna(answer):
-        return None
-    answer = str(answer)
-    clean = re.sub(r'[^A-Da-d]', '', answer)
-    if clean:
-        return clean[0].upper()
-    return None
+    try:
+        leaderboard_path = hf_hub_download(
+            repo_id=LEADERBOARD_REPO,
+            filename="leaderboard.csv",
+            use_auth_token=HF_TOKEN
+        )
+        return pd.read_csv(leaderboard_path)
+    except Exception as e:
+        print(f"Error loading leaderboard: {e}")
+        return pd.DataFrame({
+            "Model Name": [],
+            "Overall Accuracy": [],
+            "Valid Accuracy": [],
+            "Correct Predictions": [],
+            "Total Questions": [],
+            "Timestamp": [],
+        })
 
 def update_leaderboard(results):
     """
-    Append new submission results to the leaderboard file.
+    Append new submission results to the private leaderboard dataset.
     """
+    try:
+        # Load existing leaderboard or create a new one
+        leaderboard_path = hf_hub_download(
+            repo_id=LEADERBOARD_REPO,
+            filename="leaderboard.csv",
+            use_auth_token=HF_TOKEN
+        )
+        df = pd.read_csv(leaderboard_path)
+    except:
+        df = pd.DataFrame(columns=[
+            "Model Name", "Overall Accuracy", "Valid Accuracy",
+            "Correct Predictions", "Total Questions", "Timestamp"
+        ])
+
+    # Add new entry
     new_entry = {
         "Model Name": results['model_name'],
         "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
@@ -51,41 +241,38 @@ def update_leaderboard(results):
         "Total Questions": results['total_questions'],
         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
     }
+    df = pd.concat([df, pd.DataFrame([new_entry])], ignore_index=True)
 
-    new_entry_df = pd.DataFrame([new_entry])
-    new_entry_df.to_csv(LEADERBOARD_FILE, mode='a', index=False, header=False)
+    # Save locally and push updated dataset to Hugging Face
+    df.to_csv("leaderboard.csv", index=False)
+    dataset = Dataset.from_pandas(df)
+    dataset.push_to_hub(LEADERBOARD_REPO, split="train", private=True)
 
-def load_leaderboard():
+def clean_answer(answer):
     """
-    Load all submissions from the leaderboard file.
+    Clean and normalize the predicted answers.
     """
-    if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
-        return pd.DataFrame({
-            "Model Name": [],
-            "Overall Accuracy": [],
-            "Valid Accuracy": [],
-            "Correct Predictions": [],
-            "Total Questions": [],
-            "Timestamp": [],
-        })
-    return pd.read_csv(LEADERBOARD_FILE)
+    if pd.isna(answer):
+        return None
+    answer = str(answer)
+    clean = re.sub(r'[^A-Da-d]', '', answer)
+    if clean:
+        return clean[0].upper()
+    return None
 
 def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
     """
     Evaluate predictions and optionally add results to the leaderboard.
     """
-    ground_truth_file = "ground_truth.csv"
-    if not os.path.exists(ground_truth_file):
+    ground_truth_df = load_ground_truth()
+    if ground_truth_df is None:
        return "Ground truth file not found.", load_leaderboard()
     if not prediction_file:
         return "Prediction file not uploaded.", load_leaderboard()
 
     try:
-        # Load predictions and ground truth
+        # Load predictions and merge with ground truth
         predictions_df = pd.read_csv(prediction_file.name)
-        ground_truth_df = pd.read_csv(ground_truth_file)
-
-        # Merge predictions with ground truth
         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
 
@@ -116,12 +303,9 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
     except Exception as e:
         return f"Error during evaluation: {str(e)}", load_leaderboard()
 
-# Initialize leaderboard file
-initialize_leaderboard_file()
-
 # Gradio Interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Prediction Evaluation Tool with Leaderboard")
+    gr.Markdown("# Secure Prediction Evaluation Tool with Private Leaderboard")
 
     with gr.Tabs():
         # Submission Tab
@@ -162,3 +346,4 @@ with gr.Blocks() as demo:
 
 demo.launch()
 
+
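
A minimal sketch (not part of this commit) of how the new Hub-backed loading could be smoke-tested outside the Gradio app. It assumes HF_TOKEN is set in the environment, that SondosMB/leaderboard-dataset is a dataset repository (hence the repo_type="dataset" argument, which app.py itself does not pass), and a recent huggingface_hub release that accepts token= (the commit uses the older use_auth_token= argument). The script name, the fetch_leaderboard helper, and the column check are illustrative assumptions.

# smoke_test.py -- hypothetical helper, not shipped with the Space
import os

import pandas as pd
from huggingface_hub import hf_hub_download

LEADERBOARD_REPO = "SondosMB/leaderboard-dataset"

def fetch_leaderboard() -> pd.DataFrame:
    """Download leaderboard.csv from the private dataset repo and load it."""
    path = hf_hub_download(
        repo_id=LEADERBOARD_REPO,
        filename="leaderboard.csv",
        repo_type="dataset",          # assumption: the repo is a dataset repo, not a model repo
        token=os.getenv("HF_TOKEN"),  # same secret the Space reads via os.getenv("HF_TOKEN")
    )
    return pd.read_csv(path)

if __name__ == "__main__":
    df = fetch_leaderboard()
    # Print the columns the app expects so a schema mismatch is obvious before deploying.
    expected = ["Model Name", "Overall Accuracy", "Valid Accuracy",
                "Correct Predictions", "Total Questions", "Timestamp"]
    print(df.head())
    print("Missing columns:", [c for c in expected if c not in df.columns])

Running this once with a valid token confirms that the private download path used by load_leaderboard() and update_leaderboard() resolves before the Space is restarted.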