SondosMB committed on
Commit 5b78632 · verified · 1 Parent(s): 5b92bc4

Update app.py

Files changed (1):
  1. app.py +51 -233

app.py CHANGED
@@ -1,236 +1,48 @@
 
-# # demo.launch()
-# import gradio as gr
-# import pandas as pd
-# import os
-# import re
-# from datetime import datetime
-
-# LEADERBOARD_FILE = "leaderboard.csv" # File to store all submissions persistently
-# LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
-
-# def initialize_leaderboard_file():
-#     """
-#     Ensure the leaderboard file exists and has the correct headers.
-#     """
-#     if not os.path.exists(LEADERBOARD_FILE):
-#         # Create the file with headers
-#         pd.DataFrame(columns=[
-#             "Model Name", "Overall Accuracy", "Valid Accuracy",
-#             "Correct Predictions", "Total Questions", "Timestamp"
-#         ]).to_csv(LEADERBOARD_FILE, index=False)
-#     else:
-#         # Check if the file is empty and write headers if needed
-#         if os.stat(LEADERBOARD_FILE).st_size == 0:
-#             pd.DataFrame(columns=[
-#                 "Model Name", "Overall Accuracy", "Valid Accuracy",
-#                 "Correct Predictions", "Total Questions", "Timestamp"
-#             ]).to_csv(LEADERBOARD_FILE, index=False)
-
-# def clean_answer(answer):
-#     """
-#     Clean and normalize the predicted answers.
-#     """
-#     if pd.isna(answer):
-#         return None
-#     answer = str(answer)
-#     clean = re.sub(r'[^A-Da-d]', '', answer)
-#     if clean:
-#         return clean[0].upper()
-#     return None
-
-# def update_leaderboard(results):
-#     """
-#     Append new submission results to the leaderboard file.
-#     """
-#     new_entry = {
-#         "Model Name": results['model_name'],
-#         "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
-#         "Valid Accuracy": round(results['valid_accuracy'] * 100, 2),
-#         "Correct Predictions": results['correct_predictions'],
-#         "Total Questions": results['total_questions'],
-#         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-#     }
-
-#     new_entry_df = pd.DataFrame([new_entry])
-#     new_entry_df.to_csv(LEADERBOARD_FILE, mode='a', index=False, header=False)
-
-# def load_leaderboard():
-#     """
-#     Load all submissions from the leaderboard file.
-#     """
-#     if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
-#         return pd.DataFrame({
-#             "Model Name": [],
-#             "Overall Accuracy": [],
-#             "Valid Accuracy": [],
-#             "Correct Predictions": [],
-#             "Total Questions": [],
-#             "Timestamp": [],
-#         })
-#     return pd.read_csv(LEADERBOARD_FILE)
-
-# def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
-#     """
-#     Evaluate predictions and optionally add results to the leaderboard.
-#     """
-#     ground_truth_file = "ground_truth.csv"
-#     if not os.path.exists(ground_truth_file):
-#         return "Ground truth file not found.", load_leaderboard()
-#     if not prediction_file:
-#         return "Prediction file not uploaded.", load_leaderboard()
-
-#     try:
-#         # Load predictions and ground truth
-#         predictions_df = pd.read_csv(prediction_file.name)
-#         ground_truth_df = pd.read_csv(ground_truth_file)
-
-#         # Merge predictions with ground truth
-#         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
-#         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
-
-#         # Evaluate predictions
-#         valid_predictions = merged_df.dropna(subset=['pred_answer'])
-#         correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
-#         total_predictions = len(merged_df)
-#         total_valid_predictions = len(valid_predictions)
-
-#         # Calculate accuracy
-#         overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
-#         valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
-
-#         results = {
-#             'model_name': model_name if model_name else "Unknown Model",
-#             'overall_accuracy': overall_accuracy,
-#             'valid_accuracy': valid_accuracy,
-#             'correct_predictions': correct_predictions,
-#             'total_questions': total_predictions,
-#         }
-
-#         # Update leaderboard only if opted in
-#         if add_to_leaderboard:
-#             update_leaderboard(results)
-#             return "Evaluation completed and added to leaderboard.", load_leaderboard()
-#         else:
-#             return "Evaluation completed but not added to leaderboard.", load_leaderboard()
-#     except Exception as e:
-#         return f"Error during evaluation: {str(e)}", load_leaderboard()
-
-# # Initialize leaderboard file
-# initialize_leaderboard_file()
-
-# # Gradio Interface
-# with gr.Blocks() as demo:
-#     gr.Markdown("# Prediction Evaluation Tool with Leaderboard")
-
-#     with gr.Tabs():
-#         # Submission Tab
-#         with gr.TabItem("🏅 Submission"):
-#             file_input = gr.File(label="Upload Prediction CSV")
-#             model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
-#             add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)
-#             eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
-#             leaderboard_table_preview = gr.Dataframe(
-#                 value=load_leaderboard(),
-#                 label="Leaderboard (Preview)",
-#                 interactive=False,
-#                 wrap=True,
-#             )
-#             eval_button = gr.Button("Evaluate and Update Leaderboard")
-#             eval_button.click(
-#                 evaluate_predictions,
-#                 inputs=[file_input, model_name_input, add_to_leaderboard_checkbox],
-#                 outputs=[eval_status, leaderboard_table_preview],
-#             )
-
-#         # Leaderboard Tab
-#         with gr.TabItem("🏅 Leaderboard"):
-#             leaderboard_table = gr.Dataframe(
-#                 value=load_leaderboard(),
-#                 label="Leaderboard",
-#                 interactive=False,
-#                 wrap=True,
-#             )
-#             refresh_button = gr.Button("Refresh Leaderboard")
-#             refresh_button.click(
-#                 lambda: load_leaderboard(),
-#                 inputs=[],
-#                 outputs=[leaderboard_table],
-#             )
-
-#     gr.Markdown(f"Last updated on **{LAST_UPDATED}**")
-
 # demo.launch()
-
 import gradio as gr
 import pandas as pd
+import os
 import re
 from datetime import datetime
-from huggingface_hub import hf_hub_download
-from datasets import Dataset
-import os
 
-HF_TOKEN = os.getenv("HF_TOKEN") # Hugging Face token stored as an environment variable
-LEADERBOARD_REPO = "username/leaderboard-dataset" # Replace with your leaderboard dataset name
-GROUND_TRUTH_REPO = "username/ground-truth-dataset" # Replace with your ground truth dataset name
+LEADERBOARD_FILE = "leaderboard.csv" # File to store all submissions persistently
 LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
 
-def load_ground_truth():
+def initialize_leaderboard_file():
     """
-    Load the ground truth file from a private Hugging Face dataset.
+    Ensure the leaderboard file exists and has the correct headers.
     """
-    try:
-        ground_truth_path = hf_hub_download(
-            repo_id=GROUND_TRUTH_REPO,
-            filename="ground_truth.csv",
-            use_auth_token=HF_TOKEN
-        )
-        return pd.read_csv(ground_truth_path)
-    except Exception as e:
-        print(f"Error loading ground truth: {e}")
-        return None
+    if not os.path.exists(LEADERBOARD_FILE):
+        # Create the file with headers
+        pd.DataFrame(columns=[
+            "Model Name", "Overall Accuracy", "Valid Accuracy",
+            "Correct Predictions", "Total Questions", "Timestamp"
+        ]).to_csv(LEADERBOARD_FILE, index=False)
+    else:
+        # Check if the file is empty and write headers if needed
+        if os.stat(LEADERBOARD_FILE).st_size == 0:
+            pd.DataFrame(columns=[
+                "Model Name", "Overall Accuracy", "Valid Accuracy",
+                "Correct Predictions", "Total Questions", "Timestamp"
+            ]).to_csv(LEADERBOARD_FILE, index=False)
 
-def load_leaderboard():
+def clean_answer(answer):
     """
-    Load the leaderboard from a private Hugging Face dataset.
+    Clean and normalize the predicted answers.
     """
-    try:
-        leaderboard_path = hf_hub_download(
-            repo_id=LEADERBOARD_REPO,
-            filename="leaderboard.csv",
-            use_auth_token=HF_TOKEN
-        )
-        return pd.read_csv(leaderboard_path)
-    except Exception as e:
-        print(f"Error loading leaderboard: {e}")
-        return pd.DataFrame({
-            "Model Name": [],
-            "Overall Accuracy": [],
-            "Valid Accuracy": [],
-            "Correct Predictions": [],
-            "Total Questions": [],
-            "Timestamp": [],
-        })
+    if pd.isna(answer):
+        return None
+    answer = str(answer)
+    clean = re.sub(r'[^A-Da-d]', '', answer)
+    if clean:
+        return clean[0].upper()
+    return None
 
 def update_leaderboard(results):
     """
-    Append new submission results to the private leaderboard dataset.
+    Append new submission results to the leaderboard file.
     """
-    try:
-        # Load existing leaderboard or create a new one
-        leaderboard_path = hf_hub_download(
-            repo_id=LEADERBOARD_REPO,
-            filename="leaderboard.csv",
-            use_auth_token=HF_TOKEN
-        )
-        df = pd.read_csv(leaderboard_path)
-    except:
-        df = pd.DataFrame(columns=[
-            "Model Name", "Overall Accuracy", "Valid Accuracy",
-            "Correct Predictions", "Total Questions", "Timestamp"
-        ])
-
-    # Add new entry
     new_entry = {
         "Model Name": results['model_name'],
         "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
@@ -239,38 +51,41 @@ def update_leaderboard(results):
         "Total Questions": results['total_questions'],
         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
     }
-    df = pd.concat([df, pd.DataFrame([new_entry])], ignore_index=True)
 
-    # Save locally and push updated dataset to Hugging Face
-    df.to_csv("leaderboard.csv", index=False)
-    dataset = Dataset.from_pandas(df)
-    dataset.push_to_hub(LEADERBOARD_REPO, split="train", private=True)
+    new_entry_df = pd.DataFrame([new_entry])
+    new_entry_df.to_csv(LEADERBOARD_FILE, mode='a', index=False, header=False)
 
-def clean_answer(answer):
+def load_leaderboard():
     """
-    Clean and normalize the predicted answers.
+    Load all submissions from the leaderboard file.
     """
-    if pd.isna(answer):
-        return None
-    answer = str(answer)
-    clean = re.sub(r'[^A-Da-d]', '', answer)
-    if clean:
-        return clean[0].upper()
-    return None
+    if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
+        return pd.DataFrame({
+            "Model Name": [],
+            "Overall Accuracy": [],
+            "Valid Accuracy": [],
+            "Correct Predictions": [],
+            "Total Questions": [],
+            "Timestamp": [],
+        })
+    return pd.read_csv(LEADERBOARD_FILE)
 
 def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
     """
     Evaluate predictions and optionally add results to the leaderboard.
     """
-    ground_truth_df = load_ground_truth()
-    if ground_truth_df is None:
+    ground_truth_file = "ground_truth.csv"
+    if not os.path.exists(ground_truth_file):
         return "Ground truth file not found.", load_leaderboard()
     if not prediction_file:
         return "Prediction file not uploaded.", load_leaderboard()
 
     try:
-        # Load predictions and merge with ground truth
+        # Load predictions and ground truth
         predictions_df = pd.read_csv(prediction_file.name)
+        ground_truth_df = pd.read_csv(ground_truth_file)
+
+        # Merge predictions with ground truth
         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
 
@@ -301,9 +116,12 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
     except Exception as e:
         return f"Error during evaluation: {str(e)}", load_leaderboard()
 
+# Initialize leaderboard file
+initialize_leaderboard_file()
+
 # Gradio Interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Secure Prediction Evaluation Tool with Private Leaderboard")
+    gr.Markdown("# Prediction Evaluation Tool with Leaderboard")
 
     with gr.Tabs():
         # Submission Tab
 
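For reference, a minimal sketch of the scoring path that evaluate_predictions implements after this commit. The sample rows below are hypothetical; the column names are the ones app.py expects (question_id and predicted_answer in the uploaded prediction CSV, question_id and Answer in ground_truth.csv).

import re
import pandas as pd

def clean_answer(answer):
    # As in app.py: drop everything except A-D characters and take the first one, uppercased.
    if pd.isna(answer):
        return None
    clean = re.sub(r'[^A-Da-d]', '', str(answer))
    return clean[0].upper() if clean else None

# Hypothetical submission and ground-truth rows.
predictions_df = pd.DataFrame({
    "question_id": [1, 2, 3],
    "predicted_answer": ["A", "(C)", None],
})
ground_truth_df = pd.DataFrame({
    "question_id": [1, 2, 3],
    "Answer": ["A", "C", "B"],
})

merged_df = pd.merge(predictions_df, ground_truth_df, on="question_id", how="inner")
merged_df["pred_answer"] = merged_df["predicted_answer"].apply(clean_answer)

valid = merged_df.dropna(subset=["pred_answer"])
correct = (valid["pred_answer"] == valid["Answer"]).sum()
overall_accuracy = correct / len(merged_df) if len(merged_df) > 0 else 0  # 2 of 3 rows correct
valid_accuracy = correct / len(valid) if len(valid) > 0 else 0            # 2 of 2 valid rows correct
print(round(overall_accuracy * 100, 2), round(valid_accuracy * 100, 2))   # 66.67 100.0

Because clean_answer keeps only the first A-D character it finds, a free-text prediction such as "The answer is C" normalizes to "A" (the 'a' in "answer" comes first); submissions score most reliably when predicted_answer is a bare letter.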