SondosMB committed
Commit 9803b0e · verified · 1 parent: 5a42658

Update app.py

Files changed (1)
  1. app.py +136 -13
app.py CHANGED
@@ -151,19 +151,34 @@ if not HF_TOKEN:
 # return f"Error during evaluation: {str(e)}", load_leaderboard()
 
 # initialize_leaderboard_file()
+# def initialize_leaderboard_file():
+#     """
+#     Ensure the leaderboard file exists and has the correct headers.
+#     """
+#     if not os.path.exists(LEADERBOARD_FILE):
+#         pd.DataFrame(columns=[
+#             "Model Name", "Overall Accuracy", "Valid Accuracy",
+#             "Correct Predictions", "Total Questions", "Timestamp"
+#         ]).to_csv(LEADERBOARD_FILE, index=False)
+#     elif os.stat(LEADERBOARD_FILE).st_size == 0:
+#         pd.DataFrame(columns=[
+#             "Model Name", "Overall Accuracy", "Valid Accuracy",
+#             "Correct Predictions", "Total Questions", "Timestamp"
+#         ]).to_csv(LEADERBOARD_FILE, index=False)
+
 def initialize_leaderboard_file():
     """
     Ensure the leaderboard file exists and has the correct headers.
     """
     if not os.path.exists(LEADERBOARD_FILE):
         pd.DataFrame(columns=[
-            "Model Name", "Overall Accuracy", "Valid Accuracy",
-            "Correct Predictions", "Total Questions", "Timestamp"
+            "Model Name", "Overall Accuracy", "Correct Predictions",
+            "Total Questions", "Timestamp"
         ]).to_csv(LEADERBOARD_FILE, index=False)
     elif os.stat(LEADERBOARD_FILE).st_size == 0:
         pd.DataFrame(columns=[
-            "Model Name", "Overall Accuracy", "Valid Accuracy",
-            "Correct Predictions", "Total Questions", "Timestamp"
+            "Model Name", "Overall Accuracy", "Correct Predictions",
+            "Total Questions", "Timestamp"
         ]).to_csv(LEADERBOARD_FILE, index=False)
 
 def clean_answer(answer):
@@ -174,6 +189,48 @@ def clean_answer(answer):
     return clean[0].upper() if clean else None
 
 
+# def update_leaderboard(results):
+#     """
+#     Append new submission results to the leaderboard file and push updates to the Hugging Face repository.
+#     """
+#     new_entry = {
+#         "Model Name": results['model_name'],
+#         "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
+#         "Valid Accuracy": round(results['valid_accuracy'] * 100, 2),
+#         "Correct Predictions": results['correct_predictions'],
+#         "Total Questions": results['total_questions'],
+#         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+#     }
+
+#     try:
+#         # Update the local leaderboard file
+#         new_entry_df = pd.DataFrame([new_entry])
+#         file_exists = os.path.exists(LEADERBOARD_FILE)
+
+#         new_entry_df.to_csv(
+#             LEADERBOARD_FILE,
+#             mode='a', # Append mode
+#             index=False,
+#             header=not file_exists # Write header only if the file is new
+#         )
+#         print(f"Leaderboard updated successfully at {LEADERBOARD_FILE}")
+
+#         # Push the updated file to the Hugging Face repository using HTTP API
+#         api = HfApi()
+#         token = HfFolder.get_token()
+
+#         api.upload_file(
+#             path_or_fileobj=LEADERBOARD_FILE,
+#             path_in_repo="leaderboard.csv",
+#             repo_id="SondosMB/ss", # Your Space repository
+#             repo_type="space",
+#             token=token
+#         )
+#         print("Leaderboard changes pushed to Hugging Face repository.")
+
+#     except Exception as e:
+#         print(f"Error updating leaderboard file: {e}")
+
 def update_leaderboard(results):
     """
     Append new submission results to the leaderboard file and push updates to the Hugging Face repository.
@@ -181,7 +238,6 @@ def update_leaderboard(results):
     new_entry = {
         "Model Name": results['model_name'],
         "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
-        "Valid Accuracy": round(results['valid_accuracy'] * 100, 2),
        "Correct Predictions": results['correct_predictions'],
        "Total Questions": results['total_questions'],
        "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
@@ -207,7 +263,7 @@ def update_leaderboard(results):
         api.upload_file(
             path_or_fileobj=LEADERBOARD_FILE,
             path_in_repo="leaderboard.csv",
-            repo_id="SondosMB/ss", # Your Space repository
+            repo_id="SondosMB/Mobile-MMLU", # Your Space repository
             repo_type="space",
             token=token
         )
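For reference, a minimal standalone sketch of the append-and-push flow from the hunks above, using the post-commit column set and the SondosMB/Mobile-MMLU Space as the upload target. The results values, the LEADERBOARD_FILE path, the HF_TOKEN environment variable, and the guard around the upload are illustrative assumptions, not the app's exact surrounding code.

# Minimal sketch (not the app's exact code): append one leaderboard row and
# push it to the Space, mirroring the post-commit schema without "Valid Accuracy".
import os
from datetime import datetime

import pandas as pd
from huggingface_hub import HfApi

LEADERBOARD_FILE = "leaderboard.csv"  # placeholder path

results = {  # illustrative values only
    "model_name": "demo-model",
    "overall_accuracy": 0.8125,
    "correct_predictions": 13,
    "total_questions": 16,
}

new_entry = {
    "Model Name": results["model_name"],
    "Overall Accuracy": round(results["overall_accuracy"] * 100, 2),
    "Correct Predictions": results["correct_predictions"],
    "Total Questions": results["total_questions"],
    "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
}

# Append the row locally, writing the header only if the file is new.
file_exists = os.path.exists(LEADERBOARD_FILE)
pd.DataFrame([new_entry]).to_csv(
    LEADERBOARD_FILE, mode="a", index=False, header=not file_exists
)

token = os.environ.get("HF_TOKEN")  # assumption: token provided via the environment
if token:  # skip the push when no credentials are available
    HfApi().upload_file(
        path_or_fileobj=LEADERBOARD_FILE,
        path_in_repo="leaderboard.csv",
        repo_id="SondosMB/Mobile-MMLU",  # the Space targeted after this commit
        repo_type="space",
        token=token,
    )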
@@ -218,17 +274,88 @@ def update_leaderboard(results):
 
 
 
+
+# def load_leaderboard():
+#     if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
+#         return pd.DataFrame({
+#             "Model Name": [],
+#             "Overall Accuracy": [],
+#             "Valid Accuracy": [],
+#             "Correct Predictions": [],
+#             "Total Questions": [],
+#             "Timestamp": [],
+#         })
+#     return pd.read_csv(LEADERBOARD_FILE)
+
+
 def load_leaderboard():
     if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
         return pd.DataFrame({
             "Model Name": [],
             "Overall Accuracy": [],
-            "Valid Accuracy": [],
             "Correct Predictions": [],
             "Total Questions": [],
             "Timestamp": [],
         })
     return pd.read_csv(LEADERBOARD_FILE)
+
+# def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
+#     try:
+#         ground_truth_path = hf_hub_download(
+#             repo_id="SondosMB/ground-truth-dataset",
+#             filename="ground_truth.csv",
+#             repo_type="dataset",
+#             use_auth_token=True
+#         )
+#         ground_truth_df = pd.read_csv(ground_truth_path)
+#     except FileNotFoundError:
+#         return "Ground truth file not found in the dataset repository.", load_leaderboard()
+#     except Exception as e:
+#         return f"Error loading ground truth: {e}", load_leaderboard()
+
+#     if not prediction_file:
+#         return "Prediction file not uploaded.", load_leaderboard()
+
+#     try:
+#         #load predition file
+#         predictions_df = pd.read_csv(prediction_file.name)
+#         # Validate required columns in prediction file
+#         required_columns = ['question_id', 'predicted_answer']
+#         missing_columns = [col for col in required_columns if col not in predictions_df.columns]
+#         if missing_columns:
+#             return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
+#                     load_leaderboard())
+
+#         # Validate 'Answer' column in ground truth file
+#         if 'Answer' not in ground_truth_df.columns:
+#             return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard()
+#         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
+#         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
+
+#         valid_predictions = merged_df.dropna(subset=['pred_answer'])
+#         correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
+#         total_predictions = len(merged_df)
+#         total_valid_predictions = len(valid_predictions)
+
+#         overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
+#         valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
+
+#         results = {
+#             'model_name': model_name if model_name else "Unknown Model",
+#             'overall_accuracy': overall_accuracy,
+#             'valid_accuracy': valid_accuracy,
+#             'correct_predictions': correct_predictions,
+#             'total_questions': total_predictions,
+#         }
+
+#         if add_to_leaderboard:
+#             update_leaderboard(results)
+#             return "Evaluation completed and added to leaderboard.", load_leaderboard()
+#         else:
+#             return "Evaluation completed but not added to leaderboard.", load_leaderboard()
+
+#     except Exception as e:
+#         return f"Error during evaluation: {str(e)}", load_leaderboard()
 
 def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
     try:
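Both the commented-out and the current evaluate_predictions fetch the ground truth from a gated dataset repository before scoring. Below is a standalone sketch of that fetch, assuming huggingface_hub is installed and an HF_TOKEN with read access to SondosMB/ground-truth-dataset is available in the environment; the app itself handles authentication in its own surrounding code.

# Minimal sketch of the ground-truth fetch used by evaluate_predictions.
# Assumptions: huggingface_hub installed, HF_TOKEN grants read access to the
# gated dataset; not the app's exact surrounding code.
import os

import pandas as pd
from huggingface_hub import hf_hub_download

ground_truth_path = hf_hub_download(
    repo_id="SondosMB/ground-truth-dataset",  # dataset repo referenced in app.py
    filename="ground_truth.csv",
    repo_type="dataset",
    token=os.environ.get("HF_TOKEN"),
)
ground_truth_df = pd.read_csv(ground_truth_path)
# The app later checks that an 'Answer' column is present before merging.
print(ground_truth_df.columns.tolist())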
@@ -248,9 +375,9 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
         return "Prediction file not uploaded.", load_leaderboard()
 
     try:
-        #load predition file
+        #load prediction file
         predictions_df = pd.read_csv(prediction_file.name)
-        # Validate required columns in prediction file
+        # Validate required columns in prediction file
         required_columns = ['question_id', 'predicted_answer']
         missing_columns = [col for col in required_columns if col not in predictions_df.columns]
         if missing_columns:
@@ -266,15 +393,12 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
         valid_predictions = merged_df.dropna(subset=['pred_answer'])
         correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
         total_predictions = len(merged_df)
-        total_valid_predictions = len(valid_predictions)
 
         overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
-        valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
 
         results = {
             'model_name': model_name if model_name else "Unknown Model",
             'overall_accuracy': overall_accuracy,
-            'valid_accuracy': valid_accuracy,
             'correct_predictions': correct_predictions,
             'total_questions': total_predictions,
         }
@@ -287,7 +411,6 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
 
     except Exception as e:
         return f"Error during evaluation: {str(e)}", load_leaderboard()
-
 initialize_leaderboard_file()
 
 
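Finally, a small offline sketch of the scoring path as it stands after this commit: the prediction CSV must provide question_id and predicted_answer columns, predictions are merged with the ground truth on question_id, and only overall accuracy (correct answers over all merged questions) is reported. The toy data and the simplified clean_answer stand-in below are assumptions for illustration; the real clean_answer is defined earlier in app.py and only its final line appears in this diff.

# Minimal offline sketch of the post-commit scoring path (toy data, simplified
# clean_answer stand-in); not the app's exact code.
import pandas as pd

def clean_answer_stub(answer):
    """Simplified stand-in: keep the first alphabetic character, uppercased."""
    if not isinstance(answer, str):
        return None
    letters = [ch for ch in answer if ch.isalpha()]
    return letters[0].upper() if letters else None

# Toy prediction file contents: the two columns the app validates.
predictions_df = pd.DataFrame({
    "question_id": [1, 2, 3, 4],
    "predicted_answer": ["A", "b) paris", None, "C"],
})

# Toy ground truth with the 'Answer' column the app checks for.
ground_truth_df = pd.DataFrame({
    "question_id": [1, 2, 3, 4],
    "Answer": ["A", "B", "D", "B"],
})

merged_df = pd.merge(predictions_df, ground_truth_df, on="question_id", how="inner")
merged_df["pred_answer"] = merged_df["predicted_answer"].apply(clean_answer_stub)

valid_predictions = merged_df.dropna(subset=["pred_answer"])
correct_predictions = (valid_predictions["pred_answer"] == valid_predictions["Answer"]).sum()
total_predictions = len(merged_df)

# After this commit only overall accuracy is kept (no separate valid accuracy).
overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
print(f"{correct_predictions}/{total_predictions} correct, overall accuracy = {overall_accuracy:.2%}")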