SondosMB committed · verified
Commit 514663d
1 Parent(s): 45d118c

Update app.py

Files changed (1):
  app.py  +36 -194

app.py CHANGED
@@ -1,168 +1,3 @@
-
-# # demo.launch()
-# import gradio as gr
-# import pandas as pd
-# import os
-# import re
-# from datetime import datetime
-
-# LEADERBOARD_FILE = "leaderboard.csv" # File to store all submissions persistently
-# LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
-
-# def initialize_leaderboard_file():
-#     """
-#     Ensure the leaderboard file exists and has the correct headers.
-#     """
-#     if not os.path.exists(LEADERBOARD_FILE):
-#         # Create the file with headers
-#         pd.DataFrame(columns=[
-#             "Model Name", "Overall Accuracy", "Valid Accuracy",
-#             "Correct Predictions", "Total Questions", "Timestamp"
-#         ]).to_csv(LEADERBOARD_FILE, index=False)
-#     else:
-#         # Check if the file is empty and write headers if needed
-#         if os.stat(LEADERBOARD_FILE).st_size == 0:
-#             pd.DataFrame(columns=[
-#                 "Model Name", "Overall Accuracy", "Valid Accuracy",
-#                 "Correct Predictions", "Total Questions", "Timestamp"
-#             ]).to_csv(LEADERBOARD_FILE, index=False)
-
-# def clean_answer(answer):
-#     """
-#     Clean and normalize the predicted answers.
-#     """
-#     if pd.isna(answer):
-#         return None
-#     answer = str(answer)
-#     clean = re.sub(r'[^A-Da-d]', '', answer)
-#     if clean:
-#         return clean[0].upper()
-#     return None
-
-# def update_leaderboard(results):
-#     """
-#     Append new submission results to the leaderboard file.
-#     """
-#     new_entry = {
-#         "Model Name": results['model_name'],
-#         "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
-#         "Valid Accuracy": round(results['valid_accuracy'] * 100, 2),
-#         "Correct Predictions": results['correct_predictions'],
-#         "Total Questions": results['total_questions'],
-#         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-#     }
-
-#     new_entry_df = pd.DataFrame([new_entry])
-#     new_entry_df.to_csv(LEADERBOARD_FILE, mode='a', index=False, header=False)
-
-# def load_leaderboard():
-#     """
-#     Load all submissions from the leaderboard file.
-#     """
-#     if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
-#         return pd.DataFrame({
-#             "Model Name": [],
-#             "Overall Accuracy": [],
-#             "Valid Accuracy": [],
-#             "Correct Predictions": [],
-#             "Total Questions": [],
-#             "Timestamp": [],
-#         })
-#     return pd.read_csv(LEADERBOARD_FILE)
-
-# def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
-#     """
-#     Evaluate predictions and optionally add results to the leaderboard.
-#     """
-#     ground_truth_file = "ground_truth.csv"
-#     if not os.path.exists(ground_truth_file):
-#         return "Ground truth file not found.", load_leaderboard()
-#     if not prediction_file:
-#         return "Prediction file not uploaded.", load_leaderboard()
-
-#     try:
-#         # Load predictions and ground truth
-#         predictions_df = pd.read_csv(prediction_file.name)
-#         ground_truth_df = pd.read_csv(ground_truth_file)
-
-#         # Merge predictions with ground truth
-#         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
-#         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
-
-#         # Evaluate predictions
-#         valid_predictions = merged_df.dropna(subset=['pred_answer'])
-#         correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
-#         total_predictions = len(merged_df)
-#         total_valid_predictions = len(valid_predictions)
-
-#         # Calculate accuracy
-#         overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
-#         valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
-
-#         results = {
-#             'model_name': model_name if model_name else "Unknown Model",
-#             'overall_accuracy': overall_accuracy,
-#             'valid_accuracy': valid_accuracy,
-#             'correct_predictions': correct_predictions,
-#             'total_questions': total_predictions,
-#         }
-
-#         # Update leaderboard only if opted in
-#         if add_to_leaderboard:
-#             update_leaderboard(results)
-#             return "Evaluation completed and added to leaderboard.", load_leaderboard()
-#         else:
-#             return "Evaluation completed but not added to leaderboard.", load_leaderboard()
-#     except Exception as e:
-#         return f"Error during evaluation: {str(e)}", load_leaderboard()

-# # Initialize leaderboard file
-# initialize_leaderboard_file()

-# # Gradio Interface
-# with gr.Blocks() as demo:
-#     gr.Markdown("# Prediction Evaluation Tool with Leaderboard")

-#     with gr.Tabs():
-#         # Submission Tab
-#         with gr.TabItem("🏅 Submission"):
-#             file_input = gr.File(label="Upload Prediction CSV")
-#             model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
-#             add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)
-#             eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
-#             leaderboard_table_preview = gr.Dataframe(
-#                 value=load_leaderboard(),
-#                 label="Leaderboard (Preview)",
-#                 interactive=False,
-#                 wrap=True,
-#             )
-#             eval_button = gr.Button("Evaluate and Update Leaderboard")
-#             eval_button.click(
-#                 evaluate_predictions,
-#                 inputs=[file_input, model_name_input, add_to_leaderboard_checkbox],
-#                 outputs=[eval_status, leaderboard_table_preview],
-#             )

-#         # Leaderboard Tab
-#         with gr.TabItem("🏅 Leaderboard"):
-#             leaderboard_table = gr.Dataframe(
-#                 value=load_leaderboard(),
-#                 label="Leaderboard",
-#                 interactive=False,
-#                 wrap=True,
-#             )
-#             refresh_button = gr.Button("Refresh Leaderboard")
-#             refresh_button.click(
-#                 lambda: load_leaderboard(),
-#                 inputs=[],
-#                 outputs=[leaderboard_table],
-#             )

-#     gr.Markdown(f"Last updated on **{LAST_UPDATED}**")

-# demo.launch()
-
-
 import gradio as gr
 import pandas as pd
 import os
@@ -309,70 +144,77 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
 
 initialize_leaderboard_file()
 
-import gradio as gr
-
 # Function to set default mode
 css_tech_theme = """
 body {
-    background-color: #f4f4f9;
-    color: #2e2e2e;
-    font-family: Arial, sans-serif;
+    background-color: #ffffff;
+    color: #333333;
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+    line-height: 1.6;
 }
 
 a {
-    color: #4a90e2;
+    color: #007acc;
+    font-weight: 500;
 }
 
 a:hover {
-    color: #7d56c5;
+    color: #005bb5;
     text-decoration: underline;
 }
 
 button {
-    background-color: #4a90e2;
+    background-color: #007acc;
     color: #ffffff;
-    border-radius: 5px;
-    padding: 10px;
+    border: none;
+    border-radius: 6px;
+    padding: 10px 15px;
+    font-size: 14px;
+    cursor: pointer;
+    transition: background-color 0.3s ease;
 }
 
 button:hover {
-    background-color: #7d56c5;
+    background-color: #005bb5;
 }
 
 .input-row, .tab-content {
-    background-color: #e9eef5;
+    background-color: #f9f9fc;
     border-radius: 8px;
-    padding: 15px;
+    padding: 20px;
+    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
 }
 
 .dataframe {
-    color: #2e2e2e;
-    background-color: #f4f4f9;
-    border: 1px solid #4a90e2;
+    color: #333333;
+    background-color: #ffffff;
+    border: 1px solid #d1d5db;
+    border-radius: 6px;
+    padding: 10px;
+    font-size: 14px;
 }
 """
 
-
 with gr.Blocks(css=css_tech_theme) as demo:
     gr.Markdown("""
-    # 🏆 **Mobile-MMLU Benchmark Competition**
-    ### 🌟 **Welcome to the Competition Overview**
+    # 🏆 Mobile-MMLU Benchmark Competition
+    ### 🌟 Welcome to the Competition Overview
     ![Competition Logo](mobile_mmlu_sd.jpeg)
     ---
-    Welcome to the **Mobile-MMLU Benchmark Competition**. Here you can submit your predictions, view the leaderboard, and track your performance!
+    Welcome to the **Mobile-MMLU Benchmark Competition**. Here you can submit your predictions, view the leaderboard, and track your performance.
     ---
     """)
 
     with gr.Tabs():
         with gr.TabItem("📖 Overview"):
             gr.Markdown("""
-## 📘 Overview
+## Overview
 Welcome to the **Mobile-MMLU Benchmark Competition**! Evaluate mobile-compatible Large Language Models (LLMs) on **16,186 scenario-based and factual questions** across **80 fields**.
 ---
-### 🌐 **What is Mobile-MMLU?**
+### What is Mobile-MMLU?
 Mobile-MMLU is a benchmark designed to test the capabilities of LLMs optimized for mobile use. Contribute to advancing mobile AI systems by competing to achieve the highest accuracy.
 
-### 🔍 **How It Works**
+### How It Works
 1. **Download the Dataset**
    Access the dataset and instructions on our [GitHub page](https://github.com/your-github-repo).
 2. **Generate Predictions**
@@ -385,17 +227,17 @@ Mobile-MMLU is a benchmark designed to test the capabilities of LLMs optimized f
    View real-time rankings on the leaderboard.
 
 ---
-### 🏆 **Competition Tasks**
+### Competition Tasks
 Participants must:
 - Optimize their models for **accuracy**.
 - Answer diverse field questions effectively.
 ---
-### 🚀 **Get Started**
+### Get Started
 1. Prepare your model using resources on our [GitHub page](https://github.com/your-github-repo).
 2. Submit predictions in the required format.
 3. Track your progress on the leaderboard.
 
-### 📧 **Contact Us**
+### Contact Us
 For support, email: [Insert Email Address]
 ---
 """)
@@ -421,18 +263,18 @@ For support, email: [Insert Email Address]
         with gr.TabItem("🏅 Leaderboard"):
             leaderboard_table = gr.Dataframe(
                 value=load_leaderboard(),
-                label="🏆 Leaderboard",
+                label="Leaderboard",
                 interactive=False,
                 wrap=True,
             )
-            refresh_button = gr.Button("🔄 Refresh Leaderboard")
+            refresh_button = gr.Button("Refresh Leaderboard")
             refresh_button.click(
                 lambda: load_leaderboard(),
                 inputs=[],
                 outputs=[leaderboard_table],
             )
 
-    gr.Markdown(f"**📅 Last updated:** {LAST_UPDATED}")
+    gr.Markdown(f"**Last updated:** {LAST_UPDATED}")
 
     demo.launch()
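A note for submitters reading this diff: per the hunk header above, `evaluate_predictions` is kept in the new app.py, and the commented-out copy removed in the first hunk shows its logic merging the uploaded CSV with `ground_truth.csv` on `question_id` and normalizing `predicted_answer` to a single A-D letter. Assuming the live implementation mirrors that removed copy, a minimal submission file can be produced as in this sketch (the ids and answers are hypothetical placeholders):

```python
# Minimal sketch of a submission CSV for evaluate_predictions().
# Column names follow the merge/clean logic in app.py; the question ids
# below are placeholders, not real Mobile-MMLU ids.
import pandas as pd

predictions = pd.DataFrame({
    "question_id": [101, 102, 103],         # must match ids in ground_truth.csv
    "predicted_answer": ["A", "b)", "C."],  # cleaned to "A", "B", "C" at evaluation time
})
predictions.to_csv("predictions.csv", index=False)
```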
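The normalization itself is worth restating standalone, because `clean_answer` keeps every A-D character before taking the first one; a sketch of the same logic, with the edge case made explicit:

```python
# Standalone restatement of clean_answer() from app.py, for local testing.
import re

import pandas as pd


def clean_answer(answer):
    """Drop everything outside A-D/a-d, keep the first survivor, uppercase it."""
    if pd.isna(answer):
        return None
    clean = re.sub(r'[^A-Da-d]', '', str(answer))
    return clean[0].upper() if clean else None


assert clean_answer("b)") == "B"
assert clean_answer(" D.") == "D"
# Caution: the 'A' in the word "Answer" survives the filter and wins,
# so model outputs should contain only the option letter.
assert clean_answer("Answer: C") == "A"
```

A stricter pattern (for example, matching only a leading option letter) would avoid that surprise, but it would also change existing scores, so the sketch keeps the behavior as committed.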