SondosMB committed on
Commit
ea94a6e
·
verified ·
1 Parent(s): 7ccc1b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +188 -39
app.py CHANGED
@@ -148,33 +148,196 @@ initialize_leaderboard_file()
148
  # Function to set default mode
149
  import gradio as gr
150
 
151
- # Ensure CSS is correctly defined
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  css_tech_theme = """
153
  body {
154
- background-color: #f4f6fa;
155
- color: #333333;
156
  font-family: 'Roboto', sans-serif;
157
- line-height: 1.8;
 
 
 
158
  }
159
-
160
  .center-content {
161
  display: flex;
162
  flex-direction: column;
163
  align-items: center;
164
  justify-content: center;
165
  text-align: center;
166
- margin: 30px 0;
167
  padding: 20px;
 
 
 
 
168
  }
169
-
170
- h1, h2 {
171
- color: #5e35b1;
172
- margin: 15px 0;
173
- text-align: center;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  }
175
- img {
176
- width: 100px;
177
- height: 100px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  }
179
  """
180
 
@@ -185,34 +348,23 @@ with gr.Blocks(css=css_tech_theme) as demo:
185
  <h1>🏆 Mobile-MMLU Benchmark Competition</h1>
186
  <h2>🌟 Welcome to the Competition</h2>
187
  <p>
188
- Welcome to the Mobile-MMLU Benchmark Competition. Here you can submit your predictions,
189
  view the leaderboard, and track your performance!
190
  </p>
191
  <hr>
192
  </div>
193
  """)
194
 
195
-
196
  with gr.Tabs(elem_id="tabs"):
197
  with gr.TabItem("📖 Overview"):
198
  gr.Markdown("""
199
- **Welcome to the Mobile-MMLU Benchmark Competition! Evaluate mobile-compatible Large Language Models (LLMs) on 16,186 scenario-based and factual questions across 80 fields**.
200
- ---
201
- ## What is Mobile-MMLU?
202
- Mobile-MMLU is a benchmark designed to test the capabilities of LLMs optimized for mobile use. Contribute to advancing mobile AI systems by competing to achieve the highest accuracy.
203
- ---
204
- ## How It Works
205
- 1. **Download the Dataset**
206
- Access the dataset and instructions on our [GitHub page](https://github.com/your-github-repo).
207
- 2. **Generate Predictions**
208
- Use your LLM to answer the dataset questions. Format your predictions as a CSV file.
209
- 3. **Submit Predictions**
210
- Upload your predictions on this platform.
211
- 4. **Evaluation**
212
- Submissions are scored on accuracy.
213
- 5. **Leaderboard**
214
- View real-time rankings on the leaderboard.
215
- ---
216
  """)
217
 
218
  with gr.TabItem("📤 Submission"):
@@ -229,10 +381,7 @@ with gr.Blocks(css=css_tech_theme) as demo:
229
 
230
  def handle_evaluation(file, model_name, add_to_leaderboard):
231
  status, leaderboard = evaluate_predictions(file, model_name, add_to_leaderboard)
232
- if leaderboard.empty:
233
- overall_accuracy = 0
234
- else:
235
- overall_accuracy = leaderboard.iloc[-1]["Overall Accuracy"]
236
  return status, overall_accuracy
237
 
238
  eval_button.click(
@@ -255,6 +404,6 @@ with gr.Blocks(css=css_tech_theme) as demo:
255
  outputs=[leaderboard_table],
256
  )
257
 
258
- gr.Markdown(f"Last updated on **{LAST_UPDATED}**")
259
 
260
- demo.launch()
 
148
  # Function to set default mode
149
  import gradio as gr
150
 
151
+ # # Ensure CSS is correctly defined
152
+ # css_tech_theme = """
153
+ # body {
154
+ # background-color: #f4f6fa;
155
+ # color: #333333;
156
+ # font-family: 'Roboto', sans-serif;
157
+ # line-height: 1.8;
158
+ # }
159
+
160
+ # .center-content {
161
+ # display: flex;
162
+ # flex-direction: column;
163
+ # align-items: center;
164
+ # justify-content: center;
165
+ # text-align: center;
166
+ # margin: 30px 0;
167
+ # padding: 20px;
168
+ # }
169
+
170
+ # h1, h2 {
171
+ # color: #5e35b1;
172
+ # margin: 15px 0;
173
+ # text-align: center;
174
+ # }
175
+ # img {
176
+ # width: 100px;
177
+ # height: 100px;
178
+ # }
179
+ # """
180
+
181
+ # # Create the Gradio Interface
182
+ # with gr.Blocks(css=css_tech_theme) as demo:
183
+ # gr.Markdown("""
184
+ # <div class="center-content">
185
+ # <h1>🏆 Mobile-MMLU Benchmark Competition</h1>
186
+ # <h2>🌟 Welcome to the Competition</h2>
187
+ # <p>
188
+ # Welcome to the Mobile-MMLU Benchmark Competition. Here you can submit your predictions,
189
+ # view the leaderboard, and track your performance!
190
+ # </p>
191
+ # <hr>
192
+ # </div>
193
+ # """)
194
+
195
+
196
+ # with gr.Tabs(elem_id="tabs"):
197
+ # with gr.TabItem("📖 Overview"):
198
+ # gr.Markdown("""
199
+ # **Welcome to the Mobile-MMLU Benchmark Competition! Evaluate mobile-compatible Large Language Models (LLMs) on 16,186 scenario-based and factual questions across 80 fields**.
200
+ # ---
201
+ # ## What is Mobile-MMLU?
202
+ # Mobile-MMLU is a benchmark designed to test the capabilities of LLMs optimized for mobile use. Contribute to advancing mobile AI systems by competing to achieve the highest accuracy.
203
+ # ---
204
+ # ## How It Works
205
+ # 1. **Download the Dataset**
206
+ # Access the dataset and instructions on our [GitHub page](https://github.com/your-github-repo).
207
+ # 2. **Generate Predictions**
208
+ # Use your LLM to answer the dataset questions. Format your predictions as a CSV file.
209
+ # 3. **Submit Predictions**
210
+ # Upload your predictions on this platform.
211
+ # 4. **Evaluation**
212
+ # Submissions are scored on accuracy.
213
+ # 5. **Leaderboard**
214
+ # View real-time rankings on the leaderboard.
215
+ # ---
216
+ # """)
217
+
218
+ # with gr.TabItem("📤 Submission"):
219
+ # with gr.Row():
220
+ # file_input = gr.File(label="Upload Prediction CSV", file_types=[".csv"], interactive=True)
221
+ # model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
222
+
223
+ # with gr.Row():
224
+ # overall_accuracy_display = gr.Number(label="Overall Accuracy", interactive=False)
225
+ # add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)
226
+
227
+ # eval_button = gr.Button("Evaluate")
228
+ # eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
229
+
230
+ # def handle_evaluation(file, model_name, add_to_leaderboard):
231
+ # status, leaderboard = evaluate_predictions(file, model_name, add_to_leaderboard)
232
+ # if leaderboard.empty:
233
+ # overall_accuracy = 0
234
+ # else:
235
+ # overall_accuracy = leaderboard.iloc[-1]["Overall Accuracy"]
236
+ # return status, overall_accuracy
237
+
238
+ # eval_button.click(
239
+ # handle_evaluation,
240
+ # inputs=[file_input, model_name_input, add_to_leaderboard_checkbox],
241
+ # outputs=[eval_status, overall_accuracy_display],
242
+ # )
243
+
244
+ # with gr.TabItem("🏅 Leaderboard"):
245
+ # leaderboard_table = gr.Dataframe(
246
+ # value=load_leaderboard(),
247
+ # label="Leaderboard",
248
+ # interactive=False,
249
+ # wrap=True,
250
+ # )
251
+ # refresh_button = gr.Button("Refresh Leaderboard")
252
+ # refresh_button.click(
253
+ # lambda: load_leaderboard(),
254
+ # inputs=[],
255
+ # outputs=[leaderboard_table],
256
+ # )
257
+
258
+ # gr.Markdown(f"Last updated on **{LAST_UPDATED}**")
259
+
260
+ # demo.launch()
261
+
262
+ import gradio as gr
263
+
264
+ # Custom CSS to match website style
265
  css_tech_theme = """
266
  body {
 
 
267
  font-family: 'Roboto', sans-serif;
268
+ margin: 0;
269
+ padding: 0;
270
+ color: #333;
271
+ background: #f4f6fa;
272
  }
 
273
  .center-content {
274
  display: flex;
275
  flex-direction: column;
276
  align-items: center;
277
  justify-content: center;
278
  text-align: center;
 
279
  padding: 20px;
280
+ background: linear-gradient(135deg, #6a1b9a, #64b5f6);
281
+ color: #ffffff;
282
+ border-radius: 10px;
283
+ margin: 20px;
284
  }
285
+ .center-content h1, .center-content h2 {
286
+ margin: 10px 0;
287
+ color: #ffffff;
288
+ }
289
+ .center-content p {
290
+ font-size: 1.2em;
291
+ line-height: 1.8;
292
+ color: #e1e8f0;
293
+ }
294
+ .center-content hr {
295
+ border: 1px solid #ffffff;
296
+ width: 80%;
297
+ margin: 20px 0;
298
+ }
299
+ .tabs {
300
+ display: flex;
301
+ justify-content: center;
302
+ margin: 20px 0;
303
+ }
304
+ .tab-button {
305
+ font-size: 1em;
306
+ padding: 10px 20px;
307
+ border: none;
308
+ background: #6a1b9a;
309
+ color: white;
310
+ cursor: pointer;
311
+ margin-right: 10px;
312
+ }
313
+ .tab-button.active {
314
+ background: #64b5f6;
315
+ }
316
+ .tab-content {
317
+ display: none;
318
+ padding: 20px;
319
+ background: #ffffff;
320
+ border-radius: 10px;
321
+ box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1);
322
  }
323
+ .tab-content.active {
324
+ display: block;
325
+ }
326
+ #leaderboard {
327
+ max-width: 100%;
328
+ margin: 20px auto;
329
+ border-radius: 10px;
330
+ overflow: hidden;
331
+ border: 1px solid #e5eff2;
332
+ background: #f9f9f9;
333
+ }
334
+ footer {
335
+ text-align: center;
336
+ padding: 20px;
337
+ background: #8e44ad;
338
+ color: #ffffff;
339
+ border-top: 5px solid #64b5f6;
340
+ margin-top: 20px;
341
  }
342
  """
343
 
 
348
  <h1>🏆 Mobile-MMLU Benchmark Competition</h1>
349
  <h2>🌟 Welcome to the Competition</h2>
350
  <p>
351
+ Welcome to the Mobile-MMLU Benchmark Competition. Submit your predictions,
352
  view the leaderboard, and track your performance!
353
  </p>
354
  <hr>
355
  </div>
356
  """)
357
 
 
358
  with gr.Tabs(elem_id="tabs"):
359
  with gr.TabItem("📖 Overview"):
360
  gr.Markdown("""
361
+ <div class="tab-content active">
362
+ <h2>About the Competition</h2>
363
+ <p>
364
+ **Mobile-MMLU** evaluates mobile-optimized LLMs on 16,186 scenario-based and factual questions across 80 fields.
365
+ <br><br> Test your model, submit predictions, and climb the leaderboard!
366
+ </p>
367
+ </div>
 
 
 
 
 
 
 
 
 
 
368
  """)
369
 
370
  with gr.TabItem("📤 Submission"):
 
381
 
382
  def handle_evaluation(file, model_name, add_to_leaderboard):
383
  status, leaderboard = evaluate_predictions(file, model_name, add_to_leaderboard)
384
+ overall_accuracy = leaderboard.iloc[-1]["Overall Accuracy"] if not leaderboard.empty else 0
 
 
 
385
  return status, overall_accuracy
386
 
387
  eval_button.click(
 
404
  outputs=[leaderboard_table],
405
  )
406
 
407
+ gr.Markdown("<footer>Mobile-MMLU Competition | Last Updated: December 2024</footer>")
408
 
409
+ demo.launch()