SondosMB committed on
Commit 7bdeca8 · verified · 1 Parent(s): 141cb13

Update app.py

Files changed (1): app.py (+84, -71)
app.py CHANGED
@@ -309,94 +309,108 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
 
 initialize_leaderboard_file()
 
-with gr.Blocks() as demo:
+
+# Function to set default mode
+css_dark_mode = """
+body {
+    background-color: #1e1e1e;
+    color: #ffffff;
+}
+
+a {
+    color: #4caf50;
+}
+
+a:hover {
+    color: #81c784;
+    text-decoration: underline;
+}
+
+button {
+    background-color: #4caf50;
+    color: #ffffff;
+    border-radius: 5px;
+    padding: 10px;
+}
+
+button:hover {
+    background-color: #81c784;
+}
+
+.input-row, .tab-content {
+    background-color: #333333;
+    border-radius: 8px;
+    padding: 15px;
+}
+
+.dataframe {
+    color: #ffffff;
+    background-color: #2e2e2e;
+    border: 1px solid #4caf50;
+}
+"""
+
+with gr.Blocks(css=css_dark_mode) as demo:
     gr.Markdown("""
-    # Competition Title
-    ### Welcome to the Competition Overview
+    # 🏆 **Competition Title**
+    ### 🌟 **Welcome to the Competition Overview**
     ![Competition Logo](mobile_mmlu_sd.jpeg)
-    Here you can submit your predictions, view the leaderboard, and track your performance!
+    ---
+    Welcome to the **Mobile-MMLU Benchmark Competition**. Here you can submit your predictions, view the leaderboard, and track your performance!
+    ---
     """)
 
     with gr.Tabs():
         with gr.TabItem("📖 Overview"):
             gr.Markdown("""
-            ## Overview
-            # Welcome to the Mobile-MMLU Benchmark Competition
-
-            Evaluate the performance of mobile-compatible Large Language Models (LLMs) on 16,186 scenario-based and factual questions across 80 fields. Compete to showcase your model’s accuracy for real-world mobile scenarios.
-
-            ## What is Mobile-MMLU?
-
-            Mobile-MMLU is a benchmark designed to test the capabilities of LLMs optimized for mobile use. By participating in this competition, you contribute to advancing mobile intelligence benchmarks and shaping the future of mobile-compatible AI systems.
-
+            ## 📘 Overview
+            Welcome to the **Mobile-MMLU Benchmark Competition**! Evaluate mobile-compatible Large Language Models (LLMs) on **16,186 scenario-based and factual questions** across **80 fields**.
             ---
-
-            ## How It Works
-
-            1. **Download the Dataset**
-               Access the dataset and detailed generation instructions on our [GitHub page](https://github.com/your-github-repo).
-
-            2. **Generate Predictions**
-               Use your LLM to answer the questions and format your predictions as a CSV file with the following structure as written on our GitHub page :
-
-            3. **Submit Predictions**
-               Upload your predictions via the submission portal.
-
-            4. **Evaluation**
-               Your submission will be scored on accuracy
-
-            5. **Leaderboard**
-               Compare your results against other participants on the live leaderboard.
+            ### 🌐 **What is Mobile-MMLU?**
+            Mobile-MMLU is a benchmark designed to test the capabilities of LLMs optimized for mobile use. Contribute to advancing mobile AI systems by competing to achieve the highest accuracy.
+
+            ### 🔍 **How It Works**
+            1. **Download the Dataset**
+               Access the dataset and instructions on our [GitHub page](https://github.com/your-github-repo).
+            2. **Generate Predictions**
+               Use your LLM to answer the dataset questions. Format your predictions as a CSV file.
+            3. **Submit Predictions**
+               Upload your predictions on this platform.
+            4. **Evaluation**
+               Submissions are scored on accuracy.
+            5. **Leaderboard**
+               View real-time rankings on the leaderboard.
 
             ---
-
-            ## Competition Tasks
-
-            Participants are tasked with generating predictions for the dataset and optimizing their models for:
-            - **Accuracy**: Correctly answering questions across diverse fields.
+            ### 🏆 **Competition Tasks**
+            Participants must:
+            - Optimize their models for **accuracy**.
+            - Answer diverse field questions effectively.
             ---
+            ### 🚀 **Get Started**
+            1. Prepare your model using resources on our [GitHub page](https://github.com/your-github-repo).
+            2. Submit predictions in the required format.
+            3. Track your progress on the leaderboard.
 
-
-            ## Get Started
-
-            1. **Prepare Your Model**
-               Refer to our [GitHub page](https://github.com/your-github-repo) for dataset access and response generation instructions.
-
-            2. **Submit Predictions**
-               Format your submission as specified in the rules.
-
-            3. **Track Progress**
-               Check the leaderboard for real-time rankings.
-
+            ### 📧 **Contact Us**
+            For support, email: [Insert Email Address]
             ---
-
-            ## Contact Us
-
-            For questions or support, contact us at: [Insert Email Address]
-            """)
+            """)
 
         with gr.TabItem("📤 Submission"):
             with gr.Row():
-                file_input = gr.File(label="Upload Prediction CSV", file_types=[".csv"], interactive=True)
-                model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
+                file_input = gr.File(label="📂 Upload Prediction CSV", file_types=[".csv"], interactive=True)
+                model_name_input = gr.Textbox(label="🖋️ Model Name", placeholder="Enter your model name")
 
             with gr.Row():
-                overall_accuracy_display = gr.Number(label="Overall Accuracy", interactive=False)
-                add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)
+                overall_accuracy_display = gr.Number(label="🏅 Overall Accuracy", interactive=False)
+                add_to_leaderboard_checkbox = gr.Checkbox(label="📊 Add to Leaderboard?", value=True)
 
-            eval_button = gr.Button("Evaluate")
-            eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
-
-            def handle_evaluation(file, model_name, add_to_leaderboard):
-                status, leaderboard = evaluate_predictions(file, model_name, add_to_leaderboard)
-                if leaderboard.empty:
-                    overall_accuracy = 0
-                else:
-                    overall_accuracy = leaderboard.iloc[-1]["Overall Accuracy"]
-                return status, overall_accuracy
+            eval_button = gr.Button("Evaluate", elem_id="evaluate-button")
+            eval_status = gr.Textbox(label="📢 Evaluation Status", interactive=False)
 
             eval_button.click(
-                handle_evaluation,
+                evaluate_predictions,
                 inputs=[file_input, model_name_input, add_to_leaderboard_checkbox],
                 outputs=[eval_status, overall_accuracy_display],
             )
@@ -404,18 +418,17 @@ For questions or support, contact us at: [Insert Email Address]
         with gr.TabItem("🏅 Leaderboard"):
             leaderboard_table = gr.Dataframe(
                 value=load_leaderboard(),
-                label="Leaderboard",
+                label="🏆 Leaderboard",
                 interactive=False,
                 wrap=True,
             )
-            refresh_button = gr.Button("Refresh Leaderboard")
+            refresh_button = gr.Button("🔄 Refresh Leaderboard")
             refresh_button.click(
                 lambda: load_leaderboard(),
                 inputs=[],
                 outputs=[leaderboard_table],
             )
 
-            gr.Markdown(f"Last updated on **{LAST_UPDATED}**")
+            gr.Markdown(f"**📅 Last updated:** {LAST_UPDATED}")
 
 demo.launch()
-
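
For reference, the "Generate Predictions" step in the Overview tab expects a CSV upload, and the diff wires that file straight into evaluate_predictions. Below is a minimal, hypothetical sketch of producing such a file; the actual column layout is specified on the competition's GitHub page and is not part of this commit, so the question_id/prediction headers here are placeholder assumptions.

# Hypothetical sketch: build a predictions CSV for the "📤 Submission" tab.
# The real column names are defined on the competition's GitHub page; the
# "question_id" and "prediction" headers below are placeholder assumptions.
import pandas as pd

def build_submission(answers: dict, out_path: str = "predictions.csv") -> str:
    """Write {question_id: predicted_answer} pairs to a CSV file for upload."""
    frame = pd.DataFrame(
        {"question_id": list(answers), "prediction": list(answers.values())}
    )
    frame.to_csv(out_path, index=False)  # matches the .csv filter on gr.File
    return out_path

if __name__ == "__main__":
    # Toy run: two multiple-choice answers produced by a model.
    print(build_submission({"q1": "A", "q2": "C"}))

The resulting file can then be uploaded through the "📂 Upload Prediction CSV" input, after which evaluate_predictions scores it and, if the checkbox is set, adds the result to the leaderboard.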