Update app.py
app.py CHANGED
```diff
@@ -313,7 +313,7 @@ with gr.Blocks() as demo:
     gr.Markdown("""
     # Competition Title
     ### Welcome to the Competition Overview
-
+
     Here you can submit your predictions, view the leaderboard, and track your performance!
     """)
 
@@ -321,27 +321,79 @@ with gr.Blocks() as demo:
     with gr.TabItem("📖 Overview"):
         gr.Markdown("""
         ## Overview
-
+        # Welcome to the Mobile-MMLU Benchmark Competition
+
+        Evaluate the performance of mobile-compatible Large Language Models (LLMs) on 16,186 scenario-based and factual questions across 80 fields. Compete to showcase your model's accuracy in real-world mobile scenarios.
+
+        ## What is Mobile-MMLU?
+
+        Mobile-MMLU is a benchmark designed to test the capabilities of LLMs optimized for mobile use. By participating in this competition, you contribute to advancing mobile intelligence benchmarks and shaping the future of mobile-compatible AI systems.
+
+        ---
+
+        ## How It Works
+
+        1. **Download the Dataset**
+           Access the dataset and detailed generation instructions on our [GitHub page](https://github.com/your-github-repo).
+
+        2. **Generate Predictions**
+           Use your LLM to answer the questions and format your predictions as a CSV file with the structure described on our GitHub page:
+
+        3. **Submit Predictions**
+           Upload your predictions via the submission portal.
+
+        4. **Evaluation**
+           Your submission will be scored on accuracy.
+
+        5. **Leaderboard**
+           Compare your results against other participants on the live leaderboard.
+
+        ---
+
+        ## Competition Tasks
+
+        Participants are tasked with generating predictions for the dataset and optimizing their models for:
+        - **Accuracy**: Correctly answering questions across diverse fields.
+        ---
+
+
+        ## Get Started
+
+        1. **Prepare Your Model**
+           Refer to our [GitHub page](https://github.com/your-github-repo) for dataset access and response generation instructions.
+
+        2. **Submit Predictions**
+           Format your submission as specified in the rules.
+
+        3. **Track Progress**
+           Check the leaderboard for real-time rankings.
+
+        ---
+
+        ## Contact Us
+
+        For questions or support, contact us at: [Insert Email Address]
         """)
 
-
-
-
-
+    with gr.TabItem("📤 Submission"):
+        with gr.Row():
+            file_input = gr.File(label="Upload Prediction CSV", file_types=[".csv"], interactive=True)
+            model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
+
+        with gr.Row():
+            overall_accuracy_display = gr.Textbox(label="Overall Accuracy", interactive=False)
+            add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)
+
+        eval_button = gr.Button("Evaluate")
         eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
-
-            value=load_leaderboard(),
-            label="Leaderboard (Preview)",
-            interactive=False,
-            wrap=True,
-        )
-        eval_button = gr.Button("Evaluate and Update Leaderboard")
+
         eval_button.click(
             evaluate_predictions,
             inputs=[file_input, model_name_input, add_to_leaderboard_checkbox],
-            outputs=[eval_status,
+            outputs=[eval_status, overall_accuracy_display],
         )
 
+
     with gr.TabItem("🏅 Leaderboard"):
         leaderboard_table = gr.Dataframe(
             value=load_leaderboard(),
```