Update app.py
Browse files
app.py
CHANGED
@@ -144,7 +144,6 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
|
|
144 |
|
145 |
initialize_leaderboard_file()
|
146 |
|
147 |
-
|
148 |
# Function to set default mode
|
149 |
css_tech_theme = """
|
150 |
body {
|
@@ -186,6 +185,19 @@ button:hover {
|
|
186 |
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
|
187 |
}
|
188 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
.dataframe {
|
190 |
color: #333333;
|
191 |
background-color: #ffffff;
|
@@ -202,17 +214,17 @@ with gr.Blocks(css=css_tech_theme) as demo:
|
|
202 |
gr.Markdown("""
|
203 |
# π Mobile-MMLU Benchmark Competition
|
204 |
### π Welcome to the Competition Overview
|
205 |
-

|
210 |
|
211 |
-
with gr.Tabs():
|
212 |
-
with gr.TabItem("π Overview"):
|
213 |
gr.Markdown("""
|
214 |
## Overview
|
215 |
-
Welcome to the Mobile-MMLU Benchmark Competition
|
216 |
---
|
217 |
### What is Mobile-MMLU?
|
218 |
Mobile-MMLU is a benchmark designed to test the capabilities of LLMs optimized for mobile use. Contribute to advancing mobile AI systems by competing to achieve the highest accuracy.
|
@@ -245,7 +257,7 @@ For support, email: [Insert Email Address]
|
|
245 |
---
|
246 |
""")
|
247 |
|
248 |
-
with gr.TabItem("📤 Submission"):
|
249 |
with gr.Row():
|
250 |
file_input = gr.File(label="π Upload Prediction CSV", file_types=[".csv"], interactive=True)
|
251 |
model_name_input = gr.Textbox(label="ποΈ Model Name", placeholder="Enter your model name")
|
@@ -263,7 +275,7 @@ For support, email: [Insert Email Address]
|
|
263 |
outputs=[eval_status, overall_accuracy_display],
|
264 |
)
|
265 |
|
266 |
-
with gr.TabItem("🏅 Leaderboard"):
|
267 |
leaderboard_table = gr.Dataframe(
|
268 |
value=load_leaderboard(),
|
269 |
label="Leaderboard",
|
@@ -280,5 +292,3 @@ For support, email: [Insert Email Address]
|
|
280 |
gr.Markdown(f"**Last updated:** {LAST_UPDATED}")
|
281 |
|
282 |
demo.launch()
|
283 |
-
|
284 |
-
|
|
|
144 |
|
145 |
initialize_leaderboard_file()
|
146 |
|
|
|
147 |
# Function to set default mode
|
148 |
css_tech_theme = """
|
149 |
body {
|
|
|
185 |
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
|
186 |
}
|
187 |
|
188 |
+
.tabs {
|
189 |
+
margin-bottom: 15px;
|
190 |
+
gap: 10px;
|
191 |
+
}
|
192 |
+
|
193 |
+
.tab-item {
|
194 |
+
background-color: #ece2f4;
|
195 |
+
border-radius: 6px;
|
196 |
+
padding: 10px;
|
197 |
+
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
|
198 |
+
margin: 5px;
|
199 |
+
}
|
200 |
+
|
201 |
.dataframe {
|
202 |
color: #333333;
|
203 |
background-color: #ffffff;
|
|
|
214 |
gr.Markdown("""
|
215 |
# π Mobile-MMLU Benchmark Competition
|
216 |
### π Welcome to the Competition Overview
|
217 |
+

|
218 |
---
|
219 |
+
Welcome to the **Mobile-MMLU Benchmark Competition**. Here you can submit your predictions, view the leaderboard, and track your performance.
|
220 |
---
|
221 |
""")
|
222 |
|
223 |
+
with gr.Tabs(elem_id="tabs"):
|
224 |
+
with gr.TabItem("π Overview", elem_classes=["tab-item"]):
|
225 |
gr.Markdown("""
|
226 |
## Overview
|
227 |
+
Welcome to the **Mobile-MMLU Benchmark Competition**! Evaluate mobile-compatible Large Language Models (LLMs) on **16,186 scenario-based and factual questions** across **80 fields**.
|
228 |
---
|
229 |
### What is Mobile-MMLU?
|
230 |
Mobile-MMLU is a benchmark designed to test the capabilities of LLMs optimized for mobile use. Contribute to advancing mobile AI systems by competing to achieve the highest accuracy.
|
|
|
257 |
---
|
258 |
""")
|
259 |
|
260 |
+
with gr.TabItem("📤 Submission", elem_classes=["tab-item"]):
|
261 |
with gr.Row():
|
262 |
file_input = gr.File(label="π Upload Prediction CSV", file_types=[".csv"], interactive=True)
|
263 |
model_name_input = gr.Textbox(label="ποΈ Model Name", placeholder="Enter your model name")
|
|
|
275 |
outputs=[eval_status, overall_accuracy_display],
|
276 |
)
|
277 |
|
278 |
+
with gr.TabItem("🏅 Leaderboard", elem_classes=["tab-item"]):
|
279 |
leaderboard_table = gr.Dataframe(
|
280 |
value=load_leaderboard(),
|
281 |
label="Leaderboard",
|
|
|
292 |
gr.Markdown(f"**Last updated:** {LAST_UPDATED}")
|
293 |
|
294 |
demo.launch()
|
|
|
|