Update app.py
app.py CHANGED
@@ -148,33 +148,196 @@ initialize_leaderboard_file()
 # Function to set default mode
 import gradio as gr
 
-# Ensure CSS is correctly defined
-css_tech_theme = """
-body {
-    background-color: #f4f6fa;
-    color: #333333;
-    font-family: 'Roboto', sans-serif;
-    line-height: 1.8;
-}
-
-.center-content {
-    display: flex;
-    flex-direction: column;
-    align-items: center;
-    justify-content: center;
-    text-align: center;
-    margin: 30px 0;
-    padding: 20px;
-}
-
-h1, h2 {
-    color: #5e35b1;
-    margin: 15px 0;
-    text-align: center;
-}
-img {
-    width: 100px;
-    height: 100px;
-}
-"""
+# # Ensure CSS is correctly defined
+# css_tech_theme = """
+# body {
+#     background-color: #f4f6fa;
+#     color: #333333;
+#     font-family: 'Roboto', sans-serif;
+#     line-height: 1.8;
+# }
+
+# .center-content {
+#     display: flex;
+#     flex-direction: column;
+#     align-items: center;
+#     justify-content: center;
+#     text-align: center;
+#     margin: 30px 0;
+#     padding: 20px;
+# }
+
+# h1, h2 {
+#     color: #5e35b1;
+#     margin: 15px 0;
+#     text-align: center;
+# }
+# img {
+#     width: 100px;
+#     height: 100px;
+# }
+# """
+
+# # Create the Gradio Interface
+# with gr.Blocks(css=css_tech_theme) as demo:
+#     gr.Markdown("""
+#     <div class="center-content">
+#         <h1>π Mobile-MMLU Benchmark Competition</h1>
+#         <h2>π Welcome to the Competition</h2>
+#         <p>
+#             Welcome to the Mobile-MMLU Benchmark Competition. Here you can submit your predictions,
+#             view the leaderboard, and track your performance!
+#         </p>
+#         <hr>
+#     </div>
+#     """)
+
+
+#     with gr.Tabs(elem_id="tabs"):
+#         with gr.TabItem("π Overview"):
+#             gr.Markdown("""
+#             **Welcome to the Mobile-MMLU Benchmark Competition! Evaluate mobile-compatible Large Language Models (LLMs) on 16,186 scenario-based and factual questions across 80 fields**.
+#             ---
+#             ## What is Mobile-MMLU?
+#             Mobile-MMLU is a benchmark designed to test the capabilities of LLMs optimized for mobile use. Contribute to advancing mobile AI systems by competing to achieve the highest accuracy.
+#             ---
+#             ## How It Works
+#             1. **Download the Dataset**
+#                Access the dataset and instructions on our [GitHub page](https://github.com/your-github-repo).
+#             2. **Generate Predictions**
+#                Use your LLM to answer the dataset questions. Format your predictions as a CSV file.
+#             3. **Submit Predictions**
+#                Upload your predictions on this platform.
+#             4. **Evaluation**
+#                Submissions are scored on accuracy.
+#             5. **Leaderboard**
+#                View real-time rankings on the leaderboard.
+#             ---
+#             """)
+
+#         with gr.TabItem("π€ Submission"):
+#             with gr.Row():
+#                 file_input = gr.File(label="Upload Prediction CSV", file_types=[".csv"], interactive=True)
+#                 model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
+
+#             with gr.Row():
+#                 overall_accuracy_display = gr.Number(label="Overall Accuracy", interactive=False)
+#                 add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)
+
+#             eval_button = gr.Button("Evaluate")
+#             eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
+
+#             def handle_evaluation(file, model_name, add_to_leaderboard):
+#                 status, leaderboard = evaluate_predictions(file, model_name, add_to_leaderboard)
+#                 if leaderboard.empty:
+#                     overall_accuracy = 0
+#                 else:
+#                     overall_accuracy = leaderboard.iloc[-1]["Overall Accuracy"]
+#                 return status, overall_accuracy
+
+#             eval_button.click(
+#                 handle_evaluation,
+#                 inputs=[file_input, model_name_input, add_to_leaderboard_checkbox],
+#                 outputs=[eval_status, overall_accuracy_display],
+#             )
+
+#         with gr.TabItem("π Leaderboard"):
+#             leaderboard_table = gr.Dataframe(
+#                 value=load_leaderboard(),
+#                 label="Leaderboard",
+#                 interactive=False,
+#                 wrap=True,
+#             )
+#             refresh_button = gr.Button("Refresh Leaderboard")
+#             refresh_button.click(
+#                 lambda: load_leaderboard(),
+#                 inputs=[],
+#                 outputs=[leaderboard_table],
+#             )
+
+#     gr.Markdown(f"Last updated on **{LAST_UPDATED}**")
+
+# demo.launch()
+
+import gradio as gr
+
+# Custom CSS to match website style
+css_tech_theme = """
+body {
+    font-family: 'Roboto', sans-serif;
+    margin: 0;
+    padding: 0;
+    color: #333;
+    background: #f4f6fa;
+}
+.center-content {
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    justify-content: center;
+    text-align: center;
+    padding: 20px;
+    background: linear-gradient(135deg, #6a1b9a, #64b5f6);
+    color: #ffffff;
+    border-radius: 10px;
+    margin: 20px;
+}
+.center-content h1, .center-content h2 {
+    margin: 10px 0;
+    color: #ffffff;
+}
+.center-content p {
+    font-size: 1.2em;
+    line-height: 1.8;
+    color: #e1e8f0;
+}
+.center-content hr {
+    border: 1px solid #ffffff;
+    width: 80%;
+    margin: 20px 0;
+}
+.tabs {
+    display: flex;
+    justify-content: center;
+    margin: 20px 0;
+}
+.tab-button {
+    font-size: 1em;
+    padding: 10px 20px;
+    border: none;
+    background: #6a1b9a;
+    color: white;
+    cursor: pointer;
+    margin-right: 10px;
+}
+.tab-button.active {
+    background: #64b5f6;
+}
+.tab-content {
+    display: none;
+    padding: 20px;
+    background: #ffffff;
+    border-radius: 10px;
+    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1);
+}
+.tab-content.active {
+    display: block;
+}
+#leaderboard {
+    max-width: 100%;
+    margin: 20px auto;
+    border-radius: 10px;
+    overflow: hidden;
+    border: 1px solid #e5eff2;
+    background: #f9f9f9;
+}
+footer {
+    text-align: center;
+    padding: 20px;
+    background: #8e44ad;
+    color: #ffffff;
+    border-top: 5px solid #64b5f6;
+    margin-top: 20px;
+}
+"""
 
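The replacement css_tech_theme string is plain CSS; it only takes effect when handed to gr.Blocks through its css argument, which is how the interface below uses it. A minimal sketch of that wiring, with illustrative heading text that is not part of this commit:

import gradio as gr

css_tech_theme = """
.center-content { text-align: center; padding: 20px; }
"""

with gr.Blocks(css=css_tech_theme) as demo:
    # Markdown accepts raw HTML, so this div picks up the .center-content rule
    # defined in the css string passed to Blocks above.
    gr.Markdown('<div class="center-content"><h1>Demo</h1></div>')

if __name__ == "__main__":
    demo.launch()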
@@ -185,34 +348,23 @@ with gr.Blocks(css=css_tech_theme) as demo:
         <h1>π Mobile-MMLU Benchmark Competition</h1>
         <h2>π Welcome to the Competition</h2>
         <p>
-            Welcome to the Mobile-MMLU Benchmark Competition. Here you can submit your predictions,
+            Welcome to the Mobile-MMLU Benchmark Competition. Submit your predictions,
             view the leaderboard, and track your performance!
         </p>
         <hr>
     </div>
     """)
 
-
     with gr.Tabs(elem_id="tabs"):
         with gr.TabItem("π Overview"):
             gr.Markdown("""
-            **Welcome to the Mobile-MMLU Benchmark Competition! Evaluate mobile-compatible Large Language Models (LLMs) on 16,186 scenario-based and factual questions across 80 fields**.
-            ---
-            ## What is Mobile-MMLU?
-            Mobile-MMLU is a benchmark designed to test the capabilities of LLMs optimized for mobile use. Contribute to advancing mobile AI systems by competing to achieve the highest accuracy.
-            ---
-            ## How It Works
-            1. **Download the Dataset**
-               Access the dataset and instructions on our [GitHub page](https://github.com/your-github-repo).
-            2. **Generate Predictions**
-               Use your LLM to answer the dataset questions. Format your predictions as a CSV file.
-            3. **Submit Predictions**
-               Upload your predictions on this platform.
-            4. **Evaluation**
-               Submissions are scored on accuracy.
-            5. **Leaderboard**
-               View real-time rankings on the leaderboard.
-            ---
+            <div class="tab-content active">
+                <h2>About the Competition</h2>
+                <p>
+                    **Mobile-MMLU** evaluates mobile-optimized LLMs on 16,186 scenario-based and factual questions across 80 fields.
+                    <br><br> Test your model, submit predictions, and climb the leaderboard!
+                </p>
+            </div>
             """)
 
         with gr.TabItem("π€ Submission"):
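The Overview tab asks entrants to format predictions as a CSV file, and the Submission tab accepts that file through gr.File. The exact schema is not defined in this diff, so the column names below are assumptions used only to illustrate the shape of an upload:

import pandas as pd

# Hypothetical predictions file; the real column names are not specified here.
predictions = pd.DataFrame(
    {
        "question_id": ["q_00001", "q_00002"],  # assumed question identifier
        "prediction": ["B", "D"],               # assumed answer choice
    }
)
predictions.to_csv("my_model_predictions.csv", index=False)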
@@ -229,10 +381,7 @@ with gr.Blocks(css=css_tech_theme) as demo:
 
             def handle_evaluation(file, model_name, add_to_leaderboard):
                 status, leaderboard = evaluate_predictions(file, model_name, add_to_leaderboard)
-                if leaderboard.empty:
-                    overall_accuracy = 0
-                else:
-                    overall_accuracy = leaderboard.iloc[-1]["Overall Accuracy"]
+                overall_accuracy = leaderboard.iloc[-1]["Overall Accuracy"] if not leaderboard.empty else 0
                 return status, overall_accuracy
 
             eval_button.click(
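handle_evaluation depends on evaluate_predictions, which is defined elsewhere in app.py and is not part of this diff. The refactored one-liner assumes it returns a status string plus a pandas DataFrame whose newest row carries an "Overall Accuracy" column; the stub below only illustrates that assumed contract:

import pandas as pd

def evaluate_predictions(file, model_name, add_to_leaderboard):
    # Stand-in for the real scoring logic; the accuracy value is a placeholder.
    leaderboard = pd.DataFrame([{"Model Name": model_name, "Overall Accuracy": 87.5}])
    status = f"Evaluation completed for {model_name}"
    return status, leaderboard

status, board = evaluate_predictions("preds.csv", "demo-model", True)
overall_accuracy = board.iloc[-1]["Overall Accuracy"] if not board.empty else 0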
@@ -255,6 +404,6 @@ with gr.Blocks(css=css_tech_theme) as demo:
                 outputs=[leaderboard_table],
             )
 
-    gr.Markdown(f"Last updated on **{LAST_UPDATED}**")
+    gr.Markdown("<footer>Mobile-MMLU Competition | Last Updated: December 2024</footer>")
 
-demo.launch()
+demo.launch()
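The hunk context initialize_leaderboard_file() and the Leaderboard tab's load_leaderboard() calls also live outside this diff. A hedged sketch of helpers with that interface, where the file path and columns are assumptions rather than the app's actual values:

import os
import pandas as pd

LEADERBOARD_FILE = "leaderboard.csv"  # assumed path, not shown in the diff

def initialize_leaderboard_file():
    # Create an empty leaderboard with the expected columns if none exists yet.
    if not os.path.exists(LEADERBOARD_FILE):
        pd.DataFrame(columns=["Model Name", "Overall Accuracy"]).to_csv(
            LEADERBOARD_FILE, index=False
        )

def load_leaderboard():
    # Return the current standings as a DataFrame for the gr.Dataframe component.
    return pd.read_csv(LEADERBOARD_FILE)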