Spaces:

Omartificial-Intelligence-Space
/

Arabic-MMMLU-Leaderborad

Running

App Files Community

Omartificial-Intelligence-Space committed on Feb 27

Commit

64f6484

verified ·

1 Parent(s): 8767411

update app.py

Browse files

Files changed (1) hide show

app.py +47 -256

app.py CHANGED Viewed

@@ -1,280 +1,71 @@
 import gradio as gr
-from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
 import os
 import json
-from apscheduler.schedulers.background import BackgroundScheduler
-from huggingface_hub import snapshot_download
-from src.about import (
-    CITATION_BUTTON_LABEL,
-    CITATION_BUTTON_TEXT,
-    EVALUATION_QUEUE_TEXT,
-    INTRODUCTION_TEXT,
-    LLM_BENCHMARKS_TEXT,
-    TITLE,
-)
-from src.display.css_html_js import custom_css
-from src.display.utils import (
-    COLUMNS,
-    COLS,
-    BENCHMARK_COLS,
-    EVAL_COLS,
-    EVAL_TYPES,
-    ModelType,
-    WeightType,
-    Precision
-)
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
-from src.populate import get_evaluation_queue_df, get_leaderboard_df
-from src.submission.submit import add_new_eval
-def restart_space():
-    try:
-        API.restart_space(repo_id=REPO_ID)
-    except Exception as e:
-        print(f"Error restarting space: {e}")
 # Ensure directories exist
-os.makedirs(EVAL_REQUESTS_PATH, exist_ok=True)
 os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)
-### Space initialization
-try:
-    print(f"Downloading evaluation requests from {QUEUE_REPO} to {EVAL_REQUESTS_PATH}")
-    snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset",
-        tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-    print("Successfully downloaded evaluation requests")
-except Exception as e:
-    print(f"Error downloading evaluation requests: {e}")
-    # Don't restart immediately, try to continue
 try:
-    print(f"Downloading evaluation results from {RESULTS_REPO} to {EVAL_RESULTS_PATH}")
-    snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset",
-        tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-    print("Successfully downloaded evaluation results")
-except Exception as e:
-    print(f"Error downloading evaluation results: {e}")
-    # Don't restart immediately, try to continue
-# Add fallback data in case the remote fetch fails
-fallback_data = False
-if not os.listdir(EVAL_RESULTS_PATH):
-    print("No evaluation results found. Creating sample data for testing.")
-    fallback_data = True
-    # Create a sample result file for testing
-    sample_data = {
-        "config": {
-            "model_name": "Sample Arabic Model",
-            "submitted_time": "2023-01-01",
-            "base_model": "bert-base-arabic",
-            "revision": "main",
-            "precision": "float16",
-            "weight_type": "Original",
-            "model_type": "Encoder",
-            "license": "Apache-2.0",
-            "params": 110000000,
-            "still_on_hub": True
-        },
-        "results": {
-            "average": 75.5,
-            "abstract_algebra": 70.2,
-            "anatomy": 72.5,
-            "astronomy": 80.1,
-            "business_ethics": 68.3,
-            "clinical_knowledge": 75.0,
-            "college_biology": 77.4,
-            "college_chemistry": 74.2
-        }
-    }
-    with open(os.path.join(EVAL_RESULTS_PATH, "sample_result.json"), 'w') as f:
-        json.dump(sample_data, f)
-# Load the leaderboard DataFrame
-try:
     LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
     print("LEADERBOARD_DF Shape:", LEADERBOARD_DF.shape)
-    print("LEADERBOARD_DF Columns:", LEADERBOARD_DF.columns.tolist())
-    print("LEADERBOARD_DF Sample:", LEADERBOARD_DF.head(1).to_dict('records') if not LEADERBOARD_DF.empty else "Empty DataFrame")
-    # If DataFrame is empty even with fallback data, create a minimal sample
-    if LEADERBOARD_DF.empty and fallback_data:
-        print("Creating minimal sample data for leaderboard")
         LEADERBOARD_DF = pd.DataFrame([{
-            "model_name": "Sample Arabic LLM",
-            "submitted_time": "2023-01-01",
-            "base_model": "bert-base-arabic",
-            "revision": "main",
-            "precision": "float16",
-            "weight_type": "Original",
-            "model_type": "Encoder",
-            "license": "Apache-2.0",
-            "params": 110000000,
-            "still_on_hub": True,
             "average": 75.5,
-            "abstract_algebra": 70.2,
-            "anatomy": 72.5,
-            "astronomy": 80.1,
-            "business_ethics": 68.3,
-            "clinical_knowledge": 75.0,
-            "college_biology": 77.4,
-            "college_chemistry": 74.2
         }])
 except Exception as e:
     print(f"Error loading leaderboard data: {e}")
-    # Create a minimal sample DataFrame
     LEADERBOARD_DF = pd.DataFrame([{
         "model_name": "Error Loading Data",
         "average": 0
     }])
-# Load the evaluation queue DataFrames
-try:
-    finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
-except Exception as e:
-    print(f"Error loading evaluation queue data: {e}")
-    # Create empty DataFrames
-    finished_eval_queue_df = pd.DataFrame(columns=EVAL_COLS)
-    running_eval_queue_df = pd.DataFrame(columns=EVAL_COLS)
-    pending_eval_queue_df = pd.DataFrame(columns=EVAL_COLS)
-with gr.Blocks(css=custom_css, theme=gr.themes.Default()) as demo:
-    gr.HTML(TITLE)
-    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-    with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab", id=0):
-            if LEADERBOARD_DF.empty:
-                gr.Markdown("No evaluations have been performed yet. The leaderboard is currently empty.")
-            else:
-                # Debug information as Markdown
-                gr.Markdown("### Leaderboard Data Debug Info")
                 gr.Markdown(f"DataFrame Shape: {LEADERBOARD_DF.shape}")
-                gr.Markdown(f"DataFrame Columns: {LEADERBOARD_DF.columns.tolist()}")
-                # Get the default columns to display
-                default_selection = [col.name for col in COLUMNS if col.displayed_by_default]
-                print("Default Selection before ensuring 'model_name':", default_selection)
-                # Ensure "model_name" is included
-                if "model_name" not in default_selection:
-                    default_selection.insert(0, "model_name")
-                    print("Default Selection after ensuring 'model_name':", default_selection)
-                # Make sure all columns exist in the DataFrame
-                for col in default_selection:
-                    if col not in LEADERBOARD_DF.columns:
-                        print(f"Warning: Column '{col}' not found in DataFrame. Adding empty column.")
-                        LEADERBOARD_DF[col] = None
-                print("LEADERBOARD_DF dtypes:\n", LEADERBOARD_DF.dtypes)
-                # Create the leaderboard component
-                leaderboard = Leaderboard(
-                    value=LEADERBOARD_DF,
-                    datatype=[col.type for col in COLUMNS],
-                    select_columns=SelectColumns(
-                        default_selection=default_selection,
-                        cant_deselect=[col.name for col in COLUMNS if col.never_hidden],
-                        label="Select Columns to Display:",
-                    ),
-                    search_columns=["model_name", "license"],
-                    hide_columns=[col.name for col in COLUMNS if col.hidden],
-                    filter_columns=[
-                        ColumnFilter("model_type", type="checkboxgroup", label="Model types"),
-                        ColumnFilter("precision", type="checkboxgroup", label="Precision"),
-                        ColumnFilter(
-                            "still_on_hub", type="boolean", label="Deleted/incomplete", default=True
-                        ),
-                    ],
-                    bool_checkboxgroup_label="Hide models",
-                    interactive=True,  # Change to True to enable interaction
-                )
-        with gr.TabItem("📝 About", elem_id="about-tab", id=1):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-        with gr.TabItem("🚀 Submit here!", elem_id="submit-tab", id=2):
-            with gr.Column():
-                with gr.Row():
-                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-                # Since the evaluation queues are empty, display a message
-                with gr.Column():
-                    gr.Markdown("Evaluations are performed immediately upon submission. There are no pending or running evaluations.")
-            with gr.Row():
-                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-            with gr.Row():
-                with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                        label="Model type",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
-                    )
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=[i.value for i in Precision if i != Precision.Unknown],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=[i.value for i in WeightType],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-            submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
-            submit_button.click(
-                add_new_eval,
-                [
-                    model_name_textbox,
-                    base_model_name_textbox,
-                    revision_name_textbox,
-                    precision,
-                    weight_type,
-                    model_type,
-                ],
-                submission_result,
             )
-    with gr.Row():
-        with gr.Accordion("📙 Citation", open=False):
-            citation_button = gr.Textbox(
-                value=CITATION_BUTTON_TEXT,
-                label=CITATION_BUTTON_LABEL,
-                lines=20,
-                elem_id="citation-button",
-                show_copy_button=True,
-            )
-scheduler = BackgroundScheduler()
-# Run every 30 minutes instead of every 30 seconds (1800 seconds = 30 minutes)
-scheduler.add_job(restart_space, "interval", seconds=1800)
-scheduler.start()
-# Launch with a more descriptive message
-demo.queue(default_concurrency_limit=40).launch(
-    debug=True,
-    share=False,
-    show_error=True
-)

 import gradio as gr
+from gradio_leaderboard import Leaderboard
 import pandas as pd
 import os
 import json
+from src.populate import get_leaderboard_df
+from src.display.utils import COLUMNS, COLS, BENCHMARK_COLS
+from src.envs import EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH
 # Ensure directories exist
 os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)
+# Minimal CSS to avoid conflicts
+minimal_css = """
+.container {
+    max-width: 1200px;
+    margin: 0 auto;
+}
+.header {
+    text-align: center;
+    margin-bottom: 20px;
+}
+"""
 try:
+    # Load the leaderboard DataFrame
     LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
     print("LEADERBOARD_DF Shape:", LEADERBOARD_DF.shape)
+    print("Sample row:", LEADERBOARD_DF.iloc[0].to_dict() if not LEADERBOARD_DF.empty else "Empty DataFrame")
+    # If DataFrame is empty, create a sample
+    if LEADERBOARD_DF.empty:
+        print("Creating sample data for testing")
         LEADERBOARD_DF = pd.DataFrame([{
+            "model_name": "Sample Model",
             "average": 75.5,
+            "model_type": "Encoder",
+            "precision": "float16"
         }])
 except Exception as e:
     print(f"Error loading leaderboard data: {e}")
+    # Create a minimal DataFrame
     LEADERBOARD_DF = pd.DataFrame([{
         "model_name": "Error Loading Data",
         "average": 0
     }])
+# Create a very simple app with just the leaderboard
+with gr.Blocks(css=minimal_css) as demo:
+    gr.HTML("<div class='header'><h1>ILMAAM: Index for Language Models for Arabic Assessment on Multitasks</h1></div>")
+    with gr.Tabs() as tabs:
+        with gr.TabItem("LLM Benchmark"):
+            # Add debug output
+            with gr.Accordion("Debug Info", open=True):
                 gr.Markdown(f"DataFrame Shape: {LEADERBOARD_DF.shape}")
+                gr.Markdown(f"Column Names: {', '.join(LEADERBOARD_DF.columns[:10])}...")
+            # Create a simplified version of the leaderboard
+            leaderboard = Leaderboard(
+                value=LEADERBOARD_DF,
+                interactive=True,
             )
+        with gr.TabItem("About"):
+            gr.Markdown("This is a benchmark for Arabic language models.")
+        with gr.TabItem("Submit"):
+            gr.Markdown("Submission form will be available here.")
+demo.launch(debug=True, share=False)