"""Gradio front end for the model leaderboard.

Builds a four-tab UI (leaderboard, model responses, section results, model
submission) on top of ``data_manager`` and runs a background job that
periodically calls ``data_manager.refresh_datasets``.
"""

import logging
import sys
import time
from typing import Optional

import gradio as gr
from apscheduler.schedulers.background import BackgroundScheduler

from config import CONFIG
from data_manager import data_manager
from utils import (
    filter_leaderboard,
    plot_section_results,
    search_responses,
    validate_model_submission,
)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _unique_values(column: str) -> list:
    """Return the distinct values of a leaderboard column, or ``[]`` when the table is empty."""
    # Read the attribute once so the emptiness check and the column lookup
    # operate on the same object.
    frame = data_manager.leaderboard_data
    if frame.empty:
        return []
    return frame[column].unique().tolist()


def _preload_data(max_attempts: int = 3) -> bool:
    """Read the leaderboard data before the UI is built, retrying on errors.

    Args:
        max_attempts: How many times to try before giving up.

    Returns:
        True as soon as one attempt reads the ``family`` and ``model`` columns
        without raising; False once every attempt has failed.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            logger.info("Pre-loading data (attempt %d/%d)...", attempt, max_attempts)
            # Accessing the columns is what triggers (and validates) loading.
            families = _unique_values("family")
            models = _unique_values("model")
        except Exception as exc:  # startup boundary: log and retry, never crash here
            logger.error("Error pre-loading data: %s", exc)
            if attempt < max_attempts:
                delay = CONFIG["dataset"].retry_delay
                logger.info("Retrying in %s seconds...", delay)
                time.sleep(delay)
        else:
            logger.info(
                "Successfully loaded data with %d families and %d models",
                len(families),
                len(models),
            )
            return True

    logger.warning("Using fallback data due to loading failures")
    return False


def _build_leaderboard_tab() -> None:
    """Add the leaderboard tab: two filter dropdowns, an apply button and the table."""
    with gr.TabItem("📊 Leaderboard"):
        with gr.Row():
            family_filter = gr.Dropdown(
                choices=_unique_values("family"),
                label="Filter by Family",
                multiselect=False,
            )
            quantization_filter = gr.Dropdown(
                choices=_unique_values("quantization_level"),
                label="Filter by Quantization Level",
            )
            filter_btn = gr.Button("Apply Filters", variant="primary")

        leaderboard_table = gr.DataFrame(
            value=data_manager.leaderboard_data,
            interactive=False,
        )

        filter_btn.click(
            filter_leaderboard,
            inputs=[family_filter, quantization_filter],
            outputs=leaderboard_table,
        )


def _build_responses_tab() -> None:
    """Add the model-responses tab: model picker, search box and results table."""
    with gr.TabItem("🔍 Model Responses"):
        with gr.Row():
            model_dropdown = gr.Dropdown(
                choices=_unique_values("model"),
                label="Select Model",
            )
            query_input = gr.Textbox(
                label="Search Query",
                placeholder="Enter search terms...",
            )
            search_btn = gr.Button("Search", variant="primary")

        responses_table = gr.DataFrame()

        search_btn.click(
            search_responses,
            inputs=[query_input, model_dropdown],
            outputs=responses_table,
        )


def _build_section_results_tab() -> None:
    """Add the section-results tab: a plot plus the underlying table."""
    with gr.TabItem("📈 Section Results"):
        # The function itself (not its result) is handed to the component.
        gr.Plot(value=plot_section_results)
        gr.DataFrame(value=data_manager.section_results_data)


def _handle_submission(*args) -> str:
    """Validate a submission and return the Markdown status line to display.

    ``args`` are forwarded unchanged to ``validate_model_submission``, which
    is expected to return an ``(is_valid, message)`` pair.
    """
    is_valid, message = validate_model_submission(*args)
    if not is_valid:
        return f"❌ {message}"
    return "✅ Model submitted successfully!"


def _build_submission_tab() -> None:
    """Add the submit-model tab: the submission form and its status output."""
    with gr.TabItem("➕ Submit Model"):
        gr.Markdown("### Submit Your Model for Evaluation")

        with gr.Group():
            model_name = gr.Textbox(label="Model Name", placeholder="Enter unique model name")
            base_model = gr.Textbox(label="Base Model", placeholder="Enter base model name")
            revision = gr.Textbox(label="Revision", value="main")

            with gr.Row():
                precision = gr.Dropdown(
                    choices=CONFIG["model"].precision_options,
                    label="Precision",
                    value="float16",
                )
                weight_type = gr.Dropdown(
                    choices=CONFIG["model"].weight_types,
                    label="Weight Type",
                    value="Original",
                )
                model_type = gr.Dropdown(
                    choices=CONFIG["model"].model_types,
                    label="Model Type",
                    value="Transformer",
                )

            submit_btn = gr.Button("Submit Model", variant="primary")
            submission_output = gr.Markdown()

        submit_btn.click(
            _handle_submission,
            inputs=[model_name, base_model, revision, precision, weight_type, model_type],
            outputs=submission_output,
        )


def create_app() -> gr.Blocks:
    """Create and configure the Gradio application.

    Attempts to pre-load the leaderboard data (with retries) and then builds
    the UI regardless of whether pre-loading succeeded.

    Returns:
        The assembled ``gr.Blocks`` app, not yet launched.
    """
    # Best effort: the result is deliberately ignored so the UI is still built
    # when pre-loading fails.
    _preload_data()

    with gr.Blocks(css=CONFIG["ui"].css, theme=CONFIG["ui"].theme) as app:
        # NOTE(review): this literal previously contained raw line breaks,
        # which is not valid inside a single-quoted f-string; they are written
        # as escapes here. Any HTML markup that may have wrapped the title is
        # not visible in this file - confirm the intended markup.
        gr.HTML(f"\n\n{CONFIG['ui'].title}\n\n")
        gr.Markdown(CONFIG["ui"].description)

        with gr.Tabs():
            _build_leaderboard_tab()
            _build_responses_tab()
            _build_section_results_tab()
            _build_submission_tab()

    return app


def main() -> None:
    """Start the periodic data refresh, then build and launch the app.

    Any exception during startup is logged with its traceback and the process
    exits with status 1. The scheduler is shut down on the way out if it was
    started.
    """
    scheduler = None
    try:
        # Initialize scheduler for data refresh
        scheduler = BackgroundScheduler()
        scheduler.add_job(
            data_manager.refresh_datasets,
            "interval",
            seconds=CONFIG["dataset"].refresh_interval,
        )
        scheduler.start()

        # Create and launch app
        app = create_app()
        app.queue(default_concurrency_limit=40).launch(
            inbrowser=True,
            server_name="0.0.0.0",  # Use 0.0.0.0 to listen on all interfaces
            server_port=7860,
            share=False,
            debug=False,
            show_error=True,
            max_threads=40,
        )
    except Exception as exc:
        logger.exception("Error starting application: %s", exc)
        sys.exit(1)
    finally:
        # Only a running scheduler can be shut down; skip it when construction
        # or start() failed.
        if scheduler is not None and scheduler.running:
            scheduler.shutdown(wait=False)


if __name__ == "__main__":
    main()