qLeaderboard-aBase4Community

Running

File size: 9,694 Bytes

"""
Updated create_model_submission_ui function that properly displays benchmark names in the dropdown.
Replace this function in your evaluation_queue.py file.
"""

def create_model_submission_ui(evaluation_queue, auth_manager, db_manager):
    """Create the model submission UI components.

    Builds two tabs: "Submit Model" (model metadata form, size check,
    benchmark selection, submit button) and "Evaluation Queue" (queue
    statistics, recent evaluations table, current-evaluation progress text).

    Args:
        evaluation_queue: Evaluation queue instance. This function reads its
            ``memory_limit_gb`` and ``model_tags`` attributes and calls
            ``check_model_size``, ``submit_evaluation``, ``get_queue_status``
            and ``get_current_progress``.
        auth_manager: Authentication manager instance; ``check_login(request)``
            is called to resolve the submitting user.
        db_manager: Database manager instance. This function calls
            ``get_benchmarks``, ``add_model``, ``get_evaluation_results``,
            ``get_model`` and ``get_benchmark``.

    Returns:
        gr.Blocks: Gradio Blocks component with model submission UI
    """
    with gr.Blocks() as submission_ui:
        with gr.Tab("Submit Model"):
            gr.Markdown(f"""
            ### Model Size Restrictions
            
            Models must fit within {evaluation_queue.memory_limit_gb}GB of RAM for evaluation.
            Large models will be rejected to ensure all evaluations can complete successfully.
            """, elem_classes=["info-text"])

            with gr.Row():
                with gr.Column(scale=2):
                    model_id_input = gr.Textbox(
                        placeholder="HuggingFace model ID (e.g., 'gpt2', 'facebook/opt-350m')",
                        label="Model ID"
                    )

                    check_size_button = gr.Button("Check Model Size")
                    size_check_result = gr.Markdown("")

                    model_name_input = gr.Textbox(
                        placeholder="Display name for your model",
                        label="Model Name"
                    )

                    model_description_input = gr.Textbox(
                        placeholder="Brief description of your model",
                        label="Description",
                        lines=3
                    )

                    model_parameters_input = gr.Number(
                        label="Number of Parameters (billions)",
                        precision=2
                    )

                with gr.Column(scale=1):
                    model_tag_input = gr.Dropdown(
                        choices=evaluation_queue.model_tags,
                        label="Model Tag",
                        info="Select one category that best describes your model"
                    )

                    # Gradio reads tuple choices as (displayed label, value):
                    # the label is shown to the user and the value is what the
                    # event handler receives. The placeholder's value is the
                    # "none" sentinel that submit_model_handler rejects.
                    benchmark_dropdown = gr.Dropdown(
                        label="Benchmark",
                        info="Select a benchmark to evaluate your model on",
                        choices=[("Loading benchmarks...", "none")],
                        value=None
                    )

                    refresh_benchmarks_button = gr.Button("Refresh Benchmarks")

            submit_model_button = gr.Button("Submit for Evaluation")
            submission_status = gr.Markdown("")

        with gr.Tab("Evaluation Queue"):
            refresh_queue_button = gr.Button("Refresh Queue")

            with gr.Row():
                with gr.Column(scale=1):
                    queue_stats = gr.JSON(
                        label="Queue Statistics"
                    )

                with gr.Column(scale=2):
                    queue_status = gr.Dataframe(
                        headers=["ID", "Model", "Benchmark", "Status", "Submitted"],
                        label="Recent Evaluations"
                    )

            with gr.Row(visible=True) as progress_container:
                with gr.Column():
                    current_eval_info = gr.Markdown("No evaluation currently running")
                    # Use a simple text display for progress instead of Progress component
                    progress_display = gr.Markdown("Progress: 0%")

        # Event handlers
        def check_model_size_handler(model_id):
            """Return a Markdown string saying whether the model fits in RAM."""
            if not model_id:
                return "Please enter a HuggingFace model ID."

            try:
                will_fit, message = evaluation_queue.check_model_size(model_id)
            except Exception as e:
                # UI boundary: surface the failure to the user instead of crashing.
                return f"Error checking model size: {str(e)}"

            return f"✅ {message}" if will_fit else f"❌ {message}"

        def refresh_benchmarks_handler():
            """Reload the benchmark list and return a dropdown update."""
            # Treat a None result the same as "no benchmarks".
            benchmarks = db_manager.get_benchmarks() or []

            # (label, value): show the benchmark name, submit its ID as a string.
            choices = [(b["name"], str(b["id"])) for b in benchmarks]

            if not choices:
                # Value "none" is the sentinel rejected by submit_model_handler.
                choices = [("No benchmarks available - add some first", "none")]

            return gr.update(choices=choices)

        def submit_model_handler(model_id, model_name, model_description, model_parameters, model_tag, benchmark_id, request: gr.Request):
            """Validate the form, register the model and queue its evaluation.

            Returns a Markdown status string in every case (success, validation
            failure, or error).
            """
            # Check if user is logged in
            user = auth_manager.check_login(request)

            if not user:
                return "Please log in to submit a model."

            if not model_id or not model_name or not model_tag or not benchmark_id:
                return "Please fill in all required fields."

            # "none" is the value carried by the placeholder dropdown entries.
            if benchmark_id == "none":
                return "Please select a valid benchmark."

            try:
                # Check if model will fit in RAM
                will_fit, size_message = evaluation_queue.check_model_size(model_id)

                if not will_fit:
                    return f"❌ {size_message}"

                # Add model to database
                model_db_id = db_manager.add_model(
                    name=model_name,
                    hf_model_id=model_id,
                    user_id=user["id"],
                    tag=model_tag,
                    parameters=str(model_parameters) if model_parameters else None,
                    description=model_description
                )

                if not model_db_id:
                    return "Failed to add model to database."

                # Submit for evaluation
                eval_id, message = evaluation_queue.submit_evaluation(
                    model_id=model_db_id,
                    benchmark_id=benchmark_id,
                    user_id=user["id"]
                )

                if eval_id:
                    return f"✅ Model submitted successfully. {size_message}\nEvaluation ID: {eval_id}"
                return message
            except Exception as e:
                # UI boundary: report the failure in the status area.
                return f"Error submitting model: {str(e)}"

        def refresh_queue_handler():
            """Return (stats, table rows, current-eval text, progress text)."""
            # Get queue statistics
            stats = evaluation_queue.get_queue_status()

            # Get recent evaluations (all statuses, limited to 20)
            evals = db_manager.get_evaluation_results(limit=20)

            # Format for dataframe; column order matches the Dataframe headers.
            eval_data = [
                [
                    evaluation["id"],
                    evaluation["model_name"],
                    evaluation["benchmark_name"],
                    evaluation["status"],
                    evaluation["submitted_at"],
                ]
                for evaluation in evals
            ]

            # Also update progress display
            current_eval, progress = evaluation_queue.get_current_progress()
            if current_eval:
                model_info = db_manager.get_model(current_eval['model_id'])
                benchmark_info = db_manager.get_benchmark(current_eval['benchmark_id'])

                if model_info and benchmark_info:
                    eval_info = f"**Currently Evaluating:** {model_info['name']} on {benchmark_info['name']}"
                    progress_text = f"Progress: {progress}%"
                    return stats, eval_data, eval_info, progress_text

            # Nothing running, or the model/benchmark record could not be found.
            return stats, eval_data, "No evaluation currently running", "Progress: 0%"

        # Connect event handlers
        check_size_button.click(
            fn=check_model_size_handler,
            inputs=[model_id_input],
            outputs=[size_check_result]
        )

        refresh_benchmarks_button.click(
            fn=refresh_benchmarks_handler,
            inputs=[],
            outputs=[benchmark_dropdown]
        )

        submit_model_button.click(
            fn=submit_model_handler,
            inputs=[
                model_id_input,
                model_name_input,
                model_description_input,
                model_parameters_input,
                model_tag_input,
                benchmark_dropdown
            ],
            outputs=[submission_status]
        )

        refresh_queue_button.click(
            fn=refresh_queue_handler,
            inputs=[],
            outputs=[queue_stats, queue_status, current_eval_info, progress_display]
        )

        # Initialize on load
        submission_ui.load(
            fn=refresh_benchmarks_handler,
            inputs=[],
            outputs=[benchmark_dropdown]
        )

        submission_ui.load(
            fn=refresh_queue_handler,
            inputs=[],
            outputs=[queue_stats, queue_status, current_eval_info, progress_display]
        )

    return submission_ui