H2H-eval-comparator

Sleeping

File size: 4,759 Bytes

528d9fb

import gradio as gr

def start_evaluation(state, ui_components):
    """
    Lock the input controls and flag the evaluation as running.

    When ``state["running"]`` is already truthy the state is left untouched
    and the status text reports that an evaluation is in progress. Otherwise
    ``state["running"]`` is set to ``True`` (``state`` is mutated in place)
    and the status text announces the start. Both paths return the same
    13-item layout.

    Args:
        state (dict): Current evaluation state; its "running" key is read
            and possibly written.
        ui_components (dict): Dictionary of UI components to update.
            Not read by this function.

    Returns:
        list: The state, eight ``interactive=False`` updates (one per input
        control), a ``visible=True`` update for the cancel button, the
        status text, ``None`` for the results table and a ``visible=False``
        update for the results table container.
    """
    if state["running"]:
        status_text = "Evaluation already in progress. Please wait."
    else:
        state["running"] = True
        status_text = "Starting evaluation..."

    # Controls that get disabled, listed in the order they appear in the output
    locked_controls = (
        "subject_selection_mode",
        "num_subjects_slider",
        "specific_subjects",
        "all_questions_checkbox",
        "num_questions_slider",
        "model1_dropdown",
        "model2_dropdown",
        "eval_button",
    )

    return [
        state,
        *(gr.update(interactive=False) for _ in locked_controls),
        gr.update(visible=True),   # cancel_button
        status_text,               # results_output
        None,                      # results_table
        gr.update(visible=False),  # results_table_container
    ]

def finish_evaluation(state: dict) -> dict:
    """
    Clear the running flag once an evaluation has completed.

    The dictionary is modified in place; the same object is handed back so
    the function can be used directly as an event callback.

    Args:
        state: Current evaluation state.

    Returns:
        The same ``state`` object with ``"running"`` set to ``False``.
    """
    state["running"] = False  # mark the evaluation as no longer in progress
    return state

def cancel_evaluation(state, ui_components):
    """
    Unlock the input controls after the user cancels an evaluation.

    Sets ``state["running"]`` to ``False`` (``state`` is mutated in place)
    and builds the list of output values that re-enables the inputs, hides
    the cancel button and clears the results area.

    Args:
        state (dict): Current evaluation state.
        ui_components (dict): Dictionary of UI components to update.
            Not read by this function.

    Returns:
        list: The state, eight ``interactive=True`` updates (one per input
        control), a ``visible=False`` update for the cancel button, the
        cancellation message, ``None`` for the results table and a
        ``visible=False`` update for the results table container.
    """
    state["running"] = False

    # Order: subject_selection_mode, num_subjects_slider, specific_subjects,
    # all_questions_checkbox, num_questions_slider, model1_dropdown,
    # model2_dropdown, eval_button
    unlocked_controls = [gr.update(interactive=True) for _ in range(8)]

    results_area = [
        gr.update(visible=False),  # cancel_button
        "⚠️ Evaluation canceled by user (note: backend process may continue running)",  # results_output
        None,                      # results_table
        gr.update(visible=False),  # results_table_container
    ]

    return [state] + unlocked_controls + results_area

def handle_evaluation_results(eval_results, ui_components):
    """
    Translate evaluation results into output values for the UI.

    The report text is always shown. On success the comparison table is
    passed through and its container is made visible; on failure the table
    is cleared and its container hidden. In both cases the listed input
    controls are re-enabled and the cancel button is hidden.

    Args:
        eval_results (dict): Results from evaluation. Reads the "success"
            and "report" keys, and "comparison_df" only when "success" is
            truthy.
        ui_components (dict): Dictionary of UI components to update.
            Not read by this function.

    Returns:
        list: Ten values: report, table (or ``None``), eval-button update,
        cancel-button update, five ``interactive=True`` updates and the
        results table container visibility update.
    """
    succeeded = bool(eval_results['success'])
    report = eval_results['report']
    # Only touch the table key on success, so failure results need not carry it
    table = eval_results['comparison_df'] if succeeded else None

    # NOTE(review): start_evaluation also disables specific_subjects and
    # model2_dropdown, which are not re-enabled here. Presumably the caller's
    # outputs list handles them elsewhere; confirm against the event wiring.
    reenabled_controls = (
        "subject_selection_mode",
        "num_subjects_slider",
        "all_questions_checkbox",
        "num_questions_slider",
        "model1_dropdown",
    )

    return [
        report,                        # results_output
        table,                         # results_table
        gr.update(interactive=True),   # eval_button
        gr.update(visible=False),      # cancel_button
        *(gr.update(interactive=True) for _ in reenabled_controls),
        gr.update(visible=succeeded),  # results_table_container
    ]