"""Logic helpers for the **Overview** tab.""" from typing import List, Tuple, Optional import pandas as pd import plotly.graph_objects as go import plotly.express as px import gradio as gr from .state import app_state from .utils import compute_model_rankings_new, create_model_summary_card_new from .plotting import create_model_dataframe __all__ = ["create_overview", "create_model_quality_plot", "create_model_quality_table", "get_available_model_quality_metrics"] def create_overview( selected_models: List[str], top_n: int, score_significant_only: bool = False, quality_significant_only: bool = False, sort_by: str = "quality_asc", min_cluster_size: int = 1, selected_tags: Optional[List[str]] = None, progress: Optional[gr.Progress] = None, ) -> str: """Return the HTML snippet that summarises model performance.""" if not app_state["metrics"]: return "Please load data first using the 'Load Data' tab." if not selected_models: return "Please select at least one model to display." # 1. Compute global rankings and filter to selection if progress: progress(0.05, "Computing model rankings…") model_rankings = compute_model_rankings_new(app_state["metrics"]) filtered_rankings = [ (name, stats) for name, stats in model_rankings if name in selected_models ] # Sort so "all" appears first, then the rest by their rankings all_models = [(name, stats) for name, stats in filtered_rankings if name == "all"] other_models = [(name, stats) for name, stats in filtered_rankings if name != "all"] filtered_rankings = all_models + other_models if not filtered_rankings: return "No data available for selected models." # 2. Assemble HTML overview_html = """
Top distinctive clusters where each model shows unique behavioural patterns. Frequency shows what percentage of a model's battles resulted in that behavioural pattern.
Frequency Delta+0.15
) means the model hits the behaviour more often than average.-0.08
) means it appears less often.Rank | Model | {metric_display_name} |
---|
{f"Ranks based on confidence intervals (non-overlapping CIs). Models with overlapping CIs may have the same rank." if has_ci else "Ranks based on quality scores (confidence intervals not available)."}