from pathlib import Path

import altair as alt
import polars as pl

import gradio as gr

DATASETS = []
BENCHMARKS = {
    # Name: (device, AMP, compile, single thread)
    "Parameters": (None, None, None, None),
    "GPU Memory": (None, None, None, None),
    "CPU rate": ("cpu", False, False, False),
    "CPU rate single core": ("cpu", False, False, True),
    "CPU rate with compile": ("cpu", False, True, False),
    "CPU rate AMP with compile": ("cpu", True, True, False),
    "CUDA rate": ("cuda", False, False, False),
    "CUDA rate with compile": ("cuda", False, True, False),
    "CUDA rate AMP with compile": ("cuda", True, True, False),
}


def plot_acc_param(param_compare_results_df: pl.DataFrame, width: int = 1000, height: int = 680) -> alt.LayerChart:
    df = param_compare_results_df.select(
        "Model name",
        "Model type",
        "Accuracy",
        "Top-3 accuracy",
        "Resolution",
        "Parameters (M)",
        "Pareto frontier (p)",
        "Intermediate",
        "MIM",
        "Distilled",
    )
    base = df.plot.point(
        x="Parameters (M)",
        y="Accuracy",
        color="Model type",
        shape="Resolution:N",
        tooltip=[
            "Parameters (M)",
            "Accuracy",
            "Top-3 accuracy",
            "Model name",
            "Model type",
            "Resolution",
            "Intermediate",
            "MIM",
            "Distilled",
        ],
    )
    text = base.mark_text(align="center", baseline="middle", dy=-10).encode(text="Model name")
    frontier = df.plot.line(x="Parameters (M)", y="Pareto frontier (p)").mark_line(
        interpolate="step-after", color="red", strokeWidth=0.3, strokeDash=(2, 2)
    )

    chart = base + text + frontier
    return chart.properties(title="Accuracy vs Parameter Count", width=width, height=height).configure_scale(zero=False)


def plot_acc_memory(memory_compare_results_df: pl.DataFrame, width: int = 900, height: int = 640) -> alt.LayerChart:
    if len(memory_compare_results_df) > 0:
        batch_size = memory_compare_results_df["max_batch_size"][0]
        amp = memory_compare_results_df["amp"][0]
    else:
        batch_size = ""
        amp = ""

    df = memory_compare_results_df.select(
        "Model name",
        "Model type",
        "Accuracy",
        "Top-3 accuracy",
        "Resolution",
        "Peak GPU memory (MB)",
        "Parameters (M)",
        "Pareto frontier (mem)",
        "Intermediate",
        "MIM",
        "Distilled",
    )
    base = df.plot.point(
        x="Peak GPU memory (MB)",
        y="Accuracy",
        color="Model type",
        shape="Resolution:N",
        tooltip=[
            "Peak GPU memory (MB)",
            "Parameters (M)",
            "Accuracy",
            "Top-3 accuracy",
            "Model name",
            "Model type",
            "Resolution",
            "Intermediate",
            "MIM",
            "Distilled",
        ],
    )
    text = base.mark_text(align="center", baseline="middle", dy=-10).encode(text="Model name")
    frontier = df.plot.line(x="Peak GPU memory (MB)", y="Pareto frontier (mem)").mark_line(
        interpolate="step-after", color="red", strokeWidth=0.3, strokeDash=(2, 2)
    )

    chart = base + text + frontier
    return chart.properties(
        title=f"Accuracy vs GPU Memory (batch size={batch_size}, amp={amp})", width=width, height=height
    ).configure_scale(zero=False)


def plot_acc_rate(rate_compare_results_df: pl.DataFrame, width: int = 1000, height: int = 680) -> alt.LayerChart:
    if len(rate_compare_results_df) > 0:
        device = rate_compare_results_df["device"][0]
        compiled = rate_compare_results_df["compile"][0]
        batch_size = rate_compare_results_df["max_batch_size"][0]
        amp = rate_compare_results_df["amp"][0]
        single_thread = rate_compare_results_df["single_thread"][0]
    else:
        device = ""
        compiled = ""
        batch_size = ""
        amp = ""
        single_thread = False

    df = rate_compare_results_df.select(
        "Model name",
        "Model type",
        "Accuracy",
        "Top-3 accuracy",
        "Resolution",
        "ms / sample",
        "Parameters (M)",
        "Pareto frontier (ms)",
        "Intermediate",
        "MIM",
        "Distilled",
    )
    base = df.plot.point(
        x="ms / sample",
        y="Accuracy",
        color="Model type",
        shape="Resolution:N",
        tooltip=[
            "ms / sample",
            "Parameters (M)",
            "Accuracy",
            "Top-3 accuracy",
            "Model name",
            "Model type",
            "Resolution",
            "Intermediate",
            "MIM",
            "Distilled",
        ],
    )
    text = base.mark_text(align="center", baseline="middle", dy=-10).encode(text="Model name")
    frontier = df.plot.line(x="ms / sample", y="Pareto frontier (ms)").mark_line(
        interpolate="step-after", color="red", strokeWidth=0.3, strokeDash=(2, 2)
    )

    chart = base + text + frontier

    if single_thread is True:
        single_thread_title = " Single Core"
    else:
        single_thread_title = ""

    return chart.properties(
        title=(
            f"Accuracy vs {device.upper()}{single_thread_title} Rate (compile={compiled}, "
            f"batch size={batch_size}, amp={amp})"
        ),
        width=width,
        height=height,
    ).configure_scale(zero=False)


def update_data(
    dataset: str, benchmark: str, intermediate: bool, mim: bool, dist: bool, log_x: bool, search_bar: str
) -> tuple[alt.LayerChart, pl.DataFrame]:
    compare_results_df = pl.read_csv(f"results_{dataset}.csv")
    if intermediate is False:
        compare_results_df = compare_results_df.filter(pl.col("Intermediate") == intermediate)
    if mim is False:
        compare_results_df = compare_results_df.filter(pl.col("MIM") == mim)
    if dist is False:
        compare_results_df = compare_results_df.filter(pl.col("Distilled") == dist)

    x_scale_type = "log" if log_x is True else "linear"

    # Filter models
    compare_results_df = compare_results_df.filter(pl.col("Model name").str.contains(search_bar))

    # Parameter count
    if benchmark == "Parameters":
        param_compare_results_df = compare_results_df.unique(subset=["Model name", "Resolution"]).sort(
            "Parameters (M)", descending=False
        )
        param_compare_results_df = param_compare_results_df.with_columns(
            pl.col("Accuracy").cum_max().alias("Pareto frontier (p)")
        )
        param_compare_results_df = param_compare_results_df.drop(
            "Samples / sec", "device", "ms / sample", "Peak GPU memory (MB)"
        )
        chart = plot_acc_param(param_compare_results_df)

        x_max = param_compare_results_df["Parameters (M)"].quantile(0.9)
        x_min = param_compare_results_df["Parameters (M)"].quantile(0.1)
        chart.layer[0].encoding.x.scale = alt.Scale(domain=[x_min, x_max], type=x_scale_type)
        output_df = param_compare_results_df

    # Peak memory
    elif benchmark == "GPU Memory":
        memory_compare_results_df = compare_results_df.drop_nulls(subset=["Peak GPU memory (MB)"])
        memory_compare_results_df = memory_compare_results_df.unique(subset=["Model name", "Resolution"]).sort(
            "Peak GPU memory (MB)", descending=False
        )
        memory_compare_results_df = memory_compare_results_df.with_columns(
            pl.col("Accuracy").cum_max().alias("Pareto frontier (mem)")
        )
        memory_compare_results_df = memory_compare_results_df.drop("Samples / sec", "device", "ms / sample")
        chart = plot_acc_memory(memory_compare_results_df)
        x_max = memory_compare_results_df["Peak GPU memory (MB)"].quantile(0.9)
        x_min = memory_compare_results_df["Peak GPU memory (MB)"].quantile(0.1)
        chart.layer[0].encoding.x.scale = alt.Scale(domain=[x_min, x_max], type=x_scale_type)
        output_df = memory_compare_results_df

    # Rate
    else:
        (device, amp_enabled, compiled, single_thread) = BENCHMARKS[benchmark]
        df = compare_results_df.drop_nulls(subset=["ms / sample"])
        df = df.filter(device=device, amp=amp_enabled, compile=compiled, single_thread=single_thread)
        device_compare_results_df = df.unique(subset=["Model name", "Resolution"]).sort("ms / sample", descending=False)
        device_compare_results_df = device_compare_results_df.drop("Peak GPU memory (MB)")
        device_compare_results_df = device_compare_results_df.with_columns(
            pl.col("Accuracy").cum_max().alias("Pareto frontier (ms)")
        )
        chart = plot_acc_rate(device_compare_results_df)

        x_max = device_compare_results_df["ms / sample"].quantile(0.95)
        x_min = device_compare_results_df["ms / sample"].min()
        if x_max is not None and x_min is not None:
            x_max = x_max * 1.04
            x_min = x_min * 0.96

        chart.layer[0].encoding.x.scale = alt.Scale(domain=[x_min, x_max], type=x_scale_type)
        output_df = device_compare_results_df

    output_df = output_df.select(
        [
            pl.col(col).round(4) if output_df.schema[col] in [pl.Float32, pl.Float64] else col
            for col in output_df.columns
        ]
    )

    return (chart, output_df.drop("Mistakes", "Samples", "torch_version"))


def app() -> None:
    with gr.Blocks(title="Birder Leaderboard", analytics_enabled=False) as leaderboard:
        gr.HTML("<center><h1>The Birder Leaderboard</h1></center>")
        with gr.Row():
            with gr.Column():
                pass

            with gr.Column():
                gr.Markdown(
                    """
                    Leaderboard of all the pre-trained Birder models across multiple datasets.

                    ### Benchmark Setup

                    * GPU: A5000 ADA Generation
                    * CPU: AMD Ryzen Threadripper PRO 7975WX
                    * PyTorch version: 2.7.1+cu128

                    ### Dataset Information

                    | Name                | Training samples | Validation samples | Classes     |
                    |---------------------|------------------|--------------------|-------------|
                    | arabian-peninsula   | 583,868          | 21,634             | 735         |
                    | eu-common           | 569,784          | 19,869             | 707         |
                    | il-all              | 462,430          | 18,621             | 550         |
                    | il-common           | 330,880          | 15,828             | 371         |
                    """
                )

            with gr.Column():
                pass

        with gr.Row():
            with gr.Column():
                pass

            with gr.Column():
                dataset_dropdown = gr.Dropdown(
                    choices=DATASETS,
                    label="Select Dataset",
                    value=DATASETS[0] if DATASETS else None,
                )
                benchmark_dropdown = gr.Dropdown(
                    choices=BENCHMARKS.keys(),
                    label="Select Benchmark",
                    value=next(iter(BENCHMARKS.keys())) if BENCHMARKS else None,
                    filterable=False,
                )

            with gr.Column():
                intermediate = gr.Checkbox(
                    label="Intermediate",
                    value=True,
                    info="Show models that underwent intermediate training (extra data)",
                )
                mim = gr.Checkbox(label="MIM", value=True, info="Show models with Masked Image Modeling pre-training")
                dist = gr.Checkbox(label="Distilled", value=True, info="Show distilled models")
                log_x = gr.Checkbox(label="Log scale X-axis", value=False)

            with gr.Column():
                pass

        with gr.Row():
            with gr.Column():
                pass

            with gr.Column(scale=2):
                search_bar = gr.Textbox(label="Model Filter", placeholder="e.g. convnext, efficient|mobile")

            with gr.Column():
                pass

        plot = gr.Plot(container=False)
        table = gr.Dataframe(show_search="search")

        inputs = [dataset_dropdown, benchmark_dropdown, intermediate, mim, dist, log_x, search_bar]
        outputs = [plot, table]
        leaderboard.load(update_data, inputs=inputs, outputs=outputs)

        dataset_dropdown.change(update_data, inputs=inputs, outputs=outputs)
        benchmark_dropdown.change(update_data, inputs=inputs, outputs=outputs)
        intermediate.change(update_data, inputs=inputs, outputs=outputs)
        mim.change(update_data, inputs=inputs, outputs=outputs)
        dist.change(update_data, inputs=inputs, outputs=outputs)
        log_x.change(update_data, inputs=inputs, outputs=outputs)
        search_bar.change(update_data, inputs=inputs, outputs=outputs)

    leaderboard.launch()


# Launch the app
if __name__ == "__main__":
    file_info = []
    for p in Path.glob(Path("."), "results_*.csv"):
        file_info.append((p.stat().st_size, p.stem.removeprefix("results_")))

    DATASETS = [dataset_name for _, dataset_name in sorted(file_info, key=lambda x: x[1], reverse=True)]

    app()