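"""Gradio app for the Birder leaderboard Space.

Loads per-dataset benchmark results from results_<dataset>.csv and renders
accuracy vs. parameter-count or accuracy vs. inference-rate scatter plots with a
Pareto frontier, alongside a searchable results table.
"""
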
import altair as alt
import polars as pl
import gradio as gr

DATASETS = ["il-common"]

# Maps a benchmark label to the (device, amp, compile) values used to filter the
# results CSV; "Parameters" compares parameter count instead, so the flags are unused.
BENCHMARKS = {
    "Parameters": (None, None, None),
    "CPU Rate with compile": ("cpu", False, True),
    "CPU Rate without compile": ("cpu", False, False),
    "CUDA Rate with compile": ("cuda", False, True),
    "CUDA Rate AMP with compile": ("cuda", True, True),
}

def plot_acc_param(param_compare_results_df: pl.DataFrame, width: int = 1000, height: int = 680) -> alt.LayerChart:
    """Scatter plot of accuracy vs. parameter count, with model-name labels and the Pareto frontier."""
    df = param_compare_results_df.select(
        "Model name", "Model type", "Accuracy", "Top-3 accuracy", "Resolution", "Parameters (M)", "Pareto frontier (p)"
    )
    base = df.plot.point(
        x="Parameters (M)",
        y="Accuracy",
        color="Model type",
        shape="Resolution:N",
        tooltip=["Parameters (M)", "Accuracy", "Top-3 accuracy", "Model name", "Model type", "Resolution"],
    )
    text = base.mark_text(align="center", baseline="middle", dy=-10).encode(text="Model name")
    frontier = df.plot.line(x="Parameters (M)", y="Pareto frontier (p)").mark_line(
        interpolate="step-after", color="red", strokeWidth=0.3, strokeDash=(2, 2)
    )
    chart = base + text + frontier
    return chart.properties(title="Accuracy vs Parameter Count", width=width, height=height).configure_scale(zero=False)

def plot_acc_rate(rate_compare_results_df: pl.DataFrame, width: int = 1000, height: int = 680) -> alt.LayerChart:
    """Scatter plot of accuracy vs. inference time (ms / sample), with model-name labels and the Pareto frontier."""
    device = rate_compare_results_df["device"][0]
    compiled = rate_compare_results_df["compile"][0]
    batch_size = rate_compare_results_df["batch_size"][0]
    amp = rate_compare_results_df["amp"][0]
    df = rate_compare_results_df.select(
        "Model name",
        "Model type",
        "Accuracy",
        "Top-3 accuracy",
        "Resolution",
        "ms / sample",
        "Parameters (M)",
        "Pareto frontier (ms)",
    )
    base = df.plot.point(
        x="ms / sample",
        y="Accuracy",
        color="Model type",
        shape="Resolution:N",
        tooltip=[
            "ms / sample",
            "Parameters (M)",
            "Accuracy",
            "Top-3 accuracy",
            "Model name",
            "Model type",
            "Resolution",
        ],
    )
    text = base.mark_text(align="center", baseline="middle", dy=-10).encode(text="Model name")
    frontier = df.plot.line(x="ms / sample", y="Pareto frontier (ms)").mark_line(
        interpolate="step-after", color="red", strokeWidth=0.3, strokeDash=(2, 2)
    )
    chart = base + text + frontier
    return chart.properties(
        title=f"Accuracy vs {device.upper()} Rate (compile={compiled}, batch size={batch_size}, amp={amp})",
        width=width,
        height=height,
    ).configure_scale(zero=False)

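# Note: the results CSV layout is not documented in this file; the columns listed here are
# inferred from how update_data() and the plotting helpers use the frame. Each results_<dataset>.csv
# is expected to contain at least: "Model name", "Model type", "Accuracy", "Top-3 accuracy",
# "Resolution", "Parameters (M)", "Samples / sec", "ms / sample", "device", "amp", "compile",
# "batch_size", "Intermediate", "MIM", "Mistakes" and "Samples".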
def update_data(
    dataset: str, benchmark: str, intermediate: bool, mim: bool, log_x: bool
) -> tuple[alt.LayerChart, pl.DataFrame]:
    """Load the results CSV for the selected dataset and build the chart and table for the selected benchmark."""
    compare_results_df = pl.read_csv(f"results_{dataset}.csv")
    if intermediate is False:
        compare_results_df = compare_results_df.filter(pl.col("Intermediate") == intermediate)
    if mim is False:
        compare_results_df = compare_results_df.filter(pl.col("MIM") == mim)

    x_scale_type = "log" if log_x is True else "linear"

    # Parameter count
    if benchmark == "Parameters":
        param_compare_results_df = compare_results_df.unique(subset=["Model name"]).sort(
            "Parameters (M)", descending=False
        )
        # Pareto frontier: running maximum of accuracy over models sorted by parameter count
        param_compare_results_df = param_compare_results_df.with_columns(
            pl.col("Accuracy").cum_max().alias("Pareto frontier (p)")
        )
        param_compare_results_df = param_compare_results_df.drop("Samples / sec", "device", "ms / sample")
        chart = plot_acc_param(param_compare_results_df)
        chart.layer[0].encoding.x.scale = alt.Scale(domain=[0.5, 7.5], type=x_scale_type)
        output_df = param_compare_results_df

    # Rate
    else:
        (device, amp_enabled, compiled) = BENCHMARKS[benchmark]
        df = compare_results_df.filter(device=device, amp=amp_enabled, compile=compiled)
        device_compare_results_df = df.unique(subset=["Model name"]).sort("ms / sample", descending=False)
        # Pareto frontier: running maximum of accuracy over models sorted by inference time
        device_compare_results_df = device_compare_results_df.with_columns(
            pl.col("Accuracy").cum_max().alias("Pareto frontier (ms)")
        )
        chart = plot_acc_rate(device_compare_results_df)
        x_max = device_compare_results_df["ms / sample"].quantile(0.95) * 1.04
        x_min = device_compare_results_df["ms / sample"].min() * 0.96
        chart.layer[0].encoding.x.scale = alt.Scale(domain=[x_min, x_max], type=x_scale_type)
        output_df = device_compare_results_df

    # Round float columns for display
    output_df = output_df.select(
        [
            pl.col(col).round(4) if output_df.schema[col] in [pl.Float32, pl.Float64] else col
            for col in output_df.columns
        ]
    )

    return (chart, output_df.drop("Mistakes", "Samples"))

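# A minimal sketch (not executed by the app) of the Pareto-frontier trick used in update_data():
# after sorting by the x-axis metric, a cumulative max of "Accuracy" gives, at each point, the best
# accuracy achievable at that cost or less.
#
#   demo = pl.DataFrame({"Parameters (M)": [1.0, 2.0, 3.0], "Accuracy": [0.70, 0.68, 0.75]})
#   demo.sort("Parameters (M)").with_columns(pl.col("Accuracy").cum_max().alias("Pareto frontier (p)"))
#   # -> frontier values [0.70, 0.70, 0.75]
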
def app() -> None:
    with gr.Blocks(title="Birder Leaderboard", analytics_enabled=False) as leaderboard:
        gr.HTML("<center><h1>The Birder Leaderboard</h1></center>")
        with gr.Row():
            with gr.Column():
                pass

            with gr.Column():
                gr.Markdown(
                    """
                    Leaderboard of all the pre-trained Birder models across all datasets.

                    * GPU: A5000 ADA Generation
                    * CPU: AMD Ryzen Threadripper PRO 7975WX
                    """
                )

            with gr.Column():
                pass

        with gr.Row():
            with gr.Column():
                pass

            with gr.Column():
                dataset_dropdown = gr.Dropdown(
                    choices=DATASETS,
                    label="Select Dataset",
                    value=DATASETS[0] if DATASETS else None,
                )
                benchmark_dropdown = gr.Dropdown(
                    choices=list(BENCHMARKS.keys()),
                    label="Select Benchmark",
                    value=next(iter(BENCHMARKS.keys())) if BENCHMARKS else None,
                    filterable=False,
                )

            with gr.Column():
                intermediate = gr.Checkbox(
                    label="Intermediate",
                    value=True,
                    info="Show models that underwent intermediate training (extra data)",
                )
                mim = gr.Checkbox(label="MIM", value=True, info="Show models with Masked Image Modeling pre-training")
                log_x = gr.Checkbox(label="Log scale X-axis", value=False)

            with gr.Column():
                pass

        plot = gr.Plot(container=False)
        table = gr.Dataframe(show_search=True)

        # Recompute the plot and table on page load and whenever any control changes
        inputs = [dataset_dropdown, benchmark_dropdown, intermediate, mim, log_x]
        outputs = [plot, table]
        leaderboard.load(update_data, inputs=inputs, outputs=outputs)
        dataset_dropdown.change(update_data, inputs=inputs, outputs=outputs)
        benchmark_dropdown.change(update_data, inputs=inputs, outputs=outputs)
        intermediate.change(update_data, inputs=inputs, outputs=outputs)
        mim.change(update_data, inputs=inputs, outputs=outputs)
        log_x.change(update_data, inputs=inputs, outputs=outputs)

    leaderboard.launch()

# Launch the app
if __name__ == "__main__":
    app()
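
# To run locally (assuming this file is saved as app.py, the usual Hugging Face Spaces entry point):
# `python app.py`. launch() serves the UI on Gradio's default local address, typically
# http://127.0.0.1:7860 unless overridden (e.g. via GRADIO_SERVER_PORT).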