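"""Birder Leaderboard.

Gradio app that visualizes benchmark results for the pre-trained Birder models:
accuracy vs parameter count and accuracy vs inference rate, each with its Pareto frontier.
Expects one results CSV per dataset in the working directory (e.g. results_il-common.csv).
Assuming this file is saved as app.py, run it with: python app.py
"""
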
import altair as alt
import polars as pl
import gradio as gr

DATASETS = ["il-common"]

# Benchmark display name -> (device, amp, compile), matching the filter columns in the results CSV.
# The "Parameters" entry carries no benchmark settings: it compares models by parameter count only.
BENCHMARKS = {
    "Parameters": (None, None, None),
    "CPU Rate with compile": ("cpu", False, True),
    "CPU Rate without compile": ("cpu", False, False),
    "CUDA Rate with compile": ("cuda", False, True),
    "CUDA Rate AMP with compile": ("cuda", True, True),
}


def plot_acc_param(param_compare_results_df: pl.DataFrame, width: int = 1000, height: int = 680) -> alt.LayerChart:
    """Scatter plot of accuracy vs parameter count, with model name labels and the Pareto frontier"""
    df = param_compare_results_df.select(
        "Model name", "Model type", "Accuracy", "Top-3 accuracy", "Resolution", "Parameters (M)", "Pareto frontier (p)"
    )
    base = df.plot.point(
        x="Parameters (M)",
        y="Accuracy",
        color="Model type",
        shape="Resolution:N",
        tooltip=["Parameters (M)", "Accuracy", "Top-3 accuracy", "Model name", "Model type", "Resolution"],
    )
    text = base.mark_text(align="center", baseline="middle", dy=-10).encode(text="Model name")
    frontier = df.plot.line(x="Parameters (M)", y="Pareto frontier (p)").mark_line(
        interpolate="step-after", color="red", strokeWidth=0.3, strokeDash=(2, 2)
    )
    chart = base + text + frontier

    return chart.properties(title="Accuracy vs Parameter Count", width=width, height=height).configure_scale(zero=False)


def plot_acc_rate(rate_compare_results_df: pl.DataFrame, width: int = 1000, height: int = 680) -> alt.LayerChart:
    """Scatter plot of accuracy vs inference time (ms / sample), with model name labels and the Pareto frontier"""
    device = rate_compare_results_df["device"][0]
    compiled = rate_compare_results_df["compile"][0]
    batch_size = rate_compare_results_df["batch_size"][0]
    amp = rate_compare_results_df["amp"][0]
    df = rate_compare_results_df.select(
        "Model name",
        "Model type",
        "Accuracy",
        "Top-3 accuracy",
        "Resolution",
        "ms / sample",
        "Parameters (M)",
        "Pareto frontier (ms)",
    )
    base = df.plot.point(
        x="ms / sample",
        y="Accuracy",
        color="Model type",
        shape="Resolution:N",
        tooltip=[
            "ms / sample",
            "Parameters (M)",
            "Accuracy",
            "Top-3 accuracy",
            "Model name",
            "Model type",
            "Resolution",
        ],
    )
    text = base.mark_text(align="center", baseline="middle", dy=-10).encode(text="Model name")
    frontier = df.plot.line(x="ms / sample", y="Pareto frontier (ms)").mark_line(
        interpolate="step-after", color="red", strokeWidth=0.3, strokeDash=(2, 2)
    )
    chart = base + text + frontier

    return chart.properties(
        title=f"Accuracy vs {device.upper()} Rate (compile={compiled}, batch size={batch_size}, amp={amp})",
        width=width,
        height=height,
    ).configure_scale(zero=False)


def update_data(
    dataset: str, benchmark: str, intermediate: bool, mim: bool, log_x: bool
) -> tuple[alt.LayerChart, pl.DataFrame]:
    """Load the results for the selected dataset and build the chart and table for the selected benchmark"""
    compare_results_df = pl.read_csv(f"results_{dataset}.csv")
    if intermediate is False:
        compare_results_df = compare_results_df.filter(pl.col("Intermediate") == intermediate)
    if mim is False:
        compare_results_df = compare_results_df.filter(pl.col("MIM") == mim)

    x_scale_type = "log" if log_x is True else "linear"

    # Parameter count
    if benchmark == "Parameters":
        param_compare_results_df = compare_results_df.unique(subset=["Model name"]).sort(
            "Parameters (M)", descending=False
        )
        # Cumulative max of accuracy along the sorted axis gives the Pareto frontier
        param_compare_results_df = param_compare_results_df.with_columns(
            pl.col("Accuracy").cum_max().alias("Pareto frontier (p)")
        )
        param_compare_results_df = param_compare_results_df.drop("Samples / sec", "device", "ms / sample")
        chart = plot_acc_param(param_compare_results_df)
        chart.layer[0].encoding.x.scale = alt.Scale(domain=[0.5, 7.5], type=x_scale_type)
        output_df = param_compare_results_df

    # Rate
    else:
        (device, amp_enabled, compiled) = BENCHMARKS[benchmark]
        df = compare_results_df.filter(device=device, amp=amp_enabled, compile=compiled)
        device_compare_results_df = df.unique(subset=["Model name"]).sort("ms / sample", descending=False)
        device_compare_results_df = device_compare_results_df.with_columns(
            pl.col("Accuracy").cum_max().alias("Pareto frontier (ms)")
        )
        chart = plot_acc_rate(device_compare_results_df)
        # Clip the x-axis near the 95th percentile so outliers do not squash the plot
        x_max = device_compare_results_df["ms / sample"].quantile(0.95) * 1.04
        x_min = device_compare_results_df["ms / sample"].min() * 0.96
        chart.layer[0].encoding.x.scale = alt.Scale(domain=[x_min, x_max], type=x_scale_type)
        output_df = device_compare_results_df

    # Round float columns for display
    output_df = output_df.select(
        [
            pl.col(col).round(4) if output_df.schema[col] in [pl.Float32, pl.Float64] else col
            for col in output_df.columns
        ]
    )

    return (chart, output_df.drop("Mistakes", "Samples"))


def app() -> None:
    """Build and launch the Gradio leaderboard UI"""
    with gr.Blocks(title="Birder Leaderboard", analytics_enabled=False) as leaderboard:
        gr.HTML("<center><h1>The Birder Leaderboard</h1></center>")
        with gr.Row():
            with gr.Column():
                pass

            with gr.Column():
                gr.Markdown(
                    """
                    Leaderboard of all the pre-trained Birder models across all datasets.

                    * GPU: A5000 ADA Generation
                    * CPU: AMD Ryzen Threadripper PRO 7975WX
                    """
                )
            with gr.Column():
                pass

        with gr.Row():
            with gr.Column():
                pass

            with gr.Column():
                dataset_dropdown = gr.Dropdown(
                    choices=DATASETS,
                    label="Select Dataset",
                    value=DATASETS[0] if DATASETS else None,
                )
                benchmark_dropdown = gr.Dropdown(
                    choices=list(BENCHMARKS.keys()),
                    label="Select Benchmark",
                    value=next(iter(BENCHMARKS.keys())) if BENCHMARKS else None,
                    filterable=False,
                )
            with gr.Column():
                intermediate = gr.Checkbox(
                    label="Intermediate",
                    value=True,
                    info="Show models that underwent intermediate training (extra data)",
                )
                mim = gr.Checkbox(label="MIM", value=True, info="Show models with Masked Image Modeling pre-training")
                log_x = gr.Checkbox(label="Log scale X-axis", value=False)
            with gr.Column():
                pass

        plot = gr.Plot(container=False)
        table = gr.Dataframe(show_search=True)

        inputs = [dataset_dropdown, benchmark_dropdown, intermediate, mim, log_x]
        outputs = [plot, table]

        # Refresh the plot and table on load and whenever any control changes
        leaderboard.load(update_data, inputs=inputs, outputs=outputs)
        dataset_dropdown.change(update_data, inputs=inputs, outputs=outputs)
        benchmark_dropdown.change(update_data, inputs=inputs, outputs=outputs)
        intermediate.change(update_data, inputs=inputs, outputs=outputs)
        mim.change(update_data, inputs=inputs, outputs=outputs)
        log_x.change(update_data, inputs=inputs, outputs=outputs)

    leaderboard.launch()


# Launch the app
if __name__ == "__main__":
    app()