Spaces:

timm
/

leaderboard

Running

App Files Community

rwightman HF Staff committed on Sep 5, 2024

Commit

bd26425

verified ·

1 Parent(s): 6316ef5

Update app.py

Browse files

Add support for selecting between multiple benchmark sets. Make the log scale of the x/y axes selectable.

Files changed (1) hide show

app.py +93 -46

app.py CHANGED Viewed

@@ -17,16 +17,19 @@ def load_leaderboard():
     }
     # Load benchmark CSV files
-    main_bench = 'amp-nchw-pt240-cu124-rtx4090'
     benchmark_csv_files = {
         'amp-nchw-pt240-cu124-rtx4090': 'https://raw.githubusercontent.com/huggingface/pytorch-image-models/main/results/benchmark-infer-amp-nchw-pt240-cu124-rtx4090.csv',
-        'amp-nhwc-pt210-cu121-rtx3090': 'https://raw.githubusercontent.com/huggingface/pytorch-image-models/main/results/benchmark-infer-amp-nhwc-pt210-cu121-rtx3090.csv',
-        'fp32-nchw-pt221-cpu-i9_10940x-dynamo': 'https://raw.githubusercontent.com/huggingface/pytorch-image-models/main/results/benchmark-infer-fp32-nchw-pt221-cpu-i9_10940x-dynamo.csv',
     }
-    # FIXME support selecting benchmark 'infer_samples_per_sec' / 'infer_step_time' from different benchmark files.
     dataframes = {name: pd.read_csv(url) for name, url in results_csv_files.items()}
     bench_dataframes = {name: pd.read_csv(url) for name, url in benchmark_csv_files.items()}
     main_bench_dataframe = bench_dataframes[main_bench]
     # Clean up dataframes
@@ -68,17 +71,31 @@ def load_leaderboard():
     other_columns = [col for col in result.columns if col not in first_columns and col != 'model_benchmark']
     result = result[first_columns + other_columns]
-    # Drop columns that are no longer needed / add too much noise
-    result.drop('arch_name', axis=1, inplace=True)
-    result.drop('crop_pct', axis=1, inplace=True)
-    result.drop('interpolation', axis=1, inplace=True)
-    result['highlighted'] = False
-    # Round numerical values
-    result = result.round(2)
-    return result
 REGEX_PREFIX = "re:"
@@ -152,16 +169,26 @@ def create_scatter_plot(df, x_axis, y_axis, model_filter, highlight_filter):
 # Load the leaderboard data
-full_df = load_leaderboard()
 # Define the available columns for sorting and plotting
-sort_columns = ['avg_top1', 'avg_top5', 'infer_samples_per_sec', 'param_count', 'infer_gmacs', 'infer_macts', 'infer_tflop_s']
-plot_columns = ['infer_samples_per_sec', 'infer_gmacs', 'infer_macts', 'infer_tflop_s', 'param_count', 'avg_top1', 'avg_top5']
 DEFAULT_SEARCH = ""
 DEFAULT_SORT = "avg_top1"
 DEFAULT_X = "infer_samples_per_sec"
 DEFAULT_Y = "avg_top1"
 def update_leaderboard_and_plot(
         model_name=DEFAULT_SEARCH,
@@ -169,12 +196,17 @@ def update_leaderboard_and_plot(
         sort_by=DEFAULT_SORT,
         x_axis=DEFAULT_X,
         y_axis=DEFAULT_Y,
 ):
-    filtered_df = filter_leaderboard(full_df, model_name, sort_by)
     # Apply the highlight filter to the entire dataset so the output will be union (comparison) if the filters are disjoint
-    highlight_df = filter_leaderboard(full_df, highlight_name, sort_by) if highlight_name else None
     # Combine filtered_df and highlight_df, removing duplicates
     if highlight_df is not None:
         combined_df = pd.concat([filtered_df, highlight_df]).drop_duplicates().reset_index(drop=True)
@@ -182,10 +214,17 @@ def update_leaderboard_and_plot(
         combined_df['highlighted'] = combined_df['model'].isin(highlight_df['model'])
     else:
         combined_df = filtered_df
-    fig = create_scatter_plot(combined_df, x_axis, y_axis, model_name, highlight_name)
-    display_df = combined_df.drop(columns=['highlighted'])
-    display_df = display_df.style.apply(lambda x: ['background-color: #FFA500' if combined_df.loc[x.name, 'highlighted'] else '' for _ in x], axis=1).format(precision=2)
     return display_df, fig
@@ -193,39 +232,47 @@ with gr.Blocks(title="The timm Leaderboard") as app:
     gr.HTML("<center><h1>The timm (PyTorch Image Models) Leaderboard</h1></center>")
     gr.HTML("<p>This leaderboard is based on the results of the models from <a href='https://github.com/huggingface/pytorch-image-models'>timm</a>.</p>")
     gr.HTML("<p>Search tips:<br>- Use wildcards (* or ?) for pattern matching<br>- Use 're:' prefix for regex search<br>- Otherwise, fuzzy matching will be used</p>")
     with gr.Row():
         search_bar = gr.Textbox(lines=1, label="Model Filter", placeholder="e.g. resnet*, re:^vit, efficientnet", scale=3)
         sort_dropdown = gr.Dropdown(choices=sort_columns, label="Sort by", value=DEFAULT_SORT, scale=1)
     with gr.Row():
         highlight_bar = gr.Textbox(lines=1, label="Model Highlight/Compare Filter", placeholder="e.g. convnext*, re:^efficient")
     with gr.Row():
         x_axis = gr.Dropdown(choices=plot_columns, label="X-axis", value=DEFAULT_X)
         y_axis = gr.Dropdown(choices=plot_columns, label="Y-axis", value=DEFAULT_Y)
     update_btn = gr.Button(value="Update", variant="primary")
     leaderboard = gr.Dataframe()
     plot = gr.Plot()
-    app.load(update_leaderboard_and_plot, outputs=[leaderboard, plot])
-    search_bar.submit(
-        update_leaderboard_and_plot,
-        inputs=[search_bar, highlight_bar, sort_dropdown, x_axis, y_axis],
-        outputs=[leaderboard, plot]
-    )
-    highlight_bar.submit(
-        update_leaderboard_and_plot,
-        inputs=[search_bar, highlight_bar, sort_dropdown, x_axis, y_axis],
-        outputs=[leaderboard, plot]
-    )
-    update_btn.click(
-        update_leaderboard_and_plot,
-        inputs=[search_bar, highlight_bar, sort_dropdown, x_axis, y_axis],
-        outputs=[leaderboard, plot]
-    )
 app.launch()

     }
     # Load benchmark CSV files
     benchmark_csv_files = {
         'amp-nchw-pt240-cu124-rtx4090': 'https://raw.githubusercontent.com/huggingface/pytorch-image-models/main/results/benchmark-infer-amp-nchw-pt240-cu124-rtx4090.csv',
+        'amp-nhwc-pt240-cu124-rtx4090': 'https://raw.githubusercontent.com/huggingface/pytorch-image-models/main/results/benchmark-infer-amp-nhwc-pt240-cu124-rtx4090.csv',
+        'amp-nchw-pt240-cu124-rtx4090-dynamo': 'https://raw.githubusercontent.com/huggingface/pytorch-image-models/main/results/benchmark-infer-amp-nchw-pt240-cu124-rtx4090-dynamo.csv',
+        'amp-nchw-pt240-cu124-rtx3090': 'https://raw.githubusercontent.com/huggingface/pytorch-image-models/main/results/benchmark-infer-amp-nchw-pt240-cu124-rtx3090.csv',
+        'amp-nhwc-pt240-cu124-rtx3090': 'https://raw.githubusercontent.com/huggingface/pytorch-image-models/main/results/benchmark-infer-amp-nhwc-pt240-cu124-rtx3090.csv',
+        'fp32-nchw-pt240-cpu-i9_10940x-dynamo': 'https://raw.githubusercontent.com/huggingface/pytorch-image-models/main/results/benchmark-infer-fp32-nchw-pt240-cpu-i9_10940x-dynamo.csv',
+        'fp32-nchw-pt240-cpu-i7_12700h-dynamo': 'https://raw.githubusercontent.com/huggingface/pytorch-image-models/main/results/benchmark-infer-fp32-nchw-pt240-cpu-i7_12700h-dynamo.csv',
     }
     dataframes = {name: pd.read_csv(url) for name, url in results_csv_files.items()}
     bench_dataframes = {name: pd.read_csv(url) for name, url in benchmark_csv_files.items()}
+    bench_dataframes = {name: df for name, df in bench_dataframes.items() if 'infer_gmacs' in df.columns}
     main_bench_dataframe = bench_dataframes[main_bench]
     # Clean up dataframes
     other_columns = [col for col in result.columns if col not in first_columns and col != 'model_benchmark']
     result = result[first_columns + other_columns]
+    # Create fully merged dataframes for each benchmark set
+    merged_dataframes = {}
+    for bench_name, bench_df in bench_dataframes.items():
+        merged_df = pd.merge(result, bench_df, on=['arch_name', 'img_size'], how='left', suffixes=('', '_benchmark'))
+        # Calculate TFLOP/s
+        merged_df['infer_tflop_s'] = merged_df['infer_samples_per_sec'] * merged_df['infer_gmacs'] * 2 / 1000
+        # Reorder columns
+        first_columns = ['model', 'img_size', 'avg_top1', 'avg_top5']
+        other_columns = [col for col in merged_df.columns if col not in first_columns]
+        merged_df = merged_df[first_columns + other_columns].copy()
+        # Drop columns that are no longer needed / add too much noise
+        merged_df.drop('arch_name', axis=1, inplace=True)
+        merged_df.drop('crop_pct', axis=1, inplace=True)
+        merged_df.drop('interpolation', axis=1, inplace=True)
+        merged_df.drop('model_benchmark', axis=1, inplace=True)
+        merged_df['infer_usec_per_sample'] = 1e6 / merged_df.infer_samples_per_sec
+        merged_df['highlighted'] = False
+        merged_df = merged_df.round(2)
+        merged_dataframes[bench_name] = merged_df
+    return merged_dataframes
 REGEX_PREFIX = "re:"
 # Load the leaderboard data
+merged_dataframes = load_leaderboard()
 # Define the available columns for sorting and plotting
+sort_columns = ['avg_top1', 'avg_top5', 'imagenet_top1', 'imagenet_top5', 'infer_samples_per_sec', 'infer_usec_per_sample', 'param_count', 'infer_gmacs', 'infer_macts', 'infer_tflop_s']
+plot_columns = ['infer_samples_per_sec', 'infer_usec_per_sample', 'infer_gmacs', 'infer_macts', 'infer_tflop_s', 'param_count', 'avg_top1', 'avg_top5', 'imagenet_top1', 'imagenet_top5']
 DEFAULT_SEARCH = ""
 DEFAULT_SORT = "avg_top1"
 DEFAULT_X = "infer_samples_per_sec"
 DEFAULT_Y = "avg_top1"
+DEFAULT_BM = 'amp-nchw-pt240-cu124-rtx4090'
+def col_formatter(value, precision=None):
+    if isinstance(value, int):
+        return f'{value:d}'
+    elif isinstance(value, float):
+        return f'{value:.{precision}f}' if precision is not None else f'{value:g}'
+    return str(value)
 def update_leaderboard_and_plot(
         model_name=DEFAULT_SEARCH,
         sort_by=DEFAULT_SORT,
         x_axis=DEFAULT_X,
         y_axis=DEFAULT_Y,
+        benchmark_selection=DEFAULT_BM,
+        log_x=True,
+        log_y=True,
 ):
+    df = merged_dataframes[benchmark_selection].copy()
+    filtered_df = filter_leaderboard(df, model_name, sort_by)
     # Apply the highlight filter to the entire dataset so the output will be union (comparison) if the filters are disjoint
+    highlight_df = filter_leaderboard(df, highlight_name, sort_by) if highlight_name else None
     # Combine filtered_df and highlight_df, removing duplicates
     if highlight_df is not None:
         combined_df = pd.concat([filtered_df, highlight_df]).drop_duplicates().reset_index(drop=True)
         combined_df['highlighted'] = combined_df['model'].isin(highlight_df['model'])
     else:
         combined_df = filtered_df
+        combined_df['highlighted'] = False
+    fig = create_scatter_plot(combined_df, x_axis, y_axis, model_name, highlight_name, log_x, log_y)
+    display_df = combined_df.drop(columns=['highlighted'])
+    display_df = display_df.style.apply(lambda x: ['background-color: #FFA500' if combined_df.loc[x.name, 'highlighted'] else '' for _ in x], axis=1).format(
+        {
+            'infer_batch_size': lambda x: col_formatter(x),  # Integer column
+        },
+        precision=2,
+    )
     return display_df, fig
     gr.HTML("<center><h1>The timm (PyTorch Image Models) Leaderboard</h1></center>")
     gr.HTML("<p>This leaderboard is based on the results of the models from <a href='https://github.com/huggingface/pytorch-image-models'>timm</a>.</p>")
     gr.HTML("<p>Search tips:<br>- Use wildcards (* or ?) for pattern matching<br>- Use 're:' prefix for regex search<br>- Otherwise, fuzzy matching will be used</p>")
     with gr.Row():
         search_bar = gr.Textbox(lines=1, label="Model Filter", placeholder="e.g. resnet*, re:^vit, efficientnet", scale=3)
         sort_dropdown = gr.Dropdown(choices=sort_columns, label="Sort by", value=DEFAULT_SORT, scale=1)
     with gr.Row():
         highlight_bar = gr.Textbox(lines=1, label="Model Highlight/Compare Filter", placeholder="e.g. convnext*, re:^efficient")
     with gr.Row():
         x_axis = gr.Dropdown(choices=plot_columns, label="X-axis", value=DEFAULT_X)
         y_axis = gr.Dropdown(choices=plot_columns, label="Y-axis", value=DEFAULT_Y)
+    with gr.Row():
+        benchmark_dropdown = gr.Dropdown(
+            choices=list(merged_dataframes.keys()),
+            label="Benchmark Selection",
+            value=DEFAULT_BM,
+        )
+    with gr.Row():
+        log_x = gr.Checkbox(label="Log scale X-axis", value=True)
+        log_y = gr.Checkbox(label="Log scale Y-axis", value=True)
     update_btn = gr.Button(value="Update", variant="primary")
     leaderboard = gr.Dataframe()
     plot = gr.Plot()
+    inputs = [search_bar, highlight_bar, sort_dropdown, x_axis, y_axis, benchmark_dropdown, log_x, log_y]
+    outputs = [leaderboard, plot]
+    app.load(update_leaderboard_and_plot, outputs=outputs)
+    search_bar.submit(update_leaderboard_and_plot, inputs=inputs, outputs=outputs)
+    highlight_bar.submit(update_leaderboard_and_plot, inputs=inputs, outputs=outputs)
+    sort_dropdown.change(update_leaderboard_and_plot, inputs=inputs, outputs=outputs)
+    x_axis.change(update_leaderboard_and_plot, inputs=inputs, outputs=outputs)
+    y_axis.change(update_leaderboard_and_plot, inputs=inputs, outputs=outputs)
+    benchmark_dropdown.change(update_leaderboard_and_plot, inputs=inputs, outputs=outputs)
+    log_x.change(update_leaderboard_and_plot, inputs=inputs, outputs=outputs)
+    log_y.change(update_leaderboard_and_plot, inputs=inputs, outputs=outputs)
+    update_btn.click(update_leaderboard_and_plot, inputs=inputs, outputs=outputs)
 app.launch()