import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from scipy.stats import zscore
import scipy.interpolate
import numpy as np
import plotly.figure_factory as ff

# Load dataset
df = pd.read_parquet("hf://datasets/derek-thomas/classification-ie-optimization/data/train-00000-of-00001.parquet")

# Image Mapping
df['image'] = df['image'].map({
    'michaelf34/infinity:0.0.75-trt-onnx': 'trt-onnx',
    'michaelf34/infinity:0.0.75': 'default'
})

# Cheapest overall configuration (by cost per 1B requests); shown in the sidebar
best_config = df[['1B_cost', 'hw_type', 'image', 'batch_size', 'vus']].sort_values(by='1B_cost').head(n=1)
best_config['1B_cost'] = best_config['1B_cost'].round(2)

def count_failed_requests(df):
    """Total number of failed requests across all runs; 0 means every request succeeded."""
    return (df.total_requests - df.successful_requests).sum()

def top_outliers(df):
    df = df.copy()  # work on a copy so we don't mutate the shared dataframe

    # Calculate absolute deviation from the mean
    df['absolute_deviation'] = abs(df['accuracy_percentage'] - df['accuracy_percentage'].mean())

    # Calculate z-score
    df['z_score'] = zscore(df['accuracy_percentage'])

    # Get the top 5 outliers with the highest absolute deviation
    top_outliers = df.nlargest(5, 'absolute_deviation')[['hw_type', 'batch_size', 'vus', 'total_requests', 'accuracy_percentage', 'absolute_deviation', 'z_score']]
    top_outliers['accuracy_percentage'] = top_outliers['accuracy_percentage'].round(2)
    top_outliers['absolute_deviation'] = top_outliers['absolute_deviation'].round(2)
    top_outliers['z_score'] = top_outliers['z_score'].round(2)
    return top_outliers

def best_image_by_cost_savings(df):
    # Identify rows where we have different images but the same VUs, HW Type, and batch_size
    grouped = df.groupby(["vus", "batch_size", "hw_type"])

    # Filter only those groups that have multiple unique images
    valid_groups = grouped.filter(lambda x: x["image"].nunique() > 1)

    # Compute cost differences between images within the same (VUs, batch_size, hw_type)
    def compute_best_image(group):
        """Find the best (cheapest) image in each (VUs, batch_size, hw_type) group."""
        group = group.sort_values("1B_cost", ascending=True)
        
        best_image = group.iloc[0]["image"] if not group.empty else None  # Safely get best image
        cost_max = group["1B_cost"].max()
        group["better_image"] = best_image
        group["cost_savings_percent"] = (
            100 * (cost_max - group["1B_cost"]) / cost_max if cost_max > 0 else 0
        )
        
        return group

    cost_diffs = valid_groups.groupby(["vus", "batch_size", "hw_type"]).apply(compute_best_image).reset_index(drop=True)

    # Create a consistent color mapping for images
    unique_images = cost_diffs["better_image"].unique()
    colors = px.colors.qualitative.Set1  # Use a predefined color set

    # Assign a color to each unique image
    color_map = {image: colors[i % len(colors)] for i, image in enumerate(unique_images)}

    # Create a separate **scatter plot** for each HW type
    figs = []
    for hw in cost_diffs["hw_type"].unique():
        subset = cost_diffs[cost_diffs["hw_type"] == hw]

        fig = px.scatter(
            subset,
            x="batch_size",
            y="vus",
            color="better_image",  # Color based on best image
            size="cost_savings_percent",  # Size based on % savings
            title=f"Best Image by Cost Savings - HW Type: {hw}<br><sup>Color = Best Image (Consistent). Size = Savings % of cheaper image</sup>",
            labels={"batch_size": "Batch Size (log)", "vus": "VUs (log)", "better_image": "Cheaper Image"},
            hover_data=["1B_cost", "cost_savings_percent"],
            log_x=True,  # Log scale for batch_size
            log_y=True,  # Log scale for VUs
            color_discrete_map=color_map,  # Keep the same color across charts
            category_orders={"better_image": sorted(unique_images)}  # Ensure consistent legend order

        )
        figs.append(fig)
    return figs


def plot_accuracy_distribution(df):
    mean_accuracy = df["accuracy_percentage"].mean()
    std_dev_accuracy = df["accuracy_percentage"].std()

    # Create a distribution plot (outline instead of bins)
    fig = ff.create_distplot([df['accuracy_percentage']], ['Accuracy Percentage'], show_hist=False, show_rug=True)

    # Add mean and standard deviation lines
    fig.add_trace(go.Scatter(x=[mean_accuracy, mean_accuracy], y=[0, 1],
                            mode="lines", name="Mean", line=dict(color="red", dash="dash")))

    fig.add_trace(go.Scatter(x=[mean_accuracy - std_dev_accuracy, mean_accuracy - std_dev_accuracy],
                            y=[0, 1],
                            mode="lines", name="Mean - 1 Std Dev", line=dict(color="blue", dash="dot")))

    fig.add_trace(go.Scatter(x=[mean_accuracy + std_dev_accuracy, mean_accuracy + std_dev_accuracy],
                            y=[0, 1],
                            mode="lines", name="Mean + 1 Std Dev", line=dict(color="blue", dash="dot")))

    # Update layout
    fig.update_layout(title="Density Plot of Accuracy Percentage",
                    xaxis_title="Accuracy Percentage",
                    yaxis_title="Density",
                    showlegend=True)
    return fig

def plot_cost_vs_latency(df):
    # Get the 100 lowest-cost points
    bottom_100 = df.nsmallest(100, "1B_cost").copy()
    bottom_100["1B_cost"] = bottom_100["1B_cost"].round(2)
    bottom_100["throughput_req_per_sec"] = bottom_100["throughput_req_per_sec"].round(2)
    bottom_100["avg_latency_ms"] = bottom_100["avg_latency_ms"].round(3)

    # Create a combined column for unique symbol assignment
    bottom_100["hw_image_combo"] = bottom_100["hw_type"] + " | " + bottom_100["image"]

    # Find the global minimum cost point *only within* the bottom 100 points
    global_min = bottom_100.nsmallest(1, "1B_cost")

    # Function to find Pareto-efficient points (minimizing cost and latency)
    def pareto_efficient(df, x_col, y_col):
        sorted_df = df.sort_values(by=[x_col, y_col])  # Sort by latency, then cost
        pareto_points = []
        min_cost = np.inf  # Start with a very high cost
        
        for _, row in sorted_df.iterrows():
            if row[y_col] < min_cost:  # If this cost is the lowest seen so far
                pareto_points.append(row)
                min_cost = row[y_col]
        
        return pd.DataFrame(pareto_points)

    # Compute Pareto front
    pareto_front = pareto_efficient(bottom_100, "avg_latency_ms", "1B_cost")

    # Base scatter plot (showing 100 lowest-cost points)
    fig = px.scatter(
        bottom_100, 
        x="avg_latency_ms", 
        y="1B_cost", 
        symbol="hw_image_combo",  # Use combined hw_type and image as symbol
        color="batch_size",  
        color_continuous_scale="viridis",  
        opacity=0.7, 
        title="1B Requests Cost/day vs. Latency<br><sup>Pareto-efficient points and global min highlighted</sup>",
        labels={
            "avg_latency_ms": "Average Latency (ms)",
            "1B_cost": "Daily Cost ($)",
            "hw_image_combo": "Hardware | Image",
            "batch_size": "Batch Size",
        },
        hover_data=["vus", "batch_size", "throughput_req_per_sec"]
    )

    # Add global minimum cost point (red star)
    fig.add_trace(
        go.Scatter(
            x=global_min["avg_latency_ms"],
            y=global_min["1B_cost"],
            mode="markers",
            marker=dict(size=12, color="red", symbol="star", line=dict(width=2, color="black")),
            name="Global Min Cost",
            hovertemplate="Latency: %{x} ms<br>Cost: $%{y}<br>Batch Size: %{text}<br>VUs: %{customdata[0]}<br>Throughput: %{customdata[1]} req/sec",
            text=global_min["batch_size"],
            customdata=global_min[["vus", "throughput_req_per_sec"]].values,
            showlegend=False
        )
    )

    # Add Pareto curve with detailed hover info
    fig.add_trace(
        go.Scatter(
            x=pareto_front["avg_latency_ms"],
            y=pareto_front["1B_cost"],
            mode="lines+markers",
            line=dict(color="red", width=2, dash="dash"),
            marker=dict(size=6, color="red"),
            name="Pareto Front",
            hovertemplate="Latency: %{x} ms<br>Cost: $%{y}<br>Batch Size: %{text}<br>VUs: %{customdata[0]}<br>Throughput: %{customdata[1]} req/sec",
            text=pareto_front["batch_size"],
            customdata=pareto_front[["vus", "throughput_req_per_sec"]].values,
            showlegend=False
        )
    )

    # Adjust layout (Center title)
    fig.update_layout(
        title_x=0.5,  # Centers title
        legend=dict(
            x=1,  
            y=1.2,  
            title="Hardware | Image"
        )
    )
    return fig

def plot_cost_vs_vus_batch(df, hw=None, img=None):
    # Define percentile bins for cost
    percentiles = np.linspace(0, 100, 40)  # Define bins using percentiles
    cost_bins = np.percentile(df["1B_cost"], percentiles)  # Compute cost bins

    # Create grid for interpolation (Now X = VUs, Y = batch_size)
    grid_x_real, grid_y_real = np.meshgrid(
        np.linspace(df["vus"].min(), df["vus"].max(), 100),  # X-axis is now VUs
        np.linspace(df["batch_size"].min(), df["batch_size"].max(), 100)  # Y-axis is now batch_size
    )

    # Interpolate cost data (Now X = VUs, Y = batch_size)
    grid_z_real = scipy.interpolate.griddata(
        (df["vus"], df["batch_size"]),  # Order remains the same
        df["1B_cost"],
        (grid_x_real, grid_y_real),  # Adjusted grid order
        method='linear'  # Linear interpolation for smoother transitions
    )

    # Identify the lowest cost point
    lowest_cost_points = df.nsmallest(1, "1B_cost")

    # Identify the minimum cost point for each batch size
    min_per_batch = df.loc[df.groupby("batch_size")["1B_cost"].idxmin()]

    # Create Plotly figure
    fig = go.Figure()

    # Contour plot with percentile-based cost bins
    fig.add_trace(
        go.Contour(
            z=grid_z_real,
            x=np.linspace(df["vus"].min(), df["vus"].max(), 100),  # X-axis is now VUs
            y=np.linspace(df["batch_size"].min(), df["batch_size"].max(), 100),  # Y-axis is now batch_size
            colorscale="viridis_r",
            contours=dict(
                start=cost_bins[0],
                end=cost_bins[-1],
                size=np.diff(cost_bins).mean(),  # Uses percentile bins
                showlabels=True
            ),
            colorbar=dict(title="Cost (1B Requests)"),
            hovertemplate="VUs: %{x}<br>Batch Size: %{y}<br>Cost: %{z}",
            opacity=0.8  # Reduce opacity to make scatter points stand out
        )
    )

    # Scatter plot of actual data points
    fig.add_trace(
        go.Scatter(
            x=df["vus"],  # X-axis is now VUs
            y=df["batch_size"],  # Y-axis is now batch_size
            mode="markers",
            marker=dict(size=3, color="white", line=dict(width=0.5, color="black")),
            name="Real Data Points",
            hovertemplate="VUs: %{x}<br>Batch Size: %{y}<br>Cost: %{text}",
            text=df["1B_cost"].round(2),
            showlegend=False
        )
    )

    # Scatter plot for lowest global cost points (Red Stars)
    fig.add_trace(
        go.Scatter(
            x=lowest_cost_points["vus"],
            y=lowest_cost_points["batch_size"],
            mode="markers+text",
            marker=dict(size=10, color="red", symbol="star", line=dict(width=1.5, color="black")),
            name="Lowest Cost Point",
            hovertemplate="VUs: %{x}<br>Batch Size: %{y}<br>Cost: %{text}",
            text=lowest_cost_points["1B_cost"].round(2),
            textposition="top center",
            showlegend=False
        )
    )

    # Scatter plot for minimum cost per batch size (Small Red Dots)
    fig.add_trace(
        go.Scatter(
            x=min_per_batch["vus"],
            y=min_per_batch["batch_size"],
            mode="markers",
            marker=dict(size=6, color="red", line=dict(width=0.5, color="black")),
            name="Min Cost per Batch Size",
            hovertemplate="VUs: %{x}<br>Batch Size: %{y}<br>Cost: %{text}",
            text=min_per_batch["1B_cost"].round(2),
            showlegend=False
        )
    )

    fig.update_layout(
        title=f"Cost vs VUs and Batch Size ({hw}, Image: {img})" if hw else 'Cost vs VUs and Batch Size',
        xaxis_title="VUs",
        yaxis_title="Batch Size",
        xaxis_type="log",
        yaxis_type="log"
    )

    text1 =  f"Contour Plot of Cost vs VUs and Batch Size ({hw}, Image: {img})<br><sup>The lowest cost size per batch is highlighted in red</sup>"
    text2 =  f"Contour Plot of Cost vs VUs and Batch Size<br><sup>The lowest cost size per batch is highlighted in red</sup>"
    fig.update_layout(
        title={
            "text": text1 if hw else text2,
            "x": 0.5,  # Center align
            "y": 0.85,  # Adjust vertical position
        }
    )

    return fig


def filter_dataframe(df, vus_filter):
    return df[df['vus'] <= vus_filter]

def get_monotonic_dataframe(df, vus_filter):
    df_filtered = filter_dataframe(df, vus_filter)
    grouped = df_filtered.groupby(['hw_type', 'image', 'batch_size'])

    # Collect the group keys whose throughput never decreases as VUs increase
    monotonic_keys = []
    for (hw_type, image, batch_size), group in grouped:
        group_sorted = group.sort_values('vus')
        if group_sorted['throughput_req_per_sec'].is_monotonic_increasing:
            monotonic_keys.append((hw_type, image, batch_size))

    return pd.DataFrame(monotonic_keys, columns=['hw_type', 'image', 'batch_size'])


enough_vus_md = """## Did we try enough VUs?
How do we know that we tried enough VUs? What if we had tried a higher VU count and throughput kept increasing? If that's the case, we would see a monotonically increasing relationship between VUs and throughput, and we would need to run more tests. Let's check!

We can check by:
1. **Grouping the data** by `hw_type`, `image`, and `batch_size` to match how we generated the experiments
1. **Sorting the data** by `vus` within each group so the check sees the runs in the correct order
1. **Checking for a monotonic increase** in `throughput_req_per_sec` and flagging the groups whose throughput always increases as VUs increase (see the sketch below)
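
A minimal sketch of this check in pandas (`df` is the benchmark results dataframe):

```python
# Flag (hw_type, image, batch_size) groups whose throughput never stops
# increasing as VUs grow; those groups may need higher VU counts.
needs_more_vus = []
for key, group in df.groupby(["hw_type", "image", "batch_size"]):
    ordered = group.sort_values("vus")
    if ordered["throughput_req_per_sec"].is_monotonic_increasing:
        needs_more_vus.append(key)
```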

But how do we **know**? We can use the slider to see what would have happened if we had stopped at a lower VU count. Say we had stopped at 256 instead of our actual 1024; we might have left some potential on the table. We can simulate this by filtering our runs.

### Verification
1. Put the slider at `256` and see that there are a number of scenarios where we should have checked a higher VU count
1. Put the slider at `1024` and verify that no scenarios are shown
"""

accuracy_md1 = """
## Are we Accurate Enough?
We shouldn't expect to see significant changes in accuracy. We should see a pretty tight distribution, but there might be some deviation, since at lower VUs we won't see as many of our `10_000` samples as we did at higher VUs.
"""
accuracy_md2 = """
Here we can see some deviation with a large z-score, but overall it's not that large an absolute deviation. These outliers also occur when we have relatively low `total_requests`, which makes sense.
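
Both columns in the table below are computed directly from `accuracy_percentage`:

```python
from scipy.stats import zscore

# Deviation of each run's accuracy from the overall mean, plus its z-score
df["absolute_deviation"] = (df["accuracy_percentage"] - df["accuracy_percentage"].mean()).abs()
df["z_score"] = zscore(df["accuracy_percentage"])
```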

We should worry more if we see major `absolute_deviation` with higher `total_requests`. We can see those values here:
"""

best_image_by_cost_savings_md = """## Best Image by Cost Savings

### Chart
- Color = Best Image for that `vu`/`batch_size`/`GPU`
- Size = % cost savings vs. the worst (most expensive) image in that group.
- Small dots don't mean much; large dots do (the savings formula is sketched below)
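
For each `(vus, batch_size, hw_type)` group, the dot size is the savings of an image relative to the most expensive image in that group:

```python
import pandas as pd

def cost_savings_percent(group: pd.DataFrame) -> pd.Series:
    # % saved by each row vs. the most expensive image in the same
    # (vus, batch_size, hw_type) group
    cost_max = group["1B_cost"].max()
    return 100 * (cost_max - group["1B_cost"]) / cost_max
```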


### Analysis
We can see that `trt-onnx` is quite a bit stronger in `nvidia-l4`. There are no significant red dots.

#### `nvidia-l4`
- `trt-onnx` (blue) dominates most points, indicating it's typically the cheaper choice
- At larger batch sizes (right side) and higher VUs (upper part of the chart), you often see big blue bubbles, suggesting `trt-onnx` can save a significant percentage versus `default`
- A few red points (i.e., `default` cheaper) appear at lower batch sizes, but they're less frequent and often show smaller savings differences

#### `nvidia-t4`
- There's more of a mix: some points favor `default` and others favor `trt-onnx`
- You can see some large red bubbles, meaning `default` can occasionally produce big savings under certain (VUs, batch_size) conditions
- However, `trt-onnx` is still cheaper in many scenarios, especially toward higher batch sizes

### Takeaways
If you have time/budget, it's better to analyze both; you can see that they are close at times. But if you only have time/budget for one, then at the current cost ratio consider the `nvidia-l4` in this case.
"""

cost_vs_latency_md = """## 1B Requests Cost vs. Latency

This scatter plot visualizes the relationship between **average latency (ms)** and **cost per billion requests per day** for different **hardware types (hw_type)** and **image processing configurations (image)**.

### How to Read the Chart:
- **Point Symbols**: Represent different **hardware + image** configurations.
- **Color Gradient**: Represents batch size, helping to see cost trends across different batch sizes.
- **Hover Data**: Displays additional details like **VUs, batch size, and throughput per second**.

### Key Features:
- **Global Minimum Cost (Red Star)**: Marks the configuration with the lowest cost.
- **Pareto Front (Red Dashed Line + Points)**: Highlights the most efficient configurations, minimizing both cost and latency. These configurations offer the best trade-offs.
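
The Pareto front is found in a single pass over the points sorted by latency, keeping each point that sets a new cost minimum (a sketch of the helper used here):

```python
import numpy as np
import pandas as pd

def pareto_efficient(points, x_col, y_col):
    # Keep each row that sets a new minimum cost as latency increases
    best = np.inf
    keep = []
    for _, row in points.sort_values([x_col, y_col]).iterrows():
        if row[y_col] < best:
            keep.append(row)
            best = row[y_col]
    return pd.DataFrame(keep)
```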

### How to Use:
- Find the **lowest-cost, low-latency configurations** by looking at points near the bottom-left.
- Use the **Pareto front** to identify cost-effective configurations.
- Compare different **hardware and image processing strategies** to optimize your setup.

This visualization helps in selecting the best configuration balancing **performance (low latency)** and **cost efficiency**.
"""

contour_md = """## Cost vs VUs and Batch Size Contour Plots

These contour plots visualize the cost per billion requests per day (`1B_cost`) as a function of **VUs (Virtual Users)** and **Batch Size** for different hardware configurations (`hw_type`) and image types (`image`).
The white dots are real measurements; the surface between them is interpolated.
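
The interpolation is linear over the measured points (mirroring the `griddata` call used to build these plots):

```python
import numpy as np
import scipy.interpolate

# Interpolate measured cost onto a 100x100 (VUs, batch size) grid
grid_x, grid_y = np.meshgrid(
    np.linspace(df["vus"].min(), df["vus"].max(), 100),
    np.linspace(df["batch_size"].min(), df["batch_size"].max(), 100),
)
grid_z = scipy.interpolate.griddata(
    (df["vus"], df["batch_size"]), df["1B_cost"], (grid_x, grid_y), method="linear"
)
```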

### How to Read the Charts:
- **Color Gradient**: Shows the cost levels, with darker colors representing higher costs and lighter colors representing lower costs.
- **Contour Lines**: Represent cost levels, helping identify cost-effective regions.
- **White Dots**: Represent real data points used to generate the interpolated surface.
- **Red Stars**: Highlight the lowest cost point in the dataset.
- **Small Red Dots**: Indicate the lowest cost for each batch size.
- **Tight Clusters of Contour Lines**: Indicate that cost changes rapidly with small adjustments to batch size or VUs.

### How to Use:
- Identify the **lowest cost configurations** (red stars and dots).
- Observe how **cost changes** with batch size and VUs to optimize your setup.
- Compare different hardware types (`hw_type`) and image processing strategies (`image`) to find the best-performing configuration.

### Analysis
Overall we can see that `nvidia-t4`s are more expensive for this cost-ratio and task. We should consider using the `nvidia-l4`.

| GPU       | Image      | Batch Size | VUs | Min Cost |
|-----------|------------|------------|-----|----------|
| nvidia-t4 | `trt-onnx` | 512        | 48  | $611.07  |
| nvidia-t4 | `default`  | 32         | 32  | $622.81  |
| nvidia-l4 | `trt-onnx` | 64         | 448 | $255.07  |
| nvidia-l4 | `default`  | 64         | 448 | $253.82  |

We can see a clear winner with `nvidia-l4` over `nvidia-t4` at this cost ratio. But surprisingly, we see `default` slightly outperform `trt-onnx`.
I think we should be careful not to overfit: these numbers can vary per run, but it's good to know that each image can be competitive.

#### `nvidia-t4`
- Here we can see that `trt-onnx` and `default` both perform similarly but with `trt-onnx` having a slight edge.
- `trt-onnx` has a lower overall cost band (~611–659) than `default` (~623–676)

#### `nvidia-l4`
- `trt-onnx` has a broad area of relatively low cost and hits a very low floor (~255)
    - This is great since it shows that we get consistently good results!
- `default` can also dip into the mid-200s in certain spots, but it has bigger, more expensive areas, especially at lower VUs and batch sizes.
    - This means we need to spend time to optimize it

### Conclusion
If I have time, I might analyze the `nvidia-l4` with `trt-onnx` across some different runs. Despite being `$1.25` more expensive per 1B requests, it's a safer, more consistent bet IMO.
"""

with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.Markdown("""
    # Classification Optimization
    
    ## Sanity Check Charts
    - **No Failed Requests**: Verify that all requests were successful.
    - **Monotonic Series**: Ensure that we tried enough VUs.
    - **Accuracy Distribution**: Evaluate the consistency of accuracy across runs.
    ## Cost Analysis Charts
    - **Best Image by Cost Savings**: Identify the best image based on cost savings.
    - **Cost vs Latency**: Identify optimal configurations balancing cost and latency.
    - **Cost vs VUs & Batch**: Analyze cost trends based on VUs and batch size.
    """)
        gr.Markdown("## Best Config")
        gr.HTML(best_config.transpose().to_html(header=False))
    with gr.Tab("Cost Analysis"):
        with gr.Tab("Cost vs VUs & Batch"):
            gr.Markdown(contour_md)
            for hw in df["hw_type"].unique():
                for img in df["image"].unique():
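                    # Keep only runs with vus > 20 for this (hw, image) contour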
                    df_hw_img = df[(df["hw_type"] == hw) & (df["image"] == img) & (df["vus"] > 20)].copy()
                    gr.Plot(plot_cost_vs_vus_batch(df_hw_img, hw=hw, img=img))
        with gr.Tab("Best Image by Cost Savings"):
            gr.Markdown(best_image_by_cost_savings_md)
            for fig in best_image_by_cost_savings(df):
                gr.Plot(fig)
        with gr.Tab("Cost vs Latency"):
            gr.Markdown(cost_vs_latency_md)
            gr.Plot(plot_cost_vs_latency(df))
    with gr.Tab("Sanity Checks"):

        with gr.Tab("Failed Requests"):
            gr.Markdown("### Failed Requests Check\nIf all requests were successful, the result should be 0.")
            gr.Text(value=str(count_failed_requests(df)), interactive=False)

        with gr.Tab("Monotonic Series"):
            gr.Markdown(enough_vus_md)
            vus_slider = gr.Slider(minimum=0, maximum=df['vus'].max(), value=1024, label="VUs Filter")


            @gr.render(inputs=vus_slider)
            def plot_monotonic_series(vus_filter):
                gr.Markdown("### Monotonic Series Dataframe")
                gr.Dataframe(value=get_monotonic_dataframe(df, vus_filter))
                df_filtered = filter_dataframe(df, vus_filter)
                grouped = df_filtered.groupby(['hw_type', 'image', 'batch_size'])

                monotonic_series = {}
                for (hw_type, image, batch_size), group in grouped:
                    group_sorted = group.sort_values('vus').reset_index(drop=True)
                    if group_sorted['throughput_req_per_sec'].is_monotonic_increasing:
                        monotonic_series[(hw_type, image, batch_size)] = group_sorted[['vus', 'throughput_req_per_sec']]

                if not monotonic_series:
                    gr.Markdown("### No monotonically increasing series found.")
                else:
                    gr.Markdown("### Plots of Monotonic Series")
                    for (hw_type, image, batch_size), data in monotonic_series.items():
                        fig = px.line(data, x='vus', y='throughput_req_per_sec', markers=True,
                                    title=f'Throughput Trend for HW: {hw_type}, Image: {image}, Batch: {batch_size}')
                        gr.Plot(fig)

        with gr.Tab("Accuracy Distribution"):
            gr.Markdown(accuracy_md1)
            gr.Plot(plot_accuracy_distribution(df))
            gr.Markdown(accuracy_md2)
            gr.Dataframe(top_outliers(df))

demo.launch()