derek-thomas committed
Commit ddeb5d0 · verified · 1 Parent(s): 2111b55

Create app.py

Files changed (1): app.py +542 -0
app.py ADDED
@@ -0,0 +1,542 @@
import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from scipy.stats import zscore
import scipy.interpolate
import numpy as np
import plotly.figure_factory as ff

# Load dataset
df = pd.read_parquet("hf://datasets/derek-thomas/classification-ie-optimization/data/train-00000-of-00001.parquet")

# Image Mapping
df['image'] = df['image'].map({
    'michaelf34/infinity:0.0.75-trt-onnx': 'trt-onnx',
    'michaelf34/infinity:0.0.75': 'default'
})

best_config = df[['1B_cost', 'hw_type', 'image', 'batch_size', 'vus']].sort_values(by='1B_cost').head(n=1)
best_config['1B_cost'] = best_config['1B_cost'].round(2)

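# Columns referenced throughout this app (as used in the benchmark dataset): hw_type,
# image, batch_size, vus, total_requests, successful_requests, accuracy_percentage,
# throughput_req_per_sec, avg_latency_ms, and 1B_cost (cost to serve 1B requests).
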
def plot_sanity_checks(df):
    # Despite the name, this returns a number: the total count of failed requests
    # across all runs (0 means every request succeeded).
    return sum(df.total_requests - df.successful_requests)

def top_outliers(df):
    df = df.copy()  # work on a copy so the caller's dataframe is not mutated

    # Calculate absolute deviation from the mean
    df['absolute_deviation'] = abs(df['accuracy_percentage'] - df['accuracy_percentage'].mean())

    # Calculate z-score
    df['z_score'] = zscore(df['accuracy_percentage'])

    # Get the top 5 outliers with the highest absolute deviation
    top_outliers = df.nlargest(5, 'absolute_deviation')[['hw_type', 'batch_size', 'vus', 'total_requests', 'accuracy_percentage', 'absolute_deviation', 'z_score']]
    top_outliers['accuracy_percentage'] = top_outliers['accuracy_percentage'].round(2)
    top_outliers['absolute_deviation'] = top_outliers['absolute_deviation'].round(2)
    top_outliers['z_score'] = top_outliers['z_score'].round(2)
    return top_outliers

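# For reference (a hand-worked example, not from the data): zscore(x) is
# (x - mean(x)) / std(x), so an accuracy of 92% in a sample with mean 90% and
# std 1% would get z_score = 2.0.
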
def best_image_by_cost_savings(df):
    # Identify rows where we have different images but the same VUs, HW Type, and batch_size
    grouped = df.groupby(["vus", "batch_size", "hw_type"])

    # Filter only those groups that have multiple unique images
    valid_groups = grouped.filter(lambda x: x["image"].nunique() > 1)

    # Compute cost differences between images within the same (VUs, batch_size, hw_type)
    def compute_best_image(group):
        """Find the best (cheapest) image in each (VUs, batch_size, hw_type) group."""
        group = group.sort_values("1B_cost", ascending=True)

        best_image = group.iloc[0]["image"] if not group.empty else None  # Safely get best image
        cost_max = group["1B_cost"].max()
        group["better_image"] = best_image
        group["cost_savings_percent"] = (
            100 * (cost_max - group["1B_cost"]) / cost_max if cost_max > 0 else 0
        )

        return group

    cost_diffs = valid_groups.groupby(["vus", "batch_size", "hw_type"]).apply(compute_best_image).reset_index(drop=True)

    # Create a consistent color mapping for images
    unique_images = cost_diffs["better_image"].unique()
    colors = px.colors.qualitative.Set1  # Use a predefined color set

    # Assign a color to each unique image
    color_map = {image: colors[i % len(colors)] for i, image in enumerate(unique_images)}

    # Create a separate scatter plot for each HW type
    figs = []
    for hw in cost_diffs["hw_type"].unique():
        subset = cost_diffs[cost_diffs["hw_type"] == hw]

        fig = px.scatter(
            subset,
            x="batch_size",
            y="vus",
            color="better_image",  # Color based on best image
            size="cost_savings_percent",  # Size based on % savings
            title=f"Best Image by Cost Savings - HW Type: {hw}<br><sup>Color = Best Image (Consistent). Size = Savings % of cheaper image</sup>",
            labels={"batch_size": "Batch Size (log)", "vus": "VUs (log)", "better_image": "Cheaper Image"},
            hover_data=["1B_cost", "cost_savings_percent"],
            log_x=True,  # Log scale for batch_size
            log_y=True,  # Log scale for VUs
            color_discrete_map=color_map,  # Keep the same color across charts
            category_orders={"better_image": sorted(unique_images)}  # Ensure consistent legend order
        )
        figs.append(fig)
    return figs

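# Note on the pattern above: grouped.filter(lambda g: g["image"].nunique() > 1)
# drops every (vus, batch_size, hw_type) cell that was only measured with one image,
# so the per-group apply always compares at least two images when computing savings.
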
def plot_accuracy_distribution(df):
    mean_accuracy = df["accuracy_percentage"].mean()
    std_dev_accuracy = df["accuracy_percentage"].std()

    # Create a distribution plot (outline instead of bins)
    fig = ff.create_distplot([df['accuracy_percentage']], ['Accuracy Percentage'], show_hist=False, show_rug=True)

    # Add mean and standard deviation lines
    fig.add_trace(go.Scatter(x=[mean_accuracy, mean_accuracy], y=[0, 1],
                             mode="lines", name="Mean", line=dict(color="red", dash="dash")))

    fig.add_trace(go.Scatter(x=[mean_accuracy - std_dev_accuracy, mean_accuracy - std_dev_accuracy],
                             y=[0, 1],
                             mode="lines", name="Mean - 1 Std Dev", line=dict(color="blue", dash="dot")))

    fig.add_trace(go.Scatter(x=[mean_accuracy + std_dev_accuracy, mean_accuracy + std_dev_accuracy],
                             y=[0, 1],
                             mode="lines", name="Mean + 1 Std Dev", line=dict(color="blue", dash="dot")))

    # Update layout
    fig.update_layout(title="Density Plot of Accuracy Percentage",
                      xaxis_title="Accuracy Percentage",
                      yaxis_title="Density",
                      showlegend=True)
    return fig

def plot_cost_vs_latency(df):
    # Get the 100 lowest-cost points
    bottom_100 = df.nsmallest(100, "1B_cost").copy()
    bottom_100["1B_cost"] = bottom_100["1B_cost"].round(2)
    bottom_100["throughput_req_per_sec"] = bottom_100["throughput_req_per_sec"].round(2)
    bottom_100["avg_latency_ms"] = bottom_100["avg_latency_ms"].round(3)

    # Create a combined column for unique symbol assignment
    bottom_100["hw_image_combo"] = bottom_100["hw_type"] + " | " + bottom_100["image"]

    # Find the global minimum cost point *only within* the bottom 100 points
    global_min = bottom_100.nsmallest(1, "1B_cost")

    # Function to find Pareto-efficient points (minimizing cost and latency)
    def pareto_efficient(df, x_col, y_col):
        sorted_df = df.sort_values(by=[x_col, y_col])  # Sort by latency, then cost
        pareto_points = []
        min_cost = np.inf  # Start with a very high cost

        for _, row in sorted_df.iterrows():
            if row[y_col] < min_cost:  # If this cost is the lowest seen so far
                pareto_points.append(row)
                min_cost = row[y_col]

        return pd.DataFrame(pareto_points)

    # Compute Pareto front
    pareto_front = pareto_efficient(bottom_100, "avg_latency_ms", "1B_cost")

    # Base scatter plot (showing 100 lowest-cost points)
    fig = px.scatter(
        bottom_100,
        x="avg_latency_ms",
        y="1B_cost",
        symbol="hw_image_combo",  # Use combined hw_type and image as symbol
        color="batch_size",
        color_continuous_scale="viridis",
        opacity=0.7,
        title="1B Requests Cost/day vs. Latency<br><sup>Pareto-efficient points and global min highlighted</sup>",
        labels={
            "avg_latency_ms": "Average Latency (ms)",
            "1B_cost": "Daily Cost ($)",
            "hw_image_combo": "Hardware | Image",
            "batch_size": "Batch Size",
        },
        hover_data=["vus", "batch_size", "throughput_req_per_sec"]
    )

    # Add global minimum cost point (red star)
    fig.add_trace(
        go.Scatter(
            x=global_min["avg_latency_ms"],
            y=global_min["1B_cost"],
            mode="markers",
            marker=dict(size=12, color="red", symbol="star", line=dict(width=2, color="black")),
            name="Global Min Cost",
            hovertemplate="Latency: %{x} ms<br>Cost: $%{y}<br>Batch Size: %{text}<br>VUs: %{customdata[0]}<br>Throughput: %{customdata[1]} req/sec",
            text=global_min["batch_size"],
            customdata=global_min[["vus", "throughput_req_per_sec"]].values,
            showlegend=False
        )
    )

    # Add Pareto curve with detailed hover info
    fig.add_trace(
        go.Scatter(
            x=pareto_front["avg_latency_ms"],
            y=pareto_front["1B_cost"],
            mode="lines+markers",
            line=dict(color="red", width=2, dash="dash"),
            marker=dict(size=6, color="red"),
            name="Pareto Front",
            hovertemplate="Latency: %{x} ms<br>Cost: $%{y}<br>Batch Size: %{text}<br>VUs: %{customdata[0]}<br>Throughput: %{customdata[1]} req/sec",
            text=pareto_front["batch_size"],
            customdata=pareto_front[["vus", "throughput_req_per_sec"]].values,
            showlegend=False
        )
    )

    # Adjust layout (center the title)
    fig.update_layout(
        title_x=0.5,  # Centers title
        legend=dict(
            x=1,
            y=1.2,
            title="Hardware | Image"
        )
    )
    return fig

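# Sketch of the Pareto sweep above (hand-worked, not from the data): for points
# (latency, cost) = (1, 5), (2, 3), (3, 4), sorting by latency and keeping only rows
# that lower the running-minimum cost keeps (1, 5) and (2, 3); (3, 4) is dominated
# because a faster and cheaper point already exists.
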
def plot_cost_vs_vus_batch(df, hw=None, img=None):
    # Define percentile bins for cost
    percentiles = np.linspace(0, 100, 40)  # Define bins using percentiles
    cost_bins = np.percentile(df["1B_cost"], percentiles)  # Compute cost bins

    # Create grid for interpolation (X = VUs, Y = batch_size)
    grid_x_real, grid_y_real = np.meshgrid(
        np.linspace(df["vus"].min(), df["vus"].max(), 100),  # X-axis is VUs
        np.linspace(df["batch_size"].min(), df["batch_size"].max(), 100)  # Y-axis is batch_size
    )

    # Interpolate cost data over the (VUs, batch_size) grid
    grid_z_real = scipy.interpolate.griddata(
        (df["vus"], df["batch_size"]),
        df["1B_cost"],
        (grid_x_real, grid_y_real),
        method='linear'  # Linear interpolation for smoother transitions
    )

    # Identify the lowest cost point
    lowest_cost_points = df.nsmallest(1, "1B_cost")

    # Identify the minimum cost point for each batch size
    min_per_batch = df.loc[df.groupby("batch_size")["1B_cost"].idxmin()]

    # Create Plotly figure
    fig = go.Figure()

    # Contour plot with percentile-based cost bins
    fig.add_trace(
        go.Contour(
            z=grid_z_real,
            x=np.linspace(df["vus"].min(), df["vus"].max(), 100),  # X-axis is VUs
            y=np.linspace(df["batch_size"].min(), df["batch_size"].max(), 100),  # Y-axis is batch_size
            colorscale="viridis_r",
            contours=dict(
                start=cost_bins[0],
                end=cost_bins[-1],
                size=np.diff(cost_bins).mean(),  # Uses percentile bins
                showlabels=True
            ),
            colorbar=dict(title="Cost (1B Requests)"),
            hovertemplate="VUs: %{x}<br>Batch Size: %{y}<br>Cost: %{z}",
            opacity=0.8  # Reduce opacity to make scatter points stand out
        )
    )

    # Scatter plot of actual data points
    fig.add_trace(
        go.Scatter(
            x=df["vus"],
            y=df["batch_size"],
            mode="markers",
            marker=dict(size=3, color="white", line=dict(width=0.5, color="black")),
            name="Real Data Points",
            hovertemplate="VUs: %{x}<br>Batch Size: %{y}<br>Cost: %{text}",
            text=df["1B_cost"].round(2),
            showlegend=False
        )
    )

    # Scatter plot for the lowest global cost point (red star)
    fig.add_trace(
        go.Scatter(
            x=lowest_cost_points["vus"],
            y=lowest_cost_points["batch_size"],
            mode="markers+text",
            marker=dict(size=10, color="red", symbol="star", line=dict(width=1.5, color="black")),
            name="Lowest Cost Point",
            hovertemplate="VUs: %{x}<br>Batch Size: %{y}<br>Cost: %{text}",
            text=lowest_cost_points["1B_cost"].round(2),
            textposition="top center",
            showlegend=False
        )
    )

    # Scatter plot for minimum cost per batch size (small red dots)
    fig.add_trace(
        go.Scatter(
            x=min_per_batch["vus"],
            y=min_per_batch["batch_size"],
            mode="markers",
            marker=dict(size=6, color="red", line=dict(width=0.5, color="black")),
            name="Min Cost per Batch Size",
            hovertemplate="VUs: %{x}<br>Batch Size: %{y}<br>Cost: %{text}",
            text=min_per_batch["1B_cost"].round(2),
            showlegend=False
        )
    )

    fig.update_layout(
        title=f"Cost vs VUs and Batch Size ({hw}, Image: {img})" if hw else 'Cost vs VUs and Batch Size',
        xaxis_title="VUs",
        yaxis_title="Batch Size",
        xaxis_type="log",
        yaxis_type="log"
    )

    text1 = f"Contour Plot of Cost vs VUs and Batch Size ({hw}, Image: {img})<br><sup>The lowest cost per batch size is highlighted in red</sup>"
    text2 = "Contour Plot of Cost vs VUs and Batch Size<br><sup>The lowest cost per batch size is highlighted in red</sup>"
    fig.update_layout(
        title={
            "text": text1 if hw else text2,
            "x": 0.5,  # Center align
            "y": 0.85,  # Adjust vertical position
        }
    )

    return fig

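# A note on the interpolation above: with method='linear', scipy.interpolate.griddata
# triangulates the scattered (vus, batch_size) samples and linearly interpolates cost
# inside their convex hull; grid cells outside the hull come back as NaN and render
# as gaps in the contour.
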
def filter_dataframe(df, vus_filter):
    return df[df['vus'] <= vus_filter]

def get_monotonic_dataframe(df, vus_filter):
    df_filtered = filter_dataframe(df, vus_filter)
    grouped = df_filtered.groupby(['hw_type', 'image', 'batch_size'])

    monotonic_series = {}
    for (hw_type, image, batch_size), group in grouped:
        group_sorted = group.sort_values('vus').reset_index(drop=True)
        if group_sorted['throughput_req_per_sec'].is_monotonic_increasing:
            monotonic_series[(hw_type, image, batch_size)] = group_sorted[['vus', 'throughput_req_per_sec']]

    if not monotonic_series:
        return pd.DataFrame(columns=['hw_type', 'image', 'batch_size'])

    results_df = pd.DataFrame(list(monotonic_series.keys()), columns=['hw_type', 'image', 'batch_size'])
    return results_df

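# Example usage: get_monotonic_dataframe(df, 256) lists every (hw_type, image,
# batch_size) combination whose throughput was still rising at 256 VUs, i.e.
# configurations we would have under-tested had we stopped the sweep there.
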
enough_vus_md = """## Did we try enough VUs?
How do we know that we tried enough VUs? What if we had tried a higher number of VUs and throughput kept increasing? In that case we would see a monotonically increasing relationship between VUs and throughput, and we would need to run more tests. Let's check!

We can check by:
1. **Grouping data** by `hw_type`, `image`, and `batch_size` to match how we generated the experiments
1. **Sorting the data** by `vus` within each group to ensure we get the data in the correct order for our check
1. **Checking for monotonic increase** in `throughput_req_per_sec` and flagging the groups where throughput always increases as VUs increase

But how do we **know**? We can use the slider to check what would have happened if we had not tried past a certain point. Say we had stopped at 256 VUs instead of our actual 1024; we can simulate this by filtering our runs and see whether we would have left potential on the table.

### Verification
1. Put the slider at `256` and see that there are a number of scenarios where we should have checked a higher VU count
1. Put the slider at `1024` and verify that no scenarios are shown
"""

accuracy_md1 = """
## Are we Accurate Enough?
We shouldn't expect to see significant changes in accuracy. We should see a pretty tight distribution, though there might be some deviation: at lower VUs we won't get through as many of the `10_000` samples as we do at higher VUs.
"""
accuracy_md2 = """
Here we can see some deviation with a large z-score, but overall it's not that big of an absolute deviation. These also occur when we have relatively low `total_requests`, which makes sense.

We should worry more if we see major `absolute_deviation` with higher `total_requests`. We can see those values here:
"""

best_image_by_cost_savings_md = """## Best Image by Cost Savings

### Chart
- Color = Best image for that `vu`/`batch_size`/`GPU` combination
- Size = % cost savings vs. the worst (most expensive) image in that group
- Small dots don't mean that much; large dots do

### Analysis
We can see that `trt-onnx` is quite a bit stronger on `nvidia-l4`. There are no significant red dots.

#### `nvidia-l4`
- `trt-onnx` (blue) dominates most points, indicating it's typically the cheaper choice
- At larger batch sizes (right side) and higher VUs (upper part of the chart), you often see big blue bubbles, suggesting `trt-onnx` can save a significant percentage versus `default`
- A few red points (i.e., `default` cheaper) appear at lower batch sizes, but they're less frequent and often show smaller savings differences

#### `nvidia-t4`
- There's more of a mix: some points favor `default` and others favor `trt-onnx`
- You can see some large red bubbles, meaning `default` can occasionally produce big savings under certain (VUs, batch_size) conditions
- However, `trt-onnx` is still cheaper in many scenarios, especially toward higher batch sizes

### Takeaways
If you have the time/budget, it's better to analyze both images; you can see that they are close at times. But if you only have time/budget for one, at the current cost ratio consider the `nvidia-l4` in this case.
"""

cost_vs_latency_md = """## 1B Requests Cost vs. Latency

This scatter plot visualizes the relationship between **average latency (ms)** and **cost per billion requests per day** for different **hardware types (hw_type)** and **container images (image)**.

### How to Read the Chart:
- **Point Symbols**: Represent different **hardware + image** configurations.
- **Color Gradient**: Represents batch size, helping to see cost trends across different batch sizes.
- **Hover Data**: Displays additional details like **VUs, batch size, and throughput per second**.

### Key Features:
- **Global Minimum Cost (Red Star)**: Marks the configuration with the lowest cost.
- **Pareto Front (Red Dashed Line + Points)**: Highlights the most efficient configurations, minimizing both cost and latency. These configurations offer the best trade-offs.

### How to Use:
- Find the **lowest-cost, low-latency configurations** by looking at points near the bottom-left.
- Use the **Pareto front** to identify cost-effective configurations.
- Compare different **hardware types and container images** to optimize your setup.

This visualization helps in selecting the configuration that best balances **performance (low latency)** and **cost efficiency**.
"""

contour_md = """## Cost vs VUs and Batch Size Contour Plots

These contour plots visualize the cost per billion requests per day (`1B_cost`) as a function of **VUs (Virtual Users)** and **Batch Size** for different hardware configurations (`hw_type`) and container images (`image`).
The white dots are real measurements; the surface in between is linearly interpolated.

### How to Read the Charts:
- **Color Gradient**: Shows the cost levels, with darker colors representing higher costs and lighter colors representing lower costs.
- **Contour Lines**: Represent cost levels, helping identify cost-effective regions.
- **White Dots**: Represent real data points used to generate the interpolated surface.
- **Red Stars**: Highlight the lowest cost point in the dataset.
- **Small Red Dots**: Indicate the lowest cost for each batch size.
- **Tight Clusters** (of contour lines): Indicate costs changing rapidly with small adjustments to batch size or VUs.

### How to Use:
- Identify the **lowest cost configurations** (red stars and dots).
- Observe how **cost changes** with batch size and VUs to optimize your setup.
- Compare different hardware types (`hw_type`) and container images (`image`) to find the best-performing configuration.

### Analysis
Overall we can see that `nvidia-t4`s are more expensive for this cost ratio and task. We should consider using the `nvidia-l4`.

| GPU | Image | Batch Size | VUs | Min Cost |
|-----------|------------|------------|-----|----------|
| nvidia-t4 | `trt-onnx` | 512 | 48 | $611.07 |
| nvidia-t4 | `default` | 32 | 32 | $622.81 |
| nvidia-l4 | `trt-onnx` | 64 | 448 | $255.07 |
| nvidia-l4 | `default` | 64 | 448 | $253.82 |

We can see a clear winner with `nvidia-l4` over `nvidia-t4` at this cost ratio. But surprisingly we see `default` slightly outperform `trt-onnx`.
I think we should be careful not to overfit: these numbers can vary per run, but it's good to know that each image can be competitive.

#### `nvidia-t4`
- Here we can see that `trt-onnx` and `default` both perform similarly, but with `trt-onnx` having a slight edge
- `trt-onnx` has a lower overall cost band (~611–659) than `default` (~623–676)

#### `nvidia-l4`
- `trt-onnx` has a broad area of relatively low cost and hits a very low floor (~255)
    - This is great since it shows that we get consistently good results!
- `default` can also dip into the mid-200s in certain spots, but it has bigger, more expensive areas, especially at lower VUs and batch sizes
    - This means we would need to spend time optimizing it

### Conclusion
If I have time, I might analyze `nvidia-l4` with `trt-onnx` across some different runs. Despite being `$1.25` more expensive per 1B requests, it's a safer, more consistent bet IMO.
"""

with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.Markdown("""
# Classification Optimization

## Sanity Check Charts:
- **No Failed Requests**: Verify that all requests were successful.
- **Monotonic Series**: Ensure that we tried enough VUs.
- **Accuracy Distribution**: Evaluate the consistency of accuracy across runs.
## Cost Analysis Charts
- **Best Image by Cost Savings**: Identify the best image based on cost savings.
- **Cost vs Latency**: Identify optimal configurations balancing cost and latency.
- **Cost vs VUs & Batch**: Analyze cost trends based on VUs and batch size.
""")
        gr.Markdown("## Best Config")
        gr.HTML(best_config.transpose().to_html(header=False))
    with gr.Tab("Sanity Checks"):

        with gr.Tab("Failed Requests"):
            gr.Markdown("### Failed Requests Check\nIf all requests were successful, the result should be 0.")
            gr.Text(value=str(plot_sanity_checks(df)), interactive=False)

        with gr.Tab("Monotonic Series"):
            gr.Markdown(enough_vus_md)
            vus_slider = gr.Slider(minimum=0, maximum=df['vus'].max(), value=1024, label="VUs Filter")

            @gr.render(inputs=vus_slider)
            def plot_monotonic_series(vus_filter):
                gr.Markdown("### Monotonic Series Dataframe")
                gr.Dataframe(value=get_monotonic_dataframe(df, vus_filter))
                df_filtered = filter_dataframe(df, vus_filter)
                grouped = df_filtered.groupby(['hw_type', 'image', 'batch_size'])

                monotonic_series = {}
                for (hw_type, image, batch_size), group in grouped:
                    group_sorted = group.sort_values('vus').reset_index(drop=True)
                    if group_sorted['throughput_req_per_sec'].is_monotonic_increasing:
                        monotonic_series[(hw_type, image, batch_size)] = group_sorted[['vus', 'throughput_req_per_sec']]

                if not monotonic_series:
                    gr.Markdown("### No monotonically increasing series found.")
                else:
                    gr.Markdown("### Plots of Monotonic Series")
                    for (hw_type, image, batch_size), data in monotonic_series.items():
                        fig = px.line(data, x='vus', y='throughput_req_per_sec', markers=True,
                                      title=f'Throughput Trend for HW: {hw_type}, Image: {image}, Batch: {batch_size}')
                        gr.Plot(fig)

        with gr.Tab("Accuracy Distribution"):
            gr.Markdown(accuracy_md1)
            gr.Plot(plot_accuracy_distribution(df))
            gr.Markdown(accuracy_md2)
            gr.Dataframe(top_outliers(df))
    with gr.Tab("Cost Analysis"):
        with gr.Tab("Best Image by Cost Savings"):
            gr.Markdown(best_image_by_cost_savings_md)
            for fig in best_image_by_cost_savings(df):
                gr.Plot(fig)
        with gr.Tab("Cost vs Latency"):
            gr.Markdown(cost_vs_latency_md)
            gr.Plot(plot_cost_vs_latency(df))

        with gr.Tab("Cost vs VUs & Batch"):
            gr.Markdown(contour_md)
            for hw in df["hw_type"].unique():
                for img in df["image"].unique():
                    df_hw_img = df[(df["hw_type"] == hw) & (df["image"] == img) & (df["vus"] > 20)].copy()
                    gr.Plot(plot_cost_vs_vus_batch(df_hw_img, hw=hw, img=img))

demo.launch()