# /// script
# dependencies = [
#     "matplotlib",
# ]
# ///
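# The block above is PEP 723 inline script metadata: running this file with
# `uv run <script>` resolves the listed dependencies into an ephemeral
# environment before executing the code below.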
import json
import os
from pathlib import Path

import matplotlib.pyplot as plt

# Result directories are injected by the runner through UVNOTE_INPUT_* environment
# variables; fall back to the current directory when they are not set.
gptoss_dir = os.environ.get('UVNOTE_INPUT_GPTOSS_RUN', '.')
megablocks_dir = os.environ.get('UVNOTE_INPUT_MEGABLOCKS_RUN', '.')

print("Loading benchmark results from:")
print(f"  GPT-OSS dir: {gptoss_dir}")
print(f"  MegaBlocks dir: {megablocks_dir}")
# Load benchmark results, warning early if an upstream artifact is missing
gptoss_file = Path(gptoss_dir) / 'gptoss_results.json'
megablocks_file = Path(megablocks_dir) / 'megablocks_results.json'

print("Loading results from:")
print(f"  GPT-OSS: {gptoss_file}")
print(f"  MegaBlocks: {megablocks_file}")

if not gptoss_file.exists():
    print(f"Warning: {gptoss_file} not found")
if not megablocks_file.exists():
    print(f"Warning: {megablocks_file} not found")

with open(gptoss_file, 'r') as f:
    gptoss_results = json.load(f)
with open(megablocks_file, 'r') as f:
    megablocks_results = json.load(f)

print(f"GPT-OSS results keys: {list(gptoss_results.keys())}")
print(f"MegaBlocks results keys: {list(megablocks_results.keys())}")
# Helper to extract a metric from either the old or the new JSON layout
def get_metric(results, metric_name, default=0):
    """Extract a metric, handling both the old (flat) and new (nested 'stats') JSON formats."""
    # New format: metrics nested under a 'stats' dict
    if 'stats' in results:
        return results['stats'].get(metric_name, default)
    # Old format: metrics stored as top-level keys
    if metric_name in results:
        return results[metric_name]
    return default
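# Illustrative shapes this helper accepts (values and exact key/format pairing are
# hypothetical; the real field names are whatever the benchmark scripts emit,
# e.g. 'avg_ms' or 'avg_time_ms'):
#   new format: {"stats": {"avg_ms": 12.3, "tokens_per_s": 41000.0}, ...}
#   old format: {"avg_time_ms": 12.3, "throughput_tokens_per_sec": 41000.0, ...}
#   get_metric({"stats": {"avg_ms": 12.3}}, "avg_ms")   -> 12.3
#   get_metric({"avg_time_ms": 12.3}, "avg_time_ms")    -> 12.3
#   get_metric({}, "avg_ms", default=0)                 -> 0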
# Create the comparison figure
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

implementations = ['GPT-OSS', 'MegaBlocks']

# Timing metrics (the field may be named 'avg_ms' or 'avg_time_ms' depending on the run)
gpt_time = get_metric(gptoss_results, 'avg_ms', get_metric(gptoss_results, 'avg_time_ms', 0))
mega_time = get_metric(megablocks_results, 'avg_ms', get_metric(megablocks_results, 'avg_time_ms', 0))
times = [gpt_time, mega_time]

# Throughput metrics ('tokens_per_s' or 'throughput_tokens_per_sec')
gpt_throughput = get_metric(gptoss_results, 'tokens_per_s', get_metric(gptoss_results, 'throughput_tokens_per_sec', 0))
mega_throughput = get_metric(megablocks_results, 'tokens_per_s', get_metric(megablocks_results, 'throughput_tokens_per_sec', 0))
throughputs = [gpt_throughput, mega_throughput]

# Memory metrics
gpt_memory = get_metric(gptoss_results, 'memory_allocated_gb', 0)
mega_memory = get_metric(megablocks_results, 'memory_allocated_gb', 0)
memory_usage = [gpt_memory, mega_memory]

gpt_mem_inc = get_metric(gptoss_results, 'memory_increase_gb', 0)
mega_mem_inc = get_metric(megablocks_results, 'memory_increase_gb', 0)
memory_increase = [gpt_mem_inc, mega_mem_inc]

print("Extracted metrics:")
print(f"  Times (ms): {times}")
print(f"  Throughputs: {throughputs}")
print(f"  Memory usage (GB): {memory_usage}")
print(f"  Memory increase (GB): {memory_increase}")

colors = ['#2E8B57', '#4169E1']  # sea green for GPT-OSS, royal blue for MegaBlocks
# Latency comparison
bars1 = ax1.bar(implementations, times, color=colors)
ax1.set_ylabel('Average Time (ms)')
ax1.set_title('Latency Comparison')
ax1.grid(True, alpha=0.3)
# Add values on bars
for bar, time in zip(bars1, times):
    height = bar.get_height()
    ax1.text(bar.get_x() + bar.get_width() / 2., height + height * 0.01,
             f'{time:.2f}ms', ha='center', va='bottom')

# Throughput comparison
bars2 = ax2.bar(implementations, throughputs, color=colors)
ax2.set_ylabel('Tokens per Second')
ax2.set_title('Throughput Comparison')
ax2.grid(True, alpha=0.3)
# Add values on bars
for bar, throughput in zip(bars2, throughputs):
    height = bar.get_height()
    ax2.text(bar.get_x() + bar.get_width() / 2., height + height * 0.01,
             f'{throughput:.0f}', ha='center', va='bottom')

# Memory usage comparison
bars3 = ax3.bar(implementations, memory_usage, color=colors)
ax3.set_ylabel('Memory Allocated (GB)')
ax3.set_title('Memory Usage Comparison')
ax3.grid(True, alpha=0.3)
# Add values on bars
for bar, mem in zip(bars3, memory_usage):
    height = bar.get_height()
    ax3.text(bar.get_x() + bar.get_width() / 2., height + height * 0.01,
             f'{mem:.2f}GB', ha='center', va='bottom')

# Memory increase comparison
bars4 = ax4.bar(implementations, memory_increase, color=colors)
ax4.set_ylabel('Memory Increase (GB)')
ax4.set_title('Memory Increase Comparison')
ax4.grid(True, alpha=0.3)
# Add values on bars
for bar, mem_inc in zip(bars4, memory_increase):
    height = bar.get_height()
    ax4.text(bar.get_x() + bar.get_width() / 2., height + height * 0.01,
             f'{mem_inc:.3f}GB', ha='center', va='bottom')

plt.tight_layout()
plt.savefig('small_moe_comparison.png', dpi=150, bbox_inches='tight')
plt.show()
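# savefig() writes small_moe_comparison.png to the current working directory;
# plt.show() is effectively a no-op on non-interactive backends (e.g. Agg), so in a
# headless run the saved PNG is the artifact to inspect.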
# Print summary table
print("\n" + "=" * 60)
print("PERFORMANCE COMPARISON SUMMARY")
print("=" * 60)
print(f"{'Metric':<25} {'GPT-OSS':<15} {'MegaBlocks':<15} {'Winner':<10}")
print("-" * 60)

# Determine winners (strict comparisons, so an exact tie resolves to MegaBlocks)
latency_winner = "GPT-OSS" if times[0] < times[1] else "MegaBlocks"
throughput_winner = "GPT-OSS" if throughputs[0] > throughputs[1] else "MegaBlocks"
memory_winner = "GPT-OSS" if memory_usage[0] < memory_usage[1] else "MegaBlocks"
mem_inc_winner = "GPT-OSS" if memory_increase[0] < memory_increase[1] else "MegaBlocks"

print(f"{'Latency (ms)':<25} {times[0]:<15.2f} {times[1]:<15.2f} {latency_winner:<10}")
print(f"{'Throughput (tok/s)':<25} {throughputs[0]:<15.0f} {throughputs[1]:<15.0f} {throughput_winner:<10}")
print(f"{'Memory Usage (GB)':<25} {memory_usage[0]:<15.3f} {memory_usage[1]:<15.3f} {memory_winner:<10}")
print(f"{'Memory Increase (GB)':<25} {memory_increase[0]:<15.3f} {memory_increase[1]:<15.3f} {mem_inc_winner:<10}")
# Speed ratio: slower average time divided by the faster one
speed_ratio = max(times) / min(times)
print(f"\n{latency_winner} is {speed_ratio:.2f}x faster")

# Throughput ratio
throughput_ratio = max(throughputs) / min(throughputs)
print(f"{throughput_winner} has {throughput_ratio:.2f}x higher throughput")
print("=" * 60)