# /// script
# dependencies = [
# "matplotlib",
# ]
# ///
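"""Visualization cell for compare-moe-uvnote.

Loads the GPT-OSS and MegaBlocks benchmark result JSON files (paths taken from
the UVNOTE_INPUT_GPTOSS_RUN / UVNOTE_INPUT_MEGABLOCKS_RUN environment
variables), draws a 2x2 comparison figure (latency, throughput, memory usage,
memory increase), and prints a summary table.
"""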
import json
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import os
# Get result directories from environment variables
gptoss_dir = os.environ.get('UVNOTE_INPUT_GPTOSS_RUN', '.')
megablocks_dir = os.environ.get('UVNOTE_INPUT_MEGABLOCKS_RUN', '.')
print(f"Loading benchmark results from:")
print(f" GPT-OSS dir: {gptoss_dir}")
print(f" MegaBlocks dir: {megablocks_dir}")
# Load benchmark results
gptoss_file = Path(gptoss_dir) / 'gptoss_results.json'
megablocks_file = Path(megablocks_dir) / 'megablocks_results.json'
print(f"Loading results from:")
print(f" GPT-OSS: {gptoss_file}")
print(f" MegaBlocks: {megablocks_file}")
if not gptoss_file.exists():
    print(f"Warning: {gptoss_file} not found")
if not megablocks_file.exists():
    print(f"Warning: {megablocks_file} not found")
with open(gptoss_file, 'r') as f:
    gptoss_results = json.load(f)
with open(megablocks_file, 'r') as f:
    megablocks_results = json.load(f)
print(f"GPT-OSS results keys: {list(gptoss_results.keys())}")
print(f"MegaBlocks results keys: {list(megablocks_results.keys())}")
# Helper function to extract metrics from either old or new JSON format
def get_metric(results, metric_name, default=0):
    """Extract metric from results, handling both old and new JSON formats."""
    # New format (with stats dict)
    if 'stats' in results:
        return results['stats'].get(metric_name, default)
    # Old format (direct keys)
    elif metric_name in results:
        return results[metric_name]
    else:
        return default
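# Rough sketch of the two result layouts get_metric() accepts (key names vary
# between benchmark versions, hence the fallbacks used when extracting metrics):
#   nested: {"stats": {"avg_ms": ..., "tokens_per_s": ...}, ...}
#   flat:   {"avg_time_ms": ..., "throughput_tokens_per_sec": ..., ...}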
# Create comparison plots
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
# Performance comparison
implementations = ['GPT-OSS', 'MegaBlocks']
# Extract timing metrics (handle both avg_ms and avg_time_ms)
gpt_time = get_metric(gptoss_results, 'avg_ms', get_metric(gptoss_results, 'avg_time_ms', 0))
mega_time = get_metric(megablocks_results, 'avg_ms', get_metric(megablocks_results, 'avg_time_ms', 0))
times = [gpt_time, mega_time]
# Extract throughput metrics
gpt_throughput = get_metric(gptoss_results, 'tokens_per_s', get_metric(gptoss_results, 'throughput_tokens_per_sec', 0))
mega_throughput = get_metric(megablocks_results, 'tokens_per_s', get_metric(megablocks_results, 'throughput_tokens_per_sec', 0))
throughputs = [gpt_throughput, mega_throughput]
# Extract memory metrics
gpt_memory = get_metric(gptoss_results, 'memory_allocated_gb', 0)
mega_memory = get_metric(megablocks_results, 'memory_allocated_gb', 0)
memory_usage = [gpt_memory, mega_memory]
gpt_mem_inc = get_metric(gptoss_results, 'memory_increase_gb', 0)
mega_mem_inc = get_metric(megablocks_results, 'memory_increase_gb', 0)
memory_increase = [gpt_mem_inc, mega_mem_inc]
print(f"Extracted metrics:")
print(f" Times (ms): {times}")
print(f" Throughputs: {throughputs}")
print(f" Memory usage (GB): {memory_usage}")
print(f" Memory increase (GB): {memory_increase}")
colors = ['#2E8B57', '#4169E1']  # sea green (GPT-OSS), royal blue (MegaBlocks)
# Latency comparison
bars1 = ax1.bar(implementations, times, color=colors)
ax1.set_ylabel('Average Time (ms)')
ax1.set_title('Latency Comparison')
ax1.grid(True, alpha=0.3)
# Add values on bars
for bar, time in zip(bars1, times):
    height = bar.get_height()
    ax1.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
             f'{time:.2f}ms', ha='center', va='bottom')
# Throughput comparison
bars2 = ax2.bar(implementations, throughputs, color=colors)
ax2.set_ylabel('Tokens per Second')
ax2.set_title('Throughput Comparison')
ax2.grid(True, alpha=0.3)
# Add values on bars
for bar, throughput in zip(bars2, throughputs):
    height = bar.get_height()
    ax2.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
             f'{throughput:.0f}', ha='center', va='bottom')
# Memory usage comparison
bars3 = ax3.bar(implementations, memory_usage, color=colors)
ax3.set_ylabel('Memory Allocated (GB)')
ax3.set_title('Memory Usage Comparison')
ax3.grid(True, alpha=0.3)
# Add values on bars
for bar, mem in zip(bars3, memory_usage):
    height = bar.get_height()
    ax3.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
             f'{mem:.2f}GB', ha='center', va='bottom')
# Memory increase comparison
bars4 = ax4.bar(implementations, memory_increase, color=colors)
ax4.set_ylabel('Memory Increase (GB)')
ax4.set_title('Memory Increase Comparison')
ax4.grid(True, alpha=0.3)
# Add values on bars
for bar, mem_inc in zip(bars4, memory_increase):
    height = bar.get_height()
    ax4.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
             f'{mem_inc:.3f}GB', ha='center', va='bottom')
plt.tight_layout()
plt.savefig('small_moe_comparison.png', dpi=150, bbox_inches='tight')
plt.show()
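# plt.show() may do nothing when this cell runs headlessly (non-interactive
# matplotlib backend); the saved small_moe_comparison.png is the durable output.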
# Print summary table
print("\n" + "="*60)
print("PERFORMANCE COMPARISON SUMMARY")
print("="*60)
print(f"{'Metric':<25} {'GPT-OSS':<15} {'MegaBlocks':<15} {'Winner':<10}")
print("-" * 60)
# Determine winners
latency_winner = "GPT-OSS" if times[0] < times[1] else "MegaBlocks"
throughput_winner = "GPT-OSS" if throughputs[0] > throughputs[1] else "MegaBlocks"
memory_winner = "GPT-OSS" if memory_usage[0] < memory_usage[1] else "MegaBlocks"
mem_inc_winner = "GPT-OSS" if memory_increase[0] < memory_increase[1] else "MegaBlocks"
print(f"{'Latency (ms)':<25} {times[0]:<15.2f} {times[1]:<15.2f} {latency_winner:<10}")
print(f"{'Throughput (tok/s)':<25} {throughputs[0]:<15.0f} {throughputs[1]:<15.0f} {throughput_winner:<10}")
print(f"{'Memory Usage (GB)':<25} {memory_usage[0]:<15.3f} {memory_usage[1]:<15.3f} {memory_winner:<10}")
print(f"{'Memory Increase (GB)':<25} {memory_increase[0]:<15.3f} {memory_increase[1]:<15.3f} {mem_inc_winner:<10}")
# Speed ratio
speed_ratio = times[1] / times[0] if times[0] < times[1] else times[0] / times[1]
faster_impl = latency_winner
print(f"\n{faster_impl} is {speed_ratio:.2f}x faster")
# Throughput ratio
throughput_ratio = max(throughputs) / min(throughputs)
higher_throughput = throughput_winner
print(f"{higher_throughput} has {throughput_ratio:.2f}x higher throughput")
print("="*60)