# /// script
# dependencies = [
#     "matplotlib",
# ]
# ///
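# The block above is PEP 723 inline script metadata: running this file with
# `uv run <script>` resolves the listed dependencies into an ephemeral
# environment before executing the code below.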
import json
import os
from pathlib import Path

import matplotlib.pyplot as plt

# Result directories are injected by the runner through UVNOTE_INPUT_* environment
# variables; fall back to the current directory when they are not set.
gptoss_dir = os.environ.get('UVNOTE_INPUT_GPTOSS_RUN', '.')
megablocks_dir = os.environ.get('UVNOTE_INPUT_MEGABLOCKS_RUN', '.')

print("Loading benchmark results from:")
print(f"  GPT-OSS dir: {gptoss_dir}")
print(f"  MegaBlocks dir: {megablocks_dir}")
# Load benchmark results, warning early if an upstream artifact is missing
gptoss_file = Path(gptoss_dir) / 'gptoss_results.json'
megablocks_file = Path(megablocks_dir) / 'megablocks_results.json'

print("Loading results from:")
print(f"  GPT-OSS: {gptoss_file}")
print(f"  MegaBlocks: {megablocks_file}")

if not gptoss_file.exists():
    print(f"Warning: {gptoss_file} not found")
if not megablocks_file.exists():
    print(f"Warning: {megablocks_file} not found")

with open(gptoss_file, 'r') as f:
    gptoss_results = json.load(f)
with open(megablocks_file, 'r') as f:
    megablocks_results = json.load(f)

print(f"GPT-OSS results keys: {list(gptoss_results.keys())}")
print(f"MegaBlocks results keys: {list(megablocks_results.keys())}")
# Helper to extract a metric from either the old or the new JSON layout
def get_metric(results, metric_name, default=0):
    """Extract a metric, handling both the old (flat) and new (nested 'stats') JSON formats."""
    # New format: metrics nested under a 'stats' dict
    if 'stats' in results:
        return results['stats'].get(metric_name, default)
    # Old format: metrics stored as top-level keys
    if metric_name in results:
        return results[metric_name]
    return default
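# Illustrative shapes this helper accepts (values and exact key/format pairing are
# hypothetical; the real field names are whatever the benchmark scripts emit,
# e.g. 'avg_ms' or 'avg_time_ms'):
#   new format: {"stats": {"avg_ms": 12.3, "tokens_per_s": 41000.0}, ...}
#   old format: {"avg_time_ms": 12.3, "throughput_tokens_per_sec": 41000.0, ...}
#   get_metric({"stats": {"avg_ms": 12.3}}, "avg_ms")   -> 12.3
#   get_metric({"avg_time_ms": 12.3}, "avg_time_ms")    -> 12.3
#   get_metric({}, "avg_ms", default=0)                 -> 0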
# Create the comparison figure
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

implementations = ['GPT-OSS', 'MegaBlocks']

# Timing metrics (the field may be named 'avg_ms' or 'avg_time_ms' depending on the run)
gpt_time = get_metric(gptoss_results, 'avg_ms', get_metric(gptoss_results, 'avg_time_ms', 0))
mega_time = get_metric(megablocks_results, 'avg_ms', get_metric(megablocks_results, 'avg_time_ms', 0))
times = [gpt_time, mega_time]

# Throughput metrics ('tokens_per_s' or 'throughput_tokens_per_sec')
gpt_throughput = get_metric(gptoss_results, 'tokens_per_s', get_metric(gptoss_results, 'throughput_tokens_per_sec', 0))
mega_throughput = get_metric(megablocks_results, 'tokens_per_s', get_metric(megablocks_results, 'throughput_tokens_per_sec', 0))
throughputs = [gpt_throughput, mega_throughput]

# Memory metrics
gpt_memory = get_metric(gptoss_results, 'memory_allocated_gb', 0)
mega_memory = get_metric(megablocks_results, 'memory_allocated_gb', 0)
memory_usage = [gpt_memory, mega_memory]

gpt_mem_inc = get_metric(gptoss_results, 'memory_increase_gb', 0)
mega_mem_inc = get_metric(megablocks_results, 'memory_increase_gb', 0)
memory_increase = [gpt_mem_inc, mega_mem_inc]

print("Extracted metrics:")
print(f"  Times (ms): {times}")
print(f"  Throughputs: {throughputs}")
print(f"  Memory usage (GB): {memory_usage}")
print(f"  Memory increase (GB): {memory_increase}")

colors = ['#2E8B57', '#4169E1']  # sea green for GPT-OSS, royal blue for MegaBlocks
# Latency comparison
bars1 = ax1.bar(implementations, times, color=colors)
ax1.set_ylabel('Average Time (ms)')
ax1.set_title('Latency Comparison')
ax1.grid(True, alpha=0.3)
# Add values on bars
for bar, time in zip(bars1, times):
    height = bar.get_height()
    ax1.text(bar.get_x() + bar.get_width() / 2., height + height * 0.01,
             f'{time:.2f}ms', ha='center', va='bottom')

# Throughput comparison
bars2 = ax2.bar(implementations, throughputs, color=colors)
ax2.set_ylabel('Tokens per Second')
ax2.set_title('Throughput Comparison')
ax2.grid(True, alpha=0.3)
# Add values on bars
for bar, throughput in zip(bars2, throughputs):
    height = bar.get_height()
    ax2.text(bar.get_x() + bar.get_width() / 2., height + height * 0.01,
             f'{throughput:.0f}', ha='center', va='bottom')

# Memory usage comparison
bars3 = ax3.bar(implementations, memory_usage, color=colors)
ax3.set_ylabel('Memory Allocated (GB)')
ax3.set_title('Memory Usage Comparison')
ax3.grid(True, alpha=0.3)
# Add values on bars
for bar, mem in zip(bars3, memory_usage):
    height = bar.get_height()
    ax3.text(bar.get_x() + bar.get_width() / 2., height + height * 0.01,
             f'{mem:.2f}GB', ha='center', va='bottom')

# Memory increase comparison
bars4 = ax4.bar(implementations, memory_increase, color=colors)
ax4.set_ylabel('Memory Increase (GB)')
ax4.set_title('Memory Increase Comparison')
ax4.grid(True, alpha=0.3)
# Add values on bars
for bar, mem_inc in zip(bars4, memory_increase):
    height = bar.get_height()
    ax4.text(bar.get_x() + bar.get_width() / 2., height + height * 0.01,
             f'{mem_inc:.3f}GB', ha='center', va='bottom')

plt.tight_layout()
plt.savefig('small_moe_comparison.png', dpi=150, bbox_inches='tight')
plt.show()
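# savefig() writes small_moe_comparison.png to the current working directory;
# plt.show() is effectively a no-op on non-interactive backends (e.g. Agg), so in a
# headless run the saved PNG is the artifact to inspect.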
# Print summary table
print("\n" + "=" * 60)
print("PERFORMANCE COMPARISON SUMMARY")
print("=" * 60)
print(f"{'Metric':<25} {'GPT-OSS':<15} {'MegaBlocks':<15} {'Winner':<10}")
print("-" * 60)

# Determine winners (strict comparisons, so an exact tie resolves to MegaBlocks)
latency_winner = "GPT-OSS" if times[0] < times[1] else "MegaBlocks"
throughput_winner = "GPT-OSS" if throughputs[0] > throughputs[1] else "MegaBlocks"
memory_winner = "GPT-OSS" if memory_usage[0] < memory_usage[1] else "MegaBlocks"
mem_inc_winner = "GPT-OSS" if memory_increase[0] < memory_increase[1] else "MegaBlocks"

print(f"{'Latency (ms)':<25} {times[0]:<15.2f} {times[1]:<15.2f} {latency_winner:<10}")
print(f"{'Throughput (tok/s)':<25} {throughputs[0]:<15.0f} {throughputs[1]:<15.0f} {throughput_winner:<10}")
print(f"{'Memory Usage (GB)':<25} {memory_usage[0]:<15.3f} {memory_usage[1]:<15.3f} {memory_winner:<10}")
print(f"{'Memory Increase (GB)':<25} {memory_increase[0]:<15.3f} {memory_increase[1]:<15.3f} {mem_inc_winner:<10}")
# Speed ratio: slower average time divided by the faster one
speed_ratio = max(times) / min(times)
print(f"\n{latency_winner} is {speed_ratio:.2f}x faster")

# Throughput ratio
throughput_ratio = max(throughputs) / min(throughputs)
print(f"{throughput_winner} has {throughput_ratio:.2f}x higher throughput")
print("=" * 60)