# /// script
# dependencies = [
# "matplotlib",
# ]
# ///
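# Running this file with `uv run` reads the PEP 723 inline-metadata block above
# and provisions an ephemeral environment with matplotlib before executing.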
import json
import os
from pathlib import Path

import matplotlib.pyplot as plt
# Get result directories from environment variables
gptoss_dir = os.environ.get('UVNOTE_INPUT_GPTOSS_RUN', '.')
megablocks_dir = os.environ.get('UVNOTE_INPUT_MEGABLOCKS_RUN', '.')
print(f"Loading benchmark results from:")
print(f" GPT-OSS dir: {gptoss_dir}")
print(f" MegaBlocks dir: {megablocks_dir}")
# Load benchmark results
gptoss_file = Path(gptoss_dir) / 'gptoss_results.json'
megablocks_file = Path(megablocks_dir) / 'megablocks_results.json'
print(f"Loading results from:")
print(f" GPT-OSS: {gptoss_file}")
print(f" MegaBlocks: {megablocks_file}")
missing = [p for p in (gptoss_file, megablocks_file) if not p.exists()]
for p in missing:
    print(f"Warning: {p} not found")
if missing:
    # Exit cleanly instead of crashing on the open() calls below
    raise SystemExit(1)

with open(gptoss_file, 'r') as f:
    gptoss_results = json.load(f)
with open(megablocks_file, 'r') as f:
    megablocks_results = json.load(f)
print(f"GPT-OSS results keys: {list(gptoss_results.keys())}")
print(f"MegaBlocks results keys: {list(megablocks_results.keys())}")
# Helper function to extract metrics from either old or new JSON format
def get_metric(results, metric_name, default=0):
    """Extract metric from results, handling both old and new JSON formats."""
    # New format (with stats dict)
    if 'stats' in results:
        return results['stats'].get(metric_name, default)
    # Old format (direct keys)
    elif metric_name in results:
        return results[metric_name]
    else:
        return default
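# Illustration of the two layouts get_metric() accepts (values are hypothetical):
#   new format: {"stats": {"avg_ms": 12.3}} -> get_metric(r, 'avg_ms') == 12.3
#   old format: {"avg_ms": 12.3}            -> get_metric(r, 'avg_ms') == 12.3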
# Create comparison plots
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
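# Panel layout: ax1 = latency (top-left), ax2 = throughput (top-right),
# ax3 = memory allocated (bottom-left), ax4 = memory increase (bottom-right).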
# Performance comparison
implementations = ['GPT-OSS', 'MegaBlocks']
# Extract timing metrics (handle both avg_ms and avg_time_ms)
gpt_time = get_metric(gptoss_results, 'avg_ms', get_metric(gptoss_results, 'avg_time_ms', 0))
mega_time = get_metric(megablocks_results, 'avg_ms', get_metric(megablocks_results, 'avg_time_ms', 0))
times = [gpt_time, mega_time]
# Extract throughput metrics
gpt_throughput = get_metric(gptoss_results, 'tokens_per_s',
                            get_metric(gptoss_results, 'throughput_tokens_per_sec', 0))
mega_throughput = get_metric(megablocks_results, 'tokens_per_s',
                             get_metric(megablocks_results, 'throughput_tokens_per_sec', 0))
throughputs = [gpt_throughput, mega_throughput]
# Extract memory metrics
gpt_memory = get_metric(gptoss_results, 'memory_allocated_gb', 0)
mega_memory = get_metric(megablocks_results, 'memory_allocated_gb', 0)
memory_usage = [gpt_memory, mega_memory]
gpt_mem_inc = get_metric(gptoss_results, 'memory_increase_gb', 0)
mega_mem_inc = get_metric(megablocks_results, 'memory_increase_gb', 0)
memory_increase = [gpt_mem_inc, mega_mem_inc]
print(f"Extracted metrics:")
print(f" Times (ms): {times}")
print(f" Throughputs: {throughputs}")
print(f" Memory usage (GB): {memory_usage}")
print(f" Memory increase (GB): {memory_increase}")
colors = ['#2E8B57', '#4169E1']
# Latency comparison
bars1 = ax1.bar(implementations, times, color=colors)
ax1.set_ylabel('Average Time (ms)')
ax1.set_title('Latency Comparison')
ax1.grid(True, alpha=0.3)
# Add values on bars
for bar, time in zip(bars1, times):
    height = bar.get_height()
    ax1.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
             f'{time:.2f}ms', ha='center', va='bottom')
# Throughput comparison
bars2 = ax2.bar(implementations, throughputs, color=colors)
ax2.set_ylabel('Tokens per Second')
ax2.set_title('Throughput Comparison')
ax2.grid(True, alpha=0.3)
# Add values on bars
for bar, throughput in zip(bars2, throughputs):
    height = bar.get_height()
    ax2.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
             f'{throughput:.0f}', ha='center', va='bottom')
# Memory usage comparison
bars3 = ax3.bar(implementations, memory_usage, color=colors)
ax3.set_ylabel('Memory Allocated (GB)')
ax3.set_title('Memory Usage Comparison')
ax3.grid(True, alpha=0.3)
# Add values on bars
for bar, mem in zip(bars3, memory_usage):
    height = bar.get_height()
    ax3.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
             f'{mem:.2f}GB', ha='center', va='bottom')
# Memory increase comparison
bars4 = ax4.bar(implementations, memory_increase, color=colors)
ax4.set_ylabel('Memory Increase (GB)')
ax4.set_title('Memory Increase Comparison')
ax4.grid(True, alpha=0.3)
# Add values on bars
for bar, mem_inc in zip(bars4, memory_increase):
    height = bar.get_height()
    ax4.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
             f'{mem_inc:.3f}GB', ha='center', va='bottom')
plt.tight_layout()
plt.savefig('small_moe_comparison.png', dpi=150, bbox_inches='tight')
plt.show()
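# Note: under a non-interactive backend (e.g. Agg in headless CI) plt.show()
# displays nothing; the figure is still written by savefig() above.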
# Print summary table
print("\n" + "="*60)
print("PERFORMANCE COMPARISON SUMMARY")
print("="*60)
print(f"{'Metric':<25} {'GPT-OSS':<15} {'MegaBlocks':<15} {'Winner':<10}")
print("-" * 60)
# Determine winners
latency_winner = "GPT-OSS" if times[0] < times[1] else "MegaBlocks"
throughput_winner = "GPT-OSS" if throughputs[0] > throughputs[1] else "MegaBlocks"
memory_winner = "GPT-OSS" if memory_usage[0] < memory_usage[1] else "MegaBlocks"
mem_inc_winner = "GPT-OSS" if memory_increase[0] < memory_increase[1] else "MegaBlocks"
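# Ties break toward MegaBlocks: each strict inequality above names GPT-OSS
# the winner only when it is strictly better.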
print(f"{'Latency (ms)':<25} {times[0]:<15.2f} {times[1]:<15.2f} {latency_winner:<10}")
print(f"{'Throughput (tok/s)':<25} {throughputs[0]:<15.0f} {throughputs[1]:<15.0f} {throughput_winner:<10}")
print(f"{'Memory Usage (GB)':<25} {memory_usage[0]:<15.3f} {memory_usage[1]:<15.3f} {memory_winner:<10}")
print(f"{'Memory Increase (GB)':<25} {memory_increase[0]:<15.3f} {memory_increase[1]:<15.3f} {mem_inc_winner:<10}")
# Speed ratio
speed_ratio = max(times) / min(times)
faster_impl = latency_winner
print(f"\n{faster_impl} is {speed_ratio:.2f}x faster")
# Throughput ratio
throughput_ratio = max(throughputs) / min(throughputs)
higher_throughput = throughput_winner
print(f"{higher_throughput} has {throughput_ratio:.2f}x higher throughput")
print("="*60) |