File size: 6,109 Bytes
bad4ddc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# /// script
# dependencies = [
#     "matplotlib",
# ]
# ///

import json
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import os

# Get result directories from environment variables (injected by the uvnote
# runner); fall back to the current directory when unset.
gptoss_dir = os.environ.get('UVNOTE_INPUT_GPTOSS_RUN', '.')
megablocks_dir = os.environ.get('UVNOTE_INPUT_MEGABLOCKS_RUN', '.')

print(f"Loading benchmark results from:")
print(f"  GPT-OSS dir: {gptoss_dir}")
print(f"  MegaBlocks dir: {megablocks_dir}")

# Paths to the benchmark result files produced by the upstream runs.
gptoss_file = Path(gptoss_dir) / 'gptoss_results.json'
megablocks_file = Path(megablocks_dir) / 'megablocks_results.json'

print(f"Loading results from:")
print(f"  GPT-OSS: {gptoss_file}")
print(f"  MegaBlocks: {megablocks_file}")

# Fail fast with a clear message when an input is missing. Previously the
# script only warned and then crashed on open() with an opaque
# FileNotFoundError traceback.
missing = [p for p in (gptoss_file, megablocks_file) if not p.exists()]
for p in missing:
    print(f"Warning: {p} not found")
if missing:
    raise SystemExit(
        f"Missing required result file(s): {', '.join(str(p) for p in missing)}"
    )

# Load both result payloads; structure is normalized later by get_metric().
with open(gptoss_file, 'r') as f:
    gptoss_results = json.load(f)

with open(megablocks_file, 'r') as f:
    megablocks_results = json.load(f)

print(f"GPT-OSS results keys: {list(gptoss_results.keys())}")
print(f"MegaBlocks results keys: {list(megablocks_results.keys())}")

# Helper to read a metric regardless of which result-JSON layout produced it.
def get_metric(results, metric_name, default=0):
    """Return *metric_name* from a benchmark-results dict.

    New-format payloads nest all metrics under a 'stats' dict; old-format
    payloads keep them as top-level keys. *default* is returned when the
    metric is absent in either layout.
    """
    if 'stats' in results:
        # New format: metrics live under the 'stats' sub-dict.
        return results['stats'].get(metric_name, default)
    # Old format: metric (if present) is a direct key.
    return results.get(metric_name, default)

# Lay out a 2x2 grid of comparison plots.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

implementations = ['GPT-OSS', 'MegaBlocks']

# Pull one metric for both runs as a [gptoss, megablocks] pair, trying the
# primary key first and an alternate spelling (old vs new JSON) second.
def _metric_pair(name, alt=None):
    pair = []
    for res in (gptoss_results, megablocks_results):
        fallback = get_metric(res, alt, 0) if alt else 0
        pair.append(get_metric(res, name, fallback))
    return pair

# Timing (avg_ms vs avg_time_ms), throughput (tokens_per_s vs
# throughput_tokens_per_sec), and memory metrics.
times = _metric_pair('avg_ms', 'avg_time_ms')
throughputs = _metric_pair('tokens_per_s', 'throughput_tokens_per_sec')
memory_usage = _metric_pair('memory_allocated_gb')
memory_increase = _metric_pair('memory_increase_gb')

print(f"Extracted metrics:")
print(f"  Times (ms): {times}")
print(f"  Throughputs: {throughputs}")
print(f"  Memory usage (GB): {memory_usage}")
print(f"  Memory increase (GB): {memory_increase}")

# One fixed color per implementation, reused across all four charts.
colors = ['#2E8B57', '#4169E1']
# The annotation loop was copy-pasted four times (once per chart) with only
# the format string varying; factored into a single helper.
def _annotate_bars(ax, bars, values, fmt):
    """Label each bar with its value, nudged 1% above the bar top."""
    for bar, value in zip(bars, values):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height + height * 0.01,
                fmt.format(value), ha='center', va='bottom')

# Latency comparison
bars1 = ax1.bar(implementations, times, color=colors)
ax1.set_ylabel('Average Time (ms)')
ax1.set_title('Latency Comparison')
ax1.grid(True, alpha=0.3)
_annotate_bars(ax1, bars1, times, '{:.2f}ms')

# Throughput comparison
bars2 = ax2.bar(implementations, throughputs, color=colors)
ax2.set_ylabel('Tokens per Second')
ax2.set_title('Throughput Comparison')
ax2.grid(True, alpha=0.3)
_annotate_bars(ax2, bars2, throughputs, '{:.0f}')

# Memory usage comparison
bars3 = ax3.bar(implementations, memory_usage, color=colors)
ax3.set_ylabel('Memory Allocated (GB)')
ax3.set_title('Memory Usage Comparison')
ax3.grid(True, alpha=0.3)
_annotate_bars(ax3, bars3, memory_usage, '{:.2f}GB')

# Memory increase comparison
bars4 = ax4.bar(implementations, memory_increase, color=colors)
ax4.set_ylabel('Memory Increase (GB)')
ax4.set_title('Memory Increase Comparison')
ax4.grid(True, alpha=0.3)
_annotate_bars(ax4, bars4, memory_increase, '{:.3f}GB')

plt.tight_layout()
plt.savefig('small_moe_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

# Print summary table
print("\n" + "="*60)
print("PERFORMANCE COMPARISON SUMMARY")
print("="*60)
print(f"{'Metric':<25} {'GPT-OSS':<15} {'MegaBlocks':<15} {'Winner':<10}")
print("-" * 60)

# Determine winners: lower is better for latency/memory, higher for throughput.
# Ties go to MegaBlocks (strict comparison), matching the original behavior.
latency_winner = "GPT-OSS" if times[0] < times[1] else "MegaBlocks"
throughput_winner = "GPT-OSS" if throughputs[0] > throughputs[1] else "MegaBlocks"
memory_winner = "GPT-OSS" if memory_usage[0] < memory_usage[1] else "MegaBlocks"
mem_inc_winner = "GPT-OSS" if memory_increase[0] < memory_increase[1] else "MegaBlocks"

print(f"{'Latency (ms)':<25} {times[0]:<15.2f} {times[1]:<15.2f} {latency_winner:<10}")
print(f"{'Throughput (tok/s)':<25} {throughputs[0]:<15.0f} {throughputs[1]:<15.0f} {throughput_winner:<10}")
print(f"{'Memory Usage (GB)':<25} {memory_usage[0]:<15.3f} {memory_usage[1]:<15.3f} {memory_winner:<10}")
print(f"{'Memory Increase (GB)':<25} {memory_increase[0]:<15.3f} {memory_increase[1]:<15.3f} {mem_inc_winner:<10}")

# Speed ratio: slower time / faster time. Metrics default to 0 when absent
# from the JSON, so guard against ZeroDivisionError instead of crashing.
faster_time = min(times)
if faster_time > 0:
    speed_ratio = max(times) / faster_time
    print(f"\n{latency_winner} is {speed_ratio:.2f}x faster")
else:
    print("\nSpeed ratio unavailable (missing timing data)")

# Throughput ratio, guarded the same way.
min_throughput = min(throughputs)
if min_throughput > 0:
    throughput_ratio = max(throughputs) / min_throughput
    print(f"{throughput_winner} has {throughput_ratio:.2f}x higher throughput")
else:
    print("Throughput ratio unavailable (missing throughput data)")

print("="*60)