File size: 10,939 Bytes
d473371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Palette of 30 distinct hex colors. Methods are given colors in order of
# their Avg AUP rank, so the palette order determines which rank gets which hue.
COLOR_PALETTE = [
    "#E91E63", "#4A90E2", "#00BFA5", "#FF6B35", "#8E24AA", "#4CAF50",
    "#FF4081", "#303F9F", "#FFD166", "#00796B", "#C2185B", "#7B1FA2",
    "#26A69A", "#1A4C7C", "#FF8C42", "#009688", "#673AB7", "#F44336",
    "#3F51B5", "#795548", "#607D8B", "#9C27B0", "#2196F3", "#CDDC39",
    "#FF9800", "#00BCD4", "#E64A19", "#5D4037", "#455A64", "#AD1457",
]

def get_model_colors(df):
    """Map each method name to a palette color, best Avg AUP first.

    Rows are ordered by the "Avg_AUP" column in descending order; the method
    at position i receives entry i of COLOR_PALETTE, wrapping around to the
    start of the palette when there are more methods than colors.

    Returns:
        dict of {method name: hex color string}.
    """
    ordered = df.sort_values("Avg_AUP", ascending=False)["Method"].tolist()
    colors = {}
    for position, name in enumerate(ordered):
        colors[name] = COLOR_PALETTE[position % len(COLOR_PALETTE)]
    return colors

def get_model_ranks(df):
    """Return the 1-based Avg AUP rank of every method.

    Rows are ordered by the "Avg_AUP" column in descending order, so the
    method with the highest Avg AUP gets rank 1.

    Returns:
        dict of {method name: rank}.
    """
    ranked = df.sort_values("Avg_AUP", ascending=False)["Method"].tolist()
    return {name: position for position, name in enumerate(ranked, start=1)}

def hex_to_rgba(hex_color, alpha=0.25):
    """Convert a hex color string to a CSS-style ``rgba(r,g,b,a)`` string.

    Args:
        hex_color: Color written as "RRGGBB" or the three-digit shorthand
            "RGB", with or without leading "#" characters.
        alpha: Opacity, inserted verbatim as the fourth component.

    Returns:
        A string of the form "rgba(R,G,B,alpha)" with decimal channel values.

    Raises:
        ValueError: If a channel is missing or is not valid hexadecimal.
    """
    digits = hex_color.lstrip('#')
    if len(digits) == 3:
        # Expand shorthand by doubling each digit: "0af" -> "00aaff".
        digits = "".join(ch * 2 for ch in digits)
    r, g, b = (int(digits[start:start + 2], 16) for start in (0, 2, 4))
    return f'rgba({r},{g},{b},{alpha})'

def create_radar_chart(df, tasks, top_n=15):
    """Create a radar chart of AUP scores for the first ``top_n`` methods.

    Every axis (one per task plus "Avg AUP") is rescaled independently: the
    lowest plotted score on an axis maps to 10 and the highest to 90, so the
    shape compares methods per axis rather than across axes. Hover text shows
    the original, unscaled scores.

    Args:
        df: DataFrame with a "Method" column, an "Avg_AUP" column and one
            "<task>_AUP" column per task. Only the first ``top_n`` rows are
            drawn (presumably df is already sorted by Avg AUP; colors and
            rank labels are always derived from all of df).
        tasks: Task names; "-" in a name becomes a line break in the axis label.
        top_n: Number of leading rows of df to draw.

    Returns:
        A plotly ``go.Figure`` with one Scatterpolar trace per plotted row.
    """
    df_top = df.head(top_n).copy()
    model_colors = get_model_colors(df)
    model_ranks = get_model_ranks(df)

    all_cols = [f"{t}_AUP" for t in tasks] + ["Avg_AUP"]
    categories = [t.replace("-", "\n") for t in tasks] + ["Avg\nAUP"]

    def _score(value):
        """Return the score, treating None, NaN and other falsy values as 0."""
        # NaN is truthy, so a plain `value or 0` would let it through;
        # `value != value` is true only for NaN.
        if value is None or value != value:
            return 0
        return value or 0

    # Per-axis min/max over the plotted rows, used only to shape the radar.
    col_stats = {}
    for col in all_cols:
        vals = df_top[col].dropna().astype(float)
        has_vals = len(vals) > 0
        col_stats[col] = {'min': vals.min() if has_vals else 0,
                          'max': vals.max() if has_vals else 100}

    fig = go.Figure()

    for _, row in df_top.iterrows():
        method = row["Method"]
        rank = model_ranks.get(method, 0)
        color = model_colors.get(method, "#808080")
        display_name = f"#{rank} {method}"

        # Original AUP values for hover display (missing scores shown as 0).
        original_vals = [_score(row.get(col)) for col in all_cols]

        # Normalized values for the radar shape.
        normalized = []
        for col, val in zip(all_cols, original_vals):
            low, high = col_stats[col]['min'], col_stats[col]['max']
            if high > low:
                norm = ((val - low) / (high - low)) * 80 + 10  # Scale to 10-90
                # A missing score (0) can fall below the axis minimum; keep the
                # point inside the displayed [0, 100] radial range.
                norm = min(100, max(0, norm))
            else:
                # All plotted scores on this axis are equal: put them mid-axis.
                norm = 50
            normalized.append(norm)

        # Custom hover text showing original AUP scores
        hover_texts = [f"<b>{display_name}</b><br>{cat}: <b>{val:.1f}</b>"
                       for cat, val in zip(categories, original_vals)]

        # Repeat the first point at the end so the polygon is closed.
        fig.add_trace(go.Scatterpolar(
            r=normalized + [normalized[0]],
            theta=categories + [categories[0]],
            mode='lines+markers', fill='toself', name=display_name,
            line=dict(color=color, width=2), marker=dict(color=color, size=6),
            fillcolor=hex_to_rgba(color, 0.15), opacity=0.9,
            text=hover_texts + [hover_texts[0]],
            hovertemplate='%{text}<extra></extra>'
        ))

    fig.update_layout(
        height=600, margin=dict(l=100, r=250, t=80, b=60),
        title=dict(text=f"🎯 Top {top_n} Methods: AUP Scores in Radar Chart", x=0.5, font=dict(size=18)),
        # Radial ticks are hidden because each axis has its own scale.
        polar=dict(radialaxis=dict(visible=True, range=[0, 100], tickfont=dict(size=11),
                                   tickvals=[], showticklabels=False)),
        legend=dict(font=dict(size=12), x=1.05, y=1, bgcolor='rgba(255,255,255,0.95)',
                   bordercolor='#ddd', borderwidth=1, title=dict(text="Methods (sorted by Avg AUP)", font=dict(size=13))),
        hoverlabel=dict(bgcolor="white", font_size=14, font_family="Arial", bordercolor="#333")
    )
    return fig

def create_group_bar_chart(df, tasks, top_n=15):
    """Create a grouped bar chart of per-task and average AUP scores.

    One bar trace is added per plotted method; its bars sit at each task name
    plus a final "Avg AUP" group. Scores that are missing (column absent,
    None or NaN) are left out of that method's trace, and a method with no
    scores at all gets no trace.

    Args:
        df: DataFrame with a "Method" column, an "Avg_AUP" column and
            "<task>_AUP" columns. Only the first ``top_n`` rows are drawn
            (presumably df is already sorted by Avg AUP; colors and rank
            labels are always derived from all of df).
        tasks: Task names, used both as x-axis labels and column prefixes.
        top_n: Number of leading rows of df to draw.

    Returns:
        A plotly ``go.Figure`` in grouped-bar mode.
    """
    df_top = df.head(top_n).copy()
    model_colors = get_model_colors(df)
    model_ranks = get_model_ranks(df)

    # (source column, x-axis label) for every bar group, average last.
    benchmarks = [(f"{task}_AUP", task) for task in tasks] + [("Avg_AUP", "Avg AUP")]
    fig = go.Figure()

    # Iterate the rows directly instead of looking each method up again by
    # name, which was quadratic and returned the wrong row for duplicate names.
    for _, row in df_top.iterrows():
        method = row["Method"]
        color = model_colors.get(method, "#808080")
        rank = model_ranks.get(method, 0)
        display_name = f"#{rank} {method}"

        x_vals, y_vals = [], []
        for column, label in benchmarks:
            aup = row.get(column)
            # `aup != aup` is true only for NaN, whatever the float type.
            if aup is None or aup != aup:
                continue
            x_vals.append(label)
            y_vals.append(aup)

        if y_vals:
            fig.add_trace(go.Bar(
                name=display_name, x=x_vals, y=y_vals, marker_color=color,
                hovertemplate=f"<b>{display_name}</b><br>%{{x}}: %{{y:.1f}}<extra></extra>"
            ))

    fig.update_layout(
        height=550, margin=dict(l=60, r=250, t=80, b=100),
        title=dict(text=f"📊 Top {top_n} Methods: AUP Scores in Bar Chart", x=0.5, font=dict(size=18)),
        xaxis_title="Benchmark", yaxis_title="AUP Score",
        barmode='group', bargap=0.2, bargroupgap=0.05,
        legend=dict(font=dict(size=11), x=1.02, y=1, bgcolor='rgba(255,255,255,0.95)',
                   bordercolor='#ddd', borderwidth=1, title=dict(text="Methods (sorted by Avg AUP)", font=dict(size=12))),
        hoverlabel=dict(bgcolor="white", font_size=14, font_family="Arial")
    )
    return fig

def _fit_smooth_curve(rho, y, n_samples=300):
    """Return (x, y) arrays of a smooth curve through the measured points.

    With three or more points a least-squares quadratic is fitted; with two
    points a parabola with its vertex at the first point is drawn through
    both; with fewer the input points are returned unchanged.
    """
    if len(rho) >= 3:
        poly = np.poly1d(np.polyfit(rho, y, 2))
        x_smooth = np.linspace(rho.min(), rho.max(), n_samples)
        return x_smooth, poly(x_smooth)
    if len(rho) == 2:
        x_smooth = np.linspace(rho.min(), rho.max(), n_samples)
        if rho[1] != rho[0]:
            a = (y[1] - y[0]) / ((rho[1] - rho[0]) ** 2)
            return x_smooth, a * (x_smooth - rho[0]) ** 2 + y[0]
        # Both points share the same x: no parabola exists, interpolate y.
        return x_smooth, np.linspace(y[0], y[1], n_samples)
    return rho, y


def create_aup_curve_chart(raw_data, tasks, df, top_n=15):
    """Create a grid of accuracy-vs-TPF curves, one panel per task plus "Average".

    Panels are laid out three per row in task order, followed by the Average
    panel (a 2x3 grid for five tasks). Each plotted method gets a fitted
    curve and hollow markers at its measured points in every panel where it
    has data; it appears in the legend once.

    Args:
        raw_data: Mapping {task: {method: sequence of points}}, where the
            first two items of each point are read as (TPF, accuracy).
        tasks: Task names, in panel order.
        df: DataFrame with "Method" and "Avg_AUP" columns. The first
            ``top_n`` rows select the methods to draw (presumably df is
            already sorted by Avg AUP; colors and rank labels are always
            derived from all of df).
        top_n: Number of leading rows of df to draw.

    Returns:
        A plotly figure created with ``make_subplots``.
    """
    df_top = df.head(top_n).copy()
    model_colors = get_model_colors(df)
    model_ranks = get_model_ranks(df)
    top_methods = df_top["Method"].tolist()
    methods_to_show = set(top_methods)

    # Build per-task data: {task: {method: [(rho, y), ...]}}
    task_data = {
        task: {
            method: [(p[0], p[1]) for p in pairs]
            for method, pairs in raw_data.get(task, {}).items()
            if method in methods_to_show
        }
        for task in tasks
    }

    # Average TPF and accuracy by point index across tasks.
    # NOTE(review): accuracy is divided by the total number of tasks (a task
    # without data contributes 0), while TPF is averaged only over tasks that
    # have data. This keeps the original behaviour for five tasks; confirm
    # the asymmetry is intended.
    n_tasks = len(tasks)
    avg_data = {}
    for method in methods_to_show:
        task_points = [task_data[t][method] for t in tasks if task_data[t].get(method)]
        if not task_points:
            continue
        # Use the shortest series so tasks with differing point counts
        # cannot raise IndexError.
        n_points = min(len(tp) for tp in task_points)
        avg_data[method] = [
            (np.mean([tp[i][0] for tp in task_points]),
             sum(tp[i][1] for tp in task_points) / n_tasks)
            for i in range(n_points)
        ]

    # One panel per task plus the Average panel, three panels per row.
    n_cols = 3
    n_panels = n_tasks + 1
    n_rows = -(-n_panels // n_cols)  # ceiling division
    titles = list(tasks) + ["Average"]
    fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=titles,
                        horizontal_spacing=0.08, vertical_spacing=0.15)

    # Track which methods have been added to legend
    legend_added = set()

    def get_pos(idx):
        """Return the 1-based (row, col) of panel ``idx``, filling row by row."""
        return idx // n_cols + 1, idx % n_cols + 1

    def draw_curve(pairs, method, row, col):
        """Add the fitted curve and data-point markers for one method to a panel."""
        if not pairs:
            return
        color = model_colors.get(method, "#808080")
        rank = model_ranks.get(method, 0)
        display_name = f"#{rank} {method}"
        # Only the first curve of each method creates a legend entry; the
        # shared legendgroup ties all of its traces to that entry.
        show_legend = method not in legend_added
        if show_legend:
            legend_added.add(method)

        rho, y = zip(*sorted(pairs, key=lambda x: x[0]))
        rho, y = np.array(rho), np.array(y)
        x_smooth, y_smooth = _fit_smooth_curve(rho, y)

        # Add fitted curve
        fig.add_trace(go.Scatter(
            x=x_smooth, y=y_smooth, mode='lines', name=display_name,
            line=dict(color=color, width=2.5), opacity=0.85,
            showlegend=show_legend, legendgroup=method,
            hoverinfo='skip'
        ), row=row, col=col)

        # Add markers at original data points
        fig.add_trace(go.Scatter(
            x=rho, y=y, mode='markers', name=display_name,
            marker=dict(color='white', size=8, line=dict(color=color, width=2)),
            showlegend=False, legendgroup=method,
            hovertemplate=f"<b>{display_name}</b><br>TPF: %{{x:.2f}}<br>Acc: %{{y:.1f}}<extra></extra>"
        ), row=row, col=col)

    # Draw one panel per task, methods in df order.
    for idx, task in enumerate(tasks):
        row, col = get_pos(idx)
        data = task_data[task]
        for method in top_methods:
            if method in data:
                draw_curve(data[method], method, row, col)

    # Draw the Average panel right after the last task panel.
    avg_row, avg_col = get_pos(n_tasks)
    for method in top_methods:
        if method in avg_data:
            draw_curve(avg_data[method], method, avg_row, avg_col)

    fig.update_layout(
        # Same height as before for up to two rows; grow for taller grids.
        height=550 if n_rows <= 2 else 275 * n_rows,
        margin=dict(l=60, r=250, t=80, b=60),
        title=dict(text=f"📈 Top {top_n} Methods: Accuracy-Parallelism Curves", x=0.5, font=dict(size=18)),
        legend=dict(font=dict(size=11), x=1.02, y=1, bgcolor='rgba(255,255,255,0.95)',
                   bordercolor='#ddd', borderwidth=1, title=dict(text="Methods (sorted by Avg AUP)", font=dict(size=12)),
                   tracegroupgap=1, itemsizing='constant'),
        hoverlabel=dict(bgcolor="white", font_size=14, font_family="Arial")
    )

    # Label the x axis on the bottom row and the y axis on the first column.
    for idx in range(n_panels):
        row, col = get_pos(idx)
        fig.update_xaxes(title_text="TPF (Tokens per Forward)" if row == n_rows else "", row=row, col=col)
        fig.update_yaxes(title_text="Acc (%)" if col == 1 else "", row=row, col=col)

    return fig