tfrere HF Staff commited on
Commit
b612b7f
·
2 Parent(s): ef40dcd dd4742e

Merge remote changes with local improvements

Browse files
app/src/content/article.mdx CHANGED
@@ -11,6 +11,7 @@ affiliations:
11
  - name: "Hugging Face"
12
  url: "https://huggingface.co"
13
  published: "Nov. 18, 2025"
 
14
  licence: >
15
  Diagrams and text are licensed under <a href="https://creativecommons.org/licenses/by/4.0/" target="_blank" rel="noopener noreferrer">CC‑BY 4.0</a> with the source available on <a href="https://huggingface.co/spaces/tfrere/research-article-template" target="_blank" rel="noopener noreferrer">Hugging Face</a>, unless noted otherwise.
16
  Figures reused from other sources are excluded and marked in their captions (“Figure from …”).
@@ -18,7 +19,7 @@ tags:
18
  - research
19
  - template
20
  tableOfContentsAutoCollapse: true
21
- pdfProOnly: true
22
  ---
23
 
24
  import Image from '../components/Image.astro'
@@ -288,6 +289,7 @@ import activations_magnitude from './assets/image/activations_magnitude.png'
288
 
289
  As we can see, activation norms roughly grow linearly across layers, with a norm being approximately equal to the layer index.
290
  If we want to look for a steering coefficient that is typically less than the original activation vector norm at layer $l$,
 
291
  we can define a reduced coefficient $\hat{\alpha}_l = (\alpha_l / l)$, and restrict our search to
292
  $$
293
  \hat{\alpha}_l \in [0,1]
 
11
  - name: "Hugging Face"
12
  url: "https://huggingface.co"
13
  published: "Nov. 18, 2025"
14
+ doi: 10.1234/abcd.efgh
15
  licence: >
16
  Diagrams and text are licensed under <a href="https://creativecommons.org/licenses/by/4.0/" target="_blank" rel="noopener noreferrer">CC‑BY 4.0</a> with the source available on <a href="https://huggingface.co/spaces/tfrere/research-article-template" target="_blank" rel="noopener noreferrer">Hugging Face</a>, unless noted otherwise.
17
  Figures reused from other sources are excluded and marked in their captions (“Figure from …”).
 
19
  - research
20
  - template
21
  tableOfContentsAutoCollapse: true
22
+ pdfProOnly: false
23
  ---
24
 
25
  import Image from '../components/Image.astro'
 
289
 
290
  As we can see, activation norms roughly grow linearly across layers, with a norm being approximately equal to the layer index.
291
  If we want to look for a steering coefficient that is typically less than the original activation vector norm at layer $l$,
292
+ <<<<<<< HEAD
293
  we can define a reduced coefficient $\hat{\alpha}_l = (\alpha_l / l)$, and restrict our search to
294
  $$
295
  \hat{\alpha}_l \in [0,1]
app/src/content/embeds/d3-evaluation-grid.html ADDED
@@ -0,0 +1,467 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div class="d3-eval-grid"></div>
2
+ <style>
3
+ .d3-eval-grid {
4
+ padding: 8px;
5
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
6
+ }
7
+
8
+ .d3-eval-grid .chart-card {
9
+ background: var(--surface-bg);
10
+ border: 1px solid var(--border-color);
11
+ border-radius: 10px;
12
+ padding: 16px;
13
+ }
14
+
15
+ .d3-eval-grid .grid-container {
16
+ display: grid;
17
+ grid-template-columns: repeat(2, 1fr);
18
+ gap: 24px;
19
+ margin-bottom: 16px;
20
+ }
21
+
22
+ @media (max-width: 768px) {
23
+ .d3-eval-grid .grid-container {
24
+ grid-template-columns: 1fr;
25
+ }
26
+ }
27
+
28
+ .d3-eval-grid .subplot {
29
+ background: var(--surface-bg);
30
+ border: 1px solid var(--border-color);
31
+ border-radius: 8px;
32
+ padding: 12px;
33
+ }
34
+
35
+ .d3-eval-grid .subplot-title {
36
+ font-size: 13px;
37
+ font-weight: 600;
38
+ color: var(--text-color);
39
+ margin-bottom: 8px;
40
+ text-align: center;
41
+ }
42
+
43
+ .d3-eval-grid .legend {
44
+ display: flex;
45
+ flex-wrap: wrap;
46
+ gap: 8px 16px;
47
+ padding-top: 12px;
48
+ border-top: 1px solid var(--border-color);
49
+ font-size: 12px;
50
+ justify-content: center;
51
+ }
52
+
53
+ .d3-eval-grid .legend-item {
54
+ display: flex;
55
+ align-items: center;
56
+ gap: 6px;
57
+ cursor: pointer;
58
+ transition: opacity 0.2s;
59
+ }
60
+
61
+ .d3-eval-grid .legend-item.dimmed {
62
+ opacity: 0.3;
63
+ }
64
+
65
+ .d3-eval-grid .legend-swatch {
66
+ width: 14px;
67
+ height: 14px;
68
+ border-radius: 3px;
69
+ border: 1px solid var(--border-color);
70
+ }
71
+
72
+ .d3-eval-grid .axes path,
73
+ .d3-eval-grid .axes line {
74
+ stroke: var(--axis-color);
75
+ }
76
+
77
+ .d3-eval-grid .axes text {
78
+ fill: var(--tick-color);
79
+ font-size: 10px;
80
+ }
81
+
82
+ .d3-eval-grid .grid line {
83
+ stroke: var(--grid-color);
84
+ stroke-dasharray: 2,2;
85
+ opacity: 0.5;
86
+ }
87
+
88
+ .d3-eval-grid .axis-label {
89
+ fill: var(--text-color);
90
+ font-size: 11px;
91
+ font-weight: 600;
92
+ }
93
+
94
+ .d3-eval-grid .d3-tooltip {
95
+ position: absolute;
96
+ pointer-events: none;
97
+ padding: 8px 10px;
98
+ background: var(--surface-bg);
99
+ border: 1px solid var(--border-color);
100
+ border-radius: 8px;
101
+ font-size: 11px;
102
+ line-height: 1.5;
103
+ box-shadow: 0 4px 24px rgba(0,0,0,.18);
104
+ opacity: 0;
105
+ transition: opacity 0.2s;
106
+ z-index: 1000;
107
+ }
108
+
109
+ .d3-eval-grid .bar {
110
+ transition: opacity 0.2s;
111
+ }
112
+
113
+ .d3-eval-grid .bar.dimmed {
114
+ opacity: 0.2;
115
+ }
116
+ </style>
117
+ <script>
118
+ (() => {
119
/**
 * Run `cb` once the D3 library is usable on `window`.
 *
 * When `window.d3.select` already exists, `cb` runs synchronously and its
 * return value is passed through. Otherwise a single shared
 * `<script id="d3-cdn-script">` tag is created (or reused if one is already
 * in the document) and `cb` runs after that tag fires `load`.
 *
 * A failed download, or a `load` that still leaves `window.d3` unusable, is
 * reported on the console instead of being dropped silently; `cb` is not
 * called in either case.
 *
 * @param {Function} cb - Called with no arguments once D3 is available.
 * @returns {*} The result of `cb()` on the synchronous path, otherwise undefined.
 */
const ensureD3 = (cb) => {
  const hasD3 = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (hasD3()) return cb();

  let s = document.getElementById('d3-cdn-script');
  if (!s) {
    s = document.createElement('script');
    s.id = 'd3-cdn-script';
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }

  s.addEventListener('load', () => {
    if (hasD3()) {
      cb();
    } else {
      console.error('[d3-eval-grid] D3 script loaded but window.d3 is not usable');
    }
  }, { once: true });

  // Without this listener a blocked or failed CDN request leaves the chart
  // container empty with no hint of what went wrong.
  s.addEventListener('error', () => {
    console.error(`[d3-eval-grid] Failed to load D3 from ${s.src}`);
  }, { once: true });
};
132
+
133
+ const bootstrap = () => {
134
+ const scriptEl = document.currentScript;
135
+ let container = scriptEl ? scriptEl.previousElementSibling : null;
136
+ if (!(container && container.classList && container.classList.contains('d3-eval-grid'))) {
137
+ const candidates = Array.from(document.querySelectorAll('.d3-eval-grid'))
138
+ .filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
139
+ container = candidates[candidates.length - 1] || null;
140
+ }
141
+ if (!container) return;
142
+ if (container.dataset) {
143
+ if (container.dataset.mounted === 'true') return;
144
+ container.dataset.mounted = 'true';
145
+ }
146
+
147
+ // Find data attribute
148
+ let mountEl = container;
149
+ while (mountEl && !mountEl.getAttribute?.('data-datafiles')) {
150
+ mountEl = mountEl.parentElement;
151
+ }
152
+ let providedData = null;
153
+ try {
154
+ const attr = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-datafiles') : null;
155
+ if (attr && attr.trim()) {
156
+ providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
157
+ }
158
+ } catch(_) {}
159
+
160
+ // Check for experiments filter attribute
161
+ let experimentsFilter = null;
162
+ try {
163
+ const expAttr = container.getAttribute('data-experiments');
164
+ if (expAttr) {
165
+ experimentsFilter = JSON.parse(expAttr);
166
+ }
167
+ } catch(_) {}
168
+
169
+ const DEFAULT_JSON = '/data/evaluation_summary.json';
170
+ const ensureDataPrefix = (p) => (typeof p === 'string' && p && !p.includes('/')) ? `/data/${p}` : p;
171
+
172
// Candidate URLs for the evaluation summary, tried in order.
// `providedData` is either a trimmed string or, when the `data-datafiles`
// attribute started with '[', the JSON-parsed value. Array values used to be
// parsed and then ignored in favour of the defaults; they are now honoured.
// Non-string or blank entries are dropped, and the built-in fallbacks are
// used when nothing usable was provided.
const providedPaths = (Array.isArray(providedData) ? providedData : [providedData])
  .filter((p) => typeof p === 'string' && p.trim())
  .map((p) => ensureDataPrefix(p.trim()));

const JSON_PATHS = providedPaths.length > 0
  ? providedPaths
  : [
    DEFAULT_JSON,
    './assets/data/evaluation_summary.json',
    '../assets/data/evaluation_summary.json',
    '../../assets/data/evaluation_summary.json'
  ];
180
+
181
/**
 * Fetch and parse JSON from the first entry of `paths` that succeeds.
 *
 * Candidates are tried one at a time, in order, with `cache: 'no-cache'`.
 * A network error, a non-OK response or a body that fails to parse as JSON
 * moves on to the next candidate.
 *
 * @param {Iterable<string>} paths - Candidate URLs, in priority order.
 * @returns {Promise<*>} The parsed JSON of the first successful candidate.
 * @throws {Error} 'JSON not found' when every candidate fails; `cause` holds
 *   the last failure encountered (null when `paths` was empty).
 */
const fetchFirstAvailable = async (paths) => {
  let lastFailure = null;
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (r.ok) return await r.json();
      lastFailure = new Error(`HTTP ${r.status} for ${p}`);
    } catch (err) {
      // Best effort: remember why this candidate failed, then try the next one.
      lastFailure = err;
    }
  }
  throw new Error('JSON not found', { cause: lastFailure });
};
190
+
191
+ fetchFirstAvailable(JSON_PATHS)
192
+ .then(rawData => {
193
+ // All experiments in order
194
+ const allExperiments = ['Prompt', 'Basic steering', 'Clamping', 'Clamping + Penalty', '2D optimized', '8D optimized'];
195
+
196
+ // Use filtered experiments if provided, otherwise use all
197
+ const experiments = experimentsFilter || allExperiments;
198
+
199
+ // Metrics in 2x3 grid layout
200
+ const metrics = [
201
+ { key: 'llm_score_concept', label: 'LLM Concept Score', format: d3.format('.2f') },
202
+ { key: 'llm_score_instruction', label: 'LLM Instruction Score', format: d3.format('.2f') },
203
+ { key: 'llm_score_fluency', label: 'LLM Fluency Score', format: d3.format('.2f') },
204
+ { key: 'rep3', label: '3-gram Repetition Fraction', format: d3.format('.2f') },
205
+ { key: 'mean_llm_score', label: 'Mean LLM Score', format: d3.format('.2f') },
206
+ { key: 'harmonic_llm_score', label: 'Harmonic Mean LLM Score', format: d3.format('.2f') }
207
+ ];
208
+
209
+ // Restructure data
210
+ const data = {};
211
+ rawData.forEach(d => {
212
+ if (!data[d.metric]) data[d.metric] = {};
213
+ data[d.metric][d.experiment] = { mean: d.mean, std: d.std };
214
+ });
215
+
216
+ // Color palette - consistent across all charts
217
+ const allColors = {
218
+ 'Prompt': '#4c4c4c',
219
+ 'Basic steering': '#b2b2b2',
220
+ 'Clamping': '#b2b2cc',
221
+ 'Clamping + Penalty': '#b2b2e6',
222
+ '2D optimized': '#b2ffb2',
223
+ '8D optimized': '#ffb2ff'
224
+ };
225
+
226
+ const card = document.createElement('div');
227
+ card.className = 'chart-card';
228
+ container.appendChild(card);
229
+
230
+ const gridContainer = document.createElement('div');
231
+ gridContainer.className = 'grid-container';
232
+ card.appendChild(gridContainer);
233
+
234
+ // Tooltip
235
+ const tooltip = d3.select(card).append('div')
236
+ .attr('class', 'd3-tooltip')
237
+ .style('transform', 'translate(-9999px, -9999px)');
238
+
239
+ let hoveredExperiment = null;
240
+
241
+ // Create each subplot
242
+ metrics.forEach((metric, idx) => {
243
+ const subplot = document.createElement('div');
244
+ subplot.className = 'subplot';
245
+ subplot.dataset.metric = metric.key;
246
+ gridContainer.appendChild(subplot);
247
+
248
+ const title = document.createElement('div');
249
+ title.className = 'subplot-title';
250
+ title.textContent = metric.label;
251
+ subplot.appendChild(title);
252
+
253
+ const svg = d3.select(subplot).append('svg')
254
+ .attr('width', '100%')
255
+ .style('display', 'block');
256
+
257
+ const g = svg.append('g');
258
+ const gGrid = g.append('g').attr('class', 'grid');
259
+ const gBars = g.append('g').attr('class', 'bars');
260
+ const gErrorBars = g.append('g').attr('class', 'error-bars');
261
+ const gAxes = g.append('g').attr('class', 'axes');
262
+
263
+ subplot._render = () => {
264
+ const width = subplot.clientWidth || 300;
265
+ const height = Math.max(200, Math.round(width * 0.6));
266
+ const margin = { top: 10, right: 10, bottom: 60, left: 50 };
267
+ const innerWidth = width - margin.left - margin.right;
268
+ const innerHeight = height - margin.top - margin.bottom;
269
+
270
+ svg.attr('height', height);
271
+ g.attr('transform', `translate(${margin.left},${margin.top})`);
272
+
273
+ // Scales
274
+ const x = d3.scaleBand()
275
+ .domain(experiments)
276
+ .range([0, innerWidth])
277
+ .padding(0.2);
278
+
279
+ // Find y domain for this metric
280
+ const values = experiments.map(exp => data[metric.key]?.[exp]?.mean).filter(v => v !== undefined);
281
+ const stds = experiments.map(exp => data[metric.key]?.[exp]?.std).filter(v => v !== undefined);
282
+ const maxVal = d3.max(values.map((v, i) => v + stds[i]));
283
+ const minVal = d3.min(values.map((v, i) => Math.max(0, v - stds[i])));
284
+
285
+ const y = d3.scaleLinear()
286
+ .domain([Math.max(0, minVal * 0.95), maxVal * 1.05])
287
+ .range([innerHeight, 0])
288
+ .nice();
289
+
290
+ // Grid
291
+ gGrid.selectAll('*').remove();
292
+ gGrid.selectAll('line')
293
+ .data(y.ticks(4))
294
+ .join('line')
295
+ .attr('x1', 0)
296
+ .attr('x2', innerWidth)
297
+ .attr('y1', d => y(d))
298
+ .attr('y2', d => y(d));
299
+
300
+ // Axes
301
+ gAxes.selectAll('*').remove();
302
+
303
+ const xAxis = gAxes.append('g')
304
+ .attr('transform', `translate(0,${innerHeight})`)
305
+ .call(d3.axisBottom(x).tickSize(3));
306
+
307
+ xAxis.selectAll('text')
308
+ .attr('transform', 'rotate(-45)')
309
+ .style('text-anchor', 'end')
310
+ .attr('dx', '-0.5em')
311
+ .attr('dy', '0.15em');
312
+
313
+ gAxes.append('g')
314
+ .call(d3.axisLeft(y).ticks(4).tickFormat(metric.format).tickSize(3));
315
+
316
+ // Draw bars
317
+ const bars = [];
318
+ experiments.forEach(exp => {
319
+ const d = data[metric.key]?.[exp];
320
+ if (d) {
321
+ bars.push({
322
+ experiment: exp,
323
+ mean: d.mean,
324
+ std: d.std,
325
+ color: allColors[exp],
326
+ x: x(exp),
327
+ y: y(d.mean),
328
+ width: x.bandwidth(),
329
+ height: innerHeight - y(d.mean)
330
+ });
331
+ }
332
+ });
333
+
334
+ gBars.selectAll('rect')
335
+ .data(bars)
336
+ .join('rect')
337
+ .attr('class', 'bar')
338
+ .attr('x', d => d.x)
339
+ .attr('y', d => d.y)
340
+ .attr('width', d => d.width)
341
+ .attr('height', d => d.height)
342
+ .attr('fill', d => d.color)
343
+ .attr('rx', 2)
344
+ .classed('dimmed', d => hoveredExperiment && d.experiment !== hoveredExperiment)
345
+ .on('mouseenter', (event, d) => {
346
+ hoveredExperiment = d.experiment;
347
+ updateAll();
348
+ tooltip
349
+ .style('opacity', 1)
350
+ .html(`
351
+ <div><strong>${d.experiment}</strong></div>
352
+ <div style="margin-top: 4px;">${metric.label}</div>
353
+ <div style="margin-top: 4px;"><strong>Mean:</strong> ${metric.format(d.mean)}</div>
354
+ <div><strong>Std:</strong> ${metric.format(d.std)}</div>
355
+ `);
356
+ })
357
+ .on('mousemove', (event) => {
358
+ const [mx, my] = d3.pointer(event, card);
359
+ tooltip.style('transform', `translate(${mx + 10}px, ${my + 10}px)`);
360
+ })
361
+ .on('mouseleave', () => {
362
+ hoveredExperiment = null;
363
+ updateAll();
364
+ tooltip.style('opacity', 0).style('transform', 'translate(-9999px, -9999px)');
365
+ });
366
+
367
+ // Error bars
368
+ gErrorBars.selectAll('line')
369
+ .data(bars)
370
+ .join('line')
371
+ .attr('x1', d => d.x + d.width / 2)
372
+ .attr('x2', d => d.x + d.width / 2)
373
+ .attr('y1', d => y(d.mean + d.std))
374
+ .attr('y2', d => y(Math.max(0, d.mean - d.std)))
375
+ .attr('stroke', '#666')
376
+ .attr('stroke-width', 1.5)
377
+ .attr('opacity', 0.6);
378
+
379
+ // Error bar caps
380
+ gErrorBars.selectAll('.cap-top')
381
+ .data(bars)
382
+ .join('line')
383
+ .attr('class', 'cap-top')
384
+ .attr('x1', d => d.x + d.width / 2 - 3)
385
+ .attr('x2', d => d.x + d.width / 2 + 3)
386
+ .attr('y1', d => y(d.mean + d.std))
387
+ .attr('y2', d => y(d.mean + d.std))
388
+ .attr('stroke', '#666')
389
+ .attr('stroke-width', 1.5)
390
+ .attr('opacity', 0.6);
391
+
392
+ gErrorBars.selectAll('.cap-bottom')
393
+ .data(bars)
394
+ .join('line')
395
+ .attr('class', 'cap-bottom')
396
+ .attr('x1', d => d.x + d.width / 2 - 3)
397
+ .attr('x2', d => d.x + d.width / 2 + 3)
398
+ .attr('y1', d => y(Math.max(0, d.mean - d.std)))
399
+ .attr('y2', d => y(Math.max(0, d.mean - d.std)))
400
+ .attr('stroke', '#666')
401
+ .attr('stroke-width', 1.5)
402
+ .attr('opacity', 0.6);
403
+ };
404
+ });
405
+
406
+ // Legend
407
+ const legend = document.createElement('div');
408
+ legend.className = 'legend';
409
+ experiments.forEach(exp => {
410
+ const item = document.createElement('div');
411
+ item.className = 'legend-item';
412
+ item.dataset.experiment = exp;
413
+ item.innerHTML = `
414
+ <div class="legend-swatch" style="background: ${allColors[exp]}"></div>
415
+ <span>${exp}</span>
416
+ `;
417
+ legend.appendChild(item);
418
+ });
419
+ card.appendChild(legend);
420
+
421
+ // Legend interaction
422
+ legend.querySelectorAll('.legend-item').forEach(item => {
423
+ item.addEventListener('mouseenter', () => {
424
+ hoveredExperiment = item.dataset.experiment;
425
+ updateAll();
426
+ });
427
+ item.addEventListener('mouseleave', () => {
428
+ hoveredExperiment = null;
429
+ updateAll();
430
+ });
431
+ });
432
+
433
+ const updateAll = () => {
434
+ gridContainer.querySelectorAll('.subplot').forEach(subplot => {
435
+ if (subplot._render) subplot._render();
436
+ });
437
+
438
+ legend.querySelectorAll('.legend-item').forEach(item => {
439
+ if (hoveredExperiment && item.dataset.experiment !== hoveredExperiment) {
440
+ item.classList.add('dimmed');
441
+ } else {
442
+ item.classList.remove('dimmed');
443
+ }
444
+ });
445
+ };
446
+
447
+ updateAll();
448
+
449
+ if (window.ResizeObserver) {
450
+ const ro = new ResizeObserver(() => updateAll());
451
+ ro.observe(container);
452
+ } else {
453
+ window.addEventListener('resize', updateAll);
454
+ }
455
+ })
456
+ .catch(err => {
457
+ container.innerHTML = `<div style="color: red; padding: 20px;">Error: ${err.message}</div>`;
458
+ });
459
+ };
460
+
461
+ if (document.readyState === 'loading') {
462
+ document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
463
+ } else {
464
+ ensureD3(bootstrap);
465
+ }
466
+ })();
467
+ </script>
app/src/content/embeds/d3-evaluation1-naive.html ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div class="d3-eval-grid d3-eval-grid-1"></div>
2
+ <style>
3
+ .d3-eval-grid {
4
+ padding: 2px;
5
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
6
+ }
7
+
8
+ .d3-eval-grid .grid-container {
9
+ display: grid;
10
+ grid-template-columns: repeat(2, 1fr);
11
+ gap: 8px;
12
+ }
13
+
14
+ @media (max-width: 768px) {
15
+ .d3-eval-grid .grid-container {
16
+ grid-template-columns: 1fr;
17
+ }
18
+ }
19
+
20
+ .d3-eval-grid .subplot {
21
+ padding: 4px;
22
+ }
23
+
24
+ .d3-eval-grid .subplot-title {
25
+ font-size: 12px;
26
+ font-weight: 600;
27
+ color: var(--text-color);
28
+ margin-bottom: 4px;
29
+ text-align: center;
30
+ }
31
+
32
+
33
+ .d3-eval-grid .axes path,
34
+ .d3-eval-grid .axes line {
35
+ stroke: var(--axis-color);
36
+ }
37
+
38
+ .d3-eval-grid .axes text {
39
+ fill: var(--tick-color);
40
+ font-size: 9px;
41
+ }
42
+
43
+ .d3-eval-grid .grid line {
44
+ stroke: var(--grid-color);
45
+ stroke-dasharray: 2,2;
46
+ opacity: 0.5;
47
+ }
48
+
49
+ .d3-eval-grid .axis-label {
50
+ fill: var(--text-color);
51
+ font-size: 11px;
52
+ font-weight: 600;
53
+ }
54
+
55
+ .d3-eval-grid .d3-tooltip {
56
+ position: absolute;
57
+ pointer-events: none;
58
+ padding: 8px 10px;
59
+ background: var(--surface-bg);
60
+ border: 1px solid var(--border-color);
61
+ border-radius: 8px;
62
+ font-size: 11px;
63
+ line-height: 1.5;
64
+ box-shadow: 0 4px 24px rgba(0,0,0,.18);
65
+ opacity: 0;
66
+ transition: opacity 0.2s;
67
+ z-index: 1000;
68
+ }
69
+
70
+ .d3-eval-grid .bar {
71
+ transition: opacity 0.2s;
72
+ }
73
+
74
+ .d3-eval-grid .bar.dimmed {
75
+ opacity: 0.2;
76
+ }
77
+ </style>
78
+ <script>
79
+ (() => {
80
/**
 * Run `cb` once the D3 library is usable on `window`.
 *
 * When `window.d3.select` already exists, `cb` runs synchronously and its
 * return value is passed through. Otherwise a single shared
 * `<script id="d3-cdn-script">` tag is created (or reused if one is already
 * in the document) and `cb` runs after that tag fires `load`.
 *
 * A failed download, or a `load` that still leaves `window.d3` unusable, is
 * reported on the console instead of being dropped silently; `cb` is not
 * called in either case.
 *
 * @param {Function} cb - Called with no arguments once D3 is available.
 * @returns {*} The result of `cb()` on the synchronous path, otherwise undefined.
 */
const ensureD3 = (cb) => {
  const hasD3 = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (hasD3()) return cb();

  let s = document.getElementById('d3-cdn-script');
  if (!s) {
    s = document.createElement('script');
    s.id = 'd3-cdn-script';
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }

  s.addEventListener('load', () => {
    if (hasD3()) {
      cb();
    } else {
      console.error('[d3-eval-grid] D3 script loaded but window.d3 is not usable');
    }
  }, { once: true });

  // Without this listener a blocked or failed CDN request leaves the chart
  // container empty with no hint of what went wrong.
  s.addEventListener('error', () => {
    console.error(`[d3-eval-grid] Failed to load D3 from ${s.src}`);
  }, { once: true });
};
93
+
94
+ const bootstrap = () => {
95
+ const scriptEl = document.currentScript;
96
+ let container = scriptEl ? scriptEl.previousElementSibling : null;
97
+ if (!(container && container.classList && container.classList.contains('d3-eval-grid-1'))) {
98
+ const candidates = Array.from(document.querySelectorAll('.d3-eval-grid-1'))
99
+ .filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
100
+ container = candidates[candidates.length - 1] || null;
101
+ }
102
+ if (!container) return;
103
+ if (container.dataset) {
104
+ if (container.dataset.mounted === 'true') return;
105
+ container.dataset.mounted = 'true';
106
+ }
107
+
108
+ // Find data attribute
109
+ let mountEl = container;
110
+ while (mountEl && !mountEl.getAttribute?.('data-datafiles')) {
111
+ mountEl = mountEl.parentElement;
112
+ }
113
+ let providedData = null;
114
+ try {
115
+ const attr = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-datafiles') : null;
116
+ if (attr && attr.trim()) {
117
+ providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
118
+ }
119
+ } catch(_) {}
120
+
121
+ // Check for experiments filter attribute
122
+ let experimentsFilter = null;
123
+ try {
124
+ const expAttr = container.getAttribute('data-experiments');
125
+ if (expAttr) {
126
+ experimentsFilter = JSON.parse(expAttr);
127
+ }
128
+ } catch(_) {}
129
+
130
+ const DEFAULT_JSON = '/data/evaluation_summary.json';
131
+ const ensureDataPrefix = (p) => (typeof p === 'string' && p && !p.includes('/')) ? `/data/${p}` : p;
132
+
133
// Candidate URLs for the evaluation summary, tried in order.
// `providedData` is either a trimmed string or, when the `data-datafiles`
// attribute started with '[', the JSON-parsed value. Array values used to be
// parsed and then ignored in favour of the defaults; they are now honoured.
// Non-string or blank entries are dropped, and the built-in fallbacks are
// used when nothing usable was provided.
const providedPaths = (Array.isArray(providedData) ? providedData : [providedData])
  .filter((p) => typeof p === 'string' && p.trim())
  .map((p) => ensureDataPrefix(p.trim()));

const JSON_PATHS = providedPaths.length > 0
  ? providedPaths
  : [
    DEFAULT_JSON,
    './assets/data/evaluation_summary.json',
    '../assets/data/evaluation_summary.json',
    '../../assets/data/evaluation_summary.json'
  ];
141
+
142
/**
 * Fetch and parse JSON from the first entry of `paths` that succeeds.
 *
 * Candidates are tried one at a time, in order, with `cache: 'no-cache'`.
 * A network error, a non-OK response or a body that fails to parse as JSON
 * moves on to the next candidate.
 *
 * @param {Iterable<string>} paths - Candidate URLs, in priority order.
 * @returns {Promise<*>} The parsed JSON of the first successful candidate.
 * @throws {Error} 'JSON not found' when every candidate fails; `cause` holds
 *   the last failure encountered (null when `paths` was empty).
 */
const fetchFirstAvailable = async (paths) => {
  let lastFailure = null;
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (r.ok) return await r.json();
      lastFailure = new Error(`HTTP ${r.status} for ${p}`);
    } catch (err) {
      // Best effort: remember why this candidate failed, then try the next one.
      lastFailure = err;
    }
  }
  throw new Error('JSON not found', { cause: lastFailure });
};
151
+
152
+ fetchFirstAvailable(JSON_PATHS)
153
+ .then(rawData => {
154
+ // Chart 1: Only Prompt and Basic steering (but reserve space for all)
155
+ const allExperiments = ['Prompt', 'Basic steering', 'Clamping', 'Clamping + Penalty', '2D optimized', '8D optimized'];
156
+ const visibleExperiments = ['Prompt', 'Basic steering'];
157
+
158
+ // Metrics in 2x4 grid layout (8 metrics)
159
+ const metrics = [
160
+ { key: 'llm_score_concept', label: 'LLM Concept Score', format: d3.format('.2f') },
161
+ { key: 'eiffel', label: 'Explicit Concept Presence', format: d3.format('.2f') },
162
+ { key: 'llm_score_instruction', label: 'LLM Instruction Score', format: d3.format('.2f') },
163
+ { key: 'minus_log_prob', label: 'Surprise in Original Model', format: d3.format('.2f') },
164
+ { key: 'llm_score_fluency', label: 'LLM Fluency Score', format: d3.format('.2f') },
165
+ { key: 'rep3', label: '3-gram Repetition Fraction', format: d3.format('.2f') },
166
+ { key: 'mean_llm_score', label: 'Mean LLM Score', format: d3.format('.2f') },
167
+ { key: 'harmonic_llm_score', label: 'Harmonic Mean LLM Score', format: d3.format('.2f') }
168
+ ];
169
+
170
+ // Restructure data
171
+ const data = {};
172
+ rawData.forEach(d => {
173
+ if (!data[d.metric]) data[d.metric] = {};
174
+ data[d.metric][d.experiment] = { mean: d.mean, std: d.std };
175
+ });
176
+
177
+ // Color palette - consistent across all charts
178
+ const allColors = {
179
+ 'Prompt': '#4c4c4c',
180
+ 'Basic steering': '#b2b2b2',
181
+ 'Clamping': '#b2b2cc',
182
+ 'Clamping + Penalty': '#b2b2e6',
183
+ '2D optimized': '#b2ffb2',
184
+ '8D optimized': '#ffb2ff'
185
+ };
186
+
187
+ const gridContainer = document.createElement('div');
188
+ gridContainer.className = 'grid-container';
189
+ container.appendChild(gridContainer);
190
+
191
+ // Tooltip
192
+ const tooltip = d3.select(container).append('div')
193
+ .attr('class', 'd3-tooltip')
194
+ .style('transform', 'translate(-9999px, -9999px)');
195
+
196
+ let hoveredExperiment = null;
197
+
198
+ // Create each subplot
199
+ metrics.forEach((metric, idx) => {
200
+ const subplot = document.createElement('div');
201
+ subplot.className = 'subplot';
202
+ subplot.dataset.metric = metric.key;
203
+ gridContainer.appendChild(subplot);
204
+
205
+ const title = document.createElement('div');
206
+ title.className = 'subplot-title';
207
+ title.textContent = metric.label;
208
+ subplot.appendChild(title);
209
+
210
+ const svg = d3.select(subplot).append('svg')
211
+ .attr('width', '100%')
212
+ .style('display', 'block');
213
+
214
+ const g = svg.append('g');
215
+ const gGrid = g.append('g').attr('class', 'grid');
216
+ const gBars = g.append('g').attr('class', 'bars');
217
+ const gErrorBars = g.append('g').attr('class', 'error-bars');
218
+ const gAxes = g.append('g').attr('class', 'axes');
219
+ const gLabels = g.append('g').attr('class', 'value-labels');
220
+
221
+ subplot._render = () => {
222
+ const width = subplot.clientWidth || 300;
223
+ const height = Math.max(200, Math.round(width * 0.6));
224
+ const margin = { top: 10, right: 20, bottom: 70, left: 42 };
225
+ const innerWidth = width - margin.left - margin.right;
226
+ const innerHeight = height - margin.top - margin.bottom;
227
+
228
+ svg.attr('height', height);
229
+ g.attr('transform', `translate(${margin.left},${margin.top})`);
230
+
231
+ // Scales - use all experiments for consistent positioning
232
+ const x = d3.scaleBand()
233
+ .domain(allExperiments)
234
+ .range([0, innerWidth])
235
+ .padding(0.2);
236
+
237
+ // Fixed y-axis ranges based on metric type
238
+ const yDomains = {
239
+ 'llm_score_concept': [0, 2],
240
+ 'llm_score_instruction': [0, 2],
241
+ 'llm_score_fluency': [0, 2],
242
+ 'mean_llm_score': [0, 2],
243
+ 'harmonic_llm_score': [0, 2],
244
+ 'eiffel': [0, 1],
245
+ 'minus_log_prob': [0, 2],
246
+ 'rep3': [0, 0.5]
247
+ };
248
+
249
+ const y = d3.scaleLinear()
250
+ .domain(yDomains[metric.key] || [0, 1])
251
+ .range([innerHeight, 0]);
252
+
253
+ // Grid
254
+ gGrid.selectAll('*').remove();
255
+ gGrid.selectAll('line')
256
+ .data(y.ticks(4))
257
+ .join('line')
258
+ .attr('x1', 0)
259
+ .attr('x2', innerWidth)
260
+ .attr('y1', d => y(d))
261
+ .attr('y2', d => y(d));
262
+
263
+ // Axes
264
+ gAxes.selectAll('*').remove();
265
+
266
+ const xAxis = gAxes.append('g')
267
+ .attr('transform', `translate(0,${innerHeight})`)
268
+ .call(d3.axisBottom(x).tickSize(3));
269
+
270
+ // Only show labels for visible experiments
271
+ xAxis.selectAll('text')
272
+ .attr('transform', 'rotate(-45)')
273
+ .style('text-anchor', 'end')
274
+ .attr('dx', '-0.5em')
275
+ .attr('dy', '0.15em')
276
+ .style('opacity', function() {
277
+ const text = d3.select(this).text();
278
+ return visibleExperiments.includes(text) ? 1 : 0;
279
+ });
280
+
281
+ gAxes.append('g')
282
+ .call(d3.axisLeft(y).ticks(4).tickFormat(metric.format).tickSize(3));
283
+
284
+ // Draw bars (only for visible experiments)
285
+ const bars = [];
286
+ visibleExperiments.forEach(exp => {
287
+ const d = data[metric.key]?.[exp];
288
+ if (d) {
289
+ bars.push({
290
+ experiment: exp,
291
+ mean: d.mean,
292
+ std: d.std,
293
+ color: allColors[exp],
294
+ x: x(exp),
295
+ y: y(d.mean),
296
+ width: x.bandwidth(),
297
+ height: innerHeight - y(d.mean)
298
+ });
299
+ }
300
+ });
301
+
302
+ gBars.selectAll('rect')
303
+ .data(bars)
304
+ .join('rect')
305
+ .attr('class', 'bar')
306
+ .attr('x', d => d.x)
307
+ .attr('y', d => d.y)
308
+ .attr('width', d => d.width)
309
+ .attr('height', d => d.height)
310
+ .attr('fill', d => d.color)
311
+ .attr('rx', 2)
312
+ .classed('dimmed', d => hoveredExperiment && d.experiment !== hoveredExperiment)
313
+ .on('mouseenter', (event, d) => {
314
+ hoveredExperiment = d.experiment;
315
+
316
+ // Show value label on bar
317
+ gLabels.selectAll('text').remove();
318
+ gLabels.append('text')
319
+ .attr('x', d.x + d.width / 2)
320
+ .attr('y', d.y - 5)
321
+ .attr('text-anchor', 'middle')
322
+ .attr('fill', 'var(--text-color)')
323
+ .attr('font-size', '11px')
324
+ .attr('font-weight', '600')
325
+ .text(metric.format(d.mean));
326
+
327
+ updateAll();
328
+ tooltip
329
+ .style('opacity', 1)
330
+ .html(`
331
+ <div><strong>${d.experiment}</strong></div>
332
+ <div style="margin-top: 4px;">${metric.label}</div>
333
+ <div style="margin-top: 4px;"><strong>Mean:</strong> ${metric.format(d.mean)}</div>
334
+ <div><strong>Std:</strong> ${metric.format(d.std)}</div>
335
+ `);
336
+ })
337
+ .on('mousemove', (event) => {
338
+ const [mx, my] = d3.pointer(event, container);
339
+ tooltip.style('transform', `translate(${mx + 10}px, ${my + 10}px)`);
340
+ })
341
+ .on('mouseleave', () => {
342
+ hoveredExperiment = null;
343
+ gLabels.selectAll('text').remove();
344
+ updateAll();
345
+ tooltip.style('opacity', 0).style('transform', 'translate(-9999px, -9999px)');
346
+ });
347
+
348
+ // Error bars
349
+ gErrorBars.selectAll('line')
350
+ .data(bars)
351
+ .join('line')
352
+ .attr('x1', d => d.x + d.width / 2)
353
+ .attr('x2', d => d.x + d.width / 2)
354
+ .attr('y1', d => y(d.mean + d.std))
355
+ .attr('y2', d => y(Math.max(0, d.mean - d.std)))
356
+ .attr('stroke', '#666')
357
+ .attr('stroke-width', 1.5)
358
+ .attr('opacity', 0.6);
359
+
360
+ // Error bar caps
361
+ gErrorBars.selectAll('.cap-top')
362
+ .data(bars)
363
+ .join('line')
364
+ .attr('class', 'cap-top')
365
+ .attr('x1', d => d.x + d.width / 2 - 3)
366
+ .attr('x2', d => d.x + d.width / 2 + 3)
367
+ .attr('y1', d => y(d.mean + d.std))
368
+ .attr('y2', d => y(d.mean + d.std))
369
+ .attr('stroke', '#666')
370
+ .attr('stroke-width', 1.5)
371
+ .attr('opacity', 0.6);
372
+
373
+ gErrorBars.selectAll('.cap-bottom')
374
+ .data(bars)
375
+ .join('line')
376
+ .attr('class', 'cap-bottom')
377
+ .attr('x1', d => d.x + d.width / 2 - 3)
378
+ .attr('x2', d => d.x + d.width / 2 + 3)
379
+ .attr('y1', d => y(Math.max(0, d.mean - d.std)))
380
+ .attr('y2', d => y(Math.max(0, d.mean - d.std)))
381
+ .attr('stroke', '#666')
382
+ .attr('stroke-width', 1.5)
383
+ .attr('opacity', 0.6);
384
+ };
385
+ });
386
+
387
+ const updateAll = () => {
388
+ gridContainer.querySelectorAll('.subplot').forEach(subplot => {
389
+ if (subplot._render) subplot._render();
390
+ });
391
+
392
+ };
393
+
394
+ updateAll();
395
+
396
+ if (window.ResizeObserver) {
397
+ const ro = new ResizeObserver(() => updateAll());
398
+ ro.observe(container);
399
+ } else {
400
+ window.addEventListener('resize', updateAll);
401
+ }
402
+ })
403
+ .catch(err => {
404
+ container.innerHTML = `<div style="color: red; padding: 20px;">Error: ${err.message}</div>`;
405
+ });
406
+ };
407
+
408
+ if (document.readyState === 'loading') {
409
+ document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
410
+ } else {
411
+ ensureD3(bootstrap);
412
+ }
413
+ })();
414
+ </script>
app/src/content/embeds/d3-evaluation2-clamp.html ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div class="d3-eval-grid d3-eval-grid-2"></div>
2
+ <style>
3
/* Root element of the evaluation chart embed. */
.d3-eval-grid {
  /* Containing block for the absolutely positioned tooltip: the script
     computes the tooltip offset relative to this element. */
  position: relative;
  padding: 2px;
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
}

/* Two subplots per row... */
.d3-eval-grid .grid-container {
  display: grid;
  grid-template-columns: repeat(2, 1fr);
  gap: 8px;
}

/* ...collapsing to a single column on narrow viewports. */
@media (max-width: 768px) {
  .d3-eval-grid .grid-container {
    grid-template-columns: 1fr;
  }
}

.d3-eval-grid .subplot {
  padding: 4px;
}

.d3-eval-grid .subplot-title {
  font-size: 12px;
  font-weight: 600;
  color: var(--text-color);
  margin-bottom: 4px;
  text-align: center;
}

/* Axis lines and tick marks. */
.d3-eval-grid .axes path,
.d3-eval-grid .axes line {
  stroke: var(--axis-color);
}

.d3-eval-grid .axes text {
  fill: var(--tick-color);
  font-size: 9px;
}

/* Horizontal grid lines behind the bars. */
.d3-eval-grid .grid line {
  stroke: var(--grid-color);
  stroke-dasharray: 2,2;
  opacity: 0.5;
}

.d3-eval-grid .axis-label {
  fill: var(--text-color);
  font-size: 11px;
  font-weight: 600;
}

/* Hover tooltip; moved with a CSS transform by the script. */
.d3-eval-grid .d3-tooltip {
  position: absolute;
  /* Anchor at the container's top-left corner so the translate() offset is
     applied from there rather than from the element's in-flow position. */
  top: 0;
  left: 0;
  pointer-events: none;
  padding: 8px 10px;
  background: var(--surface-bg);
  border: 1px solid var(--border-color);
  border-radius: 8px;
  font-size: 11px;
  line-height: 1.5;
  box-shadow: 0 4px 24px rgba(0,0,0,.18);
  opacity: 0;
  transition: opacity 0.2s;
  z-index: 1000;
}

.d3-eval-grid .bar {
  transition: opacity 0.2s;
}

/* Bars of experiments other than the hovered one. */
.d3-eval-grid .bar.dimmed {
  opacity: 0.2;
}
77
+ </style>
78
+ <script>
79
+ (() => {
80
/**
 * Calls `cb` once the global `d3` library is usable.
 *
 * If `window.d3` is already present, `cb` runs synchronously and its return
 * value is returned. Otherwise a single shared CDN <script> tag (id
 * `d3-cdn-script`) is created, or reused if another embed already added it,
 * and `cb` runs after that tag's `load` event.
 *
 * @param {Function} cb - Invoked with no arguments when d3 is available.
 * @returns {*} The result of `cb()` when d3 was already loaded, else undefined.
 */
const ensureD3 = (cb) => {
  const isD3Ready = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (isD3Ready()) return cb();

  let tag = document.getElementById('d3-cdn-script');
  if (!tag) {
    tag = document.createElement('script');
    tag.id = 'd3-cdn-script';
    tag.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(tag);
  }

  tag.addEventListener('load', () => {
    if (isD3Ready()) {
      cb();
    } else {
      console.error('d3 script loaded but window.d3 is not available');
    }
  }, { once: true });

  // Without this the chart would silently never render when the CDN fails.
  tag.addEventListener('error', () => {
    console.error(`Failed to load d3 from ${tag.src}`);
  }, { once: true });
};
93
+
94
/**
 * Mounts the "clamping" evaluation chart: a grid of bar-chart subplots, one
 * per metric, inside the page's `.d3-eval-grid-2` element.
 *
 * Resolves the container (each element is mounted at most once, tracked with
 * `data-mounted`), takes the JSON location(s) from the nearest
 * `data-datafiles` attribute or from a list of default paths, fetches the
 * first one that responds, then draws the subplots and redraws them when the
 * container is resized. Any failure in the fetch/draw chain is shown as a red
 * message inside the container.
 *
 * Requires the global `d3` to be loaded before it is called.
 */
const bootstrap = () => {
  const GRID_CLASS = 'd3-eval-grid-2';

  // `document.currentScript` is only set while a <script> is being executed
  // synchronously; when this runs from an event listener it is null and the
  // class-based lookup below picks the last not-yet-mounted container.
  const scriptEl = document.currentScript;
  let container = scriptEl ? scriptEl.previousElementSibling : null;
  if (!(container && container.classList && container.classList.contains(GRID_CLASS))) {
    const candidates = Array.from(document.querySelectorAll(`.${GRID_CLASS}`))
      .filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
    container = candidates[candidates.length - 1] || null;
  }
  if (!container) return;
  if (container.dataset) {
    if (container.dataset.mounted === 'true') return;
    container.dataset.mounted = 'true';
  }

  // Walk up from the container to the nearest element carrying `data-datafiles`.
  let mountEl = container;
  while (mountEl && !mountEl.getAttribute?.('data-datafiles')) {
    mountEl = mountEl.parentElement;
  }
  let providedData = null;
  try {
    const attr = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-datafiles') : null;
    if (attr && attr.trim()) {
      // Either a JSON array of paths or a single plain path.
      providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
    }
  } catch (_) {
    // Malformed JSON in the attribute: fall back to the default paths.
  }

  const DEFAULT_JSON = '/data/evaluation_summary.json';
  const DEFAULT_PATHS = [
    DEFAULT_JSON,
    './assets/data/evaluation_summary.json',
    '../assets/data/evaluation_summary.json',
    '../../assets/data/evaluation_summary.json'
  ];
  // Bare file names (no slash) are looked up under /data/.
  const ensureDataPrefix = (p) => (typeof p === 'string' && p && !p.includes('/')) ? `/data/${p}` : p;

  // Accept both a single path and an array of paths; previously an array was
  // parsed and then ignored in favour of the defaults.
  const providedPaths = (Array.isArray(providedData) ? providedData : [providedData])
    .filter((p) => typeof p === 'string' && p.trim() !== '')
    .map((p) => ensureDataPrefix(p.trim()));
  const JSON_PATHS = providedPaths.length > 0 ? providedPaths : DEFAULT_PATHS;

  // Returns the parsed JSON of the first path that answers with an OK status
  // and a valid JSON body; rejects with 'JSON not found' when none does.
  const fetchFirstAvailable = async (paths) => {
    for (const p of paths) {
      try {
        const r = await fetch(p, { cache: 'no-cache' });
        if (r.ok) return await r.json();
      } catch (_) {
        // Network or parse failure for this candidate: try the next one.
      }
    }
    throw new Error('JSON not found');
  };

  fetchFirstAvailable(JSON_PATHS)
    .then((rawData) => {
      // Chart 2: add the clamping experiments. The x scale still reserves a
      // slot for every experiment so bars keep the same position across charts.
      const allExperiments = ['Prompt', 'Basic steering', 'Clamping', 'Clamping + Penalty', '2D optimized', '8D optimized'];
      const visibleExperiments = ['Prompt', 'Basic steering', 'Clamping', 'Clamping + Penalty'];

      // One subplot per metric (8 metrics, placed by the 2-column CSS grid).
      const formatValue = d3.format('.2f');
      const metrics = [
        { key: 'llm_score_concept', label: 'LLM Concept Score', format: formatValue },
        { key: 'eiffel', label: 'Explicit Concept Presence', format: formatValue },
        { key: 'llm_score_instruction', label: 'LLM Instruction Score', format: formatValue },
        { key: 'minus_log_prob', label: 'Surprise in Original Model', format: formatValue },
        { key: 'llm_score_fluency', label: 'LLM Fluency Score', format: formatValue },
        { key: 'rep3', label: '3-gram Repetition Fraction', format: formatValue },
        { key: 'mean_llm_score', label: 'Mean LLM Score', format: formatValue },
        { key: 'harmonic_llm_score', label: 'Harmonic Mean LLM Score', format: formatValue }
      ];

      // Index the rows as data[metric][experiment] = { mean, std }.
      const data = {};
      rawData.forEach((row) => {
        if (!data[row.metric]) data[row.metric] = {};
        data[row.metric][row.experiment] = { mean: row.mean, std: row.std };
      });

      // Color palette - consistent across all charts
      const allColors = {
        'Prompt': '#4c4c4c',
        'Basic steering': '#b2b2b2',
        'Clamping': '#b2b2cc',
        'Clamping + Penalty': '#b2b2e6',
        '2D optimized': '#b2ffb2',
        '8D optimized': '#ffb2ff'
      };

      // Fixed y-axis range per metric; metrics not listed here use [0, 1].
      const yDomains = {
        'llm_score_concept': [0, 2],
        'llm_score_instruction': [0, 2],
        'llm_score_fluency': [0, 2],
        'mean_llm_score': [0, 2],
        'harmonic_llm_score': [0, 2],
        'eiffel': [0, 1],
        'minus_log_prob': [0, 2],
        'rep3': [0, 0.5]
      };

      const gridContainer = document.createElement('div');
      gridContainer.className = 'grid-container';
      container.appendChild(gridContainer);

      // Tooltip: parked off-screen until a bar is hovered.
      const tooltip = d3.select(container).append('div')
        .attr('class', 'd3-tooltip')
        .style('transform', 'translate(-9999px, -9999px)');

      let hoveredExperiment = null;

      // Dims, in every subplot, the bars of experiments other than the hovered
      // one. Only toggles a class, so hovering does not rebuild the charts.
      const applyDimming = () => {
        d3.select(gridContainer).selectAll('rect.bar')
          .classed('dimmed', (bar) => hoveredExperiment !== null && bar.experiment !== hoveredExperiment);
      };

      // Shared stroke styling for error-bar stems and caps.
      const styleErrorLine = (selection) => selection
        .attr('stroke', '#666')
        .attr('stroke-width', 1.5)
        .attr('opacity', 0.6);

      // Create each subplot
      metrics.forEach((metric) => {
        const subplot = document.createElement('div');
        subplot.className = 'subplot';
        subplot.dataset.metric = metric.key;
        gridContainer.appendChild(subplot);

        const title = document.createElement('div');
        title.className = 'subplot-title';
        title.textContent = metric.label;
        subplot.appendChild(title);

        const svg = d3.select(subplot).append('svg')
          .attr('width', '100%')
          .style('display', 'block');

        // Layer order matters: later groups are painted on top of earlier ones.
        const g = svg.append('g');
        const gGrid = g.append('g').attr('class', 'grid');
        const gBars = g.append('g').attr('class', 'bars');
        const gErrorBars = g.append('g').attr('class', 'error-bars');
        const gAxes = g.append('g').attr('class', 'axes');
        const gLabels = g.append('g').attr('class', 'value-labels');

        // (Re)draws this subplot at the subplot element's current width.
        subplot._render = () => {
          const width = subplot.clientWidth || 300;
          const height = Math.max(200, Math.round(width * 0.6));
          const margin = { top: 10, right: 20, bottom: 70, left: 42 };
          const innerWidth = width - margin.left - margin.right;
          const innerHeight = height - margin.top - margin.bottom;

          svg.attr('height', height);
          g.attr('transform', `translate(${margin.left},${margin.top})`);

          // Scales - use all experiments for consistent positioning
          const x = d3.scaleBand()
            .domain(allExperiments)
            .range([0, innerWidth])
            .padding(0.2);

          const y = d3.scaleLinear()
            .domain(yDomains[metric.key] || [0, 1])
            .range([innerHeight, 0]);

          // Grid
          gGrid.selectAll('*').remove();
          gGrid.selectAll('line')
            .data(y.ticks(4))
            .join('line')
            .attr('x1', 0)
            .attr('x2', innerWidth)
            .attr('y1', (tick) => y(tick))
            .attr('y2', (tick) => y(tick));

          // Axes
          gAxes.selectAll('*').remove();

          const xAxis = gAxes.append('g')
            .attr('transform', `translate(0,${innerHeight})`)
            .call(d3.axisBottom(x).tickSize(3));

          // Only show labels for visible experiments
          xAxis.selectAll('text')
            .attr('transform', 'rotate(-45)')
            .style('text-anchor', 'end')
            .attr('dx', '-0.5em')
            .attr('dy', '0.15em')
            .style('opacity', function () {
              const text = d3.select(this).text();
              return visibleExperiments.includes(text) ? 1 : 0;
            });

          gAxes.append('g')
            .call(d3.axisLeft(y).ticks(4).tickFormat(metric.format).tickSize(3));

          // Bar geometry (only for visible experiments that have data).
          const bars = [];
          visibleExperiments.forEach((exp) => {
            const stats = data[metric.key]?.[exp];
            if (!stats) return;
            bars.push({
              experiment: exp,
              mean: stats.mean,
              std: stats.std,
              color: allColors[exp],
              x: x(exp),
              y: y(stats.mean),
              width: x.bandwidth(),
              height: innerHeight - y(stats.mean)
            });
          });

          gBars.selectAll('rect')
            .data(bars)
            .join('rect')
            .attr('class', 'bar')
            .attr('x', (d) => d.x)
            .attr('y', (d) => d.y)
            .attr('width', (d) => d.width)
            .attr('height', (d) => d.height)
            .attr('fill', (d) => d.color)
            .attr('rx', 2)
            .classed('dimmed', (d) => hoveredExperiment !== null && d.experiment !== hoveredExperiment)
            .on('mouseenter', (event, d) => {
              hoveredExperiment = d.experiment;

              // Show value label on bar
              gLabels.selectAll('text').remove();
              gLabels.append('text')
                .attr('x', d.x + d.width / 2)
                .attr('y', d.y - 5)
                .attr('text-anchor', 'middle')
                .attr('fill', 'var(--text-color)')
                .attr('font-size', '11px')
                .attr('font-weight', '600')
                .text(metric.format(d.mean));

              applyDimming();
              tooltip
                .style('opacity', 1)
                .html(`
                  <div><strong>${d.experiment}</strong></div>
                  <div style="margin-top: 4px;">${metric.label}</div>
                  <div style="margin-top: 4px;"><strong>Mean:</strong> ${metric.format(d.mean)}</div>
                  <div><strong>Std:</strong> ${metric.format(d.std)}</div>
                `);
            })
            .on('mousemove', (event) => {
              const [mx, my] = d3.pointer(event, container);
              tooltip.style('transform', `translate(${mx + 10}px, ${my + 10}px)`);
            })
            .on('mouseleave', () => {
              hoveredExperiment = null;
              gLabels.selectAll('text').remove();
              applyDimming();
              tooltip.style('opacity', 0).style('transform', 'translate(-9999px, -9999px)');
            });

          // Error bars: mean +/- std, with the lower end clamped at 0.
          // The stems get their own class so this selection does not also
          // match the cap lines (which would be removed on every re-render).
          gErrorBars.selectAll('line.stem')
            .data(bars)
            .join('line')
            .attr('class', 'stem')
            .attr('x1', (d) => d.x + d.width / 2)
            .attr('x2', (d) => d.x + d.width / 2)
            .attr('y1', (d) => y(d.mean + d.std))
            .attr('y2', (d) => y(Math.max(0, d.mean - d.std)))
            .call(styleErrorLine);

          // Error bar caps
          gErrorBars.selectAll('.cap-top')
            .data(bars)
            .join('line')
            .attr('class', 'cap-top')
            .attr('x1', (d) => d.x + d.width / 2 - 3)
            .attr('x2', (d) => d.x + d.width / 2 + 3)
            .attr('y1', (d) => y(d.mean + d.std))
            .attr('y2', (d) => y(d.mean + d.std))
            .call(styleErrorLine);

          gErrorBars.selectAll('.cap-bottom')
            .data(bars)
            .join('line')
            .attr('class', 'cap-bottom')
            .attr('x1', (d) => d.x + d.width / 2 - 3)
            .attr('x2', (d) => d.x + d.width / 2 + 3)
            .attr('y1', (d) => y(Math.max(0, d.mean - d.std)))
            .attr('y2', (d) => y(Math.max(0, d.mean - d.std)))
            .call(styleErrorLine);
        };
      });

      // Redraws every subplot (initial draw and on resize).
      const updateAll = () => {
        gridContainer.querySelectorAll('.subplot').forEach((subplot) => {
          if (subplot._render) subplot._render();
        });
      };

      updateAll();

      if (window.ResizeObserver) {
        const ro = new ResizeObserver(() => updateAll());
        ro.observe(container);
      } else {
        window.addEventListener('resize', updateAll);
      }
    })
    .catch((err) => {
      // Replace the container's content with the message, inserted as text
      // so it can never be interpreted as markup.
      const errorBox = document.createElement('div');
      errorBox.style.color = 'red';
      errorBox.style.padding = '20px';
      errorBox.textContent = `Error: ${err.message}`;
      container.textContent = '';
      container.appendChild(errorBox);
    });
};
407
+
408
// Kick off rendering: run right away when the document is already parsed,
// otherwise wait for DOMContentLoaded (listener removed after it fires once).
if (document.readyState !== 'loading') {
  ensureD3(bootstrap);
} else {
  document.addEventListener('DOMContentLoaded', () => {
    ensureD3(bootstrap);
  }, { once: true });
}
413
+ })();
414
+ </script>
app/src/content/embeds/d3-evaluation3-multi.html ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div class="d3-eval-grid d3-eval-grid-3"></div>
2
+ <style>
3
/* Root element of the evaluation chart embed. */
.d3-eval-grid {
  /* Containing block for the absolutely positioned tooltip: the script
     computes the tooltip offset relative to this element. */
  position: relative;
  padding: 2px;
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
}

/* Two subplots per row... */
.d3-eval-grid .grid-container {
  display: grid;
  grid-template-columns: repeat(2, 1fr);
  gap: 8px;
}

/* ...collapsing to a single column on narrow viewports. */
@media (max-width: 768px) {
  .d3-eval-grid .grid-container {
    grid-template-columns: 1fr;
  }
}

.d3-eval-grid .subplot {
  padding: 4px;
}

.d3-eval-grid .subplot-title {
  font-size: 12px;
  font-weight: 600;
  color: var(--text-color);
  margin-bottom: 4px;
  text-align: center;
}

/* Axis lines and tick marks. */
.d3-eval-grid .axes path,
.d3-eval-grid .axes line {
  stroke: var(--axis-color);
}

.d3-eval-grid .axes text {
  fill: var(--tick-color);
  font-size: 9px;
}

/* Horizontal grid lines behind the bars. */
.d3-eval-grid .grid line {
  stroke: var(--grid-color);
  stroke-dasharray: 2,2;
  opacity: 0.5;
}

.d3-eval-grid .axis-label {
  fill: var(--text-color);
  font-size: 11px;
  font-weight: 600;
}

/* Hover tooltip; moved with a CSS transform by the script. */
.d3-eval-grid .d3-tooltip {
  position: absolute;
  /* Anchor at the container's top-left corner so the translate() offset is
     applied from there rather than from the element's in-flow position. */
  top: 0;
  left: 0;
  pointer-events: none;
  padding: 8px 10px;
  background: var(--surface-bg);
  border: 1px solid var(--border-color);
  border-radius: 8px;
  font-size: 11px;
  line-height: 1.5;
  box-shadow: 0 4px 24px rgba(0,0,0,.18);
  opacity: 0;
  transition: opacity 0.2s;
  z-index: 1000;
}

.d3-eval-grid .bar {
  transition: opacity 0.2s;
}

/* Bars of experiments other than the hovered one. */
.d3-eval-grid .bar.dimmed {
  opacity: 0.2;
}
77
+ </style>
78
+ <script>
79
+ (() => {
80
/**
 * Calls `cb` once the global `d3` library is usable.
 *
 * If `window.d3` is already present, `cb` runs synchronously and its return
 * value is returned. Otherwise a single shared CDN <script> tag (id
 * `d3-cdn-script`) is created, or reused if another embed already added it,
 * and `cb` runs after that tag's `load` event.
 *
 * @param {Function} cb - Invoked with no arguments when d3 is available.
 * @returns {*} The result of `cb()` when d3 was already loaded, else undefined.
 */
const ensureD3 = (cb) => {
  const isD3Ready = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (isD3Ready()) return cb();

  let tag = document.getElementById('d3-cdn-script');
  if (!tag) {
    tag = document.createElement('script');
    tag.id = 'd3-cdn-script';
    tag.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(tag);
  }

  tag.addEventListener('load', () => {
    if (isD3Ready()) {
      cb();
    } else {
      console.error('d3 script loaded but window.d3 is not available');
    }
  }, { once: true });

  // Without this the chart would silently never render when the CDN fails.
  tag.addEventListener('error', () => {
    console.error(`Failed to load d3 from ${tag.src}`);
  }, { once: true });
};
93
+
94
/**
 * Mounts the full evaluation chart (all experiments, including the
 * multi-layer optimized ones): a grid of bar-chart subplots, one per metric,
 * inside the page's `.d3-eval-grid-3` element.
 *
 * Resolves the container (each element is mounted at most once, tracked with
 * `data-mounted`), takes the JSON location(s) from the nearest
 * `data-datafiles` attribute or from a list of default paths, fetches the
 * first one that responds, then draws the subplots and redraws them when the
 * container is resized. Any failure in the fetch/draw chain is shown as a red
 * message inside the container.
 *
 * Requires the global `d3` to be loaded before it is called.
 */
const bootstrap = () => {
  const GRID_CLASS = 'd3-eval-grid-3';

  // `document.currentScript` is only set while a <script> is being executed
  // synchronously; when this runs from an event listener it is null and the
  // class-based lookup below picks the last not-yet-mounted container.
  const scriptEl = document.currentScript;
  let container = scriptEl ? scriptEl.previousElementSibling : null;
  if (!(container && container.classList && container.classList.contains(GRID_CLASS))) {
    const candidates = Array.from(document.querySelectorAll(`.${GRID_CLASS}`))
      .filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
    container = candidates[candidates.length - 1] || null;
  }
  if (!container) return;
  if (container.dataset) {
    if (container.dataset.mounted === 'true') return;
    container.dataset.mounted = 'true';
  }

  // Walk up from the container to the nearest element carrying `data-datafiles`.
  let mountEl = container;
  while (mountEl && !mountEl.getAttribute?.('data-datafiles')) {
    mountEl = mountEl.parentElement;
  }
  let providedData = null;
  try {
    const attr = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-datafiles') : null;
    if (attr && attr.trim()) {
      // Either a JSON array of paths or a single plain path.
      providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
    }
  } catch (_) {
    // Malformed JSON in the attribute: fall back to the default paths.
  }

  const DEFAULT_JSON = '/data/evaluation_summary.json';
  const DEFAULT_PATHS = [
    DEFAULT_JSON,
    './assets/data/evaluation_summary.json',
    '../assets/data/evaluation_summary.json',
    '../../assets/data/evaluation_summary.json'
  ];
  // Bare file names (no slash) are looked up under /data/.
  const ensureDataPrefix = (p) => (typeof p === 'string' && p && !p.includes('/')) ? `/data/${p}` : p;

  // Accept both a single path and an array of paths; previously an array was
  // parsed and then ignored in favour of the defaults.
  const providedPaths = (Array.isArray(providedData) ? providedData : [providedData])
    .filter((p) => typeof p === 'string' && p.trim() !== '')
    .map((p) => ensureDataPrefix(p.trim()));
  const JSON_PATHS = providedPaths.length > 0 ? providedPaths : DEFAULT_PATHS;

  // Returns the parsed JSON of the first path that answers with an OK status
  // and a valid JSON body; rejects with 'JSON not found' when none does.
  const fetchFirstAvailable = async (paths) => {
    for (const p of paths) {
      try {
        const r = await fetch(p, { cache: 'no-cache' });
        if (r.ok) return await r.json();
      } catch (_) {
        // Network or parse failure for this candidate: try the next one.
      }
    }
    throw new Error('JSON not found');
  };

  fetchFirstAvailable(JSON_PATHS)
    .then((rawData) => {
      // Chart 3: All experiments including multi-layer optimization
      const allExperiments = ['Prompt', 'Basic steering', 'Clamping', 'Clamping + Penalty', '2D optimized', '8D optimized'];
      const visibleExperiments = allExperiments;

      // One subplot per metric (8 metrics, placed by the 2-column CSS grid).
      const formatValue = d3.format('.2f');
      const metrics = [
        { key: 'llm_score_concept', label: 'LLM Concept Score', format: formatValue },
        { key: 'eiffel', label: 'Explicit Concept Presence', format: formatValue },
        { key: 'llm_score_instruction', label: 'LLM Instruction Score', format: formatValue },
        { key: 'minus_log_prob', label: 'Surprise in Original Model', format: formatValue },
        { key: 'llm_score_fluency', label: 'LLM Fluency Score', format: formatValue },
        { key: 'rep3', label: '3-gram Repetition Fraction', format: formatValue },
        { key: 'mean_llm_score', label: 'Mean LLM Score', format: formatValue },
        { key: 'harmonic_llm_score', label: 'Harmonic Mean LLM Score', format: formatValue }
      ];

      // Index the rows as data[metric][experiment] = { mean, std }.
      const data = {};
      rawData.forEach((row) => {
        if (!data[row.metric]) data[row.metric] = {};
        data[row.metric][row.experiment] = { mean: row.mean, std: row.std };
      });

      // Color palette - consistent across all charts
      const allColors = {
        'Prompt': '#4c4c4c',
        'Basic steering': '#b2b2b2',
        'Clamping': '#b2b2cc',
        'Clamping + Penalty': '#b2b2e6',
        '2D optimized': '#b2ffb2',
        '8D optimized': '#ffb2ff'
      };

      // Fixed y-axis range per metric; metrics not listed here use [0, 1].
      const yDomains = {
        'llm_score_concept': [0, 2],
        'llm_score_instruction': [0, 2],
        'llm_score_fluency': [0, 2],
        'mean_llm_score': [0, 2],
        'harmonic_llm_score': [0, 2],
        'eiffel': [0, 1],
        'minus_log_prob': [0, 2],
        'rep3': [0, 0.5]
      };

      const gridContainer = document.createElement('div');
      gridContainer.className = 'grid-container';
      container.appendChild(gridContainer);

      // Tooltip: parked off-screen until a bar is hovered.
      const tooltip = d3.select(container).append('div')
        .attr('class', 'd3-tooltip')
        .style('transform', 'translate(-9999px, -9999px)');

      let hoveredExperiment = null;

      // Dims, in every subplot, the bars of experiments other than the hovered
      // one. Only toggles a class, so hovering does not rebuild the charts.
      const applyDimming = () => {
        d3.select(gridContainer).selectAll('rect.bar')
          .classed('dimmed', (bar) => hoveredExperiment !== null && bar.experiment !== hoveredExperiment);
      };

      // Shared stroke styling for error-bar stems and caps.
      const styleErrorLine = (selection) => selection
        .attr('stroke', '#666')
        .attr('stroke-width', 1.5)
        .attr('opacity', 0.6);

      // Create each subplot
      metrics.forEach((metric) => {
        const subplot = document.createElement('div');
        subplot.className = 'subplot';
        subplot.dataset.metric = metric.key;
        gridContainer.appendChild(subplot);

        const title = document.createElement('div');
        title.className = 'subplot-title';
        title.textContent = metric.label;
        subplot.appendChild(title);

        const svg = d3.select(subplot).append('svg')
          .attr('width', '100%')
          .style('display', 'block');

        // Layer order matters: later groups are painted on top of earlier ones.
        const g = svg.append('g');
        const gGrid = g.append('g').attr('class', 'grid');
        const gBars = g.append('g').attr('class', 'bars');
        const gErrorBars = g.append('g').attr('class', 'error-bars');
        const gAxes = g.append('g').attr('class', 'axes');
        const gLabels = g.append('g').attr('class', 'value-labels');

        // (Re)draws this subplot at the subplot element's current width.
        subplot._render = () => {
          const width = subplot.clientWidth || 300;
          const height = Math.max(200, Math.round(width * 0.6));
          const margin = { top: 10, right: 20, bottom: 70, left: 42 };
          const innerWidth = width - margin.left - margin.right;
          const innerHeight = height - margin.top - margin.bottom;

          svg.attr('height', height);
          g.attr('transform', `translate(${margin.left},${margin.top})`);

          // Scales - use all experiments for consistent positioning
          const x = d3.scaleBand()
            .domain(allExperiments)
            .range([0, innerWidth])
            .padding(0.2);

          const y = d3.scaleLinear()
            .domain(yDomains[metric.key] || [0, 1])
            .range([innerHeight, 0]);

          // Grid
          gGrid.selectAll('*').remove();
          gGrid.selectAll('line')
            .data(y.ticks(4))
            .join('line')
            .attr('x1', 0)
            .attr('x2', innerWidth)
            .attr('y1', (tick) => y(tick))
            .attr('y2', (tick) => y(tick));

          // Axes
          gAxes.selectAll('*').remove();

          const xAxis = gAxes.append('g')
            .attr('transform', `translate(0,${innerHeight})`)
            .call(d3.axisBottom(x).tickSize(3));

          // Only show labels for visible experiments
          xAxis.selectAll('text')
            .attr('transform', 'rotate(-45)')
            .style('text-anchor', 'end')
            .attr('dx', '-0.5em')
            .attr('dy', '0.15em')
            .style('opacity', function () {
              const text = d3.select(this).text();
              return visibleExperiments.includes(text) ? 1 : 0;
            });

          gAxes.append('g')
            .call(d3.axisLeft(y).ticks(4).tickFormat(metric.format).tickSize(3));

          // Bar geometry (only for visible experiments that have data).
          const bars = [];
          visibleExperiments.forEach((exp) => {
            const stats = data[metric.key]?.[exp];
            if (!stats) return;
            bars.push({
              experiment: exp,
              mean: stats.mean,
              std: stats.std,
              color: allColors[exp],
              x: x(exp),
              y: y(stats.mean),
              width: x.bandwidth(),
              height: innerHeight - y(stats.mean)
            });
          });

          gBars.selectAll('rect')
            .data(bars)
            .join('rect')
            .attr('class', 'bar')
            .attr('x', (d) => d.x)
            .attr('y', (d) => d.y)
            .attr('width', (d) => d.width)
            .attr('height', (d) => d.height)
            .attr('fill', (d) => d.color)
            .attr('rx', 2)
            .classed('dimmed', (d) => hoveredExperiment !== null && d.experiment !== hoveredExperiment)
            .on('mouseenter', (event, d) => {
              hoveredExperiment = d.experiment;

              // Show value label on bar
              gLabels.selectAll('text').remove();
              gLabels.append('text')
                .attr('x', d.x + d.width / 2)
                .attr('y', d.y - 5)
                .attr('text-anchor', 'middle')
                .attr('fill', 'var(--text-color)')
                .attr('font-size', '11px')
                .attr('font-weight', '600')
                .text(metric.format(d.mean));

              applyDimming();
              tooltip
                .style('opacity', 1)
                .html(`
                  <div><strong>${d.experiment}</strong></div>
                  <div style="margin-top: 4px;">${metric.label}</div>
                  <div style="margin-top: 4px;"><strong>Mean:</strong> ${metric.format(d.mean)}</div>
                  <div><strong>Std:</strong> ${metric.format(d.std)}</div>
                `);
            })
            .on('mousemove', (event) => {
              const [mx, my] = d3.pointer(event, container);
              tooltip.style('transform', `translate(${mx + 10}px, ${my + 10}px)`);
            })
            .on('mouseleave', () => {
              hoveredExperiment = null;
              gLabels.selectAll('text').remove();
              applyDimming();
              tooltip.style('opacity', 0).style('transform', 'translate(-9999px, -9999px)');
            });

          // Error bars: mean +/- std, with the lower end clamped at 0.
          // The stems get their own class so this selection does not also
          // match the cap lines (which would be removed on every re-render).
          gErrorBars.selectAll('line.stem')
            .data(bars)
            .join('line')
            .attr('class', 'stem')
            .attr('x1', (d) => d.x + d.width / 2)
            .attr('x2', (d) => d.x + d.width / 2)
            .attr('y1', (d) => y(d.mean + d.std))
            .attr('y2', (d) => y(Math.max(0, d.mean - d.std)))
            .call(styleErrorLine);

          // Error bar caps
          gErrorBars.selectAll('.cap-top')
            .data(bars)
            .join('line')
            .attr('class', 'cap-top')
            .attr('x1', (d) => d.x + d.width / 2 - 3)
            .attr('x2', (d) => d.x + d.width / 2 + 3)
            .attr('y1', (d) => y(d.mean + d.std))
            .attr('y2', (d) => y(d.mean + d.std))
            .call(styleErrorLine);

          gErrorBars.selectAll('.cap-bottom')
            .data(bars)
            .join('line')
            .attr('class', 'cap-bottom')
            .attr('x1', (d) => d.x + d.width / 2 - 3)
            .attr('x2', (d) => d.x + d.width / 2 + 3)
            .attr('y1', (d) => y(Math.max(0, d.mean - d.std)))
            .attr('y2', (d) => y(Math.max(0, d.mean - d.std)))
            .call(styleErrorLine);
        };
      });

      // Redraws every subplot (initial draw and on resize).
      const updateAll = () => {
        gridContainer.querySelectorAll('.subplot').forEach((subplot) => {
          if (subplot._render) subplot._render();
        });
      };

      updateAll();

      if (window.ResizeObserver) {
        const ro = new ResizeObserver(() => updateAll());
        ro.observe(container);
      } else {
        window.addEventListener('resize', updateAll);
      }
    })
    .catch((err) => {
      // Replace the container's content with the message, inserted as text
      // so it can never be interpreted as markup.
      const errorBox = document.createElement('div');
      errorBox.style.color = 'red';
      errorBox.style.padding = '20px';
      errorBox.textContent = `Error: ${err.message}`;
      container.textContent = '';
      container.appendChild(errorBox);
    });
};
407
+
408
// Kick off rendering: run right away when the document is already parsed,
// otherwise wait for DOMContentLoaded (listener removed after it fires once).
if (document.readyState !== 'loading') {
  ensureD3(bootstrap);
} else {
  document.addEventListener('DOMContentLoaded', () => {
    ensureD3(bootstrap);
  }, { once: true });
}
413
+ })();
414
+ </script>