update banner, matrix and article modifications
Browse files- app/src/content/assets/data/data_gaia.json +3 -0
- app/src/content/assets/data/data_gaia_backup.json +3 -0
- app/src/content/assets/data/data_gaia_points.json +3 -0
- app/src/content/assets/data/leaderboard_scatter_plot.json +3 -0
- app/src/content/assets/data/leaderboard_scores_over_time.json +3 -0
- app/src/content/assets/data/leaderboard_scores_over_time_old.json +3 -0
- app/src/content/chapters/general-knowledge/model-inference-and-evaluation.mdx +9 -2
- app/src/content/chapters/troubleshooting/troubleshooting-reproducibility.mdx +5 -4
- app/src/content/embeds/banner.html +1151 -199
- app/src/content/embeds/d3-mmlu-heatmap.html +489 -0
- app/src/content/embeds/d3-two-lines-chart.html +78 -6
app/src/content/assets/data/data_gaia.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e69d6199e9f2d0b6b07db5a90f13b2fd9bc9e0e245d2b9aa60ac929967baddfa
|
| 3 |
+
size 3967
|
app/src/content/assets/data/data_gaia_backup.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b075362b1571e36a66715126b5223a599ceaca74b0bff0d0d616662fca6a7ff3
|
| 3 |
+
size 335376
|
app/src/content/assets/data/data_gaia_points.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc205f03ced31cfbc111bc096a5ba6fe4159f1b8365446dfffb70331d38bfdb8
|
| 3 |
+
size 412546
|
app/src/content/assets/data/leaderboard_scatter_plot.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7b7b0c94cdd535f66dc6ac532fd80b434eddddd0dc8c36076c462c0a72faa9b
|
| 3 |
+
size 7688907
|
app/src/content/assets/data/leaderboard_scores_over_time.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c6dea46d998952f9cbfeb219afcdb403b90c28f0645e03ae537dfaa8c82b57d
|
| 3 |
+
size 40874
|
app/src/content/assets/data/leaderboard_scores_over_time_old.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d5b994b4fd6d636b2a58fabced9fcd929427501b86d0937b635a67d34872bb1
|
| 3 |
+
size 55411
|
app/src/content/chapters/general-knowledge/model-inference-and-evaluation.mdx
CHANGED
|
@@ -11,6 +11,8 @@ import Note from "../../../components/Note.astro";
|
|
| 11 |
import Sidenote from "../../../components/Sidenote.astro";
|
| 12 |
import Accordion from "../../../components/Accordion.astro";
|
| 13 |
import HtmlEmbed from "../../../components/HtmlEmbed.astro";
|
|
|
|
|
|
|
| 14 |
|
| 15 |
In this section, we'll look at two steps for models: how input is preprocessed to be given to the model (`tokenization`), and how the model generates a prediction from it (`inference`).
|
| 16 |
|
|
@@ -94,12 +96,17 @@ First, as some languages do not always use spacing as a word separator (Korean,
|
|
| 94 |
|
| 95 |
Then, tokenizers in general might be unfair to non-English languages. When training a BPE tokenizer, you use data from the different languages you want to cover, but most of the time this data is unbalanced between languages (with, for example, an order of magnitude more English than Thai or Burmese). Since BPE tokenizers create their vocabulary tokens based on the most frequent words seen, most of the long tokens will be English words - and most of the words from the less frequent languages will only be split at the character level. This effect leads to an unfairness in multilingual tokenization: some (less frequent, or *lower-resourced*) languages require orders of magnitude more tokens than English to generate a sentence of equivalent length.
|
| 96 |
|
|
|
|
|
|
|
| 97 |
<iframe
|
|
|
|
| 98 |
src="https://OpenEvals-tokenizers-languages.hf.space"
|
| 99 |
frameborder="0"
|
| 100 |
-
width="
|
| 101 |
-
height="
|
| 102 |
></iframe>
|
|
|
|
|
|
|
| 103 |
|
| 104 |
If you are in this case, the number of tokens that the model is allowed to generate for an evaluation should also be language dependent, as not all languages are tokenized into a similar number of tokens.
|
| 105 |
|
|
|
|
| 11 |
import Sidenote from "../../../components/Sidenote.astro";
|
| 12 |
import Accordion from "../../../components/Accordion.astro";
|
| 13 |
import HtmlEmbed from "../../../components/HtmlEmbed.astro";
|
| 14 |
+
import Wide from "../../../components/Wide.astro";
|
| 15 |
+
import Reference from "../../../components/Reference.astro";
|
| 16 |
|
| 17 |
In this section, we'll look at two steps for models: how input is preprocessed to be given to the model (`tokenization`), and how the model generates a prediction from it (`inference`).
|
| 18 |
|
|
|
|
| 96 |
|
| 97 |
Then, tokenizers in general might be unfair to non-English languages. When training a BPE tokenizer, you use data from the different languages you want to cover, but most of the time this data is unbalanced between languages (with, for example, an order of magnitude more English than Thai or Burmese). Since BPE tokenizers create their vocabulary tokens based on the most frequent words seen, most of the long tokens will be English words - and most of the words from the less frequent languages will only be split at the character level. This effect leads to an unfairness in multilingual tokenization: some (less frequent, or *lower-resourced*) languages require orders of magnitude more tokens than English to generate a sentence of equivalent length.
|
| 98 |
|
| 99 |
+
<Wide>
|
| 100 |
+
<Reference align="center" caption="OpenEvals-tokenizers-languages">
|
| 101 |
<iframe
|
| 102 |
+
className="card"
|
| 103 |
src="https://OpenEvals-tokenizers-languages.hf.space"
|
| 104 |
frameborder="0"
|
| 105 |
+
width="100%"
|
| 106 |
+
height="650"
|
| 107 |
></iframe>
|
| 108 |
+
</Reference>
|
| 109 |
+
</Wide>
|
| 110 |
|
| 111 |
If you are in this case, the number of tokens that the model is allowed to generate for an evaluation should also be language dependent, as not all languages are tokenized into a similar number of tokens.
|
| 112 |
|
app/src/content/chapters/troubleshooting/troubleshooting-reproducibility.mdx
CHANGED
|
@@ -4,8 +4,7 @@ title: "Troubleshooting reproducibility"
|
|
| 4 |
|
| 5 |
import Note from "../../../components/Note.astro";
|
| 6 |
import Sidenote from "../../../components/Sidenote.astro";
|
| 7 |
-
import
|
| 8 |
-
import mmluPromptImage from "../../assets/image/mmlu_prompt.png";
|
| 9 |
|
| 10 |
Let's say you have read a recent tech report about a cool new model, and you want to reproduce their results on your machine... but you're not managing to?
|
| 11 |
Let's explore why.
|
|
@@ -57,10 +56,12 @@ For example, for multichoice question answers, common formats include very simpl
|
|
| 57 |
|
| 58 |
We did some experiments on this (you'll see up to a 7-point difference for the same model on the semantically equivalent prompts, the 5 rightmost columns), and a [paper observed similar results](https://arxiv.org/abs/2310.11324).
|
| 59 |
|
| 60 |
-
<Image src={mmluPromptImage} alt="Heatmap showing MMLU evaluation scores across different models with different prompt formats. Scores vary by up to 7 points for the same model depending on format." />
|
| 61 |
-
|
| 62 |
**Other example**: Llama 3.1 models predicted correct MATH-Hard answers but scored poorly on the Open LLM Leaderboard, because they overfit to GSM8K's prompt format and couldn't adapt to the new one for this eval, despite it being provided in few shot examples.
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
This [great paper](https://arxiv.org/abs/2407.07890)⭐ also highlights a side effect of this: a number of models are now trained to overfit benchmark prompts and answer formats, to the cost of adaptation to other prompts at evaluation time.
|
| 65 |
</Note>
|
| 66 |
|
|
|
|
| 4 |
|
| 5 |
import Note from "../../../components/Note.astro";
|
| 6 |
import Sidenote from "../../../components/Sidenote.astro";
|
| 7 |
+
import HtmlEmbed from "../../../components/HtmlEmbed.astro";
|
|
|
|
| 8 |
|
| 9 |
Let's say you have read a recent tech report about a cool new model, and you want to reproduce their results on your machine... but you're not managing to?
|
| 10 |
Let's explore why.
|
|
|
|
| 56 |
|
| 57 |
We did some experiments on this (you'll see up to a 7-point difference for the same model on the semantically equivalent prompts, the 5 rightmost columns), and a [paper observed similar results](https://arxiv.org/abs/2310.11324).
|
| 58 |
|
|
|
|
|
|
|
| 59 |
**Other example**: Llama 3.1 models predicted correct MATH-Hard answers but scored poorly on the Open LLM Leaderboard, because they overfit to GSM8K's prompt format and couldn't adapt to the new one for this eval, despite it being provided in few shot examples.
|
| 60 |
|
| 61 |
+
*Evaluation on MMLU subsets, acc_norm score (seed 0), in 5-shot.*
|
| 62 |
+
|
| 63 |
+
<HtmlEmbed src="d3-mmlu-heatmap.html" />
|
| 64 |
+
|
| 65 |
This [great paper](https://arxiv.org/abs/2407.07890)⭐ also highlights a side effect of this: a number of models are now trained to overfit benchmark prompts and answer formats, to the cost of adaptation to other prompts at evaluation time.
|
| 66 |
</Note>
|
| 67 |
|
app/src/content/embeds/banner.html
CHANGED
|
@@ -1,4 +1,111 @@
|
|
| 1 |
-
<div class="d3-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
<script>
|
| 3 |
(() => {
|
| 4 |
const ensureD3 = (cb) => {
|
|
@@ -17,230 +124,1060 @@
|
|
| 17 |
|
| 18 |
const bootstrap = () => {
|
| 19 |
const mount = document.currentScript ? document.currentScript.previousElementSibling : null;
|
| 20 |
-
const container = (mount && mount.querySelector && mount.querySelector('.d3-
|
| 21 |
if (!container) return;
|
| 22 |
if (container.dataset) {
|
| 23 |
if (container.dataset.mounted === 'true') return;
|
| 24 |
container.dataset.mounted = 'true';
|
| 25 |
}
|
| 26 |
-
// Scene params (match previous Plotly ranges)
|
| 27 |
-
const cx = 1.5, cy = 0.5;
|
| 28 |
-
const a = 1.3, b = 0.45;
|
| 29 |
-
const numPoints = 3000;
|
| 30 |
-
const numArms = 3;
|
| 31 |
-
const numTurns = 2.1;
|
| 32 |
-
const angleJitter = 0.12;
|
| 33 |
-
const posNoise = 0.015;
|
| 34 |
-
|
| 35 |
-
// Circle size settings
|
| 36 |
-
const minCircleSize = 4; // minimum diameter in pixels
|
| 37 |
-
const maxCircleSize = 12; // maximum diameter in pixels
|
| 38 |
-
|
| 39 |
-
// Generate spiral + bulge
|
| 40 |
-
const twoPi = Math.PI * 2;
|
| 41 |
-
const t = Float64Array.from({ length: numPoints }, () => Math.random() * (twoPi * numTurns));
|
| 42 |
-
const armIndices = Int16Array.from({ length: numPoints }, () => Math.floor(Math.random() * numArms));
|
| 43 |
-
const armOffsets = Float64Array.from(armIndices, (k) => k * (twoPi / numArms));
|
| 44 |
-
const theta = Float64Array.from(t, (tv, i) => tv + armOffsets[i] + d3.randomNormal.source(Math.random)(0, angleJitter)());
|
| 45 |
-
const rNorm = Float64Array.from(t, (tv) => Math.pow(tv / (twoPi * numTurns), 0.9));
|
| 46 |
-
const noiseScale = (rn) => posNoise * (0.8 + 0.6 * rn);
|
| 47 |
-
const noiseX = Float64Array.from(rNorm, (rn) => d3.randomNormal.source(Math.random)(0, noiseScale(rn))());
|
| 48 |
-
const noiseY = Float64Array.from(rNorm, (rn) => d3.randomNormal.source(Math.random)(0, noiseScale(rn))());
|
| 49 |
-
|
| 50 |
-
const xSpiral = Float64Array.from(theta, (th, i) => cx + a * rNorm[i] * Math.cos(th) + noiseX[i]);
|
| 51 |
-
const ySpiral = Float64Array.from(theta, (th, i) => cy + b * rNorm[i] * Math.sin(th) + noiseY[i]);
|
| 52 |
-
|
| 53 |
-
const bulgePoints = Math.floor(0.18 * numPoints);
|
| 54 |
-
const phiB = Float64Array.from({ length: bulgePoints }, () => twoPi * Math.random());
|
| 55 |
-
const rB = Float64Array.from({ length: bulgePoints }, () => Math.pow(Math.random(), 2.2) * 0.22);
|
| 56 |
-
const noiseXB = Float64Array.from({ length: bulgePoints }, () => d3.randomNormal.source(Math.random)(0, posNoise * 0.6)());
|
| 57 |
-
const noiseYB = Float64Array.from({ length: bulgePoints }, () => d3.randomNormal.source(Math.random)(0, posNoise * 0.6)());
|
| 58 |
-
const xBulge = Float64Array.from(phiB, (ph, i) => cx + a * rB[i] * Math.cos(ph) + noiseXB[i]);
|
| 59 |
-
const yBulge = Float64Array.from(phiB, (ph, i) => cy + b * rB[i] * Math.sin(ph) + noiseYB[i]);
|
| 60 |
-
|
| 61 |
-
// Concatenate
|
| 62 |
-
const X = Array.from(xSpiral).concat(Array.from(xBulge));
|
| 63 |
-
const Y = Array.from(ySpiral).concat(Array.from(yBulge));
|
| 64 |
-
const lenSpiral = xSpiral.length;
|
| 65 |
-
|
| 66 |
-
const zSpiral = Array.from(rNorm, (rn) => 1 - rn);
|
| 67 |
-
const maxRB = rB && rB.length ? (window.d3 && d3.max ? d3.max(rB) : Math.max.apply(null, Array.from(rB))) : 1;
|
| 68 |
-
const zBulge = Array.from(rB, (rb) => 1 - (maxRB ? rb / maxRB : 0));
|
| 69 |
-
const Zraw = zSpiral.concat(zBulge);
|
| 70 |
-
const sizesPx = Zraw.map((z) => minCircleSize + z * (maxCircleSize - minCircleSize)); // diameter in pixels
|
| 71 |
-
|
| 72 |
-
// Labels (same categories as Python version)
|
| 73 |
-
const labelOf = (i) => {
|
| 74 |
-
const z = Zraw[i];
|
| 75 |
-
if (z < 0.25) return 'tiny star';
|
| 76 |
-
if (z < 0.5) return 'small star';
|
| 77 |
-
if (z < 0.75) return 'medium star';
|
| 78 |
-
return 'large star';
|
| 79 |
-
};
|
| 80 |
|
| 81 |
-
//
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
const
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
};
|
| 94 |
|
| 95 |
// Create SVG
|
| 96 |
const svg = d3.select(container).append('svg')
|
| 97 |
.attr('width', '100%')
|
| 98 |
.style('display', 'block')
|
| 99 |
-
.style('cursor', '
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
const render = () => {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
const width = container.clientWidth || 800;
|
| 103 |
-
const height = Math.max(
|
| 104 |
svg.attr('width', width).attr('height', height);
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
| 108 |
|
| 109 |
-
//
|
| 110 |
-
|
| 111 |
-
const
|
| 112 |
-
const
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
transform: 'translate(-9999px, -9999px)',
|
| 130 |
-
pointerEvents: 'none',
|
| 131 |
-
padding: '10px 12px',
|
| 132 |
-
borderRadius: '12px',
|
| 133 |
-
fontSize: '12px',
|
| 134 |
-
lineHeight: '1.35',
|
| 135 |
-
border: '1px solid var(--border-color)',
|
| 136 |
-
background: 'var(--surface-bg)',
|
| 137 |
-
color: 'var(--text-color)',
|
| 138 |
-
boxShadow: '0 8px 32px rgba(0,0,0,.28), 0 2px 8px rgba(0,0,0,.12)',
|
| 139 |
-
opacity: '0',
|
| 140 |
-
transition: 'opacity .12s ease',
|
| 141 |
-
backdropFilter: 'saturate(1.12) blur(8px)',
|
| 142 |
-
zIndex: '20'
|
| 143 |
-
});
|
| 144 |
-
tipInner = document.createElement('div');
|
| 145 |
-
tipInner.className = 'd3-tooltip__inner';
|
| 146 |
-
Object.assign(tipInner.style, {
|
| 147 |
-
textAlign: 'left',
|
| 148 |
-
display: 'flex',
|
| 149 |
-
flexDirection: 'column',
|
| 150 |
-
gap: '6px',
|
| 151 |
-
minWidth: '220px'
|
| 152 |
});
|
| 153 |
-
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
} else {
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
| 157 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
.
|
| 172 |
-
.
|
| 173 |
-
.
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
tipInner.innerHTML =
|
| 183 |
-
`<div style="font-weight:800;letter-spacing:.1px;"><strong>${labelOf(i)}</strong></div>` +
|
| 184 |
-
`<div style="font-size:11px;color:var(--muted-color);margin-top:-4px;margin-bottom:2px;letter-spacing:.1px;"><strong>Type</strong> ${type}${arm ? ` (Arm ${arm})` : ''}</div>` +
|
| 185 |
-
`<div style="padding-top:6px;border-top:1px solid var(--border-color);"><strong>Position</strong> X ${X[i].toFixed(2)} · <strong>Y</strong> ${Y[i].toFixed(2)}</div>` +
|
| 186 |
-
`<div><strong>Distance</strong> Radius ${r.toFixed(3)} · <strong>Z</strong> ${Zraw[i].toFixed(3)}</div>` +
|
| 187 |
-
`<div><strong>Size</strong> ${sizesPx[i].toFixed(1)} px</div>`;
|
| 188 |
-
tip.style.opacity = '1';
|
| 189 |
-
})
|
| 190 |
-
.on('mousemove', (ev, i) => {
|
| 191 |
-
const [mx, my] = d3.pointer(ev, container);
|
| 192 |
-
const offsetX = 10, offsetY = 12;
|
| 193 |
-
tip.style.transform = `translate(${Math.round(mx + offsetX)}px, ${Math.round(my + offsetY)}px)`;
|
| 194 |
})
|
| 195 |
-
.on('mouseleave', function
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
tip.style.opacity = '1';
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
})
|
| 232 |
-
.on('mouseleave', function () {
|
| 233 |
tip.style.opacity = '0';
|
| 234 |
tip.style.transform = 'translate(-9999px, -9999px)';
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
|
|
|
|
|
|
|
| 244 |
// First render + resize
|
| 245 |
if (window.ResizeObserver) {
|
| 246 |
const ro = new ResizeObserver(() => render());
|
|
@@ -248,11 +1185,26 @@
|
|
| 248 |
} else {
|
| 249 |
window.addEventListener('resize', render);
|
| 250 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
render();
|
|
|
|
| 252 |
};
|
| 253 |
|
| 254 |
if (document.readyState === 'loading') {
|
| 255 |
document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
|
| 256 |
} else { ensureD3(bootstrap); }
|
| 257 |
})();
|
| 258 |
-
</script>
|
|
|
|
| 1 |
+
<div class="d3-leaderboard-chart-wrapper" style="width:100%;margin:10px 0;padding:10px 5px 5px 5px;border-radius:8px;background:var(--surface-bg);border:1px solid var(--border-color);position:relative;">
|
| 2 |
+
<div class="d3-leaderboard-chart" style="width:100%;aspect-ratio:2.8/1;min-height:320px;"></div>
|
| 3 |
+
</div>
|
| 4 |
+
<style>
|
| 5 |
+
.d3-leaderboard-chart {
|
| 6 |
+
position: relative;
|
| 7 |
+
}
|
| 8 |
+
|
| 9 |
+
.d3-leaderboard-chart .d3-tooltip {
|
| 10 |
+
position: absolute;
|
| 11 |
+
top: 0;
|
| 12 |
+
left: 0;
|
| 13 |
+
transform: translate(-9999px, -9999px);
|
| 14 |
+
pointer-events: none;
|
| 15 |
+
padding: 10px 12px;
|
| 16 |
+
border-radius: 12px;
|
| 17 |
+
font-size: 12px;
|
| 18 |
+
line-height: 1.35;
|
| 19 |
+
border: 1px solid var(--border-color);
|
| 20 |
+
background: var(--surface-bg);
|
| 21 |
+
color: var(--text-color);
|
| 22 |
+
box-shadow: 0 8px 32px rgba(0,0,0,.28), 0 2px 8px rgba(0,0,0,.12);
|
| 23 |
+
opacity: 0;
|
| 24 |
+
transition: opacity .12s ease;
|
| 25 |
+
z-index: 20;
|
| 26 |
+
backdrop-filter: saturate(1.12) blur(8px);
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
.d3-info-icon {
|
| 30 |
+
position: absolute;
|
| 31 |
+
bottom: 15px;
|
| 32 |
+
right: 15px;
|
| 33 |
+
width: 28px;
|
| 34 |
+
height: 28px;
|
| 35 |
+
border-radius: 50%;
|
| 36 |
+
background: var(--surface-bg);
|
| 37 |
+
border: 1px solid var(--border-color);
|
| 38 |
+
display: flex;
|
| 39 |
+
align-items: center;
|
| 40 |
+
justify-content: center;
|
| 41 |
+
cursor: pointer;
|
| 42 |
+
color: var(--muted-color);
|
| 43 |
+
transition: all 0.2s ease;
|
| 44 |
+
z-index: 10;
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
.d3-info-icon:hover {
|
| 48 |
+
color: var(--text-color);
|
| 49 |
+
background: var(--surface-bg);
|
| 50 |
+
border-color: var(--text-color);
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
.d3-info-tooltip {
|
| 54 |
+
position: absolute;
|
| 55 |
+
bottom: 50px;
|
| 56 |
+
right: 15px;
|
| 57 |
+
max-width: 400px;
|
| 58 |
+
padding: 16px;
|
| 59 |
+
background: var(--surface-bg);
|
| 60 |
+
border: 1px solid var(--border-color);
|
| 61 |
+
border-radius: 8px;
|
| 62 |
+
font-size: 12px;
|
| 63 |
+
line-height: 1.6;
|
| 64 |
+
color: var(--text-color);
|
| 65 |
+
opacity: 0;
|
| 66 |
+
pointer-events: none;
|
| 67 |
+
z-index: 10000;
|
| 68 |
+
transition: opacity 0.2s ease;
|
| 69 |
+
box-shadow: 0 4px 12px rgba(0,0,0,0.15);
|
| 70 |
+
backdrop-filter: saturate(1.12) blur(8px);
|
| 71 |
+
text-align: left;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
.d3-leaderboard-chart .d3-tooltip__inner {
|
| 75 |
+
display: flex;
|
| 76 |
+
flex-direction: column;
|
| 77 |
+
gap: 6px;
|
| 78 |
+
min-width: 180px;
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
.d3-leaderboard-chart .d3-tooltip__inner > div:first-child {
|
| 82 |
+
font-weight: 800;
|
| 83 |
+
letter-spacing: 0.1px;
|
| 84 |
+
margin-bottom: 0;
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
.d3-leaderboard-chart .d3-tooltip__inner > div:nth-child(2) {
|
| 88 |
+
font-size: 11px;
|
| 89 |
+
color: var(--muted-color, #9ca3af);
|
| 90 |
+
display: block;
|
| 91 |
+
margin-top: -4px;
|
| 92 |
+
margin-bottom: 2px;
|
| 93 |
+
letter-spacing: 0.1px;
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
.d3-leaderboard-chart .d3-tooltip__inner > div:nth-child(n+3) {
|
| 97 |
+
padding-top: 6px;
|
| 98 |
+
border-top: 1px solid var(--border-color);
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
.d3-leaderboard-chart .d3-tooltip__color-dot {
|
| 102 |
+
display: inline-block;
|
| 103 |
+
width: 12px;
|
| 104 |
+
height: 12px;
|
| 105 |
+
border-radius: 3px;
|
| 106 |
+
border: 1px solid var(--border-color);
|
| 107 |
+
}
|
| 108 |
+
</style>
|
| 109 |
<script>
|
| 110 |
(() => {
|
| 111 |
const ensureD3 = (cb) => {
|
|
|
|
| 124 |
|
| 125 |
const bootstrap = () => {
|
| 126 |
const mount = document.currentScript ? document.currentScript.previousElementSibling : null;
|
| 127 |
+
const container = (mount && mount.querySelector && mount.querySelector('.d3-leaderboard-chart')) || document.querySelector('.d3-leaderboard-chart');
|
| 128 |
if (!container) return;
|
| 129 |
if (container.dataset) {
|
| 130 |
if (container.dataset.mounted === 'true') return;
|
| 131 |
container.dataset.mounted = 'true';
|
| 132 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
+
// Get categorical colors from ColorPalettes
|
| 135 |
+
/**
 * Returns categorical colors for the chart.
 *
 * Prefers `window.ColorPalettes.getColors('categorical', n)` when that helper
 * exists and yields a non-empty array. Otherwise falls back to d3's Tableau10
 * scheme, or to a hard-coded copy of it when d3 does not expose the scheme.
 *
 * @param {number} n - Number of colors requested. When omitted, the whole
 *   fallback palette is returned.
 * @returns {string[]} The helper's result as-is, or exactly `n` fallback
 *   colors. The fallback palette is cycled when `n` exceeds its length so
 *   that no requested slot is left `undefined`.
 */
function getCategoricalColors(n) {
  try {
    if (window.ColorPalettes && typeof window.ColorPalettes.getColors === 'function') {
      const colors = window.ColorPalettes.getColors('categorical', n);
      // Only trust a usable result; an empty or non-array value falls through to the fallback.
      if (Array.isArray(colors) && colors.length > 0) {
        return colors;
      }
    }
  } catch (e) {
    // Reaching this point means the helper exists but threw, so report the real error.
    console.warn('ColorPalettes.getColors failed, using fallback palette', e);
  }

  // Fallback: Tableau10 palette
  const tableau = (window.d3 && window.d3.schemeTableau10)
    ? window.d3.schemeTableau10
    : ['#4e79a7', '#f28e2b', '#e15759', '#76b7b2', '#59a14f', '#edc948', '#b07aa1', '#ff9da7', '#9c755f', '#bab0ab'];

  if (n === undefined) {
    return tableau.slice();
  }

  const count = Math.max(0, Math.trunc(Number(n)) || 0);
  // Cycle through the palette instead of truncating, so n > palette length still yields n colors.
  return Array.from({ length: count }, (_, i) => tableau[i % tableau.length]);
}
|
| 149 |
+
|
| 150 |
+
// Create color mapping for benchmarks
|
| 151 |
+
let colorMap = {};
|
| 152 |
+
|
| 153 |
+
// Définir les groupes de benchmarks globalement
|
| 154 |
+
// Agentic en premier pour obtenir la première couleur (orange)
|
| 155 |
+
const BENCHMARK_GROUPS = [
|
| 156 |
+
{
|
| 157 |
+
name: 'Agentic',
|
| 158 |
+
benchmarks: ['GAIA']
|
| 159 |
+
},
|
| 160 |
+
{
|
| 161 |
+
name: 'Reasoning & Commonsense',
|
| 162 |
+
benchmarks: ['MUSR', 'BBH', 'Winogrande', 'TruthfulQA', 'HellaSwag']
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
name: 'Knowledge',
|
| 166 |
+
benchmarks: ['MMLU', 'MMLU-Pro', 'GPQA', 'ARC']
|
| 167 |
+
},
|
| 168 |
+
{
|
| 169 |
+
name: 'Math',
|
| 170 |
+
benchmarks: ['GSM8K', 'MATH']
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
name: 'Instruction following',
|
| 174 |
+
benchmarks: ['IFEval']
|
| 175 |
+
}
|
| 176 |
+
];
|
| 177 |
+
|
| 178 |
+
// Fonction pour créer des variations de couleur à partir d'une couleur de base
|
| 179 |
+
/**
 * Derives a lighter or darker variant of a hex color for one member of a group.
 *
 * The brightness factor is spread linearly from -0.15 (index 0) to +0.15
 * (index `total - 1`). When `total` is 1 or less the step is 0, so the factor
 * is -0.15 for every index. Each channel is then computed as
 * `value + (255 - value) * factor`, rounded and clamped to [0, 255].
 *
 * @param {string} baseColor - Hex color: `#rgb`, `#rrggbb` or `#rrggbbaa`
 *   (the alpha pair is ignored). The leading `#` is optional.
 * @param {number} index - Position of the item within its group (0-based).
 * @param {number} total - Number of items in the group.
 * @returns {string} Lower-case `#rrggbb` color. If `baseColor` is not a
 *   parseable hex color it is returned unchanged rather than producing an
 *   invalid `#NaNNaNNaN`-style string.
 */
function createColorVariation(baseColor, index, total) {
  if (typeof baseColor !== 'string') {
    return baseColor;
  }

  // Convert the hex color to RGB, expanding shorthand (#abc -> aabbcc) first.
  let hexDigits = baseColor.trim().replace('#', '');
  if (/^[0-9a-f]{3}$/i.test(hexDigits)) {
    hexDigits = hexDigits.split('').map((digit) => digit + digit).join('');
  }
  if (!/^[0-9a-f]{6}([0-9a-f]{2})?$/i.test(hexDigits)) {
    // Not a hex color (e.g. rgb(...), a named color): leave it untouched.
    return baseColor;
  }
  const channels = [0, 2, 4].map((offset) => Number.parseInt(hexDigits.slice(offset, offset + 2), 16));

  // Create variations by adjusting brightness: from -15% to +15% depending on the index.
  const variationRange = 0.15;
  const step = total > 1 ? (variationRange * 2) / (total - 1) : 0;
  const brightnessAdjust = -variationRange + (index * step);

  // Shift each channel by a fraction of its distance to 255, then clamp to the byte range.
  const adjustBrightness = (value) => {
    const adjusted = value + (255 - value) * brightnessAdjust;
    return Math.max(0, Math.min(255, Math.round(adjusted)));
  };

  // Convert back to a two-digit hex pair.
  const toHexPair = (channel) => {
    const digits = channel.toString(16);
    return digits.length === 1 ? `0${digits}` : digits;
  };

  return `#${channels.map(adjustBrightness).map(toHexPair).join('')}`;
}
|
| 210 |
+
|
| 211 |
+
// Mapping des benchmarks vers leurs groupes
|
| 212 |
+
/**
 * Finds the name of the benchmark group that lists a given benchmark.
 *
 * The data key `MMLU_new` is looked up under its display name `MMLU-Pro`.
 *
 * @param {string} benchmark - Benchmark key as it appears in the data.
 * @returns {string|null} Name of the first group in `BENCHMARK_GROUPS` whose
 *   `benchmarks` list includes the benchmark, or `null` when none does.
 */
function getBenchmarkGroup(benchmark) {
  // MMLU_new is displayed as MMLU-Pro, which is the name the groups use.
  let displayName = benchmark;
  if (benchmark === 'MMLU_new') {
    displayName = 'MMLU-Pro';
  }

  const owner = BENCHMARK_GROUPS.find(({ benchmarks }) => benchmarks.includes(displayName));
  return owner ? owner.name : null;
}
|
| 223 |
+
|
| 224 |
+
// Base color of every group present in the data; read back for the legend.
let groupBaseColors = {};

/**
 * Rebuilds both color lookups from the list of benchmark keys being drawn:
 * `groupBaseColors` (group name -> palette color) and `colorMap`
 * (benchmark data key -> shade of its group's color).
 *
 * @param {string[]} benchmarks - Benchmark data keys present in the data.
 */
function updateColorMap(benchmarks) {
  // Groups owning at least one listed benchmark, kept in BENCHMARK_GROUPS order.
  const presentGroups = BENCHMARK_GROUPS
    .map(({ name }) => name)
    .filter(name => benchmarks.some(key => getBenchmarkGroup(key) === name));

  // Ask for exactly as many palette colors as there are groups to show.
  const palette = getCategoricalColors(presentGroups.length);

  groupBaseColors = {};
  presentGroups.forEach((name, position) => {
    groupBaseColors[name] = palette[position];
  });

  colorMap = {};
  for (const group of BENCHMARK_GROUPS) {
    const baseColor = groupBaseColors[group.name];
    if (!baseColor) continue;

    // Data keys of this group that are actually present, in group order.
    // 'MMLU-Pro' is stored in the data under the 'MMLU_new' key.
    const dataKeys = group.benchmarks
      .map(label => (label === 'MMLU-Pro' ? 'MMLU_new' : label))
      .filter(key => benchmarks.includes(key));

    // One shade of the group's base color per benchmark, keyed by data key.
    dataKeys.forEach((key, position) => {
      colorMap[key] = createColorVariation(baseColor, position, dataKeys.length);
    });
  }
}
|
| 276 |
+
|
| 277 |
+
/**
 * Looks up the base color of a group, as used by the legend.
 *
 * @param {string} groupName - Name of a benchmark group.
 * @returns {string} The color stored in `groupBaseColors`, or '#000000' when
 *   the group has no (truthy) entry.
 */
function getGroupBaseColor(groupName) {
  const baseColor = groupBaseColors[groupName];
  return baseColor ? baseColor : '#000000';
}
|
| 281 |
+
|
| 282 |
+
/**
 * Resolves the drawing color of a benchmark.
 *
 * Lookup order:
 *   1. the shade stored in `colorMap`;
 *   2. otherwise the palette color at the index of the benchmark's group in
 *      BENCHMARK_GROUPS;
 *   3. otherwise a color picked from a 10-color palette using the first
 *      character of the name.
 *
 * @param {string} benchmark - Benchmark data key (e.g. 'MMLU', 'MMLU_new').
 * @returns {string} Color for the benchmark.
 */
function getColor(benchmark) {
  if (colorMap && colorMap[benchmark]) {
    return colorMap[benchmark];
  }

  // Fallback when the benchmark is missing from the map: use its group's color.
  const group = getBenchmarkGroup(benchmark);
  if (group) {
    const groupPalette = getCategoricalColors(BENCHMARK_GROUPS.length);
    const groupIndex = BENCHMARK_GROUPS.findIndex(g => g.name === group);
    if (groupIndex >= 0) {
      return groupPalette[groupIndex % groupPalette.length];
    }
  }

  // Last resort: derive an index from the first character of the name.
  // charCodeAt(0) is NaN on an empty string, which would index the palette
  // with NaN and return undefined, so an empty or missing name uses index 0.
  const palette = getCategoricalColors(10);
  const name = String(benchmark || '');
  const code = name.length > 0 ? name.charCodeAt(0) : 0;
  return palette[code % palette.length];
}
|
| 302 |
+
|
| 303 |
+
// Chart datasets: loadData() assigns them, render() reads them.
let data = null;
let scatterData = null; // Scatter-plot points

// Info icon at the bottom right of the embed (an HTML element, not SVG).
// It is built only once: when the wrapper already holds an icon, nothing is added.
const wrapper = container.closest('.d3-leaderboard-chart-wrapper');
if (wrapper && !wrapper.querySelector('.d3-info-icon')) {
  const infoIcon = document.createElement('div');
  infoIcon.className = 'd3-info-icon';
  infoIcon.innerHTML = `
    <svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
      <path d="M8 6V8M8 10H8.01" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
    </svg>
  `;
  wrapper.appendChild(infoIcon);

  // Tooltip attached to the info icon; an existing one is reused.
  let infoTooltip = wrapper.querySelector('.d3-info-tooltip');
  if (!infoTooltip) {
    infoTooltip = document.createElement('div');
    infoTooltip.className = 'd3-info-tooltip';
    infoTooltip.innerHTML = `
      <div style="font-weight: 600; margin-bottom: 10px; color: var(--text-color); font-size: 13px; text-align: left;">About this chart</div>
      <div style="color: var(--text-color); font-size: 12px; line-height: 1.6; text-align: left;">
        <p style="margin: 0 0 10px 0; text-align: left;">
          This visualization tracks the evolution of top benchmark scores over time across multiple evaluation frameworks.
          The step-like lines represent the progression of maximum scores achieved for each benchmark, with circular markers
          indicating when a new record was set.
        </p>
        <p style="margin: 0 0 10px 0; text-align: left;">
          The gray scatter plot in the background shows the average scores of all evaluated models, providing context for
          the top performers. Each point represents a model's average performance across all benchmarks at a given time.
        </p>
        <p style="margin: 0; text-align: left;">
          Benchmarks are grouped by category (Reasoning & Commonsense, Knowledge, Math, Agentic, and Instruction following),
          with each group sharing a color family. Variations within a group use different shades of the same base color.
        </p>
      </div>
    `;
    wrapper.appendChild(infoTooltip);
  }

  // Hover handling: show the tooltip while the pointer is over the icon.
  const setInfoTooltipVisible = (visible) => {
    infoTooltip.style.opacity = visible ? '1' : '0';
    infoTooltip.style.pointerEvents = visible ? 'auto' : 'none';
  };
  infoIcon.addEventListener('mouseenter', () => setInfoTooltipVisible(true));
  infoIcon.addEventListener('mouseleave', () => setInfoTooltipVisible(false));
}

// Tooltip for the data points: reuse the one already in the container, or build it.
let tip = container.querySelector('.d3-tooltip');
let tipInner;
if (tip) {
  // Fall back to the tooltip element itself when it has no inner wrapper.
  tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
} else {
  tip = document.createElement('div');
  tip.className = 'd3-tooltip';
  tipInner = document.createElement('div');
  tipInner.className = 'd3-tooltip__inner';
  tip.appendChild(tipInner);
  container.appendChild(tip);
}
|
| 372 |
+
|
| 373 |
+
/**
 * Loads every dataset used by the chart and stores the merged result in the
 * outer `data` and `scatterData` variables.
 *
 * Five JSON files are fetched in parallel (new leaderboard, old leaderboard,
 * GAIA records, scatter points, GAIA scatter points). Each file is looked up
 * under several candidate URLs, tried in order, because the embed can be
 * served from different locations.
 *
 * Merge rules for `data.benchmarks`:
 *   - new leaderboard series first, then GAIA series (scores rescaled from
 *     0-1 to 0-100), GAIA winning on a key clash;
 *   - a benchmark found only in the old leaderboard is added unchanged;
 *   - a benchmark found in both is split at 2024-06-01: the old file's points
 *     before that date keep the original key, the points on or after it from
 *     the already-merged series go under `<key>_new`. Each half is reduced to
 *     its successive record scores.
 *
 * `scatterData` is the scatter file's content with the GAIA points
 * (`average_score` rescaled to 0-100) appended to `points`; it is null when
 * neither scatter file could be loaded.
 *
 * @returns {Promise<void>} Resolves once `data` and `scatterData` are assigned.
 *   Load failures are not thrown; the datasets are just left empty.
 */
const loadData = async () => {
  const SERIES_DIRS = ['/data/', './assets/data/', '../assets/data/', '../../assets/data/'];
  const SCATTER_DIRS = ['/assets/data/', ...SERIES_DIRS];
  const candidates = (dirs, fileName) => dirs.map(dir => `${dir}${fileName}`);

  // Returns the parsed body of the first candidate URL that answers with an OK
  // status and valid JSON, or null when none does.
  const fetchFirstJson = async (paths) => {
    for (const path of paths) {
      try {
        const response = await fetch(path, { cache: 'no-cache' });
        if (response.ok) {
          return await response.json();
        }
      } catch (e) {
        // Deliberate best-effort probing: continue to the next path.
      }
    }
    return null;
  };

  // Keeps only the points that set a new best score, in chronological order.
  // The points are sorted first: a running maximum over unsorted input would
  // drop genuine early records. As before, scores of 0 or below never count.
  const computeRecords = (points) => {
    const chronological = [...points].sort((a, b) => new Date(a.date) - new Date(b.date));
    const records = [];
    let maxSoFar = 0;
    chronological.forEach(point => {
      if (point.score > maxSoFar) {
        maxSoFar = point.score;
        records.push({ date: point.date, score: maxSoFar });
      }
    });
    return records;
  };

  // The five resources are independent, so they are requested concurrently.
  const [newData, oldData, gaiaData, baseScatterData, gaiaScatterData] = await Promise.all([
    fetchFirstJson(candidates(SERIES_DIRS, 'leaderboard_scores_over_time.json')),
    fetchFirstJson(candidates(SERIES_DIRS, 'leaderboard_scores_over_time_old.json')),
    fetchFirstJson(candidates(SERIES_DIRS, 'data_gaia.json')),
    fetchFirstJson(candidates(SCATTER_DIRS, 'leaderboard_scatter_plot.json')),
    fetchFirstJson(candidates(SCATTER_DIRS, 'data_gaia_points.json'))
  ]);

  // --- Benchmark series -----------------------------------------------------
  const benchmarks = {};

  if (newData) {
    Object.assign(benchmarks, newData.benchmarks || {});
  }

  if (gaiaData && gaiaData.benchmarks) {
    const gaiaBenchmarks = { ...gaiaData.benchmarks };
    if (gaiaBenchmarks.GAIA) {
      // Convert GAIA scores from 0-1 to 0-100.
      gaiaBenchmarks.GAIA = gaiaBenchmarks.GAIA.map(point => ({
        ...point,
        score: point.score * 100
      }));
    }
    Object.assign(benchmarks, gaiaBenchmarks);
  }

  if (oldData) {
    // Rough date of the switch between the old and the new leaderboard.
    const transitionDate = new Date('2024-06-01');
    const oldBenchmarks = oldData.benchmarks || {};

    Object.keys(oldBenchmarks).forEach(benchmark => {
      if (!benchmarks[benchmark]) {
        // Benchmark only in the old data: add it directly.
        benchmarks[benchmark] = oldBenchmarks[benchmark];
        return;
      }

      // Benchmark in both datasets (like MMLU): keep two separate series,
      // each with records computed independently for its own period.
      const oldRecords = computeRecords(
        oldBenchmarks[benchmark].filter(p => new Date(p.date) < transitionDate)
      );
      const newRecords = computeRecords(
        benchmarks[benchmark].filter(p => new Date(p.date) >= transitionDate)
      );

      // The old period keeps the original name (or disappears when empty).
      if (oldRecords.length > 0) {
        benchmarks[benchmark] = oldRecords;
      } else {
        delete benchmarks[benchmark];
      }

      // The new period is stored under a different name.
      if (newRecords.length > 0) {
        benchmarks[benchmark + '_new'] = newRecords;
      }
    });
  }

  if (Object.keys(benchmarks).length === 0) {
    console.warn('Could not load leaderboard data, using empty dataset');
  }
  data = { benchmarks };

  // --- Scatter points -------------------------------------------------------
  let mergedScatter = baseScatterData;

  if (gaiaScatterData && gaiaScatterData.points) {
    // Convert GAIA scores from 0-1 to 0-100.
    const gaiaPoints = gaiaScatterData.points.map(point => ({
      ...point,
      average_score: point.average_score * 100
    }));

    if (!mergedScatter) {
      mergedScatter = { points: [] };
    }
    if (!mergedScatter.points) {
      mergedScatter.points = [];
    }
    mergedScatter.points = mergedScatter.points.concat(gaiaPoints);
    console.log(`✅ ${gaiaPoints.length} points GAIA ajoutés au nuage de points (total: ${mergedScatter.points.length})`);
  }

  scatterData = mergedScatter;
};
|
| 578 |
|
| 579 |
// Create the root <svg> inside the chart container, then apply its base styling.
const svg = d3.select(container).append('svg');
svg
  .attr('width', '100%')
  .style('display', 'block')
  .style('cursor', 'default');
|
| 584 |
+
|
| 585 |
+
/**
 * Reads the current theme from the `data-theme` attribute of <html> and
 * returns the matching chart colors.
 *
 * @returns {{isDark: boolean, textColor: string, gridColor: string,
 *   axisColor: string, tooltipBg: string, tooltipBorder: string,
 *   tooltipText: string, tooltipMuted: string}} Colors for the active theme;
 *   any `data-theme` value other than 'dark' yields the light set.
 */
const getThemeColors = () => {
  const isDark = document.documentElement.getAttribute('data-theme') === 'dark';
  // Chooses between the dark-theme and the light-theme value of one color.
  const pick = (darkValue, lightValue) => (isDark ? darkValue : lightValue);

  return {
    isDark,
    textColor: pick('rgba(255,255,255,0.8)', 'rgba(0,0,0,0.6)'),
    gridColor: pick('rgba(255,255,255,0.08)', 'rgba(0,0,0,0.08)'),
    axisColor: pick('rgba(255,255,255,0.4)', 'rgba(0,0,0,0.4)'),
    tooltipBg: pick('rgba(20, 20, 25, 0.98)', 'rgba(255, 255, 255, 0.98)'),
    tooltipBorder: pick('rgba(255, 255, 255, 0.15)', 'rgba(0, 0, 0, 0.1)'),
    tooltipText: pick('rgba(255, 255, 255, 0.95)', 'rgba(0, 0, 0, 0.9)'),
    tooltipMuted: pick('rgba(255, 255, 255, 0.6)', 'rgba(0, 0, 0, 0.5)')
  };
};
|
| 599 |
+
|
| 600 |
+
// Theme state shared with render(), which refreshes all three on each draw.
let themeColors = getThemeColors();
let legendDivRef = null; // Reference to the legend, kept for theme updates
let svgRef = null; // Reference to the SVG, kept for theme updates

// Watch for theme changes: whenever the data-theme attribute of <html>
// changes, recompute the colors and restyle the parts already drawn.
const themeObserver = new MutationObserver(() => {
  themeColors = getThemeColors();

  // Legend colors are handled by CSS variables, so only the axis ticks and
  // the grid lines need a manual update.
  if (!svgRef) return;
  const chartGroup = svgRef.select('g');
  if (!chartGroup || chartGroup.empty()) return;

  chartGroup.selectAll('.axis .tick line').attr('stroke', themeColors.axisColor);
  chartGroup.selectAll('.axis .tick text').attr('fill', themeColors.textColor);
  chartGroup.selectAll('.grid-line').attr('stroke', themeColors.gridColor);
});

themeObserver.observe(document.documentElement, {
  attributes: true,
  attributeFilter: ['data-theme']
});
|
| 622 |
|
| 623 |
const render = () => {
|
| 624 |
+
// Update theme colors before rendering
|
| 625 |
+
themeColors = getThemeColors();
|
| 626 |
+
|
| 627 |
+
if (!data || !data.benchmarks) {
|
| 628 |
+
// Show loading message
|
| 629 |
+
svg.selectAll('*').remove();
|
| 630 |
+
const width = container.clientWidth || 800;
|
| 631 |
+
const height = Math.max(380, Math.round(width / 2.8));
|
| 632 |
+
svg.attr('width', width).attr('height', height);
|
| 633 |
+
|
| 634 |
+
svg.append('text')
|
| 635 |
+
.attr('x', width / 2)
|
| 636 |
+
.attr('y', height / 2)
|
| 637 |
+
.attr('text-anchor', 'middle')
|
| 638 |
+
.attr('fill', themeColors.textColor)
|
| 639 |
+
.text('Chargement des données...');
|
| 640 |
+
return;
|
| 641 |
+
}
|
| 642 |
+
|
| 643 |
const width = container.clientWidth || 800;
|
| 644 |
+
const height = Math.max(380, Math.round(width / 2.8));
|
| 645 |
svg.attr('width', width).attr('height', height);
|
| 646 |
|
| 647 |
+
// Clear previous render
|
| 648 |
+
svg.selectAll('*').remove();
|
| 649 |
|
| 650 |
+
// Margins (augmenter le bottom pour la légende et plus d'espace à gauche/droite)
|
| 651 |
+
// Espacement uniforme à gauche et à droite
|
| 652 |
+
const sidePadding = 25; // Espacement uniforme pour légende et titre
|
| 653 |
+
const chartRightMargin = sidePadding * 2; // Double espacement à droite du chart
|
| 654 |
+
const margin = { top: 10, right: chartRightMargin, bottom: 150, left: 60 };
|
| 655 |
+
const innerWidth = width - margin.left - margin.right;
|
| 656 |
+
const innerHeight = height - margin.top - margin.bottom;
|
| 657 |
+
|
| 658 |
+
// Collect all dates and scores for scaling (limité à décembre 2025)
|
| 659 |
+
const maxDate = new Date('2025-12-31');
|
| 660 |
+
const allDates = [];
|
| 661 |
+
const allScores = [];
|
| 662 |
+
|
| 663 |
+
Object.values(data.benchmarks).forEach(benchmarkData => {
|
| 664 |
+
benchmarkData.forEach(point => {
|
| 665 |
+
const pointDate = new Date(point.date);
|
| 666 |
+
if (pointDate <= maxDate) {
|
| 667 |
+
allDates.push(pointDate);
|
| 668 |
+
allScores.push(point.score);
|
| 669 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 670 |
});
|
| 671 |
+
});
|
| 672 |
+
|
| 673 |
+
if (allDates.length === 0) {
|
| 674 |
+
svg.append('text')
|
| 675 |
+
.attr('x', width / 2)
|
| 676 |
+
.attr('y', height / 2)
|
| 677 |
+
.attr('text-anchor', 'middle')
|
| 678 |
+
.attr('fill', themeColors.textColor)
|
| 679 |
+
.text('Aucune donnée disponible');
|
| 680 |
+
return;
|
| 681 |
+
}
|
| 682 |
+
|
| 683 |
+
// Scales (store in window for hover functions)
|
| 684 |
+
// Limiter le domaine à décembre 2025
|
| 685 |
+
const maxDateLimit = new Date('2025-12-31');
|
| 686 |
+
const dateExtent = d3.extent(allDates);
|
| 687 |
+
// S'assurer que le domaine ne dépasse pas décembre 2025
|
| 688 |
+
const domainMax = dateExtent[1] && dateExtent[1] > maxDateLimit ? maxDateLimit : (dateExtent[1] || maxDateLimit);
|
| 689 |
+
|
| 690 |
+
window.bannerXScale = d3.scaleTime()
|
| 691 |
+
.domain([dateExtent[0] || new Date('2023-01-01'), domainMax])
|
| 692 |
+
.range([0, innerWidth])
|
| 693 |
+
.nice();
|
| 694 |
+
|
| 695 |
+
// Forcer le domaine maximum à décembre 2025 (après .nice() qui peut étendre le domaine)
|
| 696 |
+
const currentDomain = window.bannerXScale.domain();
|
| 697 |
+
if (currentDomain[1] > maxDateLimit) {
|
| 698 |
+
window.bannerXScale.domain([currentDomain[0], maxDateLimit]);
|
| 699 |
+
}
|
| 700 |
+
|
| 701 |
+
// Réappliquer nice() seulement sur le min si nécessaire, mais garder le max à décembre 2025
|
| 702 |
+
const finalDomain = window.bannerXScale.domain();
|
| 703 |
+
window.bannerXScale.domain([finalDomain[0], maxDateLimit]);
|
| 704 |
+
|
| 705 |
+
const xScale = window.bannerXScale;
|
| 706 |
+
|
| 707 |
+
const yScale = d3.scaleLinear()
|
| 708 |
+
.domain([0, Math.max(100, d3.max(allScores) * 1.1)])
|
| 709 |
+
.range([innerHeight, 0])
|
| 710 |
+
.nice();
|
| 711 |
+
|
| 712 |
+
// Create main group
|
| 713 |
+
const g = svg.append('g')
|
| 714 |
+
.attr('transform', `translate(${margin.left},${margin.top})`);
|
| 715 |
+
|
| 716 |
+
// Stocker la référence au SVG pour les mises à jour de thème
|
| 717 |
+
svgRef = svg;
|
| 718 |
+
|
| 719 |
+
// Hover layer for tooltip
|
| 720 |
+
const gHover = g.append('g').attr('class', 'hover-layer');
|
| 721 |
+
let hoverLine = null;
|
| 722 |
+
|
| 723 |
+
// Grid lines
|
| 724 |
+
const yTicks = yScale.ticks(5);
|
| 725 |
+
g.selectAll('.grid-line')
|
| 726 |
+
.data(yTicks)
|
| 727 |
+
.enter().append('line')
|
| 728 |
+
.attr('class', 'grid-line')
|
| 729 |
+
.attr('x1', 0)
|
| 730 |
+
.attr('x2', innerWidth)
|
| 731 |
+
.attr('y1', d => yScale(d))
|
| 732 |
+
.attr('y2', d => yScale(d))
|
| 733 |
+
.attr('stroke', themeColors.gridColor)
|
| 734 |
+
.attr('stroke-width', 1)
|
| 735 |
+
.attr('stroke-dasharray', '2,2');
|
| 736 |
+
|
| 737 |
+
|
| 738 |
+
// Line generator - courbe en escalier (step) pour afficher des seuils successifs
|
| 739 |
+
// La ligne reste constante jusqu'au prochain point
|
| 740 |
+
const line = d3.line()
|
| 741 |
+
.x(d => xScale(new Date(d.date)))
|
| 742 |
+
.y(d => yScale(d.score))
|
| 743 |
+
.curve(d3.curveStepAfter); // Step function : reste constante jusqu'au prochain point
|
| 744 |
+
|
| 745 |
+
// Draw lines for each benchmark
|
| 746 |
+
// Handle MMLU separately: old and new as separate lines
|
| 747 |
+
let benchmarks = Object.keys(data.benchmarks).sort();
|
| 748 |
+
|
| 749 |
+
// If MMLU_new exists, add it to benchmarks list but map it to display as MMLU
|
| 750 |
+
if (data.benchmarks['MMLU_new']) {
|
| 751 |
+
benchmarks.push('MMLU_new');
|
| 752 |
+
}
|
| 753 |
+
|
| 754 |
+
// Update color map avec tous les benchmarks (les couleurs seront assignées par groupe)
|
| 755 |
+
updateColorMap(benchmarks);
|
| 756 |
+
|
| 757 |
+
// Fonctions pour gérer le highlight
|
| 758 |
+
const highlightBenchmark = (highlightedBenchmark) => {
|
| 759 |
+
benchmarks.forEach(benchmark => {
|
| 760 |
+
const displayName = benchmark === 'MMLU_new' ? 'MMLU-Pro' : benchmark;
|
| 761 |
+
if (displayName === highlightedBenchmark) {
|
| 762 |
+
// Mettre en évidence la ligne sélectionnée
|
| 763 |
+
g.selectAll(`.line-${benchmark}`).style('opacity', 1).attr('stroke-width', 3);
|
| 764 |
+
g.selectAll(`.marker-${benchmark}`).style('opacity', 1);
|
| 765 |
+
g.selectAll(`.legend-${displayName}`).style('opacity', 1);
|
| 766 |
} else {
|
| 767 |
+
// Ghost les autres lignes
|
| 768 |
+
g.selectAll(`.line-${benchmark}`).style('opacity', 0.15);
|
| 769 |
+
g.selectAll(`.marker-${benchmark}`).style('opacity', 0.15);
|
| 770 |
+
g.selectAll(`.legend-${displayName}`).style('opacity', 0.3);
|
| 771 |
}
|
| 772 |
+
});
|
| 773 |
+
};
|
| 774 |
+
|
| 775 |
+
const resetHighlight = () => {
|
| 776 |
+
benchmarks.forEach(benchmark => {
|
| 777 |
+
g.selectAll(`.line-${benchmark}`).style('opacity', 0.9).attr('stroke-width', 2.5);
|
| 778 |
+
g.selectAll(`.marker-${benchmark}`).style('opacity', 1);
|
| 779 |
+
const displayName = benchmark === 'MMLU_new' ? 'MMLU-Pro' : benchmark;
|
| 780 |
+
g.selectAll(`.legend-${displayName}`).style('opacity', 1);
|
| 781 |
+
});
|
| 782 |
+
};
|
| 783 |
+
|
| 784 |
+
// Ajouter le nuage de points EN PREMIER (en dessous de tout)
|
| 785 |
+
if (scatterData && scatterData.points && scatterData.points.length > 0) {
|
| 786 |
+
// Utiliser les dates déjà filtrées (allDates est défini plus haut)
|
| 787 |
+
if (allDates.length > 0) {
|
| 788 |
+
const minDate = d3.min(allDates);
|
| 789 |
+
const maxDateLimit = new Date('2025-12-31'); // Limiter à décembre 2025
|
| 790 |
+
const maxDate = d3.min([d3.max(allDates), maxDateLimit]);
|
| 791 |
+
|
| 792 |
+
const filteredPoints = scatterData.points.filter(p => {
|
| 793 |
+
const pointDate = new Date(p.date);
|
| 794 |
+
return pointDate >= minDate && pointDate <= maxDateLimit;
|
| 795 |
+
});
|
| 796 |
+
|
| 797 |
+
// Debug: compter les points GAIA dans les points filtrés
|
| 798 |
+
const gaiaFilteredCount = filteredPoints.filter(p => p.leaderboard === 'gaia').length;
|
| 799 |
+
if (gaiaFilteredCount > 0) {
|
| 800 |
+
console.log(`✅ ${gaiaFilteredCount} points GAIA affichés dans le nuage (sur ${filteredPoints.length} points totaux)`);
|
| 801 |
+
}
|
| 802 |
+
|
| 803 |
+
// Créer un groupe pour le nuage de points (en arrière-plan, en premier)
|
| 804 |
+
const scatterGroup = g.append('g')
|
| 805 |
+
.attr('class', 'scatter-points');
|
| 806 |
+
|
| 807 |
+
// Ajouter les points
|
| 808 |
+
scatterGroup.selectAll('.scatter-point')
|
| 809 |
+
.data(filteredPoints)
|
| 810 |
+
.enter().append('circle')
|
| 811 |
+
.attr('class', d => `scatter-point scatter-${d.leaderboard}`)
|
| 812 |
+
.attr('cx', d => xScale(new Date(d.date)))
|
| 813 |
+
.attr('cy', d => {
|
| 814 |
+
// Utiliser average_score_raw si disponible (nouveau leaderboard), sinon average_score
|
| 815 |
+
const score = d.average_score_raw !== undefined ? d.average_score_raw : d.average_score;
|
| 816 |
+
return yScale(score);
|
| 817 |
+
})
|
| 818 |
+
.attr('r', 0.75) // Taille réduite pour moins de bruit visuel
|
| 819 |
+
.style('fill', 'var(--grid-color)') // Utiliser la variable CSS qui s'adapte au thème
|
| 820 |
+
.style('opacity', '0.5') // Opacité supplémentaire pour light mode (moins visible)
|
| 821 |
+
.attr('stroke', 'none')
|
| 822 |
+
.style('pointer-events', 'none'); // Ne pas interférer avec les interactions
|
| 823 |
+
}
|
| 824 |
+
}
|
| 825 |
+
|
| 826 |
+
benchmarks.forEach(benchmark => {
|
| 827 |
+
const points = data.benchmarks[benchmark];
|
| 828 |
+
if (!points || points.length === 0) return;
|
| 829 |
+
|
| 830 |
+
// Filtrer les points jusqu'en décembre 2025
|
| 831 |
+
const maxDate = new Date('2025-12-31');
|
| 832 |
+
const filteredPoints = points.filter(p => new Date(p.date) <= maxDate);
|
| 833 |
+
|
| 834 |
+
// S'assurer que les points sont triés par date pour le step chart
|
| 835 |
+
const sortedPoints = [...filteredPoints].sort((a, b) => new Date(a.date) - new Date(b.date));
|
| 836 |
+
|
| 837 |
+
// For MMLU_new, display as MMLU-Pro
|
| 838 |
+
const displayName = benchmark === 'MMLU_new' ? 'MMLU-Pro' : benchmark;
|
| 839 |
+
// Utiliser la couleur du groupe (getColor gère déjà MMLU_new -> MMLU-Pro)
|
| 840 |
+
const color = getColor(benchmark);
|
| 841 |
|
| 842 |
+
const path = g.append('path')
|
| 843 |
+
.datum(sortedPoints)
|
| 844 |
+
.attr('fill', 'none')
|
| 845 |
+
.attr('stroke', color)
|
| 846 |
+
.attr('stroke-width', 2.5)
|
| 847 |
+
.attr('d', line)
|
| 848 |
+
.attr('class', `line-${benchmark}`)
|
| 849 |
+
.style('opacity', 0.9)
|
| 850 |
+
.style('cursor', 'pointer');
|
| 851 |
+
|
| 852 |
+
// Add markers at every data point for step chart
|
| 853 |
+
g.selectAll(`.marker-${benchmark}`)
|
| 854 |
+
.data(sortedPoints)
|
| 855 |
+
.enter().append('circle')
|
| 856 |
+
.attr('class', `marker-${benchmark}`)
|
| 857 |
+
.attr('cx', d => xScale(new Date(d.date)))
|
| 858 |
+
.attr('cy', d => yScale(d.score))
|
| 859 |
+
.attr('r', 3.5)
|
| 860 |
+
.attr('fill', color)
|
| 861 |
+
.attr('stroke', 'none')
|
| 862 |
+
.style('cursor', 'pointer')
|
| 863 |
+
.on('mouseenter', function(ev, d) {
|
| 864 |
+
showPointTooltip(ev, d, displayName, color);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 865 |
})
|
| 866 |
+
.on('mouseleave', function() {
|
| 867 |
+
hidePointTooltip();
|
| 868 |
+
});
|
| 869 |
+
|
| 870 |
+
// Hover sur la ligne - afficher tooltip avec le nom du benchmark
|
| 871 |
+
path.on('mouseenter', function(ev) {
|
| 872 |
+
highlightBenchmark(displayName);
|
| 873 |
+
showLineTooltip(ev, displayName, color);
|
| 874 |
+
}).on('mouseleave', function() {
|
| 875 |
+
resetHighlight();
|
| 876 |
+
hideLineTooltip();
|
| 877 |
+
});
|
| 878 |
+
});
|
| 879 |
+
|
| 880 |
+
// X axis
|
| 881 |
+
const xAxis = d3.axisBottom(xScale)
|
| 882 |
+
.ticks(6)
|
| 883 |
+
.tickFormat(d3.timeFormat('%b %Y'))
|
| 884 |
+
.tickSizeOuter(0)
|
| 885 |
+
.tickSize(6) // Taille des barres des tics
|
| 886 |
+
.tickPadding(8); // Plus d'espace entre les ticks et les labels
|
| 887 |
+
|
| 888 |
+
g.append('g')
|
| 889 |
+
.attr('class', 'axis axis-x')
|
| 890 |
+
.attr('transform', `translate(0,${innerHeight})`)
|
| 891 |
+
.call(xAxis)
|
| 892 |
+
.call(g => g.select('.domain').remove()) // Enlever la barre de bordure
|
| 893 |
+
.call(g => g.selectAll('.tick line').attr('stroke', themeColors.axisColor).attr('stroke-width', 1))
|
| 894 |
+
.call(g => g.selectAll('.tick text').attr('fill', themeColors.textColor).attr('font-size', '11px').attr('opacity', 0.6));
|
| 895 |
+
|
| 896 |
+
// Y axis
|
| 897 |
+
const yAxis = d3.axisLeft(yScale)
|
| 898 |
+
.ticks(5)
|
| 899 |
+
.tickFormat(d => d + '%')
|
| 900 |
+
.tickSizeOuter(0)
|
| 901 |
+
.tickSize(6) // Taille des barres des tics
|
| 902 |
+
.tickPadding(8); // Plus d'espace entre les ticks et les labels
|
| 903 |
+
|
| 904 |
+
g.append('g')
|
| 905 |
+
.attr('class', 'axis axis-y')
|
| 906 |
+
.call(yAxis)
|
| 907 |
+
.call(g => g.select('.domain').remove()) // Enlever la barre de bordure
|
| 908 |
+
.call(g => g.selectAll('.tick line').attr('stroke', themeColors.axisColor).attr('stroke-width', 1))
|
| 909 |
+
.call(g => g.selectAll('.tick text').attr('fill', themeColors.textColor).attr('font-size', '11px').attr('opacity', 0.6));
|
| 910 |
+
|
| 911 |
+
|
| 912 |
+
|
| 913 |
+
const legendY = innerHeight + 70; // Plus d'espace entre graphique et légende
|
| 914 |
+
|
| 915 |
+
// Créer un conteneur foreignObject pour utiliser flexbox
|
| 916 |
+
// Augmenter la hauteur pour accommoder tous les éléments (6 items * ~20px + gaps + title)
|
| 917 |
+
const legendContainer = g.append('foreignObject')
|
| 918 |
+
.attr('x', -90) // Aligné à gauche
|
| 919 |
+
.attr('y', legendY - 30)
|
| 920 |
+
.attr('width', innerWidth + margin.left + margin.right) // Toute la largeur
|
| 921 |
+
.attr('height', 200); // Hauteur suffisante pour 6 items + titre + gaps
|
| 922 |
+
|
| 923 |
+
const legendWrapper = legendContainer.append('xhtml:div')
|
| 924 |
+
.style('display', 'flex')
|
| 925 |
+
.style('flex-direction', 'column')
|
| 926 |
+
.style('align-items', 'flex-start')
|
| 927 |
+
.style('width', '100%')
|
| 928 |
+
.style('padding', '12px 54px');
|
| 929 |
+
|
| 930 |
+
// Label "legend" au-dessus
|
| 931 |
+
const legendLabel = legendWrapper.append('xhtml:div')
|
| 932 |
+
.style('font-size', '12px')
|
| 933 |
+
.style('font-weight', '600')
|
| 934 |
+
.style('color', 'var(--text-color)')
|
| 935 |
+
.style('opacity', '0.8')
|
| 936 |
+
.style('margin-bottom', '8px')
|
| 937 |
+
.text('Legend');
|
| 938 |
+
|
| 939 |
+
const legendDiv = legendWrapper.append('xhtml:div')
|
| 940 |
+
.style('display', 'flex')
|
| 941 |
+
.style('flex-direction', 'row') // Une seule ligne horizontale avec tous les groupes
|
| 942 |
+
.style('align-items', 'flex-start')
|
| 943 |
+
.style('justify-content', 'flex-start')
|
| 944 |
+
.style('gap', '30px') // Gap entre les groupes
|
| 945 |
+
.style('width', '100%')
|
| 946 |
+
.style('flex-wrap', 'wrap') // Permettre le wrap si nécessaire
|
| 947 |
+
.style('color', 'var(--text-color)') // Utiliser la variable CSS pour le dark mode
|
| 948 |
+
.style('background-color', 'transparent'); // Fond transparent
|
| 949 |
+
|
| 950 |
+
// Stocker la référence pour les mises à jour de thème
|
| 951 |
+
legendDivRef = legendWrapper;
|
| 952 |
+
|
| 953 |
+
// Filtrer les groupes pour ne garder que ceux qui ont des benchmarks présents dans les données
|
| 954 |
+
const filteredGroups = BENCHMARK_GROUPS.map(group => {
|
| 955 |
+
const availableBenchmarks = group.benchmarks.filter(benchmark => {
|
| 956 |
+
// Gérer MMLU-Pro qui est stocké comme MMLU_new
|
| 957 |
+
const dataKey = benchmark === 'MMLU-Pro' ? 'MMLU_new' : benchmark;
|
| 958 |
+
|
| 959 |
+
if (!data || !data.benchmarks) return false;
|
| 960 |
+
if (!data.benchmarks[dataKey]) return false;
|
| 961 |
+
|
| 962 |
+
const benchmarkData = data.benchmarks[dataKey];
|
| 963 |
+
return Array.isArray(benchmarkData) && benchmarkData.length > 0;
|
| 964 |
+
});
|
| 965 |
+
|
| 966 |
+
return {
|
| 967 |
+
name: group.name,
|
| 968 |
+
benchmarks: availableBenchmarks
|
| 969 |
+
};
|
| 970 |
+
}).filter(group => group.benchmarks.length > 0);
|
| 971 |
+
|
| 972 |
+
// Créer la légende par groupes (seulement les titres, détails dans tooltip)
|
| 973 |
+
filteredGroups.forEach(group => {
|
| 974 |
+
const groupColor = getGroupBaseColor(group.name); // Couleur de base du groupe
|
| 975 |
+
|
| 976 |
+
// Conteneur pour le groupe
|
| 977 |
+
const groupDiv = legendDiv.append('xhtml:div')
|
| 978 |
+
.style('display', 'flex')
|
| 979 |
+
.style('align-items', 'center')
|
| 980 |
+
.style('gap', '8px')
|
| 981 |
+
.style('cursor', 'pointer')
|
| 982 |
+
.style('position', 'relative')
|
| 983 |
+
.attr('class', `legend-group legend-group-${group.name.replace(/\s+/g, '-')}`);
|
| 984 |
+
|
| 985 |
+
// Carré de couleur
|
| 986 |
+
const groupColorSquare = groupDiv.append('xhtml:div')
|
| 987 |
+
.style('width', '14px')
|
| 988 |
+
.style('height', '14px')
|
| 989 |
+
.style('border-radius', '3px')
|
| 990 |
+
.style('background-color', groupColor)
|
| 991 |
+
.style('flex-shrink', '0');
|
| 992 |
+
|
| 993 |
+
// Titre du groupe
|
| 994 |
+
const groupTitleText = groupDiv.append('xhtml:span')
|
| 995 |
+
.style('font-size', '11px')
|
| 996 |
+
.style('font-weight', '600')
|
| 997 |
+
.style('color', 'var(--text-color)')
|
| 998 |
+
.style('opacity', '0.8')
|
| 999 |
+
.style('white-space', 'nowrap')
|
| 1000 |
+
.text(group.name);
|
| 1001 |
+
|
| 1002 |
+
// Créer un tooltip externe pour éviter les problèmes de positionnement dans foreignObject
|
| 1003 |
+
let legendTooltip = container.querySelector('.d3-legend-tooltip');
|
| 1004 |
+
if (!legendTooltip) {
|
| 1005 |
+
legendTooltip = d3.select(container).append('div')
|
| 1006 |
+
.attr('class', 'd3-legend-tooltip')
|
| 1007 |
+
.style('position', 'absolute')
|
| 1008 |
+
.style('padding', '8px 12px')
|
| 1009 |
+
.style('background', 'var(--surface-bg)')
|
| 1010 |
+
.style('border', '1px solid var(--border-color)')
|
| 1011 |
+
.style('border-radius', '6px')
|
| 1012 |
+
.style('box-shadow', '0 4px 12px rgba(0,0,0,0.15)')
|
| 1013 |
+
.style('font-size', '10px')
|
| 1014 |
+
.style('color', 'var(--text-color)')
|
| 1015 |
+
.style('white-space', 'nowrap')
|
| 1016 |
+
.style('opacity', '0')
|
| 1017 |
+
.style('pointer-events', 'none')
|
| 1018 |
+
.style('z-index', '10000')
|
| 1019 |
+
.style('transition', 'opacity 0.2s ease')
|
| 1020 |
+
.style('backdrop-filter', 'saturate(1.12) blur(8px)')
|
| 1021 |
+
.node();
|
| 1022 |
+
}
|
| 1023 |
+
|
| 1024 |
+
// Hover pour afficher le tooltip et highlight tous les benchmarks du groupe
|
| 1025 |
+
groupDiv.on('mouseenter', function(ev) {
|
| 1026 |
+
const tooltip = d3.select(legendTooltip);
|
| 1027 |
+
tooltip
|
| 1028 |
+
.text(group.benchmarks.join(', '))
|
| 1029 |
+
.style('opacity', '1');
|
| 1030 |
+
|
| 1031 |
+
// Positionner le tooltip au-dessus de l'élément
|
| 1032 |
+
const rect = this.getBoundingClientRect();
|
| 1033 |
+
const containerRect = container.getBoundingClientRect();
|
| 1034 |
+
const tooltipRect = legendTooltip.getBoundingClientRect();
|
| 1035 |
+
|
| 1036 |
+
const left = rect.left - containerRect.left + (rect.width / 2) - (tooltipRect.width / 2);
|
| 1037 |
+
const top = rect.top - containerRect.top - tooltipRect.height - 8;
|
| 1038 |
+
|
| 1039 |
+
tooltip
|
| 1040 |
+
.style('left', `${left}px`)
|
| 1041 |
+
.style('top', `${top}px`);
|
| 1042 |
+
|
| 1043 |
+
// Highlight tous les benchmarks du groupe
|
| 1044 |
+
group.benchmarks.forEach(benchmark => {
|
| 1045 |
+
const displayName = benchmark;
|
| 1046 |
+
highlightBenchmark(displayName);
|
| 1047 |
+
});
|
| 1048 |
+
}).on('mouseleave', function() {
|
| 1049 |
+
d3.select(legendTooltip).style('opacity', '0');
|
| 1050 |
+
resetHighlight();
|
| 1051 |
+
});
|
| 1052 |
+
});
|
| 1053 |
+
|
| 1054 |
+
};
|
| 1055 |
+
|
| 1056 |
+
// Hover helpers for data points
let hideTipTimer = null;

/**
 * Fill the shared point tooltip and show it next to the pointer.
 *
 * @param {MouseEvent} ev - Pointer event; only `clientX` / `clientY` are read.
 * @param {{model?: string, score?: (number|string)}} pointData - Hovered point.
 *   `model` may contain HTML markup; only its text is displayed.
 * @param {string} benchmarkName - Benchmark label shown in the tooltip header.
 * @param {string} color - CSS colour used for the header swatch.
 */
function showPointTooltip(ev, pointData, benchmarkName, color) {
  // A pending hide would blank the tooltip right after it is shown.
  if (hideTipTimer) {
    clearTimeout(hideTipTimer);
    hideTipTimer = null;
  }

  // Update theme colors
  themeColors = getThemeColors();

  // Every interpolated value goes through this before reaching innerHTML,
  // so data-provided text can never be interpreted as markup.
  const HTML_ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ENTITIES[ch]);

  // Extract the model name (it may contain HTML).
  const point = pointData || {};
  let modelName = String(point.model || 'N/A');
  if (modelName.includes('<')) {
    // Parse in an inert document: unlike assigning innerHTML on an element of
    // the live document, this does not run scripts or inline event handlers.
    const parsed = new DOMParser().parseFromString(modelName, 'text/html');
    modelName = (parsed.body && parsed.body.textContent) || 'N/A';
  }

  // A missing or non-numeric score is displayed as "N/A" instead of throwing.
  const numericScore = parseFloat(point.score);
  const scoreLabel = Number.isFinite(numericScore) ? `${numericScore.toFixed(2)}%` : 'N/A';

  // Build the tooltip content (left-aligned layout).
  const html = `
    <div style="display:flex;align-items:center;gap:8px;margin-bottom:12px;padding-bottom:12px;border-bottom:1px solid var(--border-color);">
      <span class="d3-tooltip__color-dot" style="background:${escapeHtml(color)};width:12px;height:12px;border-radius:2px;flex-shrink:0;"></span>
      <span style="font-weight:600;font-size:13px;color:var(--text-color);">${escapeHtml(benchmarkName)}</span>
    </div>
    <div style="margin-bottom:10px;text-align:left;">
      <div style="font-size:10px;color:var(--muted-color);text-transform:uppercase;letter-spacing:0.05em;margin-bottom:5px;">Score</div>
      <div style="font-size:18px;font-weight:700;color:var(--text-color);line-height:1.2;text-align:left;">${escapeHtml(scoreLabel)}</div>
    </div>
    <div style="text-align:left;">
      <div style="font-size:10px;color:var(--muted-color);text-transform:uppercase;letter-spacing:0.05em;margin-bottom:5px;">Model</div>
      <div style="font-size:12px;color:var(--text-color);line-height:1.5;word-break:break-word;text-align:left;">${escapeHtml(modelName)}</div>
    </div>
  `;

  tipInner.innerHTML = html;

  // Update tooltip background and border colors through CSS variables.
  tip.style.background = 'var(--surface-bg)';
  tip.style.borderColor = 'var(--border-color)';

  // Position the tooltip relative to the hovered point. The tooltip is
  // measured after its content is set so the size used below is current.
  const rect = container.getBoundingClientRect();
  const tipRect = tip.getBoundingClientRect();
  const offsetX = 15;
  const offsetY = -10;

  const tipX = ev.clientX - rect.left + offsetX;
  const tipY = ev.clientY - rect.top + offsetY;

  // Keep the tooltip on screen. tipX / tipY are relative to the container,
  // so the viewport limits are converted into container coordinates too.
  const maxX = window.innerWidth - rect.left - tipRect.width - 20;
  const maxY = window.innerHeight - rect.top - tipRect.height - 20;
  const finalX = Math.min(tipX, maxX);
  const finalY = Math.max(10, Math.min(tipY, maxY));

  tip.style.opacity = '1';
  tip.style.transform = `translate(${Math.round(finalX)}px, ${Math.round(finalY)}px)`;
}
|
| 1118 |
+
|
| 1119 |
+
/**
 * Hide the point tooltip after a 100 ms grace period.
 *
 * The delay lets the pointer travel between neighbouring points without the
 * tooltip flickering; showPointTooltip cancels the pending hide through
 * `hideTipTimer`.
 */
function hidePointTooltip() {
  // Only the most recent timer id is stored. Cancel the previous one first,
  // otherwise it becomes uncancellable and can hide a tooltip that was
  // re-shown in the meantime.
  if (hideTipTimer) {
    clearTimeout(hideTipTimer);
  }
  hideTipTimer = setTimeout(() => {
    hideTipTimer = null;
    tip.style.opacity = '0';
    // Park the element off-screen so it cannot cover the chart while invisible.
    tip.style.transform = 'translate(-9999px, -9999px)';
  }, 100);
}
|
| 1125 |
+
|
| 1126 |
+
// Tooltip for lines (benchmark name only)
let lineTooltip = null;

/**
 * Show a small label with the benchmark name next to the pointer.
 *
 * The tooltip element is created lazily inside `container` on first use and
 * reused afterwards.
 *
 * @param {MouseEvent} ev - Pointer event; only `clientX` / `clientY` are read.
 * @param {string} benchmarkName - Text displayed in the tooltip.
 * @param {string} color - Accepted for signature parity with the point
 *   tooltip; not used here.
 */
function showLineTooltip(ev, benchmarkName, color) {
  if (!lineTooltip) {
    lineTooltip = d3.select(container).append('div')
      .attr('class', 'd3-line-tooltip')
      .style('position', 'absolute')
      .style('padding', '6px 10px')
      .style('background', 'var(--surface-bg)')
      .style('border', '1px solid var(--border-color)')
      .style('border-radius', '4px')
      .style('font-size', '11px')
      .style('font-weight', '600')
      .style('color', 'var(--text-color)')
      .style('white-space', 'nowrap')
      .style('opacity', '0')
      .style('pointer-events', 'none')
      .style('z-index', '10000')
      .style('transition', 'opacity 0.15s ease')
      .style('box-shadow', '0 2px 8px rgba(0,0,0,0.1)')
      .node();
  }

  const tooltip = d3.select(lineTooltip);
  // Set the text before measuring so the width below matches the new label.
  tooltip.text(benchmarkName);

  const rect = container.getBoundingClientRect();
  const tooltipRect = lineTooltip.getBoundingClientRect();
  const offsetX = 10;
  const offsetY = -25;

  const tipX = ev.clientX - rect.left + offsetX;
  const tipY = ev.clientY - rect.top + offsetY;

  // Keep the tooltip on screen. `left` / `top` are relative to the container,
  // so the viewport limits are converted into container coordinates too.
  const maxX = window.innerWidth - rect.left - tooltipRect.width - 20;
  const maxY = window.innerHeight - rect.top - tooltipRect.height - 20;
  const finalX = Math.min(tipX, maxX);
  const finalY = Math.max(10, Math.min(tipY, maxY));

  tooltip
    .style('opacity', '1')
    .style('left', `${finalX}px`)
    .style('top', `${finalY}px`);
}
|
| 1172 |
+
|
| 1173 |
+
/**
 * Fade out the line tooltip. Does nothing if the tooltip element has not
 * been created yet.
 */
function hideLineTooltip() {
  if (!lineTooltip) return;
  d3.select(lineTooltip).style('opacity', '0');
}
|
| 1178 |
|
| 1179 |
+
// Load data and render
|
| 1180 |
+
loadData().then(() => {
|
| 1181 |
// First render + resize
|
| 1182 |
if (window.ResizeObserver) {
|
| 1183 |
const ro = new ResizeObserver(() => render());
|
|
|
|
| 1185 |
} else {
|
| 1186 |
window.addEventListener('resize', render);
|
| 1187 |
}
|
| 1188 |
+
|
| 1189 |
+
// Re-render when color palette changes (if ColorPalettes supports events)
|
| 1190 |
+
if (window.ColorPalettes && typeof window.ColorPalettes.refresh === 'function') {
|
| 1191 |
+
// Update colors and re-render when palette refreshes
|
| 1192 |
+
const updateColors = () => {
|
| 1193 |
+
if (data && data.benchmarks) {
|
| 1194 |
+
updateColorMap(Object.keys(data.benchmarks).sort());
|
| 1195 |
+
render();
|
| 1196 |
+
}
|
| 1197 |
+
};
|
| 1198 |
+
// Listen for custom event if available
|
| 1199 |
+
window.addEventListener('colorpalettechange', updateColors);
|
| 1200 |
+
}
|
| 1201 |
+
|
| 1202 |
render();
|
| 1203 |
+
});
|
| 1204 |
};
|
| 1205 |
|
| 1206 |
if (document.readyState === 'loading') {
|
| 1207 |
document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
|
| 1208 |
} else { ensureD3(bootstrap); }
|
| 1209 |
})();
|
| 1210 |
+
</script>
|
app/src/content/embeds/d3-mmlu-heatmap.html
ADDED
|
@@ -0,0 +1,489 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<div class="d3-mmlu-heatmap">
|
| 2 |
+
<div class="heatmap-container"></div>
|
| 3 |
+
<div class="legend-container"></div>
|
| 4 |
+
</div>
|
| 5 |
+
<style>
|
| 6 |
+
.d3-mmlu-heatmap {
|
| 7 |
+
position: relative;
|
| 8 |
+
margin: 24px 0;
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
.d3-mmlu-heatmap .heatmap-container {
|
| 12 |
+
width: 100%;
|
| 13 |
+
}
|
| 14 |
+
|
| 15 |
+
.d3-mmlu-heatmap .legend-container {
|
| 16 |
+
margin-top: 8px;
|
| 17 |
+
padding: 0 8px;
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
.d3-mmlu-heatmap .legend-title {
|
| 21 |
+
font-size: 12px;
|
| 22 |
+
font-weight: 600;
|
| 23 |
+
color: var(--text-color);
|
| 24 |
+
margin-bottom: 12px;
|
| 25 |
+
text-align: center;
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
.d3-mmlu-heatmap .legend-grid {
|
| 29 |
+
display: grid;
|
| 30 |
+
grid-template-columns: 1fr 1fr;
|
| 31 |
+
gap: 8px 24px;
|
| 32 |
+
font-size: 11px;
|
| 33 |
+
color: var(--text-color);
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
.d3-mmlu-heatmap .legend-column {
|
| 37 |
+
display: flex;
|
| 38 |
+
flex-direction: column;
|
| 39 |
+
gap: 8px;
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
.d3-mmlu-heatmap .legend-item {
|
| 43 |
+
display: flex;
|
| 44 |
+
align-items: flex-start;
|
| 45 |
+
gap: 8px;
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
.d3-mmlu-heatmap .legend-label {
|
| 49 |
+
font-weight: 700;
|
| 50 |
+
min-width: 20px;
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
.d3-mmlu-heatmap .legend-text {
|
| 54 |
+
flex: 1;
|
| 55 |
+
line-height: 1.4;
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
.d3-mmlu-heatmap .axis-label {
|
| 59 |
+
fill: var(--text-color);
|
| 60 |
+
font-size: 11px;
|
| 61 |
+
font-weight: 600;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
.d3-mmlu-heatmap .cell-text {
|
| 65 |
+
fill: var(--text-color);
|
| 66 |
+
font-size: 10px;
|
| 67 |
+
font-weight: 600;
|
| 68 |
+
pointer-events: none;
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
@media (max-width: 768px) {
|
| 72 |
+
.d3-mmlu-heatmap .legend-grid {
|
| 73 |
+
grid-template-columns: 1fr;
|
| 74 |
+
}
|
| 75 |
+
}
|
| 76 |
+
</style>
|
| 77 |
+
<script>
|
| 78 |
+
(() => {
|
| 79 |
+
// Load D3 from CDN once
|
| 80 |
+
const ensureD3 = (cb) => {
|
| 81 |
+
if (window.d3 && typeof window.d3.select === 'function') return cb();
|
| 82 |
+
let s = document.getElementById('d3-cdn-script');
|
| 83 |
+
if (!s) {
|
| 84 |
+
s = document.createElement('script');
|
| 85 |
+
s.id = 'd3-cdn-script';
|
| 86 |
+
s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
|
| 87 |
+
document.head.appendChild(s);
|
| 88 |
+
}
|
| 89 |
+
const onReady = () => { if (window.d3 && typeof window.d3.select === 'function') cb(); };
|
| 90 |
+
s.addEventListener('load', onReady, { once: true });
|
| 91 |
+
if (window.d3) onReady();
|
| 92 |
+
};
|
| 93 |
+
|
| 94 |
+
const bootstrap = () => {
|
| 95 |
+
const scriptEl = document.currentScript;
|
| 96 |
+
let container = scriptEl ? scriptEl.previousElementSibling : null;
|
| 97 |
+
if (!(container && container.classList && container.classList.contains('d3-mmlu-heatmap'))) {
|
| 98 |
+
const cs = Array.from(document.querySelectorAll('.d3-mmlu-heatmap')).filter(el => !(el.dataset && el.dataset.mounted === 'true'));
|
| 99 |
+
container = cs[cs.length - 1] || null;
|
| 100 |
+
}
|
| 101 |
+
if (!container) return;
|
| 102 |
+
if (container.dataset) {
|
| 103 |
+
if (container.dataset.mounted === 'true') return;
|
| 104 |
+
container.dataset.mounted = 'true';
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
// Tooltip
|
| 108 |
+
container.style.position = container.style.position || 'relative';
|
| 109 |
+
let tip = container.querySelector('.d3-tooltip');
|
| 110 |
+
let tipInner;
|
| 111 |
+
if (!tip) {
|
| 112 |
+
tip = document.createElement('div');
|
| 113 |
+
tip.className = 'd3-tooltip';
|
| 114 |
+
Object.assign(tip.style, {
|
| 115 |
+
position: 'absolute',
|
| 116 |
+
top: '0px',
|
| 117 |
+
left: '0px',
|
| 118 |
+
transform: 'translate(-9999px, -9999px)',
|
| 119 |
+
pointerEvents: 'none',
|
| 120 |
+
padding: '8px 10px',
|
| 121 |
+
borderRadius: '8px',
|
| 122 |
+
fontSize: '12px',
|
| 123 |
+
lineHeight: '1.35',
|
| 124 |
+
border: '1px solid var(--border-color)',
|
| 125 |
+
background: 'var(--surface-bg)',
|
| 126 |
+
color: 'var(--text-color)',
|
| 127 |
+
boxShadow: '0 4px 24px rgba(0,0,0,.18)',
|
| 128 |
+
opacity: '0',
|
| 129 |
+
transition: 'opacity .12s ease'
|
| 130 |
+
});
|
| 131 |
+
tipInner = document.createElement('div');
|
| 132 |
+
tipInner.className = 'd3-tooltip__inner';
|
| 133 |
+
tipInner.style.textAlign = 'left';
|
| 134 |
+
tip.appendChild(tipInner);
|
| 135 |
+
container.appendChild(tip);
|
| 136 |
+
} else {
|
| 137 |
+
tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
// Heatmap container (no card)
|
| 141 |
+
const heatmapContainer = container.querySelector('.heatmap-container');
|
| 142 |
+
const svg = d3.select(heatmapContainer).append('svg').attr('width', '100%').style('display', 'block');
|
| 143 |
+
const defs = svg.append('defs');
|
| 144 |
+
const gRoot = svg.append('g');
|
| 145 |
+
const gCells = gRoot.append('g');
|
| 146 |
+
const gAxes = gRoot.append('g');
|
| 147 |
+
|
| 148 |
+
// Data from the image (5 models)
|
| 149 |
+
const models = [
|
| 150 |
+
'Mistral-7B-v0.1',
|
| 151 |
+
'Qwen1.5-7B',
|
| 152 |
+
'gemma-7b',
|
| 153 |
+
'phi-2',
|
| 154 |
+
'DeciLM-7B'
|
| 155 |
+
];
|
| 156 |
+
|
| 157 |
+
const promptFormats = [
|
| 158 |
+
'...? -> choice1/choice2/...',
|
| 159 |
+
'Q:...? A: -> choice1/choice2/...',
|
| 160 |
+
'Question: ...? Answer: -> choice1/choice2/...',
|
| 161 |
+
'Question: ...? Choices: ... Answer: -> choice1/choice2/...',
|
| 162 |
+
'Question: ...? Choices: A. ... Answer: -> choice1/choice2/...',
|
| 163 |
+
'Question: ...? Choices: (A) ... Answer: -> choice1/choice2/...',
|
| 164 |
+
'Question: ...? Choices: A. ... Answer: -> A/B/C/D',
|
| 165 |
+
'Question: ...? Choices: (A) Answer: -> (A)/(B)/(C)/(D)'
|
| 166 |
+
];
|
| 167 |
+
|
| 168 |
+
const matrix = [
|
| 169 |
+
[49.0, 50.5, 52.1, 54.5, 56.4, 55.4, 55.5, 57.0], // Mistral-7B-v0.1
|
| 170 |
+
[37.6, 41.8, 43.5, 47.9, 50.8, 51.2, 22.9, 47.7], // Qwen1.5-7B
|
| 171 |
+
[44.6, 48.0, 47.6, 53.5, 54.2, 54.9, 56.4, 50.7], // gemma-7b
|
| 172 |
+
[39.1, 44.3, 46.5, 46.1, 47.1, 48.4, 51.7, 45.8], // phi-2
|
| 173 |
+
[43.6, 48.9, 49.5, 51.0, 51.3, 52.0, 52.8, 52.3] // DeciLM-7B
|
| 174 |
+
];
|
| 175 |
+
|
| 176 |
+
// Colors: red to green palette (red for low, green for high)
|
| 177 |
+
const getDivergingColors = (count) => {
|
| 178 |
+
try {
|
| 179 |
+
if (window.ColorPalettes && typeof window.ColorPalettes.getColors === 'function') {
|
| 180 |
+
return window.ColorPalettes.getColors('diverging', count);
|
| 181 |
+
}
|
| 182 |
+
} catch (_) { }
|
| 183 |
+
// Fallback: red to green scale
|
| 184 |
+
const colors = [];
|
| 185 |
+
for (let i = 0; i < count; i++) {
|
| 186 |
+
const t = i / (count - 1);
|
| 187 |
+
// Red (low) -> Yellow (mid) -> Green (high)
|
| 188 |
+
if (t < 0.5) {
|
| 189 |
+
// Red to yellow
|
| 190 |
+
const r = 255;
|
| 191 |
+
const g = Math.round(t * 2 * 255);
|
| 192 |
+
const b = 0;
|
| 193 |
+
colors.push(`rgb(${r}, ${g}, ${b})`);
|
| 194 |
+
} else {
|
| 195 |
+
// Yellow to green
|
| 196 |
+
const t2 = (t - 0.5) * 2;
|
| 197 |
+
const r = Math.round(255 - t2 * 255);
|
| 198 |
+
const g = 255;
|
| 199 |
+
const b = 0;
|
| 200 |
+
colors.push(`rgb(${r}, ${g}, ${b})`);
|
| 201 |
+
}
|
| 202 |
+
}
|
| 203 |
+
return colors;
|
| 204 |
+
};
|
| 205 |
+
|
| 206 |
+
const palette = getDivergingColors(10);
|
| 207 |
+
|
| 208 |
+
let width = 900;
|
| 209 |
+
const margin = { top: 0, right: 0, bottom: 0, left: 100 }; // Only left margin for model names
|
| 210 |
+
|
| 211 |
+
function updateSize() {
|
| 212 |
+
width = container.clientWidth || 900;
|
| 213 |
+
|
| 214 |
+
// Calculate actual content dimensions
|
| 215 |
+
const nRows = models.length;
|
| 216 |
+
const nCols = promptFormats.length;
|
| 217 |
+
const innerWidth = width - margin.left - margin.right;
|
| 218 |
+
const maxDim = Math.max(nRows, nCols);
|
| 219 |
+
const availableSize = Math.min(innerWidth, 600);
|
| 220 |
+
const cellSize = availableSize / maxDim;
|
| 221 |
+
const gridWidth = cellSize * nCols;
|
| 222 |
+
const gridHeight = cellSize * nRows;
|
| 223 |
+
const labelsHeight = 15; // space for X-axis labels
|
| 224 |
+
|
| 225 |
+
// Calculate exact SVG dimensions needed
|
| 226 |
+
const actualWidth = margin.left + gridWidth + margin.right;
|
| 227 |
+
const actualHeight = margin.top + gridHeight + labelsHeight + margin.bottom;
|
| 228 |
+
|
| 229 |
+
svg
|
| 230 |
+
.attr('viewBox', `0 0 ${actualWidth} ${actualHeight}`)
|
| 231 |
+
.attr('preserveAspectRatio', 'xMidYMin meet')
|
| 232 |
+
.style('width', '100%')
|
| 233 |
+
.style('height', 'auto');
|
| 234 |
+
|
| 235 |
+
gRoot.attr('transform', `translate(${margin.left},${margin.top})`);
|
| 236 |
+
return { innerWidth: gridWidth, innerHeight: gridHeight + labelsHeight };
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
function getColorScale(values, minV, maxV) {
|
| 240 |
+
// Always use the custom red-to-green palette (fallback)
|
| 241 |
+
// Don't use ColorPalettes for this specific heatmap
|
| 242 |
+
const linearScale = d3.scaleLinear()
|
| 243 |
+
.domain([minV, maxV])
|
| 244 |
+
.range([0, 1])
|
| 245 |
+
.clamp(true);
|
| 246 |
+
|
| 247 |
+
return (v) => {
|
| 248 |
+
const t = linearScale(v);
|
| 249 |
+
// Apply power transformation to emphasize extremes
|
| 250 |
+
// Values near min/max get more extreme colors
|
| 251 |
+
let transformedT;
|
| 252 |
+
if (t < 0.5) {
|
| 253 |
+
// Compress lower values, making extremes more distinct
|
| 254 |
+
transformedT = Math.pow(t * 2, 1.8) / 2;
|
| 255 |
+
} else {
|
| 256 |
+
// Expand upper values, making extremes more distinct
|
| 257 |
+
transformedT = 0.5 + Math.pow((t - 0.5) * 2, 1.8) / 2;
|
| 258 |
+
}
|
| 259 |
+
|
| 260 |
+
// Red to green scale: red (low scores = bad) -> yellow (mid) -> green (high scores = good)
|
| 261 |
+
// Less flashy: reduce saturation
|
| 262 |
+
if (transformedT < 0.5) {
|
| 263 |
+
// Red to yellow (less saturated)
|
| 264 |
+
const r = 220;
|
| 265 |
+
const g = Math.round(80 + transformedT * 2 * 140);
|
| 266 |
+
const b = Math.round(60 + transformedT * 2 * 40);
|
| 267 |
+
return `rgb(${r}, ${g}, ${b})`;
|
| 268 |
+
} else {
|
| 269 |
+
// Yellow to green (less saturated)
|
| 270 |
+
const t2 = (transformedT - 0.5) * 2;
|
| 271 |
+
const r = Math.round(220 - t2 * 100);
|
| 272 |
+
const g = 220;
|
| 273 |
+
const b = Math.round(100 - t2 * 60);
|
| 274 |
+
return `rgb(${r}, ${g}, ${b})`;
|
| 275 |
+
}
|
| 276 |
+
};
|
| 277 |
+
}
|
| 278 |
+
|
| 279 |
+
function chooseReadableTextColor(bgColor) {
|
| 280 |
+
try {
|
| 281 |
+
const m = String(bgColor || '').match(/rgb\(([^)]+)\)/);
|
| 282 |
+
if (!m) return '#0e1116';
|
| 283 |
+
const [r, g, b] = m[1].split(',').map(s => parseFloat(s.trim()));
|
| 284 |
+
const luminance = (0.299 * r + 0.587 * g + 0.114 * b) / 255;
|
| 285 |
+
return luminance < 0.5 ? '#ffffff' : '#0e1116';
|
| 286 |
+
} catch (_) {
|
| 287 |
+
return '#0e1116';
|
| 288 |
+
}
|
| 289 |
+
}
|
| 290 |
+
|
| 291 |
+
function render() {
|
| 292 |
+
const { innerWidth, innerHeight } = updateSize();
|
| 293 |
+
const nRows = models.length;
|
| 294 |
+
const nCols = promptFormats.length;
|
| 295 |
+
|
| 296 |
+
// Calculate cell size to make each cell square
|
| 297 |
+
const maxDim = Math.max(nRows, nCols);
|
| 298 |
+
const cellSize = innerWidth / maxDim;
|
| 299 |
+
const gridWidth = cellSize * nCols;
|
| 300 |
+
const gridHeight = cellSize * nRows;
|
| 301 |
+
|
| 302 |
+
const gridOffsetX = 0;
|
| 303 |
+
const gridOffsetY = 0;
|
| 304 |
+
|
| 305 |
+
const x = d3.scaleBand()
|
| 306 |
+
.domain(d3.range(nCols))
|
| 307 |
+
.range([0, gridWidth])
|
| 308 |
+
.paddingInner(0);
|
| 309 |
+
|
| 310 |
+
const y = d3.scaleBand()
|
| 311 |
+
.domain(d3.range(nRows))
|
| 312 |
+
.range([0, gridHeight])
|
| 313 |
+
.paddingInner(0);
|
| 314 |
+
|
| 315 |
+
// Flatten matrix data
|
| 316 |
+
const flatData = [];
|
| 317 |
+
let minVal = Infinity, maxVal = -Infinity;
|
| 318 |
+
for (let r = 0; r < nRows; r++) {
|
| 319 |
+
for (let c = 0; c < nCols; c++) {
|
| 320 |
+
const value = matrix[r][c];
|
| 321 |
+
if (value < minVal) minVal = value;
|
| 322 |
+
if (value > maxVal) maxVal = value;
|
| 323 |
+
flatData.push({ r, c, value, model: models[r], format: promptFormats[c] });
|
| 324 |
+
}
|
| 325 |
+
}
|
| 326 |
+
|
| 327 |
+
const colorScale = getColorScale(flatData.map(d => d.value), minVal, maxVal);
|
| 328 |
+
|
| 329 |
+
gCells.attr('transform', `translate(${gridOffsetX}, ${gridOffsetY})`);
|
| 330 |
+
|
| 331 |
+
// Add rounded corners only on the outer edges of the matrix using clipPath
|
| 332 |
+
const cornerRadius = 6;
|
| 333 |
+
defs.selectAll('#matrix-clip').remove();
|
| 334 |
+
const clipPath = defs.append('clipPath')
|
| 335 |
+
.attr('id', 'matrix-clip');
|
| 336 |
+
clipPath.append('rect')
|
| 337 |
+
.attr('x', 0)
|
| 338 |
+
.attr('y', 0)
|
| 339 |
+
.attr('width', gridWidth)
|
| 340 |
+
.attr('height', gridHeight)
|
| 341 |
+
.attr('rx', cornerRadius)
|
| 342 |
+
.attr('ry', cornerRadius);
|
| 343 |
+
|
| 344 |
+
gCells.attr('clip-path', 'url(#matrix-clip)');
|
| 345 |
+
|
| 346 |
+
const cells = gCells.selectAll('g.cell')
|
| 347 |
+
.data(flatData, d => `${d.r}-${d.c}`);
|
| 348 |
+
|
| 349 |
+
const cellsEnter = cells.enter()
|
| 350 |
+
.append('g')
|
| 351 |
+
.attr('class', 'cell');
|
| 352 |
+
|
| 353 |
+
cellsEnter.append('rect')
|
| 354 |
+
.attr('rx', 0)
|
| 355 |
+
.attr('ry', 0)
|
| 356 |
+
.on('mousemove', (event, d) => {
|
| 357 |
+
const [px, py] = d3.pointer(event, container);
|
| 358 |
+
tipInner.innerHTML = `<strong>${d.model}</strong><br/>${d.format}<br/>Score: ${d.value.toFixed(1)}`;
|
| 359 |
+
tip.style.transform = `translate(${px + 10}px, ${py + 10}px)`;
|
| 360 |
+
tip.style.opacity = '1';
|
| 361 |
+
})
|
| 362 |
+
.on('mouseleave', () => {
|
| 363 |
+
tip.style.opacity = '0';
|
| 364 |
+
});
|
| 365 |
+
|
| 366 |
+
cellsEnter.append('text')
|
| 367 |
+
.attr('class', 'cell-text')
|
| 368 |
+
.attr('text-anchor', 'middle')
|
| 369 |
+
.attr('dominant-baseline', 'middle');
|
| 370 |
+
|
| 371 |
+
const cellsMerged = cellsEnter.merge(cells);
|
| 372 |
+
|
| 373 |
+
cellsMerged.select('rect')
|
| 374 |
+
.attr('x', d => x(d.c))
|
| 375 |
+
.attr('y', d => y(d.r))
|
| 376 |
+
.attr('width', Math.max(1, x.bandwidth()))
|
| 377 |
+
.attr('height', Math.max(1, y.bandwidth()))
|
| 378 |
+
.attr('fill', d => colorScale(d.value));
|
| 379 |
+
|
| 380 |
+
cellsMerged.select('text')
|
| 381 |
+
.attr('x', d => x(d.c) + x.bandwidth() / 2)
|
| 382 |
+
.attr('y', d => y(d.r) + y.bandwidth() / 2)
|
| 383 |
+
.text(d => d.value.toFixed(1))
|
| 384 |
+
.style('fill', function(d) {
|
| 385 |
+
try {
|
| 386 |
+
const rect = this.parentNode.querySelector('rect');
|
| 387 |
+
const bg = rect ? getComputedStyle(rect).fill : colorScale(d.value);
|
| 388 |
+
return chooseReadableTextColor(bg);
|
| 389 |
+
} catch (_) {
|
| 390 |
+
return '#0e1116';
|
| 391 |
+
}
|
| 392 |
+
});
|
| 393 |
+
|
| 394 |
+
cells.exit().remove();
|
| 395 |
+
|
| 396 |
+
// Axes
|
| 397 |
+
gAxes.selectAll('*').remove();
|
| 398 |
+
gAxes.attr('transform', `translate(${gridOffsetX}, ${gridOffsetY})`);
|
| 399 |
+
|
| 400 |
+
// X-axis labels (prompt formats)
|
| 401 |
+
gAxes.append('g')
|
| 402 |
+
.selectAll('text')
|
| 403 |
+
.data(promptFormats)
|
| 404 |
+
.join('text')
|
| 405 |
+
.attr('class', 'axis-label')
|
| 406 |
+
.attr('text-anchor', 'middle')
|
| 407 |
+
.attr('x', (_, i) => x(i) + x.bandwidth() / 2)
|
| 408 |
+
.attr('y', gridHeight + 12)
|
| 409 |
+
.text((d, i) => String.fromCharCode(65 + i)); // A, B, C, D, E, F, G
|
| 410 |
+
|
| 411 |
+
// Y-axis labels (models)
|
| 412 |
+
gAxes.append('g')
|
| 413 |
+
.selectAll('text')
|
| 414 |
+
.data(models)
|
| 415 |
+
.join('text')
|
| 416 |
+
.attr('class', 'axis-label')
|
| 417 |
+
.attr('text-anchor', 'end')
|
| 418 |
+
.attr('x', -10)
|
| 419 |
+
.attr('y', (_, i) => y(i) + y.bandwidth() / 2)
|
| 420 |
+
.attr('dominant-baseline', 'middle')
|
| 421 |
+
.text(d => d);
|
| 422 |
+
|
| 423 |
+
// Update HTML legend
|
| 424 |
+
const legendContainer = container.querySelector('.legend-container');
|
| 425 |
+
legendContainer.innerHTML = '';
|
| 426 |
+
|
| 427 |
+
const legendTitle = document.createElement('div');
|
| 428 |
+
legendTitle.className = 'legend-title';
|
| 429 |
+
legendTitle.textContent = 'Prompt Formats:';
|
| 430 |
+
legendContainer.appendChild(legendTitle);
|
| 431 |
+
|
| 432 |
+
const legendGrid = document.createElement('div');
|
| 433 |
+
legendGrid.className = 'legend-grid';
|
| 434 |
+
|
| 435 |
+
// Column 1: A, B, C, D (first 4)
|
| 436 |
+
const column1 = document.createElement('div');
|
| 437 |
+
column1.className = 'legend-column';
|
| 438 |
+
|
| 439 |
+
// Column 2: E, F, G, H (last 4)
|
| 440 |
+
const column2 = document.createElement('div');
|
| 441 |
+
column2.className = 'legend-column';
|
| 442 |
+
|
| 443 |
+
promptFormats.forEach((format, i) => {
|
| 444 |
+
const item = document.createElement('div');
|
| 445 |
+
item.className = 'legend-item';
|
| 446 |
+
|
| 447 |
+
const label = document.createElement('span');
|
| 448 |
+
label.className = 'legend-label';
|
| 449 |
+
label.textContent = `${String.fromCharCode(65 + i)}.`;
|
| 450 |
+
|
| 451 |
+
const text = document.createElement('span');
|
| 452 |
+
text.className = 'legend-text';
|
| 453 |
+
text.textContent = format;
|
| 454 |
+
|
| 455 |
+
item.appendChild(label);
|
| 456 |
+
item.appendChild(text);
|
| 457 |
+
|
| 458 |
+
// First 4 go to column 1, rest go to column 2
|
| 459 |
+
if (i < 4) {
|
| 460 |
+
column1.appendChild(item);
|
| 461 |
+
} else {
|
| 462 |
+
column2.appendChild(item);
|
| 463 |
+
}
|
| 464 |
+
});
|
| 465 |
+
|
| 466 |
+
legendGrid.appendChild(column1);
|
| 467 |
+
legendGrid.appendChild(column2);
|
| 468 |
+
|
| 469 |
+
legendContainer.appendChild(legendGrid);
|
| 470 |
+
}
|
| 471 |
+
|
| 472 |
+
// Initial render + resize handling
|
| 473 |
+
render();
|
| 474 |
+
const rerender = () => render();
|
| 475 |
+
if (window.ResizeObserver) {
|
| 476 |
+
const ro = new ResizeObserver(() => rerender());
|
| 477 |
+
ro.observe(container);
|
| 478 |
+
} else {
|
| 479 |
+
window.addEventListener('resize', rerender);
|
| 480 |
+
}
|
| 481 |
+
};
|
| 482 |
+
|
| 483 |
+
if (document.readyState === 'loading') {
|
| 484 |
+
document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
|
| 485 |
+
} else {
|
| 486 |
+
ensureD3(bootstrap);
|
| 487 |
+
}
|
| 488 |
+
})();
|
| 489 |
+
</script>
|
app/src/content/embeds/d3-two-lines-chart.html
CHANGED
|
@@ -572,8 +572,33 @@
|
|
| 572 |
.attr('d', d => line(applySmoothing(d.values, smoothEnabled)));
|
| 573 |
|
| 574 |
// Update axes
|
| 575 |
-
|
| 576 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 577 |
|
| 578 |
// Update baseline position
|
| 579 |
if (baseline !== null) {
|
|
@@ -639,7 +664,26 @@
|
|
| 639 |
// Create smart formatters
|
| 640 |
const stepValues = allData.map(d => d.step);
|
| 641 |
const metricValues = allData.map(d => d.value);
|
| 642 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 643 |
formatValue = createSmartFormatter(metricValues);
|
| 644 |
|
| 645 |
// Update clip
|
|
@@ -660,10 +704,38 @@
|
|
| 660 |
|
| 661 |
// Axes
|
| 662 |
gAxes.selectAll('*').remove();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 663 |
gAxes.append('g').attr('class', 'x-axis').attr('transform', `translate(0,${innerHeight})`)
|
| 664 |
-
.call(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 665 |
gAxes.append('g').attr('class', 'y-axis')
|
| 666 |
-
.call(d3.axisLeft(yScale).ticks(5).tickSizeOuter(0).tickFormat(
|
| 667 |
gAxes.selectAll('.domain, .tick line').attr('stroke', 'var(--axis-color)');
|
| 668 |
gAxes.selectAll('text').attr('fill', 'var(--tick-color)');
|
| 669 |
|
|
@@ -940,7 +1012,7 @@
|
|
| 940 |
const mean = values.reduce((a, b) => a + b, 0) / values.length;
|
| 941 |
allData.push({
|
| 942 |
run: runName,
|
| 943 |
-
step: parseFloat(tokenKey)
|
| 944 |
value: mean
|
| 945 |
});
|
| 946 |
});
|
|
|
|
| 572 |
.attr('d', d => line(applySmoothing(d.values, smoothEnabled)));
|
| 573 |
|
| 574 |
// Update axes
|
| 575 |
+
const newXTicks = newXScale.ticks(5);
|
| 576 |
+
const newXAxis = d3.axisBottom(newXScale)
|
| 577 |
+
.tickValues(newXTicks)
|
| 578 |
+
.tickSizeOuter(0)
|
| 579 |
+
.tickFormat(formatStep);
|
| 580 |
+
gAxes.select('.x-axis').call(newXAxis);
|
| 581 |
+
|
| 582 |
+
// Format Y axis to round appropriately based on value magnitude
|
| 583 |
+
/**
 * Tick-label formatter for the Y axis: rounds a tick value to a precision
 * chosen from its magnitude and returns the label string.
 *
 *   |v| < 1   -> nearest 0.1, one decimal (d3 '.1f')
 *   |v| < 10  -> nearest integer          (d3 'd')
 *   otherwise -> nearest multiple of 10   (d3 'd')
 *
 * The magnitude (Math.abs) selects the branch so negative values are rounded
 * the same way as their positive counterparts instead of all being sent to
 * the one-decimal branch. Output for non-negative values is unchanged.
 *
 * NOTE(review): rounding can be coarser than the tick step (e.g. 15 and 20
 * both render as "20"); confirm this is acceptable for the plotted ranges.
 *
 * @param {number} v - Tick value supplied by the d3 axis.
 * @returns {string} Rounded label text.
 */
const formatValueRounded = (v) => {
  // Exact zero is always shown without decimals.
  if (v === 0) return '0';

  const magnitude = Math.abs(v);

  if (magnitude < 1) {
    return d3.format('.1f')(Math.round(v * 10) / 10);
  }
  if (magnitude < 10) {
    return d3.format('d')(Math.round(v));
  }
  // 10 and above (the original had two identical branches for <100 and >=100).
  return d3.format('d')(Math.round(v / 10) * 10);
};
|
| 600 |
+
|
| 601 |
+
gAxes.select('.y-axis').call(d3.axisLeft(newYScale).ticks(5).tickSizeOuter(0).tickFormat(formatValueRounded));
|
| 602 |
|
| 603 |
// Update baseline position
|
| 604 |
if (baseline !== null) {
|
|
|
|
| 664 |
// Create smart formatters
|
| 665 |
const stepValues = allData.map(d => d.step);
|
| 666 |
const metricValues = allData.map(d => d.value);
|
| 667 |
+
|
| 668 |
+
// For X axis (tokens already in billions), use a specific formatter
|
| 669 |
+
const stepMin = d3.min(stepValues);
|
| 670 |
+
const stepMax = d3.max(stepValues);
|
| 671 |
+
|
| 672 |
+
// Tokens are already in billions, format appropriately
|
| 673 |
+
// Tick-label formatter for the X axis (step values). Assigns to the
// `formatStep` binding declared elsewhere in this file.
// Precision is chosen from the value's magnitude:
//   |v| < 0.01 -> three decimals
//   |v| < 1    -> two decimals
//   |v| < 10   -> one decimal
//   otherwise  -> integer when within 0.01 of a whole number, else one decimal
// Math.abs selects the branch so a negative value is not always sent to the
// three-decimal branch; output for non-negative values is unchanged.
// NOTE(review): surrounding comments say the steps are token counts already
// expressed in billions; confirm against the data loader.
formatStep = (v) => {
  if (v === 0) return '0';

  const magnitude = Math.abs(v);

  if (magnitude < 0.01) return d3.format('.3f')(v);
  // The original had separate, identical '.2f' branches for <0.1 and <1.
  if (magnitude < 1) return d3.format('.2f')(v);
  if (magnitude < 10) return d3.format('.1f')(v);

  // Larger values: drop the decimal when the value is effectively an integer.
  if (Math.abs(v - Math.round(v)) < 0.01) {
    return d3.format('d')(Math.round(v));
  }
  return d3.format('.1f')(v);
};
|
| 686 |
+
|
| 687 |
formatValue = createSmartFormatter(metricValues);
|
| 688 |
|
| 689 |
// Update clip
|
|
|
|
| 704 |
|
| 705 |
// Axes
|
| 706 |
gAxes.selectAll('*').remove();
|
| 707 |
+
|
| 708 |
+
// Generate explicit ticks for X axis
|
| 709 |
+
const xTicks = xScale.ticks(5);
|
| 710 |
+
const xAxis = d3.axisBottom(xScale)
|
| 711 |
+
.tickValues(xTicks)
|
| 712 |
+
.tickSizeOuter(0)
|
| 713 |
+
.tickFormat(formatStep);
|
| 714 |
+
|
| 715 |
gAxes.append('g').attr('class', 'x-axis').attr('transform', `translate(0,${innerHeight})`)
|
| 716 |
+
.call(xAxis);
|
| 717 |
+
|
| 718 |
+
// Format Y axis to round appropriately based on value magnitude
|
| 719 |
+
/**
 * Y-axis tick formatter: picks a rounding step from the size of the value
 * and returns the label text.
 *
 *   |v| < 1   -> rounded to 0.1, printed with one decimal (d3 '.1f')
 *   |v| < 10  -> rounded to an integer                    (d3 'd')
 *   otherwise -> rounded to a multiple of 10              (d3 'd')
 *
 * Branch selection uses Math.abs(v) so negative ticks follow the same rule
 * as positive ones; previously every negative value took the one-decimal
 * branch. Labels for non-negative values are the same as before.
 *
 * NOTE(review): when ticks are closer together than the rounding step the
 * labels can repeat (e.g. 1.5 and 2 both print "2"); confirm with real data.
 *
 * @param {number} v - Tick value passed in by the d3 axis.
 * @returns {string} Label for the tick.
 */
const formatValueRounded = (v) => {
  // Zero never carries decimals.
  if (v === 0) return '0';

  const magnitude = Math.abs(v);

  if (magnitude < 1) {
    return d3.format('.1f')(Math.round(v * 10) / 10);
  }
  if (magnitude < 10) {
    return d3.format('d')(Math.round(v));
  }
  // Covers both former ">= 10" branches, which were identical.
  return d3.format('d')(Math.round(v / 10) * 10);
};
|
| 736 |
+
|
| 737 |
gAxes.append('g').attr('class', 'y-axis')
|
| 738 |
+
.call(d3.axisLeft(yScale).ticks(5).tickSizeOuter(0).tickFormat(formatValueRounded));
|
| 739 |
gAxes.selectAll('.domain, .tick line').attr('stroke', 'var(--axis-color)');
|
| 740 |
gAxes.selectAll('text').attr('fill', 'var(--tick-color)');
|
| 741 |
|
|
|
|
| 1012 |
const mean = values.reduce((a, b) => a + b, 0) / values.length;
|
| 1013 |
allData.push({
|
| 1014 |
run: runName,
|
| 1015 |
+
step: parseFloat(tokenKey), // Tokens are already in billions in CSV
|
| 1016 |
value: mean
|
| 1017 |
});
|
| 1018 |
});
|