fontmap / src /typography /new-pipe /batch-testing /batch-umap-testing.mjs
tfrere's picture
tfrere HF Staff
update
6bda4a6
#!/usr/bin/env node
import { fileURLToPath } from 'url';
import path from 'path';
import fs from 'fs/promises';
import sharp from 'sharp';
import { UMAP } from 'umap-js';
import { Matrix } from 'ml-matrix';
import cliProgress from 'cli-progress';
import chalk from 'chalk';
// ES modules expose no __filename / __dirname, so both are derived from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Path configuration: every location is expressed relative to this script's directory.
const resolveFromScriptDir = (...segments) => path.join(__dirname, ...segments);

const PNGS_DIR = resolveFromScriptDir('..', 'output', 'pngs');
const FONT_INDEX_PATH = resolveFromScriptDir('..', 'input', 'font-index.json');
const RESULTS_DIR = resolveFromScriptDir('results');
const VISUALIZATIONS_DIR = resolveFromScriptDir('visualizations');
const CONFIGS_DIR = resolveFromScriptDir('configs');
// Default configuration
const DEFAULT_CONFIG = {
// UMAP parameters
nNeighbors: 15,
minDist: 1.0,
metric: 'euclidean',
// Weights of the image embedding and of the category encoding
embeddingWeight: 0.8,
categoryWeight: 0.2,
// Font family fusion (merging of fonts that share an id prefix)
enableFontFusion: true,
fusionPrefixLength: 2,
// Test name
testName: 'default'
};
// Test configurations, filled in by loadTestConfigs() from the JSON file (or the fallback).
let TEST_CONFIGS = [];

/**
 * Loads the batch-test configurations from `test-configs.json` (inside CONFIGS_DIR)
 * into the module-level TEST_CONFIGS array.
 *
 * The file must contain a JSON array. When it is missing, unreadable, not valid
 * JSON, or parses to something other than an array, a warning that includes the
 * reason is logged and a single fallback configuration based on DEFAULT_CONFIG
 * is used instead.
 *
 * @returns {Promise<void>} Resolves once TEST_CONFIGS has been assigned.
 */
async function loadTestConfigs() {
  try {
    const configPath = path.join(CONFIGS_DIR, 'test-configs.json');
    const parsed = JSON.parse(await fs.readFile(configPath, 'utf8'));

    // A non-array would otherwise be accepted silently and make the batch loop run zero tests.
    if (!Array.isArray(parsed)) {
      throw new TypeError('test-configs.json doit contenir un tableau JSON de configurations');
    }

    TEST_CONFIGS = parsed;
    console.log(chalk.blue(`📋 ${TEST_CONFIGS.length} configurations chargées depuis test-configs.json`));
  } catch (error) {
    console.log(chalk.yellow('⚠️ Impossible de charger test-configs.json, utilisation des configs par défaut'));
    // Surface the actual cause (missing file, JSON syntax error, wrong shape) for diagnosis.
    console.log(chalk.gray(` (${error.message})`));

    // Fallback configuration
    TEST_CONFIGS = [
      {
        ...DEFAULT_CONFIG,
        testName: 'default',
        nNeighbors: 15,
        minDist: 1.0,
        embeddingWeight: 0.7,
        categoryWeight: 0.3
      }
    ];
  }
}
// Progress bar shared by the loading step; callers pass a `testName` payload to start()/update().
const PROGRESS_BAR_OPTIONS = {
  format: '🔄 {testName} | {bar} | {percentage}% | {value}/{total} | ETA: {eta}s',
  barCompleteChar: '\u2588',
  barIncompleteChar: '\u2591',
  hideCursor: true
};
const progressBar = new cliProgress.SingleBar(PROGRESS_BAR_OPTIONS);
/**
 * Builds a fixed-length visual feature vector from a grayscale image.
 *
 * Layout of the returned array (27 values):
 *   - 16 values: intensity histogram (bins of 16 gray levels), as pixel fractions
 *   - 2 values:  mean and standard deviation of the intensities, divided by 255
 *   - 5 values:  ink-weighted centroid (x, y) and second-order central moments
 *                (xx, yy, xy), normalized by the image dimensions; all zero when
 *                the image carries no ink (ink = darkness of a pixel)
 *   - 2 values:  fraction of dark pixels (< 128) and of interior pixels whose
 *                forward-difference gradient magnitude exceeds 50
 *   - 2 values:  summed absolute left/right and top/bottom mirror differences,
 *                divided by half the pixel count (expressed in gray levels, not
 *                rescaled to [0, 1] like the other features)
 *
 * @param {ArrayLike<number>} imageData - Row-major pixel intensities (0..255 expected).
 * @param {number} width - Image width in pixels.
 * @param {number} height - Image height in pixels.
 * @returns {number[]} The feature vector described above.
 */
function extractVisualFeatures(imageData, width, height) {
  const pixelCount = imageData.length;

  // Pass 1 (linear): histogram, intensity sums and dark-pixel count.
  const binCounts = new Array(16).fill(0);
  let intensitySum = 0;
  let intensitySquareSum = 0;
  let darkCount = 0;
  for (let idx = 0; idx < pixelCount; idx++) {
    const value = imageData[idx];
    binCounts[Math.floor(value / 16)]++;
    intensitySum += value;
    intensitySquareSum += value * value;
    if (value < 128) darkCount++;
  }
  const meanIntensity = intensitySum / pixelCount;
  const spread = Math.sqrt((intensitySquareSum / pixelCount) - (meanIntensity * meanIntensity));

  // Pass 2: raw image moments, each pixel weighted by its darkness.
  let inkMass = 0;
  let sumX = 0;
  let sumY = 0;
  let sumXX = 0;
  let sumYY = 0;
  let sumXY = 0;
  for (let row = 0; row < height; row++) {
    const rowStart = row * width;
    for (let col = 0; col < width; col++) {
      const ink = (255 - imageData[rowStart + col]) / 255;
      inkMass += ink;
      sumX += col * ink;
      sumY += row * ink;
      sumXX += col * col * ink;
      sumYY += row * row * ink;
      sumXY += col * row * ink;
    }
  }

  let shapeFeatures = [0, 0, 0, 0, 0];
  if (inkMass > 0) {
    const centroidX = sumX / inkMass;
    const centroidY = sumY / inkMass;
    shapeFeatures = [
      centroidX / width,
      centroidY / height,
      ((sumXX / inkMass) - (centroidX * centroidX)) / (width * width),
      ((sumYY / inkMass) - (centroidY * centroidY)) / (height * height),
      ((sumXY / inkMass) - (centroidX * centroidY)) / (width * height)
    ];
  }

  // Pass 3: edge pixels among interior pixels (forward differences to the right and below).
  let edgeCount = 0;
  for (let row = 1; row < height - 1; row++) {
    for (let col = 1; col < width - 1; col++) {
      const here = imageData[row * width + col];
      const dx = Math.abs(imageData[row * width + (col + 1)] - here);
      const dy = Math.abs(imageData[(row + 1) * width + col] - here);
      if (Math.sqrt(dx * dx + dy * dy) > 50) edgeCount++;
    }
  }

  // Pass 4: mirror differences, left vs right half and top vs bottom half.
  let leftRightDiff = 0;
  for (let row = 0; row < height; row++) {
    for (let col = 0; col < width / 2; col++) {
      leftRightDiff += Math.abs(imageData[row * width + col] - imageData[row * width + (width - 1 - col)]);
    }
  }
  let topBottomDiff = 0;
  for (let row = 0; row < height / 2; row++) {
    for (let col = 0; col < width; col++) {
      topBottomDiff += Math.abs(imageData[row * width + col] - imageData[(height - 1 - row) * width + col]);
    }
  }

  return [
    ...binCounts.map((count) => count / pixelCount),
    meanIntensity / 255,
    spread / 255,
    ...shapeFeatures,
    darkCount / pixelCount,
    edgeCount / pixelCount,
    leftRightDiff / (pixelCount / 2),
    topBottomDiff / (pixelCount / 2)
  ];
}
/**
 * Derives font metadata from a glyph PNG file name and the font index.
 *
 * The font id is the file name without its `.png` extension and without the
 * trailing `_a` glyph marker. When the id is an own key of the index, the
 * font's category from the index is used as its family; otherwise a warning is
 * logged and a fallback entry is returned with a title-cased name and the
 * "sans-serif" family.
 *
 * @param {string} filename - PNG file name, e.g. `open-sans_a.png`.
 * @param {Object<string, {category: string}>} fontIndexData - Font index keyed by font id.
 * @returns {{name: string, id: string, family: string}} Font metadata.
 */
function extractFontInfoFromFilename(filename, fontIndexData) {
  // Strip the extension and the glyph marker only where they are suffixes: a plain
  // string replace removes the FIRST "_a" and corrupts ids such as "noto_sans_arabic".
  const fontId = filename.replace(/\.png$/, '').replace(/_a$/, '');

  // Own-property lookup so ids like "constructor" do not resolve to Object.prototype members.
  const isIndexed = Object.prototype.hasOwnProperty.call(fontIndexData, fontId);
  const fontData = isIndexed ? fontIndexData[fontId] : undefined;

  if (!fontData) {
    console.warn(`⚠️ Police non trouvée dans l'index: ${fontId}`);
    const fontName = fontId.replace(/-/g, ' ').replace(/\b\w/g, l => l.toUpperCase());
    return {
      name: fontName,
      id: fontId,
      family: "sans-serif"
    };
  }

  return {
    name: fontId,
    id: fontId,
    family: fontData.category
  };
}
/**
 * Generates the feature embedding of one font glyph image.
 *
 * The PNG is decoded to raw grayscale pixels and handed to extractVisualFeatures().
 * Images that are not exactly 40x40, or that cannot be read/decoded, are rejected:
 * the reason is logged (prefixed with the font id) and `null` is returned so the
 * caller can count the rejection and continue with the next file.
 *
 * @param {string} pngPath - Path of the PNG file.
 * @param {string} fontId - Font identifier, used only in diagnostics.
 * @returns {Promise<number[]|null>} The feature vector, or `null` when the image is rejected.
 */
async function generateImageEmbedding(pngPath, fontId) {
  try {
    // NOTE(review): the feature extractor indexes one value per pixel; confirm the raw
    // buffer is single-channel for PNGs that carry an alpha channel.
    const { data, info } = await sharp(pngPath)
      .grayscale()
      .raw()
      .toBuffer({ resolveWithObject: true });

    if (info.width !== 40 || info.height !== 40) {
      console.warn(`⚠️ ${fontId}: image ignorée (taille ${info.width}x${info.height}, attendu 40x40)`);
      return null;
    }

    return extractVisualFeatures(data, info.width, info.height);
  } catch (error) {
    // Best effort: a single unreadable image must not abort the batch, but say why it was skipped.
    console.warn(`⚠️ ${fontId}: embedding impossible (${error.message})`);
    return null;
  }
}
/**
 * Splits a font id into a family prefix and a variant suffix for family fusion.
 *
 * The id is cut on dashes; the first `maxDashes` segments form the prefix and the
 * remaining segments form the suffix. Ids that do not have more than `maxDashes`
 * segments cannot be split and yield `null`.
 *
 * @param {string} fontId - Dash-separated font identifier.
 * @param {object} fontData - Font metadata (not used by the split itself).
 * @param {number} [maxDashes=2] - Number of leading segments kept as the prefix.
 * @returns {{prefix: string, suffix: string}|null} Both halves, or `null`.
 */
function extractFusionPrefix(fontId, fontData, maxDashes = 2) {
  const segments = fontId.split('-');
  if (segments.length <= maxDashes) {
    return null;
  }

  const head = segments.slice(0, maxDashes);
  const tail = segments.slice(maxDashes);
  return {
    prefix: head.join('-'),
    suffix: tail.join('-')
  };
}
/**
 * Merges fonts of the same family into a single entry.
 *
 * Fonts are grouped by the prefix returned by extractFusionPrefix(). A group with
 * several fonts becomes one merged entry: it copies the representative font (the
 * one with the shortest id, first one on ties), takes the prefix as id, a
 * title-cased prefix as name, and lists the grouped ids in `variants`. The merged
 * entry keeps the representative's embedding (embeddings are not averaged).
 * Single-font groups and fonts without a prefix are passed through unchanged.
 *
 * Output order: groups in order of first appearance, then the fonts that have no
 * prefix, in input order.
 *
 * NOTE(review): a font whose id equals a group prefix has too few segments to be
 * grouped, so it is emitted next to the merged entry carrying the same id —
 * confirm this is intended.
 *
 * @param {Array<{id: string}>} fontDataList - Font metadata, index-aligned with the embeddings.
 * @param {Array<number[]>} embeddingMatrices - One embedding per font.
 * @param {{enableFontFusion: boolean, fusionPrefixLength: number}} config - Fusion settings.
 * @returns {{fontDataList: Array<object>, embeddingMatrices: Array<number[]>}}
 *   The inputs themselves when fusion is disabled, otherwise the merged lists.
 */
function mergeFontFamilies(fontDataList, embeddingMatrices, config) {
  if (!config.enableFontFusion) {
    return { fontDataList, embeddingMatrices };
  }

  // A Map (rather than a plain object) keeps prefixes such as "constructor" or "42"
  // ordinary keys: no clash with Object.prototype members, no integer-key reordering.
  const groups = new Map();
  const ungrouped = [];

  // Group by prefix, computing each font's prefix once.
  fontDataList.forEach((fontData, index) => {
    const member = { font: fontData, embedding: embeddingMatrices[index] };
    const prefixInfo = extractFusionPrefix(fontData.id, fontData, config.fusionPrefixLength);
    if (!prefixInfo) {
      ungrouped.push(member);
      return;
    }
    const members = groups.get(prefixInfo.prefix);
    if (members) {
      members.push(member);
    } else {
      groups.set(prefixInfo.prefix, [member]);
    }
  });

  const mergedFonts = [];
  const mergedEmbeddings = [];

  for (const [prefix, members] of groups) {
    if (members.length === 1) {
      mergedFonts.push(members[0].font);
      mergedEmbeddings.push(members[0].embedding);
      continue;
    }

    // Representative font: the shortest id; the strict comparison keeps the first on ties.
    const representative = members.reduce((best, candidate) =>
      candidate.font.id.length < best.font.id.length ? candidate : best
    );

    // Aggregate the metadata of the whole group onto the representative.
    mergedFonts.push({
      ...representative.font,
      id: prefix,
      name: prefix.replace(/-/g, ' ').replace(/\b\w/g, l => l.toUpperCase()),
      variants: members.map((member) => member.font.id),
      variantCount: members.length
    });
    mergedEmbeddings.push(representative.embedding);
  }

  // Fonts that could not be assigned a prefix come last, unchanged.
  for (const { font, embedding } of ungrouped) {
    mergedFonts.push(font);
    mergedEmbeddings.push(embedding);
  }

  return { fontDataList: mergedFonts, embeddingMatrices: mergedEmbeddings };
}
/**
 * Loads the font index and computes an embedding for every glyph image.
 *
 * Only files of PNGS_DIR whose name ends with `_a.png` are considered. They are
 * processed in sorted name order so the font order (and therefore a seeded UMAP
 * run) does not depend on the directory listing order of the file system. Images
 * for which no embedding can be produced are counted as rejected and skipped.
 *
 * @param {object} config - Test configuration (currently not used by the loader).
 * @returns {Promise<{fontDataList: Array<object>, embeddingMatrices: Array<number[]>}>}
 *   Font metadata and embeddings, index-aligned.
 * @throws If the font index or the PNG directory cannot be read, or the index is not valid JSON.
 */
async function loadAllFontDataWithEmbeddings(config) {
  console.log(chalk.blue('🔄 Chargement des données de polices...'));

  // Load the font index
  console.log(chalk.yellow('📖 Chargement de l\'index des polices...'));
  const fontIndexData = JSON.parse(await fs.readFile(FONT_INDEX_PATH, 'utf8'));
  console.log(chalk.green(`✅ Index chargé: ${Object.keys(fontIndexData).length} polices`));

  // Select the glyph images up front so the progress total matches the files that
  // are actually processed (other directory entries used to keep the bar below 100%).
  const glyphFiles = (await fs.readdir(PNGS_DIR))
    .filter((file) => file.endsWith('_a.png'))
    .sort();

  const fontDataList = [];
  const embeddingMatrices = [];
  let processedCount = 0;
  let rejectedCount = 0;

  progressBar.start(glyphFiles.length, 0, { testName: 'Loading' });
  try {
    for (const pngFile of glyphFiles) {
      const pngPath = path.join(PNGS_DIR, pngFile);

      // Extract the font information
      const fontInfo = extractFontInfoFromFilename(pngFile, fontIndexData);
      const embedding = await generateImageEmbedding(pngPath, fontInfo.id);

      if (embedding) {
        fontDataList.push(fontInfo);
        embeddingMatrices.push(embedding);
        processedCount++;
      } else {
        rejectedCount++;
      }
      progressBar.update(processedCount + rejectedCount, { testName: 'Loading' });
    }
  } finally {
    // Always release the bar, even if a file fails unexpectedly.
    progressBar.stop();
  }

  console.log(chalk.green(`✅ ${processedCount} polices chargées avec succès`));
  if (rejectedCount > 0) {
    console.log(chalk.red(`❌ ${rejectedCount} polices rejetées`));
  }

  return { fontDataList, embeddingMatrices };
}
// Modulus of the 32-bit linear congruential generator used for seeded runs.
const SEEDED_RANDOM_MODULUS = 4294967296;

// Builds a deterministic random function (values in [0, 1)) from a numeric seed.
function createSeededRandom(seed) {
  let state = Number(seed);
  if (!Number.isFinite(state)) {
    throw new TypeError(`randomSeed invalide: ${seed}`);
  }
  return () => {
    state = (state * 1664525 + 1013904223) % SEEDED_RANDOM_MODULUS;
    // `%` keeps the sign of the dividend: fold negative states back into [0, modulus)
    // so a negative seed can never produce a negative "random" number.
    if (state < 0) {
      state += SEEDED_RANDOM_MODULUS;
    }
    return state / SEEDED_RANDOM_MODULUS;
  };
}

// Returns [min, max] of a list of numbers without spreading it into call arguments.
function numericRange(values) {
  let lowest = Infinity;
  let highest = -Infinity;
  for (const value of values) {
    lowest = Math.min(lowest, value);
    highest = Math.max(highest, value);
  }
  return [lowest, highest];
}

/**
 * Runs the whole pipeline for one test configuration: loads the font embeddings,
 * optionally merges font families, appends the weighted category encoding,
 * normalizes the vectors and projects them to 2D with UMAP.
 *
 * @param {object} config - Test configuration (`testName`, `nNeighbors`, `minDist`,
 *   `metric`, `embeddingWeight`, `categoryWeight`, `enableFontFusion`,
 *   `fusionPrefixLength`, and optionally `randomSeed` for a reproducible run).
 * @returns {Promise<{config: object, data: Array<object>, stats: object}>}
 *   The configuration, each font extended with its `x`/`y` coordinates, and summary statistics.
 * @throws {Error} When no valid font data could be loaded.
 * @throws {TypeError} When `randomSeed` is provided but is not a finite number.
 */
async function generateUMAPForConfig(config) {
  console.log(chalk.blue(`\n🧪 Test: ${config.testName}`));
  console.log(chalk.gray(` nNeighbors: ${config.nNeighbors}, minDist: ${config.minDist}`));
  console.log(chalk.gray(` Weights: ${config.embeddingWeight}/${config.categoryWeight}`));
  console.log(chalk.gray(` Fusion: ${config.enableFontFusion ? 'ON' : 'OFF'}`));

  // Load the data
  const { fontDataList, embeddingMatrices } = await loadAllFontDataWithEmbeddings(config);
  if (fontDataList.length === 0) {
    throw new Error('Aucune donnée de police valide chargée');
  }

  // Merge font families
  const { fontDataList: mergedFonts, embeddingMatrices: mergedEmbeddings } =
    mergeFontFamilies(fontDataList, embeddingMatrices, config);
  console.log(chalk.blue(`📊 Après fusion: ${mergedFonts.length} polices`));

  // Encode the categories (basic simulation).
  // NOTE(review): every font receives the same uniform vector, so the category part
  // cannot separate fonts yet; `categories` is only used for its length.
  const categories = ['sans-serif', 'serif', 'display', 'handwriting', 'monospace'];
  const categoryEncodings = mergedFonts.map(() => [0.2, 0.2, 0.2, 0.2, 0.2]);

  // Combine embeddings and categories
  const combinedData = mergedEmbeddings.map((embedding, i) => {
    const weightedEmbedding = embedding.map(val => val * config.embeddingWeight);
    const weightedCategory = categoryEncodings[i].map(val => val * config.categoryWeight);
    return [...weightedEmbedding, ...weightedCategory];
  });

  // Normalize the data (manual min-max normalization).
  // NOTE(review): min and max are taken per row (per font), not per feature column —
  // confirm this is the intended normalization.
  const matrix = new Matrix(combinedData);
  const normalizedData = [];
  for (let i = 0; i < matrix.rows; i++) {
    const row = matrix.getRow(i);
    const min = Math.min(...row);
    const max = Math.max(...row);
    const range = max - min;
    normalizedData.push(range === 0 ? row.map(() => 0) : row.map(val => (val - min) / range));
  }

  // Generate UMAP
  console.log(chalk.blue('🔄 Génération UMAP...'));

  // Use a seeded random function when a seed is specified
  let randomFunction = Math.random;
  if (config.randomSeed !== undefined) {
    randomFunction = createSeededRandom(config.randomSeed);
    console.log(chalk.gray(` Seed: ${config.randomSeed}`));
  }

  // NOTE(review): confirm that umap-js honors the `metric` option as passed here.
  const umap = new UMAP({
    nComponents: 2,
    nNeighbors: config.nNeighbors,
    minDist: config.minDist,
    metric: config.metric,
    random: randomFunction
  });
  const umapResult = umap.fit(normalizedData);

  // Build the final data
  const finalData = mergedFonts.map((font, i) => ({
    ...font,
    x: umapResult[i][0],
    y: umapResult[i][1]
  }));

  // Compute the statistics
  const xRange = numericRange(finalData.map(d => d.x));
  const yRange = numericRange(finalData.map(d => d.y));

  console.log(chalk.green(`✅ UMAP terminé - ${finalData.length} polices`));
  console.log(chalk.gray(` Plage X: [${xRange[0].toFixed(2)}, ${xRange[1].toFixed(2)}]`));
  console.log(chalk.gray(` Plage Y: [${yRange[0].toFixed(2)}, ${yRange[1].toFixed(2)}]`));

  return {
    config,
    data: finalData,
    stats: {
      totalFonts: finalData.length,
      xRange,
      yRange,
      embeddingDimensions: mergedEmbeddings[0]?.length || 0,
      categoryDimensions: categories.length
    }
  };
}
/**
 * Saves the results of one test as a JSON file in RESULTS_DIR.
 *
 * The file is named `<testName>_<timestamp>.json`. Characters of the test name
 * that cannot appear in a file name (path separators, `:*?"<>|`, control
 * characters) are replaced by `_` in the file name only; the metadata keeps the
 * original test name.
 *
 * @param {{config: object, stats: object, data: Array<object>}} testResult - Result of generateUMAPForConfig().
 * @returns {Promise<string>} Path of the written file.
 */
async function saveTestResults(testResult) {
  const generatedAt = new Date().toISOString();
  const timestamp = generatedAt.replace(/[:.]/g, '-');

  // A test name such as "nn15/md0.1" would otherwise point into a non-existent sub-directory.
  const safeName = String(testResult.config.testName).replace(/[\\/:*?"<>|\x00-\x1f]/g, '_');
  const filename = `${safeName}_${timestamp}.json`;
  const filepath = path.join(RESULTS_DIR, filename);

  const outputData = {
    metadata: {
      generated_at: generatedAt,
      test_name: testResult.config.testName,
      config: testResult.config,
      stats: testResult.stats
    },
    fonts: testResult.data
  };

  await fs.writeFile(filepath, JSON.stringify(outputData, null, 2));
  console.log(chalk.green(`💾 Résultats sauvegardés: ${filename}`));
  return filepath;
}
// Escapes a value for safe insertion into HTML text or attribute content.
function escapeHtml(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

// Serializes data as JSON that can be embedded in an inline <script>: "<" is
// written as \u003c so that a value containing "</script>" cannot end the script.
function toInlineScriptJson(value) {
  return JSON.stringify(value).replace(/</g, '\\u003c');
}

// Formats a [min, max] pair with one decimal.
function formatRange(range) {
  return `[${range[0].toFixed(1)}, ${range[1].toFixed(1)}]`;
}

/**
 * Generates a simple standalone HTML visualization (Plotly scatter plot) of one
 * test and writes it to VISUALIZATIONS_DIR as `<testName>_<timestamp>.html`.
 *
 * Values interpolated into the markup are HTML-escaped, the font data is embedded
 * as script-safe JSON, and characters of the test name that cannot appear in a
 * file name are replaced by `_` in the file name.
 *
 * @param {{config: object, stats: object, data: Array<object>}} testResult - Result of generateUMAPForConfig().
 * @returns {Promise<string>} Path of the written HTML file.
 */
async function generateVisualization(testResult) {
  const { config, stats } = testResult;
  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
  const safeName = String(config.testName).replace(/[\\/:*?"<>|\x00-\x1f]/g, '_');
  const filename = `${safeName}_${timestamp}.html`;
  const filepath = path.join(VISUALIZATIONS_DIR, filename);

  const title = escapeHtml(config.testName);
  const fontsJson = toInlineScriptJson(testResult.data);

  // The plot title below uses double quotes: inside this template literal `\'` is
  // emitted as a bare apostrophe, which used to terminate the single-quoted string
  // early and made the whole generated script a syntax error.
  const html = `
<!DOCTYPE html>
<html lang="fr">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>UMAP Test: ${title}</title>
<style>
body { font-family: Arial, sans-serif; margin: 20px; background: #f5f5f5; }
.container { max-width: 1200px; margin: 0 auto; background: white; padding: 20px; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }
.header { text-align: center; margin-bottom: 30px; }
.config { background: #f8f9fa; padding: 15px; border-radius: 5px; margin-bottom: 20px; }
.config h3 { margin-top: 0; color: #495057; }
.config-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 10px; }
.config-item { display: flex; justify-content: space-between; padding: 5px 0; border-bottom: 1px solid #dee2e6; }
.config-item:last-child { border-bottom: none; }
.config-label { font-weight: bold; color: #6c757d; }
.config-value { color: #495057; }
.stats { background: #e3f2fd; padding: 15px; border-radius: 5px; margin-bottom: 20px; }
.stats h3 { margin-top: 0; color: #1976d2; }
.stats-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 10px; }
.stats-item { text-align: center; }
.stats-value { font-size: 24px; font-weight: bold; color: #1976d2; }
.stats-label { color: #666; font-size: 14px; }
.visualization { text-align: center; margin: 20px 0; }
.plot { border: 1px solid #ddd; border-radius: 5px; }
.info { background: #fff3cd; padding: 15px; border-radius: 5px; margin-top: 20px; }
.info h4 { margin-top: 0; color: #856404; }
</style>
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
</head>
<body>
<div class="container">
<div class="header">
<h1>🎨 UMAP Test: ${title}</h1>
<p>Généré le ${new Date().toLocaleString('fr-FR')}</p>
</div>
<div class="config">
<h3>⚙️ Configuration</h3>
<div class="config-grid">
<div class="config-item">
<span class="config-label">nNeighbors:</span>
<span class="config-value">${escapeHtml(config.nNeighbors)}</span>
</div>
<div class="config-item">
<span class="config-label">minDist:</span>
<span class="config-value">${escapeHtml(config.minDist)}</span>
</div>
<div class="config-item">
<span class="config-label">metric:</span>
<span class="config-value">${escapeHtml(config.metric)}</span>
</div>
<div class="config-item">
<span class="config-label">embeddingWeight:</span>
<span class="config-value">${escapeHtml(config.embeddingWeight)}</span>
</div>
<div class="config-item">
<span class="config-label">categoryWeight:</span>
<span class="config-value">${escapeHtml(config.categoryWeight)}</span>
</div>
<div class="config-item">
<span class="config-label">fontFusion:</span>
<span class="config-value">${config.enableFontFusion ? 'ON' : 'OFF'}</span>
</div>
<div class="config-item">
<span class="config-label">fusionPrefixLength:</span>
<span class="config-value">${escapeHtml(config.fusionPrefixLength)}</span>
</div>
</div>
</div>
<div class="stats">
<h3>📊 Statistiques</h3>
<div class="stats-grid">
<div class="stats-item">
<div class="stats-value">${escapeHtml(stats.totalFonts)}</div>
<div class="stats-label">Polices</div>
</div>
<div class="stats-item">
<div class="stats-value">${escapeHtml(stats.embeddingDimensions)}</div>
<div class="stats-label">Dimensions Embedding</div>
</div>
<div class="stats-item">
<div class="stats-value">${escapeHtml(stats.categoryDimensions)}</div>
<div class="stats-label">Dimensions Catégorie</div>
</div>
<div class="stats-item">
<div class="stats-value">${formatRange(stats.xRange)}</div>
<div class="stats-label">Plage X</div>
</div>
<div class="stats-item">
<div class="stats-value">${formatRange(stats.yRange)}</div>
<div class="stats-label">Plage Y</div>
</div>
</div>
</div>
<div class="visualization">
<h3>🗺️ Visualisation UMAP</h3>
<div id="plot" class="plot"></div>
</div>
<div class="info">
<h4>💡 Comment utiliser ces résultats</h4>
<p>Cette visualisation montre la distribution des polices dans l'espace UMAP 2D.
Les polices similaires devraient être proches les unes des autres.
Utilisez le script de sélection pour déployer cette configuration dans l'application.</p>
</div>
</div>
<script>
const data = ${fontsJson};
const trace = {
x: data.map(d => d.x),
y: data.map(d => d.y),
mode: 'markers',
type: 'scatter',
marker: {
size: 6,
color: data.map((d, i) => i),
colorscale: 'Viridis',
opacity: 0.7,
line: { width: 0.5, color: 'white' }
},
text: data.map(d => d.name),
hovertemplate: '<b>%{text}</b><br>X: %{x:.2f}<br>Y: %{y:.2f}<extra></extra>'
};
const layout = {
title: "Distribution des polices dans l'espace UMAP",
xaxis: { title: 'UMAP Dimension 1' },
yaxis: { title: 'UMAP Dimension 2' },
width: 800,
height: 600,
margin: { t: 60, r: 20, b: 60, l: 60 }
};
Plotly.newPlot('plot', [trace], layout);
</script>
</body>
</html>`;

  await fs.writeFile(filepath, html);
  console.log(chalk.green(`📊 Visualisation générée: ${filename}`));
  return filepath;
}
// Name of the input file read by the configuration loader; the cleanup must never delete it.
const TEST_CONFIGS_FILENAME = 'test-configs.json';

// Deletes the files of `directory`, except the names listed in `keep`, and returns
// how many were removed. A file that cannot be deleted is reported and skipped.
async function removeOldFiles(directory, keep = []) {
  let removed = 0;
  for (const entry of await fs.readdir(directory)) {
    if (keep.includes(entry)) continue;
    try {
      await fs.unlink(path.join(directory, entry));
      removed++;
    } catch (error) {
      console.log(chalk.yellow(`⚠️ Impossible de supprimer ${entry}: ${error.message}`));
    }
  }
  return removed;
}

/**
 * Main entry point: prepares the output directories, loads the test
 * configurations, removes the output of previous runs, runs every configuration
 * (saving a JSON result and an HTML visualization for each) and prints a final
 * report.
 *
 * A failing configuration is reported and skipped; any other error is fatal and
 * terminates the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    console.log(chalk.blue.bold('🧪 Batch Testing UMAP - Hyperparamètres\n'));

    // Create the directories if needed
    await fs.mkdir(RESULTS_DIR, { recursive: true });
    await fs.mkdir(VISUALIZATIONS_DIR, { recursive: true });
    await fs.mkdir(CONFIGS_DIR, { recursive: true });

    // Load the configurations
    await loadTestConfigs();

    // Clean up the old results. The configuration input file is preserved: deleting
    // it would make every later run silently fall back to the default configuration.
    console.log(chalk.yellow('🧹 Nettoyage des anciens résultats...'));
    try {
      const removedResults = await removeOldFiles(RESULTS_DIR);
      const removedViz = await removeOldFiles(VISUALIZATIONS_DIR);
      const removedConfigs = await removeOldFiles(CONFIGS_DIR, [TEST_CONFIGS_FILENAME]);
      console.log(chalk.green(`✅ ${removedResults} résultats, ${removedViz} visualisations, ${removedConfigs} configs supprimés`));
    } catch (error) {
      // Report the real cause instead of claiming there was nothing to delete.
      console.log(chalk.yellow(`⚠️ Nettoyage incomplet: ${error.message}`));
    }

    console.log(chalk.blue(`\n📋 ${TEST_CONFIGS.length} configurations de test prêtes`));

    const results = [];

    // Run all the tests
    for (let i = 0; i < TEST_CONFIGS.length; i++) {
      const config = TEST_CONFIGS[i];
      console.log(chalk.yellow(`\n[${i + 1}/${TEST_CONFIGS.length}]`));
      try {
        const testResult = await generateUMAPForConfig(config);
        const resultPath = await saveTestResults(testResult);
        const vizPath = await generateVisualization(testResult);
        results.push({
          config: config,
          resultPath,
          vizPath,
          stats: testResult.stats
        });
      } catch (error) {
        console.error(chalk.red(`❌ Erreur pour ${config.testName}:`), error.message);
      }
    }

    // Print the final report
    console.log(chalk.blue.bold('\n📊 Rapport Final\n'));
    console.log(chalk.green(`✅ ${results.length}/${TEST_CONFIGS.length} tests réussis`));
    console.log(chalk.blue('\n📋 Résultats par test:'));
    results.forEach((result, i) => {
      console.log(chalk.gray(` ${i + 1}. ${result.config.testName}`));
      console.log(chalk.gray(` Polices: ${result.stats.totalFonts}`));
      console.log(chalk.gray(` Plage: [${result.stats.xRange[0].toFixed(1)}, ${result.stats.xRange[1].toFixed(1)}] x [${result.stats.yRange[0].toFixed(1)}, ${result.stats.yRange[1].toFixed(1)}]`));
    });
    console.log(chalk.blue('\n📁 Fichiers générés:'));
    console.log(chalk.gray(` 📊 Résultats: ${RESULTS_DIR}`));
    console.log(chalk.gray(` 🎨 Visualisations: ${VISUALIZATIONS_DIR}`));
    console.log(chalk.gray(` ⚙️ Configurations: ${CONFIGS_DIR}`));
    console.log(chalk.green.bold('\n🎉 Batch testing terminé !'));
    console.log(chalk.yellow('💡 Utilisez le script de sélection pour déployer une configuration dans l\'application.'));
  } catch (error) {
    console.error(chalk.red('💥 Erreur fatale:'), error.message);
    process.exit(1);
  }
}
// Run only when this file is executed directly (not when it is imported).
// File-system paths are compared instead of hand-building a `file://` URL: the URL
// form never matches on Windows (drive letters, backslashes) nor for paths that
// contain spaces or other characters that are percent-encoded in import.meta.url.
if (process.argv[1] && path.resolve(process.argv[1]) === fileURLToPath(import.meta.url)) {
  main();
}