|
|
#!/usr/bin/env node |
|
|
|
|
|
import fs from 'fs/promises'; |
|
|
import path from 'path'; |
|
|
import { fileURLToPath } from 'url'; |
|
|
import sharp from 'sharp'; |
|
|
import { UMAP } from 'umap-js'; |
|
|
import { Matrix } from 'ml-matrix'; |
|
|
import cliProgress from 'cli-progress'; |
|
|
import chalk from 'chalk'; |
|
|
|
|
|
// Recreate CommonJS-style __filename/__dirname for this ES module.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// --- Filesystem layout ------------------------------------------------------
// Rendered glyph PNGs ("<fontId>_a.png"), one per font.
const PNGS_DIR = path.join(__dirname, 'output', 'pngs');
// Destination directory for the generated JSON data.
const DATA_DIR = path.join(__dirname, 'output', 'data');
// Google Fonts index: fontId → metadata (category, family, weights, …).
const FONT_INDEX_PATH = path.join(__dirname, 'input', 'font-index.json');
const OUTPUT_FILENAME = 'typography_data_embeddings.json';
const FULL_OUTPUT_PATH = path.join(DATA_DIR, OUTPUT_FILENAME);

// --- UMAP projection parameters ---------------------------------------------
// minDist 1.0 spreads points out for display; `random: Math.random` makes
// runs non-deterministic (swap in a seeded PRNG for reproducible layouts).
const UMAP_PARAMS = {
  nComponents: 2,
  nNeighbors: 15,
  minDist: 1.0,
  metric: 'euclidean',
  random: Math.random
};

// --- Feature blending -------------------------------------------------------
// Relative influence of visual embeddings vs. one-hot category vectors when
// building the combined feature space (see combineEmbeddingAndCategoryData).
const CATEGORY_WEIGHT = 0.2;
const EMBEDDING_WEIGHT = 0.8;

// --- Font family fusion -----------------------------------------------------
// When enabled, per-variant fonts are collapsed to one entry per family.
const ENABLE_FONT_FUSION = true;
// NOTE(review): used only as the default for extractFusionPrefix's maxDashes
// parameter, which that function never reads — confirm before removing.
const FUSION_PREFIX_LENGTH = 2;

// CLI progress bar for long runs.
// NOTE(review): constructed but never start()ed/update()d anywhere in this
// file — dead code or a pending feature; confirm before removing.
const progressBar = new cliProgress.SingleBar({
  format: chalk.cyan('{bar}') + ' | {percentage}% | {value}/{total} | {fontName}',
  barCompleteChar: '\u2588',
  barIncompleteChar: '\u2591',
  hideCursor: true
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Announce that the visual-feature extraction system is ready.
 * No heavy setup is required: feature extraction is pure pixel math.
 *
 * @returns {Promise<boolean>} Always true.
 */
async function initializeFeatureExtraction() {
  const startupMessages = [
    chalk.blue('🔄 Initialisation du système d\'extraction de caractéristiques visuelles...'),
    chalk.green('✅ Système d\'extraction de caractéristiques prêt'),
  ];
  for (const message of startupMessages) {
    console.log(message);
  }
  return true;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Decode a glyph PNG and turn it into a visual-feature embedding vector.
 *
 * @param {string} pngPath - Path to the rendered glyph PNG.
 * @param {string} fontId - Font identifier (used only for logging).
 * @returns {Promise<number[]|null>} Feature vector, or null when the image
 *   has an unexpected size or decoding fails.
 */
async function generateImageEmbedding(pngPath, fontId) {
  try {
    // Decode to raw 8-bit grayscale pixels.
    const { data: pixels, info: meta } = await sharp(pngPath)
      .grayscale()
      .raw()
      .toBuffer({ resolveWithObject: true });

    // Guard: the downstream feature math assumes a fixed 40x40 raster.
    const isExpectedSize = meta.width === 40 && meta.height === 40;
    if (!isExpectedSize) {
      console.warn(`⚠️ Taille inattendue pour ${pngPath}: ${meta.width}x${meta.height}`);
      return null;
    }

    const featureVector = extractVisualFeatures(pixels, meta.width, meta.height);
    console.log(chalk.blue(` 📊 Embedding généré pour ${fontId}: ${featureVector.length} dimensions`));
    return featureVector;
  } catch (error) {
    console.error(`❌ Erreur lors de la génération de l'embedding pour ${fontId}:`, error.message);
    return null;
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Compute a fixed-length (27-dim) visual-feature vector from raw grayscale
 * pixels: 16-bin histogram, mean/std intensity, ink-mass centroid and
 * second-order moments, ink/edge density, and left-right/top-bottom asymmetry.
 *
 * @param {Buffer|number[]} imageData - Row-major grayscale pixels (0–255).
 * @param {number} width - Image width in pixels.
 * @param {number} height - Image height in pixels.
 * @returns {number[]} Feature vector (all entries normalized).
 */
function extractVisualFeatures(imageData, width, height) {
  const totalPixels = imageData.length;
  const features = [];

  // --- 16-bin intensity histogram (normalized to proportions) ---
  const bins = new Array(16).fill(0);
  for (const value of imageData) {
    bins[value >> 4]++; // value >> 4 === Math.floor(value / 16) for 0–255
  }
  for (const count of bins) {
    features.push(count / totalPixels);
  }

  // --- Global intensity statistics ---
  let total = 0;
  let totalSq = 0;
  for (const value of imageData) {
    total += value;
    totalSq += value * value;
  }
  const mean = total / totalPixels;
  const stdDev = Math.sqrt(totalSq / totalPixels - mean * mean);
  features.push(mean / 255, stdDev / 255);

  // --- Ink-mass moments (darker pixels carry more weight) ---
  let m00 = 0, m10 = 0, m01 = 0, m20 = 0, m02 = 0, m11 = 0;
  for (let row = 0; row < height; row++) {
    for (let col = 0; col < width; col++) {
      const w = (255 - imageData[row * width + col]) / 255;
      m00 += w;
      m10 += col * w;
      m01 += row * w;
      m20 += col * col * w;
      m02 += row * row * w;
      m11 += col * row * w;
    }
  }

  if (m00 > 0) {
    const cx = m10 / m00;
    const cy = m01 / m00;
    // Normalized centroid position.
    features.push(cx / width, cy / height);
    // Central second-order moments, normalized by image dimensions.
    features.push(
      (m20 / m00 - cx * cx) / (width * width),
      (m02 / m00 - cy * cy) / (height * height),
      (m11 / m00 - cx * cy) / (width * height)
    );
  } else {
    // Blank (all-white) image: no ink mass, so all shape features are zero.
    features.push(0, 0, 0, 0, 0);
  }

  // --- Ink density and edge density ---
  let darkCount = 0;
  for (const value of imageData) {
    if (value < 128) darkCount++;
  }

  let edgeCount = 0;
  for (let row = 1; row < height - 1; row++) {
    for (let col = 1; col < width - 1; col++) {
      const here = imageData[row * width + col];
      const gx = Math.abs(imageData[row * width + col + 1] - here);
      const gy = Math.abs(imageData[(row + 1) * width + col] - here);
      // Gradient magnitude above 50 counts as an edge pixel.
      if (Math.sqrt(gx * gx + gy * gy) > 50) edgeCount++;
    }
  }
  features.push(darkCount / totalPixels, edgeCount / totalPixels);

  // --- Left/right and top/bottom asymmetry (0 = perfectly symmetric) ---
  let lrDiff = 0;
  for (let row = 0; row < height; row++) {
    for (let col = 0; col < width / 2; col++) {
      lrDiff += Math.abs(imageData[row * width + col] - imageData[row * width + (width - 1 - col)]);
    }
  }
  let tbDiff = 0;
  for (let row = 0; row < height / 2; row++) {
    for (let col = 0; col < width; col++) {
      tbDiff += Math.abs(imageData[row * width + col] - imageData[(height - 1 - row) * width + col]);
    }
  }
  features.push(lrDiff / (totalPixels / 2), tbDiff / (totalPixels / 2));

  return features;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Derive the family-fusion prefix for a font id, so that weight/style/script
 * variants of the same family group together.
 *
 * @param {string} fontId - Hyphenated font identifier (e.g. "noto-sans-jp").
 * @param {Object} [fontData] - Index entry; its `subsets` may name a script
 *   baked into the id that should be stripped.
 * @param {number} [maxDashes] - NOTE: currently unused; kept for interface
 *   compatibility.
 * @returns {string} The family prefix used as fusion key.
 */
function extractFusionPrefix(fontId, fontData, maxDashes = FUSION_PREFIX_LENGTH) {
  const segments = fontId.split('-');

  // Single-word ids have no family prefix to extract.
  if (segments.length <= 1) return fontId;

  // Script subsets (e.g. "arabic") baked into the id are stripped so that
  // such variants collapse onto their base family. Common western subsets
  // are skipped since they rarely appear inside ids.
  const skippedSubsets = ['latin', 'latin-ext', 'cyrillic', 'cyrillic-ext', 'greek', 'greek-ext'];
  if (fontData && fontData.subsets && Array.isArray(fontData.subsets)) {
    for (const subset of fontData.subsets) {
      if (skippedSubsets.includes(subset)) continue;
      if (!fontId.includes(subset)) continue;
      const baseName = fontId.replace(`-${subset}`, '').replace(subset, '');
      if (baseName && baseName !== fontId) {
        console.log(chalk.yellow(` 🔍 ${fontId} → ${baseName} (subset: ${subset})`));
        return baseName;
      }
    }
  }

  // Hand-curated families whose variants don't share a clean prefix.
  const specialCases = {
    'baloo': ['baloo-2', 'baloo-bhai-2', 'baloo-bhaijaan-2', 'baloo-bhaina-2', 'baloo-chettan-2', 'baloo-da-2', 'baloo-paaji-2', 'baloo-tamma-2', 'baloo-tammudu-2', 'baloo-thambi-2'],
    'ibm-plex': ['ibm-plex'],
    'playwrite': ['playwrite']
  };
  for (const [familyPrefix, patterns] of Object.entries(specialCases)) {
    if (patterns.some(pattern => fontId.startsWith(pattern))) {
      return familyPrefix;
    }
  }

  // Noto is huge: keep serif and sans branches distinct.
  if (fontId.startsWith('noto-serif-')) return 'noto-serif';
  if (fontId.startsWith('noto-')) return 'noto';

  // "x-sans" / "x-serif" / "x-plex" names keep two segments; everything else
  // collapses to the first segment.
  const keepTwoSegments = ['sans', 'serif', 'plex'].includes(segments[1]);
  return keepTwoSegments ? segments.slice(0, 2).join('-') : segments[0];
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Collapse per-variant font entries into one entry per family prefix.
 *
 * Fonts are grouped by extractFusionPrefix(); groups with more than one
 * member are replaced by a single merged record that keeps one hand-picked
 * representative's embedding and aggregates weights/styles/subsets across
 * all variants. Singleton groups pass through (with imageName mirrored).
 *
 * @param {Object[]} fontDataList - Font metadata records.
 * @param {number[][]} embeddingMatrices - Feature vectors, parallel to fontDataList.
 * @returns {{fontDataList: Object[], embeddingMatrices: number[][]}} The
 *   (possibly reduced) parallel arrays.
 */
function mergeFontFamilies(fontDataList, embeddingMatrices) {
  // Fusion can be disabled globally; pass data through untouched.
  if (!ENABLE_FONT_FUSION) {
    return { fontDataList, embeddingMatrices };
  }

  console.log(chalk.blue('🔄 Fusion des familles de polices...'));

  // Group fonts (and their embeddings, kept parallel) by family prefix.
  const prefixGroups = {};
  const prefixEmbeddingGroups = {};

  for (let i = 0; i < fontDataList.length; i++) {
    const font = fontDataList[i];
    const prefix = extractFusionPrefix(font.id, font);

    if (!prefixGroups[prefix]) {
      prefixGroups[prefix] = [];
      prefixEmbeddingGroups[prefix] = [];
    }

    prefixGroups[prefix].push(font);
    prefixEmbeddingGroups[prefix].push(embeddingMatrices[i]);
  }

  const mergedFonts = [];
  const mergedEmbeddings = [];
  let fusionCount = 0;
  let totalReduction = 0;

  for (const [prefix, fonts] of Object.entries(prefixGroups)) {
    if (fonts.length > 1) {
      // Pick the variant whose embedding will represent the whole family.
      let representativeFont;
      let representativeEmbedding;

      // Family-specific preferences, each falling back to the first variant.
      if (prefix === 'noto') {
        representativeFont = fonts.find(f => f.id === 'noto-sans-arabic') ||
          fonts.find(f => f.id === 'noto-sans-latin') ||
          fonts.find(f => f.id === 'noto-sans') ||
          fonts[0];
      } else if (prefix === 'noto-serif') {
        representativeFont = fonts.find(f => f.id === 'noto-serif-latin') ||
          fonts.find(f => f.id === 'noto-serif') ||
          fonts[0];
      } else if (prefix === 'ibm-plex') {
        representativeFont = fonts.find(f => f.id === 'ibm-plex-sans') ||
          fonts.find(f => f.id === 'ibm-plex') ||
          fonts[0];
      } else if (prefix === 'baloo') {
        representativeFont = fonts.find(f => f.id === 'baloo-2') || fonts[0];
      } else {
        // Generic case: prefer a regular/normal cut, then any cut that is
        // neither italic nor bold.
        representativeFont = fonts.find(f => f.id.includes('-regular') || f.id.includes('-normal')) ||
          fonts.find(f => !f.id.includes('-italic') && !f.id.includes('-bold')) ||
          fonts[0];
      }

      // Embedding groups are parallel to the font groups, so look the
      // representative's embedding up by its position in the group.
      const representativeIndex = fonts.findIndex(f => f.id === representativeFont.id);
      representativeEmbedding = prefixEmbeddingGroups[prefix][representativeIndex];

      // Union of metadata across all variants in the family.
      const allWeights = [...new Set(fonts.flatMap(f => f.weights || []))].sort((a, b) => a - b);
      const allStyles = [...new Set(fonts.flatMap(f => f.styles || []))].sort();
      const allSubsets = [...new Set(fonts.flatMap(f => f.subsets || []))].sort();

      // Merged record: the representative's fields, overridden with
      // family-level identity, aggregated metadata, and provenance info.
      const mergedFont = {
        ...representativeFont,
        id: prefix,
        name: prefix.replace(/-/g, ' ').replace(/\b\w/g, l => l.toUpperCase()),
        imageName: representativeFont.id, // PNG on disk keeps the variant id
        weights: allWeights,
        styles: allStyles,
        subsets: allSubsets,
        originalVariants: fonts.map(f => ({
          id: f.id,
          name: f.name,
          google_fonts_url: f.google_fonts_url,
          weights: f.weights || [],
          styles: f.styles || []
        })),
        variantCount: fonts.length,
        fusionInfo: {
          merged: true,
          originalCount: fonts.length,
          representative: representativeFont.id,
          representativeName: representativeFont.name,
          variants: fonts.map(f => f.id),
          selectionMethod: 'representative_embedding_with_metadata_aggregation'
        }
      };

      mergedFonts.push(mergedFont);
      mergedEmbeddings.push(representativeEmbedding);

      fusionCount++;
      totalReduction += fonts.length - 1;

      console.log(chalk.green(` ✓ ${prefix}: ${fonts.length} variantes → 1 famille (embedding représentatif)`));
    } else {
      // Singleton family: keep the record as-is, mirroring imageName the
      // same way merged records do.
      const singleFont = {
        ...fonts[0],
        imageName: fonts[0].id
      };
      mergedFonts.push(singleFont);
      mergedEmbeddings.push(prefixEmbeddingGroups[prefix][0]);
    }
  }

  console.log(chalk.green(`✅ Fusion terminée: ${fusionCount} familles fusionnées, ${totalReduction} polices supprimées`));
  console.log(chalk.cyan(`📊 Résultat: ${fontDataList.length} → ${mergedFonts.length} polices (${((totalReduction / fontDataList.length) * 100).toFixed(1)}% de réduction)`));

  return {
    fontDataList: mergedFonts,
    embeddingMatrices: mergedEmbeddings
  };
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Derive font metadata for a rendered glyph PNG filename (e.g. "roboto_a.png").
 *
 * @param {string} filename - PNG filename of the form "<fontId>_a.png".
 * @param {Object} fontIndexData - Map of fontId → Google Fonts index entry.
 * @returns {Object} Font info (name, id, imageName, family/category, URL, and
 *   — when indexed — weights/styles/subsets/unicodeRange). Falls back to a
 *   synthesized entry when the font is missing from the index.
 */
function extractFontInfoFromFilename(filename, fontIndexData) {
  // Fix: strip only the "_a.png" suffix. The previous replace('_a', '')
  // removed the FIRST "_a" anywhere in the name, mangling ids that happen to
  // contain "_a" (e.g. "my_about_a.png" → "my_bout_a").
  const fontId = filename.replace(/_a\.png$/, '').replace(/\.png$/, '');
  const fontData = fontIndexData[fontId];

  if (!fontData) {
    // Unknown font: synthesize a Title-Case display name and specimen URL
    // from the id, and default the category.
    console.warn(`⚠️ Police non trouvée dans l'index: ${fontId}`);
    const fontName = fontId.replace(/-/g, ' ').replace(/\b\w/g, l => l.toUpperCase());
    const googleFontsUrl = `https://fonts.google.com/specimen/${fontName.replace(/\s+/g, '+')}`;

    return {
      name: fontName,
      id: fontId,
      imageName: fontId,
      family: "sans-serif", // default category when the index has no entry
      google_fonts_url: googleFontsUrl
    };
  }

  // Indexed font: the machine id doubles as the display name here, and the
  // Google Fonts category is exposed under "family".
  const fontName = fontId;
  const category = fontData.category;
  const googleFontsUrl = `https://fonts.google.com/specimen/${fontData.family.replace(/\s+/g, '+')}`;

  return {
    name: fontName,
    id: fontId,
    imageName: fontId,
    family: category,
    google_fonts_url: googleFontsUrl,
    weights: fontData.weights || [],
    styles: fontData.styles || [],
    subsets: fontData.subsets || [],
    unicodeRange: fontData.unicodeRange || {}
  };
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Z-score each column of a 2-D numeric array (zero mean, unit variance),
 * guarding constant columns against division by zero.
 *
 * @param {number[][]} data - Row-major samples × features.
 * @returns {number[][]} Normalized copy as a plain 2-D array.
 */
function normalizeData(data) {
  const matrix = new Matrix(data);
  const means = matrix.mean('column');
  // A constant column has std 0; substitute 1 so it is centered, not divided.
  const stds = matrix.standardDeviation('column').map(std => (std === 0 ? 1 : std));

  return matrix
    .to2DArray()
    .map(row => row.map((value, col) => (value - means[col]) / stds[col]));
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * One-hot encode each font's category (its "family" field).
 *
 * @param {Object[]} fontDataList - Font records with a `family` property.
 * @returns {{encodedCategories: number[][], categories: string[], categoryToIndex: Object}}
 *   One-hot vectors (parallel to fontDataList), the distinct category list
 *   in first-seen order, and the name→index lookup.
 */
function encodeCategories(fontDataList) {
  console.log(chalk.blue('🔢 Encoding categories to numerical vectors...'));

  // Distinct categories, in first-seen order.
  const categories = [...new Set(fontDataList.map(font => font.family))];
  console.log(chalk.cyan(`📊 Found ${categories.length} unique categories: ${categories.join(', ')}`));

  // category name → one-hot index
  const categoryToIndex = Object.fromEntries(categories.map((category, index) => [category, index]));

  // One 0/1 vector per font, with a single 1 at its category's position.
  const encodedCategories = fontDataList.map(font =>
    categories.map(category => (category === font.family ? 1 : 0))
  );

  console.log(chalk.green(`✅ Encoded ${encodedCategories.length} fonts with ${categories.length} category dimensions`));

  return { encodedCategories, categories, categoryToIndex };
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Concatenate each weighted visual-embedding vector with its weighted
 * one-hot category vector (weights: EMBEDDING_WEIGHT / CATEGORY_WEIGHT).
 *
 * @param {number[][]} embeddingMatrices - Visual feature vectors.
 * @param {number[][]} encodedCategories - One-hot vectors, parallel to embeddings.
 * @returns {number[][]} Combined vectors, one per font.
 */
function combineEmbeddingAndCategoryData(embeddingMatrices, encodedCategories) {
  console.log(chalk.blue('🔗 Combining embedding data with category encodings...'));

  const combinedData = embeddingMatrices.map((embeddingVector, i) => [
    ...embeddingVector.map(e => e * EMBEDDING_WEIGHT),
    ...encodedCategories[i].map(c => c * CATEGORY_WEIGHT)
  ]);

  console.log(chalk.green(`✅ Combined data: ${embeddingMatrices[0].length} embeddings + ${encodedCategories[0].length} categories = ${combinedData[0].length} total dimensions`));
  console.log(chalk.cyan(`📊 Weights: ${(EMBEDDING_WEIGHT * 100).toFixed(0)}% embeddings, ${(CATEGORY_WEIGHT * 100).toFixed(0)}% categories`));

  return combinedData;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Load the font index, walk every rendered "<fontId>_a.png", and build one
 * visual-feature embedding per font.
 *
 * @returns {Promise<{fontDataList: Object[], embeddingMatrices: number[][]}>}
 *   Parallel arrays: font metadata and its feature vector at the same index.
 * @throws {Error} When no "_a.png" files are found in PNGS_DIR.
 */
async function loadAllFontDataWithEmbeddings() {
  console.log(chalk.blue('🔄 Chargement des données de polices et génération des embeddings...'));

  // Ensure the output directory exists before anything is written later.
  await fs.mkdir(DATA_DIR, { recursive: true });

  // Load the Google Fonts index (fontId → metadata).
  console.log(chalk.yellow('📖 Chargement de l\'index des polices...'));
  const fontIndexData = JSON.parse(await fs.readFile(FONT_INDEX_PATH, 'utf8'));
  console.log(chalk.green(`✅ Index chargé: ${Object.keys(fontIndexData).length} polices`));

  // Only the "_a" glyph renders feed the embedding pipeline.
  const files = await fs.readdir(PNGS_DIR);
  const pngFiles = files.filter(file => file.endsWith('_a.png'));

  if (pngFiles.length === 0) {
    throw new Error(`Aucun fichier PNG trouvé dans ${PNGS_DIR}`);
  }

  console.log(chalk.cyan(`📁 ${pngFiles.length} fichiers PNG trouvés`));

  const fontDataList = [];
  const embeddingMatrices = [];
  let rejectedCount = 0;

  for (let i = 0; i < pngFiles.length; i++) {
    const filename = pngFiles[i];
    const pngPath = path.join(PNGS_DIR, filename);

    const fontInfo = extractFontInfoFromFilename(filename, fontIndexData);
    const embedding = await generateImageEmbedding(pngPath, fontInfo.id);

    if (embedding) {
      fontDataList.push(fontInfo);
      embeddingMatrices.push(embedding);

      // Lightweight progress heartbeat every 10 fonts.
      if ((i + 1) % 10 === 0) {
        console.log(chalk.yellow(`⚡ ${i + 1}/${pngFiles.length} polices traitées...`));
      }
    } else {
      rejectedCount++;
      // Fix: this message previously printed the literal text "$(unknown)"
      // instead of interpolating the rejected font's id.
      console.log(chalk.red(`❌ Rejeté: ${fontInfo.id} (erreur d'embedding)`));
    }
  }

  console.log(chalk.green(`✅ ${fontDataList.length} polices chargées avec succès`));
  console.log(chalk.red(`❌ ${rejectedCount} polices rejetées pour erreurs d'embedding`));
  console.log(chalk.blue(`📊 Matrice finale: ${embeddingMatrices.length} polices × ${embeddingMatrices[0]?.length || 0} dimensions d'embedding`));

  return { fontDataList, embeddingMatrices };
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Project high-dimensional feature vectors to 2-D with UMAP.
 *
 * @param {number[][]} embeddingMatrices - Row-major feature vectors.
 * @returns {number[][]} One [x, y] pair per input row.
 */
function generateUMAPEmbedding(embeddingMatrices) {
  console.log(chalk.blue('🔄 Génération des embeddings UMAP...'));

  // Standardize columns so no single feature dominates the distance metric.
  console.log(chalk.yellow('📊 Normalisation des données...'));
  const normalizedData = normalizeData(embeddingMatrices);

  // Fix: UMAP_PARAMS was passed as a second argument to chalk.cyan, which
  // stringifies it to "[object Object]"; pass it to console.log instead so
  // the parameter object is actually printed.
  console.log(chalk.cyan(`🗺️ Application d'UMAP avec paramètres:`), UMAP_PARAMS);
  const umap = new UMAP(UMAP_PARAMS);
  const embedding = umap.fit(normalizedData);

  console.log(chalk.green(`✅ UMAP terminé - Forme de l'embedding: ${embedding.length} × ${embedding[0]?.length || 0}`));

  // Report coordinate ranges as a quick sanity check on the layout.
  if (embedding.length > 0) {
    const xValues = embedding.map(row => row[0]);
    const yValues = embedding.map(row => row[1]);
    console.log(chalk.blue(`📊 Plage X: [${Math.min(...xValues).toFixed(2)}, ${Math.max(...xValues).toFixed(2)}]`));
    console.log(chalk.blue(`📊 Plage Y: [${Math.min(...yValues).toFixed(2)}, ${Math.max(...yValues).toFixed(2)}]`));
  }

  return embedding;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Attach UMAP coordinates to each font record and write the final JSON
 * (metadata + fonts) to FULL_OUTPUT_PATH, then print a per-category summary.
 *
 * @param {Object[]} fontDataList - Merged font records.
 * @param {number[][]} embedding - UMAP output, parallel to fontDataList.
 * @param {string[]} categories - Distinct category names used in encoding.
 */
async function saveTypographyData(fontDataList, embedding, categories) {
  console.log(chalk.blue('💾 Sauvegarde des données...'));

  // Pair each font with its 2-D UMAP coordinates.
  const finalData = [];
  for (let i = 0; i < fontDataList.length; i++) {
    const fontInfo = fontDataList[i];
    const fontData = {
      ...fontInfo,
      x: embedding[i][0],
      y: embedding[i][1]
    };
    finalData.push(fontData);
  }

  // Provenance metadata describing how this layout was produced.
  const metadata = {
    generated_at: new Date().toISOString(),
    method: "umap_from_visual_features_with_category_influence_and_font_fusion",
    total_fonts: finalData.length,
    font_fusion: {
      enabled: ENABLE_FONT_FUSION,
      prefix_length: FUSION_PREFIX_LENGTH,
      // Fix: this previously claimed "average_embeddings_with_metadata_aggregation",
      // but fusion actually keeps a single representative embedding (matches
      // fusionInfo.selectionMethod emitted by mergeFontFamilies).
      fusion_method: "representative_embedding_with_metadata_aggregation"
    },
    umap_params: UMAP_PARAMS,
    embedding_weights: {
      embedding_weight: EMBEDDING_WEIGHT,
      category_weight: CATEGORY_WEIGHT
    },
    feature_extraction: "visual_features_analysis",
    categories: categories,
    category_count: categories.length,
    data_source: "Visual features analysis (histogram, texture, shape, symmetry) + category encoding + font family fusion"
  };

  const outputData = {
    metadata,
    fonts: finalData
  };

  await fs.writeFile(FULL_OUTPUT_PATH, JSON.stringify(outputData, null, 2), 'utf8');

  console.log(chalk.green(`✅ Données sauvegardées dans ${FULL_OUTPUT_PATH}`));

  // Console summary: font count per category, descending.
  const categoryStats = {};
  for (const font of finalData) {
    const cat = font.family;
    categoryStats[cat] = (categoryStats[cat] || 0) + 1;
  }

  console.log(chalk.cyan('\n📊 Distribution par catégorie:'));
  for (const [cat, count] of Object.entries(categoryStats).sort(([,a], [,b]) => b - a)) {
    const percentage = ((count / finalData.length) * 100).toFixed(1);
    console.log(chalk.white(` ${cat}: ${count} polices (${percentage}%)`));
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Pipeline entry point: load embeddings, fuse font families, blend in
 * category one-hots, project with UMAP, and persist the result.
 * Exits the process with code 1 on any failure.
 */
async function main() {
  try {
    console.log(chalk.blue.bold('🎨 Génération UMAP pour la typographie à partir d\'embeddings d\'images\n'));

    const systemReady = await initializeFeatureExtraction();
    if (!systemReady) {
      throw new Error('Impossible d\'initialiser le système d\'extraction de caractéristiques');
    }

    const { fontDataList, embeddingMatrices } = await loadAllFontDataWithEmbeddings();
    if (fontDataList.length === 0) {
      throw new Error('Aucune donnée de police valide chargée');
    }

    // Collapse weight/style/subset variants onto one entry per family.
    const { fontDataList: mergedFontDataList, embeddingMatrices: mergedEmbeddingMatrices } = mergeFontFamilies(fontDataList, embeddingMatrices);

    // One-hot category encoding, blended with the visual embeddings.
    const { encodedCategories, categories } = encodeCategories(mergedFontDataList);
    const combinedData = combineEmbeddingAndCategoryData(mergedEmbeddingMatrices, encodedCategories);

    // Fix: combinedData was previously z-scored here AND a second time inside
    // generateUMAPEmbedding. Standardization is idempotent, so the duplicate
    // pass changed nothing — it only wasted an O(n·d) scan. The single inner
    // normalization is kept.
    const embedding = generateUMAPEmbedding(combinedData);

    await saveTypographyData(mergedFontDataList, embedding, categories);

    console.log(chalk.green.bold('\n🎉 Génération UMAP avec embeddings terminée avec succès !'));
  } catch (error) {
    console.error(chalk.red('💥 Erreur fatale:'), error.message);
    process.exit(1);
  }
}
|
|
|
|
|
|
|
|
// Entry point: main() handles its own errors and exits non-zero on failure,
// so this floating promise is safe to leave un-awaited.
main();
|
|
|