|
|
#!/usr/bin/env node |
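// Notion → MDX toolkit: fetches Notion pages, converts them to Markdown and
// then to MDX with Astro components, downloads media, and copies the results
// into the Astro site's content directory.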
|
|
|
|
|
import { config } from 'dotenv'; |
|
|
import { join, dirname, basename } from 'path'; |
|
|
import { fileURLToPath } from 'url'; |
|
|
import { copyFileSync, existsSync, mkdirSync, readFileSync, writeFileSync, readdirSync, statSync, unlinkSync } from 'fs'; |
|
|
import { convertNotionToMarkdown } from './notion-converter.mjs'; |
|
|
import { convertToMdx } from './mdx-converter.mjs'; |
|
|
import { Client } from '@notionhq/client'; |
|
|
|
|
|
|
|
|
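// Load variables from a local .env file without overriding values already set in the environment.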
config({ override: false }); |
|
|
|
|
|
const __filename = fileURLToPath(import.meta.url); |
|
|
const __dirname = dirname(__filename); |
|
|
|
|
|
|
|
|
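// Default toolkit paths and the Astro project locations this script writes into.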
const DEFAULT_INPUT = join(__dirname, 'input', 'pages.json'); |
|
|
const DEFAULT_OUTPUT = join(__dirname, 'output'); |
|
|
const ASTRO_CONTENT_PATH = join(__dirname, '..', '..', 'src', 'content', 'article.mdx'); |
|
|
const ASTRO_ASSETS_PATH = join(__dirname, '..', '..', 'src', 'content', 'assets', 'image'); |
|
|
const ASTRO_BIB_PATH = join(__dirname, '..', '..', 'src', 'content', 'bibliography.bib'); |
|
|
const STATIC_BIB_PATH = join(__dirname, 'static', 'bibliography.bib'); |
|
|
|
|
|
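// Parse CLI flags into a config object, falling back to NOTION_TOKEN / NOTION_PAGE_ID from the environment.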
function parseArgs() { |
|
|
const args = process.argv.slice(2); |
|
|
const config = { |
|
|
input: DEFAULT_INPUT, |
|
|
output: DEFAULT_OUTPUT, |
|
|
clean: false, |
|
|
notionOnly: false, |
|
|
mdxOnly: false, |
|
|
token: process.env.NOTION_TOKEN, |
|
|
pageId: process.env.NOTION_PAGE_ID |
|
|
}; |
|
|
|
|
|
for (const arg of args) { |
|
|
if (arg.startsWith('--input=')) { |
|
|
config.input = arg.split('=')[1]; |
|
|
} else if (arg.startsWith('--output=')) { |
|
|
config.output = arg.split('=')[1]; |
|
|
} else if (arg.startsWith('--token=')) { |
|
|
config.token = arg.split('=')[1]; |
|
|
} else if (arg.startsWith('--page-id=')) { |
|
|
config.pageId = arg.split('=')[1]; |
|
|
} else if (arg === '--clean') { |
|
|
config.clean = true; |
|
|
} else if (arg === '--notion-only') { |
|
|
config.notionOnly = true; |
|
|
} else if (arg === '--mdx-only') { |
|
|
config.mdxOnly = true; |
|
|
} |
|
|
} |
|
|
|
|
|
return config; |
|
|
} |
|
|
|
|
|
function showHelp() { |
|
|
console.log(` |
|
|
🚀 Notion to MDX Toolkit
|
|
|
|
|
Usage: |
|
|
node index.mjs [options] |
|
|
|
|
|
Options: |
|
|
--input=PATH Input pages configuration file (default: input/pages.json) |
|
|
--output=PATH Output directory (default: output/) |
|
|
--token=TOKEN Notion API token (or set NOTION_TOKEN env var)
--page-id=ID Notion page ID to convert (or set NOTION_PAGE_ID env var)
|
|
--clean Clean output directory before processing |
|
|
--notion-only Only convert Notion to Markdown (skip MDX conversion) |
|
|
--mdx-only Only convert existing Markdown to MDX |
|
|
--help, -h Show this help |
|
|
|
|
|
Environment Variables: |
|
|
NOTION_TOKEN Your Notion integration token
NOTION_PAGE_ID ID of the Notion page to convert (makes pages.json optional)
|
|
|
|
|
Examples: |
|
|
# Full conversion workflow |
|
|
NOTION_TOKEN=your_token node index.mjs --clean |
|
|
|
|
|
# Only convert Notion pages to Markdown |
|
|
node index.mjs --notion-only --token=your_token |
|
|
|
|
|
# Only convert existing Markdown to MDX |
|
|
node index.mjs --mdx-only |
|
|
|
|
|
# Custom paths |
|
|
node index.mjs --input=my-pages.json --output=converted/ --token=your_token |
|
|
|
|
|
Configuration File Format (pages.json): |
|
|
{ |
|
|
"pages": [ |
|
|
{ |
|
|
"id": "your-notion-page-id", |
|
|
"title": "Page Title", |
|
|
"slug": "page-slug" |
|
|
} |
|
|
] |
|
|
} |
|
|
|
|
|
Workflow: |
|
|
1. Notion → Markdown (with media download)
|
|
2. Markdown → MDX (with Astro components)
|
|
3. Copy to Astro content directory |
|
|
`); |
|
|
} |
|
|
|
|
|
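// Create a directory (and any missing parents) if it does not already exist.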
function ensureDirectory(dir) { |
|
|
if (!existsSync(dir)) { |
|
|
mkdirSync(dir, { recursive: true }); |
|
|
} |
|
|
} |
|
|
|
|
|
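// Remove the visible contents of a directory with a shell glob (Unix only); dotfiles such as .temp-pages.json survive.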
async function cleanDirectory(dir) { |
|
|
if (existsSync(dir)) { |
|
|
const { execSync } = await import('child_process'); |
|
|
execSync(`rm -rf "${dir}"/*`, { stdio: 'inherit' }); |
|
|
} |
|
|
} |
|
|
|
|
|
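// Read and parse the pages configuration file; fall back to an empty page list on error.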
function readPagesConfig(inputFile) { |
|
|
try { |
|
|
const content = readFileSync(inputFile, 'utf8'); |
|
|
return JSON.parse(content); |
|
|
} catch (error) { |
|
|
console.error(`❌ Error reading pages config: ${error.message}`);
|
|
return { pages: [] }; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
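// Build a temporary pages config for a single page ID by fetching its title from the Notion API and deriving a slug from it.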
async function createPagesConfigFromEnv(pageId, token, outputPath) { |
|
|
try { |
|
|
console.log('🔍 Fetching page info from Notion API...');
|
|
const notion = new Client({ auth: token }); |
|
|
const page = await notion.pages.retrieve({ page_id: pageId }); |
|
|
|
|
|
|
|
|
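// Prefer the "title" property; fall back to "Name", which database pages commonly use.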
let title = 'Article'; |
|
|
if (page.properties.title && page.properties.title.title && page.properties.title.title.length > 0) { |
|
|
title = page.properties.title.title[0].plain_text; |
|
|
} else if (page.properties.Name && page.properties.Name.title && page.properties.Name.title.length > 0) { |
|
|
title = page.properties.Name.title[0].plain_text; |
|
|
} |
|
|
|
|
|
|
|
|
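// Derive a URL-friendly slug from the title.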
const slug = title |
|
|
.toLowerCase() |
|
|
.replace(/[^\w\s-]/g, '') |
|
|
.replace(/\s+/g, '-') |
|
|
.replace(/-+/g, '-') |
|
|
.trim(); |
|
|
|
|
|
console.log(` ✅ Found page: "${title}" (slug: ${slug})`);
|
|
|
|
|
|
|
|
const pagesConfig = { |
|
|
pages: [{ |
|
|
id: pageId, |
|
|
title: title, |
|
|
slug: slug |
|
|
}] |
|
|
}; |
|
|
|
|
|
|
|
|
writeFileSync(outputPath, JSON.stringify(pagesConfig, null, 4)); |
|
|
console.log(` ✅ Created temporary pages config`);
|
|
|
|
|
return pagesConfig; |
|
|
} catch (error) { |
|
|
console.error(`❌ Error fetching page from Notion: ${error.message}`);
|
|
throw error; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
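// Remove <exclude>...</exclude> blocks from the generated MDX, drop image
// imports that were only used inside those blocks, and make sure a blank line
// follows the final import statement.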
function cleanupExcludeTagsAndImports(content) { |
|
|
let cleanedContent = content; |
|
|
let removedCount = 0; |
|
|
const removedImageVariables = new Set(); |
|
|
|
|
|
|
|
|
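// Collect the image variables referenced inside <exclude> blocks so their imports can be pruned if they end up unused.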
const excludeBlocks = cleanedContent.match(/<exclude>[\s\S]*?<\/exclude>/g) || []; |
|
|
excludeBlocks.forEach(match => { |
|
|
const imageMatches = match.match(/src=\{([^}]+)\}/g); |
|
|
if (imageMatches) { |
|
|
imageMatches.forEach(imgMatch => { |
|
|
const varName = imgMatch.match(/src=\{([^}]+)\}/)?.[1]; |
|
|
if (varName) { |
|
|
removedImageVariables.add(varName); |
|
|
} |
|
|
}); |
|
|
} |
|
|
}); |
|
|
|
|
|
|
|
|
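// Strip the <exclude> blocks themselves.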
cleanedContent = cleanedContent.replace(/<exclude>[\s\S]*?<\/exclude>/g, (match) => { |
|
|
removedCount++; |
|
|
return ''; |
|
|
}); |
|
|
|
|
|
|
|
|
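// Remove imports whose only usages were inside the removed <exclude> blocks.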
if (removedImageVariables.size > 0) { |
|
|
removedImageVariables.forEach(varName => { |
|
|
|
|
|
const remainingUsage = cleanedContent.includes(`{${varName}}`) || cleanedContent.includes(`src={${varName}}`); |
|
|
|
|
|
if (!remainingUsage) { |
|
|
|
|
|
|
|
|
const importPattern = new RegExp(`import\\s+${varName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\s+from\\s+['"][^'"]+['"];?\\s*`, 'g'); |
|
|
cleanedContent = cleanedContent.replace(importPattern, ''); |
|
|
console.log(` 🗑️ Removed unused import: ${varName}`);
|
|
} |
|
|
}); |
|
|
} |
|
|
|
|
|
if (removedCount > 0) { |
|
|
console.log(` 🧹 Final cleanup: removed ${removedCount} exclude block(s) and ${removedImageVariables.size} unused import(s)`);
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
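// Ensure at least one blank line follows the last import statement.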
const lines = cleanedContent.split('\n'); |
|
|
let lastImportIndex = -1; |
|
|
|
|
|
|
|
|
for (let i = 0; i < lines.length; i++) { |
|
|
if (lines[i].trim().startsWith('import ') && lines[i].trim().endsWith(';')) { |
|
|
lastImportIndex = i; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if (lastImportIndex >= 0) { |
|
|
|
|
|
let nextNonEmptyIndex = lastImportIndex + 1; |
|
|
while (nextNonEmptyIndex < lines.length && lines[nextNonEmptyIndex].trim() === '') { |
|
|
nextNonEmptyIndex++; |
|
|
} |
|
|
|
|
|
|
|
|
if (nextNonEmptyIndex > lastImportIndex + 1) { |
|
|
|
|
|
} else { |
|
|
|
|
|
lines.splice(nextNonEmptyIndex, 0, ''); |
|
|
} |
|
|
|
|
|
cleanedContent = lines.join('\n'); |
|
|
} |
|
|
|
|
|
return cleanedContent; |
|
|
} |
|
|
|
|
|
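// Copy the generated MDX, downloaded images, and bibliography into the Astro
// project, rewriting image paths and dropping references to images that
// failed to copy.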
function copyToAstroContent(outputDir) { |
|
|
console.log('📄 Copying MDX files to Astro content directory...');
|
|
|
|
|
try { |
|
|
|
|
|
mkdirSync(dirname(ASTRO_CONTENT_PATH), { recursive: true }); |
|
|
mkdirSync(ASTRO_ASSETS_PATH, { recursive: true }); |
|
|
|
|
|
|
|
|
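// Copy the first generated MDX file into the Astro content path after a final cleanup pass.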
const files = readdirSync(outputDir); |
|
|
const mdxFiles = files.filter(file => file.endsWith('.mdx')); |
|
|
if (mdxFiles.length > 0) { |
|
|
const mdxFile = join(outputDir, mdxFiles[0]); |
|
|
|
|
|
let mdxContent = readFileSync(mdxFile, 'utf8'); |
|
|
|
|
|
|
|
|
mdxContent = cleanupExcludeTagsAndImports(mdxContent); |
|
|
|
|
|
writeFileSync(ASTRO_CONTENT_PATH, mdxContent); |
|
|
console.log(` ✅ Copied and cleaned MDX to ${ASTRO_CONTENT_PATH}`);
|
|
} |
|
|
|
|
|
|
|
|
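// File types to copy into the assets directory; exported HTML embeds are treated like images.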
const imageExtensions = ['.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.bmp', '.tiff', '.html']; |
|
|
let totalImageCount = 0; |
|
|
|
|
|
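// Walk a directory tree, copying media files and skipping zero-byte or corrupted downloads.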
function copyImagesRecursively(dir, sourceName) { |
|
|
if (!existsSync(dir)) return; |
|
|
|
|
|
const files = readdirSync(dir); |
|
|
for (const file of files) { |
|
|
const filePath = join(dir, file); |
|
|
const stat = statSync(filePath); |
|
|
|
|
|
if (stat.isDirectory()) { |
|
|
copyImagesRecursively(filePath, sourceName); |
|
|
} else if (imageExtensions.some(ext => file.toLowerCase().endsWith(ext))) { |
|
|
const filename = basename(filePath); |
|
|
const destPath = join(ASTRO_ASSETS_PATH, filename); |
|
|
|
|
|
try { |
|
|
|
|
|
const stats = statSync(filePath); |
|
|
if (stats.size === 0) { |
|
|
console.log(` ⚠️ Skipping empty image: ${filename}`);
|
|
continue; // skip this file but keep scanning the rest of the directory
|
|
} |
|
|
|
|
|
|
|
|
copyFileSync(filePath, destPath); |
|
|
|
|
|
|
|
|
const destStats = statSync(destPath); |
|
|
if (destStats.size === 0) { |
|
|
console.log(` ❌ Failed to copy corrupted image: ${filename}`);
|
|
|
|
|
try { |
|
|
unlinkSync(destPath); |
|
|
} catch (e) { } |
|
|
continue; // skip this file but keep copying the remaining ones
|
|
} |
|
|
|
|
|
console.log(` ✅ Copied ${sourceName}: ${filename} (${destStats.size} bytes)`);
|
|
totalImageCount++; |
|
|
} catch (error) { |
|
|
console.log(` ❌ Failed to copy ${filename}: ${error.message}`);
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
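// Notion-hosted media is downloaded to media/; externally hosted images to external-images/.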
const mediaDir = join(outputDir, 'media'); |
|
|
copyImagesRecursively(mediaDir, 'Notion image'); |
|
|
|
|
|
|
|
|
const externalImagesDir = join(outputDir, 'external-images'); |
|
|
copyImagesRecursively(externalImagesDir, 'external image'); |
|
|
|
|
|
if (totalImageCount > 0) { |
|
|
console.log(` ✅ Copied ${totalImageCount} total image(s) to ${ASTRO_ASSETS_PATH}`);
|
|
} |
|
|
|
|
|
|
|
|
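// Point image references at ./assets/image/ and drop references to images that were not copied successfully.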
if (existsSync(ASTRO_CONTENT_PATH)) { |
|
|
const mdxContent = readFileSync(ASTRO_CONTENT_PATH, 'utf8'); |
|
|
let updatedContent = mdxContent.replace(/\.\/media\//g, './assets/image/'); |
|
|
|
|
|
updatedContent = updatedContent.replace(/\.\/assets\/image\/[^\/]+\//g, './assets/image/'); |
|
|
|
|
|
|
|
|
const imageReferences = updatedContent.match(/\.\/assets\/image\/[^\s\)]+/g) || []; |
|
|
const existingImages = existsSync(ASTRO_ASSETS_PATH) ? readdirSync(ASTRO_ASSETS_PATH).filter(f => |
|
|
['.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.bmp', '.tiff'].some(ext => f.toLowerCase().endsWith(ext)) |
|
|
) : []; |
|
|
|
|
|
for (const imgRef of imageReferences) { |
|
|
const filename = basename(imgRef); |
|
|
if (!existingImages.includes(filename)) { |
|
|
console.log(` ⚠️ Removing reference to missing/corrupted image: ${filename}`);
|
|
|
|
|
updatedContent = updatedContent.replace( |
|
|
new RegExp(`<Image[^>]*src=["']${imgRef.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}["'][^>]*\/?>`, 'g'), |
|
|
'' |
|
|
); |
|
|
updatedContent = updatedContent.replace( |
|
|
new RegExp(`!\\[.*?\\]\\(${imgRef.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\)`, 'g'), |
|
|
'' |
|
|
); |
|
|
} |
|
|
} |
|
|
|
|
|
writeFileSync(ASTRO_CONTENT_PATH, updatedContent); |
|
|
console.log(` ✅ Updated image paths and filtered problematic references in MDX file`);
|
|
} |
|
|
|
|
|
|
|
|
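// Use the static bibliography if one exists; otherwise write an empty bibliography file.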
if (existsSync(STATIC_BIB_PATH)) { |
|
|
const bibContent = readFileSync(STATIC_BIB_PATH, 'utf8'); |
|
|
writeFileSync(ASTRO_BIB_PATH, bibContent); |
|
|
console.log(` ✅ Copied static bibliography from ${STATIC_BIB_PATH}`);
|
|
} else { |
|
|
writeFileSync(ASTRO_BIB_PATH, ''); |
|
|
console.log(` ✅ Created empty bibliography (no static file found)`);
|
|
} |
|
|
|
|
|
} catch (error) { |
|
|
console.warn(` ⚠️ Failed to copy to Astro: ${error.message}`);
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
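// CLI entry point: resolve the page source, clean the target directories, then run the requested conversion steps.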
async function main() { |
|
|
const args = process.argv.slice(2); |
|
|
|
|
|
if (args.includes('--help') || args.includes('-h')) { |
|
|
showHelp(); |
|
|
process.exit(0); |
|
|
} |
|
|
|
|
|
const config = parseArgs(); |
|
|
|
|
|
console.log('🚀 Notion to MDX Toolkit');
|
|
console.log('========================'); |
|
|
|
|
|
try { |
|
|
|
|
|
let inputConfigFile = config.input; |
|
|
let pageIdFromEnv = null; |
|
|
|
|
|
|
|
|
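// When NOTION_PAGE_ID and a token are provided, generate a temporary pages config instead of requiring pages.json.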
if (config.pageId && config.token) { |
|
|
console.log('✨ Using NOTION_PAGE_ID from environment variable');
|
|
const tempConfigPath = join(config.output, '.temp-pages.json'); |
|
|
ensureDirectory(config.output); |
|
|
await createPagesConfigFromEnv(config.pageId, config.token, tempConfigPath); |
|
|
inputConfigFile = tempConfigPath; |
|
|
pageIdFromEnv = config.pageId; |
|
|
} else if (!existsSync(config.input)) { |
|
|
console.error(`❌ No NOTION_PAGE_ID environment variable and no pages.json found at: ${config.input}`);
|
|
console.log('💡 Either set NOTION_PAGE_ID env var or create input/pages.json');
|
|
process.exit(1); |
|
|
} |
|
|
|
|
|
|
|
|
// Keep any existing Markdown in place when only the MDX step was requested.
if (!config.mdxOnly) {
  console.log('🧹 Cleaning output directory to avoid conflicts...');
  await cleanDirectory(config.output);
}
|
|
|
|
|
|
|
|
console.log('🧹 Cleaning assets/image directory and setting permissions...');
|
|
if (existsSync(ASTRO_ASSETS_PATH)) { |
|
|
await cleanDirectory(ASTRO_ASSETS_PATH); |
|
|
} else { |
|
|
ensureDirectory(ASTRO_ASSETS_PATH); |
|
|
} |
|
|
|
|
|
|
|
|
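// Best-effort chmod so copied assets are readable; failure is non-fatal.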
const { execSync } = await import('child_process'); |
|
|
try { |
|
|
execSync(`chmod -R 755 "${ASTRO_ASSETS_PATH}"`, { stdio: 'inherit' }); |
|
|
console.log(' ✅ Set permissions for assets/image directory');
|
|
} catch (error) { |
|
|
console.log(' ⚠️ Could not set permissions (non-critical):', error.message);
|
|
} |
|
|
|
|
|
if (config.mdxOnly) { |
|
|
|
|
|
console.log('📝 MDX conversion only mode');
|
|
await convertToMdx(config.output, config.output); |
|
|
copyToAstroContent(config.output); |
|
|
|
|
|
} else if (config.notionOnly) { |
|
|
|
|
|
console.log('📝 Notion conversion only mode');
|
|
await convertNotionToMarkdown(inputConfigFile, config.output, config.token); |
|
|
|
|
|
} else { |
|
|
|
|
|
console.log('🔄 Full conversion workflow');
|
|
|
|
|
|
|
|
console.log('\n📝 Step 1: Converting Notion pages to Markdown...');
|
|
await convertNotionToMarkdown(inputConfigFile, config.output, config.token); |
|
|
|
|
|
|
|
|
console.log('\n🔄 Step 2: Converting Markdown to MDX...');
|
|
const pagesConfig = readPagesConfig(inputConfigFile); |
|
|
const firstPage = pagesConfig.pages && pagesConfig.pages.length > 0 ? pagesConfig.pages[0] : null; |
|
|
const pageId = pageIdFromEnv || (firstPage ? firstPage.id : null); |
|
|
await convertToMdx(config.output, config.output, pageId, config.token); |
|
|
|
|
|
|
|
|
console.log('\n📄 Step 3: Copying to Astro content directory...');
|
|
copyToAstroContent(config.output); |
|
|
} |
|
|
|
|
|
console.log('\n🎉 Conversion completed successfully!');
|
|
|
|
|
} catch (error) { |
|
|
console.error('❌ Error:', error.message);
|
|
process.exit(1); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
export { convertNotionToMarkdown, convertToMdx }; |
|
|
|
|
|
|
|
|
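// Run the CLI only when this module is executed directly (not imported).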
if (import.meta.url === `file://${process.argv[1]}`) { |
|
|
main(); |
|
|
} |
|
|
|