|
|
#!/usr/bin/env node |
|
|
|
|
|
import { readFileSync, writeFileSync, existsSync, mkdirSync, unlinkSync } from 'fs'; |
|
|
import { join, dirname, basename } from 'path'; |
|
|
import { fileURLToPath } from 'url'; |
|
|
import { Client } from '@notionhq/client'; |
|
|
import { NotionConverter } from 'notion-to-md'; |
|
|
import { DefaultExporter } from 'notion-to-md/plugins/exporter'; |
|
|
|
|
|
// ES modules do not provide the CommonJS `__filename` / `__dirname` globals,
// so both are rebuilt here from this module's own URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Create `dir` (and any missing parent directories) when nothing exists at
 * that path yet. Does nothing when the path already exists.
 *
 * @param {string} dir - Path of the directory to create.
 * @returns {void}
 */
function ensureDirectory(dir) {
  const alreadyPresent = existsSync(dir);
  if (alreadyPresent) {
    return;
  }

  mkdirSync(dir, { recursive: true });
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Run the complete post-processing pipeline over Markdown exported from Notion.
 *
 * `<exclude>` blocks are removed first, linked Notion pages are then inlined
 * (the only asynchronous step), and the remaining synchronous clean-up steps
 * are applied one after another in a fixed order.
 *
 * @param {string} content - Raw Markdown content.
 * @param {object|null} [notionClient=null] - Notion client forwarded to `includeNotionPages`.
 * @param {string|null} [notionToken=null] - Notion token forwarded to `includeNotionPages`.
 * @returns {Promise<string>} The processed content.
 */
export async function postProcessMarkdown(content, notionClient = null, notionToken = null) {
  console.log('π§ Post-processing Notion Markdown for MDX compatibility...');

  const withoutExcluded = removeExcludeTags(content);
  const withIncludedPages = await includeNotionPages(withoutExcluded, notionClient, notionToken);

  // Order matters: each step consumes the output of the previous one.
  const synchronousSteps = [
    cleanNotionArtifacts,
    fixImageAltTextWithLinks,
    fixNotionLinks,
    fixJsxAttributes,
    optimizeImages,
    shiftHeadingLevels,
    cleanEmptyLines,
    fixCodeBlocks,
    fixCodeBlockEndings,
    unwrapHtmlCodeBlocks,
    fixPlainTextCodeBlocks,
    optimizeTables,
  ];

  return synchronousSteps.reduce((text, applyStep) => applyStep(text), withIncludedPages);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Remove every `<exclude>...</exclude>` block from the content and clean up
 * what those blocks referenced.
 *
 * - Markdown images inside an exclude block have their file (by base name)
 *   deleted from the known media directories, unless the same file name is
 *   still referenced by the remaining content.
 * - `src={variable}` image variables used only inside exclude blocks have
 *   their `import variable from '...'` statement removed.
 *
 * @param {string} content - Markdown/MDX content.
 * @returns {string} Content without exclude blocks and without orphaned image imports.
 */
function removeExcludeTags(content) {
  console.log(' ποΈ Removing <exclude> tags and associated media...');

  const excludePattern = /<exclude>[\s\S]*?<\/exclude>/g;
  const removedImageVariables = new Set();
  const mediaFilesToDelete = new Set();

  // First pass: collect everything the exclude blocks reference.
  const excludeBlocks = content.match(excludePattern) || [];
  for (const block of excludeBlocks) {
    for (const [, varName] of block.matchAll(/src=\{([^}]+)\}/g)) {
      removedImageVariables.add(varName);
    }

    for (const [, src] of block.matchAll(/!\[[^\]]*\]\(([^)]+)\)/g)) {
      // Only the base name is kept, so deletion stays inside the media directories.
      const filename = basename(src);
      if (filename) {
        mediaFilesToDelete.add(filename);
      }
    }
  }

  // Second pass: drop the blocks themselves.
  const removedCount = excludeBlocks.length;
  let remaining = content.replace(excludePattern, '');

  if (mediaFilesToDelete.size > 0) {
    console.log(` ποΈ Found ${mediaFilesToDelete.size} media file(s) to delete from exclude blocks`);

    const possibleMediaDirs = [
      join(__dirname, 'output', 'media'),
      join(__dirname, '..', '..', 'src', 'content', 'assets', 'image'),
    ];

    mediaFilesToDelete.forEach((filename) => {
      // A file that is also used outside the exclude blocks must survive,
      // otherwise the remaining reference would point at a missing file.
      if (remaining.includes(filename)) {
        console.log(` Keeping media file still referenced outside exclude blocks: ${filename}`);
        return;
      }

      let deleted = false;
      for (const mediaDir of possibleMediaDirs) {
        if (!existsSync(mediaDir)) {
          continue;
        }
        const filePath = join(mediaDir, filename);
        if (!existsSync(filePath)) {
          continue;
        }
        try {
          unlinkSync(filePath);
          console.log(` ποΈ Deleted media file: ${filename}`);
          deleted = true;
          break;
        } catch (error) {
          // Best effort: report and try the next candidate directory.
          console.log(` β οΈ Failed to delete ${filename}: ${error.message}`);
        }
      }

      if (!deleted) {
        console.log(` βΉοΈ Media file not found: ${filename}`);
      }
    });
  }

  if (removedImageVariables.size > 0) {
    console.log(` πΌοΈ Found ${removedImageVariables.size} unused image import(s) in exclude blocks`);

    removedImageVariables.forEach((varName) => {
      // `{name}` also covers `src={name}`, so a single check is enough.
      const remainingUsage = remaining.includes(`{${varName}}`);
      if (remainingUsage) {
        return;
      }

      // Escape the variable name before embedding it in a dynamic pattern.
      const escapedName = varName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      const importPattern = new RegExp(`import\\s+${escapedName}\\s+from\\s+['"][^'"]+['"];?\\s*`, 'g');
      remaining = remaining.replace(importPattern, '');
      console.log(` ποΈ Removed unused import: ${varName}`);
    });

    console.log(` π§Ή Cleaned up unused image imports`);
  }

  if (removedCount > 0) {
    console.log(` β
Removed ${removedCount} <exclude> tag(s) and their content`);
  } else {
    console.log(' βΉοΈ No <exclude> tags found');
  }

  return remaining;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Replace Markdown links whose target is a bare Notion page id
 * (`[text](xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)`) with the converted content
 * of that page.
 *
 * Links located inside `<exclude>` blocks are left untouched. For every other
 * link the page is converted with `NotionConverter`, its media is downloaded
 * to `output/media/<pageId>`, the raw Markdown is saved next to it as
 * `<slug>-<pageId>.raw.md`, and the link is replaced by the page body (front
 * matter and the first heading are stripped). A page that fails to convert is
 * reported and its link is left as is.
 *
 * @param {string} content - Markdown content to scan.
 * @param {object|null} notionClient - Client handed to `NotionConverter`; falsy skips inclusion.
 * @param {string|null} notionToken - Only checked for presence; falsy skips inclusion.
 * @returns {Promise<string>} Content with the linked pages inlined.
 */
async function includeNotionPages(content, notionClient, notionToken) {
  console.log(' π Including linked Notion pages...');

  if (!notionClient || !notionToken) {
    console.log(' βΉοΈ Skipping page inclusion (no Notion client/token provided)');
    return content;
  }

  let includedCount = 0;
  let skippedCount = 0;

  // Half-open [start, end) offsets of every exclude block.
  const excludeBlocks = [];
  for (const excludeMatch of content.matchAll(/<exclude>[\s\S]*?<\/exclude>/g)) {
    excludeBlocks.push({
      start: excludeMatch.index,
      end: excludeMatch.index + excludeMatch[0].length,
    });
  }

  // `end` is the first offset AFTER the block, so it must be compared with `<`;
  // otherwise a link that directly follows `</exclude>` is wrongly skipped.
  const isWithinExcludeBlock = (position) =>
    excludeBlocks.some((block) => position >= block.start && position < block.end);

  const notionPageLinkRegex = /\[([^\]]+)\]\(([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\)/g;

  // Collect all candidate links first; offsets refer to the original content.
  const matches = [];
  for (const match of content.matchAll(notionPageLinkRegex)) {
    if (isWithinExcludeBlock(match.index)) {
      console.log(` βοΈ Skipping page link in exclude block: ${match[1]} (${match[2]})`);
      skippedCount++;
      continue;
    }

    matches.push({
      fullMatch: match[0],
      linkText: match[1],
      pageId: match[2],
      startPos: match.index,
      endPos: match.index + match[0].length,
    });
  }

  let processedContent = content;

  // Walk backwards so replacing one link never shifts the offsets of the
  // links that still have to be processed.
  for (let i = matches.length - 1; i >= 0; i--) {
    const link = matches[i];

    try {
      console.log(` π Fetching content for page: ${link.pageId}`);

      const outputDir = join(__dirname, 'output');
      const mediaDir = join(outputDir, 'media', link.pageId);
      ensureDirectory(mediaDir);

      const exporter = new DefaultExporter({
        outputType: 'string',
      });

      const converter = new NotionConverter(notionClient)
        .withExporter(exporter)
        .downloadMediaTo({
          outputDir: mediaDir,
          transformPath: (localPath) => `/media/${link.pageId}/${basename(localPath)}`,
        });

      const result = await converter.convert(link.pageId);

      console.log(` πΌοΈ Media saved to: ${mediaDir}`);

      if (result && result.content) {
        const rawFileName = `${link.linkText.toLowerCase().replace(/[^a-z0-9]+/g, '-')}-${link.pageId}`;
        const rawFilePath = join(outputDir, `${rawFileName}.raw.md`);

        // Saving the raw export is best effort; a failure must not abort inclusion.
        try {
          writeFileSync(rawFilePath, result.content);
          console.log(` π Saved raw markdown: ${rawFileName}.raw.md`);
        } catch (error) {
          console.log(` β οΈ Failed to save raw file: ${error.message}`);
        }

        let pageContent = result.content;

        // Strip a leading front matter block, then the first heading line.
        pageContent = pageContent.replace(/^---[\s\S]*?---\s*\n/, '');
        pageContent = pageContent.replace(/^#+ .+\n\n?/, '');

        const finalContent = `\n\n${pageContent.trim()}\n\n`;

        processedContent =
          processedContent.substring(0, link.startPos) +
          finalContent +
          processedContent.substring(link.endPos);

        includedCount++;
        console.log(` β
Included page content: ${link.linkText}`);
      } else {
        console.log(` β οΈ No content found for page: ${link.pageId}`);
      }
    } catch (error) {
      // Leave the original link in place when the page cannot be fetched.
      console.log(` β Failed to fetch page ${link.pageId}: ${error.message}`);
    }
  }

  if (includedCount > 0) {
    console.log(` β
Included ${includedCount} Notion page(s)`);
  } else {
    console.log(' βΉοΈ No Notion page links found to include');
  }

  if (skippedCount > 0) {
    console.log(` βοΈ Skipped ${skippedCount} page link(s) in exclude blocks`);
  }

  return processedContent;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Remove formatting leftovers produced by the Notion export.
 *
 * Applied in order: notion.so links are reduced to their link text, a blank
 * line is inserted after bold-only blockquote lines, lines consisting of
 * three or more dashes and lone `>` lines are emptied, and stray `****` / `**`
 * markers following bold-italic / bold text are removed.
 *
 * @param {string} content - Markdown content.
 * @returns {string} Cleaned content.
 */
function cleanNotionArtifacts(content) {
  console.log(' π§Ή Cleaning Notion artifacts...');

  let cleanedCount = 0;
  const countAndReturn = (value) => {
    cleanedCount++;
    return value;
  };

  const NOTION_LINK = /\[([^\]]+)\]\(https:\/\/www\.notion\.so\/[^)]+\)/g;
  const BOLD_QUOTE_LINE = /^> \*\*([^*]+)\*\*\s*\n/gm;
  const DASH_LINE = /^---+\s*$/gm;
  const EMPTY_QUOTE_LINE = /^>\s*$/gm;
  const DANGLING_BOLD_ITALIC = /\*\*\*([^*]+)\*\*\*\s+\*\*\*\*/g;
  const DANGLING_BOLD = /\*\*([^*]+)\*\*\s+\*\*/g;

  let result = content.replace(NOTION_LINK, (whole, text) => countAndReturn(text));

  // These three clean-ups are not counted in the summary.
  result = result.replace(BOLD_QUOTE_LINE, '> **$1**\n\n');
  result = result.replace(DASH_LINE, '');
  result = result.replace(EMPTY_QUOTE_LINE, '');

  result = result.replace(DANGLING_BOLD_ITALIC, (whole, text) => countAndReturn(`***${text.trim()}***`));
  result = result.replace(DANGLING_BOLD, (whole, text) => countAndReturn(`**${text.trim()}**`));

  if (cleanedCount > 0) {
    console.log(` β
Cleaned ${cleanedCount} Notion artifact(s)`);
  }

  return result;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Flatten Markdown links embedded in image alt text.
 *
 * `![Photo by [Ann](https://a.example)](img.png)` becomes
 * `![Photo by Ann](img.png)`: links inside the alt text are reduced to their
 * text and any remaining square brackets are dropped. The image itself is
 * re-emitted with the cleaned alt text and its original path.
 *
 * @param {string} content - Markdown content.
 * @returns {string} Content with cleaned image alt texts.
 */
function fixImageAltTextWithLinks(content) {
  console.log(' πΌοΈ Fixing image alt text with embedded links...');

  let fixedCount = 0;

  // Image whose alt text contains at least one `[text](url)` link.
  const imageWithLinksPattern = /!\[([^\]]*\[[^\]]+\]\([^)]+\)[^\]]*)\]\(([^)]+)\)/g;

  const fixed = content.replace(imageWithLinksPattern, (match, altText, imagePath) => {
    fixedCount++;

    // Keep only the link text, then drop any stray brackets left behind.
    const cleanedAlt = altText.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');
    const finalAlt = cleanedAlt.replace(/[\[\]]/g, '');

    console.log(` π§ Fixed: "${altText.substring(0, 50)}..." -> "${finalAlt.substring(0, 50)}..."`);

    // Re-emit the image; returning an empty string here would delete it.
    return `![${finalAlt}](${imagePath})`;
  });

  if (fixedCount > 0) {
    console.log(` β
Fixed ${fixedCount} image(s) with embedded links in alt text`);
  } else {
    console.log(' βΉοΈ No images with embedded links found');
  }

  return fixed;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Rewrite links that point at notion.so.
 *
 * Links of the form `https://www.notion.so/<segment>/<page>` become in-page
 * anchors (`#<page>`); every other notion.so link is replaced by its text.
 *
 * @param {string} content - Markdown content.
 * @returns {string} Content with Notion links rewritten.
 */
function fixNotionLinks(content) {
  console.log(' π Fixing Notion internal links...');

  const WORKSPACE_PAGE_LINK = /\[([^\]]+)\]\(https:\/\/www\.notion\.so\/[^/]+\/([^?#)]+)\)/g;
  const ANY_NOTION_LINK = /\[([^\]]+)\]\(https:\/\/www\.notion\.so\/[^)]*\)/g;

  let fixedCount = 0;

  const withAnchors = content.replace(WORKSPACE_PAGE_LINK, (whole, label, pageId) => {
    fixedCount += 1;
    return `[${label}](#${pageId})`;
  });

  // Whatever still points at notion.so cannot be mapped to an anchor.
  const withoutNotionLinks = withAnchors.replace(ANY_NOTION_LINK, (whole, label) => {
    fixedCount += 1;
    return label;
  });

  if (fixedCount > 0) {
    console.log(` β
Fixed ${fixedCount} Notion link(s)`);
  }

  return withoutNotionLinks;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Repair JSX/HTML attributes that the Notion conversion turned into Markdown.
 *
 * Two families of rules are applied, in a fixed order:
 *  1. attribute names wrapped in `*` or `**` (e.g. `<Tag *attr*="v">`) are
 *     unwrapped and the tag is emitted self-closed;
 *  2. `src` values of iframe / video / audio / embed / object tags that were
 *     turned into `[text](url)` links are replaced by the link text.
 *
 * @param {string} content - Markdown/MDX content.
 * @returns {string} Content with repaired attributes.
 */
function fixJsxAttributes(content) {
  console.log(' π§ Fixing JSX attributes corrupted by Notion conversion...');

  const quotedAttr = (tagName, attribute, value) => `<${tagName} ${attribute}="${value}" />`;
  const bareAttr = (tagName, attribute, value) => `<${tagName} ${attribute}=${value} />`;

  // Each entry is [pattern, builder]; builders receive the capture groups only.
  const rules = [
    // Attribute name wrapped in single asterisks.
    [/<(\w+)\s+\*\s*([^*\s]+)\s*\*\s*=\s*"([^"]*)"\s*\/?>/g, quotedAttr],
    [/<(\w+)\s+\*\s*([^*\s]+)\s*\*\s*=\s*([^>\s\/]+)\s*\/?>/g, bareAttr],
    // Attribute name wrapped in double asterisks.
    [/<(\w+)\s+\*\*\s*([^*\s]+)\s*\*\*\s*=\s*"([^"]*)"\s*\/?>/g, quotedAttr],
    [/<(\w+)\s+\*\*\s*([^*\s]+)\s*\*\*\s*=\s*([^>\s\/]+)\s*\/?>/g, bareAttr],
    // iframe with a Markdown link as src: paired tag first, then self-closing.
    [
      /<iframe([^>]*?)\ssrc=[""''""\u201C\u201D\u2018\u2019]\[([^\]]+)\]\([^)]+\)[""''""\u201C\u201D\u2018\u2019]([^>]*?)>\s*<\/iframe>/gi,
      (before, urlText, after) => `<iframe${before} src="${urlText}"${after}></iframe>`,
    ],
    [
      /<iframe([^>]*?)\ssrc=[""''""\u201C\u201D\u2018\u2019]\[([^\]]+)\]\([^)]+\)[""''""\u201C\u201D\u2018\u2019]([^>]*?)\s*\/?>/gi,
      (before, urlText, after) => `<iframe${before} src="${urlText}"${after} />`,
    ],
    // Other media tags with a Markdown link as src: paired tag first, then self-closing.
    [
      /<(video|audio|embed|object)([^>]*?)\ssrc=[""''""\u201C\u201D\u2018\u2019]\[([^\]]+)\]\([^)]+\)[""''""\u201C\u201D\u2018\u2019]([^>]*?)>\s*<\/\1>/gi,
      (tagName, before, urlText, after) => `<${tagName}${before} src="${urlText}"${after}></${tagName}>`,
    ],
    [
      /<(video|audio|embed|object)([^>]*?)\ssrc=[""''""\u201C\u201D\u2018\u2019]\[([^\]]+)\]\([^)]+\)[""''""\u201C\u201D\u2018\u2019]([^>]*?)\s*\/?>/gi,
      (tagName, before, urlText, after) => `<${tagName}${before} src="${urlText}"${after} />`,
    ],
  ];

  let fixedCount = 0;
  let repaired = content;

  for (const [pattern, build] of rules) {
    repaired = repaired.replace(pattern, (whole, ...groups) => {
      fixedCount++;
      return build(...groups);
    });
  }

  if (fixedCount > 0) {
    console.log(` β
Fixed ${fixedCount} corrupted JSX attribute(s)`);
  }

  return repaired;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Normalize Markdown image references.
 *
 * - Images without alt text get the file name (query string removed) as alt text.
 * - Query strings are stripped from image paths.
 *
 * Both replacements re-emit the image; returning an empty string from the
 * replacers would delete every matched image from the document.
 *
 * @param {string} content - Markdown content.
 * @returns {string} Content with normalized image references.
 */
function optimizeImages(content) {
  console.log(' πΌοΈ Optimizing images...');

  let optimizedCount = 0;

  // Images with empty alt text: derive the alt text from the file name.
  const withAltText = content.replace(/!\[\]\(([^)]+)\)/g, (match, src) => {
    optimizedCount++;
    const filename = basename(src.split('?')[0]);
    return `![${filename}](${src})`;
  });

  // Images whose path carries a query string: keep only the part before `?`.
  const withoutQueries = withAltText.replace(/!\[([^\]]*)\]\(([^)]+)\?[^)]*\)/g, (match, alt, src) => {
    optimizedCount++;
    return `![${alt}](${src})`;
  });

  if (optimizedCount > 0) {
    console.log(` β
Optimized ${optimizedCount} image(s)`);
  }

  return withoutQueries;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Shift every Markdown heading one level down (`#` -> `##`, ... `#####` -> `######`).
 *
 * Level-6 headings are left unchanged because no deeper level exists. Lines
 * inside fenced code blocks are never touched, so `# comment` lines in shell
 * or Python snippets are not turned into headings of a different level.
 *
 * @param {string} content - Markdown content.
 * @returns {string} Content with shifted headings.
 */
function shiftHeadingLevels(content) {
  console.log(' π Shifting heading levels down by one...');

  let shiftedCount = 0;
  // Marker of the currently open fence (e.g. "```"), or null outside code blocks.
  let openFence = null;

  const shiftedLines = content.split('\n').map((line) => {
    const fence = line.match(/^ {0,3}(`{3,}|~{3,})/);
    if (fence) {
      const marker = fence[1];
      if (openFence === null) {
        openFence = marker;
      } else if (
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        line.slice(fence[0].length).trim() === ''
      ) {
        // A closing fence uses the same character, is at least as long as
        // the opening one, and carries no info string.
        openFence = null;
      }
      return line;
    }

    // One to five hashes followed by a space; six hashes cannot be shifted.
    if (openFence === null && /^#{1,5} /.test(line)) {
      shiftedCount++;
      return `#${line}`;
    }

    return line;
  });

  console.log(` β
Shifted ${shiftedCount} heading level(s)`);
  return shiftedLines.join('\n');
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Turn every "```text" fence line (followed by a newline) back into a plain
 * "```" fence line.
 *
 * The number of replacements is counted while replacing; counting matches
 * after the replacement would always yield zero and the summary line would
 * never be printed.
 *
 * @param {string} content - Markdown content.
 * @returns {string} Content with "```text" fence lines reduced to "```".
 */
function fixCodeBlockEndings(content) {
  console.log(' π» Fixing code block endings...');

  let fixedCount = 0;

  const fixed = content.replace(/```text\n/g, () => {
    fixedCount++;
    return '```\n';
  });

  if (fixedCount > 0) {
    console.log(` β
Fixed ${fixedCount} code block ending(s)`);
  }

  return fixed;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Collapse runs of four or more consecutive newlines into exactly two.
 *
 * @param {string} content - Markdown content.
 * @returns {string} Content without excessive blank lines.
 */
function cleanEmptyLines(content) {
  console.log(' π Cleaning excessive empty lines...');

  const countLines = (text) => text.split('\n').length;

  const cleanedContent = content.replace(/\n{4,}/g, '\n\n');
  const removedLines = countLines(content) - countLines(cleanedContent);

  if (removedLines > 0) {
    console.log(` β
Removed ${removedLines} excessive empty line(s)`);
  }

  return cleanedContent;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Normalize fenced code blocks.
 *
 * - An opening fence without an info string gets the `text` language.
 * - Non-breaking spaces (U+00A0) inside code blocks become regular spaces.
 *
 * The content is scanned line by line while tracking whether a fence is open,
 * so only opening fences are labelled. Labelling closing fences as well would
 * turn them into "```text", keep the non-breaking-space clean-up from ever
 * finding the end of a block, and could swallow the blank line after a block.
 *
 * @param {string} content - Markdown content.
 * @returns {string} Content with normalized code blocks.
 */
function fixCodeBlocks(content) {
  console.log(' π» Fixing code blocks...');

  let fixedCount = 0;
  // Backtick run of the currently open fence, or null outside code blocks.
  let openFence = null;

  const fixedLines = content.split('\n').map((line) => {
    // A backtick fence line: 3+ backticks, then an info string without backticks.
    const fence = line.match(/^(`{3,})([^`]*)$/);

    if (openFence === null) {
      if (!fence) {
        return line;
      }
      openFence = fence[1];
      if (fence[2].trim() !== '') {
        return line; // already carries a language
      }
      fixedCount++;
      // Preserve a carriage return so CRLF files keep their line endings.
      return `${fence[1]}text${line.endsWith('\r') ? '\r' : ''}`;
    }

    if (fence && fence[1].length >= openFence.length && fence[2].trim() === '') {
      openFence = null; // closing fence: leave untouched
      return line;
    }

    if (!line.includes('\u00A0')) {
      return line;
    }
    fixedCount++;
    return line.replace(/\u00A0/g, ' ');
  });

  if (fixedCount > 0) {
    console.log(` β
Fixed ${fixedCount} code block(s)`);
  }

  return fixedLines.join('\n');
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Normalize Markdown table rows and repair mismatched separator rows.
 *
 * - Every `| ... |` row is re-emitted with trimmed cells separated by ` | `.
 *   Empty cells are preserved; dropping them would shift the remaining cells
 *   into the wrong columns.
 * - When the separator row under a header has a different number of cells
 *   than the header, it is regenerated with one `---` per header cell.
 *
 * Only spaces and tabs are accepted as trailing whitespace so a match never
 * extends across a newline and swallows the blank line that ends a table.
 *
 * @param {string} content - Markdown content.
 * @returns {string} Content with normalized tables.
 */
function optimizeTables(content) {
  console.log(' π Optimizing tables...');

  let optimizedCount = 0;

  const splitCells = (row) => row.split('|').map((cell) => cell.trim());

  // Pass 1: normalize the spacing of every table row.
  const normalized = content.replace(/^\|(.+)\|[ \t]*$/gm, (match, row) => {
    const cells = splitCells(row);

    // A row without any cell content is left exactly as written.
    if (cells.every((cell) => cell.length === 0)) {
      return match;
    }

    optimizedCount++;
    return `| ${cells.join(' | ')} |`;
  });

  // Pass 2: make the separator row match the header's column count.
  const repaired = normalized.replace(
    /^\|(.+)\|[ \t]*\r?\n\|([-: \t|]+)\|[ \t]*$/gm,
    (match, header, separator) => {
      const headerCells = splitCells(header);
      const separatorCells = splitCells(separator);

      if (headerCells.length === separatorCells.length) {
        return match;
      }

      optimizedCount++;
      const newSeparator = headerCells.map(() => '---').join(' | ');
      return `| ${headerCells.join(' | ')} |\n| ${newSeparator} |`;
    }
  );

  if (optimizedCount > 0) {
    console.log(` β
Optimized ${optimizedCount} table(s)`);
  }

  return repaired;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Replace every "```html" fenced block by its trimmed inner content so the
 * markup ends up directly in the document instead of inside a code block.
 *
 * @param {string} content - Markdown content.
 * @returns {string} Content with HTML code blocks unwrapped.
 */
function unwrapHtmlCodeBlocks(content) {
  console.log(' π§ Unwrapping HTML code blocks for MDX integration...');

  const HTML_FENCE = /```html\s*\n([\s\S]*?)\n```/g;
  let unwrappedCount = 0;

  const unwrapped = content.replace(HTML_FENCE, (whole, inner) => {
    unwrappedCount += 1;

    const markup = inner.trim();
    console.log(` π§ Unwrapped HTML code block (${markup.length} chars)`);

    return markup;
  });

  if (unwrappedCount === 0) {
    console.log(' βΉοΈ No HTML code blocks found to unwrap');
  } else {
    console.log(` β
Unwrapped ${unwrappedCount} HTML code block(s) for MDX integration`);
  }

  return unwrapped;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Replace the "plain text" info string on code fences by an unlabelled fence,
 * keeping the code itself unchanged.
 *
 * @param {string} content - Markdown content.
 * @returns {string} Content with "```plain text" blocks rewritten as "```" blocks.
 */
function fixPlainTextCodeBlocks(content) {
  console.log(' π§ Fixing plain text code blocks...');

  const PLAIN_TEXT_FENCE = /```plain text\s*\n([\s\S]*?)\n```/g;
  let fixedCount = 0;

  const rewritten = content.replace(PLAIN_TEXT_FENCE, (whole, body) => {
    fixedCount += 1;
    console.log(` π§ Fixed plain text code block (${body.length} chars)`);
    return ['```', body, '```'].join('\n');
  });

  if (fixedCount === 0) {
    console.log(' βΉοΈ No plain text code blocks found to fix');
  } else {
    console.log(` β
Fixed ${fixedCount} plain text code block(s)`);
  }

  return rewritten;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Build a YAML front matter block from Notion page properties.
 *
 * Always emits `title` (default "Untitled"), `published` (today's date as
 * YYYY-MM-DD) and `tableOfContentsAutoCollapse`; `description`, `tags` and
 * `author` are added when present. Array values are written as YAML lists,
 * every other value as a double-quoted scalar with quotes and backslashes
 * escaped so the result stays valid YAML.
 *
 * @param {object} pageProperties - Properties read from the Notion page.
 * @param {string} [pageProperties.title]
 * @param {string} [pageProperties.description]
 * @param {string[]|string} [pageProperties.tags]
 * @param {string} [pageProperties.author]
 * @returns {string} Front matter delimited by `---` lines, followed by a blank line.
 */
export function generateFrontmatter(pageProperties) {
  console.log(' π Generating frontmatter from Notion properties...');

  const frontmatter = {
    title: pageProperties.title || 'Untitled',
    published: new Date().toISOString().split('T')[0],
    tableOfContentsAutoCollapse: true,
  };

  // Optional properties are only emitted when they carry a value.
  if (pageProperties.description) {
    frontmatter.description = pageProperties.description;
  }
  if (pageProperties.tags) {
    frontmatter.tags = pageProperties.tags;
  }
  if (pageProperties.author) {
    frontmatter.author = pageProperties.author;
  }

  const yamlLines = Object.entries(frontmatter).map(([key, value]) => {
    if (Array.isArray(value)) {
      return `${key}:\n${value.map((v) => ` - ${v}`).join('\n')}`;
    }
    // JSON string syntax is a valid YAML double-quoted scalar and escapes
    // embedded quotes, backslashes and control characters.
    return `${key}: ${JSON.stringify(String(value))}`;
  });

  return `---\n${yamlLines.join('\n')}\n---\n\n`;
}
|
|
|
|
|
/**
 * Command-line entry point: read a Markdown file, post-process it and write
 * the result.
 *
 * Arguments are taken from `process.argv`: the first two non-option
 * arguments ending in `.md` are the input and output file. Without an output
 * file the result is written next to the input as `<name>.processed.md`.
 * Exits with code 1 when the input is missing or processing fails.
 *
 * `postProcessMarkdown` is asynchronous, so its result is awaited before it
 * is written; passing the pending Promise to `writeFileSync` would fail.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);

  if (args.includes('--help') || args.includes('-h')) {
    console.log(`
π§ Notion Markdown Post-Processor

Usage:
node post-processor.mjs [options] [input-file] [output-file]

Options:
--verbose Show detailed processing information
--help, -h Show this help

Examples:
# Process a single file
node post-processor.mjs input.md output.md

# Process with verbose output
node post-processor.mjs --verbose input.md output.md
`);
    process.exit(0);
  }

  const verbose = args.includes('--verbose');
  const isMarkdownArg = (arg) => !arg.startsWith('--') && arg.endsWith('.md');
  const inputFile = args.find(isMarkdownArg);
  const outputFile = args.find((arg) => isMarkdownArg(arg) && arg !== inputFile);

  if (!inputFile) {
    console.error('β Please provide an input markdown file');
    process.exit(1);
  }

  if (!existsSync(inputFile)) {
    console.error(`β Input file not found: ${inputFile}`);
    process.exit(1);
  }

  try {
    console.log(`π Reading: ${inputFile}`);
    const content = readFileSync(inputFile, 'utf8');

    const processedContent = await postProcessMarkdown(content);

    // Only the trailing extension is rewritten; a plain string replace would
    // hit the first ".md" anywhere in the path (e.g. in a directory name).
    const finalOutputFile = outputFile || inputFile.replace(/\.md$/, '.processed.md');
    writeFileSync(finalOutputFile, processedContent);

    console.log(`β
Processed: ${finalOutputFile}`);

    if (verbose) {
      console.log(`π Input: ${content.length} chars β Output: ${processedContent.length} chars`);
    }
  } catch (error) {
    console.error('β Processing failed:', error.message);
    process.exit(1);
  }
}
|
|
|
|
|
|
|
|
// Run the CLI only when this module is executed directly, not when it is imported.
// The module URL is converted to a file-system path before comparing: a
// hand-built `file://` string never matches for paths containing spaces or
// other percent-encoded characters, nor for Windows paths.
if (process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1]) {
  main();
}
|
|
|