Spaces:
Running
Running
| import { Client } from '@notionhq/client'; | |
| /** | |
| * Notion Metadata Extractor | |
| * Extracts document metadata from Notion pages for frontmatter generation | |
| */ | |
| /** | |
| * Extract metadata from Notion page | |
| * @param {string} pageId - Notion page ID | |
| * @param {string} notionToken - Notion API token | |
| * @returns {object} - Extracted metadata object | |
| */ | |
| export async function extractNotionMetadata(pageId, notionToken) { | |
| const notion = new Client({ | |
| auth: notionToken, | |
| }); | |
| const metadata = {}; | |
| try { | |
| // Get page information | |
| const page = await notion.pages.retrieve({ page_id: pageId }); | |
| // Extract title from page properties | |
| if (page.properties.title && page.properties.title.title && page.properties.title.title.length > 0) { | |
| metadata.title = page.properties.title.title[0].plain_text; | |
| } | |
| // Extract creation date | |
| if (page.created_time) { | |
| metadata.published = new Date(page.created_time).toLocaleDateString('en-US', { | |
| year: 'numeric', | |
| month: 'short', | |
| day: '2-digit' | |
| }); | |
| metadata.created_time = page.created_time; | |
| } | |
| // Extract last edited date | |
| if (page.last_edited_time) { | |
| metadata.last_edited_time = page.last_edited_time; | |
| } | |
| // Extract created by | |
| if (page.created_by && page.created_by.id) { | |
| metadata.created_by = page.created_by.id; | |
| } | |
| // Extract last edited by | |
| if (page.last_edited_by && page.last_edited_by.id) { | |
| metadata.last_edited_by = page.last_edited_by.id; | |
| } | |
| // Extract page URL | |
| metadata.notion_url = page.url; | |
| // Extract page ID | |
| metadata.notion_id = page.id; | |
| // Extract parent information | |
| if (page.parent) { | |
| metadata.parent = { | |
| type: page.parent.type, | |
| id: page.parent[page.parent.type]?.id || page.parent[page.parent.type] | |
| }; | |
| } | |
| // Extract cover image if available | |
| if (page.cover) { | |
| metadata.cover = { | |
| type: page.cover.type, | |
| url: page.cover[page.cover.type]?.url || page.cover[page.cover.type] | |
| }; | |
| } | |
| // Extract icon if available | |
| if (page.icon) { | |
| metadata.icon = { | |
| type: page.icon.type, | |
| emoji: page.icon.emoji, | |
| url: page.icon.external?.url || page.icon.file?.url | |
| }; | |
| } | |
| // Extract authors and custom properties | |
| const customProperties = {}; | |
| for (const [key, value] of Object.entries(page.properties)) { | |
| if (key !== 'title') { // Skip title as it's handled separately | |
| const extractedValue = extractPropertyValue(value); | |
| // Check for author-related properties | |
| if (key.toLowerCase().includes('author') || | |
| key.toLowerCase().includes('writer') || | |
| key.toLowerCase().includes('creator') || | |
| value.type === 'people') { | |
| metadata.authors = extractedValue; | |
| } else { | |
| customProperties[key] = extractedValue; | |
| } | |
| } | |
| } | |
| // If no authors found in properties, try to get from created_by | |
| if (!metadata.authors && page.created_by) { | |
| try { | |
| const user = await notion.users.retrieve({ user_id: page.created_by.id }); | |
| metadata.authors = [{ | |
| name: user.name || user.id, | |
| id: user.id | |
| }]; | |
| } catch (error) { | |
| console.log(' ⚠️ Could not fetch author from created_by:', error.message); | |
| // Fallback to basic info | |
| metadata.authors = [{ | |
| name: page.created_by.name || page.created_by.id, | |
| id: page.created_by.id | |
| }]; | |
| } | |
| } | |
| if (Object.keys(customProperties).length > 0) { | |
| metadata.properties = customProperties; | |
| } | |
| // Try to extract description from page content (first paragraph) | |
| try { | |
| const blocks = await notion.blocks.children.list({ block_id: pageId }); | |
| const firstParagraph = blocks.results.find(block => | |
| block.type === 'paragraph' && | |
| block.paragraph.rich_text && | |
| block.paragraph.rich_text.length > 0 | |
| ); | |
| if (firstParagraph) { | |
| const description = firstParagraph.paragraph.rich_text | |
| .map(text => text.plain_text) | |
| .join('') | |
| .trim(); | |
| if (description && description.length > 0) { | |
| metadata.description = description.substring(0, 200) + (description.length > 200 ? '...' : ''); | |
| } | |
| } | |
| } catch (error) { | |
| console.log(' ⚠️ Could not extract description from page content'); | |
| } | |
| // Generate tags from page properties | |
| const tags = []; | |
| for (const [key, value] of Object.entries(page.properties)) { | |
| if (value.type === 'multi_select' && value.multi_select) { | |
| value.multi_select.forEach(option => { | |
| tags.push(option.name); | |
| }); | |
| } else if (value.type === 'select' && value.select) { | |
| tags.push(value.select.name); | |
| } | |
| } | |
| if (tags.length > 0) { | |
| metadata.tags = tags; | |
| } | |
| } catch (error) { | |
| console.error('Error extracting Notion metadata:', error.message); | |
| // Return basic metadata if extraction fails | |
| metadata.title = "Notion Article"; | |
| metadata.published = new Date().toLocaleDateString('en-US', { | |
| year: 'numeric', | |
| month: 'short', | |
| day: '2-digit' | |
| }); | |
| } | |
| return metadata; | |
| } | |
| /** | |
| * Extract value from Notion property | |
| * @param {object} property - Notion property object | |
| * @returns {any} - Extracted value | |
| */ | |
| function extractPropertyValue(property) { | |
| switch (property.type) { | |
| case 'rich_text': | |
| return property.rich_text.map(text => text.plain_text).join(''); | |
| case 'title': | |
| return property.title.map(text => text.plain_text).join(''); | |
| case 'number': | |
| return property.number; | |
| case 'select': | |
| return property.select?.name || null; | |
| case 'multi_select': | |
| return property.multi_select.map(option => option.name); | |
| case 'date': | |
| return property.date?.start || null; | |
| case 'checkbox': | |
| return property.checkbox; | |
| case 'url': | |
| return property.url; | |
| case 'email': | |
| return property.email; | |
| case 'phone_number': | |
| return property.phone_number; | |
| case 'created_time': | |
| return property.created_time; | |
| case 'created_by': | |
| return property.created_by?.id || null; | |
| case 'last_edited_time': | |
| return property.last_edited_time; | |
| case 'last_edited_by': | |
| return property.last_edited_by?.id || null; | |
| case 'people': | |
| return property.people.map(person => ({ | |
| name: person.name || person.id, | |
| id: person.id | |
| })); | |
| default: | |
| return null; | |
| } | |
| } | |
| /** | |
| * Generate YAML frontmatter from metadata object | |
| * @param {object} metadata - Metadata object | |
| * @returns {string} - YAML frontmatter string | |
| */ | |
| export function generateNotionFrontmatter(metadata) { | |
| let frontmatter = '---\n'; | |
| // Title | |
| if (metadata.title) { | |
| frontmatter += `title: "${metadata.title}"\n`; | |
| } | |
| // Description | |
| if (metadata.description) { | |
| frontmatter += `description: "${metadata.description}"\n`; | |
| } | |
| // Publication date | |
| if (metadata.published) { | |
| frontmatter += `published: "${metadata.published}"\n`; | |
| } | |
| // Authors | |
| if (metadata.authors && metadata.authors.length > 0) { | |
| frontmatter += 'authors:\n'; | |
| metadata.authors.forEach(author => { | |
| if (typeof author === 'string') { | |
| frontmatter += ` - name: "${author}"\n`; | |
| } else if (author.name) { | |
| frontmatter += ` - name: "${author.name}"\n`; | |
| } | |
| }); | |
| } | |
| // Tags | |
| if (metadata.tags && metadata.tags.length > 0) { | |
| frontmatter += 'tags:\n'; | |
| metadata.tags.forEach(tag => { | |
| frontmatter += ` - "${tag}"\n`; | |
| }); | |
| } | |
| // Notion metadata removed - keeping only standard frontmatter fields | |
| // Cover image | |
| if (metadata.cover && metadata.cover.url) { | |
| frontmatter += `cover: "${metadata.cover.url}"\n`; | |
| } | |
| // Icon | |
| if (metadata.icon) { | |
| if (metadata.icon.emoji) { | |
| frontmatter += `icon: "${metadata.icon.emoji}"\n`; | |
| } else if (metadata.icon.url) { | |
| frontmatter += `icon: "${metadata.icon.url}"\n`; | |
| } | |
| } | |
| // Custom properties removed - keeping frontmatter clean and standard | |
| // Default Astro configuration | |
| frontmatter += 'tableOfContentsAutoCollapse: true\n'; | |
| frontmatter += '---\n\n'; | |
| return frontmatter; | |
| } | |
| /** | |
| * Extract and generate frontmatter from Notion page | |
| * @param {string} pageId - Notion page ID | |
| * @param {string} notionToken - Notion API token | |
| * @returns {string} - Complete YAML frontmatter | |
| */ | |
| export async function extractAndGenerateNotionFrontmatter(pageId, notionToken) { | |
| const metadata = await extractNotionMetadata(pageId, notionToken); | |
| return generateNotionFrontmatter(metadata); | |
| } | |