import { NextRequest, NextResponse } from 'next/server';
import { whoAmI, createRepo, uploadFiles, datasetInfo } from '@huggingface/hub';
import { readdir, stat, readFile } from 'fs/promises';
import path from 'path';

/** Lower-case file extensions that the `listFiles` action reports as images. */
const IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.webp', '.bmp'];

/**
 * Extracts a message string from a caught value without assuming its type.
 *
 * @param error - Whatever was thrown.
 * @returns The string `message` of the thrown value, or `''` when it has none,
 *   so callers can write `getErrorMessage(e) || 'fallback'`.
 */
function getErrorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error === 'object' && error !== null && 'message' in error) {
    const { message } = error as { message?: unknown };
    return typeof message === 'string' ? message : '';
  }
  return '';
}

/**
 * Route handler that dispatches on the `action` field of the JSON request body.
 *
 * Every action requires `token`; a missing token yields a 400 response.
 * Supported actions:
 * - `whoami`: calls `whoAmI` with the token and returns `{ user }`, or 401 on failure.
 * - `createDataset`: calls `createRepo` for `datasets/<namespace>/<datasetName>`
 *   with `private: false`.
 * - `uploadDataset`: collects files from `artifacts` (entries with `localPath` and
 *   `repoPath`) or, when no artifacts are given, from the `datasetPath` directory;
 *   optionally adds `manifest` as `manifest.json`; then calls `uploadFiles`.
 * - `listFiles`: lists image and `.txt` files directly inside `datasetPath`.
 * - `validateDataset`: calls `datasetInfo` for `datasetId` and reports whether it exists.
 *
 * Any other action yields a 400 `Invalid action` response.
 *
 * NOTE(review): `datasetPath` and `artifacts[].localPath` come straight from the
 * request body and are read from the local filesystem without restriction to a
 * root directory. Confirm this route is only reachable by trusted callers.
 *
 * @param request - Incoming request whose body is parsed as JSON.
 * @returns A JSON response; failures carry an `error` string and a 4xx/5xx status.
 */
export async function POST(request: NextRequest) {
  try {
    const body = await request.json();
    const { action, token, namespace, datasetName, datasetPath, datasetId, artifacts, manifest } = body;

    if (!token) {
      return NextResponse.json({ error: 'HF token is required' }, { status: 400 });
    }

    switch (action) {
      case 'whoami':
        try {
          const user = await whoAmI({ accessToken: token });
          return NextResponse.json({ user });
        } catch {
          return NextResponse.json({ error: 'Invalid token or network error' }, { status: 401 });
        }

      case 'createDataset':
        try {
          if (!namespace || !datasetName) {
            return NextResponse.json({ error: 'Namespace and dataset name required' }, { status: 400 });
          }

          const repoId = `datasets/${namespace}/${datasetName}`;

          // Create repository
          await createRepo({
            repo: repoId,
            accessToken: token,
            private: false,
          });

          return NextResponse.json({ success: true, repoId });
        } catch (error: unknown) {
          const message = getErrorMessage(error);
          if (message.includes('already exists')) {
            // An existing repository is treated as success.
            // NOTE(review): this repoId omits the `datasets/` prefix that the
            // success path above includes; kept as-is for callers — confirm
            // which form they expect.
            return NextResponse.json({ success: true, repoId: `${namespace}/${datasetName}`, exists: true });
          }
          return NextResponse.json({ error: message || 'Failed to create dataset' }, { status: 500 });
        }

      case 'uploadDataset':
        try {
          if (!namespace || !datasetName) {
            return NextResponse.json({ error: 'Missing required parameters' }, { status: 400 });
          }

          const repoId = `datasets/${namespace}/${datasetName}`;
          const structuredArtifacts = Array.isArray(artifacts) ? artifacts : [];
          const hasStructuredArtifacts = structuredArtifacts.length > 0;

          if (!hasStructuredArtifacts && !datasetPath) {
            return NextResponse.json({ error: 'Dataset path could not be resolved' }, { status: 400 });
          }

          const filesToUpload: { path: string; content: Blob }[] = [];
          // Repo paths already queued; the first file queued for a path wins.
          const uploadedPaths = new Set<string>();

          // Converts backslashes to forward slashes and strips leading slashes.
          const normalizeRepoPath = (value: string) => value.replace(/\\/g, '/').replace(/^\/+/, '');

          // Queues in-memory content unless the path is empty or already queued.
          const addUploadContent = (repoFilePath: string, content: Blob) => {
            const normalizedRepoPath = normalizeRepoPath(repoFilePath);
            if (!normalizedRepoPath || uploadedPaths.has(normalizedRepoPath)) {
              return;
            }
            uploadedPaths.add(normalizedRepoPath);
            filesToUpload.push({ path: normalizedRepoPath, content });
          };

          // Reads a local file fully into memory and queues it under repoFilePath.
          const addUploadFile = async (absolutePath: string, repoFilePath: string) => {
            const buffer = await readFile(absolutePath);
            const blob = new Blob([buffer]);
            addUploadContent(repoFilePath, blob);
          };

          // Recursively queues every regular file under basePath, mirroring the
          // directory layout beneath repoPrefix. Entries that are neither files
          // nor directories are ignored.
          const walkDirectory = async (basePath: string, repoPrefix: string) => {
            const entries = await readdir(basePath, { withFileTypes: true });
            for (const entry of entries) {
              const entryPath = path.join(basePath, entry.name);
              const entryRepoPath = repoPrefix ? `${repoPrefix}/${entry.name}` : entry.name;
              if (entry.isDirectory()) {
                await walkDirectory(entryPath, entryRepoPath);
              } else if (entry.isFile()) {
                await addUploadFile(entryPath, entryRepoPath);
              }
            }
          };

          // Queues one artifact, which may be a directory (walked recursively)
          // or a single file. Throws when the path is missing or of another type.
          const processArtifact = async (localPath: string, repoPath: string) => {
            const resolvedPath = path.resolve(localPath);
            let stats;
            try {
              stats = await stat(resolvedPath);
            } catch {
              throw new Error(`Dataset path does not exist: ${localPath}`);
            }

            const normalizedRepoPrefix = repoPath ? normalizeRepoPath(repoPath) : '';

            if (stats.isDirectory()) {
              await walkDirectory(resolvedPath, normalizedRepoPrefix);
            } else if (stats.isFile()) {
              let destination = normalizedRepoPrefix;
              if (!destination || destination.endsWith('/')) {
                // Empty or slash-terminated prefix: place the file inside it.
                destination = `${destination}${path.basename(resolvedPath)}`;
              } else if (!path.posix.extname(destination)) {
                // A prefix without an extension is treated as a directory name.
                destination = `${destination}/${path.basename(resolvedPath)}`;
              }
              // Otherwise the prefix already names the destination file.
              await addUploadFile(resolvedPath, destination);
            } else {
              throw new Error(`Unsupported artifact type for path: ${localPath}`);
            }
          };

          if (hasStructuredArtifacts) {
            for (const artifact of structuredArtifacts) {
              // Entries lacking either path are skipped rather than rejected.
              if (!artifact?.localPath || !artifact?.repoPath) {
                continue;
              }
              await processArtifact(artifact.localPath, artifact.repoPath);
            }
          } else {
            const resolvedDatasetPath = path.resolve(datasetPath);
            let datasetStats;
            try {
              datasetStats = await stat(resolvedDatasetPath);
            } catch {
              return NextResponse.json({ error: 'Dataset path does not exist' }, { status: 400 });
            }

            if (!datasetStats.isDirectory()) {
              return NextResponse.json({ error: 'Dataset path must be a directory' }, { status: 400 });
            }

            await walkDirectory(resolvedDatasetPath, '');
          }

          if (manifest) {
            // Queued after the files, so an existing manifest.json on disk
            // takes precedence over the generated one.
            const manifestBlob = new Blob([JSON.stringify(manifest, null, 2)], { type: 'application/json' });
            addUploadContent('manifest.json', manifestBlob);
          }

          if (filesToUpload.length === 0) {
            return NextResponse.json({ error: 'No files found to upload for dataset' }, { status: 400 });
          }

          await uploadFiles({
            repo: repoId,
            accessToken: token,
            files: filesToUpload,
          });

          return NextResponse.json({ success: true, repoId });
        } catch (error: unknown) {
          console.error('Upload error:', error);
          return NextResponse.json({ error: getErrorMessage(error) || 'Failed to upload dataset' }, { status: 500 });
        }

      case 'listFiles':
        try {
          if (!datasetPath) {
            return NextResponse.json({ error: 'Dataset path required' }, { status: 400 });
          }

          const files = await readdir(datasetPath, { withFileTypes: true });
          // Only direct children that are regular files are considered.
          const regularFiles = files.filter(file => file.isFile());

          // Image extensions are matched case-insensitively.
          const imageFiles = regularFiles
            .filter(file => IMAGE_EXTENSIONS.some(ext => file.name.toLowerCase().endsWith(ext)))
            .map(file => ({
              name: file.name,
              path: path.join(datasetPath, file.name),
            }));

          // Caption files are matched on a case-sensitive `.txt` suffix.
          const captionFiles = regularFiles
            .filter(file => file.name.endsWith('.txt'))
            .map(file => ({
              name: file.name,
              path: path.join(datasetPath, file.name),
            }));

          return NextResponse.json({
            images: imageFiles,
            captions: captionFiles,
            total: imageFiles.length,
          });
        } catch (error: unknown) {
          return NextResponse.json({ error: getErrorMessage(error) || 'Failed to list files' }, { status: 500 });
        }

      case 'validateDataset':
        try {
          if (!datasetId) {
            return NextResponse.json({ error: 'Dataset ID required' }, { status: 400 });
          }

          // Try to get dataset info to validate it exists and is accessible
          const dataset = await datasetInfo({
            name: datasetId,
            accessToken: token,
          });

          return NextResponse.json({
            exists: true,
            dataset: {
              id: dataset.id,
              author: dataset.author,
              downloads: dataset.downloads,
              likes: dataset.likes,
              private: dataset.private,
            },
          });
        } catch (error: unknown) {
          const message = getErrorMessage(error);
          // The failure is classified by substrings of the error message.
          if (message.includes('404') || message.includes('not found')) {
            return NextResponse.json({ exists: false }, { status: 200 });
          }
          if (message.includes('401') || message.includes('403')) {
            return NextResponse.json({ error: 'Dataset not accessible with current token' }, { status: 403 });
          }
          return NextResponse.json({ error: message || 'Failed to validate dataset' }, { status: 500 });
        }

      default:
        return NextResponse.json({ error: 'Invalid action' }, { status: 400 });
    }
  } catch (error: unknown) {
    // Reached when the body cannot be parsed as JSON or an error escapes a case handler.
    console.error('HF Hub API error:', error);
    return NextResponse.json({ error: getErrorMessage(error) || 'Internal server error' }, { status: 500 });
  }
}