import { NextRequest, NextResponse } from 'next/server';
import mammoth from 'mammoth';
import ExcelJS from 'exceljs';
import fs from 'fs';
import path from 'path';

// Use /data for Hugging Face Spaces persistent storage
const DATA_DIR = process.env.SPACE_ID ? '/data' : path.join(process.cwd(), 'public', 'data');
const PUBLIC_DIR = path.join(DATA_DIR, 'public');

/** Loosely structured, JSON-serializable payload describing an extracted document. */
type DocumentContent = Record<string, unknown>;

/** Builds the `{ success: false, error }` JSON response used for every client-visible failure. */
function errorResponse(message: string, status: number) {
  return NextResponse.json({ success: false, error: message }, { status });
}

/**
 * Turns a caught value into a string that survives JSON serialization.
 * (`JSON.stringify(new Error('x'))` yields `{}`, which hides the cause.)
 */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Returns the Node.js `code` property (e.g. `ENOENT`) of a caught error, if it has one. */
function errnoCode(error: unknown): string | undefined {
  if (typeof error !== 'object' || error === null) {
    return undefined;
  }
  const code = (error as { code?: unknown }).code;
  return typeof code === 'string' ? code : undefined;
}

/**
 * Joins `fileName` onto `baseDir` and returns the absolute path only when it
 * stays strictly inside `baseDir`; returns `null` for names that would escape
 * it (e.g. via `..` segments) or that contain a NUL byte.
 */
function resolveInside(baseDir: string, fileName: string): string | null {
  if (fileName.includes('\0')) {
    return null;
  }
  const base = path.resolve(baseDir);
  // path.join normalizes the result, so any `..` segments are already collapsed here.
  const candidate = path.join(base, fileName);
  return candidate.startsWith(base + path.sep) ? candidate : null;
}

/** Extracts raw text and an HTML rendering from a Word document using mammoth. */
async function extractDocx(fileBuffer: Buffer): Promise<DocumentContent> {
  try {
    const rawResult = await mammoth.extractRawText({ buffer: fileBuffer });
    const htmlResult = await mammoth.convertToHtml({ buffer: fileBuffer });
    return {
      type: 'docx',
      text: rawResult.value,
      messages: rawResult.messages,
      html: htmlResult.value
    };
  } catch (error) {
    return {
      type: 'docx',
      error: 'Failed to process Word document',
      details: describeError(error)
    };
  }
}

/**
 * Extracts every worksheet of a workbook: name, row/column counts and, per row,
 * the cells visited by `eachCell` with their value, type and formula.
 */
async function extractExcel(fileBuffer: Buffer): Promise<DocumentContent> {
  try {
    const workbook = new ExcelJS.Workbook();
    // NOTE(review): the cast is kept from the original code, presumably to work around a
    // mismatch between ExcelJS's own Buffer typing and Node's Buffer — confirm.
    // NOTE(review): this is the xlsx loader; legacy binary .xls files will presumably fail
    // here and be reported through the error payload below — confirm.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await workbook.xlsx.load(fileBuffer as any);

    const sheets: DocumentContent[] = [];
    workbook.eachSheet((worksheet) => {
      const rows: unknown[][] = [];
      worksheet.eachRow((row) => {
        const cells: unknown[] = [];
        row.eachCell((cell) => {
          cells.push({
            value: cell.value,
            type: cell.type,
            formula: cell.formula
          });
        });
        rows.push(cells);
      });
      sheets.push({
        name: worksheet.name,
        rowCount: worksheet.rowCount,
        columnCount: worksheet.columnCount,
        data: rows
      });
    });

    return { type: 'excel', sheets, sheetCount: sheets.length };
  } catch (error) {
    return {
      type: 'excel',
      error: 'Failed to process Excel spreadsheet',
      details: describeError(error)
    };
  }
}

/** Extracts text, document info, metadata, version and page count from a PDF using pdf-parse. */
async function extractPdf(fileBuffer: Buffer): Promise<DocumentContent> {
  try {
    // Loaded lazily (as in the original code) so the module is only required
    // when a PDF is actually requested.
    // eslint-disable-next-line @typescript-eslint/no-var-requires
    const pdf = require('pdf-parse');
    const data = await pdf(fileBuffer);
    return {
      type: 'pdf',
      text: data.text,
      info: data.info,
      metadata: data.metadata,
      version: data.version,
      numpages: data.numpages
    };
  } catch (error) {
    return {
      type: 'pdf',
      error: 'Failed to process PDF document',
      details: describeError(error)
    };
  }
}

/** Dispatches on the lower-cased file extension and returns the extracted content. */
async function extractContent(
  ext: string,
  fileName: string,
  fileBuffer: Buffer
): Promise<DocumentContent> {
  switch (ext) {
    case 'docx':
      return extractDocx(fileBuffer);

    case 'xlsx':
    case 'xls':
      return extractExcel(fileBuffer);

    case 'pdf':
      return extractPdf(fileBuffer);

    case 'pptx':
    case 'ppt':
      return {
        type: 'powerpoint',
        fileName,
        size: fileBuffer.length,
        message: 'PowerPoint processing requires additional libraries'
      };

    case 'txt':
    case 'md':
    case 'json':
    case 'csv':
      return { type: ext, text: fileBuffer.toString('utf-8') };

    default:
      return {
        type: 'unknown',
        fileName,
        size: fileBuffer.length,
        message: 'Unknown file type'
      };
  }
}

/** Computes character, word, line and paragraph counts for a piece of text. */
function analyzeText(text: string) {
  return {
    characterCount: text.length,
    wordCount: text.split(/\s+/).filter(Boolean).length,
    // An empty document has no lines (a plain split would report 1).
    lineCount: text === '' ? 0 : text.split('\n').length,
    // Blank-line separated blocks; tolerates both LF and CRLF line endings.
    paragraphCount: text.split(/(?:\r?\n){2,}/).filter((block) => block.trim() !== '').length
  };
}

/**
 * Reads a stored file and returns its extracted content as JSON.
 *
 * Expected JSON body:
 * - `fileName`  (required) name of the file, relative to its storage folder.
 * - `isPublic`  (default `false`) truthy to read from the public folder.
 * - `key`       passkey selecting the private folder; required unless `isPublic`.
 * - `operation` (default `'read'`) `'analyze'` additionally attaches text statistics
 *               when the extracted content has a `text` string.
 *
 * Responses: 400 for a malformed body or an invalid/missing file name, 401 for a
 * missing or invalid passkey, 404 when the file does not exist, 500 for unexpected
 * failures. Extraction failures for a specific format are reported inside
 * `content.error` / `content.details` with a 200 status.
 */
export async function POST(request: NextRequest): Promise<NextResponse> {
  try {
    let body: unknown;
    try {
      body = await request.json();
    } catch {
      return errorResponse('Request body must be valid JSON', 400);
    }
    if (typeof body !== 'object' || body === null) {
      return errorResponse('Request body must be a JSON object', 400);
    }

    const {
      fileName,
      isPublic = false,
      operation = 'read',
      key
    } = body as Record<string, unknown>;

    if (typeof fileName !== 'string' || fileName === '') {
      return errorResponse('File name is required', 400);
    }

    let targetDir = PUBLIC_DIR;
    if (!isPublic) {
      if (typeof key !== 'string' || key === '') {
        return errorResponse('Passkey (key) is required for non-public files', 401);
      }
      const sanitizedKey = key.replace(/[^a-zA-Z0-9_-]/g, '');
      // A key made only of stripped characters would resolve to DATA_DIR itself
      // and expose every other passkey folder.
      if (sanitizedKey === '') {
        return errorResponse('Invalid passkey (key)', 401);
      }
      targetDir = path.join(DATA_DIR, sanitizedKey);
    }

    // Refuse names that would leave the selected storage folder.
    const filePath = resolveInside(targetDir, fileName);
    if (filePath === null) {
      return errorResponse('Invalid file name', 400);
    }

    // Read directly instead of checking existence first: one syscall path,
    // no check-then-read race, and directories are reported as "not found".
    let fileBuffer: Buffer;
    try {
      fileBuffer = await fs.promises.readFile(filePath);
    } catch (error) {
      const code = errnoCode(error);
      if (code === 'ENOENT' || code === 'EISDIR' || code === 'ENOTDIR') {
        return errorResponse('File not found', 404);
      }
      throw error;
    }

    const ext = path.extname(fileName).slice(1).toLowerCase();
    const content = await extractContent(ext, fileName, fileBuffer);

    if (operation === 'analyze' && typeof content.text === 'string') {
      content.analysis = analyzeText(content.text);
    }

    return NextResponse.json({
      success: true,
      fileName,
      operation,
      content
    });
  } catch (error) {
    console.error('Error processing document:', error);
    return errorResponse('Failed to process document', 500);
  }
}

/** Returns a static, self-describing summary of this endpoint and its supported formats. */
export async function GET(): Promise<NextResponse> {
  return NextResponse.json({
    message: 'Document processing endpoint',
    endpoint: '/api/documents/process',
    method: 'POST',
    body: {
      fileName: 'Name of the file to process',
      isPublic: 'true/false - whether file is in public folder',
      key: 'Passkey for secure storage (required if not public)',
      operation: 'Operation to perform: read (default), analyze'
    },
    supportedFormats: [
      'docx - Word documents (text extraction)',
      'xlsx/xls - Excel spreadsheets (data extraction)',
      'pdf - PDF files (text and metadata extraction)',
      'pptx/ppt - PowerPoint (metadata only)',
      'txt/md/json/csv - Text files (full content)'
    ]
  });
}