Spaces:
Running
Running
| import { NextRequest, NextResponse } from 'next/server'; | |
| import mammoth from 'mammoth'; | |
| import ExcelJS from 'exceljs'; | |
| import fs from 'fs'; | |
| import path from 'path'; | |
/**
 * Root folder that holds every stored document.
 *
 * When the `SPACE_ID` environment variable is set (Hugging Face Spaces) the
 * persistent `/data` volume is used; otherwise files live in `public/data`
 * under the current working directory.
 *
 * NOTE(review): the local fallback sits inside Next.js's `public/` folder,
 * which Next serves statically — confirm that exposure is intended for
 * key-protected files during local development.
 */
const DATA_DIR = (() => {
  if (process.env.SPACE_ID) {
    return '/data';
  }
  return path.join(process.cwd(), 'public', 'data');
})();

/** Sub-folder of `DATA_DIR` that holds files readable without a passkey. */
const PUBLIC_DIR = path.join(DATA_DIR, 'public');
/** Loosely typed payload returned under `content`; its shape depends on the file type. */
type DocumentContent = Record<string, unknown>;

/**
 * Turns a caught value into a string that survives JSON serialization.
 * `Error` instances would otherwise serialize to `{}` because their fields
 * are not enumerable.
 */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Joins `relativePath` onto `baseDir` and returns the resulting absolute path
 * only when it is strictly inside `baseDir`. Returns `null` for anything that
 * would escape the folder (e.g. `../` segments), that resolves to the folder
 * itself, or that contains a NUL byte.
 */
function resolveInside(baseDir: string, relativePath: string): string | null {
  if (relativePath.includes('\0')) {
    return null;
  }
  const root = path.resolve(baseDir);
  // path.join normalizes `.`/`..` segments, so the prefix check below sees
  // the real target rather than the raw user input.
  const candidate = path.join(root, relativePath);
  const prefix = root.endsWith(path.sep) ? root : root + path.sep;
  return candidate.startsWith(prefix) ? candidate : null;
}

/** Extracts plain text and HTML from a Word (.docx) buffer using mammoth. */
async function extractDocx(fileBuffer: Buffer): Promise<DocumentContent> {
  try {
    const rawResult = await mammoth.extractRawText({ buffer: fileBuffer });
    const htmlResult = await mammoth.convertToHtml({ buffer: fileBuffer });
    return {
      type: 'docx',
      text: rawResult.value,
      messages: rawResult.messages,
      html: htmlResult.value
    };
  } catch (error) {
    return {
      type: 'docx',
      error: 'Failed to process Word document',
      details: describeError(error)
    };
  }
}

/**
 * Reads every sheet/row/cell of a workbook buffer using ExcelJS.
 *
 * NOTE(review): ExcelJS's `xlsx` reader targets the OOXML format; legacy
 * binary `.xls` files will presumably fail to load and end up in the error
 * branch — confirm whether `.xls` should stay advertised.
 */
async function extractExcel(fileBuffer: Buffer): Promise<DocumentContent> {
  try {
    const workbook = new ExcelJS.Workbook();
    // Cast kept from the original code: the call passes a Node Buffer where
    // ExcelJS's typings expect their own Buffer declaration.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await workbook.xlsx.load(fileBuffer as any);

    const sheets: DocumentContent[] = [];
    workbook.eachSheet((worksheet) => {
      const rows: unknown[][] = [];
      worksheet.eachRow((row) => {
        const cells: unknown[] = [];
        row.eachCell((cell) => {
          cells.push({
            value: cell.value,
            type: cell.type,
            formula: cell.formula
          });
        });
        rows.push(cells);
      });
      sheets.push({
        name: worksheet.name,
        rowCount: worksheet.rowCount,
        columnCount: worksheet.columnCount,
        data: rows
      });
    });

    return {
      type: 'excel',
      sheets,
      sheetCount: sheets.length
    };
  } catch (error) {
    return {
      type: 'excel',
      error: 'Failed to process Excel spreadsheet',
      details: describeError(error)
    };
  }
}

/** Extracts text and document metadata from a PDF buffer using pdf-parse. */
async function extractPdf(fileBuffer: Buffer): Promise<DocumentContent> {
  try {
    // Kept as a lazy require (as in the original code) so the module is only
    // loaded when a PDF is actually processed.
    // eslint-disable-next-line @typescript-eslint/no-var-requires
    const pdf = require('pdf-parse');
    const data = await pdf(fileBuffer);
    return {
      type: 'pdf',
      text: data.text,
      info: data.info,
      metadata: data.metadata,
      version: data.version,
      numpages: data.numpages
    };
  } catch (error) {
    return {
      type: 'pdf',
      error: 'Failed to process PDF document',
      details: describeError(error)
    };
  }
}

/**
 * Dispatches on the lower-cased file extension and builds the `content`
 * payload for the response. Parser failures are reported inside the payload
 * (`error` / `details`) rather than thrown.
 */
async function extractContent(
  ext: string | undefined,
  fileName: string,
  fileBuffer: Buffer
): Promise<DocumentContent> {
  switch (ext) {
    case 'docx':
      return extractDocx(fileBuffer);
    case 'xlsx':
    case 'xls':
      return extractExcel(fileBuffer);
    case 'pdf':
      return extractPdf(fileBuffer);
    case 'pptx':
    case 'ppt':
      return {
        type: 'powerpoint',
        fileName,
        size: fileBuffer.length,
        message: 'PowerPoint processing requires additional libraries'
      };
    case 'txt':
    case 'md':
    case 'json':
    case 'csv':
      return {
        type: ext,
        text: fileBuffer.toString('utf-8')
      };
    default:
      return {
        type: 'unknown',
        fileName,
        size: fileBuffer.length,
        message: 'Unknown file type'
      };
  }
}

/**
 * POST handler: reads a stored document and returns its extracted content.
 *
 * Expected JSON body:
 * - `fileName`  (required) path of the file relative to its storage folder.
 * - `isPublic`  (default `false`) read from the public folder when truthy.
 * - `key`       passkey selecting the private folder; required unless public.
 * - `operation` (default `'read'`) `'analyze'` additionally attaches
 *               character/word/line/paragraph counts when text was extracted.
 *
 * Responses:
 * - 200 `{ success: true, fileName, operation, content }`
 * - 400 when `fileName` is missing, not a string, or points outside the
 *       storage folder
 * - 401 when a private file is requested without a usable passkey
 * - 404 when the file does not exist (or is a directory)
 * - 500 on any unexpected failure
 */
export async function POST(request: NextRequest) {
  try {
    // A literal JSON `null` body would make the destructuring below throw.
    const body = (await request.json()) ?? {};
    const { fileName, isPublic = false, operation = 'read', key } = body;

    if (typeof fileName !== 'string' || fileName.length === 0) {
      return NextResponse.json(
        { success: false, error: 'File name is required' },
        { status: 400 }
      );
    }

    let targetDir = PUBLIC_DIR;
    if (!isPublic) {
      if (typeof key !== 'string' || key.length === 0) {
        return NextResponse.json(
          { success: false, error: 'Passkey (key) is required for non-public files' },
          { status: 401 }
        );
      }
      const sanitizedKey = key.replace(/[^a-zA-Z0-9_-]/g, '');
      // A key made only of stripped characters would collapse to DATA_DIR
      // itself and expose every user's folder.
      if (sanitizedKey.length === 0) {
        return NextResponse.json(
          { success: false, error: 'Invalid passkey' },
          { status: 401 }
        );
      }
      targetDir = path.join(DATA_DIR, sanitizedKey);
    }

    // Reject names such as `../other-key/secret.txt` that escape targetDir.
    const filePath = resolveInside(targetDir, fileName);
    if (filePath === null) {
      return NextResponse.json(
        { success: false, error: 'Invalid file name' },
        { status: 400 }
      );
    }

    let fileBuffer: Buffer;
    try {
      fileBuffer = await fs.promises.readFile(filePath);
    } catch (error) {
      const code = (error as NodeJS.ErrnoException).code;
      // Missing files and directories are both "not a readable file" to the caller.
      if (code === 'ENOENT' || code === 'ENOTDIR' || code === 'EISDIR') {
        return NextResponse.json(
          { success: false, error: 'File not found' },
          { status: 404 }
        );
      }
      throw error;
    }

    const ext = fileName.split('.').pop()?.toLowerCase();
    const content = await extractContent(ext, fileName, fileBuffer);

    // Empty text still gets an analysis (all-zero counts, one line) so that
    // callers asking for 'analyze' can rely on the field for text formats.
    if (operation === 'analyze' && typeof content.text === 'string') {
      const text = content.text;
      content.analysis = {
        characterCount: text.length,
        wordCount: text.split(/\s+/).filter(Boolean).length,
        lineCount: text.split('\n').length,
        paragraphCount: text.split('\n\n').filter(Boolean).length
      };
    }

    return NextResponse.json({
      success: true,
      fileName,
      operation,
      content
    });
  } catch (error) {
    console.error('Error processing document:', error);
    return NextResponse.json(
      { success: false, error: 'Failed to process document' },
      { status: 500 }
    );
  }
}
/**
 * GET handler: returns a static, human-readable description of this endpoint
 * (how to call the POST handler and which file formats it understands).
 * It performs no file access.
 */
export async function GET() {
  return NextResponse.json({
    message: 'Document processing endpoint',
    endpoint: '/api/documents/process',
    method: 'POST',
    body: {
      fileName: 'Name of the file to process',
      isPublic: 'true/false - whether file is in public folder',
      key: 'Passkey for secure storage (required if not public)',
      operation: 'Operation to perform: read (default), analyze'
    },
    supportedFormats: [
      'docx - Word documents (text extraction)',
      'xlsx/xls - Excel spreadsheets (data extraction)',
      // The POST handler returns the extracted PDF text as well as metadata,
      // so the description must not say "metadata only".
      'pdf - PDF files (text and metadata extraction)',
      'pptx/ppt - PowerPoint (metadata only)',
      'txt/md/json/csv - Text files (full content)'
    ]
  });
}