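"""FastAPI service for the Material Summarizer: parses uploaded documents
(PDF, DOCX, TXT, PPTX) and returns AI-generated summaries. Intended to run
as a Hugging Face Space (default port 7860)."""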
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import warnings
warnings.filterwarnings("ignore", message=".*_pytree_node.*")
import uvicorn
import os
import tempfile
import aiofiles
from datetime import datetime
import traceback
import logging
from typing import List, Optional
import time
from dotenv import load_dotenv

# Load environment variables before anything reads them
load_dotenv()

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Material Summarizer API")
# Get URLs from environment
FRONTEND_URL = os.getenv('FRONTEND_URL')
BACKEND_URL = os.getenv('BACKEND_URL', 'http://localhost:5000')

# CORS middleware; drop unset URLs so None never lands in the allow list
allowed_origins = [url for url in (FRONTEND_URL, BACKEND_URL) if url]

app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,  # Adjust in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
    max_age=600,
)
# Import processing functions
try:
    from document_parser import parse_document
    from summarizer import summarize_text
    from utils import chunked_summarize
    DEPENDENCIES_LOADED = True
    logger.info("All AI dependencies loaded successfully")
except ImportError as e:
    logger.error(f"Import error: {e}")
    DEPENDENCIES_LOADED = False
@app.on_event("startup")
async def startup_event():
    """Handle startup events"""
    logger.info("Application startup initiated")
    # Load model on startup to avoid cold start delays
    try:
        from summarizer import get_summarizer
        get_summarizer()  # Pre-load the model
        logger.info("Models pre-loaded successfully")
    except Exception as e:
        logger.warning(f"Model pre-loading failed: {e}")

@app.get("/")
async def root():
    return {"message": "Material Summarizer API", "status": "running"}
@app.get("/health")  # route path assumed
async def health_check():
    """Health check endpoint specifically for Hugging Face Spaces"""
    status = "healthy" if DEPENDENCIES_LOADED else "missing_dependencies"
    return JSONResponse(
        content={
            "status": status,
            "service": "material-summarizer",
            "dependencies_loaded": DEPENDENCIES_LOADED,
            "timestamp": time.time()
        },
        status_code=200 if DEPENDENCIES_LOADED else 503
    )
@app.get("/ping")  # route path assumed
async def ping():
    """Simple ping endpoint for load balancers"""
    return JSONResponse(
        content={"status": "ok", "timestamp": time.time()},
        status_code=200
    )
@app.post("/summarize")  # route path assumed
async def summarize_document(
    file: UploadFile = File(...),
    max_summary_length: Optional[int] = 1000,
    chunk_size: Optional[int] = 1500
):
    """
    Summarize an uploaded document (PDF, DOCX, TXT, etc.)
    """
    if not DEPENDENCIES_LOADED:
        raise HTTPException(
            status_code=500,
            detail="Required AI dependencies not loaded. Check server logs."
        )

    temp_file_path = None
    try:
        # Validate file type
        allowed_extensions = {'.pdf', '.docx', '.doc', '.txt', '.pptx', '.ppt'}
        file_extension = os.path.splitext(file.filename)[1].lower()
        if file_extension not in allowed_extensions:
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported document format. Allowed: {', '.join(allowed_extensions)}"
            )

        # Save the upload to a secure temporary file, keeping its extension
        fd, temp_file_path = tempfile.mkstemp(suffix=file_extension)
        os.close(fd)
        logger.info(f"Saving uploaded file: {file.filename}")
        async with aiofiles.open(temp_file_path, 'wb') as out_file:
            content = await file.read()
            await out_file.write(content)

        start_time = datetime.now()

        # 1. Parse document
        logger.info("Step 1: Parsing document...")
        if not os.path.exists(temp_file_path):
            raise HTTPException(status_code=500, detail="Document file not found after upload")
        document_text = parse_document(temp_file_path, file_extension)
        logger.info(f"Extracted text length: {len(document_text)} characters")
        if not document_text or len(document_text.strip()) < 10:
            raise HTTPException(status_code=500, detail="Document parsing failed or content too short")

        # 2. Summarize text with chunking
        logger.info("Step 2: Generating summary...")
        def custom_summarize_func(text):
            return summarize_text(
                text,
                model_name="facebook/bart-large-cnn",
                max_length=max_summary_length,
                min_length=min(100, max_summary_length // 3)
            )

        final_summary = chunked_summarize(
            text=document_text,
            summarize_func=custom_summarize_func,
            max_chunk_size=chunk_size
        )
        if not final_summary or len(final_summary.strip()) < 10:
            raise HTTPException(status_code=500, detail="Summary generation failed")

        processing_time = (datetime.now() - start_time).total_seconds()
        logger.info(f"Summarization completed in {processing_time:.2f} seconds")

        return {
            "success": True,
            "summary": final_summary,
            "original_length": len(document_text),
            "summary_length": len(final_summary),
            "processing_time": processing_time,
            "file_type": file_extension
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error processing document: {str(e)}")
        logger.error(traceback.format_exc())
        raise HTTPException(
            status_code=500,
            detail=f"Document processing failed: {str(e)}"
        )
    finally:
        # Cleanup temporary files
        try:
            if temp_file_path and os.path.exists(temp_file_path):
                os.remove(temp_file_path)
                logger.info(f"Cleaned up: {temp_file_path}")
        except Exception as cleanup_error:
            logger.error(f"Cleanup error: {cleanup_error}")
@app.post("/batch-summarize")  # route path assumed
async def batch_summarize_documents(files: List[UploadFile] = File(...)):
    """
    Summarize multiple documents in batch
    """
    if not DEPENDENCIES_LOADED:
        raise HTTPException(
            status_code=500,
            detail="Required AI dependencies not loaded. Check server logs."
        )

    results = []
    for file in files:
        try:
            # Reuse the single-document summarization handler
            result = await summarize_document(file)
            result["filename"] = file.filename
            results.append(result)
        except Exception as e:
            results.append({
                "success": False,
                "filename": file.filename,
                "error": str(e)
            })

    return {
        "success": True,
        "processed_files": len(results),
        "results": results
    }
if __name__ == "__main__":
    logger.info("Starting Material Summarizer Server...")
    logger.info("Dependencies loaded: %s", DEPENDENCIES_LOADED)
    if not DEPENDENCIES_LOADED:
        logger.error("CRITICAL: AI dependencies not loaded. Document processing will not work!")
    port = int(os.environ.get("MATERIAL_PORT", 7860))
    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=port,
        reload=False
    )
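# Example client call (a sketch; the /summarize route path is assumed above, and
# the filename is illustrative). Non-file parameters are declared as plain ints,
# so FastAPI reads them from the query string, not the multipart form:
#   curl -X POST "http://localhost:7860/summarize?max_summary_length=800" \
#        -F "file=@lecture_notes.pdf"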