# yolo_layoutlm / app.py
import gradio as gr
import json
import os
import tempfile
from pathlib import Path
# Import your pipeline function
from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
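
# Note: run_document_pipeline(pdf_path, layoutlmv3_model_path) is assumed to
# return a JSON-serializable dict with the extracted document structure, or
# None on failure (see how process_pdf uses the result below).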


def process_pdf(pdf_file, layoutlmv3_model_path=None):
    """
    Wrapper function for the Gradio interface.

    Args:
        pdf_file: Path to the uploaded PDF (from gr.File with type="filepath")
        layoutlmv3_model_path: Optional custom LayoutLMv3 model path

    Returns:
        Tuple of (JSON string for display, path of the JSON file for download)
    """
    if pdf_file is None:
        return "❌ Error: No PDF file uploaded.", None

    # Use default model path if not provided
    if not layoutlmv3_model_path:
        layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH

    # Verify model and weights exist
    if not os.path.exists(layoutlmv3_model_path):
        return f"❌ Error: LayoutLMv3 model not found at {layoutlmv3_model_path}", None
    if not os.path.exists(WEIGHTS_PATH):
        return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None

    try:
        # gr.File with type="filepath" passes the path as a string; older Gradio
        # versions pass a tempfile-like object with a .name attribute, so handle both.
        pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

        # Run the pipeline
        result = run_document_pipeline(pdf_path, layoutlmv3_model_path)
        if result is None:
            return "❌ Error: Pipeline failed to process the PDF. Check console for details.", None

        # Write the result to a temporary file so it can be offered for download
        output_filename = f"{Path(pdf_path).stem}_analysis.json"
        output_path = os.path.join(tempfile.mkdtemp(prefix="analysis_"), output_filename)
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(result, f, indent=2, ensure_ascii=False)

        # Format JSON for display
        json_display = json.dumps(result, indent=2, ensure_ascii=False)

        # Truncate if too long for display
        if len(json_display) > 50000:
            json_display = json_display[:50000] + "\n\n... (truncated for display, download full file)"

        return json_display, output_path

    except Exception as e:
        return f"❌ Error during processing: {str(e)}", None


# Create Gradio interface
with gr.Blocks(title="Document Analysis Pipeline", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 📄 Document Analysis Pipeline

    Upload a PDF document to extract structured data including questions, options, answers, passages, and embedded images.

    **Pipeline Steps:**
    1. 🔍 YOLO/OCR Preprocessing (word extraction + figure/equation detection)
    2. 🤖 LayoutLMv3 Inference (BIO tagging)
    3. 📊 Structured JSON Decoding
    4. 🖼️ Base64 Image Embedding
    """)

    with gr.Row():
        with gr.Column(scale=1):
            pdf_input = gr.File(
                label="Upload PDF Document",
                file_types=[".pdf"],
                type="filepath"
            )
            model_path_input = gr.Textbox(
                label="LayoutLMv3 Model Path (optional)",
                placeholder=DEFAULT_LAYOUTLMV3_MODEL_PATH,
                value=DEFAULT_LAYOUTLMV3_MODEL_PATH,
                interactive=True
            )
            process_btn = gr.Button("🚀 Process Document", variant="primary", size="lg")

            gr.Markdown("""
            ### ℹ️ Notes:
            - Processing may take several minutes depending on PDF size
            - Figures and equations will be extracted and embedded as Base64
            - The output JSON includes structured questions, options, and answers
            """)

        with gr.Column(scale=2):
            json_output = gr.Code(
                label="Structured JSON Output",
                language="json",
                lines=25
            )
            download_output = gr.File(
                label="Download Full JSON",
                interactive=False
            )

    # Status/Examples section
    with gr.Row():
        gr.Markdown("""
        ### 📋 Output Format

        The pipeline generates JSON with the following structure:
        - **Questions**: Extracted question text
        - **Options**: Multiple choice options (A, B, C, D, etc.)
        - **Answers**: Correct answer(s)
        - **Passages**: Associated reading passages
        - **Images**: Base64-encoded figures and equations (embedded with keys like `figure1`, `equation2`)
        """)

    # Connect the button to the processing function
    process_btn.click(
        fn=process_pdf,
        inputs=[pdf_input, model_path_input],
        outputs=[json_output, download_output],
        api_name="process_document"
    )
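
    # Because api_name="process_document" is set, the endpoint can also be called
    # programmatically. A minimal sketch with gradio_client (assumes a recent
    # gradio_client version and a running server; URL, file name, and model path
    # below are placeholders):
    #
    #   from gradio_client import Client, handle_file
    #   client = Client("http://localhost:7860")
    #   json_text, json_file = client.predict(
    #       handle_file("sample.pdf"),       # pdf_input
    #       "<layoutlmv3-model-path>",       # model_path_input (optional override)
    #       api_name="/process_document",
    #   )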

    # Example section (optional - add example PDFs if available)
    # gr.Examples(
    #     examples=[
    #         ["examples/sample1.pdf"],
    #         ["examples/sample2.pdf"],
    #     ],
    #     inputs=pdf_input,
    # )


# Launch the app
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )