"""Gradio front end for the document analysis pipeline.

Lets a user upload a PDF, runs it through ``run_document_pipeline`` (imported
from ``working_yolo_pipeline``), shows the resulting JSON in the browser and
offers the same JSON as a downloadable file.
"""

import json
import os
import tempfile
from pathlib import Path

import gradio as gr

# NOTE: 'working_yolo_pipeline.py' is expected to define run_document_pipeline,
# DEFAULT_LAYOUTLMV3_MODEL_PATH and WEIGHTS_PATH. The import is guarded (and
# must not be repeated unguarded above this point) so that the UI can still
# start with placeholder values when the pipeline module is unavailable.
# Replace the placeholders with your actual definitions if they are missing.
try:
    from working_yolo_pipeline import (
        run_document_pipeline,
        DEFAULT_LAYOUTLMV3_MODEL_PATH,
        WEIGHTS_PATH,
    )
except ImportError:
    print("Warning: 'working_yolo_pipeline.py' not found. Using dummy paths.")

    def run_document_pipeline(*args):
        """Placeholder used when the real pipeline module cannot be imported."""
        return {"error": "Placeholder pipeline function called."}

    DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
    WEIGHTS_PATH = "./weights/yolo_weights.pt"


def _resolve_upload_path(pdf_file):
    """Return the filesystem path of an uploaded file.

    ``gr.File(type="filepath")`` hands the callback a plain path string, while
    other Gradio configurations/versions pass a file-like object exposing the
    path as ``.name``. Both shapes are accepted.
    """
    if isinstance(pdf_file, (str, os.PathLike)):
        return os.fspath(pdf_file)
    return pdf_file.name


def process_pdf(pdf_file, layoutlmv3_model_path=None):
    """Run the document pipeline on an uploaded PDF (Gradio callback).

    Args:
        pdf_file: The uploaded PDF, either as a path string (what
            ``gr.File(type="filepath")`` provides) or as a file-like object
            with a ``.name`` attribute holding the path.
        layoutlmv3_model_path: Optional custom LayoutLMv3 model path. Empty or
            whitespace-only values fall back to
            ``DEFAULT_LAYOUTLMV3_MODEL_PATH``.

    Returns:
        Tuple of ``(text, download_path)``. On success ``text`` is the
        pretty-printed JSON result and ``download_path`` is a temporary
        ``.json`` file with the same content. On failure ``text`` is an error
        message and ``download_path`` is ``None``.
    """
    if pdf_file is None:
        return "❌ Error: No PDF file uploaded.", None

    # Use default model path if not provided (the textbox may also deliver
    # stray whitespace, which would otherwise be treated as a real path).
    layoutlmv3_model_path = (layoutlmv3_model_path or "").strip()
    if not layoutlmv3_model_path:
        layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH

    # Verify model and weights exist
    if not os.path.exists(layoutlmv3_model_path):
        return f"❌ Error: LayoutLMv3 model not found at {layoutlmv3_model_path}", None

    if not os.path.exists(WEIGHTS_PATH):
        return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None

    try:
        # Get the uploaded PDF path
        pdf_path = _resolve_upload_path(pdf_file)

        # Run the pipeline
        result = run_document_pipeline(
            pdf_path, layoutlmv3_model_path, 'label_studio_import.json'
        )

        if result is None:
            return "❌ Error: Pipeline failed to process the PDF. Check console for details.", None

        # Serialize once, before creating the file, so an unserializable
        # result does not leave an orphaned temporary file behind.
        json_display = json.dumps(result, indent=2, ensure_ascii=False)

        # Write the download file through a single handle that is closed on
        # exit; delete=False keeps the file on disk so Gradio can serve it.
        with tempfile.NamedTemporaryFile(
            mode='w',
            delete=False,
            suffix='.json',
            prefix=f"{Path(pdf_path).stem}_analysis_",
            encoding='utf-8',
        ) as temp_output:
            temp_output.write(json_display)

        return json_display, temp_output.name

    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing
        # the request.
        return f"❌ Error during processing: {str(e)}", None


# Create Gradio interface
# FIX APPLIED: Removed 'theme=gr.themes.Soft()' which caused the TypeError
with gr.Blocks(title="Document Analysis Pipeline") as demo:
    gr.Markdown("""
    # 📄 Document Analysis Pipeline

    Upload a PDF document to extract structured data including questions, options, answers, passages, and embedded images.

    **Pipeline Steps:**
    1. 🔍 YOLO/OCR Preprocessing (word extraction + figure/equation detection)
    2. 🤖 LayoutLMv3 Inference (BIO tagging)
    3. 📊 Structured JSON Decoding
    4. 🖼️ Base64 Image Embedding
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath" means process_pdf receives a path string.
            pdf_input = gr.File(
                label="Upload PDF Document",
                file_types=[".pdf"],
                type="filepath"
            )

            model_path_input = gr.Textbox(
                label="LayoutLMv3 Model Path (optional)",
                placeholder=DEFAULT_LAYOUTLMV3_MODEL_PATH,
                value=DEFAULT_LAYOUTLMV3_MODEL_PATH,
                interactive=True
            )

            process_btn = gr.Button("🚀 Process Document", variant="primary", size="lg")

            gr.Markdown("""
            ### ℹ️ Notes:
            - Processing may take several minutes depending on PDF size
            - Figures and equations will be extracted and embedded as Base64
            - The output JSON includes structured questions, options, and answers
            """)

        with gr.Column(scale=2):
            json_output = gr.Code(
                label="Structured JSON Output",
                language="json",
                lines=25
            )

            download_output = gr.File(
                label="Download Full JSON",
                interactive=False
            )

    # Status/Examples section
    with gr.Row():
        gr.Markdown("""
        ### 📋 Output Format
        The pipeline generates JSON with the following structure:
        - **Questions**: Extracted question text
        - **Options**: Multiple choice options (A, B, C, D, etc.)
        - **Answers**: Correct answer(s)
        - **Passages**: Associated reading passages
        - **Images**: Base64-encoded figures and equations (embedded with keys like `figure1`, `equation2`)
        """)

    # Connect the button to the processing function
    process_btn.click(
        fn=process_pdf,
        inputs=[pdf_input, model_path_input],
        outputs=[json_output, download_output],
        api_name="process_document"
    )

    # Example section (optional - add example PDFs if available)
    # gr.Examples(
    #     examples=[
    #         ["examples/sample1.pdf"],
    #         ["examples/sample2.pdf"],
    #     ],
    #     inputs=pdf_input,
    # )

# Launch the app
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )