Spaces:

heerjtdev
/

yolo_layoutlm

Running

App Files Files Community

aagamjtdev committed 27 days ago

Commit

bbc2086

1 Parent(s): ed09d3b

app.py

Browse files

Files changed (1) hide show

app.py +152 -0

app.py ADDED Viewed

	@@ -0,0 +1,152 @@

+import gradio as gr
+import json
+import os
+import tempfile
+from pathlib import Path
+# Import your pipeline function
+from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
+def process_pdf(pdf_file, layoutlmv3_model_path=None):
+    """
+    Wrapper function for Gradio interface.
+    Args:
+        pdf_file: Gradio UploadButton file object
+        layoutlmv3_model_path: Optional custom model path
+    Returns:
+        Tuple of (JSON string, download file path)
+    """
+    if pdf_file is None:
+        return "❌ Error: No PDF file uploaded.", None
+    # Use default model path if not provided
+    if not layoutlmv3_model_path:
+        layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH
+    # Verify model and weights exist
+    if not os.path.exists(layoutlmv3_model_path):
+        return f"❌ Error: LayoutLMv3 model not found at {layoutlmv3_model_path}", None
+    if not os.path.exists(WEIGHTS_PATH):
+        return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None
+    try:
+        # Get the uploaded PDF path
+        pdf_path = pdf_file.name
+        # Run the pipeline
+        result = run_document_pipeline(pdf_path, layoutlmv3_model_path)
+        if result is None:
+            return "❌ Error: Pipeline failed to process the PDF. Check console for details.", None
+        # Create a temporary file for download
+        output_filename = f"{Path(pdf_path).stem}_analysis.json"
+        temp_output = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', prefix='analysis_')
+        with open(temp_output.name, 'w', encoding='utf-8') as f:
+            json.dump(result, f, indent=2, ensure_ascii=False)
+        # Format JSON for display
+        json_display = json.dumps(result, indent=2, ensure_ascii=False)
+        # Truncate if too long for display
+        if len(json_display) > 50000:
+            json_display = json_display[:50000] + "\n\n... (truncated for display, download full file)"
+        return json_display, temp_output.name
+    except Exception as e:
+        return f"❌ Error during processing: {str(e)}", None
+# Create Gradio interface
+# Build the page inside a Blocks context; the resulting app object is bound to `demo`.
+with gr.Blocks(title="Document Analysis Pipeline", theme=gr.themes.Soft()) as demo:
+    # Header: title, short description, and the four pipeline stages.
+    gr.Markdown("""
+    # 📄 Document Analysis Pipeline
+    Upload a PDF document to extract structured data including questions, options, answers, passages, and embedded images.
+    **Pipeline Steps:**
+    1. 🔍 YOLO/OCR Preprocessing (word extraction + figure/equation detection)
+    2. 🤖 LayoutLMv3 Inference (BIO tagging)
+    3. 📊 Structured JSON Decoding
+    4. 🖼️ Base64 Image Embedding
+    """)
+    # Main row: input controls in the first column (scale=1), results in the second (scale=2).
+    with gr.Row():
+        with gr.Column(scale=1):
+            # PDF upload, limited to the .pdf extension; type="filepath" requests
+            # the upload as a path on disk (see the Gradio File docs).
+            pdf_input = gr.File(
+                label="Upload PDF Document",
+                file_types=[".pdf"],
+                type="filepath"
+            )
+            # Editable model path, pre-filled with the default imported from the pipeline module.
+            model_path_input = gr.Textbox(
+                label="LayoutLMv3 Model Path (optional)",
+                placeholder=DEFAULT_LAYOUTLMV3_MODEL_PATH,
+                value=DEFAULT_LAYOUTLMV3_MODEL_PATH,
+                interactive=True
+            )
+            # Button that triggers processing (wired up in process_btn.click below).
+            process_btn = gr.Button("🚀 Process Document", variant="primary", size="lg")
+            gr.Markdown("""
+            ### ℹ️ Notes:
+            - Processing may take several minutes depending on PDF size
+            - Figures and equations will be extracted and embedded as Base64
+            - The output JSON includes structured questions, options, and answers
+            """)
+        with gr.Column(scale=2):
+            # Code viewer that receives the first value returned by process_pdf.
+            json_output = gr.Code(
+                label="Structured JSON Output",
+                language="json",
+                lines=25
+            )
+            # Read-only file slot that receives the second value returned by process_pdf.
+            download_output = gr.File(
+                label="Download Full JSON",
+                interactive=False
+            )
+    # Static description of the output JSON structure
+    with gr.Row():
+        gr.Markdown("""
+        ### 📋 Output Format
+        The pipeline generates JSON with the following structure:
+        - **Questions**: Extracted question text
+        - **Options**: Multiple choice options (A, B, C, D, etc.)
+        - **Answers**: Correct answer(s)
+        - **Passages**: Associated reading passages
+        - **Images**: Base64-encoded figures and equations (embedded with keys like `figure1`, `equation2`)
+        """)
+    # Connect the button to the processing function: the two inputs map to
+    # process_pdf's parameters in order, and its two return values map to the outputs in order.
+    process_btn.click(
+        fn=process_pdf,
+        inputs=[pdf_input, model_path_input],
+        outputs=[json_output, download_output],
+        api_name="process_document"
+    )
+    # Example section (optional - add example PDFs if available).
+    # Left disabled: the example paths below are placeholders.
+    # gr.Examples(
+    #     examples=[
+    #         ["examples/sample1.pdf"],
+    #         ["examples/sample2.pdf"],
+    #     ],
+    #     inputs=pdf_input,
+    # )
+# Launch the app
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        show_error=True
+    )