aagamjtdev committed on
Commit
bbc2086
·
1 Parent(s): ed09d3b
Files changed (1) hide show
  1. app.py +152 -0
app.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import os
4
+ import tempfile
5
+ from pathlib import Path
6
+
7
+ # Import your pipeline function
8
+ from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
9
+
10
+
11
def process_pdf(pdf_file, layoutlmv3_model_path=None):
    """
    Run the document pipeline on an uploaded PDF for the Gradio interface.

    Args:
        pdf_file: The uploaded PDF. Either a filesystem path (which is what
            a ``gr.File(type="filepath")`` input passes to its handler) or
            an object exposing the path through a ``.name`` attribute.
        layoutlmv3_model_path: Optional custom model path. When empty or
            None, DEFAULT_LAYOUTLMV3_MODEL_PATH is used.

    Returns:
        Tuple of (display text, download file path). On success the text is
        the pretty-printed JSON result (cut to 50000 characters plus a
        notice when longer) and the path points to a temporary ``.json``
        file holding the full result. On failure the text is an error
        message and the path is None.
    """
    if pdf_file is None:
        return "❌ Error: No PDF file uploaded.", None

    # Use default model path if not provided
    if not layoutlmv3_model_path:
        layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH

    # Verify model and weights exist
    if not os.path.exists(layoutlmv3_model_path):
        return f"❌ Error: LayoutLMv3 model not found at {layoutlmv3_model_path}", None

    if not os.path.exists(WEIGHTS_PATH):
        return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None

    try:
        # The upload may arrive as a plain path or as a file wrapper;
        # accept both instead of assuming a ``.name`` attribute exists.
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = os.fspath(pdf_file)
        else:
            pdf_path = pdf_file.name

        # Run the pipeline
        result = run_document_pipeline(pdf_path, layoutlmv3_model_path)

        if result is None:
            return "❌ Error: Pipeline failed to process the PDF. Check console for details.", None

        # Serialise once; the same text feeds both the download and the display.
        json_text = json.dumps(result, indent=2, ensure_ascii=False)

        # delete=False keeps the file on disk after the handle is closed so
        # it can still be served for download once this function returns.
        with tempfile.NamedTemporaryFile(
            mode='w',
            delete=False,
            suffix='.json',
            prefix='analysis_',
            encoding='utf-8',
        ) as temp_output:
            temp_output.write(json_text)

        # Truncate if too long for display
        json_display = json_text
        if len(json_display) > 50000:
            json_display = json_display[:50000] + "\n\n... (truncated for display, download full file)"

        return json_display, temp_output.name

    except Exception as e:
        # UI boundary: report the failure in the output panel instead of raising.
        return f"❌ Error during processing: {str(e)}", None
64
+
65
+
66
+ # Create Gradio interface
67
# Page copy, kept apart from the layout code so the structure below stays readable.
_INTRO_MARKDOWN = """
# 📄 Document Analysis Pipeline

Upload a PDF document to extract structured data including questions, options, answers, passages, and embedded images.

**Pipeline Steps:**
1. 🔍 YOLO/OCR Preprocessing (word extraction + figure/equation detection)
2. 🤖 LayoutLMv3 Inference (BIO tagging)
3. 📊 Structured JSON Decoding
4. 🖼️ Base64 Image Embedding
"""

_NOTES_MARKDOWN = """
### ℹ️ Notes:
- Processing may take several minutes depending on PDF size
- Figures and equations will be extracted and embedded as Base64
- The output JSON includes structured questions, options, and answers
"""

_OUTPUT_FORMAT_MARKDOWN = """
### 📋 Output Format
The pipeline generates JSON with the following structure:
- **Questions**: Extracted question text
- **Options**: Multiple choice options (A, B, C, D, etc.)
- **Answers**: Correct answer(s)
- **Passages**: Associated reading passages
- **Images**: Base64-encoded figures and equations (embedded with keys like `figure1`, `equation2`)
"""

# Build the Gradio page: inputs on the left, results on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="Document Analysis Pipeline") as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        # Narrow column: upload, model path, trigger button, usage notes.
        with gr.Column(scale=1):
            pdf_input = gr.File(
                type="filepath",
                file_types=[".pdf"],
                label="Upload PDF Document",
            )

            model_path_input = gr.Textbox(
                value=DEFAULT_LAYOUTLMV3_MODEL_PATH,
                placeholder=DEFAULT_LAYOUTLMV3_MODEL_PATH,
                label="LayoutLMv3 Model Path (optional)",
                interactive=True,
            )

            process_btn = gr.Button("🚀 Process Document", size="lg", variant="primary")

            gr.Markdown(_NOTES_MARKDOWN)

        # Wide column: JSON preview plus the downloadable file.
        with gr.Column(scale=2):
            json_output = gr.Code(
                language="json",
                lines=25,
                label="Structured JSON Output",
            )

            download_output = gr.File(
                interactive=False,
                label="Download Full JSON",
            )

    # Description of the output format, shown below the main row.
    with gr.Row():
        gr.Markdown(_OUTPUT_FORMAT_MARKDOWN)

    # Wire the button: two inputs in, JSON preview and download file out.
    process_btn.click(
        fn=process_pdf,
        api_name="process_document",
        inputs=[pdf_input, model_path_input],
        outputs=[json_output, download_output],
    )
135
+
136
+ # Example section (optional - add example PDFs if available)
137
+ # gr.Examples(
138
+ # examples=[
139
+ # ["examples/sample1.pdf"],
140
+ # ["examples/sample2.pdf"],
141
+ # ],
142
+ # inputs=pdf_input,
143
+ # )
144
+
145
+ # Launch the app
146
if __name__ == "__main__":
    # Server settings gathered in one place, then handed to launch().
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo.launch(**launch_options)