File size: 12,069 Bytes
474279e
 
 
 
 
32285b7
 
e035f6a
32285b7
 
474279e
32285b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143de80
 
32285b7
 
 
474279e
32285b7
 
 
143de80
 
474279e
143de80
 
474279e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e035f6a
 
474279e
 
 
e035f6a
 
474279e
 
 
 
 
 
32285b7
474279e
143de80
 
 
474279e
0cc96dd
143de80
 
 
474279e
143de80
474279e
 
 
143de80
474279e
 
 
143de80
 
474279e
 
 
 
 
 
 
 
 
143de80
 
474279e
0cc96dd
474279e
 
143de80
 
474279e
 
143de80
 
 
474279e
0cc96dd
474279e
143de80
0cc96dd
 
 
474279e
0cc96dd
 
 
143de80
0cc96dd
474279e
 
 
 
 
 
 
e035f6a
 
 
32285b7
 
 
 
 
143de80
 
 
32285b7
 
 
0cc96dd
 
32285b7
 
 
 
 
 
 
 
 
474279e
32285b7
474279e
32285b7
474279e
 
 
32285b7
474279e
 
 
 
 
 
 
 
 
 
 
 
 
e035f6a
 
 
474279e
e035f6a
 
 
474279e
 
e035f6a
 
 
 
 
32285b7
 
 
 
 
 
 
 
 
 
143de80
 
32285b7
 
 
 
 
 
 
 
 
 
e035f6a
 
 
474279e
e035f6a
 
 
 
 
 
 
 
 
 
32285b7
 
 
 
 
 
 
 
 
 
 
 
143de80
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
"""
Form Filler Server - COMPLETE FIXED VERSION
Handles DOCX, XLSX, and text form templates with robust field matching
"""

from typing import Dict, Any
import json
from pathlib import Path
from docx import Document
from openpyxl import load_workbook
import re


async def fill_form(template_path: str, data: Dict[str, Any]) -> dict:
    """
    Auto-fill a form template with extracted data.

    The template type is chosen from the file extension (case-insensitive):
    ``.docx`` goes to ``fill_docx_form``, ``.xlsx``/``.xls`` go to
    ``fill_excel_form`` and everything else is treated as plain text and
    handed to ``fill_text_form``.

    The filled copy is written next to the template with ``_filled``
    inserted before the extension (``form.docx`` -> ``form_filled.docx``).

    Args:
        template_path: Path to form template (DOCX, XLSX or text)
        data: Dictionary of field values

    Returns:
        The dict produced by the selected filler, or
        ``{'error': ..., 'success': False}`` if anything raised.
    """
    try:
        path_text = str(template_path)
        source = Path(path_text)
        suffix = source.suffix
        file_ext = suffix.lower()

        # Insert "_filled" before the *final* suffix only.  A plain
        # str.replace() on the lower-cased suffix would leave "FORM.DOCX"
        # unchanged (so the template itself gets overwritten), would also
        # rewrite directory names containing the suffix, and would explode
        # on suffix-less paths because "".replace("", x) inserts x between
        # every character.
        if path_text.endswith(suffix):
            base = path_text[:len(path_text) - len(suffix)]
            output_path = f"{base}_filled{suffix}"
        else:
            # Suffix is not at the very end (e.g. trailing separator).
            output_path = str(source.with_name(f"{source.stem}_filled{suffix}"))

        if file_ext == '.docx':
            return await fill_docx_form(template_path, data, output_path)
        elif file_ext in ['.xlsx', '.xls']:
            return await fill_excel_form(template_path, data, output_path)
        else:
            # Generic text replacement
            return await fill_text_form(template_path, data, output_path)

    except Exception as e:
        import traceback
        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}


def _docx_key_variations(key: str) -> list:
    """Return the spellings of *key* that are accepted inside placeholders."""
    lowered = key.lower()
    return [
        key,                          # Original: "First Name"
        lowered,                      # Lowercase: "first name"
        lowered.replace(" ", ""),     # No spaces: "firstname"
        lowered.replace(" ", "_"),    # Underscores: "first_name"
        key.replace(" ", ""),         # No spaces original case: "FirstName"
        key.replace(" ", "_"),        # Underscores original case: "First_Name"
    ]


def _build_docx_placeholder_rules(data: Dict[str, Any]) -> list:
    """Pre-compile the placeholder patterns looked for in table cells.

    Returns a list of ``(pattern, placeholder, original_key, replacement)``
    tuples.  Each key variation remembers the caller's key it came from, so
    a match on ``{{first_name}}`` is reported as ``"First Name"``.
    """
    owners = {}
    for key, value in data.items():
        if not key.strip():
            # A blank key would produce "{}" / "[]" placeholders.
            continue
        for variation in _docx_key_variations(key):
            # Later keys win on collisions, like a plain dict assignment.
            owners[variation] = (key, str(value))

    rules = []
    for variation, (original_key, replacement) in owners.items():
        for placeholder in (f"{{{{{variation}}}}}", f"{{{variation}}}", f"[{variation}]"):
            pattern = re.compile(re.escape(placeholder), re.IGNORECASE)
            rules.append((pattern, placeholder, original_key, replacement))
    return rules


def _record_docx_fill(fields_filled: list, debug_info: list, key: str, message: str) -> None:
    """Remember *key* as filled (once) together with its debug message."""
    if key not in fields_filled:
        fields_filled.append(key)
        debug_info.append(message)


def _fill_docx_paragraphs(doc, data: Dict[str, Any], fields_filled: list, debug_info: list) -> None:
    """Fill ``Label:`` / ``Label _`` style fields in body paragraphs."""
    for para_idx, paragraph in enumerate(doc.paragraphs):
        text = paragraph.text
        for key, value in data.items():
            if not key.strip():
                # A blank key would turn the patterns into ":" / "__" and
                # match almost any paragraph.
                continue

            label_forms = (
                f"{key}:",     # "First Name:" -> "First Name: John"
                f"{key} :",    # "First Name :" -> "First Name: John"
                f"{key} _",    # "First Name _" -> "First Name: John"
                f"{key}__",    # "First Name__" -> "First Name: John"
            )
            found = next((form for form in label_forms if form in text), None)
            if found is None:
                continue

            paragraph.text = text.replace(found, f"{key}: {value}")
            _record_docx_fill(fields_filled, debug_info, key,
                              f"Filled '{key}' in paragraph {para_idx}")
            # Only the first matching key is applied to a paragraph.
            break


def _fill_docx_tables(doc, data: Dict[str, Any], placeholder_rules: list,
                      fields_filled: list, debug_info: list) -> None:
    """Fill table cells by label matching and by placeholder substitution."""
    for table_idx, table in enumerate(doc.tables):
        for row_idx, row in enumerate(table.rows):
            cells = row.cells
            for cell_idx, cell in enumerate(cells):
                cell_text = cell.text.strip()
                if not cell_text:
                    continue

                # Label comparison uses the text as it was on entry to the cell.
                cell_normalized = cell_text.lower().replace(":", "").strip()

                for original_key, value in data.items():
                    key_normalized = original_key.lower().replace(":", "").strip()
                    if not key_normalized:
                        # "" is a prefix of everything; skip blank keys.
                        continue

                    # Method 1: the cell starts with the label -> write the
                    # value into the same cell.
                    if cell_normalized.startswith(key_normalized):
                        current = cell.text
                        if ":" in current:
                            label, _, rest = current.partition(":")
                            if rest.strip():
                                continue  # Already has a value after the colon
                            cell.text = f"{label}: {value}"
                        else:
                            # No colon, add it
                            cell.text = f"{current}: {value}"
                        _record_docx_fill(
                            fields_filled, debug_info, original_key,
                            f"Filled '{original_key}' in table {table_idx}, row {row_idx}, cell {cell_idx}")

                    # Method 2: the label appears elsewhere in the cell ->
                    # write the value into the next cell of the row.
                    elif key_normalized in cell_normalized:
                        if cell_idx + 1 >= len(cells):
                            continue
                        next_cell = cells[cell_idx + 1]
                        # Only fill if next cell is empty or has placeholder
                        if next_cell.text.strip() in ("", "_", "__", "___"):
                            next_cell.text = str(value)
                            _record_docx_fill(
                                fields_filled, debug_info, original_key,
                                f"Filled '{original_key}' in next cell of table {table_idx}, row {row_idx}")

                # Method 3: {{key}}, {key} and [key] placeholders.  Both the
                # search and the substitution ignore case, so a cell is only
                # reported as filled when its text really changed.
                for pattern, placeholder, original_key, replacement in placeholder_rules:
                    new_text, hits = pattern.subn(
                        lambda _match, _text=replacement: _text, cell.text)
                    if hits:
                        cell.text = new_text
                        _record_docx_fill(
                            fields_filled, debug_info, original_key,
                            f"Filled placeholder '{placeholder}' in table {table_idx}")


async def fill_docx_form(template_path: str, data: Dict[str, Any], output_path: str) -> dict:
    """Fill a DOCX form template and save the result to *output_path*.

    Body paragraphs are scanned for ``Label:``-style fields; table cells are
    scanned for labels (value written into the same or the next cell) and
    for ``{{key}}`` / ``{key}`` / ``[key]`` placeholders, where ``key`` may
    be any spelling produced by ``_docx_key_variations``.

    Args:
        template_path: Path of the DOCX template to open.
        data: Mapping of field label to value; blank keys are ignored.
        output_path: Where the filled document is saved (parent directories
            are created if missing).

    Returns:
        On success ``{'success': True, 'output_path', 'fields_filled',
        'total_fields', 'debug_info'}`` with ``fields_filled`` holding the
        caller's keys in the order they were first filled; on any exception
        ``{'error': ..., 'success': False}``.
    """
    try:
        doc = Document(template_path)
        fields_filled = []
        debug_info = []

        _fill_docx_paragraphs(doc, data, fields_filled, debug_info)

        # Process all tables (primary method for form templates)
        _fill_docx_tables(doc, data, _build_docx_placeholder_rules(data),
                          fields_filled, debug_info)

        # Ensure output directory exists
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        doc.save(output_path)

        return {
            'success': True,
            'output_path': output_path,
            # Already unique; kept as a list so the order is deterministic.
            'fields_filled': fields_filled,
            'total_fields': len(fields_filled),
            'debug_info': debug_info
        }

    except Exception as e:
        import traceback
        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}


async def fill_excel_form(template_path: str, data: Dict[str, Any], output_path: str) -> dict:
    """Fill the active worksheet of an Excel form template.

    Every string cell is checked against each field in two ways:

    * Label match - if the field name (case-insensitive, colons ignored)
      occurs in the cell and the cell to its right is empty, the value is
      written into that neighbouring cell.
    * Placeholder match - ``{{key}}``, ``{key}`` and ``[key]`` occurrences
      in the cell are replaced with the value.

    Values are written as strings.

    Args:
        template_path: Path of the workbook to open.
        data: Mapping of field label to value; keys that are blank after
            normalisation are ignored.
        output_path: Where the filled workbook is saved (parent directories
            are created if missing).

    Returns:
        On success ``{'success': True, 'output_path', 'fields_filled',
        'total_fields'}`` with ``fields_filled`` in first-filled order; on
        any exception ``{'error': ..., 'success': False}``.
    """
    try:
        wb = load_workbook(template_path)
        ws = wb.active
        fields_filled = []

        # Per-field constants are computed once instead of once per cell.
        fields = []
        for key, value in data.items():
            key_normalized = key.lower().replace(":", "").strip()
            if not key_normalized:
                # "" is contained in every string and would match all cells.
                continue
            placeholders = (f"{{{{{key}}}}}", f"{{{key}}}", f"[{key}]")
            fields.append((key, key_normalized, placeholders, str(value)))

        # Iterate through all cells
        for row in ws.iter_rows():
            for cell_idx, cell in enumerate(row):
                if not (cell.value and isinstance(cell.value, str)):
                    continue

                cell_text = cell.value.strip()
                # Label matching always looks at the cell's original text.
                cell_normalized = cell_text.lower().replace(":", "").strip()
                next_cell = row[cell_idx + 1] if cell_idx + 1 < len(row) else None

                for key, key_normalized, placeholders, text_value in fields:
                    filled = False

                    # If cell contains field label, try to fill next cell
                    if next_cell is not None and key_normalized in cell_normalized:
                        if not next_cell.value or str(next_cell.value).strip() == "":
                            next_cell.value = text_value
                            filled = True

                    # Check for placeholders.  The running text is updated
                    # after every substitution; replacing from the original
                    # text each time would discard earlier substitutions and
                    # let "{key}" match inside an already-handled "{{key}}".
                    for placeholder in placeholders:
                        if placeholder in cell_text:
                            cell_text = cell_text.replace(placeholder, text_value)
                            cell.value = cell_text
                            filled = True

                    if filled and key not in fields_filled:
                        fields_filled.append(key)

        # Ensure output directory exists
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        wb.save(output_path)

        return {
            'success': True,
            'output_path': output_path,
            # Already unique; kept as a list so the order is deterministic.
            'fields_filled': fields_filled,
            'total_fields': len(fields_filled)
        }

    except Exception as e:
        import traceback
        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}


async def fill_text_form(template_path: str, data: Dict[str, Any], output_path: str) -> dict:
    """Fill a plain-text template by placeholder substitution.

    The template is read as UTF-8.  For every field, each occurrence of
    ``{{key}}``, ``{key}`` and ``[key]`` is replaced by ``str(value)``, and
    the result is written as UTF-8 to *output_path* (parent directories are
    created if missing).

    Returns:
        On success ``{'success': True, 'output_path', 'fields_filled',
        'total_fields'}`` where ``fields_filled`` lists the keys that had
        at least one placeholder, in ``data`` order; on any exception
        ``{'error': ..., 'success': False}``.
    """
    # Checked in this order, so "{{key}}" is consumed before "{key}".
    wrappers = ("{{{{{}}}}}", "{{{}}}", "[{}]")

    try:
        text = Path(template_path).read_text(encoding='utf-8')

        substituted = []
        for field, raw_value in data.items():
            for wrapper in wrappers:
                token = wrapper.format(field)
                if token not in text:
                    continue
                text = text.replace(token, str(raw_value))
                if field not in substituted:
                    substituted.append(field)

        # Ensure output directory exists
        destination = Path(output_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(text, encoding='utf-8')

        return {
            'success': True,
            'output_path': output_path,
            'fields_filled': substituted,
            'total_fields': len(substituted)
        }

    except Exception as exc:
        import traceback
        return {'error': f"{str(exc)}\n\n{traceback.format_exc()}", 'success': False}