Maheen001 committed on
Commit
474279e
·
verified ·
1 Parent(s): ca3501a

Update tools/form_filler_server.py

Browse files
Files changed (1) hide show
  1. tools/form_filler_server.py +98 -84
tools/form_filler_server.py CHANGED
@@ -1,8 +1,14 @@
 
 
 
 
 
1
  from typing import Dict, Any
2
  import json
3
  from pathlib import Path
4
  from docx import Document
5
  from openpyxl import load_workbook
 
6
 
7
 
8
  async def fill_form(template_path: str, data: Dict[str, Any]) -> dict:
@@ -34,100 +40,117 @@ async def fill_form(template_path: str, data: Dict[str, Any]) -> dict:
34
 
35
 
36
  async def fill_docx_form(template_path: str, data: Dict[str, Any], output_path: str) -> dict:
37
- """Fill DOCX form template - Enhanced with comprehensive matching"""
38
  try:
39
  doc = Document(template_path)
40
  fields_filled = []
41
  debug_info = []
42
 
43
- # Normalize data keys for better matching
44
  normalized_data = {}
45
  for key, value in data.items():
46
- # Store original
47
- normalized_data[key] = value
48
- # Store lowercase
49
- normalized_data[key.lower()] = value
50
- # Store with underscores
51
- normalized_data[key.lower().replace(" ", "_")] = value
52
- # Store without spaces
53
- normalized_data[key.lower().replace(" ", "")] = value
54
-
55
- # First, try to replace placeholders in paragraphs
56
- for paragraph in doc.paragraphs:
57
- for key, value in normalized_data.items():
58
- placeholders = [
59
- f"{{{{{key}}}}}", # {{field_name}}
60
- f"{{{key}}}", # {field_name}
 
 
 
 
 
 
 
 
 
 
 
61
  ]
62
 
63
- for placeholder in placeholders:
64
- if placeholder in paragraph.text:
65
- paragraph.text = paragraph.text.replace(placeholder, str(value))
66
  if key not in fields_filled:
67
  fields_filled.append(key)
 
 
 
 
 
 
68
 
69
- # Enhanced table processing
70
  for table_idx, table in enumerate(doc.tables):
71
  for row_idx, row in enumerate(table.rows):
72
- cells_text = [cell.text.strip() for cell in row.cells]
73
-
74
- # Process each cell
75
  for cell_idx, cell in enumerate(row.cells):
76
- cell_text = cell.text.strip().lower()
77
 
78
- # Skip empty cells
79
  if not cell_text:
80
  continue
81
 
82
- # Check each field
83
  for original_key, value in data.items():
84
- key_lower = original_key.lower()
85
- key_normalized = key_lower.replace(" ", "").replace("_", "")
86
- cell_normalized = cell_text.replace(" ", "").replace(":", "").replace("_", "")
87
 
88
- # Method 1: Exact match (case-insensitive)
89
- if key_lower in cell_text or cell_text in key_lower:
90
- # Fill next cell if exists and is empty
91
- if cell_idx + 1 < len(row.cells):
92
- next_cell = row.cells[cell_idx + 1]
93
- if not next_cell.text.strip() or len(next_cell.text.strip()) < 3:
94
- next_cell.text = str(value)
95
- if original_key not in fields_filled:
96
- fields_filled.append(original_key)
97
- debug_info.append(f"Filled '{original_key}' in cell after '{cell.text.strip()}'")
98
-
99
- # Method 2: Fill after colon in same cell
100
  if ":" in cell.text:
101
  parts = cell.text.split(":", 1)
102
- label = parts[0].strip()
103
- cell.text = f"{label}: {value}"
 
 
 
 
 
 
 
104
  if original_key not in fields_filled:
105
  fields_filled.append(original_key)
106
- debug_info.append(f"Filled '{original_key}' after colon in '{label}'")
107
 
108
- # Method 3: Fuzzy match (normalized)
109
- elif key_normalized in cell_normalized or cell_normalized in key_normalized:
110
  if cell_idx + 1 < len(row.cells):
111
  next_cell = row.cells[cell_idx + 1]
112
- if not next_cell.text.strip() or len(next_cell.text.strip()) < 3:
 
113
  next_cell.text = str(value)
114
  if original_key not in fields_filled:
115
  fields_filled.append(original_key)
116
- debug_info.append(f"Filled '{original_key}' (fuzzy) after '{cell.text.strip()}'")
117
 
118
- # Check for placeholders
119
  for key, value in normalized_data.items():
120
  placeholders = [
121
  f"{{{{{key}}}}}",
122
  f"{{{key}}}",
 
123
  ]
124
 
125
  for placeholder in placeholders:
126
  if placeholder.lower() in cell.text.lower():
127
  cell.text = cell.text.replace(placeholder, str(value))
128
- if key not in fields_filled:
129
- fields_filled.append(key)
130
- debug_info.append(f"Filled placeholder '{placeholder}' with '{key}'")
 
 
 
 
131
 
132
  # Ensure output directory exists
133
  Path(output_path).parent.mkdir(parents=True, exist_ok=True)
@@ -153,19 +176,36 @@ async def fill_excel_form(template_path: str, data: Dict[str, Any], output_path:
153
  ws = wb.active
154
  fields_filled = []
155
 
156
- # Search for placeholders and replace
157
  for row in ws.iter_rows():
158
- for cell in row:
159
  if cell.value and isinstance(cell.value, str):
 
 
 
160
  for key, value in data.items():
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  placeholders = [
162
  f"{{{{{key}}}}}",
163
  f"{{{key}}}",
 
164
  ]
165
 
166
  for placeholder in placeholders:
167
- if placeholder in cell.value:
168
- cell.value = cell.value.replace(placeholder, str(value))
169
  if key not in fields_filled:
170
  fields_filled.append(key)
171
 
@@ -196,6 +236,7 @@ async def fill_text_form(template_path: str, data: Dict[str, Any], output_path:
196
  placeholders = [
197
  f"{{{{{key}}}}}",
198
  f"{{{key}}}",
 
199
  ]
200
 
201
  for placeholder in placeholders:
@@ -217,33 +258,6 @@ async def fill_text_form(template_path: str, data: Dict[str, Any], output_path:
217
  'total_fields': len(fields_filled)
218
  }
219
 
220
- except Exception as e:
221
- import traceback
222
- return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}
223
-
224
-
225
- async def generate_form_template(fields: list) -> dict:
226
- """Generate a blank form template with specified fields"""
227
- try:
228
- doc = Document()
229
- doc.add_heading('Form Template', 0)
230
-
231
- for field in fields:
232
- p = doc.add_paragraph()
233
- p.add_run(f"{field}: ").bold = True
234
- p.add_run(f"{{{{{field}}}}}")
235
-
236
- # Ensure output directory exists
237
- output_path = "data/outputs/form_template.docx"
238
- Path(output_path).parent.mkdir(parents=True, exist_ok=True)
239
- doc.save(output_path)
240
-
241
- return {
242
- 'success': True,
243
- 'output_path': output_path,
244
- 'fields': fields
245
- }
246
-
247
  except Exception as e:
248
  import traceback
249
  return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}
 
1
+ """
2
+ Form Filler Server - COMPLETE FIXED VERSION
3
+ Handles DOCX, XLSX, and text form templates with robust field matching
4
+ """
5
+
6
  from typing import Dict, Any
7
  import json
8
  from pathlib import Path
9
  from docx import Document
10
  from openpyxl import load_workbook
11
+ import re
12
 
13
 
14
  async def fill_form(template_path: str, data: Dict[str, Any]) -> dict:
 
40
 
41
 
42
  async def fill_docx_form(template_path: str, data: Dict[str, Any], output_path: str) -> dict:
43
+ """Fill DOCX form template - COMPLETE FIXED VERSION with robust matching"""
44
  try:
45
  doc = Document(template_path)
46
  fields_filled = []
47
  debug_info = []
48
 
49
+ # Normalize data keys for comprehensive matching
50
  normalized_data = {}
51
  for key, value in data.items():
52
+ # Create multiple normalized versions of each key
53
+ key_variations = [
54
+ key, # Original: "First Name"
55
+ key.lower(), # Lowercase: "first name"
56
+ key.lower().replace(" ", ""), # No spaces: "firstname"
57
+ key.lower().replace(" ", "_"), # Underscores: "first_name"
58
+ key.replace(" ", ""), # No spaces original case: "FirstName"
59
+ key.replace(" ", "_"), # Underscores original case: "First_Name"
60
+ ]
61
+
62
+ for variation in key_variations:
63
+ normalized_data[variation] = value
64
+
65
+ # Process all paragraphs
66
+ for para_idx, paragraph in enumerate(doc.paragraphs):
67
+ original_text = paragraph.text
68
+ modified = False
69
+
70
+ # Try to find and replace fields in paragraphs
71
+ for key, value in data.items():
72
+ # Check for various formats
73
+ patterns = [
74
+ (f"{key}:", f"{key}: {value}"), # "First Name:" -> "First Name: John"
75
+ (f"{key} :", f"{key}: {value}"), # "First Name :" -> "First Name: John"
76
+ (f"{key} _", f"{key}: {value}"), # "First Name _" -> "First Name: John"
77
+ (f"{key}__", f"{key}: {value}"), # "First Name__" -> "First Name: John"
78
  ]
79
 
80
+ for pattern, replacement in patterns:
81
+ if pattern in paragraph.text:
82
+ paragraph.text = paragraph.text.replace(pattern, replacement)
83
  if key not in fields_filled:
84
  fields_filled.append(key)
85
+ debug_info.append(f"Filled '{key}' in paragraph {para_idx}")
86
+ modified = True
87
+ break
88
+
89
+ if modified:
90
+ break
91
 
92
+ # Process all tables (primary method for form templates)
93
  for table_idx, table in enumerate(doc.tables):
94
  for row_idx, row in enumerate(table.rows):
 
 
 
95
  for cell_idx, cell in enumerate(row.cells):
96
+ cell_text = cell.text.strip()
97
 
 
98
  if not cell_text:
99
  continue
100
 
101
+ # Method 1: Direct label matching with colon
102
  for original_key, value in data.items():
103
+ # Create normalized version of cell text
104
+ cell_normalized = cell_text.lower().replace(":", "").strip()
105
+ key_normalized = original_key.lower().replace(":", "").strip()
106
 
107
+ # Check if cell contains the field label
108
+ if key_normalized == cell_normalized or cell_normalized.startswith(key_normalized):
109
+ # Check if value is already filled
 
 
 
 
 
 
 
 
 
110
  if ":" in cell.text:
111
  parts = cell.text.split(":", 1)
112
+ if len(parts) == 2 and not parts[1].strip():
113
+ # Empty after colon, fill it
114
+ cell.text = f"{parts[0]}: {value}"
115
+ if original_key not in fields_filled:
116
+ fields_filled.append(original_key)
117
+ debug_info.append(f"Filled '{original_key}' in table {table_idx}, row {row_idx}, cell {cell_idx}")
118
+ else:
119
+ # No colon, add it
120
+ cell.text = f"{cell.text}: {value}"
121
  if original_key not in fields_filled:
122
  fields_filled.append(original_key)
123
+ debug_info.append(f"Filled '{original_key}' in table {table_idx}, row {row_idx}, cell {cell_idx}")
124
 
125
+ # Method 2: Check next cell in same row
126
+ elif key_normalized in cell_normalized:
127
  if cell_idx + 1 < len(row.cells):
128
  next_cell = row.cells[cell_idx + 1]
129
+ # Only fill if next cell is empty or has placeholder
130
+ if not next_cell.text.strip() or next_cell.text.strip() in ["_", "__", "___", ""]:
131
  next_cell.text = str(value)
132
  if original_key not in fields_filled:
133
  fields_filled.append(original_key)
134
+ debug_info.append(f"Filled '{original_key}' in next cell of table {table_idx}, row {row_idx}")
135
 
136
+ # Method 3: Check for placeholder patterns in cells
137
  for key, value in normalized_data.items():
138
  placeholders = [
139
  f"{{{{{key}}}}}",
140
  f"{{{key}}}",
141
+ f"[{key}]",
142
  ]
143
 
144
  for placeholder in placeholders:
145
  if placeholder.lower() in cell.text.lower():
146
  cell.text = cell.text.replace(placeholder, str(value))
147
+ # Find original key
148
+ original_key = next((k for k in data.keys()
149
+ if k.lower().replace(" ", "") == key.lower().replace(" ", "")),
150
+ key)
151
+ if original_key not in fields_filled:
152
+ fields_filled.append(original_key)
153
+ debug_info.append(f"Filled placeholder '{placeholder}' in table {table_idx}")
154
 
155
  # Ensure output directory exists
156
  Path(output_path).parent.mkdir(parents=True, exist_ok=True)
 
176
  ws = wb.active
177
  fields_filled = []
178
 
179
+ # Iterate through all cells
180
  for row in ws.iter_rows():
181
+ for cell_idx, cell in enumerate(row):
182
  if cell.value and isinstance(cell.value, str):
183
+ cell_text = str(cell.value).strip()
184
+
185
+ # Check each field
186
  for key, value in data.items():
187
+ key_normalized = key.lower().replace(":", "").strip()
188
+ cell_normalized = cell_text.lower().replace(":", "").strip()
189
+
190
+ # If cell contains field label
191
+ if key_normalized == cell_normalized or key_normalized in cell_normalized:
192
+ # Try to fill next cell
193
+ next_cell = row[cell_idx + 1] if cell_idx + 1 < len(row) else None
194
+ if next_cell and (not next_cell.value or str(next_cell.value).strip() == ""):
195
+ next_cell.value = str(value)
196
+ if key not in fields_filled:
197
+ fields_filled.append(key)
198
+
199
+ # Check for placeholders
200
  placeholders = [
201
  f"{{{{{key}}}}}",
202
  f"{{{key}}}",
203
+ f"[{key}]",
204
  ]
205
 
206
  for placeholder in placeholders:
207
+ if placeholder in cell_text:
208
+ cell.value = cell_text.replace(placeholder, str(value))
209
  if key not in fields_filled:
210
  fields_filled.append(key)
211
 
 
236
  placeholders = [
237
  f"{{{{{key}}}}}",
238
  f"{{{key}}}",
239
+ f"[{key}]",
240
  ]
241
 
242
  for placeholder in placeholders:
 
258
  'total_fields': len(fields_filled)
259
  }
260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
  except Exception as e:
262
  import traceback
263
  return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}