LifeAdmin-AI / tools /form_filler_server.py
Maheen001's picture
Update tools/form_filler_server.py
143de80 verified
raw
history blame
10.4 kB
from typing import Dict, Any
import json
from pathlib import Path
from docx import Document
from openpyxl import load_workbook
async def fill_form(template_path: str, data: Dict[str, Any]) -> dict:
    """
    Auto-fill a form template with extracted data.

    The template's extension selects the filler: ``.docx`` goes to
    ``fill_docx_form``, ``.xlsx``/``.xls`` go to ``fill_excel_form`` and
    anything else is treated as plain text by ``fill_text_form``. The result
    is written next to the template as ``<name>_filled<ext>``.

    Args:
        template_path: Path to form template (DOCX, XLSX or text)
        data: Dictionary of field values

    Returns:
        The dict produced by the selected filler, or
        ``{'error': ..., 'success': False}`` if anything raises here.
    """
    try:
        source = Path(template_path)
        suffix = source.suffix
        # Dispatch is case-insensitive, but the output name is built from the
        # suffix exactly as spelled in the path.
        file_ext = suffix.lower()

        if suffix and template_path.endswith(suffix):
            # Trim only the trailing suffix. A str.replace() on the lowercased
            # extension misses "FORM.DOCX" (output == template, which would be
            # overwritten) and also rewrites directories that contain the
            # extension text.
            output_path = f"{template_path[:-len(suffix)]}_filled{suffix}"
        else:
            # No usable suffix: append the marker to the file name instead of
            # calling replace('', ...), which would insert it between every
            # character of the path.
            output_path = str(source.with_name(f"{source.name}_filled"))

        if file_ext == '.docx':
            return await fill_docx_form(template_path, data, output_path)
        if file_ext in ('.xlsx', '.xls'):
            return await fill_excel_form(template_path, data, output_path)
        # Generic text replacement
        return await fill_text_form(template_path, data, output_path)
    except Exception as e:
        import traceback
        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}
async def fill_docx_form(template_path: str, data: Dict[str, Any], output_path: str) -> dict:
    """
    Fill a DOCX form template and save the result to ``output_path``.

    Three strategies are applied:

    * ``{{key}}`` / ``{key}`` placeholders in body paragraphs are replaced
      (case-sensitive) for each key as given, lowercased, lowercased with
      spaces turned into underscores, and lowercased with spaces removed.
    * In tables, a cell whose text matches a key (substring match in either
      direction, or the same after removing spaces, colons and underscores)
      is treated as a label: the next cell in the row is filled when it
      holds fewer than 3 characters, and on a direct match a ``label:`` cell
      is rewritten as ``label: value``.
    * Placeholders inside table cells are replaced case-insensitively.

    Args:
        template_path: Path of the DOCX template to read.
        data: Mapping of field name to value; values are written via ``str()``.
        output_path: Destination path; parent directories are created.

    Returns:
        On success: ``success``, ``output_path``, ``fields_filled`` (unique
        original keys from ``data`` in the order first filled),
        ``total_fields`` and ``debug_info`` (one message per table fill).
        On failure: ``{'error': ..., 'success': False}``.
    """
    import re

    try:
        doc = Document(template_path)
        fields_filled = []
        debug_info = []

        def mark_filled(field):
            # Keep first-fill order while guaranteeing uniqueness.
            if field not in fields_filled:
                fields_filled.append(field)

        def fill_adjacent(cells, idx, value):
            # Write into the cell to the right when it is empty or holds fewer
            # than 3 characters; report whether a write happened.
            if idx + 1 >= len(cells):
                return False
            target = cells[idx + 1]
            if len(target.text.strip()) >= 3:
                return False
            target.text = str(value)
            return True

        # Map every spelling variant back to the caller's key so results are
        # reported once per real field rather than once per variant.
        variants = {}
        for key in data:
            lowered = key.lower()
            for variant in (
                key,
                lowered,
                lowered.replace(" ", "_"),
                lowered.replace(" ", ""),
            ):
                variants[variant] = key

        # Build placeholders and their case-insensitive patterns once instead
        # of per paragraph/cell.
        placeholder_specs = []
        for variant, original_key in variants.items():
            for placeholder in (
                f"{{{{{variant}}}}}",  # {{field_name}}
                f"{{{variant}}}",  # {field_name}
            ):
                pattern = re.compile(re.escape(placeholder), re.IGNORECASE)
                placeholder_specs.append((original_key, variant, placeholder, pattern))

        # First, replace placeholders in paragraphs (case-sensitive).
        for paragraph in doc.paragraphs:
            for original_key, _variant, placeholder, _pattern in placeholder_specs:
                if placeholder in paragraph.text:
                    paragraph.text = paragraph.text.replace(
                        placeholder, str(data[original_key])
                    )
                    mark_filled(original_key)

        # Table processing: label matching, then placeholders.
        for table in doc.tables:
            for row in table.rows:
                cells = row.cells
                for cell_idx, cell in enumerate(cells):
                    cell_text = cell.text.strip().lower()
                    # Skip empty cells
                    if not cell_text:
                        continue
                    cell_normalized = (
                        cell_text.replace(" ", "").replace(":", "").replace("_", "")
                    )

                    for original_key, value in data.items():
                        key_lower = original_key.lower()
                        key_normalized = key_lower.replace(" ", "").replace("_", "")

                        # Method 1: direct substring match (case-insensitive)
                        if key_lower in cell_text or cell_text in key_lower:
                            if fill_adjacent(cells, cell_idx, value):
                                mark_filled(original_key)
                                debug_info.append(f"Filled '{original_key}' in cell after '{cell.text.strip()}'")

                            # Method 2: fill after colon in the same cell.
                            # NOTE(review): this runs even when the adjacent
                            # cell was just filled, so the value can appear
                            # twice; nesting reconstructed from a source
                            # whose indentation was lost - confirm intent.
                            if ":" in cell.text:
                                label = cell.text.split(":", 1)[0].strip()
                                cell.text = f"{label}: {value}"
                                mark_filled(original_key)
                                debug_info.append(f"Filled '{original_key}' after colon in '{label}'")

                        # Method 3: fuzzy match on normalized text. Both sides
                        # must be non-empty: '' is a substring of every
                        # string, so a cell holding only ':' or '____' would
                        # otherwise match every key.
                        elif (
                            key_normalized
                            and cell_normalized
                            and (
                                key_normalized in cell_normalized
                                or cell_normalized in key_normalized
                            )
                        ):
                            if fill_adjacent(cells, cell_idx, value):
                                mark_filled(original_key)
                                debug_info.append(f"Filled '{original_key}' (fuzzy) after '{cell.text.strip()}'")

                    # Placeholders inside the cell. Detection and replacement
                    # are both case-insensitive, so a field is only reported
                    # when text was actually substituted.
                    for original_key, variant, placeholder, pattern in placeholder_specs:
                        replacement = str(data[original_key])
                        # A callable replacement keeps backslashes in the
                        # value literal.
                        new_text, count = pattern.subn(
                            lambda _match, text=replacement: text, cell.text
                        )
                        if count:
                            cell.text = new_text
                            mark_filled(original_key)
                            debug_info.append(f"Filled placeholder '{placeholder}' with '{variant}'")

        # Ensure output directory exists
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        doc.save(output_path)

        return {
            'success': True,
            'output_path': output_path,
            'fields_filled': list(fields_filled),
            'total_fields': len(fields_filled),
            'debug_info': debug_info
        }
    except Exception as e:
        import traceback
        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}
async def fill_excel_form(template_path: str, data: Dict[str, Any], output_path: str) -> dict:
    """
    Fill an Excel form template by substituting placeholders.

    Only the workbook's active sheet is scanned. In every non-empty string
    cell, ``{{key}}`` and then ``{key}`` are replaced with ``str(value)`` for
    each entry of ``data``; the workbook is then saved to ``output_path``
    (parent directories are created as needed).

    Returns:
        On success: ``success``, ``output_path``, ``fields_filled`` and
        ``total_fields``. On failure: ``{'error': ..., 'success': False}``.
    """
    try:
        workbook = load_workbook(template_path)
        sheet = workbook.active
        filled_keys = []

        for sheet_row in sheet.iter_rows():
            for cell in sheet_row:
                # Only non-empty text cells can contain placeholders.
                if not (cell.value and isinstance(cell.value, str)):
                    continue
                for field_name, field_value in data.items():
                    tokens = (
                        f"{{{{{field_name}}}}}",
                        f"{{{field_name}}}",
                    )
                    for token in tokens:
                        if token not in cell.value:
                            continue
                        cell.value = cell.value.replace(token, str(field_value))
                        if field_name not in filled_keys:
                            filled_keys.append(field_name)

        # Make sure the destination folder is there before saving.
        destination = Path(output_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        wb_target = output_path
        workbook.save(wb_target)

        unique_keys = set(filled_keys)
        return {
            'success': True,
            'output_path': output_path,
            'fields_filled': list(unique_keys),
            'total_fields': len(unique_keys)
        }
    except Exception as e:
        import traceback
        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}
async def fill_text_form(template_path: str, data: Dict[str, Any], output_path: str) -> dict:
    """
    Fill a UTF-8 text template by substituting placeholders.

    Every ``{{key}}`` and ``{key}`` occurrence is replaced with
    ``str(data[key])`` in a single pass over the template, so text that was
    just inserted is never rescanned: a value that itself looks like a
    placeholder (e.g. ``"{email}"``) is written literally instead of being
    expanded by another field.

    Args:
        template_path: Path of the text template to read.
        data: Mapping of field name to value.
        output_path: Destination path; parent directories are created.

    Returns:
        On success: ``success``, ``output_path``, ``fields_filled`` (keys
        whose placeholder occurred, in ``data`` order) and ``total_fields``.
        On failure: ``{'error': ..., 'success': False}``.
    """
    import re

    try:
        with open(template_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # placeholder text -> key; the first key wins if two keys render to
        # the same placeholder.
        lookup = {}
        for key in data:
            for placeholder in (
                f"{{{{{key}}}}}",
                f"{{{key}}}",
            ):
                lookup.setdefault(placeholder, key)

        used_keys = set()
        if lookup:
            # Longest alternatives first so "{{key}}" is preferred over the
            # "{key}" nested inside it.
            pattern = re.compile(
                "|".join(
                    re.escape(placeholder)
                    for placeholder in sorted(lookup, key=len, reverse=True)
                )
            )

            def substitute(match):
                key = lookup[match.group(0)]
                used_keys.add(key)
                return str(data[key])

            content = pattern.sub(substitute, content)

        fields_filled = [key for key in data if key in used_keys]

        # Ensure output directory exists
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(content)

        return {
            'success': True,
            'output_path': output_path,
            'fields_filled': fields_filled,
            'total_fields': len(fields_filled)
        }
    except Exception as e:
        import traceback
        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}
async def generate_form_template(
    fields: list,
    output_path: str = "data/outputs/form_template.docx",
) -> dict:
    """
    Generate a blank DOCX form template with the specified fields.

    The document gets a 'Form Template' heading followed by one paragraph per
    field: a bold ``<field>: `` label and a ``{{<field>}}`` placeholder.

    Args:
        fields: Field names, one paragraph each, in the given order.
        output_path: Where to save the template. Defaults to the path that
            used to be hard-coded, so existing callers are unaffected; pass
            another path to avoid overwriting a previously generated
            template.

    Returns:
        On success: ``success``, ``output_path`` and ``fields``.
        On failure: ``{'error': ..., 'success': False}``.
    """
    try:
        doc = Document()
        doc.add_heading('Form Template', 0)

        for field in fields:
            p = doc.add_paragraph()
            p.add_run(f"{field}: ").bold = True
            p.add_run(f"{{{{{field}}}}}")

        # Ensure output directory exists
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        doc.save(output_path)

        return {
            'success': True,
            'output_path': output_path,
            'fields': fields
        }
    except Exception as e:
        import traceback
        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}