Spaces:
Running
Running
| """ | |
| Form Filler Server - COMPLETE FIXED VERSION | |
| Handles DOCX, XLSX, and text form templates with robust field matching | |
| """ | |
| from typing import Dict, Any | |
| import json | |
| from pathlib import Path | |
| from docx import Document | |
| from openpyxl import load_workbook | |
| import re | |
async def fill_form(template_path: str, data: Dict[str, Any]) -> dict:
    """
    Auto-fill a form template with extracted data.

    The filler is selected from the template's file extension, compared
    case-insensitively: ``.docx`` goes to the DOCX filler, ``.xlsx`` and
    ``.xls`` go to the Excel filler, and everything else is treated as a
    plain-text template. The result is written next to the template as
    ``<name>_filled<ext>``.

    Args:
        template_path: Path to the form template.
        data: Dictionary mapping field names to the values to insert.

    Returns:
        The dict returned by the selected filler, or
        ``{'error': ..., 'success': False}`` if an exception is raised
        while dispatching.
    """
    try:
        template = Path(template_path)
        file_ext = template.suffix.lower()

        # Derive the output name from the final path component only and keep
        # the suffix exactly as written. A plain str.replace() on the whole
        # path with the lower-cased suffix would (a) leave "FORM.DOCX"
        # unchanged, so the template itself would be overwritten, (b) insert
        # "_filled" between every character when there is no suffix, and
        # (c) also rewrite directory names that contain the suffix.
        output_path = str(
            template.with_name(f"{template.stem}_filled{template.suffix}")
        )

        if file_ext == '.docx':
            return await fill_docx_form(template_path, data, output_path)
        if file_ext in ('.xlsx', '.xls'):
            # NOTE(review): '.xls' is routed to the openpyxl-based filler;
            # confirm openpyxl can open legacy .xls workbooks, otherwise this
            # route can only ever produce an error result.
            return await fill_excel_form(template_path, data, output_path)
        # Anything else: generic text replacement.
        return await fill_text_form(template_path, data, output_path)
    except Exception as e:
        import traceback
        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}
def _docx_record(key: str, message: str, fields_filled: list, debug_info: list) -> None:
    """Register *key* as filled, with its debug message, the first time it is seen."""
    if key not in fields_filled:
        fields_filled.append(key)
        debug_info.append(message)


def _docx_placeholder_rules(data: Dict[str, Any]) -> list:
    """Build ``(original_key, placeholder, pattern, replacement)`` rules.

    Each key is expanded into several spellings (original, lower-case, with
    spaces removed or turned into underscores) and each spelling into the
    ``{{key}}``, ``{key}`` and ``[key]`` placeholder forms. The original key
    is carried along so a hit is always reported under the caller's key.
    """
    spellings = {}
    for key, value in data.items():
        lowered = key.lower()
        for spelling in (
            key,                          # "First Name"
            lowered,                      # "first name"
            lowered.replace(" ", ""),     # "firstname"
            lowered.replace(" ", "_"),    # "first_name"
            key.replace(" ", ""),         # "FirstName"
            key.replace(" ", "_"),        # "First_Name"
        ):
            spellings[spelling] = (key, value)

    rules = []
    for spelling, (key, value) in spellings.items():
        for placeholder in (f"{{{{{spelling}}}}}", f"{{{spelling}}}", f"[{spelling}]"):
            pattern = re.compile(re.escape(placeholder), re.IGNORECASE)
            rules.append((key, placeholder, pattern, str(value)))
    return rules


def _docx_fill_paragraph(paragraph, para_idx: int, data: Dict[str, Any],
                         fields_filled: list, debug_info: list) -> None:
    """Fill at most one ``Label:`` / ``Label _`` style field in *paragraph*."""
    for key, value in data.items():
        patterns = (
            (f"{key}:", f"{key}: {value}"),    # "First Name:"  -> "First Name: John"
            (f"{key} :", f"{key}: {value}"),   # "First Name :" -> "First Name: John"
            (f"{key} _", f"{key}: {value}"),   # "First Name _" -> "First Name: John"
            (f"{key}__", f"{key}: {value}"),   # "First Name__" -> "First Name: John"
        )
        for pattern, replacement in patterns:
            if pattern in paragraph.text:
                paragraph.text = paragraph.text.replace(pattern, replacement)
                _docx_record(key, f"Filled '{key}' in paragraph {para_idx}",
                             fields_filled, debug_info)
                # Stop at the first hit: once "First Name: John" is written,
                # a shorter key such as "Name" would match the new text.
                return


def _docx_fill_cell(cells, cell_idx: int, table_idx: int, row_idx: int,
                    data: Dict[str, Any], rules: list,
                    fields_filled: list, debug_info: list) -> None:
    """Apply label matching and placeholder substitution to one table cell."""
    cell = cells[cell_idx]
    cell_text = cell.text.strip()
    if not cell_text:
        return

    # Label matching works on the text the cell had before any fill.
    cell_normalized = cell_text.lower().replace(":", "").strip()
    for original_key, value in data.items():
        key_normalized = original_key.lower().replace(":", "").strip()

        if cell_normalized.startswith(key_normalized):
            # Method 1: the cell starts with the label; write the value
            # into the same cell unless something already follows the colon.
            label, colon, current = cell.text.partition(":")
            if not colon:
                cell.text = f"{cell.text}: {value}"
            elif not current.strip():
                cell.text = f"{label}: {value}"
            else:
                continue
            _docx_record(
                original_key,
                f"Filled '{original_key}' in table {table_idx}, row {row_idx}, cell {cell_idx}",
                fields_filled, debug_info)
        elif key_normalized in cell_normalized and cell_idx + 1 < len(cells):
            # Method 2: the label appears inside the cell; use the next
            # cell of the row if it is empty or holds only underscores.
            next_cell = cells[cell_idx + 1]
            if next_cell.text.strip() in ("", "_", "__", "___"):
                next_cell.text = str(value)
                _docx_record(
                    original_key,
                    f"Filled '{original_key}' in next cell of table {table_idx}, row {row_idx}",
                    fields_filled, debug_info)

    # Method 3: placeholders. Detection and replacement use the same
    # case-insensitive pattern, so "{{FIRST NAME}}" is actually substituted
    # rather than merely reported as filled.
    for original_key, placeholder, pattern, replacement in rules:
        if pattern.search(cell.text):
            # A callable replacement keeps backslashes in the value literal.
            cell.text = pattern.sub(lambda _match: replacement, cell.text)
            _docx_record(
                original_key,
                f"Filled placeholder '{placeholder}' in table {table_idx}",
                fields_filled, debug_info)


async def fill_docx_form(template_path: str, data: Dict[str, Any], output_path: str) -> dict:
    """Fill a DOCX form template and save the result to *output_path*.

    Body paragraphs are scanned for ``Label:`` style fields, then every table
    cell is checked for labels (value written into the same or the next cell)
    and for ``{{key}}`` / ``{key}`` / ``[key]`` placeholders.

    NOTE(review): values are written by assigning ``paragraph.text`` and
    ``cell.text``; this presumably drops run-level formatting of the replaced
    text - confirm against the python-docx documentation.

    Args:
        template_path: Path of the DOCX template to open.
        data: Mapping of field names to the values to insert.
        output_path: Where the filled document is saved; missing parent
            directories are created.

    Returns:
        On success, a dict with ``success``, ``output_path``,
        ``fields_filled`` (original keys from *data*, in the order they were
        first filled), ``total_fields`` and ``debug_info``. On any exception,
        ``{'error': <message and traceback>, 'success': False}``.
    """
    try:
        doc = Document(template_path)
        fields_filled = []
        debug_info = []

        # Process all paragraphs
        for para_idx, paragraph in enumerate(doc.paragraphs):
            _docx_fill_paragraph(paragraph, para_idx, data, fields_filled, debug_info)

        # Process all tables (primary method for form templates)
        rules = _docx_placeholder_rules(data)
        for table_idx, table in enumerate(doc.tables):
            for row_idx, row in enumerate(table.rows):
                # Read row.cells once per row instead of once per key.
                cells = row.cells
                for cell_idx in range(len(cells)):
                    _docx_fill_cell(cells, cell_idx, table_idx, row_idx,
                                    data, rules, fields_filled, debug_info)

        # Ensure output directory exists
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        doc.save(output_path)
        return {
            'success': True,
            'output_path': output_path,
            # Already de-duplicated by _docx_record; keep first-fill order.
            'fields_filled': list(fields_filled),
            'total_fields': len(fields_filled),
            'debug_info': debug_info
        }
    except Exception as e:
        import traceback
        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}
async def fill_excel_form(template_path: str, data: Dict[str, Any], output_path: str) -> dict:
    """Fill the active worksheet of an Excel template and save it.

    Every string cell is checked against each key in *data*:

    * if the cell text contains the key (case-insensitive, colons ignored),
      the value is written to the next cell of the row when that cell is empty;
    * ``{{key}}``, ``{key}`` and ``[key]`` placeholders inside the cell are
      replaced by the value.

    Args:
        template_path: Path of the workbook to open.
        data: Mapping of field names to the values to insert.
        output_path: Where the filled workbook is saved; missing parent
            directories are created.

    Returns:
        On success, a dict with ``success``, ``output_path``,
        ``fields_filled`` (keys in the order they were first filled) and
        ``total_fields``. On any exception,
        ``{'error': <message and traceback>, 'success': False}``.
    """
    try:
        wb = load_workbook(template_path)
        ws = wb.active
        fields_filled = []

        def mark_filled(key):
            # Keep one entry per key, in first-fill order.
            if key not in fields_filled:
                fields_filled.append(key)

        # Iterate through all cells
        for row in ws.iter_rows():
            for cell_idx, cell in enumerate(row):
                if not (cell.value and isinstance(cell.value, str)):
                    continue

                cell_text = cell.value.strip()
                # Label matching uses the text the cell had before any
                # placeholder substitution.
                cell_normalized = cell_text.lower().replace(":", "").strip()
                next_cell = row[cell_idx + 1] if cell_idx + 1 < len(row) else None

                for key, value in data.items():
                    key_normalized = key.lower().replace(":", "").strip()

                    # If the cell contains the field label, fill the next cell.
                    # NOTE(review): a placeholder such as "{{Name}}" also
                    # contains the key, so it triggers this branch as well -
                    # confirm that is intended.
                    if key_normalized in cell_normalized:
                        if next_cell is not None and (
                            not next_cell.value or str(next_cell.value).strip() == ""
                        ):
                            next_cell.value = str(value)
                            mark_filled(key)

                    # Placeholders: substitute into the *current* text. Working
                    # from the original text each time turned "{{Name}}" into
                    # "{John}" (the "{Name}" form still matched and overwrote
                    # the first result) and kept only the last substitution
                    # when a cell held several placeholders.
                    for placeholder in (f"{{{{{key}}}}}", f"{{{key}}}", f"[{key}]"):
                        if placeholder in cell_text:
                            cell_text = cell_text.replace(placeholder, str(value))
                            cell.value = cell_text
                            mark_filled(key)

        # Ensure output directory exists
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        wb.save(output_path)
        return {
            'success': True,
            'output_path': output_path,
            'fields_filled': list(fields_filled),
            'total_fields': len(fields_filled)
        }
    except Exception as e:
        import traceback
        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}
async def fill_text_form(template_path: str, data: Dict[str, Any], output_path: str) -> dict:
    """Substitute placeholders in a UTF-8 text template and write the result.

    For each key in *data*, the forms ``{{key}}``, ``{key}`` and ``[key]``
    are looked up in that order and every occurrence is replaced by
    ``str(value)``. Missing parent directories of *output_path* are created.

    Returns a dict with ``success``, ``output_path``, ``fields_filled`` (keys
    that had at least one placeholder, in *data* order) and ``total_fields``;
    on any exception, ``{'error': <message and traceback>, 'success': False}``.
    """
    try:
        text = Path(template_path).read_text(encoding='utf-8')

        replaced_keys = []
        for field, field_value in data.items():
            tokens = (
                f"{{{{{field}}}}}",
                f"{{{field}}}",
                f"[{field}]",
            )
            for token in tokens:
                # Checked one at a time against the updated text, so the
                # double-brace form is consumed before the single-brace one.
                if token not in text:
                    continue
                text = text.replace(token, str(field_value))
                if field not in replaced_keys:
                    replaced_keys.append(field)

        # Ensure output directory exists
        destination = Path(output_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(text, encoding='utf-8')

        return {
            'success': True,
            'output_path': output_path,
            'fields_filled': replaced_keys,
            'total_fields': len(replaced_keys)
        }
    except Exception as e:
        import traceback
        return {'error': f"{str(e)}\n\n{traceback.format_exc()}", 'success': False}